In [0]:
from pyspark import SparkConf
from pyspark.sql.session import SparkSession
from pyspark.sql.types import StructField, StructType,StringType,IntegerType,LongType
from pyspark.sql.functions import col

# Obtain (or reuse) a SparkSession running in local mode with the "local[*]" master.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("app")
    .getOrCreate()
)

In [0]:
# Schema of the World table; every column is declared non-nullable.
# `gdp` is a LongType because the sample values exceed the 32-bit integer range.
schema = (
    StructType()
    .add("name", StringType(), False)
    .add("continent", StringType(), False)
    .add("area", IntegerType(), False)
    .add("population", IntegerType(), False)
    .add("gdp", LongType(), False)
)

# Sample rows: (name, continent, area, population, gdp).
data = [
    ("Afghanistan", "Asia", 652230, 25500100, 20343000000),
    ("Albania", "Europe", 28748, 2831741, 12960000000),
    ("Algeria", "Africa", 2381741, 37100000, 188681000000),
    ("Andorra", "Europe", 468, 78115, 3712000000),
    ("Angola", "Africa", 1246700, 20609294, 100990000000),
]

# Build the DataFrame from the rows and the explicit schema, then display it.
world = spark.createDataFrame(data, schema=schema)
world.show()


+-----------+---------+-------+----------+------------+
|       name|continent|   area|population|         gdp|
+-----------+---------+-------+----------+------------+
|Afghanistan|     Asia| 652230|  25500100| 20343000000|
|    Albania|   Europe|  28748|   2831741| 12960000000|
|    Algeria|   Africa|2381741|  37100000|188681000000|
|    Andorra|   Europe|    468|     78115|  3712000000|
|     Angola|   Africa|1246700|  20609294|100990000000|
+-----------+---------+-------+----------+------------+



In [0]:
# Problem: report the name, population, and area of every "big" country.
# A country counts as big when at least one of the following holds:
#     * its area is 3,000,000 km2 or more
#     * its population is 25,000,000 or more
# Rows may be returned in any order.

(
    world
    .filter((col("population") >= 25000000) | (col("area") >= 3000000))
    .select("name", "population", "area")
    .show()
)

+-----------+----------+-------+
|       name|population|   area|
+-----------+----------+-------+
|Afghanistan|  25500100| 652230|
|    Algeria|  37100000|2381741|
+-----------+----------+-------+



In [0]:
# Same "big countries" query, expressed in Spark SQL.
# createOrReplaceTempView registers the view as a side effect and returns None,
# so its result must NOT be assigned back to `world`: doing so would replace the
# DataFrame with None and break this cell (and any other cell using `world`)
# when it is run again.
world.createOrReplaceTempView("world")
spark.sql("select name,area,population from world where population>=25000000 or area>=3000000;").show()

+-----------+-------+----------+
|       name|   area|population|
+-----------+-------+----------+
|Afghanistan| 652230|  25500100|
|    Algeria|2381741|  37100000|
+-----------+-------+----------+



In [0]:
# Shut down the SparkSession created in the first cell now that the notebook is finished.
spark.stop()