# Advanced SQL puzzles with PySpark

In [1]:
import pyspark

In [2]:
from pyspark import SparkContext
from pyspark.sql import SparkSession

In [3]:
# Notebook-wide Spark session; getOrCreate() hands back an already-active
# session if there is one instead of starting a second.
spark = (
    SparkSession.builder
    .appName('PySpark DataFrame From RDD')
    .getOrCreate()
)

### Puzzle #1

In [48]:
# First item list for Puzzle #1: each row is a 1-tuple holding the item name.
rows1 = [
    ("Sugar",),
    ("Bread",),
    ("Juice",),
    ("Soda",),
    ("Flour",),
]
nameColumns1 = ["Item"]

# Build the frame from rows1, the list defined in this cell. The previous
# reference to an undefined `rows` only worked with leftover kernel state and
# raises NameError on a fresh Restart & Run All.
df1 = spark.createDataFrame(data=rows1, schema=nameColumns1)
df1.show(truncate=False)

+-----+
|Item |
+-----+
|Sugar|
|Bread|
|Juice|
|Soda |
|Flour|
+-----+



In [49]:
# Second item list for Puzzle #1; it shares "Sugar" and "Bread" with the first.
# Every row is a 1-tuple so it maps onto the single "Item" column.
rows1_ = [
    (item,)
    for item in ("Sugar", "Bread", "Butter", "Cheese", "Fruit")
]
nameColumns1_ = ["Item"]

df1_ = spark.createDataFrame(data=rows1_, schema=nameColumns1_)
df1_.show(truncate=False)

+------+
|Item  |
+------+
|Sugar |
|Bread |
|Butter|
|Cheese|
|Fruit |
+------+



In [50]:
# Outer join on Item: rows present in only one of the two frames are kept,
# with null filling the side that has no match.
(
    df1
    .join(df1_, on=df1["Item"] == df1_["Item"], how="outer")
    .show(truncate=False)
)

+-----+------+
|Item |Item  |
+-----+------+
|Bread|Bread |
|null |Butter|
|null |Cheese|
|Flour|null  |
|null |Fruit |
|Juice|null  |
|Soda |null  |
|Sugar|Sugar |
+-----+------+



### Puzzle #5

In [51]:
# Phone directory for Puzzle #5: (customer id, phone type, phone number).
# All three fields are stored as strings.
rows5 = [
    ("1001", "Cellular", "555-897-5421"),
    ("1001", "Work", "555-897-6542"),
    ("1001", "Home", "555-698-9874"),
    ("2002", "Cellular", "555-963-6544"),
    ("2002", "Work", "555-812-9856"),
    ("3003", "Cellular", "555-987-6541"),
]
nameColumns5 = ["Customer ID", "Type", "Phone Number"]

df5 = spark.createDataFrame(data=rows5, schema=nameColumns5)
df5.show(truncate=False)

+-----------+--------+------------+
|Customer ID|Type    |Phone Number|
+-----------+--------+------------+
|1001       |Cellular|555-897-5421|
|1001       |Work    |555-897-6542|
|1001       |Home    |555-698-9874|
|2002       |Cellular|555-963-6544|
|2002       |Work    |555-812-9856|
|3003       |Cellular|555-987-6541|
+-----------+--------+------------+



In [56]:
# One row per customer and one column per phone type; each cell holds the
# number of directory entries of that type (null when the customer has none).
# NOTE(review): this reports counts, not the phone numbers themselves - confirm
# that is what the puzzle asks for.
(
    df5
    .groupBy("Customer ID")
    .pivot("Type")
    .count()
    .show()
)

+-----------+--------+----+----+
|Customer ID|Cellular|Home|Work|
+-----------+--------+----+----+
|       3003|       1|null|null|
|       2002|       1|null|   1|
|       1001|       1|   1|   1|
+-----------+--------+----+----+

