In [0]:
from pyspark import SparkConf
from pyspark.sql.session import SparkSession
from pyspark.sql.types import StructType, StructField,StringType,IntegerType
from pyspark.sql.functions import col,sum

# Build (or reuse) the session for application "app" against the "local[2]" master.
spark = (
    SparkSession.builder
    .appName("app")
    .master("local[2]")
    .getOrCreate()
)

In [0]:
# Three non-nullable columns: department id, revenue amount, and month label.
schema = StructType(
    [
        StructField("id", IntegerType(), nullable=False),
        StructField("revenue", IntegerType(), nullable=False),
        StructField("month", StringType(), nullable=False),
    ]
)

# One (id, revenue, month) tuple per row; a department may appear in several months.
data = [
    (1, 8000, "Jan"),
    (2, 9000, "Jan"),
    (3, 10000, "Feb"),
    (1, 7000, "Feb"),
    (1, 6000, "Mar"),
]

dept = spark.createDataFrame(data=data, schema=schema)
dept.show()


+---+-------+-----+
| id|revenue|month|
+---+-------+-----+
|  1|   8000|  Jan|
|  2|   9000|  Jan|
|  3|  10000|  Feb|
|  1|   7000|  Feb|
|  1|   6000|  Mar|
+---+-------+-----+



In [0]:
# Reformat the table so that there is a department id column and one revenue
# column for every month of the year, even when a month has no data (such
# months come out as null).
# The result table may be returned in any order.
#
# The pivot values are written as literals instead of being read from
# calendar.month_abbr: that sequence follows the current locale, so under a
# non-English locale it would no longer match the English labels stored in the
# `month` column and every pivoted cell would be null.
months = [
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
]

# Passing the explicit value list fixes both the set and the order of the
# output columns, regardless of which months occur in the data.
(
    dept.groupBy("id")
    .pivot("month", months)
    .sum("revenue")
    .orderBy(col("id"))
    .show()
)


+---+----+-----+----+----+----+----+----+----+----+----+----+----+
| id| Jan|  Feb| Mar| Apr| May| Jun| Jul| Aug| Sep| Oct| Nov| Dec|
+---+----+-----+----+----+----+----+----+----+----+----+----+----+
|  1|8000| 7000|6000|null|null|null|null|null|null|null|null|null|
|  2|9000| null|null|null|null|null|null|null|null|null|null|null|
|  3|null|10000|null|null|null|null|null|null|null|null|null|null|
+---+----+-----+----+----+----+----+----+----+----+----+----+----+



In [0]:
# Same pivot expressed in Spark SQL: expose the DataFrame as a temp view,
# then pivot on the twelve month labels and sort by department id.
dept.createOrReplaceTempView("dept")

# Adjacent string literals are joined by Python into one single-line query.
pivot_sql = (
    "select * from dept "
    "pivot( sum(revenue) for `month` in "
    "('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', "
    "'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')) "
    "order by id"
)
spark.sql(pivot_sql).show()

+---+----+-----+----+----+----+----+----+----+----+----+----+----+
| id| Jan|  Feb| Mar| Apr| May| Jun| Jul| Aug| Sep| Oct| Nov| Dec|
+---+----+-----+----+----+----+----+----+----+----+----+----+----+
|  1|8000| 7000|6000|null|null|null|null|null|null|null|null|null|
|  2|9000| null|null|null|null|null|null|null|null|null|null|null|
|  3|null|10000|null|null|null|null|null|null|null|null|null|null|
+---+----+-----+----+----+----+----+----+----+----+----+----+----+



In [0]:
# Stop the Spark session that was created at the top of the notebook.
spark.stop()