# Good references

1. Usage with example (<a href='https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.GroupedData.pivot.html#pyspark.sql.GroupedData.pivot'>link</a>)

2. Docs (<a href="https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.GroupedData.pivot.html#pyspark.sql.GroupedData.pivot">link</a>)

In [1]:
from pyspark.sql import SparkSession

# Create (or reuse) the Spark session that the remaining cells rely on.
spark = (
    SparkSession.builder
    .appName("DataFrame.GroupData.pivot")
    .getOrCreate()
)

In [2]:
# Column names for the sample DataFrame, in the same order as each tuple's fields.
columns = ["year", "course", "earnings"]

# Sample rows: (year, course, earnings).
data = [
    (2012, "dotNET", 99920),
    (2013, "Python", 1004000),
    (2012, "Java", 34000),
]

In [3]:
# Build the DataFrame straight from the local rows, naming the columns after
# `columns`; there is no need to create an intermediate RDD first.
df = spark.createDataFrame(data, columns)

# Print every row without truncating long cell values.
df.show(truncate=False)

+----+------+--------+
|year|course|earnings|
+----+------+--------+
|2012|dotNET|99920   |
|2013|Python|1004000 |
|2012|Java  |34000   |
+----+------+--------+



In [4]:
# Sum earnings per year, producing one output column for each listed course.
# Only the courses named here become columns; other courses are left out.
pivot_courses = ["dotNET", "Java"]
earnings_by_year = df.groupBy("year").pivot("course", pivot_courses).sum("earnings")

# Years with no earnings for a listed course come back as null; show them as 0.
pivotDF = earnings_by_year.fillna(0)

pivotDF.show(truncate=False)

+----+------+-----+
|year|dotNET|Java |
+----+------+-----+
|2012|99920 |34000|
|2013|0     |0    |
+----+------+-----+



In [5]:
# Serialize each row of the pivoted frame to a JSON string, then collect the
# strings into a local list (displayed as the cell's output).
json_rows = pivotDF.toJSON()
json_rows.collect()

['{"year":2012,"dotNET":99920,"Java":34000}',
 '{"year":2013,"dotNET":0,"Java":0}']