In [1]:
from pyspark.sql import *
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [2]:
# creating spark session object
spark = SparkSession.builder.master("local[*]").getOrCreate()

In [6]:
# Transpose row to column
# To transpose columns to rows: Use selectExpr with union in DataFrames.

data = [
    ("Hi", "Hello"),
    ("How", "There"),
    ("Are", "You")
]

columns = ["greeting1", "greeting2"]
df = spark.createDataFrame(data, columns)

df.selectExpr("greeting1 as greeting").union(df.selectExpr("greeting2 as greeting")).show(20,False)

# Spark sql
df.createOrReplaceTempView("temp")
spark.sql("""select greeting1 as greeting from temp union select greeting2 as greeting from temp""").show(5,False)


+--------+
|greeting|
+--------+
|Hi      |
|How     |
|Are     |
|Hello   |
|There   |
|You     |
+--------+

+--------+
|greeting|
+--------+
|Hi      |
|How     |
|Are     |
|Hello   |
|There   |
+--------+
only showing top 5 rows



In [21]:
# Transpose rows to column
# To transpose rows to columns: Use groupBy with pivot in DataFrames.
data = [
    ("A", "value1"),
    ("A", "value2"),
    ("B", "value3"),
    ("B", "value4")
]

columns = ["category", "value"]
df = spark.createDataFrame(data, columns)

df.groupBy("category").pivot("value").count().show(5,False)

# spark sql
# In SQL use case statement with group by
df.createOrReplaceTempView("temp")
spark.sql("""select category, count(case when value =='value1' then 1 else null end) as value1 from temp group by category""").show(5,False)


+--------+------+------+------+------+
|category|value1|value2|value3|value4|
+--------+------+------+------+------+
|B       |null  |null  |1     |1     |
|A       |1     |1     |null  |null  |
+--------+------+------+------+------+

+--------+------+
|category|value1|
+--------+------+
|A       |1     |
|B       |0     |
+--------+------+

