In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import lit

spark = SparkSession.builder.appName("UnionOperations").getOrCreate()

commentary_data = [
    (1, "Great shot by Kohli!", "4 runs"),
    (2, "Bowled him!", "Wicket"),
    (3, "Six over long-on!", "6 runs")
]
over_data = [
    (1, 12.4, 4, 1),
    (2, 13.2, 0, 1),
    (3, 14.1, 6, 0)
]
commentary_df = spark.createDataFrame(
    commentary_data,
    ["match_id", "commentary_text", "outcome"]
)

over_df = spark.createDataFrame(
    over_data,
    ["match_id", "over_rate", "runs", "wickets"]
)

commentary_for_union = commentary_df.withColumn("over_rate", lit(None).cast("double")) \
                                   .withColumn("runs", lit(None).cast("int")) \
                                   .withColumn("wickets", lit(None).cast("int")) \
                                   .withColumn("commentary_text", lit(None).cast("string")) \
                                   .withColumn("outcome", lit(None).cast("string"))
over_for_union = over_df.withColumn("commentary_text", lit(None).cast("string")) \
                        .withColumn("outcome", lit(None).cast("string"))
union_df = commentary_for_union.select("match_id", "over_rate", "runs", "wickets", "commentary_text", "outcome") \
                              .union(over_for_union.select("match_id", "over_rate", "runs", "wickets", "commentary_text", "outcome"))
union_by_name_df = commentary_df.unionByName(over_df, allowMissingColumns=True)

print("=== Standard Union (position-based) ===")
union_df.show()

print("\n=== Union by Name (Spark 3.1+) ===")
union_by_name_df.show()
spark.stop()

=== Standard Union (position-based) ===
+--------+---------+----+-------+---------------+-------+
|match_id|over_rate|runs|wickets|commentary_text|outcome|
+--------+---------+----+-------+---------------+-------+
|       1|     NULL|NULL|   NULL|           NULL|   NULL|
|       2|     NULL|NULL|   NULL|           NULL|   NULL|
|       3|     NULL|NULL|   NULL|           NULL|   NULL|
|       1|     12.4|   4|      1|           NULL|   NULL|
|       2|     13.2|   0|      1|           NULL|   NULL|
|       3|     14.1|   6|      0|           NULL|   NULL|
+--------+---------+----+-------+---------------+-------+


=== Union by Name (Spark 3.1+) ===
+--------+--------------------+-------+---------+----+-------+
|match_id|     commentary_text|outcome|over_rate|runs|wickets|
+--------+--------------------+-------+---------+----+-------+
|       1|Great shot by Kohli!| 4 runs|     NULL|NULL|   NULL|
|       2|         Bowled him!| Wicket|     NULL|NULL|   NULL|
|       3|   Six over long-o