In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
from pyspark.sql.functions import col, max

# Obtain the notebook's Spark session (an already-running session is reused).
spark = (
    SparkSession.builder
    .appName("Create Table tbl_maxval")
    .getOrCreate()
)

# Column layout of tbl_maxval: a string column followed by two integer
# columns; every column is declared nullable.
schema = (
    StructType()
    .add("col1", StringType(), True)
    .add("col2", IntegerType(), True)
    .add("col3", IntegerType(), True)
)

# Sample rows, given in schema order as (col1, col2, col3).
data = [
    ("a", 10, 20),
    ("b", 50, 30),
]

# Materialize the rows as a DataFrame with the explicit schema above.
df = spark.createDataFrame(data, schema)

# Render the DataFrame in the notebook output.
df.display()



col1,col2,col3
a,10,20
b,50,30


In [0]:

# Expose the DataFrame to Spark SQL as the temporary view "tbl_maxval".
# The %sql cells further down query this view by name, so this cell has to
# run before them.
df.createOrReplaceTempView("tbl_maxval")

# Sanity check: read every row back through the view and render it.
tbl_maxval_rows = spark.sql("SELECT * FROM tbl_maxval")
tbl_maxval_rows.display()

col1,col2,col3
a,10,20
b,50,30


In [0]:
%sql
-- Stack the col2 and col3 values into a single column, one row per
-- (col1, value) pair. UNION ALL keeps duplicates; the result column takes
-- its name (col2) from the first branch.
SELECT col1, col2
  FROM tbl_maxval
UNION ALL
SELECT col1, col3
  FROM tbl_maxval

col1,col2
a,10
b,50
a,20
b,30


In [0]:
%sql
-- For each col1 key, return the largest value found in either col2 or col3.
-- The CTE first stacks both columns into one ("val"), then MAX picks the
-- greatest stacked value per key.
WITH stacked AS (
  SELECT col1, col2 AS val
    FROM tbl_maxval
  UNION ALL
  SELECT col1, col3 AS val
    FROM tbl_maxval
)
SELECT col1,
       MAX(val) AS maxval
  FROM stacked
 GROUP BY col1;

col1,maxval
a,20
b,50


In [0]:
# DataFrame-API version of the CTE query: stack col2 and col3 into a single
# "col2" column, then take the per-key maximum.
col2_rows = df.select("col1", "col2")
col3_rows = df.select("col1", col("col3").alias("col2"))  # rename so both halves line up

# unionAll keeps duplicate rows, matching SQL UNION ALL.
cte = col2_rows.unionAll(col3_rows)

# Largest stacked value for each col1 key, exposed as "maxval".
# `max` here is the pyspark.sql.functions aggregate imported at the top of
# the notebook, not Python's builtin.
result = (
    cte
    .groupBy("col1")
    .agg(max("col2").alias("maxval"))
)

# Render the aggregated result.
result.display()

col1,maxval
a,20
b,50
