In [0]:
from pyspark import SparkConf
from pyspark.sql.session import SparkSession
from pyspark.sql.types import StructType,StructField,IntegerType
from pyspark.sql.functions import count,max,col

# Create (or reuse, if one already exists) a Spark session running locally
# on all available cores.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("app")
    .getOrCreate()
)

In [0]:
# One non-nullable integer column called "num".
schema = StructType([StructField("num", IntegerType(), False)])

# Note: createDataFrame expects each row to be a list or a tuple, so every
# value is wrapped in a one-element tuple. Passing bare ints instead fails with:
#   StructType can not accept object 8 in type <class 'int'>
# In this sample every number appears exactly twice.
data = [(value,) for value in (8, 8, 3, 3, 1, 1, 6, 6)]

nums = spark.createDataFrame(data, schema)
nums.show()

+---+
|num|
+---+
|  8|
|  8|
|  3|
|  3|
|  1|
|  1|
|  6|
|  6|
+---+



In [0]:
# A "single number" is a number that appears exactly once in the MyNumbers table.
# Find the largest single number; if there is no single number, report null.
#
# Case: no single number exists (every value in `nums` is duplicated), so the
# final aggregation runs over zero rows and yields null.
(
    nums.groupBy("num")
    .agg(count("num").alias("count"))   # occurrences of each distinct number
    .where(col("count") == 1)           # keep only numbers seen exactly once
    .select("num")
    .agg(max("num").alias("num"))       # `max` is pyspark.sql.functions.max, not the builtin
    .show()
)

+----+
| num|
+----+
|null|
+----+



In [0]:
# Second sample: 8 and 3 are duplicated, while 1, 2, 5 and 6 each appear once.
# Rows are one-element tuples because createDataFrame needs list/tuple rows.
data = [(value,) for value in (8, 8, 3, 3, 1, 2, 5, 6)]

nums1 = spark.createDataFrame(data, schema)
nums1.show()

+---+
|num|
+---+
|  8|
|  8|
|  3|
|  3|
|  1|
|  2|
|  5|
|  6|
+---+



In [0]:
# Case: single numbers are present in `nums1`, so the largest of them is reported.
(
    nums1.groupBy("num")
    .agg(count("num").alias("count"))   # occurrences of each distinct number
    .filter(col("count") == 1)          # keep only numbers seen exactly once
    .select("num")
    .agg(max("num").alias("num"))       # `max` is pyspark.sql.functions.max, not the builtin
    .show()
)

+---+
|num|
+---+
|  6|
+---+



In [0]:
# Expose the second sample (`nums1`) to Spark SQL. Note the view is registered
# under the name "nums", which is unrelated to the Python variable `nums`.
nums1.createOrReplaceTempView("nums")

# Same question in SQL: the inner query keeps numbers that occur exactly once,
# the outer query takes their maximum. "nums1" in the SQL text is only the
# subquery alias, not the DataFrame variable.
spark.sql(
    "select max(num) as num from (select num from nums group by num having count(*)=1) nums1"
).show()

+---+
|num|
+---+
|  6|
+---+



In [0]:
# Shut down the Spark session created at the top of the notebook; cells that
# use `spark` cannot be re-run after this without re-creating the session.
spark.stop()