In [0]:
from pyspark import SparkConf
from pyspark.sql.session import SparkSession
from pyspark.sql.types import StructField,StructType,StringType,IntegerType
from pyspark.sql.functions import col, count

# Create (or reuse) the Spark session for this notebook; "local[2]" runs
# Spark in-process on the local machine with two worker threads.
spark = (
    SparkSession.builder
    .appName("app")
    .master("local[2]")
    .getOrCreate()
)

In [0]:
# Students: one row per student; both columns are declared non-nullable.
schema = StructType(
    [
        StructField("student_id", IntegerType(), nullable=False),
        StructField("student_name", StringType(), nullable=False),
    ]
)
data = [
    (1, "Alice"),
    (2, "Bob"),
    (13, "John"),
    (6, "Alex"),
]
stu = spark.createDataFrame(data, schema=schema)
stu.show()


+----------+------------+
|student_id|student_name|
+----------+------------+
|         1|       Alice|
|         2|         Bob|
|        13|        John|
|         6|        Alex|
+----------+------------+



In [0]:
# Subjects: a single non-nullable column holding each subject's name.
# Each row is a one-element tuple, hence the trailing comma.
schema = StructType(
    [
        StructField("subject_name", StringType(), nullable=False),
    ]
)
data = [
    ("Math",),
    ("Physics",),
    ("Programming",),
]
sub = spark.createDataFrame(data, schema=schema)
sub.show()

+------------+
|subject_name|
+------------+
|        Math|
|     Physics|
| Programming|
+------------+



In [0]:
# Exam attendance: (student_id, subject_name) rows. The same pair may occur
# several times (e.g. student 1 / Math appears three times), and the
# solution cells below count those repeated rows per pair.
schema = StructType(
    [
        StructField("student_id", IntegerType(), nullable=False),
        StructField("subject_name", StringType(), nullable=False),
    ]
)
data = [
    (1, "Math"),
    (1, "Physics"),
    (1, "Programming"),
    (2, "Programming"),
    (1, "Physics"),
    (1, "Math"),
    (13, "Math"),
    (13, "Programming"),
    (13, "Physics"),
    (2, "Math"),
    (1, "Math"),
]

exam = spark.createDataFrame(data, schema=schema)
exam.show()

+----------+------------+
|student_id|subject_name|
+----------+------------+
|         1|        Math|
|         1|     Physics|
|         1| Programming|
|         2| Programming|
|         1|     Physics|
|         1|        Math|
|        13|        Math|
|        13| Programming|
|        13|     Physics|
|         2|        Math|
|         1|        Math|
+----------+------------+



In [0]:
# Task: find the number of times each student attended each exam,
# ordered by student_id and then subject_name.

# Every (student, subject) combination, whether or not an exam was attended.
stu_sub = stu.crossJoin(sub)

# Match an exam row to a combination only when both the student and the
# subject agree.
same_student = stu_sub.student_id == exam.student_id
same_subject = stu_sub.subject_name == exam.subject_name

# The left join keeps combinations without any exam row; for those rows
# exam.student_id is NULL and count() ignores NULLs, so they report 0.
(
    stu_sub
    .join(exam, same_student & same_subject, "left")
    .groupBy(stu_sub.student_id, stu_sub.student_name, stu_sub.subject_name)
    .agg(count(exam.student_id).alias("attended_exams"))
    .orderBy(stu_sub.student_id, stu_sub.subject_name)
    .show()
)


+----------+------------+------------+--------------+
|student_id|student_name|subject_name|attended_exams|
+----------+------------+------------+--------------+
|         1|       Alice|        Math|             3|
|         1|       Alice|     Physics|             2|
|         1|       Alice| Programming|             1|
|         2|         Bob|        Math|             1|
|         2|         Bob|     Physics|             0|
|         2|         Bob| Programming|             1|
|         6|        Alex|        Math|             0|
|         6|        Alex|     Physics|             0|
|         6|        Alex| Programming|             0|
|        13|        John|        Math|             1|
|        13|        John|     Physics|             1|
|        13|        John| Programming|             1|
+----------+------------+------------+--------------+



In [0]:
# Expose the three DataFrames to Spark SQL under the names used in the query.
stu.createOrReplaceTempView("stu")
sub.createOrReplaceTempView("sub")
exam.createOrReplaceTempView("exam")

# Count exam rows per (student, subject) combination using SQL.
# - The student x subject product is written as an explicit CROSS JOIN rather
#   than a comma join mixed with LEFT JOIN: the comma form only resolves
#   stu.student_id in the ON clause because Spark builds the joins left-deep,
#   and it can be rejected as an implicit cartesian product on Spark setups
#   where cross joins are not enabled.
# - count(exam.student_id) already returns 0 (never NULL) for combinations
#   with no matching exam row, so no coalesce() wrapper is needed.
# - Grouping and ordering use column names instead of ordinals so the query
#   keeps working if the select list is reordered.
spark.sql("""
    SELECT stu.student_id,
           sub.subject_name,
           count(exam.student_id) AS attended_exams
    FROM stu
    CROSS JOIN sub
    LEFT JOIN exam
           ON stu.student_id = exam.student_id
          AND sub.subject_name = exam.subject_name
    GROUP BY stu.student_id, sub.subject_name
    ORDER BY student_id, subject_name
""").show()

+----------+------------+--------------+
|student_id|subject_name|attended_exams|
+----------+------------+--------------+
|         1|        Math|             3|
|         1|     Physics|             2|
|         1| Programming|             1|
|         2|        Math|             1|
|         2|     Physics|             0|
|         2| Programming|             1|
|         6|        Math|             0|
|         6|     Physics|             0|
|         6| Programming|             0|
|        13|        Math|             1|
|        13|     Physics|             1|
|        13| Programming|             1|
+----------+------------+--------------+



In [0]:
# Stop the Spark session created at the top of the notebook; it is the last
# cell, so nothing after it needs `spark`.
spark.stop()