In [0]:
from pyspark import SparkConf
from pyspark.sql.session import SparkSession
from pyspark.sql.types import StructType,StructField,StringType,IntegerType
from pyspark.sql.functions import col,count,round

# Create (or reuse) a Spark session named "app" with master "local[2]".
spark = (
    SparkSession.builder
    .appName("app")
    .master("local[2]")
    .getOrCreate()
)

In [0]:
# Schema for the users table: both columns are declared non-nullable.
schema = StructType(
    [
        StructField("user_id", IntegerType(), nullable=False),
        StructField("user_name", StringType(), nullable=False),
    ]
)

# Rows are (user_id, user_name).
data = [
    (6, "Alice"),
    (2, "Bob"),
    (7, "Alex"),
]

users = spark.createDataFrame(data, schema=schema)
users.show()

+-------+---------+
|user_id|user_name|
+-------+---------+
|      6|    Alice|
|      2|      Bob|
|      7|     Alex|
+-------+---------+



In [0]:
# Schema for the registrations table: both columns are declared non-nullable.
schema = StructType(
    [
        StructField("contest_id", IntegerType(), nullable=False),
        StructField("user_id", IntegerType(), nullable=False),
    ]
)

# Rows are (contest_id, user_id).
data = [
    (215, 6),
    (209, 2),
    (208, 2),
    (210, 6),
    (208, 6),
    (209, 7),
    (209, 6),
    (215, 7),
    (208, 7),
    (210, 2),
    (207, 2),
    (210, 7),
]

register = spark.createDataFrame(data, schema=schema)
register.show()

+----------+-------+
|contest_id|user_id|
+----------+-------+
|       215|      6|
|       209|      2|
|       208|      2|
|       210|      6|
|       208|      6|
|       209|      7|
|       209|      6|
|       215|      7|
|       208|      7|
|       210|      2|
|       207|      2|
|       210|      7|
+----------+-------+



In [0]:
# Task: for each contest, compute the percentage of users registered in it,
# rounded to two decimals. Order the result by percentage (descending);
# break ties by contest_id (ascending).

# Denominator: number of rows in the users table.
total_users = users.count()

# Per-contest registration count scaled to a percentage of all users.
# NOTE: `round` here is pyspark.sql.functions.round, not the Python built-in.
registered_share = 100 * count("user_id") / total_users
percentage = round(registered_share, 2).alias("percentage")

(
    register
    .groupBy("contest_id")
    .agg(percentage)
    .sort(col("percentage").desc(), "contest_id")
    .show()
)

+----------+----------+
|contest_id|percentage|
+----------+----------+
|       208|     100.0|
|       209|     100.0|
|       210|     100.0|
|       215|     66.67|
|       207|     33.33|
+----------+----------+



In [0]:
# Same result via Spark SQL: expose both DataFrames as temp views
# ("u" = users, "r" = register) so the query can reference them by name.
users.createOrReplaceTempView("u")
register.createOrReplaceTempView("r")

# Column ordinals: `group by 1` is contest_id; `order by 2 desc,1` is
# percentage descending, then contest_id ascending.
percentage_query = "select contest_id, round(100*count(*)/(select count(*) from u),2) as percentage from r group by 1 order by 2 desc,1"
spark.sql(percentage_query).show()

+----------+----------+
|contest_id|percentage|
+----------+----------+
|       208|     100.0|
|       209|     100.0|
|       210|     100.0|
|       215|     66.67|
|       207|     33.33|
+----------+----------+



In [0]:
# Shut down the Spark session created at the top of the notebook.
spark.stop()