<h2>Write PySpark code to find the winner in each group.
The winner in each group is the player who scored the maximum total points within the group. In the case of a tie, the lowest player_id wins.</h2>

In [5]:
from pyspark.sql import SparkSession

# Single Spark session shared by every cell in this notebook.
spark = (
    SparkSession.builder
    .appName('Group-Winner-By-Points')
    .getOrCreate()
)

In [44]:
# --- Players: one row per player together with the group it belongs to ---
schema = """
player_id int,
group_id int
"""
data = [
    # (player_id, group_id)
    (15, 1), (25, 1), (30, 1), (45, 1),
    (10, 2), (35, 2), (50, 2),
    (20, 3), (40, 3),
]

player_df = spark.createDataFrame(data=data, schema=schema)
player_df.show()

# --- Matches: one row per match with both participants and their scores ---
schema = """
match_id int,
first_player int,
second_player int,
first_score int,
second_score int
"""

data = [
    # (match_id, first_player, second_player, first_score, second_score)
    (1, 15, 45, 3, 0),
    (2, 30, 25, 1, 2),
    (3, 30, 15, 2, 0),
    (4, 40, 20, 5, 2),
    (5, 35, 50, 1, 1),
]

match_df = spark.createDataFrame(data=data, schema=schema)
match_df.show()

+---------+--------+
|player_id|group_id|
+---------+--------+
|       15|       1|
|       25|       1|
|       30|       1|
|       45|       1|
|       10|       2|
|       35|       2|
|       50|       2|
|       20|       3|
|       40|       3|
+---------+--------+

+--------+------------+-------------+-----------+------------+
|match_id|first_player|second_player|first_score|second_score|
+--------+------------+-------------+-----------+------------+
|       1|          15|           45|          3|           0|
|       2|          30|           25|          1|           2|
|       3|          30|           15|          2|           0|
|       4|          40|           20|          5|           2|
|       5|          35|           50|          1|           1|
+--------+------------+-------------+-----------+------------+



In [76]:
from pyspark.sql.functions import broadcast,col,expr,lit
from pyspark.sql.functions import sum as spark_sum

# Points each player earned in matches where they are listed as first_player.
player_wise_score_df1=player_df.join(broadcast(match_df),player_df.player_id==match_df.first_player).select(player_df.player_id,player_df.group_id,match_df.first_score.alias('score'))
# Points each player earned in matches where they are listed as second_player.
player_wise_score_df2=player_df.join(broadcast(match_df),player_df.player_id==match_df.second_player).select(player_df.player_id,player_df.group_id,match_df.second_score.alias('score'))

# The two joins above are inner joins, so a player without any recorded match
# would disappear from the totals (and a group made up only of such players
# would get no winner at all). Adding one zero-point row per player keeps
# everyone in the aggregate with a total of at least 0.
zero_score_df=player_df.select(player_df.player_id,player_df.group_id,lit(0).alias('score'))

# union() matches columns by position; all three inputs are laid out as
# (player_id, group_id, score).
player_wise_score=(
    player_wise_score_df1
    .union(player_wise_score_df2)
    .union(zero_score_df)
    .groupBy("group_id","player_id")
    .agg(spark_sum('score').alias('score'))
)

In [77]:
# NOTE: this cell rebuilds the same two sample DataFrames (same schemas, same
# rows) that the earlier setup cell in this notebook already creates.

# Players and the group each one is assigned to.
schema = """
player_id int,
group_id int
"""
data = [
    (15, 1),
    (25, 1),
    (30, 1),
    (45, 1),
    (10, 2),
    (35, 2),
    (50, 2),
    (20, 3),
    (40, 3),
]

player_df = spark.createDataFrame(data, schema=schema)
player_df.show()

# Match results: both participants and the points each of them scored.
schema = """
match_id int,
first_player int,
second_player int,
first_score int,
second_score int
"""

data = [
    (1, 15, 45, 3, 0),
    (2, 30, 25, 1, 2),
    (3, 30, 15, 2, 0),
    (4, 40, 20, 5, 2),
    (5, 35, 50, 1, 1),
]

match_df = spark.createDataFrame(data, schema=schema)
match_df.show()

+---------+--------+
|player_id|group_id|
+---------+--------+
|       15|       1|
|       25|       1|
|       30|       1|
|       45|       1|
|       10|       2|
|       35|       2|
|       50|       2|
|       20|       3|
|       40|       3|
+---------+--------+

+--------+------------+-------------+-----------+------------+
|match_id|first_player|second_player|first_score|second_score|
+--------+------------+-------------+-----------+------------+
|       1|          15|           45|          3|           0|
|       2|          30|           25|          1|           2|
|       3|          30|           15|          2|           0|
|       4|          40|           20|          5|           2|
|       5|          35|           50|          1|           1|
+--------+------------+-------------+-----------+------------+



In [83]:
from pyspark.sql.window import Window
from pyspark.sql.functions import col, row_number

# Rank players inside each group: highest total score first, and on equal
# scores the smaller player_id comes first (the tie-break rule).
window_spec=Window.partitionBy(col('group_id')).orderBy(col('score').desc(),col('player_id').asc())

# Keep only the top-ranked player per group. The DataFrame is assigned first
# and displayed separately: show() only prints and returns None, so chaining
# it into the assignment would leave final_df set to None.
final_df=(
    player_wise_score
    .withColumn('rn',row_number().over(window_spec))
    .filter(col('rn')==1)
    .drop('rn')
)
final_df.show()



+--------+---------+-----+
|group_id|player_id|score|
+--------+---------+-----+
|       1|       15|    3|
|       2|       35|    1|
|       3|       40|    5|
+--------+---------+-----+



                                                                                                    