-- Implement your solution here

-- Winner of every group: the player with the highest total score over all
-- matches they took part in; ties are broken by the lowest player_id.

-- One row per (player, match appearance). union all is required here: plain
-- union would collapse two matches in which a player happened to record the
-- same score into a single row and under-count that player's total.
with player_details as (
    select
        first_player as player_id,
        first_score as player_score
    from matches
    union all
    select
        second_player as player_id,
        second_score as player_score
    from matches
),

-- Total score per player that appears in at least one match.
player_total_score as (
    select
        player_id,
        sum(player_score) as player_score
    from player_details
    group by player_id
),

-- Rank every registered player inside their group. Players without any match
-- have no row in player_total_score; their total is treated as 0 so that a
-- NULL cannot sort ahead of real scores under "desc" (NULLs sort first in
-- descending order on engines such as PostgreSQL).
player_group as (
    select
        p.group_id,
        p.player_id,
        coalesce(t.player_score, 0) as player_score,
        row_number() over (
            partition by p.group_id
            order by coalesce(t.player_score, 0) desc, p.player_id asc
        ) as row_no
    from players as p
    left join player_total_score as t
        on t.player_id = p.player_id
)

select
    group_id,
    player_id as winner_id
from player_group
where row_no = 1
-- Explicit ordering keeps the result deterministic.
order by group_id



In [33]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import lit, round
from pyspark.sql.types import IntegerType

class CouncilsJob:
    """Build one table of councils enriched with property statistics.

    The job reads CSV files located under ``input_directory``, stacks the
    council files into a single DataFrame tagged with a ``council_type``
    column, and left-joins the ``avg_price_nov_2019`` and
    ``sales_volume_sep_2019`` figures onto every council.
    """

    # (file name under england_councils/, value written to council_type),
    # listed in the order in which the files are stacked.
    _COUNCIL_SOURCES = (
        ('district_councils.csv', 'District Council'),
        ('london_boroughs.csv', 'London Borough'),
        ('metropolitan_districts.csv', 'Metropolitan District'),
        ('unitary_authorities.csv', 'Unitary Authority'),
    )

    def __init__(self, input_directory="data"):
        """Create (or reuse) a local Spark session.

        Args:
            input_directory: Directory that contains the input CSV files.
                Defaults to ``"data"``.
        """
        self.spark_session = (SparkSession.builder
                                          .master("local[*]")
                                          .appName("EnglandCouncilsJob")
                                          .getOrCreate())
        self.input_directory = input_directory

    def _read_csv(self, relative_path):
        """Read a CSV with a header row, relative to ``input_directory``."""
        return self.spark_session.read.csv(f'{self.input_directory}/{relative_path}', header=True)

    def extract_councils(self):
        """Return all council files stacked into one DataFrame.

        Every row carries an extra ``council_type`` column holding the label
        of the file it came from.
        """
        councils_df = None
        for file_name, council_type in self._COUNCIL_SOURCES:
            typed_df = (self._read_csv(f'england_councils/{file_name}')
                            .withColumn('council_type', lit(council_type)))
            # union() matches columns by position, so all council files are
            # expected to share the same column layout.
            councils_df = typed_df if councils_df is None else councils_df.union(typed_df)
        return councils_df

    def extract_avg_price(self):
        """Return ``council`` and ``avg_price_nov_2019`` rounded to 1 decimal."""
        avg_price_df = (self._read_csv('property_avg_price.csv')
                            .select('local_authority', 'avg_price_nov_2019'))
        avg_price_df = avg_price_df.withColumn('avg_price_nov_2019', round(avg_price_df.avg_price_nov_2019, 1))
        return avg_price_df.withColumnRenamed('local_authority', 'council')

    def extract_sales_volume(self):
        """Return ``council`` and ``sales_volume_sep_2019`` cast to integer."""
        sales_volume_df = (self._read_csv('property_sales_volume.csv')
                               .select('local_authority', 'sales_volume_sep_2019'))
        sales_volume_df = sales_volume_df.withColumn('sales_volume_sep_2019', sales_volume_df.sales_volume_sep_2019.cast(IntegerType()))
        return sales_volume_df.withColumnRenamed('local_authority', 'council')

    def transform(self, councils_df, avg_price_df, sales_volume_df):
        """Left-join price and sales figures onto the councils.

        Args:
            councils_df: DataFrame with ``council``, ``county`` and
                ``council_type`` columns.
            avg_price_df: DataFrame with ``council`` and
                ``avg_price_nov_2019`` columns.
            sales_volume_df: DataFrame with ``council`` and
                ``sales_volume_sep_2019`` columns.

        Returns:
            DataFrame with one row per input council row and the columns
            ``council``, ``county``, ``council_type``, ``avg_price_nov_2019``
            and ``sales_volume_sep_2019``; figures are null when a council has
            no match.
        """
        # Joining on the column name keeps a single 'council' column in the
        # result, so no renaming is needed to avoid ambiguous references.
        enriched_df = (councils_df
                       .join(avg_price_df, on='council', how='left')
                       .join(sales_volume_df, on='council', how='left'))
        return enriched_df.select('council', 'county', 'council_type', 'avg_price_nov_2019', 'sales_volume_sep_2019')

    def run(self):
        """Run every extract step and return the transformed DataFrame."""
        return self.transform(self.extract_councils(), self.extract_avg_price(), self.extract_sales_volume())
        
# Driver: build the job, run the whole pipeline and print the schema of the
# resulting DataFrame.
obj = CouncilsJob()
# Debugging aids: uncomment one line to look at a single extract step instead
# of the full pipeline.
#outputdf = obj.extract_councils()
#outputdf = obj.extract_avg_price()
#outputdf = obj.extract_sales_volume()
outputdf = obj.run()
# Optional checks of the result: row count and a sample of rows.
#outputdf.count()
#outputdf.show()
outputdf.printSchema()

root
 |-- council: string (nullable = true)
 |-- county: string (nullable = true)
 |-- council_type: string (nullable = false)
 |-- avg_price_nov_2019: double (nullable = true)
 |-- sales_volume_sep_2019: integer (nullable = true)

