- Create a Spark DataFrame from the `players_male_base` table in the silver database

In [0]:
# Load the silver-layer base table. Despite its name, this DataFrame holds the
# raw, unaggregated table rows; filtering and averaging happen in later cells.
gm_avg_rating_df = spark.read.table('silver.players_male_base')

- Filter to active GM players, then count them and compute their average rating per country

In [0]:
from pyspark.sql import functions as F

# Keep a row only when ALL of the following hold:
#   - rating is present and non-zero
#   - title is exactly 'GM'
#   - flag is NULL (rows carrying any flag value are dropped; presumably a set
#     flag marks a non-active player -- TODO confirm against the silver schema)
filtered_df = (
    gm_avg_rating_df
    .where(F.col('rating') != 0)
    .where(F.col('rating').isNotNull())
    .where(F.col('title') == 'GM')
    .where(F.col('flag').isNull())
)

# One row per country: mean rating and number of remaining players.
# Sorting is by player count, then by the *unrounded* average (both descending);
# the average is rounded to 1 decimal only afterwards, for presentation.
average_rating_by_country = (
    filtered_df
    .groupBy('country')
    .agg(
        F.avg('rating').alias('average_rating'),
        F.count('*').alias('players'),  # number of rows (players) in the country group
    )
    .orderBy(F.col('players').desc(), F.col('average_rating').desc())
    .withColumn('average_rating', F.round('average_rating', 1))
)

# Render the per-country summary in the notebook
display(average_rating_by_country)

country,average_rating,players
RUS,2475.0,79
GER,2478.1,74
IND,2527.1,67
USA,2519.3,66
FRA,2494.0,47
UKR,2520.6,46
ESP,2472.9,46
SRB,2411.5,46
ISR,2475.7,42
POL,2509.0,41


In [0]:
# Persist the per-country summary to the gold layer, replacing any existing
# contents of the target table on each run.
average_rating_by_country.write.saveAsTable(
    'gold.M_GM_average_rating_by_country',
    mode='overwrite',
)