In [0]:
%sql
-- Make sure the `gold` schema exists; a no-op when it is already present.
CREATE SCHEMA IF NOT EXISTS gold;

In [0]:
# `F` is referenced by the ordering/aggregation cells further down; import it
# here so the notebook runs top-to-bottom on a fresh kernel.
from pyspark.sql import functions as F

# Load the silver-layer tables that the cells below read from.
df_athletes = spark.table("silver.athletes")
df_coaches = spark.table("silver.coaches")
df_entriesGender = spark.table("silver.entries_gender")
df_medals = spark.table("silver.medals")
df_teams = spark.table("silver.teams")

In [0]:
# Print a plain-text preview of every loaded table, in load order.
for silver_frame in (df_athletes, df_coaches, df_entriesGender, df_medals, df_teams):
    silver_frame.show()

+--------------------+--------------------+-----------------+
|         person_name|             country|       discipline|
+--------------------+--------------------+-----------------+
|     ALLIKVEE Martin|             Estonia|         Swimming|
|       BARAKAT Hanna|           Palestine|        Athletics|
|     BEN ISMAIL Afef|             Tunisia|     Canoe Sprint|
|BISHINDEE Urantun...|            Mongolia|          Archery|
|       BRANSER Marie|Democratic Republ...|             Judo|
|         COLMAN Samy|             Morocco|       Equestrian|
|        CORDON Kevin|           Guatemala|        Badminton|
| DORNBACH Maximilian|             Germany|    Cycling Track|
|        EFOLOKO Jona|       Great Britain|        Athletics|
|    ELZOHEIRY Mariam|               Egypt|          Fencing|
|    GASPAROTTO Marta|               Italy|Baseball/Softball|
|           GIL Bryan|               Spain|         Football|
|TUNJUNG Gregoria ...|           Indonesia|        Badminton|
|HIGUITA

### 1️⃣ Top 10 Countries by Total Medals 

**📌 Purpose:**  
Visualize the most successful countries based on the total number of medals won.

In [0]:
# Ten rows of the medal table with the highest `total`, largest first.
total_descending = F.col("total").desc()
top_countries = df_medals.sort(total_descending).limit(10)
display(top_countries)

rank,team_country,gold,silver,bronze,total,rank_by_total
1,United States of America,39,41,33,113,1
2,People's Republic of China,38,32,18,88,2
5,ROC,20,28,23,71,3
4,Great Britain,22,21,22,65,4
3,Japan,27,14,17,58,5
6,Australia,17,7,22,46,6
10,Italy,10,10,20,40,7
9,Germany,10,11,16,37,8
7,Netherlands,10,12,14,36,9
8,France,10,12,11,33,10


Databricks visualization. Run in Databricks to view.

### 2️⃣ Gender Distribution by Discipline 

**📌 Purpose:**  
Visualize the distribution of male and female participants across different disciplines.

In [0]:
# Render the per-discipline entries table as-is (no transformation applied);
# the chart is configured through the Databricks visualization attached to this output.
display(df_entriesGender)

discipline,female,male,total
Surfing,20,20,40
Artistic Gymnastics,98,98,196
Wrestling,96,193,289
Artistic Swimming,105,0,105
Water Polo,122,146,268
Sport Climbing,20,20,40
Skateboarding,40,40,80
Marathon Swimming,25,25,50
Golf,60,60,120
Judo,192,201,393


Databricks visualization. Run in Databricks to view.

### 3️⃣ Number of Athletes per Country

**📌 Purpose:**  
Visualize the number of athletes representing each country (limited to the 15 countries with the most athletes).

In [0]:
# Count distinct athlete names per country and keep the 15 largest counts.
distinct_names = F.countDistinct("person_name").alias("num_athletes")
per_country = df_athletes.groupBy("country").agg(distinct_names)
athlete_counts = per_country.sort(F.col("num_athletes").desc()).limit(15)
display(athlete_counts)

country,num_athletes
United States of America,614
Japan,585
Australia,470
Germany,399
People's Republic of China,398
France,377
Canada,368
Great Britain,366
Italy,354
Spain,324


Databricks visualization. Run in Databricks to view.

### 4️⃣ Participation per Discipline

**📌 Purpose:**  
Visualize the total number of participants in each discipline (limited to the 15 largest disciplines).

In [0]:
# Number of athlete rows per discipline, keeping the 15 largest disciplines.
rows_per_discipline = (
    df_athletes
    .groupBy("discipline")
    .count()
    .withColumnRenamed("count", "num_participants")
)
discipline_counts = rows_per_discipline.sort(F.col("num_participants").desc()).limit(15)

display(discipline_counts)


discipline,num_participants
Athletics,2068
Swimming,743
Football,567
Rowing,496
Hockey,406
Judo,373
Handball,342
Shooting,342
Sailing,336
Rugby Sevens,283


Databricks visualization. Run in Databricks to view.

### 5️⃣ Medal Share Distribution for India – Pie Chart  🥇🥈🥉

**📌 Purpose:**  
Show the percentage share of each type of medal (Gold, Silver, Bronze) that India has won.

In [0]:
# Select India's row from the medal table. `india_medals` was not defined
# anywhere in this notebook, so this cell failed on a fresh kernel.
# NOTE(review): assumes the country is stored as exactly 'India' in
# `team_country` — confirm against silver.medals.
india_medals = df_medals.filter("team_country = 'India'")

# Prepare India's medal data in long format: one (medal_type, count) row per
# medal colour, which is the shape the pie chart consumes.
india_medals_long = (
    india_medals.selectExpr("'Gold' as medal_type", "gold as count")
    .union(india_medals.selectExpr("'Silver' as medal_type", "silver as count"))
    .union(india_medals.selectExpr("'Bronze' as medal_type", "bronze as count"))
)

# Display pie chart
display(india_medals_long)



medal_type,count
Gold,1
Silver,2
Bronze,4


Databricks visualization. Run in Databricks to view.