In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output, State

In [3]:
# Load the cleaned Olympics dataset from disk.
olympics_df = pd.read_csv('data/olympics_cleaned.csv')

# Data cleaning: keep only the rows where both height and weight are present.
# The trailing .copy() gives df_cleaned its own data, independent of olympics_df.
df_cleaned = (
    olympics_df
    .loc[lambda frame: frame[['height', 'weight']].notna().all(axis=1)]
    .copy()
)

df_cleaned

Unnamed: 0,id,name,sex,age,height,weight,team,noc,games,year,season,city,sport,event,medal,country
0,62,Giovanni Abagnale,M,21.0,198.0,90.0,Italy,ITA,2016 Summer,2016,Summer,Rio de Janeiro,Rowing,Rowing Men's Coxless Pairs,Bronze,Brazil
1,65,Patimat Abakarova,F,21.0,165.0,49.0,Azerbaijan,AZE,2016 Summer,2016,Summer,Rio de Janeiro,Taekwondo,Taekwondo Women's Flyweight,Bronze,Brazil
2,73,Luc Abalo,M,31.0,182.0,86.0,France,FRA,2016 Summer,2016,Summer,Rio de Janeiro,Handball,Handball Men's Handball,Silver,Brazil
3,250,Saeid Morad Abdevali,M,26.0,170.0,80.0,Iran,IRI,2016 Summer,2016,Summer,Rio de Janeiro,Wrestling,"Wrestling Men's Middleweight, Greco-Roman",Bronze,Brazil
4,455,Denis Mikhaylovich Ablyazin,M,24.0,161.0,62.0,Russia,RUS,2016 Summer,2016,Summer,Rio de Janeiro,Gymnastics,Gymnastics Men's Team All-Around,Silver,Brazil
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39713,66362,"Francis Adonijah ""Frank"" Lane",M,21.0,170.0,69.0,United States,USA,1896 Summer,1896,Summer,Athina,Athletics,Athletics Men's 100 metres,Bronze,Greece
39757,107613,Carl Schuhmann,M,26.0,159.0,70.0,Germany,GER,1896 Summer,1896,Summer,Athina,Gymnastics,Gymnastics Men's Horse Vault,Gold,Greece
39758,107613,Carl Schuhmann,M,26.0,159.0,70.0,Germany,GER,1896 Summer,1896,Summer,Athina,Gymnastics,"Gymnastics Men's Parallel Bars, Teams",Gold,Greece
39759,107613,Carl Schuhmann,M,26.0,159.0,70.0,Germany,GER,1896 Summer,1896,Summer,Athina,Wrestling,"Wrestling Men's Unlimited Class, Greco-Roman",Gold,Greece


In [13]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
from dash import dash_table
import plotly.express as px
import pandas as pd

# Data selection for the dashboard.
# Rows with a missing medal, team or year are dropped BEFORE the medal column
# is cast to str: astype(str) turns NaN into the literal string "nan", which
# dropna() would no longer recognise as a missing value.
olympics_df_q1 = (
    pd.read_csv("data/olympics_cleaned.csv")
    .dropna(subset=["medal", "team", "year"])
    .assign(medal=lambda frame: frame["medal"].astype(str))
)

# Create the Dash application and declare the page layout.
app = dash.Dash(__name__)

# One dropdown entry per distinct team, in the order unique() returns them.
country_dropdown_options = [
    {'label': team_name, 'value': team_name}
    for team_name in olympics_df_q1["team"].unique()
]

# Header text / column id pairs for the athlete table.
athlete_table_columns = [
    {'name': header, 'id': column_id}
    for header, column_id in (
        ('Athlete', 'name'),
        ('Gold', 'Gold'),
        ('Silver', 'Silver'),
        ('Bronze', 'Bronze'),
        ('Total Medals', 'total'),
    )
]

# Background tint applied to each medal column of the athlete table.
medal_column_shading = [
    {'if': {'column_id': medal_column}, 'backgroundColor': tint}
    for medal_column, tint in (
        ('Gold', '#FFF8DC'),
        ('Silver', '#F5F5F5'),
        ('Bronze', '#FAEBD7'),
    )
]

app.layout = html.Div([
    html.H1("Olympics Data Dashboard"),

    # Multi-select of the countries (teams) to compare
    dcc.Dropdown(
        id='country-dropdown',
        options=country_dropdown_options,
        multi=True,
        placeholder="Select countries..."
    ),

    # Sort direction for the medal bar chart; defaults to descending
    dcc.Dropdown(
        id='sort-order-dropdown',
        options=[
            {'label': 'Ascending', 'value': 'asc'},
            {'label': 'Descending', 'value': 'desc'}
        ],
        value='desc',
        clearable=False
    ),

    # Graph 1: number of medals per selected country
    dcc.Graph(id='medal-bar-chart'),

    # Graph 2: medals of the clicked country per Olympic Games
    html.H2("Medals Over Time"),
    dcc.Graph(id='medal-line-chart'),

    # Graph 3: top 10 athletes of the clicked country in the clicked Games
    html.H3("Top 10 Athletes and Their Medals"),
    dcc.Graph(id='athlete-bar-chart'),

    # Table 1: every medal-winning athlete of the clicked country and Games,
    # with their gold / silver / bronze / total counts
    html.H3("All Athletes and Medals for Selected Country and Olympics"),
    dash_table.DataTable(
        id='athlete-table',
        columns=athlete_table_columns,
        style_table={'overflowX': 'auto'},
        style_cell={'textAlign': 'left', 'padding': '5px'},
        style_header={'backgroundColor': '#f9f9f9', 'fontWeight': 'bold'},
        style_data_conditional=medal_column_shading
    )
])

# Callback 1: stacked medal bar chart for the selected countries
@app.callback(
    Output('medal-bar-chart', 'figure'),
    [Input('country-dropdown', 'value'),
     Input('sort-order-dropdown', 'value')]
)
def update_country_graph(selected_countries, sort_order):
    """Build the stacked bar chart of medal counts for the selected countries.

    Args:
        selected_countries: List of team names chosen in 'country-dropdown'
            (None or empty when nothing is selected).
        sort_order: 'asc' or 'desc' from 'sort-order-dropdown'; controls the
            left-to-right order of the countries by total medal count.

    Returns:
        A Plotly figure. When no country is selected, or the selection has no
        Gold/Silver/Bronze rows, an empty bar figure whose title explains why.
    """
    if not selected_countries:
        return px.bar(title="Select at least one country to display medal counts")

    # Stack order from bottom to top: bronze, silver, gold.
    medal_order = ["Bronze", "Silver", "Gold"]

    # Keep only real medal rows so the totals match the stacked segments.
    filtered_df = olympics_df_q1[
        olympics_df_q1["team"].isin(selected_countries)
        & olympics_df_q1["medal"].isin(medal_order)
    ]
    if filtered_df.empty:
        return px.bar(title="No medals found for the selected countries")

    medal_counts = (filtered_df.groupby(["team", "medal"]).size()
                    .reset_index(name="count"))

    # Total medals per country, sorted in the requested direction.
    total_medals = (medal_counts.groupby("team")["count"].sum()
                    .reset_index()
                    .sort_values(by="count", ascending=(sort_order == "asc")))
    team_order = total_medals["team"].tolist()

    # category_orders pins both the x-axis order and the trace (stack) order.
    # Relying on row order alone is not enough: traces are split by medal, so a
    # country missing a medal type could otherwise end up out of position.
    fig = px.bar(
        medal_counts, x="team", y="count", color="medal", barmode="stack",
        title="Medal Counts by Country",
        labels={"count": "Number of Medals", "team": "Country", "medal": "Medal Type"},
        color_discrete_map={"Gold": "#FFD700", "Silver": "#C0C0C0", "Bronze": "#CD7F32"},
        category_orders={"team": team_order, "medal": medal_order}
    )

    # Label every stacked bar with its total, anchored just above the bar top.
    for team, total in zip(total_medals["team"], total_medals["count"]):
        fig.add_annotation(
            x=team, y=total, text=str(total), showarrow=False, yanchor="bottom",
            font=dict(size=11, color="black")
        )

    # clickmode and graph size
    fig.update_layout(height=700, clickmode="event+select")
    return fig

# Callback 2: redraw the medal trend line when a country's bar is clicked
@app.callback(
    Output('medal-line-chart', 'figure'),
    [Input('medal-bar-chart', 'clickData')]
)
def update_medal_trend(clickData):
    """Build the line chart of row counts per Olympic Games for one country.

    Args:
        clickData: Click payload of 'medal-bar-chart'; the x value of its first
            point is used as the team name. None until a bar has been clicked.

    Returns:
        A Plotly line figure, or an empty titled figure when nothing was clicked.
    """
    if clickData is None:
        return px.line(title="Click on a country's bar to see medal trends")

    # The clicked bar's x value is the team shown in graph 1.
    selected_country = clickData["points"][0]["x"]

    # Count rows per (games, city, host country) for that team, ordered by games.
    is_selected_team = olympics_df_q1["team"] == selected_country
    rows_per_games = (
        olympics_df_q1[is_selected_team]
        .groupby(["games", "city", "country"])
        .size()
    )
    medals_per_games = rows_per_games.reset_index(name="count").sort_values("games")

    # Line chart with one marker per games edition.
    axis_labels = {
        "count": "Number of Medals",
        "games": "Olympic Games Edition",
        "country": "Host Country",
    }
    fig = px.line(
        medals_per_games,
        x="games",
        y="count",
        markers=True,
        title=f"Total Medals Won by {selected_country} Over Different Olympics",
        labels=axis_labels,
        hover_data={"city": True, "country": True},
    )

    # Styling: purple line, fixed canvas size, slanted x tick labels.
    fig.update_traces(line={"color": "#800080", "width": 3})
    fig.update_layout(
        height=600,
        width=1200,
        clickmode="event+select",
        xaxis={
            "tickangle": -30,
            "tickmode": "array",
            "tickfont": {"size": 10},
            "title_standoff": 10,
        },
    )
    return fig

# Callback 3: update top 10 athletes bar chart and table
@app.callback(
    [Output('athlete-bar-chart', 'figure'),
     Output('athlete-table', 'data')],
    [Input('medal-bar-chart', 'clickData'),
     Input('medal-line-chart', 'clickData')]
)
def update_athlete_graph(clickData_country, clickData_game):
    """Build the top-10 athlete chart and the full athlete medal table.

    Uses the country clicked in the medal bar chart and the Olympic Games
    clicked in the medal line chart.

    Args:
        clickData_country: Click payload of 'medal-bar-chart'; the x value of
            its first point is the team name.
        clickData_game: Click payload of 'medal-line-chart'; the x value of
            its first point is the games label.

    Returns:
        A (figure, rows) tuple: a stacked bar chart of the ten athletes with
        the most medals, and one dict per medal-winning athlete with the keys
        'name', 'Gold', 'Silver', 'Bronze' and 'total'. When either click is
        missing, or the combination has no medals, an empty titled figure and
        an empty list.
    """
    if not clickData_country or not clickData_game:
        return px.bar(title="Click on a country's bar to see top athletes for a selected Olympic Game"), []

    selected_country = clickData_country["points"][0]["x"]
    selected_game = clickData_game["points"][0]["x"]
    medal_order = ["Gold", "Silver", "Bronze"]

    # Medal-winning rows of the selected country in the selected games.
    filtered_athletes = olympics_df_q1[
        (olympics_df_q1["team"] == selected_country) &
        (olympics_df_q1["games"] == selected_game) &
        (olympics_df_q1["medal"].isin(medal_order))
    ]

    # The line-chart click may still hold a game from a previously selected
    # country, in which case this combination can have no rows at all.
    if filtered_athletes.empty:
        return px.bar(title=f"No medals recorded for {selected_country} in {selected_game}"), []

    # Medals per athlete and medal type.
    all_athlete_counts = filtered_athletes.groupby(["name", "medal"]).size().reset_index(name="count")

    # Wide table: one row per athlete, one column per medal type plus a total.
    pivot_table = (
        all_athlete_counts
        .pivot(index="name", columns="medal", values="count")
        .fillna(0)
        .astype(int)
        .reindex(columns=medal_order, fill_value=0)
    )
    pivot_table["total"] = pivot_table.sum(axis=1)
    pivot_table = pivot_table.sort_values(
        by=["total", "Gold", "Silver", "Bronze"], ascending=False
    ).reset_index()

    table_data = pivot_table.to_dict("records")

    # Ten best athletes, already ranked by total, then gold, silver, bronze.
    top_10_names = pivot_table.head(10)["name"].tolist()
    top_athletes = all_athlete_counts[all_athlete_counts["name"].isin(top_10_names)]

    # category_orders pins the athlete ranking on the x-axis and the stack
    # order. Traces are split by medal, so row order alone would place
    # athletes without a gold medal after all gold medallists.
    fig = px.bar(
        top_athletes,
        x="name",
        y="count",
        color="medal",
        barmode="stack",
        title=f"Top 10 Athletes from {selected_country} in {selected_game} (Total Medals: {len(filtered_athletes)})",
        labels={"count": "Number of Medals", "name": "Athlete", "medal": "Medal Type"},
        color_discrete_map={"Gold": "#FFD700", "Silver": "#C0C0C0", "Bronze": "#CD7F32"},
        category_orders={"name": top_10_names, "medal": medal_order}
    )

    fig.update_layout(
        xaxis_title="Athlete",
        yaxis_title="Number of Medals",
        xaxis_tickangle=30
    )

    return fig, table_data

# Start the Dash development server on port 7001 when executed directly.
if __name__ == "__main__":
    app.run(port=7001, debug=True)
