# Exploratory Data Analysis on the Paris Olympic dataset.

In [1]:
import pandas as pd
import plotly.express as px
import warnings
warnings.filterwarnings('ignore')

**Dataset link:** https://www.kaggle.com/datasets/piterfm/paris-2024-olympic-summer-games

In [2]:
# Read the five CSV tables (paths are relative to the working directory) and
# bind them, in the order listed, to the names used by the cells below.
athletes, coaches, events, medals, total_medals = (
    pd.read_csv(csv_name)
    for csv_name in (
        'athletes.csv',
        'coaches.csv',
        'events.csv',
        'medallists.csv',
        'medals_total.csv',
    )
)

## Athletes

In [3]:
# Bar chart with one bar per distinct value of the athletes' 'gender' column;
# bar height is the number of rows carrying that value.
fig = px.bar(
    athletes['gender'].value_counts(),
    title='Number of Male and female Athletes in Paris Olympics.',
).update_layout(
    # Fixed canvas size, with enlarged axis-title and chart-title fonts.
    width=1000,
    height=500,
    xaxis_title_font={'size': 18},
    yaxis_title_font={'size': 18},
    title_font={'size': 30},
)
fig.show()

In [4]:
# Bar chart of the ten countries with the most rows in the athletes table.
# value_counts() already returns counts in descending order and nlargest(10)
# returns its result in decreasing order, so no intermediate sort_values() is
# needed. Using the same expression as the stacked gender chart below keeps
# both charts selecting the same ten countries when counts are tied.
fig = px.bar(athletes['country'].value_counts().nlargest(10),
             title='Top 10 Athletes participant countries in Paris Olympics.')
# Fixed canvas size, with enlarged axis-title and chart-title fonts.
fig.update_layout(width=1000,
                  height=500,
                  xaxis_title_font=dict(size=18),
                  yaxis_title_font=dict(size=18),
                  title_font=dict(size=30))
fig.show()

In [5]:
# The ten countries that have the most rows in the athletes table.
top_10_countries = athletes['country'].value_counts().nlargest(10).index

# Keep only the athletes whose country is one of those ten.
filtered_athletes = athletes.loc[athletes['country'].isin(top_10_countries)]

# One row per (country, gender) pair, with the number of athletes in 'count'.
athlete_counts = (
    filtered_athletes
    .groupby(['country', 'gender'])
    .size()
    .reset_index(name='count')
)

# Stacked bars: one bar per country, split by gender.
fig = px.bar(
    athlete_counts,
    x='country',
    y='count',
    color='gender',
    barmode='stack',
    labels={'count': 'Number of Athletes'},
    title='Male & Female Athletes of Top 10 Countries in Paris Olympics.',
)
fig.update_layout(
    title_font={'size': 30},
    xaxis_title='Country',
    yaxis_title='Number of Athletes',
    width=1000,
    height=500,
)
fig.show()


## Coaches

In [6]:
# Bar chart with one bar per distinct value of the coaches' 'gender' column,
# drawn with the qualitative Set2 colour sequence.
fig = px.bar(
    coaches['gender'].value_counts(),
    color_discrete_sequence=px.colors.qualitative.Set2,
    title='Number of Male and Female Coaches in Paris Olympics.',
)
# Fixed canvas size, with enlarged axis-title and chart-title fonts.
fig.update_layout(
    width=1000,
    height=500,
    xaxis_title_font={'size': 18},
    yaxis_title_font={'size': 18},
    title_font={'size': 30},
)
fig.show()

In [7]:
# Bar chart of the ten countries with the most rows in the coaches table.
# value_counts() already returns counts in descending order and nlargest(10)
# returns its result in decreasing order, so the extra sort_values() pass
# was redundant and has been dropped.
fig = px.bar(coaches['country'].value_counts().nlargest(10),
             title='Top 10 Coaches participant countries in Paris Olympics.',
             color_discrete_sequence=px.colors.qualitative.Set2)

# Fixed canvas size, with enlarged axis-title and chart-title fonts.
fig.update_layout(width=1000,
                  height=500,
                  xaxis_title_font=dict(size=18),
                  yaxis_title_font=dict(size=18),
                  title_font=dict(size=30))
fig.show()

In [8]:
# Bar chart with one bar per distinct value of the coaches' 'disciplines'
# column, drawn with the qualitative Set2 colour sequence.
fig = px.bar(
    coaches['disciplines'].value_counts(),
    color_discrete_sequence=px.colors.qualitative.Set2,
    title='Disciplines types of Coaches in Paris Olympics.',
).update_layout(
    width=1000,
    height=500,
    xaxis_title_font={'size': 18},
    yaxis_title_font={'size': 18},
    title_font={'size': 30},
    # Rotate the x tick labels (tickangle=270) and set their font explicitly.
    xaxis={'tickangle': 270, 'tickfont': {'size': 14, 'color': 'black'}},
)
fig.show()

In [9]:
# Bar chart with one bar per distinct value of the coaches' 'function'
# column, drawn with the qualitative Set2 colour sequence.
fig = px.bar(
    coaches['function'].value_counts(),
    color_discrete_sequence=px.colors.qualitative.Set2,
    title='Function types of Coaches in Paris Olympics.',
)
# Fixed canvas size, with enlarged axis-title and chart-title fonts.
fig.update_layout(
    width=1000,
    height=500,
    xaxis_title_font={'size': 18},
    yaxis_title_font={'size': 18},
    title_font={'size': 30},
)
fig.show()

## Events

In [10]:
# Bar chart of the ten most frequent values in the events table's 'event'
# column. value_counts() already returns counts in descending order and
# nlargest(10) returns its result in decreasing order, so the extra
# sort_values() pass was redundant and has been dropped.
fig = px.bar(events['event'].value_counts().nlargest(10),
             title='Types of Top 10 Events in Paris Olympics.',
             color_discrete_sequence=px.colors.qualitative.Vivid)
fig.update_layout(width=1000,
                  height=500,
                  xaxis_title_font=dict(size=18),
                  yaxis_title_font=dict(size=18),
                  title_font=dict(size=30),
                  # Rotate the x tick labels (tickangle=270) and set their font.
                  xaxis=dict(tickangle=270, tickfont=dict(size=14, color='black')))
fig.show()

## Medals

In [11]:
# One row per (gender, medal_type) pair, with the number of medallist rows
# for that pair in 'count'.
medal_counts = (
    medals
    .groupby(['gender', 'medal_type'])
    .size()
    .reset_index(name='count')
)

# Stacked bars: one bar per gender, split by medal type.
fig = px.bar(
    medal_counts,
    x='gender',
    y='count',
    color='medal_type',
    barmode='stack',
    title='Medal Types achieve by Gender in Paris Olympics.',
).update_layout(
    width=1000,
    height=500,
    xaxis_title_font={'size': 18},
    yaxis_title_font={'size': 18},
    title_font={'size': 30},
)
fig.show()


In [12]:
# One row per (discipline, medal_type) pair, with the number of medallist
# rows for that pair in 'count'.
medal_counts = (
    medals
    .groupby(['discipline', 'medal_type'])
    .size()
    .reset_index(name='count')
)

# Stacked bars: one bar per discipline, split by medal type.
fig = px.bar(
    medal_counts,
    x='discipline',
    y='count',
    color='medal_type',
    barmode='stack',
    title='Medal Types by Discipline in Paris Olympics.',
)
fig.update_layout(
    width=1200,
    height=600,
    xaxis_title_font={'size': 18},
    yaxis_title_font={'size': 18},
    title_font={'size': 30},
    # Rotate the x tick labels (tickangle=270) and set their font explicitly.
    xaxis={'tickangle': 270, 'tickfont': {'size': 14, 'color': 'black'}},
)
fig.show()


In [13]:
# Bar chart of the 'Total' column for the first 20 rows of total_medals.
# The frame is sliced once and both axes are given as column names, so x and
# y are read from the same 20 rows of the frame handed to px.bar (previously
# the full frame was passed alongside two separately sliced Series).
# NOTE(review): "Top 20" relies on medals_total.csv already being ordered by
# ranking; head(20) does no sorting of its own — confirm against the file.
fig = px.bar(total_medals.head(20),
             x='country',
             y='Total',
             title='Total Medals achieve by Top 20 Countries in Paris Olympics.')

fig.update_layout(
    width=1100,
    height=550,
    xaxis_title='Country',
    yaxis_title='Number of Medals',
    xaxis_title_font=dict(size=18),
    yaxis_title_font=dict(size=18),
    title_font=dict(size=30),
    # Rotate the x tick labels (tickangle=270) and set their font explicitly.
    xaxis=dict(tickangle=270, tickfont=dict(size=14, color='black')))
fig.show()

In [14]:
# Drop the columns that are not plotted here.
df = total_medals.drop(columns=['country_code', 'country_long', 'Total'])

# Wide -> long for the first 20 rows: one row per (country, medal column),
# with the column name in 'medal' and its value in 'count'.
df_long = df.head(20).melt(
    id_vars='country',
    value_vars=['Gold Medal', 'Silver Medal', 'Bronze Medal'],
    var_name='medal',
    value_name='count',
)

# Stacked bars: one bar per country, split by medal column.
fig = px.bar(
    df_long,
    x='country',
    y='count',
    color='medal',
    barmode='stack',
    labels={'count': 'Number of Medals'},
    title='Medals achieve by Top 20 Countries in Paris Olympics.',
)
fig.update_layout(
    width=1200,
    height=600,
    xaxis_title='Country',
    yaxis_title='Number of Medals',
    title_font={'size': 30},
    # Rotate the x tick labels (tickangle=270) and set their font explicitly.
    xaxis={'tickangle': 270, 'tickfont': {'size': 14, 'color': 'black'}},
)
fig.show()
