This notebook is a basic exploratory data analysis (EDA) of the 2020 Summer Olympics that took place in Japan, 23 Jul-8 Aug, 2021.

**Used Dataset: [Tokyo 2020 Olympics Medals](https://www.kaggle.com/berkayalan/2021-olympics-medals-in-tokyo)**

**If you like this project then please don't forget to upvote.**

# Import libraries

In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import os
from tqdm.autonotebook import tqdm
import warnings
warnings.filterwarnings("ignore")
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

print("Libraries imported")

# Loading Dataset 

In [None]:
# Load the medal table CSV from the Kaggle input directory into a DataFrame.
DATA_PATH = '../input/2021-olympics-medals-in-tokyo/Tokyo Medals 2021.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first five rows of the dataset.
df.head(n=5)

The dataset has a total of 93 rows and 6 columns.

In [None]:
# (number of rows, number of columns) of the loaded DataFrame.
df.shape

Now let's check the datatypes!

In [None]:
# Data type pandas assigned to each column when reading the CSV.
df.dtypes

Let's check whether there are any null values in the dataframe.

In [None]:
# Count of missing values in each column (isna is the canonical name of isnull).
df.isna().sum()

# Basic Analysis

**Medal Distribution By Country**

In [None]:
# Horizontal bar chart: one bar per country, bar length = value of 'Total'.
# The figure is 50x45 inches and uses size-50 text for labels and title.
fig, ax = plt.subplots(figsize=(50, 45))
sns.barplot(data=df, x='Total', y='Country', palette="rocket", ax=ax)
ax.set_xlabel('Total Medal', size=50)
ax.set_ylabel('Country', size=50)
ax.set_title('Total medals vs all Countries', size=50)
plt.show()


**Total Medals vs Top 10 Countries**

In [None]:
# Horizontal bar chart of 'Total' for the first 10 rows of df.
# NOTE(review): df[:10] is "top 10" only if the CSV rows are already ordered
# by rank - confirm against the data file.
fig, ax = plt.subplots(figsize=(14, 8))
sns.barplot(data=df[:10], x='Total', y='Country', palette="rocket", ax=ax)
ax.set_xlabel('Total Medal', size=20)
ax.set_ylabel('Country', size=20)
ax.set_title('Total medals vs top 10 Countries', size=20)
plt.show()


# Animated Map

**Countries With Tokyo Olympic Medals**

In [None]:
# Countries ordered by total medals, highest first. reset_index() keeps the
# previous row labels as an extra 'index' column, which the map does not use.
df_total = df.sort_values(by=['Total'], ascending=False).reset_index()

# World map coloured by total medals, with one animation frame per country.
# Because each frame contains a single country, the colour axis is pinned to
# the full range of 'Total' via range_color; left to auto-scaling, the scale is
# not guaranteed to cover every frame, so colours would not be comparable
# from one frame to the next.
fig = px.choropleth(df_total, locations="Country", locationmode="country names",
                    color='Total',
                    color_continuous_scale=px.colors.diverging.Portland,
                    range_color=[df_total['Total'].min(), df_total['Total'].max()],
                    hover_name='Country',
                    animation_frame='Country',
                    hover_data=['Rank By Total'],
                    title="Countries With Tokyo Olympic Medals")
fig.show()



In [None]:
# The 11 rows with the lowest 'Rank By Total' values (rank 1 first).
ranked_by_total = df.sort_values(by="Rank By Total")
top_countries = ranked_by_total.head(11)
top_countries

In [None]:
# Vertical bar chart of 'Total' for the first 10 rows of df, with the value
# written inside the top of each bar.
# NOTE(review): df[:10] is "top 10" only if the CSV rows are already ordered
# by rank - confirm against the data file.
plt.figure(figsize=(9, 5))
splot = sns.barplot(x='Country', y='Total', data=df[:10], palette="rocket")
# Countries are on the x-axis and medal counts on the y-axis, so label them
# accordingly (the labels were previously swapped).
plt.xlabel('Country', size=20)
plt.ylabel('Total Medal', size=20)
plt.title('Total medals vs top 10 Countries', size=20)
for bar in splot.patches:
    # Anchor the label at the top-centre of the bar, then shift it 12 points
    # down so it sits inside the bar.
    splot.annotate(f"{bar.get_height():.0f}",
                   (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                   ha='center', va='center',
                   size=15,
                   xytext=(0, -12), color='cyan',
                   textcoords='offset points')
plt.xticks(rotation=90)
plt.show()


The top 3 countries with total medals are **USA**, **China** and **Japan**.

**Gold Medal vs. Countries**

In [None]:
# Vertical bar chart of 'Gold Medal' for the first 10 rows of df, with the
# value written inside the top of each bar.
# NOTE(review): df[:10] is "top 10" only if the CSV rows are already ordered
# by rank - confirm against the data file.
plt.figure(figsize=(9, 5))
splot = sns.barplot(x='Country', y='Gold Medal', data=df[:10], palette="rocket")
# Countries are on the x-axis and gold-medal counts on the y-axis, so label
# them accordingly (the labels were previously swapped).
plt.xlabel('Country', size=20)
plt.ylabel('Gold Medal', size=20)
# The title names the plotted metric (it previously said "Total medals").
plt.title('Gold medals vs top 10 Countries', size=20)
for bar in splot.patches:
    # Anchor the label at the top-centre of the bar, then shift it 12 points
    # down so it sits inside the bar.
    splot.annotate(f"{bar.get_height():.0f}",
                   (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                   ha='center', va='center',
                   size=15,
                   xytext=(0, -12), color='cyan',
                   textcoords='offset points')
plt.xticks(rotation=90)
plt.show()


Now let's just check the pairwise relationships in the dataset.

In [None]:
# Grid of pairwise plots over df, with points coloured by gold-medal count.
sns.pairplot(data=df, hue="Gold Medal")

**Silver Medal vs. Countries**

In [None]:
# Vertical bar chart of 'Silver Medal' for the first 10 rows of df, with the
# value written inside the top of each bar.
# NOTE(review): df[:10] is "top 10" only if the CSV rows are already ordered
# by rank - confirm against the data file.
plt.figure(figsize=(9, 5))
splot = sns.barplot(x='Country', y='Silver Medal', data=df[:10], palette="rocket")
# Countries are on the x-axis and silver-medal counts on the y-axis, so label
# them accordingly (the labels were previously swapped).
plt.xlabel('Country', size=20)
plt.ylabel('Silver Medal', size=20)
# The title names the plotted metric (it previously said "Total medals").
plt.title('Silver medals vs top 10 Countries', size=20)
for bar in splot.patches:
    # Anchor the label at the top-centre of the bar, then shift it 12 points
    # down so it sits inside the bar.
    splot.annotate(f"{bar.get_height():.0f}",
                   (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                   ha='center', va='center',
                   size=15,
                   xytext=(0, -12), color='cyan',
                   textcoords='offset points')
plt.xticks(rotation=90)
plt.show()


In [None]:
# Grid of pairwise plots over df, with points coloured by silver-medal count.
sns.pairplot(data=df, hue="Silver Medal")

**Bronze Medal vs. Countries**

In [None]:
# Vertical bar chart of 'Bronze Medal' for the first 10 rows of df, with the
# value written inside the top of each bar.
# NOTE(review): df[:10] is "top 10" only if the CSV rows are already ordered
# by rank - confirm against the data file.
plt.figure(figsize=(9, 5))
splot = sns.barplot(x='Country', y='Bronze Medal', data=df[:10], palette="rocket")
# Countries are on the x-axis and bronze-medal counts on the y-axis, so label
# them accordingly (the labels were previously swapped).
plt.xlabel('Country', size=20)
plt.ylabel('Bronze Medal', size=20)
# The title names the plotted metric (it previously said "Total medals").
plt.title('Bronze medals vs top 10 Countries', size=20)
for bar in splot.patches:
    # Anchor the label at the top-centre of the bar, then shift it 12 points
    # down so it sits inside the bar.
    splot.annotate(f"{bar.get_height():.0f}",
                   (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                   ha='center', va='center',
                   size=15,
                   xytext=(0, -12), color='cyan',
                   textcoords='offset points')
plt.xticks(rotation=90)
plt.show()


In [None]:
# Grid of pairwise plots over df, with points coloured by bronze-medal count.
sns.pairplot(data=df, hue="Bronze Medal")

**Top 10 Countries with the most Olympic Medals**

In [None]:
# The ten rows with the highest 'Total', highest first.
df1 = df.sort_values(by=['Total'], ascending=False).reset_index().head(10)

# One bar trace per medal type: (legend name, source column, bar colour).
medal_traces = [
    ('Gold', 'Gold Medal', 'blue'),
    ('Silver', 'Silver Medal', 'teal'),
    ('Bronze', 'Bronze Medal', 'cyan'),
]
gold, silver, bronze = [
    go.Bar(x=df1['Country'],
           y=df1[column],
           name=label,
           marker=dict(color=colour))
    for label, column, colour in medal_traces
]

data = [gold, silver, bronze]
# barmode='relative' places the three traces on top of one another per country.
layout = dict(
    title='Top 10 Countries with the most Olympic Medals',
    title_x=0.5,
    title_font=dict(size=20, color='black'),
    barmode='relative',
    xaxis=dict(title='Country'),
    yaxis=dict(title='Count'),
)
fig = go.Figure(data=data, layout=layout)
fig.show()

# Stay Tuned..