In [None]:
import pandas as pd
import numpy as np

### Reading and preparing the data

In [None]:
# Load the three CSV inputs: historic athlete results, the NOC -> region
# lookup, and the 2021 medal table.
df, regions, df_21 = (
    pd.read_csv(csv_name)
    for csv_name in (
        'athlete_events.csv',
        'noc_regions.csv',
        'Tokyo_2021_dataset.csv',
    )
)

In [None]:
# Merge results with the region dataset (inner join on the shared NOC column)
# and keep only the Summer Games, which is all this project looks at.
#
# The trailing .copy() makes the filtered frame independent of the merge
# result, so the column assignments made in later cells do not raise
# SettingWithCopyWarning.
#
# NOTE: this cell rebinds `df`; re-running it without first re-running the
# load cell would merge the region columns a second time.
df = (
    pd.merge(df, regions, on='NOC')
    .query('Season == "Summer"')
    .copy()
)

In [None]:
# Replacing the country name with common known names.
#
# The renames are applied only to the column that holds the country name
# ('region' for the historic data, 'Team/NOC' for the 2021 table). A frame-wide
# replace would also rewrite any other column that happens to contain the same
# string (code columns are the likely victims -- verify against the data).
# Each mapping is applied in a single pass and returns a new frame instead of
# mutating in place, so the cell can be re-run safely.
region_renames = {
    'USA': "United States of America",
    'Tanzania': "United Republic of Tanzania",
    'Democratic Republic of Congo': "Democratic Republic of the Congo",
    'Congo': "Republic of the Congo",
    'Lao': "Laos",
    'Syrian Arab Republic': "Syria",
    'Serbia': "Republic of Serbia",
    'Czechia': "Czech Republic",
    'UAE': "United Arab Emirates",
    'UK': "United Kingdom",
}

tokyo_team_renames = {
    'Great Britain': "United Kingdom",
    "People's Republic of China": "China",
    "ROC": "Russia",
}

df = df.assign(region=df['region'].replace(region_renames))
# 'Team/NOC' is not a valid keyword name, hence the dict unpacking.
df_21 = df_21.assign(**{'Team/NOC': df_21['Team/NOC'].replace(tokyo_team_renames)})

In [None]:
# Host city (spelled as in the 'City' column this is applied to) -> host country.
_HOST_COUNTRY_BY_CITY = {
    "Rio de Janeiro": "Brazil",
    "London": "United Kingdom",
    "Beijing": "China",
    "Athina": "Greece",
    "Sydney": "Australia",
    "Melbourne": "Australia",
    "Atlanta": "United States of America",
    "Los Angeles": "United States of America",
    "St. Louis": "United States of America",
    "Barcelona": "Spain",
    "Seoul": "South Korea",
    "Moskva": "Russia",
    "Montreal": "Canada",
    "Munich": "Germany",
    "Berlin": "Germany",
    "Mexico City": "Mexico",
    "Tokyo": "Japan",
    "Roma": "Italy",
    "Paris": "France",
    "Helsinki": "Finland",
    "Amsterdam": "Netherlands",
    "Antwerpen": "Belgium",
    "Stockholm": "Sweden",
}


def host_country(col):
    """Return the host country for a host-city name.

    Parameters
    ----------
    col : str
        City name; the match is exact and case-sensitive.

    Returns
    -------
    str
        The country that hosted the Games in that city, or "Other" when the
        city is not one of the known host cities.
    """
    return _HOST_COUNTRY_BY_CITY.get(col, "Other")


# Applying this function: derive the host country of every row from its host
# city. assign() returns a new frame, so no column is set on a filtered slice
# (avoids SettingWithCopyWarning) and re-running the cell is harmless.
df = df.assign(Host_Country=df['City'].map(host_country))

In [None]:
# Medal table per (Year, Host_Country, region): one column per medal type,
# plus a host flag and the medal total.
# NOTE(review): count() tallies rows, so if the source has one row per athlete,
# team events contribute several medals each -- confirm this is intended.
df_new = (
    df.groupby(['Year', 'Host_Country', 'region', 'Medal'])['Medal']
    .count()
    .unstack()       # medal types become columns
    .fillna(0)       # a region with no medal of some type gets 0
    .astype(int)
    .reset_index()
    .assign(
        # 1 when the region is the country hosting that year's Games, else 0
        Is_Host=lambda medals: (medals['Host_Country'] == medals['region']).astype(int),
        **{'Total Medals': lambda medals: medals[['Bronze', 'Silver', 'Gold']].sum(axis=1)},
    )
)

In [None]:
# Preparing DF 2021 Dataset
#
# Bring the 2021 medal table into the same shape as df_new. Everything is
# built in one chain on a fresh frame, so no column is ever assigned onto a
# slice of df_21 (which raises SettingWithCopyWarning).
tokyo_medal_renames = {
    'Gold Medal': 'Gold',
    'Silver Medal': 'Silver',
    'Bronze Medal': 'Bronze',
}

df_21_refined = (
    df_21[['Team/NOC', *tokyo_medal_renames]]
    .rename(columns={'Team/NOC': 'region', **tokyo_medal_renames})
    .assign(**{
        'Total Medals': lambda medals: medals[['Gold', 'Silver', 'Bronze']].sum(axis=1),
        'Year': 2021,
        # Japan is the host for every row of this table
        'Is_Host': lambda medals: np.where(medals['region'] == 'Japan', 1, 0),
        'Host_Country': 'Japan',
    })
)

# Adding 2021 data to historic.
# ignore_index=True gives the combined frame a fresh 0..n-1 index instead of
# repeating the row labels of both inputs.
df_full = pd.concat([df_new, df_21_refined], ignore_index=True)

In [None]:
# Display the combined historic + 2021 medal table as a visual sanity check
df_full

### 2021 Olympics

In [None]:
import plotly.express as px
import plotly.graph_objects as go

In [None]:
import matplotlib
import matplotlib.pyplot as plt

In [None]:
# Ten 2021 rows with the highest medal totals, best first.
# reset_index() keeps the previous row labels in an 'index' column and gives
# the frame a fresh 0..9 index, which is then mirrored into 'index_column'.
df_fig1 = (
    df_full.loc[df_full['Year'].eq(2021)]
    .sort_values(by='Total Medals', ascending=False)
    .head(10)
    .reset_index()
    .assign(index_column=lambda ranked: ranked.index)
)

In [None]:
# Hex colour used as the plot background (passed as plot_bgcolor to the figure layout)
background_color = '#ECE7E2'

In [None]:
def remove_overlapping(linelength, is_host):
    """Return a shortened line length.

    The length is reduced by 2.2 for the host row and by 1 for every other
    row. (Presumably this keeps a line from running into its end marker, the
    host marker being the larger one -- confirm against the caller.)

    Parameters
    ----------
    linelength : number
        Original length of the line.
    is_host : int
        1 for the host row; any other value is treated as non-host.

    Returns
    -------
    number
        ``linelength - 2.2`` when ``is_host == 1``, otherwise ``linelength - 1``.
    """
    # The leftover debug print(linelength) was removed: it wrote one line to
    # the cell output for every call.
    if is_host == 1:
        return linelength - 2.2
    return linelength - 1

In [None]:
# Lollipop chart of the top-10 medal totals in 2021, with the host highlighted.
line_color = '#2C4545'
host_color = 'red'

# Select the host row(s) from the data rather than by a fixed row position, so
# the highlight stays correct whatever rank the host has (and is simply absent
# if the host is not in the top 10).
host_rows = df_fig1[df_fig1['Is_Host'] == 1]

# Draw points
data = [
    go.Scatter(
        x=df_fig1['Total Medals'],
        y=df_fig1['region'],
        mode='markers',
        marker=dict(color=line_color, size=15)
    ),

    # Larger marker drawn over the host's regular one
    go.Scatter(
        x=host_rows['Total Medals'],
        y=host_rows['region'],
        mode='markers',
        marker=dict(color=host_color, size=25)
    )
]

layout = go.Layout(
    title_text= 'Total Medals 2021 - Top 10 Countries',
    title_font_size= 30,
    width= 800,
    height= 550,
    plot_bgcolor= background_color
)

fig1 = go.Figure(data, layout)

# Draw lines: one horizontal stem per country from x=0 to its medal total.
# The y axis is categorical, so y0/y1 are the category's position (0, 1, ...),
# which matches the row order of df_fig1.
for i, (total, is_host) in enumerate(zip(df_fig1['Total Medals'], df_fig1['Is_Host'])):
    fig1.add_shape(type='line',
                   x0=0, y0=i,
                   x1=total, y1=i,
                   line=dict(color=line_color if is_host == 0 else host_color,
                             width=1))

fig1.update_layout(showlegend=False)
# Leave a little room to the right of the largest marker
fig1.update_xaxes(title='Total Medals',
                  range=[0, df_fig1['Total Medals'].max() + 5],
                  showgrid=False)
fig1.update_yaxes(showgrid=False)

fig1.show(config={'displayModeBar': False})