# In [1]:
import matplotlib.pyplot as plt
import pandas as pd

#https://github.com/tejdeepadabala/OlympicsMedalProject.git

# Olympic editions table (tab-separated file); only the four columns used
# later in the script are retained.
file_path = './Summer Olympic medalists 1896 to 2008 - EDITIONS.tsv'
editions = pd.read_csv(file_path, sep='\t').loc[
    :, ['Edition', 'Grand Total', 'City', 'Country']
]

# IOC country-code table: keep the 'Country' and 'NOC' columns.
file_path = 'Summer Olympic medalists 1896 to 2008 - IOC COUNTRY CODES.csv'
ioc_codes = pd.read_csv(file_path).loc[:, ['Country', 'NOC']]

# Per-edition medal tables, keyed by the edition year.
medals_dict = {}

for year in editions['Edition']:

    # Each edition's medalists are read from their own file: summer_<year>.csv
    file_path = 'summer_{:d}.csv'.format(year)

    # Keep only the columns used downstream and tag every row with its year.
    # assign() returns a new DataFrame, so 'Edition' is never written in place
    # into a column-subset slice (the pattern that makes pandas emit
    # SettingWithCopyWarning).
    medals_dict[year] = (
        pd.read_csv(file_path)[['Athlete', 'NOC', 'Medal']]
        .assign(Edition=year)
    )

# Stack all editions into one long DataFrame; ignore_index discards the
# per-file row labels (and the dict keys) in favour of a fresh 0..n-1 index.
medals = pd.concat(medals_dict, ignore_index=True)

# Table of medal counts: one row per edition, one column per NOC code.
# 'count' tallies the non-null 'Athlete' entries in each (Edition, NOC) group.
medal_counts = pd.pivot_table(
    medals,
    index='Edition',
    columns='NOC',
    values='Athlete',
    aggfunc='count',
)

# 'Grand Total' of each edition as a Series indexed by 'Edition'.
totals = editions.set_index('Edition')['Grand Total']

# Divide every country's count by that edition's grand total (row-wise
# alignment on the 'Edition' index).
fractions = medal_counts.div(totals, axis='index')

# Expanding-window mean of the fractions down the editions.
mean_fractions = fractions.expanding().mean()

# Percentage change of that running mean between consecutive rows, with
# 'Edition' moved back from the index into an ordinary column.
fractions_change = (mean_fractions.pct_change() * 100).reset_index()


# Left-join the IOC codes onto the editions table so every edition is kept,
# even when its host country has no matching row in ioc_codes.
hosts = editions.merge(ioc_codes, how='left')

# Reduce to the NOC code per edition, indexed by edition year.
hosts = hosts[['Edition', 'NOC']].set_index('Edition')

# Show the editions whose host code is still missing after the join.
print(hosts[hosts['NOC'].isna()])

# Manually supply host codes for three editions (presumably the ones printed
# above as missing; confirm against that output).
for host_year, host_noc in ((1972, 'FRG'), (1980, 'URS'), (1988, 'KOR')):
    hosts.loc[host_year, 'NOC'] = host_noc

# Move 'Edition' back into a regular column.
hosts = hosts.reset_index()


# Wide -> long: every non-'Edition' column becomes rows, with its values
# stored under 'Change'.
reshaped = fractions_change.melt(id_vars='Edition', value_name='Change')
print(reshaped.shape, fractions_change.shape)

# Rows belonging to China only.
chn = reshaped.loc[reshaped['NOC'] == 'CHN']

# Default merge: inner join on the columns the two frames have in common,
# so only rows that also appear in hosts survive.
merged = reshaped.merge(hosts)
print(merged.head())

# Index by edition and order the rows by edition.
influence = merged.set_index('Edition').sort_index()

# The 'Change' values of influence, indexed by edition (influence is sorted
# by its 'Edition' index, so bars appear in ascending edition order).
change = influence['Change']

# One bar per row of change.
ax = change.plot(kind='bar')

# Customize the plot to improve readability
ax.set_ylabel("% Change of Host Country Medal Count")
ax.set_title("Is there a Host Country Advantage?")

# Label each bar with the city of the edition it actually represents.
# Passing editions['City'] positionally only lines up when editions is already
# sorted by 'Edition' and every edition survived the merge; looking the city
# up by the plotted index keeps labels and bars in step regardless.
# Assumes 'Edition' values are unique within editions.
city_by_edition = editions.set_index('Edition')['City']
ax.set_xticklabels(city_by_edition.reindex(change.index))

# Display the plot
plt.show()