In [6]:
import pandas as pd
import csv

# Loading in Olympic data
olympics_path = "../datasources/athlete_events.csv"
noc_path = "../datasources/noc_regions.csv"
census_path = "../datasources/internationalcensus.csv"
gdp_path = "../datasources/internationalgdp.csv"

# Read the cleaned olympic data in cleandata folder

olympics_df = pd.read_csv(olympics_path)

# Read the country data in the NOC file located in datasources folder
noc_df = pd.read_csv(noc_path)

# Read the census data and rename column for smooth merge
census_df = pd.read_csv(census_path)
census_df = census_df.rename(columns ={"Country/Area Name":"region"})

# Read the gdp data, rename column for smooth merge, drop unnecessary columns, melt for ease of merge, change Year column to integer
gdp_df = pd.read_csv(gdp_path)
gdp_df= gdp_df.rename(columns ={"Country Name":"region"})
gdp_df = gdp_df.drop(["Country Code","Indicator Name","Indicator Code"], axis=1)
gdp_df = pd.melt(gdp_df,'region',var_name='Year',value_name='GDP')
gdp_df["Year"]=gdp_df["Year"].astype(int)

In [10]:
# merge clean_df and noc_regions by NOC
merge_df = pd.merge(olympics_df, noc_df, on="NOC")

# merge merge_df and international census data by country and year
merge2_df = pd.merge(merge_df, census_df, on=["region","Year"], how = "inner")

# merge merge2_df and gdp data by country and year

merge3_df = pd.merge(merge2_df, gdp_df, on=["region","Year"])

# Removing columns notes, Annual Growth Rate %, Density (per sq km),Total Fertility Rate, Life Expectancy at Birth, Under-5 Mortality Rate
olympiccountries_df = merge3_df.drop(["notes","Annual Growth Rate %", "Density (per sq km)","Total Fertility Rate", "Life Expectancy at Birth", "Under-5 Mortality Rate"], axis=1)
olympiccountries_df.head(10)
olympiccountries_df.set_index("ID")

Unnamed: 0_level_0,Name,Sex,Age,Height,Weight,Team,NOC,Games,Year,Season,...,Sport,Event,Medal,region,Row,FIPS,GENC,Population,Area (sq km),GDP
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,A Dijiang,M,24.0,180.0,80.0,China,CHN,1992 Summer,1992,Summer,...,Basketball,Basketball Men's Basketball,,China,6234,CH,CN,1182230625,9326410,4.269160e+11
6848,Bai Chongguang,M,21.0,184.0,83.0,China,CHN,1992 Summer,1992,Summer,...,Boxing,Boxing Men's Light-Heavyweight,,China,6234,CH,CN,1182230625,9326410,4.269160e+11
6854,Bai Mei,F,17.0,166.0,46.0,China,CHN,1992 Summer,1992,Summer,...,Rhythmic Gymnastics,Rhythmic Gymnastics Women's Individual,,China,6234,CH,CN,1182230625,9326410,4.269160e+11
11225,Bi Zhong,M,23.0,188.0,110.0,China,CHN,1992 Summer,1992,Summer,...,Athletics,Athletics Men's Hammer Throw,,China,6234,CH,CN,1182230625,9326410,4.269160e+11
17295,Cai Yanshu,M,28.0,169.0,79.0,China,CHN,1992 Summer,1992,Summer,...,Weightlifting,Weightlifting Men's Light-Heavyweight,,China,6234,CH,CN,1182230625,9326410,4.269160e+11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87277,Frans Ntaole,M,33.0,168.0,62.0,Lesotho,LES,1984 Summer,1984,Summer,...,Athletics,Athletics Men's Marathon,,Lesotho,17400,LT,LS,1519492,30355,3.331585e+08
98490,Gabashane Vincent Rakabaele,M,35.0,163.0,54.0,Lesotho,LES,1984 Summer,1984,Summer,...,Athletics,Athletics Men's Marathon,,Lesotho,17400,LT,LS,1519492,30355,3.331585e+08
122215,Lefa Tsapi,M,23.0,170.0,63.0,Lesotho,LES,1984 Summer,1984,Summer,...,Boxing,Boxing Men's Welterweight,,Lesotho,17400,LT,LS,1519492,30355,3.331585e+08
81698,Motsapi Moorosi,M,27.0,170.0,62.0,Lesotho,LES,1972 Summer,1972,Summer,...,Athletics,Athletics Men's 100 metres,,Lesotho,17388,LT,LS,1116779,30355,8.091583e+07


In [11]:
olympiccountries_df.to_csv("clean_df")