In [5]:
import requests
import pandas as pd
from sqlalchemy import create_engine, inspect
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
import csv
import numpy as np



------------------------------Start SQL File Here-----------------------------

In [12]:
database_path= "../data/Olympics.sqlite"

In [13]:
engine = create_engine(f"sqlite:///{database_path}")
conn = engine.connect()

In [14]:
inspector = inspect(conn)

In [15]:
inspector.get_table_names()

['countryData', 'hostsData', 'iocData', 'olympicsData']

In [16]:
full_data=pd.read_sql("SELECT * FROM olympicsData",conn)

#drops data from 1906 since it is not considered part of olympics
reduced_data=full_data.loc[(full_data["Year"] !=1906)]


In [17]:
#gets rid of all team members except one so it just counts one medal for a team
data=reduced_data.drop_duplicates(subset=['Event','Medal','Sport','Year','NOC']).reset_index(drop = True)

data

Unnamed: 0,ID,Name,Sex,Age,Height,Weight,Team,NOC,Games,Year,Season,City,Sport,Event,Medal
0,15,Arvo Ossian Aaltonen,M,30,,,Finland,FIN,1920 Summer,1920,Summer,Antwerpen,Swimming,Swimming Men's 200 metres Breaststroke,Bronze
1,15,Arvo Ossian Aaltonen,M,30,,,Finland,FIN,1920 Summer,1920,Summer,Antwerpen,Swimming,Swimming Men's 400 metres Breaststroke,Bronze
2,16,Juhamatti Tapio Aaltonen,M,28,184,85,Finland,FIN,2014 Winter,2014,Winter,Sochi,Ice Hockey,Ice Hockey Men's Ice Hockey,Bronze
3,17,Paavo Johannes Aaltonen,M,28,175,64,Finland,FIN,1948 Summer,1948,Summer,London,Gymnastics,Gymnastics Men's Individual All-Around,Bronze
4,17,Paavo Johannes Aaltonen,M,32,175,64,Finland,FIN,1952 Summer,1952,Summer,Helsinki,Gymnastics,Gymnastics Men's Team All-Around,Bronze
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18676,135481,"Jules Alexis ""Louis"" Zutter",M,30,,,Switzerland,SUI,1896 Summer,1896,Summer,Athina,Gymnastics,Gymnastics Men's Parallel Bars,Silver
18677,135486,Viktor Valeryevich Zuyev,M,21,188,91,Belarus,BLR,2004 Summer,2004,Summer,Athina,Boxing,Boxing Men's Heavyweight,Silver
18678,135489,Anastasiya Valeryevna Zuyeva-Fesikova,F,22,182,71,Russia,RUS,2012 Summer,2012,Summer,London,Swimming,Swimming Women's 200 metres Backstroke,Silver
18679,135521,Anton Zwerina,M,23,,66,Austria,AUT,1924 Summer,1924,Summer,Paris,Weightlifting,Weightlifting Men's Lightweight,Silver


In [21]:
my_data=data.groupby(["NOC","Year","Season","City","Sport","Medal","Sex"]).count()
len(my_data)

12422

In [22]:
my_data=my_data[['Name']]
my_data.reset_index(inplace=True)

In [23]:
my_data.sort_values(['Year','NOC'], ascending=(True,True),inplace=True)
my_data

Unnamed: 0,NOC,Year,Season,City,Sport,Medal,Sex,Name
107,AUS,1896,Summer,Athina,Athletics,Gold,M,2
108,AUS,1896,Summer,Athina,Tennis,Bronze,M,1
464,AUT,1896,Summer,Athina,Cycling,Bronze,M,2
465,AUT,1896,Summer,Athina,Cycling,Gold,M,1
466,AUT,1896,Summer,Athina,Swimming,Gold,M,1
...,...,...,...,...,...,...,...,...
12326,VEN,2016,Summer,Rio de Janeiro,Athletics,Silver,F,1
12327,VEN,2016,Summer,Rio de Janeiro,Boxing,Bronze,M,1
12328,VEN,2016,Summer,Rio de Janeiro,Cycling,Bronze,F,1
12331,VIE,2016,Summer,Rio de Janeiro,Shooting,Gold,M,1


In [24]:
my_data.rename(columns={"Name":"Medals_Won"},inplace=True)

In [27]:
my_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Season,City,Sport,Medal,Sex,Medals_Won
NOC,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AFG,2008,1,1,1,1,1,1
AFG,2012,1,1,1,1,1,1
AHO,1988,1,1,1,1,1,1
ALG,1984,1,1,1,1,1,1
ALG,1992,2,2,2,2,2,2
...,...,...,...,...,...,...,...
ZAM,1984,1,1,1,1,1,1
ZAM,1996,1,1,1,1,1,1
ZIM,1980,1,1,1,1,1,1
ZIM,2004,3,3,3,3,3,3


In [14]:
#dictionary of ISO Countries - I had to hard codethis because it ISO abbreviations are different from standard

country_ISO_List = {
    "AFG": "Afghanistan",
    "AHO": "Netherlands Antilles",
    "ALG": "Algeria",
    "ANZ": "Austria",
    "ARG": "Argentina",
    "ARM": "Armenia",
    "AUS": "Australia",
    "AUT": "Austria",
    "AZE": "Azerbaijan",
    "BAH": "Bahamas",
    "BAR": "Barbados",
    "BDI": "Burundi",
    "BEL": "Belgium",
    "BER": "Bermuda",
    "BLR": "Belarus",
    "BOH": "Bohemia",
    "BOT": "Botswana",
    "BRA": "Brazil",
    "BRN": "Bahrain",
    "BUL": "Bulgaria",
    "CAN": "Canada",
    "CHI": "Chile",
    "CHN": "China",
    "CIV": "Côte d'Ivoire",
    "CMR": "Cameroon",
    "COL": "Colombia",
    "CRC": "Costa Rica",
    "CRO": "Croatia",
    "CUB": "Cuba",
    "CYP": "Cyprus",
    "CZE": "Czechia",
    "DEN": "Denmark",
    "DJI": "Djibouti",
    "DOM": "Dominican Republic",
    "ECU": "Ecuador",
    "EGY": "Egypt",
    "ERI": "Eritrea",
    "ESP": "Spain",
    "EST": "Estonia",
    "ETH": "Ethiopia",
    "EUN" : "Unified Team",
    "FIJ": "Fiji",
    "FIN": "Finland",
    "FRA": "France",
    "FRG": "Germany",    
    "GAB": "Gabon",
    "GBR": "Great Britain",
    "GDR": "East Germany",
    "GEO": "Georgia",
    "GER": "Germany",
    "GHA": "Ghana",
    "GRE": "Greece",
    "GRN": "Grenada",
    "GUA": "Guatemala",
    "GUY": "Guyana",
    "HAI": "Haiti",
    "HKG": "Hong Kong",
    "HUN": "Hungary",
    "INA": "Indonesia",
    "IND": "India",
    "IOA": "Individual",
    "IRI": "Iran",
    "IRL": "Ireland",
    "IRQ": "Iraq",
    "ISL": "Iceland",
    "ISR": "Israel",
    "ISV": "Virgin Islands",
    "ITA": "Italy",
    "JAM": "Jamaica",
    "JOR": "Jordan",
    "JPN": "Japan",
    "KAZ": "Kazakhstan",
    "KEN": "Kenya",
    "KGZ": "Kyrgyzstan",
    "KOR": "Korea",
    "KSA": "Saudi Arabia",
    "KUW": "Kuwait",
    "LAT": "Latvia",
    "LIB": "Lebanon",
    "LIE": "Liechtenstein",
    "LTU": "Lithuania",
    "LUX": "Luxembourg", 
    "MAR": "Morocco",
    "MAS": "Malaysia",
    "MDA": "Moldova",
    "MEX": "Mexico",
    "MGL": "Mongolia",
    "MKD": "Macedonia",
    "MNE": "Montenegro",
    "MON": "Monaco",
    "MOZ": "Mozambique",
    "MRI": "Mauritius",
    "NAM": "Namibia",
    "NED": "Netherlands",
    "NEP": "Nepal",
    "NGR": "Nigeria",
    "NIG": "Nicaragua",
    "NOR": "Norway",
    "NZL": "New Zealand",
    "PAK": "Pakistan",
    "PAN": "Panama",
    "PAR": "Paraguay",
    "PER": "Peru",
    "PHI": "Philippines",
    "POL": "Poland",
    "POR": "Portugal",
    "PRK": "North Korea",
    "PUR": "Puerto Rico",
    "QAT": "Qatar",
    "ROU": "Romania",
    "RSA": "South Africa",
    "RUS": "Russia",
    "SCG": "Serbia and Montenegro",
    "SEN": "Senegal",
    "SGP": "Singapore",
    "SLO": "Slovenia",
    "SRB": "Serbia",
    "SRI": "Sri Lanka",
    "SUD": "Sudan",
    "SUI": "Switzerland",
    "SUR": "Suriname",
    "SVK": "Slovakia",
    "SWE": "Sweden",
    "SYR": "Syrian Arab Republic",
    "TAN": "Tanzania",
    "TCH": "Czechoslovakia",
    "TGA": "Tonga",
    "THA": "Thailand",
    "TJK": "Tajikistan",
    "TOG": "Togo",
    "TPE": "Republic of China",
    "TTO": "Trinidad and Tobago","TUN": "Tunisia",
    "TUN": "Tunisia",
    "TUR": "Turkey",
    "UAE": "United Arab Emirates",
    "UAR": "Egypt",
    "UGA": "Uganda",
    "UKR": "Ukraine",
    "URS": "Soviet Union",
    "URU": "Uruguay",
    "USA": "United States",
    "UZB": "Uzbekistan",
    "VEN": "Venezuela",
    "VIE": "Vietnam",
    "WIF": "Wallis and Futuna",
    "YUG": "Yugoslavia",
    "ZAM": "Zambia",
    "ZIM": "Zimbabwe"
};

In [15]:
#set up db of coutnries
countryNames=pd.DataFrame({
    "NOC":list(country_ISO_List.keys()),
    "Name":list(country_ISO_List.values())
})


countryNames.head(5)


Unnamed: 0,NOC,Name
0,AFG,Afghanistan
1,AHO,Netherlands Antilles
2,ALG,Algeria
3,ANZ,Austria
4,ARG,Argentina


In [16]:
db_merged=pd.merge(my_data,countryNames,on="NOC",how="inner")

In [17]:
db_merged.rename(columns={"NOC":"Country"},inplace=True)

In [18]:
db_merged=db_merged[['Year','Season','City','Country', 'Name','Sex','Sport','Medal','Medals_Won']]

In [19]:
db_merged.sort_values(['Year','Country'],inplace=True)

In [20]:
#final db to pull into js
db_merged.head()

Unnamed: 0,Year,Season,City,Country,Name,Sex,Sport,Medal,Medals_Won
0,1896,Summer,Athina,AUS,Australia,M,Athletics,Gold,2
1,1896,Summer,Athina,AUS,Australia,M,Tennis,Bronze,1
357,1896,Summer,Athina,AUT,Austria,M,Cycling,Bronze,2
358,1896,Summer,Athina,AUT,Austria,M,Cycling,Gold,1
359,1896,Summer,Athina,AUT,Austria,M,Swimming,Gold,1


In [21]:
db_merged.groupby(['Season','Name']).count().head(135)

Unnamed: 0_level_0,Unnamed: 1_level_0,Year,City,Country,Sex,Sport,Medal,Medals_Won
Season,Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Summer,Afghanistan,2,2,2,2,2,2,2
Summer,Algeria,14,14,14,14,14,14,14
Summer,Argentina,67,67,67,67,67,67,67
Summer,Armenia,12,12,12,12,12,12,12
Summer,Australia,344,344,344,344,344,344,344
Summer,...,...,...,...,...,...,...,...
Summer,Ukraine,106,106,106,106,106,106,106
Summer,Unified Team,51,51,51,51,51,51,51
Summer,United Arab Emirates,2,2,2,2,2,2,2
Summer,United States,988,988,988,988,988,988,988


In [22]:
db_merged.groupby('Year').count()

Unnamed: 0_level_0,Season,City,Country,Name,Sex,Sport,Medal,Medals_Won
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1896,66,66,66,66,66,66,66,66
1900,152,152,152,152,152,152,152,152
1904,86,86,86,86,86,86,86,86
1908,174,174,174,174,174,174,174,174
1912,177,177,177,177,177,177,177,177
1920,241,241,241,241,241,241,241,241
1924,266,266,266,266,266,266,266,266
1928,278,278,278,278,278,278,278,278
1932,253,253,253,253,253,253,253,253
1936,293,293,293,293,293,293,293,293
