In [1]:
import pandas as pd
import sqlite3
from matplotlib import pyplot as plt

In [2]:
conn = sqlite3.connect("../Resources/gdp_olympic.sqlite")

## Summer

In [23]:
# Summer Table
summer_df = pd.read_sql('SELECT year, country_code, medal FROM summer', conn)
summer_df['game'] = 'summer'
summer_df.head()

Unnamed: 0,year,country_code,medal,game
0,1896,HUN,Gold,summer
1,1896,AUT,Silver,summer
2,1896,GRE,Bronze,summer
3,1896,GRE,Gold,summer
4,1896,GRE,Silver,summer


## Winter

In [4]:
# Winter Table
winter_df = pd.read_sql('SELECT year, country_code, medal FROM winter', conn)
winter_df['game'] = 'winter'
winter_df.head()

Unnamed: 0,year,country_code,medal,game
0,1924,FRA,Bronze,winter
1,1924,FRA,Bronze,winter
2,1924,FRA,Bronze,winter
3,1924,FRA,Bronze,winter
4,1924,SUI,Gold,winter


## Join Summer & Winter

In [5]:
# Join summer and winter tables
games = pd.concat([summer_df, winter_df], sort=False)
games.head()

Unnamed: 0,year,country_code,medal,game
0,1896,HUN,Gold,summer
1,1896,AUT,Silver,summer
2,1896,GRE,Bronze,summer
3,1896,GRE,Gold,summer
4,1896,GRE,Silver,summer


## GDP

In [6]:
# Country GDPs
wdi_df = pd.read_sql('SELECT country_name, country_code, year, gdp_per_cap FROM wdi', conn)
wdi_df.head()

Unnamed: 0,country_name,country_code,year,gdp_per_cap
0,Afghanistan,AFG,1960,59.77
1,Afghanistan,AFG,1961,59.86
2,Afghanistan,AFG,1962,58.46
3,Afghanistan,AFG,1963,78.71
4,Afghanistan,AFG,1964,82.1


## Merge: Games & GDP

In [7]:
# Merge with country GDPs
df = pd.merge(games, wdi_df, left_on=['country_code', 'year'], right_on=['country_code', 'year'])
df.head()

Unnamed: 0,year,country_code,medal,game,country_name,gdp_per_cap
0,1960,GBR,Bronze,summer,United Kingdom,1380.31
1,1960,GBR,Bronze,summer,United Kingdom,1380.31
2,1960,GBR,Silver,summer,United Kingdom,1380.31
3,1960,GBR,Bronze,summer,United Kingdom,1380.31
4,1960,GBR,Gold,summer,United Kingdom,1380.31


### Count of Medals

In [8]:
# Count of Medals
df2 = df.groupby(['year', 'country_name', 'game', 'medal']).count()
df2 = df2.reset_index()
df2.head()

Unnamed: 0,year,country_name,game,medal,country_code,gdp_per_cap
0,1960,Argentina,summer,Bronze,1,1
1,1960,Argentina,summer,Silver,3,3
2,1960,Australia,summer,Bronze,9,9
3,1960,Australia,summer,Gold,10,10
4,1960,Australia,summer,Silver,17,17


In [9]:
df2 = df2.iloc[:, 0:5]

In [10]:
df2 = df2.rename(columns={'country_code':'medal_count'})
df2.head()

Unnamed: 0,year,country_name,game,medal,medal_count
0,1960,Argentina,summer,Bronze,1
1,1960,Argentina,summer,Silver,3
2,1960,Australia,summer,Bronze,9
3,1960,Australia,summer,Gold,10
4,1960,Australia,summer,Silver,17


In [12]:
medals_df = pd.pivot_table(df2, values='medal_count', index=['year', 'country_name', 'game'], columns='medal', fill_value=0)
medals_df = medals_df.reset_index()
medals_df.head()

medal,year,country_name,game,Bronze,Gold,Silver
0,1960,Argentina,summer,1,0,3
1,1960,Australia,summer,9,10,17
2,1960,Austria,summer,0,1,2
3,1960,Austria,winter,3,1,2
4,1960,Belgium,summer,2,0,2


### GDP per year

In [13]:
# GDP per year
df3 = df.groupby(['year', 'country_code']).max()
df3 = df3.reset_index()
df3 = df3.drop(columns=['medal','game'])
df3.head()

Unnamed: 0,year,country_code,country_name,gdp_per_cap
0,1960,ARG,Argentina,
1,1960,AUS,Australia,1807.79
2,1960,AUT,Austria,935.46
3,1960,BEL,Belgium,1273.69
4,1960,BRA,Brazil,210.11


In [14]:
final_df = pd.merge(medals_df, df3, left_on=['country_name', 'year'], right_on=['country_name', 'year'])
final_df.head()

Unnamed: 0,year,country_name,game,Bronze,Gold,Silver,country_code,gdp_per_cap
0,1960,Argentina,summer,1,0,3,ARG,
1,1960,Australia,summer,9,10,17,AUS,1807.79
2,1960,Austria,summer,0,1,2,AUT,935.46
3,1960,Austria,winter,3,1,2,AUT,935.46
4,1960,Belgium,summer,2,0,2,BEL,1273.69


In [16]:
# Reorder columns
final_df = final_df[['year','country_name','gdp_per_cap','game', 'Gold', 'Silver', 'Bronze']]
final_df.head()

Unnamed: 0,year,country_name,gdp_per_cap,game,Gold,Silver,Bronze
0,1960,Argentina,,summer,0,3,1
1,1960,Australia,1807.79,summer,10,17,9
2,1960,Austria,935.46,summer,1,2,0
3,1960,Austria,935.46,winter,1,2,3
4,1960,Belgium,1273.69,summer,0,2,2


In [18]:
# Rename columns
final_df = final_df.rename(columns={'country_name':'country',
                         'gdp_per_cap':'gdp',
                         'Gold':'gold',
                         'Silver':'silver',
                         'Bronze':'bronze'
                        })
final_df.head()

Unnamed: 0,year,country,gdp,game,gold,silver,bronze
0,1960,Argentina,,summer,0,3,1
1,1960,Australia,1807.79,summer,10,17,9
2,1960,Austria,935.46,summer,1,2,0
3,1960,Austria,935.46,winter,1,2,3
4,1960,Belgium,1273.69,summer,0,2,2


## Export data to dictionary

In [19]:
gdp_medals = final_df.to_dict(orient='records')

In [22]:
gdp_medals

[{'year': 1960,
  'country': 'Argentina',
  'gdp': '',
  'game': 'summer',
  'gold': 0,
  'silver': 3,
  'bronze': 1},
 {'year': 1960,
  'country': 'Australia',
  'gdp': 1807.79,
  'game': 'summer',
  'gold': 10,
  'silver': 17,
  'bronze': 9},
 {'year': 1960,
  'country': 'Austria',
  'gdp': 935.46,
  'game': 'summer',
  'gold': 1,
  'silver': 2,
  'bronze': 0},
 {'year': 1960,
  'country': 'Austria',
  'gdp': 935.46,
  'game': 'winter',
  'gold': 1,
  'silver': 2,
  'bronze': 3},
 {'year': 1960,
  'country': 'Belgium',
  'gdp': 1273.69,
  'game': 'summer',
  'gold': 0,
  'silver': 2,
  'bronze': 2},
 {'year': 1960,
  'country': 'Brazil',
  'gdp': 210.11,
  'game': 'summer',
  'gold': 0,
  'silver': 0,
  'bronze': 13},
 {'year': 1960,
  'country': 'Canada',
  'gdp': '',
  'game': 'summer',
  'gold': 0,
  'silver': 9,
  'bronze': 0},
 {'year': 1960,
  'country': 'Canada',
  'gdp': '',
  'game': 'winter',
  'gold': 3,
  'silver': 17,
  'bronze': 1},
 {'year': 1960,
  'country': 'Egypt, 