In [43]:
import pandas as pd
import numpy as np
from sqlalchemy import *
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
import pymysql
from config import sqlpass
from flask import Flask, jsonify
import json

In [2]:
df = pd.read_csv("athlete_events.csv")

In [3]:
df.head()

Unnamed: 0,ID,Name,Sex,Age,Height,Weight,Team,NOC,Games,Year,Season,City,Sport,Event,Medal
0,1,A Dijiang,M,24.0,180.0,80.0,China,CHN,1992 Summer,1992,Summer,Barcelona,Basketball,Basketball Men's Basketball,
1,2,A Lamusi,M,23.0,170.0,60.0,China,CHN,2012 Summer,2012,Summer,London,Judo,Judo Men's Extra-Lightweight,
2,3,Gunnar Nielsen Aaby,M,24.0,,,Denmark,DEN,1920 Summer,1920,Summer,Antwerpen,Football,Football Men's Football,
3,4,Edgar Lindenau Aabye,M,34.0,,,Denmark/Sweden,DEN,1900 Summer,1900,Summer,Paris,Tug-Of-War,Tug-Of-War Men's Tug-Of-War,Gold
4,5,Christine Jacoba Aaftink,F,21.0,185.0,82.0,Netherlands,NED,1988 Winter,1988,Winter,Calgary,Speed Skating,Speed Skating Women's 500 metres,


In [4]:
rds_connection_string = f"root:{sqlpass}@127.0.0.1/olympics_db?charset=utf8"
engine = create_engine(f'mysql://{rds_connection_string}', encoding='utf-8')

In [5]:
#use this to add the csv to mysql database
#df.to_sql(name='athletes', con=engine, if_exists='append', index=False)

In [6]:
inspector = inspect(engine)
inspector.get_table_names()

['athletes']

In [7]:
#gets all the name of the columns
columns = inspector.get_columns('athletes')
for x in columns:
    print(x['name'])

IND
ID
Name
Sex
Age
Height
Weight
Team
NOC
Games
Year
Season
City
Sport
Event
Medal


In [8]:
Base = automap_base()
Base.prepare(engine, reflect=True)
Athletes = Base.classes.athletes

In [9]:
Base.classes.keys()


['athletes']

In [10]:
session = Session(engine)

In [53]:
#Create a table grouped by Team, Year, and Medal to get medal counts.
#"Name","Sex","Team","Sport","Medal"
items = session.query(Athletes.Name, Athletes.Sex, Athletes.Team, Athletes.Sport, func.count(Athletes.Medal == "Gold")).\
    filter_by(Medal="Gold").\
    group_by(Athletes.Name).\
    order_by(func.count(Athletes.Medal == "Gold").desc()).limit(100)

for x in items:
    print(x)

('Michael Fred Phelps, II', 'M', 'United States', 'Swimming', 23)
('Raymond Clarence "Ray" Ewry', 'M', 'United States', 'Athletics', 10)
('Paavo Johannes Nurmi', 'M', 'Finland', 'Athletics', 9)
('Larysa Semenivna Latynina (Diriy-)', 'F', 'Soviet Union', 'Gymnastics', 9)
('Mark Andrew Spitz', 'M', 'United States', 'Swimming', 9)
('Frederick Carlton "Carl" Lewis', 'M', 'United States', 'Athletics', 9)
('Usain St. Leo Bolt', 'M', 'Jamaica', 'Athletics', 8)
('Birgit Fischer-Schmidt', 'F', 'East Germany', 'Canoeing', 8)
('Sawao Kato', 'M', 'Japan', 'Gymnastics', 8)
('Ole Einar Bjrndalen', 'M', 'Norway', 'Biathlon', 8)
('Matthew Nicholas "Matt" Biondi', 'M', 'United States', 'Swimming', 8)
('Jennifer Elisabeth "Jenny" Thompson (-Cumpelik)', 'F', 'United States', 'Swimming', 8)
('Vra slavsk (-Odloilov)', 'F', 'Czechoslovakia', 'Gymnastics', 7)
('Borys Anfiyanovych Shakhlin', 'M', 'Soviet Union', 'Gymnastics', 7)
('Donald Arthur "Don" Schollander', 'M', 'United States', 'Swimming', 7)
('Viktor

In [41]:
work = {'data': [
         {'Name':x.Name, 'Sex':
            x.Sex, 'Country' :x.Team, 'Sport': x.Sport, 'Gold Medals': x[4]}
        for x in items
       ]}
print(work)



{'data': [{'Name': 'Michael Fred Phelps, II', 'Sex': 'M', 'Country': 'United States', 'Sport': 'Swimming', 'Gold Medals': 23}, {'Name': 'Raymond Clarence "Ray" Ewry', 'Sex': 'M', 'Country': 'United States', 'Sport': 'Athletics', 'Gold Medals': 10}, {'Name': 'Larysa Semenivna Latynina (Diriy-)', 'Sex': 'F', 'Country': 'Soviet Union', 'Sport': 'Gymnastics', 'Gold Medals': 9}, {'Name': 'Mark Andrew Spitz', 'Sex': 'M', 'Country': 'United States', 'Sport': 'Swimming', 'Gold Medals': 9}, {'Name': 'Paavo Johannes Nurmi', 'Sex': 'M', 'Country': 'Finland', 'Sport': 'Athletics', 'Gold Medals': 9}, {'Name': 'Frederick Carlton "Carl" Lewis', 'Sex': 'M', 'Country': 'United States', 'Sport': 'Athletics', 'Gold Medals': 9}, {'Name': 'Usain St. Leo Bolt', 'Sex': 'M', 'Country': 'Jamaica', 'Sport': 'Athletics', 'Gold Medals': 8}, {'Name': 'Jennifer Elisabeth "Jenny" Thompson (-Cumpelik)', 'Sex': 'F', 'Country': 'United States', 'Sport': 'Swimming', 'Gold Medals': 8}, {'Name': 'Matthew Nicholas "Matt" B

RuntimeError: Working outside of application context.

This typically means that you attempted to use functionality that needed
to interface with the current application object in some way. To solve
this, set up an application context with app.app_context().  See the
documentation for more information.

In [45]:

jsonified_data = json.dumps(work)

In [67]:
#Create a table grouped by Team, Year, and Medal to get medal counts.
#"Name","Sex","Team","Sport","Medal"
medalg = session.query( Athletes.Team, func.count(Athletes.Medal)).\
    filter(Athletes.Medal == "Gold").\
    group_by(Athletes.Team).\
    order_by(func.count(Athletes.Medal).desc()).all()

for x in medalg:
    print(x)

('United States', 2474)
('Soviet Union', 1058)
('Germany', 679)
('Italy', 535)
('Great Britain', 519)
('France', 455)
('Sweden', 451)
('Hungary', 432)
('Canada', 422)
('East Germany', 369)
('Russia', 366)
('Australia', 342)
('China', 308)
('Norway', 299)
('Netherlands', 277)
('Japan', 247)
('South Korea', 211)
('Finland', 198)
('Denmark', 168)
('Cuba', 164)
('Romania', 161)
('West Germany', 155)
('Switzerland', 144)
('India', 138)
('Yugoslavia', 130)
('Unified Team', 123)
('Poland', 117)
('Spain', 108)
('Brazil', 103)
('Austria', 95)
('Belgium', 94)
('Argentina', 91)
('New Zealand', 85)
('Czechoslovakia', 81)
('Croatia', 58)
('Bulgaria', 54)
('Ukraine', 47)
('Pakistan', 42)
('Czech Republic', 42)
('Greece', 42)
('Turkey', 40)
('Jamaica', 38)
('United States-1', 38)
('Kenya', 34)
('South Africa', 32)
('Uruguay', 31)
('Mexico', 30)
('China-1', 28)
('Germany-1', 28)
('East Germany-1', 24)
('Belarus', 24)
('Nigeria', 23)
('Ethiopia', 22)
('Soviet Union-1', 22)
('Russia-1', 22)
('Cameroon',

In [68]:
#Create a table grouped by Team, Year, and Medal to get medal counts.
#"Name","Sex","Team","Sport","Medal"
medals = session.query( Athletes.Team, func.count(Athletes.Medal)).\
    filter(Athletes.Medal == "Silver").\
    group_by(Athletes.Team).\
    order_by(func.count(Athletes.Medal).desc()).all()

for x in medals:
    print(x)

('United States', 1512)
('Soviet Union', 716)
('Germany', 627)
('Great Britain', 582)
('France', 518)
('Italy', 508)
('Sweden', 476)
('Australia', 453)
('Canada', 413)
('Russia', 351)
('Norway', 330)
('Hungary', 330)
('China', 325)
('Netherlands', 321)
('East Germany', 309)
('Japan', 307)
('Finland', 263)
('Spain', 239)
('Denmark', 223)
('Czechoslovakia', 223)
('South Korea', 222)
('Switzerland', 213)
('Romania', 200)
('Poland', 193)
('West Germany', 184)
('Austria', 168)
('Yugoslavia', 167)
('Belgium', 161)
('Brazil', 161)
('Bulgaria', 144)
('Cuba', 127)
('Argentina', 84)
('Jamaica', 75)
('Greece', 70)
('Unified Team', 69)
('New Zealand', 56)
('Croatia', 54)
('Ukraine', 52)
('South Africa', 47)
('Pakistan', 45)
('Belarus', 44)
('Kenya', 41)
('United States-1', 33)
('Czech Republic', 32)
('Nigeria', 30)
('Serbia', 29)
('Chinese Taipei', 28)
('Turkey', 27)
('Serbia and Montenegro', 26)
('Mexico', 26)
('Kazakhstan', 25)
('Switzerland-1', 22)
('Royal Club Nautique de Gand', 21)
('Iran', 2

In [71]:
#Create a table grouped by Team, Year, and Medal to get medal counts.
#"Name","Sex","Team","Sport","Medal"
medalb = session.query( Athletes.Team, func.count(Athletes.Medal)).\
    filter(Athletes.Medal == "Bronze").\
    group_by(Athletes.Team).\
    order_by(func.count(Athletes.Medal).desc()).all()

for x in medalb:
    print(x)

('United States', 1233)
('Germany', 678)
('Soviet Union', 677)
('France', 577)
('Great Britain', 572)
('Australia', 511)
('Sweden', 507)
('Italy', 484)
('Finland', 415)
('Canada', 408)
('Russia', 393)
('Netherlands', 390)
('Hungary', 365)
('Japan', 357)
('Romania', 290)
('Norway', 281)
('China', 268)
('East Germany', 263)
('Poland', 253)
('Switzerland', 231)
('West Germany', 219)
('Brazil', 185)
('Czechoslovakia', 182)
('Denmark', 162)
('South Korea', 159)
('Belgium', 154)
('Austria', 150)
('Bulgaria', 144)
('Spain', 136)
('Cuba', 116)
('Ukraine', 98)
('Yugoslavia', 93)
('Argentina', 91)
('New Zealand', 82)
('Unified Team', 79)
('Belarus', 71)
('Greece', 62)
('Czech Republic', 60)
('South Africa', 52)
('Mexico', 51)
('Lithuania', 48)
('Nigeria', 46)
('Jamaica', 44)
('Serbia', 41)
('India', 40)
('Croatia', 37)
('Pakistan', 34)
('North Korea', 33)
('Kazakhstan', 32)
('Kenya', 31)
('Uruguay', 30)
('United States-1', 30)
('Switzerland-1', 30)
('Iran', 29)
('Turkey', 28)
('Slovenia', 27)
('

In [72]:
#Create a table grouped by Team, Year, and Medal to get medal counts.
#"Name","Sex","Team","Sport","Medal"
medalt = session.query( Athletes.Team, func.count(Athletes.Medal)).\
    group_by(Athletes.Team).\
    order_by(func.count(Athletes.Medal).desc()).all()

for x in medalt:
    print(x)

('United States', 5219)
('Soviet Union', 2451)
('Germany', 1984)
('Great Britain', 1673)
('France', 1550)
('Italy', 1527)
('Sweden', 1434)
('Australia', 1306)
('Canada', 1243)
('Hungary', 1127)
('Russia', 1110)
('Netherlands', 988)
('East Germany', 941)
('Japan', 911)
('Norway', 910)
('China', 901)
('Finland', 876)
('Romania', 651)
('South Korea', 592)
('Switzerland', 588)
('Poland', 563)
('West Germany', 558)
('Denmark', 553)
('Czechoslovakia', 486)
('Spain', 483)
('Brazil', 449)
('Austria', 413)
('Belgium', 409)
('Cuba', 407)
('Yugoslavia', 390)
('Bulgaria', 342)
('Unified Team', 271)
('Argentina', 266)
('New Zealand', 223)
('India', 197)
('Ukraine', 197)
('Greece', 174)
('Jamaica', 157)
('Croatia', 149)
('Belarus', 139)
('Czech Republic', 134)
('South Africa', 131)
('Pakistan', 121)
('Mexico', 107)
('Kenya', 106)
('United States-1', 101)
('Nigeria', 99)
('Turkey', 95)
('Serbia', 85)
('Kazakhstan', 77)
('Switzerland-1', 72)
('Germany-1', 70)
('Iran', 68)
('North Korea', 65)
('Serbia 