In [1]:
import pandas as pd
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy import create_engine, MetaData
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, Text, Float

In [2]:
# Create a SQLAlchemy engine for the SQLite database file `winter_olympics.sqlite`.
# Every later cell that talks to the database goes through this engine.
engine = create_engine("sqlite:///winter_olympics.sqlite")

In [3]:
# Open a connection on the engine and keep it as `conn`; the cells below
# reuse it for the delete, insert, and select statements.
# NOTE(review): no `conn.close()` is visible in this part of the notebook — confirm it is closed later.
conn = engine.connect()

In [4]:
# Build the declarative base class shared by all models in this notebook,
# then model the `medals` table as an ORM class on top of it.

Base = declarative_base()

class Medals(Base):
    """ORM mapping for one row of the `medals` table (one medal awarded at one event)."""
    __tablename__ = 'medals'

    id = Column(Integer, primary_key=True)  # filled from the DataFrame's reset index by the loader cell
    year = Column(Integer)
    sport = Column(Text)
    event = Column(Text)
    country = Column(Text)
    gender = Column(Text)
    medal_rank = Column(Integer)  # sample rows show 1/2/3 alongside gold/silver/bronze
    medal = Column(Text)
    name = Column(Text)  # athlete name, or a team label such as "Switzerland-1" in the sample rows
    age = Column(Integer)  # the loader zero-fills missing values, so 0 appears for rows without an age

    def __repr__(self):
        """Return a short `id=..., name=...` summary of this row."""
        return f"id={self.id}, name={self.name}"

In [5]:
# Model the `population` table as an ORM class on the shared declarative `Base`
class Population(Base):
    """ORM mapping for one row of the `population` table.

    Each row holds the population figure of one country for one year.
    """
    __tablename__ = 'population'

    id = Column(Integer, primary_key=True)
    year = Column(Integer)
    country = Column(Text)
    population = Column(Float)

    def __repr__(self):
        """Return a short summary built only from columns this model defines.

        The previous version read `self.name`, which does not exist on this
        model, so calling `repr()` on an instance raised AttributeError.
        """
        return (
            f"id={self.id}, country={self.country}, "
            f"year={self.year}, population={self.population}"
        )

In [6]:
# Model the `gdp` table as an ORM class on the shared declarative `Base`
class GDP(Base):
    """ORM mapping for one row of the `gdp` table.

    Each row holds the GDP figure of one country for one year.
    """
    __tablename__ = 'gdp'

    id = Column(Integer, primary_key=True)
    year = Column(Integer)
    country = Column(Text)
    gdp = Column(Float)

    def __repr__(self):
        """Return a short summary built only from columns this model defines.

        The previous version read `self.name`, which does not exist on this
        model, so calling `repr()` on an instance raised AttributeError.
        """
        return (
            f"id={self.id}, country={self.country}, "
            f"year={self.year}, gdp={self.gdp}"
        )

In [7]:
# Model the `temperature` table as an ORM class on the shared declarative `Base`
class Temperature(Base):
    """ORM mapping for one row of the `temperature` table.

    Each row holds a single temperature value for one country.
    """
    __tablename__ = 'temperature'

    id = Column(Integer, primary_key=True)
    country = Column(Text)
    temperature = Column(Float)

    def __repr__(self):
        """Return a short summary built only from columns this model defines.

        The previous version read `self.name`, which does not exist on this
        model, so calling `repr()` on an instance raised AttributeError.
        """
        return (
            f"id={self.id}, country={self.country}, "
            f"temperature={self.temperature}"
        )

In [8]:
# Create the tables for every model registered on `Base` (medals, population,
# gdp, and temperature) in the SQLite database behind `engine`.
Base.metadata.create_all(engine)

In [9]:
# Read the cleaned medals CSV, replace missing cells with 0, and turn the row
# index into a leading column that becomes the table's `id`.
# NOTE(review): some names render as mojibake in the captured output — confirm the CSV encoding.
medals_df = (
    pd.read_csv('Winter_Olympic_Medals.csv')
    .fillna(value=0)
    .reset_index()
)
# Rename positionally so the frame's columns match the `medals` table.
medals_df.columns = [
    "id",
    "year",
    "sport",
    "event",
    "country",
    "gender",
    "medal_rank",
    "medal",
    "name",
    "age",
]
medals_df

Unnamed: 0,id,year,sport,event,country,gender,medal_rank,medal,name,age
0,0,1924,Bobsled,Men's Four/Five,Switzerland,Men,1,gold,Switzerland-1,0.0
1,1,1924,Bobsled,Men's Four/Five,Britain,Men,2,silver,Britain-1,0.0
2,2,1924,Bobsled,Men's Four/Five,Belgium,Men,3,bronze,Belgium-1,0.0
3,3,1924,Cross-Country Skiing,Men's 18 Kilometers,Norway,Men,1,gold,Thorleif Haug,29.0
4,4,1924,Cross-Country Skiing,Men's 18 Kilometers,Norway,Men,2,silver,Johan GrÃ¸ttumsbraaten,24.0
5,5,1924,Cross-Country Skiing,Men's 18 Kilometers,Finland,Men,3,bronze,Tapani Niku,28.0
6,6,1924,Cross-Country Skiing,Men's 50 Kilometers,Norway,Men,1,gold,Thorleif Haug,29.0
7,7,1924,Cross-Country Skiing,Men's 50 Kilometers,Norway,Men,2,silver,Thoralf StrÃ¸mstad,27.0
8,8,1924,Cross-Country Skiing,Men's 50 Kilometers,Norway,Men,3,bronze,Johan GrÃ¸ttumsbraaten,24.0
9,9,1924,Curling,Men's Curling,Britain,Men,1,gold,Britain,0.0


In [10]:
# Convert the frame into a list of per-row dicts (column name -> value); this
# is the payload the insert cell hands to `conn.execute`.
medals_data = medals_df.to_dict(orient='records')
# Preview only the first few records: echoing the whole list writes every row
# into the notebook output.
medals_data[:5]

[{'age': 0.0,
  'country': 'Switzerland',
  'event': "Men's Four/Five",
  'gender': 'Men',
  'id': 0,
  'medal': 'gold',
  'medal_rank': 1,
  'name': 'Switzerland-1',
  'sport': 'Bobsled',
  'year': 1924},
 {'age': 0.0,
  'country': 'Britain',
  'event': "Men's Four/Five",
  'gender': 'Men',
  'id': 1,
  'medal': 'silver',
  'medal_rank': 2,
  'name': 'Britain-1',
  'sport': 'Bobsled',
  'year': 1924},
 {'age': 0.0,
  'country': 'Belgium',
  'event': "Men's Four/Five",
  'gender': 'Men',
  'id': 2,
  'medal': 'bronze',
  'medal_rank': 3,
  'name': 'Belgium-1',
  'sport': 'Bobsled',
  'year': 1924},
 {'age': 29.0,
  'country': 'Norway',
  'event': "Men's 18 Kilometers",
  'gender': 'Men',
  'id': 3,
  'medal': 'gold',
  'medal_rank': 1,
  'name': 'Thorleif Haug',
  'sport': 'Cross-Country Skiing',
  'year': 1924},
 {'age': 24.0,
  'country': 'Norway',
  'event': "Men's 18 Kilometers",
  'gender': 'Men',
  'id': 4,
  'medal': 'silver',
  'medal_rank': 2,
  'name': 'Johan GrÃ¸ttumsbraaten

In [11]:
# Reflect the tables that exist in the database into a fresh MetaData object
# (separate from `Base.metadata`) that is bound to the engine.
# NOTE(review): `MetaData(bind=...)` is legacy API — deprecated in SQLAlchemy 1.4 and removed in 2.0; confirm the pinned version.
metadata = MetaData(bind=engine)
metadata.reflect()

In [12]:
# Look up the `medals` table on the reflected metadata and keep it as
# `medals_table` for the delete and insert statements below.
medals_table = sqlalchemy.Table('medals', metadata, autoload=True)

In [13]:
# Remove any pre-existing rows (DELETE with no WHERE clause) so re-running the
# notebook does not collide with primary keys inserted by an earlier run.
conn.execute(medals_table.delete())

<sqlalchemy.engine.result.ResultProxy at 0x267b3ee0fd0>

In [14]:
# Insert every record in `medals_data` (one dict per row) into the `medals`
# table with a single execute call.
conn.execute(medals_table.insert(), medals_data)

<sqlalchemy.engine.result.ResultProxy at 0x267b4101630>

In [15]:
# Test that the insert works by fetching the first 5 rows.
# The SQL is wrapped in `sqlalchemy.text()` because passing a bare string to
# `Connection.execute()` is deprecated in SQLAlchemy 1.4 and rejected in 2.0;
# `text()` behaves the same on older 1.x releases.
conn.execute(sqlalchemy.text("select * from medals limit 5")).fetchall()

[(0, 1924, 'Bobsled', "Men's Four/Five", 'Switzerland', 'Men', 1, 'gold', 'Switzerland-1', 0),
 (1, 1924, 'Bobsled', "Men's Four/Five", 'Britain', 'Men', 2, 'silver', 'Britain-1', 0),
 (2, 1924, 'Bobsled', "Men's Four/Five", 'Belgium', 'Men', 3, 'bronze', 'Belgium-1', 0),
 (3, 1924, 'Cross-Country Skiing', "Men's 18 Kilometers", 'Norway', 'Men', 1, 'gold', 'Thorleif Haug', 29),
 (4, 1924, 'Cross-Country Skiing', "Men's 18 Kilometers", 'Norway', 'Men', 2, 'silver', 'Johan GrÃ¸ttumsbraaten', 24)]

In [18]:
# Read the cleaned population CSV, replace missing cells with 0, and expose
# the row index as a leading `index` column.
population_df = (
    pd.read_csv('population_by_country_1960_present.csv')
    .fillna(value=0)
    .reset_index()
)
population_df

Unnamed: 0,index,Country Name,1960,1964,1968,1972,1976,1980,1984,1988,1992,1994,1998,2002,2006,2010,2014,2015
0,0,Afghanistan,8996351,9731361,10604346,11721940,12840299,13248370,12047115,11540888,13981231,16172719,18863999,21979923,25893450,28803167,32758020,33736494
1,1,Albania,1608800,1814135,2022272,2243126,2458526,2671997,2904429,3142336,3247039,3207536,3128530,3051010,2992547,2913021,2889104,2880703
2,2,Algeria,11124888,12295970,13744387,15377093,17190239,19337715,21893853,24591492,27181094,28362253,30335732,31995046,33777915,36117637,39113313,39871528
3,3,American Samoa,20013,22698,25885,28492,30321,32457,37688,44047,49593,51803,55901,58731,58650,55637,55437,55537
4,4,Andorra,13411,17469,21890,26892,31777,36067,42706,50434,58888,62677,64142,70049,80991,84449,79223,78014
5,5,Angola,5643182,6093321,6523791,7094834,7900997,8929900,10277321,11513968,12968345,13841301,15504318,17572649,20262399,23369131,26920466,27859305
6,6,Antigua and Barbuda,55339,59524,64655,69176,72285,73442,70725,67058,68427,71719,79851,86266,90301,94661,98875,99923
7,7,Argentina,20619075,21953929,23261278,24782949,26477152,28105888,29920904,31795517,33655151,34558115,36241590,37889370,39558890,41223889,42981515,43417765
8,8,Armenia,1874120,2144998,2401140,2650484,2889579,3099751,3285595,3504651,3442810,3283660,3108684,3033897,2958500,2877311,2906220,2916950
9,9,Aruba,54211,57032,58386,59840,60586,60096,62836,61079,68235,76700,87277,94992,100832,101669,103795,104341


In [19]:
# Convert the frame into a list of per-row dicts (column name -> value).
# NOTE(review): the keys are the wide-format year columns plus `Country Name`
# and `index`, and the captured values are strings; neither matches the
# `Population` model's (id, year, country, population) columns — confirm the
# data is reshaped before it is inserted.
population_data = population_df.to_dict(orient='records')
# Preview only the first few records: echoing the whole list writes every row
# into the notebook output.
population_data[:3]

[{'1960': '8996351',
  '1964': '9731361',
  '1968': '10604346',
  '1972': '11721940',
  '1976': '12840299',
  '1980': '13248370',
  '1984': '12047115',
  '1988': '11540888',
  '1992': '13981231',
  '1994': '16172719',
  '1998': '18863999',
  '2002': '21979923',
  '2006': '25893450',
  '2010': '28803167',
  '2014': '32758020',
  '2015': '33736494',
  'Country Name': 'Afghanistan',
  'index': 0},
 {'1960': '1608800',
  '1964': '1814135',
  '1968': '2022272',
  '1972': '2243126',
  '1976': '2458526',
  '1980': '2671997',
  '1984': '2904429',
  '1988': '3142336',
  '1992': '3247039',
  '1994': '3207536',
  '1998': '3128530',
  '2002': '3051010',
  '2006': '2992547',
  '2010': '2913021',
  '2014': '2889104',
  '2015': '2880703',
  'Country Name': 'Albania',
  'index': 1},
 {'1960': '11124888',
  '1964': '12295970',
  '1968': '13744387',
  '1972': '15377093',
  '1976': '17190239',
  '1980': '19337715',
  '1984': '21893853',
  '1988': '24591492',
  '1992': '27181094',
  '1994': '28362253',
  