In [1]:
import pandas as pd
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy import create_engine, MetaData
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, Text, Float

In [2]:
# Build the SQLAlchemy engine for the SQLite database file `winter_olympics.sqlite`
DATABASE_URL = "sqlite:///winter_olympics.sqlite"
engine = create_engine(DATABASE_URL)

In [3]:
# Open a connection on `engine` and keep it as `conn`; the later cells run
# their DELETE / INSERT / SELECT statements through this one connection.
# NOTE(review): no `conn.close()` is visible in this part of the notebook --
# confirm the connection is released once the load is finished.
conn = engine.connect()

In [4]:
# Model the `medals` table as an ORM class built on SQLAlchemy's `declarative_base`

Base = declarative_base()


class Medals(Base):
    """ORM mapping for a single row of the `medals` table."""

    __tablename__ = "medals"

    # Integer primary key of the row.
    id = Column(Integer, primary_key=True)

    # Descriptive columns. Their declaration order is kept as-is because it
    # determines the column order of the generated table.
    year = Column(Integer)
    sport = Column(Text)
    event = Column(Text)
    country = Column(Text)
    gender = Column(Text)
    medal_rank = Column(Integer)
    medal = Column(Text)
    name = Column(Text)
    age = Column(Integer)

    def __repr__(self):
        """Return a short `id=..., name=...` description of the row."""
        return "id={}, name={}".format(self.id, self.name)
# More on __repr__: https://stackoverflow.com/questions/1984162/purpose-of-pythons-repr

In [5]:
# Create the tables registered on `Base` (the `medals` table) in the database
Base.metadata.create_all(bind=engine)

In [6]:
# Load the cleaned medals CSV file into a pandas DataFrame.
medals_df = (
    pd.read_csv('Winter_Olympic_Medals.csv')
    # Replace every missing value, in every column, with 0.
    # NOTE(review): presumably this is why team rows show age 0.0 -- confirm
    # that 0 (rather than NULL) is the intended marker for "unknown".
    .fillna(value=0)
    # Turn the default row index into a leading column; it is named `id` below.
    .reset_index()
)

# Columns are renamed by position, so this list must stay in the same order
# as the CSV columns (preceded by the index column added above).
medals_df.columns = ["id", "year", "sport", "event", "country", "gender", "medal_rank", "medal", "name", "age"]

# NOTE(review): names such as 'GrÃ¸ttumsbraaten' in the displayed output look
# mis-decoded -- confirm the encoding of the CSV file.

# Preview the first rows only instead of echoing the whole DataFrame.
medals_df.head(10)

Unnamed: 0,id,year,sport,event,country,gender,medal_rank,medal,name,age
0,0,1924,Bobsled,Men's Four/Five,Switzerland,Men,1,gold,Switzerland-1,0.0
1,1,1924,Bobsled,Men's Four/Five,Britain,Men,2,silver,Britain-1,0.0
2,2,1924,Bobsled,Men's Four/Five,Belgium,Men,3,bronze,Belgium-1,0.0
3,3,1924,Cross-Country Skiing,Men's 18 Kilometers,Norway,Men,1,gold,Thorleif Haug,29.0
4,4,1924,Cross-Country Skiing,Men's 18 Kilometers,Norway,Men,2,silver,Johan GrÃ¸ttumsbraaten,24.0
5,5,1924,Cross-Country Skiing,Men's 18 Kilometers,Finland,Men,3,bronze,Tapani Niku,28.0
6,6,1924,Cross-Country Skiing,Men's 50 Kilometers,Norway,Men,1,gold,Thorleif Haug,29.0
7,7,1924,Cross-Country Skiing,Men's 50 Kilometers,Norway,Men,2,silver,Thoralf StrÃ¸mstad,27.0
8,8,1924,Cross-Country Skiing,Men's 50 Kilometers,Norway,Men,3,bronze,Johan GrÃ¸ttumsbraaten,24.0
9,9,1924,Curling,Men's Curling,Britain,Men,1,gold,Britain,0.0


In [7]:
# Convert the DataFrame into a list with one {column name: value} dict per
# row; this list is what gets passed to the table INSERT further down.
medals_data = medals_df.to_dict(orient='records')

# Preview a few records only; echoing the full list floods the cell output.
medals_data[:5]

[{'age': 0.0,
  'country': 'Switzerland',
  'event': "Men's Four/Five",
  'gender': 'Men',
  'id': 0,
  'medal': 'gold',
  'medal_rank': 1,
  'name': 'Switzerland-1',
  'sport': 'Bobsled',
  'year': 1924},
 {'age': 0.0,
  'country': 'Britain',
  'event': "Men's Four/Five",
  'gender': 'Men',
  'id': 1,
  'medal': 'silver',
  'medal_rank': 2,
  'name': 'Britain-1',
  'sport': 'Bobsled',
  'year': 1924},
 {'age': 0.0,
  'country': 'Belgium',
  'event': "Men's Four/Five",
  'gender': 'Men',
  'id': 2,
  'medal': 'bronze',
  'medal_rank': 3,
  'name': 'Belgium-1',
  'sport': 'Bobsled',
  'year': 1924},
 {'age': 29.0,
  'country': 'Norway',
  'event': "Men's 18 Kilometers",
  'gender': 'Men',
  'id': 3,
  'medal': 'gold',
  'medal_rank': 1,
  'name': 'Thorleif Haug',
  'sport': 'Cross-Country Skiing',
  'year': 1924},
 {'age': 24.0,
  'country': 'Norway',
  'event': "Men's 18 Kilometers",
  'gender': 'Men',
  'id': 4,
  'medal': 'silver',
  'medal_rank': 2,
  'name': 'Johan GrÃ¸ttumsbraaten

In [8]:
# Use MetaData from SQLAlchemy to reflect the tables.
# The engine is passed to `reflect()` explicitly instead of binding it to the
# MetaData object: `MetaData(bind=...)` is deprecated in SQLAlchemy 1.4 and
# removed in 2.0, while `reflect(bind=...)` works on both.
metadata = MetaData()
metadata.reflect(bind=engine)

In [9]:
# Save the reference to the `medals` table as a variable called `medals_table`.
# `autoload_with=engine` replaces `autoload=True`, which is deprecated in
# SQLAlchemy 1.4 and removed in 2.0.
medals_table = sqlalchemy.Table('medals', metadata, autoload_with=engine)

In [10]:
# Remove any pre-existing data so that re-running the notebook does not
# insert duplicate rows.
# The DELETE runs inside an explicit transaction that is committed when the
# `with` block exits, so it does not depend on implicit autocommit (which
# SQLAlchemy 2.0 no longer performs).
with conn.begin():
    conn.execute(medals_table.delete())

<sqlalchemy.engine.result.ResultProxy at 0x25267801a90>

In [11]:
# Insert the data into the table: one row per dict in `medals_data`.
# The INSERT runs inside an explicit transaction that is committed when the
# `with` block exits, so the load is persisted without relying on implicit
# autocommit (which SQLAlchemy 2.0 no longer performs).
with conn.begin():
    conn.execute(medals_table.insert(), medals_data)

<sqlalchemy.engine.result.ResultProxy at 0x25267769748>

In [12]:
# Test that the insert works by fetching the first 5 rows.
# The SQL string is wrapped in `sqlalchemy.text()`: passing a bare string to
# `Connection.execute()` is deprecated in SQLAlchemy 1.4 and rejected in 2.0.
conn.execute(sqlalchemy.text("select * from medals limit 5")).fetchall()

[(0, 1924, 'Bobsled', "Men's Four/Five", 'Switzerland', 'Men', 1, 'gold', 'Switzerland-1', 0),
 (1, 1924, 'Bobsled', "Men's Four/Five", 'Britain', 'Men', 2, 'silver', 'Britain-1', 0),
 (2, 1924, 'Bobsled', "Men's Four/Five", 'Belgium', 'Men', 3, 'bronze', 'Belgium-1', 0),
 (3, 1924, 'Cross-Country Skiing', "Men's 18 Kilometers", 'Norway', 'Men', 1, 'gold', 'Thorleif Haug', 29),
 (4, 1924, 'Cross-Country Skiing', "Men's 18 Kilometers", 'Norway', 'Men', 2, 'silver', 'Johan GrÃ¸ttumsbraaten', 24)]