## Cleaning & Setting Up SQL Tables

In [None]:
import pandas as pd
import numpy as np
import sqlalchemy as db

In [None]:
# Load './race_stats.csv' into the `races` DataFrame (one frame of race-level columns used below).
races = pd.read_csv('./race_stats.csv')

In [None]:
# Load './races.csv' into `drivers`. Later cells read 'year', 'winnings' and 'driver' from it.
# NOTE(review): the file name says "races" but the frame is used as per-driver data — confirm the path.
drivers = pd.read_csv('./races.csv')

In [None]:
# Preview the last rows of the raw races frame.
races.tail()

In [None]:
# Normalise the column labels: lower-case them and turn spaces into underscores.
new_col = list(map(lambda label: label.lower().replace(' ', '_'), races.columns))

# Old label -> normalised label, in column order.
names = {old_label: new_label for old_label, new_label in zip(races.columns, new_col)}

races = races.rename(columns=names)

In [None]:
# Keep only the columns that go into the races table. The explicit .copy() makes this an
# independent frame, so adding 'year' below does not trigger SettingWithCopyWarning.
races = races[['time_of_race', 'average_speed', 'margin_of_victory', 'lead_changes', 'date']].copy()

# The year is the last four characters of each date string.
races['year'] = races['date'].str[-4:].astype('int64')

# Years in reverse row order, so ids are handed out starting at 1 from the last row upwards.
years = races['year'].tolist()[::-1]

# year -> race id. If a year appears more than once, the later entry in `years` wins.
race_dict = {year: race_id for race_id, year in enumerate(years, 1)}

In [None]:
# Drop every row that has a missing value in any column. The index is not reset here,
# so the remaining index labels may have gaps.
races = races.dropna()

In [None]:
# Give each race its id by looking its year up in race_dict.
races['id'] = races['year'].map(race_dict)

In [None]:
# Attach the matching race id to each driver row via the same year lookup;
# years that are not keys of race_dict come out as NaN.
drivers['race_id'] = drivers['year'].map(race_dict)

In [None]:
# Preview the last rows of drivers, now including 'race_id'.
drivers.tail()

In [None]:
def to_num(number):
    """Convert a currency string such as '$1,234' to an int.

    Args:
        number: Text that may contain '$' signs and ',' separators, or a value
            that is already an integer. Integers are returned as a plain int so
            that re-running the conversion cell on an already-converted column
            does not fail.

    Returns:
        The integer value.

    Raises:
        ValueError: If the cleaned text is not a valid integer literal.
    """
    from numbers import Integral  # local import keeps this cell self-contained

    if isinstance(number, Integral):
        return int(number)
    return int(number.replace('$', '').replace(',', ''))

In [None]:
# Convert the 'winnings' column to integers with to_num (removes '$' and ',').
drivers['winnings'] = drivers['winnings'].apply(to_num)

In [None]:
# Summary statistics for drivers after the winnings conversion.
drivers.describe()

#### SQL Connection

In [None]:
from creds import uri

In [None]:
# Create the SQLAlchemy engine from the `uri` imported from creds and open the
# connection used by the insert cells below.
engine = db.create_engine(uri)
connection = engine.connect()
# NOTE(review): `metadata` is not referenced by any of the cells shown here — confirm it is still needed.
metadata = db.MetaData()

#### Table 1: Races

In [None]:
#Help with this section from LA Flex, Dan Kim example
# Build the "(col TYPE, ...)" column list for CREATE TABLE from the DataFrame dtypes.
# The pandas dtype predicates are used instead of `== int` / `== float`: those comparisons
# only match the default-width dtypes, and any column that matched none of the branches
# was silently left out of the schema.
column_defs = []

for name, datatype in zip(races.columns, races.dtypes):
    if pd.api.types.is_integer_dtype(datatype):
        sql_type = 'INT'
    elif pd.api.types.is_float_dtype(datatype):
        sql_type = 'FLOAT'
    else:
        # Strings and every other dtype are stored as text so that no column is dropped.
        sql_type = 'TEXT'
    column_defs.append(f'{name} {sql_type}')

columns_to_add = "(" + ', '.join(column_defs) + ")"
columns_to_add

In [None]:
# CREATE TABLE statement for the races table, built from the columns_to_add string.
query = f'CREATE TABLE races {columns_to_add}'
# Execution is disabled. NOTE(review): presumably the table already exists — confirm before re-enabling.
#connection.execute(query) 

In [None]:
# Collect one plain tuple per race row, in column order.
# itertuples walks the rows by position, so it keeps working after dropna() has left gaps
# in the index labels (races.loc[row] raises KeyError on a dropped label). It also yields
# native Python scalars rather than NumPy ones.
values_to_add = list(races.itertuples(index=False, name=None))

In [None]:
# Insert the collected rows with a parameterized statement instead of pasting str(tuple)
# into the SQL text: Python's repr is not SQL literal syntax (quote style, nan, NumPy scalars).
placeholders = ', '.join(f':p{position}' for position in range(len(races.columns)))
insert_stmt = db.text(f'INSERT INTO races VALUES ({placeholders})')

rows = []
for values in values_to_add:
    params = {}
    for position, value in enumerate(values):
        if pd.isna(value):
            value = None  # missing data is stored as SQL NULL
        elif isinstance(value, np.generic):
            value = value.item()  # NumPy scalar -> native Python type the DB driver can bind
        params[f'p{position}'] = value
    rows.append(params)

# engine.begin() commits when the block succeeds and rolls back if any insert fails,
# so a failed run does not leave a partially filled table behind.
if rows:
    with engine.begin() as conn:
        conn.execute(insert_stmt, rows)

#### Table 2: Drivers

In [None]:
# Rename 'car_name/entrant' to 'car_name'. NOTE(review): presumably because the '/' would
# not be accepted in the unquoted column names used by the CREATE TABLE string below.
drivers = drivers.rename(columns = {'car_name/entrant' : 'car_name'})

In [None]:
#Help with this section from LA Flex, Dan Kim example
# Build the "(col TYPE, ...)" column list for CREATE TABLE from the DataFrame dtypes.
# The pandas dtype predicates are used instead of `== int` / `== float`: those comparisons
# only match the default-width dtypes, and any column that matched none of the branches
# was silently left out of the schema.
column_defs = []

for name, datatype in zip(drivers.columns, drivers.dtypes):
    if pd.api.types.is_integer_dtype(datatype):
        sql_type = 'INT'
    elif pd.api.types.is_float_dtype(datatype):
        sql_type = 'FLOAT'
    else:
        # Strings and every other dtype are stored as text so that no column is dropped.
        sql_type = 'TEXT'
    column_defs.append(f'{name} {sql_type}')

columns_to_add = "(" + ', '.join(column_defs) + ")"
columns_to_add

In [None]:
# CREATE TABLE statement for the drivers table, built from the columns_to_add string.
query = f'CREATE TABLE drivers {columns_to_add}'
# Execution is disabled. NOTE(review): presumably the table already exists — confirm before re-enabling.
#connection.execute(query) 

In [None]:
def add_esc(name):
    """Return `name` with every apostrophe (') replaced by an underscore.

    Despite the function name, nothing is escaped: the quote character is
    swapped out so the returned text contains no single quotes.
    """
    pieces = name.split('\'')
    return '_'.join(pieces)

In [None]:
# Quick check of add_esc on a name that contains an apostrophe.
add_esc("Eddie O'Donnell")

In [None]:
# Replace apostrophes in both free-text columns before their values are put into SQL strings.
for text_column in ('driver', 'car_name'):
    drivers[text_column] = drivers[text_column].apply(add_esc)

In [None]:
# Collect one plain tuple per driver row, in column order.
# itertuples walks the rows by position, so it does not depend on the index labels running
# 0..len-1 the way drivers.loc[row] does. It also yields native Python scalars rather than
# NumPy ones.
values_to_add = list(drivers.itertuples(index=False, name=None))

In [None]:
# Insert the collected rows with a parameterized statement instead of pasting str(tuple)
# into the SQL text: Python's repr is not SQL literal syntax (quote style, nan, NumPy scalars).
placeholders = ', '.join(f':p{position}' for position in range(len(drivers.columns)))
insert_stmt = db.text(f'INSERT INTO drivers VALUES ({placeholders})')

rows = []
for values in values_to_add:
    params = {}
    for position, value in enumerate(values):
        if pd.isna(value):
            value = None  # missing data is stored as SQL NULL
        elif isinstance(value, np.generic):
            value = value.item()  # NumPy scalar -> native Python type the DB driver can bind
        params[f'p{position}'] = value
    rows.append(params)

# engine.begin() commits when the block succeeds and rolls back if any insert fails,
# so a failed run does not leave a partially filled table behind.
if rows:
    with engine.begin() as conn:
        conn.execute(insert_stmt, rows)

In [None]:
# Combined number of rows in drivers and races.
len(drivers) + len(races)