In [1]:
import numpy as np

import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func
from sqlalchemy import Column, Integer, String, Float, Time
from sqlalchemy.ext.declarative import declarative_base
import pandas as pd
# from config import username, password
from flask import Flask, jsonify, render_template


In [2]:
csv_file = "data/2019_Ironman_World_Championship_Results.csv"
race_stats_df = pd.read_csv(csv_file)
csv_file = "data/countries_codes_and_coordinates.csv"
countries_df = pd.read_csv(csv_file)

In [3]:
# merge both dataframes into one dataframe on country code
merged_df = pd.merge(race_stats_df, countries_df, left_on="Country", right_on="Alpha_3_code")

In [4]:
merged_df = pd.merge(race_stats_df, countries_df, left_on="Country", right_on="Alpha_3_code")
merged_df['Swim'] = ((merged_df['Swim_h'] * 3600) + (merged_df['Swim_m'] * 60) + merged_df['Swim_s'])
merged_df['Bike'] = ((merged_df['Bike_h'] * 3600) + (merged_df['Bike_m'] * 60) + merged_df['Bike_s'])
merged_df['Run'] = ((merged_df['Run_h'] * 3600) + (merged_df['Run_m'] * 60) + merged_df['Run_s'])
merged_df['Overall'] = ((merged_df['Overall_h'] * 3600) + (merged_df['Overall_m'] * 60) + merged_df['Overall_s'])
merged_df['T1'] = ((merged_df['T1_h'] * 3600) + (merged_df['T1_m'] * 60) + merged_df['T1_s'])
merged_df['T2'] = ((merged_df['T2_h'] * 3600) + (merged_df['T2_m'] * 60) + merged_df['T2_s'])

In [5]:
# Remove unnecessary columns and rename columns
ironman_df = merged_df.loc[:, ['BIB', 'Last_Name', 'First_Name', 'Country_x', 'Country_y', 'Gender', 'Division', 'Swim', 'Bike', 'Run', 'Overall', 'Division_Rank', 'Gender_Rank', 'Overall_Rank', 'T1', 'T2', 'Latitude_average', 'Longitude_average']].dropna()
ironman_df = ironman_df.rename(columns={"Country_x":"Alpha_3_code", "Country_y":"Country"})


In [6]:
ironman_df

Unnamed: 0,BIB,Last_Name,First_Name,Alpha_3_code,Country,Gender,Division,Swim,Bike,Run,Overall,Division_Rank,Gender_Rank,Overall_Rank,T1,T2,Latitude_average,Longitude_average
0,4,Frodeno,Jan,DEU,Germany,Male,MPRO,2851,15362,9763,28273,1,1,1,118,179,51.0,9.0000
1,5,Kienle,Sebastian,DEU,Germany,Male,MPRO,3137,15304,10196,28924,3,3,3,132,155,51.0,9.0000
2,45,Frommhold,Nils,DEU,Germany,Male,MPRO,3047,15869,11110,30296,20,20,20,128,142,51.0,9.0000
3,44,Duelsen,Marc,DEU,Germany,Male,MPRO,3141,16106,10874,30395,23,23,23,127,147,51.0,9.0000
4,3003,Haug,Anne,DEU,Germany,Female,FPRO,3249,17417,10267,31210,1,1,32,122,155,51.0,9.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2289,1331,Altayeb,Dina,SAU,Saudi Arabia,Female,F45-49,5804,25892,20774,53462,80,538,2087,426,566,25.0,45.0000
2290,402,Fernandez vasquez,Bianca,DOM,Dominican Republic,Female,F35-39,7104,26998,19035,54022,73,545,2101,359,526,19.0,-70.6667
2291,327,Vu,Heidi,VNM,Viet Nam,Female,F45-49,4961,28107,22667,56402,84,574,2163,314,353,16.0,106.0000
2292,327,Vu,Heidi,VNM,Vietnam,Female,F45-49,4961,28107,22667,56402,84,574,2163,314,353,16.0,106.0000


In [15]:
ironman_df.dtypes

BIB                    int64
Last_Name             object
First_Name            object
Alpha_3_code          object
Country               object
Gender                object
Division              object
Swim                  object
Bike                  object
Run                   object
Overall               object
Division_Rank         object
Gender_Rank           object
Overall_Rank          object
T1                    object
T2                    object
Latitude_average     float64
Longitude_average    float64
dtype: object

In [27]:
ironman_df.loc[:, 'Swim'] = pd.to_datetime(ironman_df.loc[:, 'Swim'], format='%H:%M:%S') - pd.to_datetime(ironman_df.loc[:, 'Swim'], format='%H:%M:%S').dt.normalize()
ironman_df.loc[:, 'Bike'] = pd.to_datetime(ironman_df.loc[:, 'Bike'], format='%H:%M:%S') - pd.to_datetime(ironman_df.loc[:, 'Bike'], format='%H:%M:%S').dt.normalize()
ironman_df.loc[:, 'Run'] = pd.to_datetime(ironman_df.loc[:, 'Run'], format='%H:%M:%S') - pd.to_datetime(ironman_df.loc[:, 'Run'], format='%H:%M:%S').dt.normalize()

In [28]:
clean_df.dtypes

BIB                           int64
Last_Name                    object
First_Name                   object
Alpha_3_code                 object
Country                      object
Gender                       object
Division                     object
Swim                 datetime64[ns]
Bike                 datetime64[ns]
Run                  datetime64[ns]
Overall                      object
Division_Rank                object
Gender_Rank                  object
Overall_Rank                 object
T1                           object
T2                           object
Latitude_average            float64
Longitude_average           float64
dtype: object

In [29]:
swim = ironman_df['Swim'].mean()
swim

Timedelta('0 days 01:12:17.557105')

In [30]:
bike = ironman_df['Bike'].mean()
bike

Timedelta('0 days 05:45:38.909328')

In [25]:
run = ironman_df['Run'].min()
run

Timestamp('1900-01-01 02:42:43')

In [7]:
engine = create_engine("sqlite:///data/ironman.sqlite")

In [8]:
# reflect an existing database into a new model
Base = automap_base()
# reflect the tables
Base.prepare(engine, reflect=True)

In [9]:
Base.classes.keys()

[]

In [10]:
# Save reference to the table
Race_Stats = Base.classes.race_stats

AttributeError: race_stats

In [12]:
session = Session(engine)