## Process 800m splits from my race splits database

In [35]:
import pandas as pd
from time_parser import TimeParser as tp
import warnings

# NOTE(review): this silences every warning for the rest of the notebook,
# pandas' SettingWithCopyWarning included. Kept so existing output is
# unchanged; consider narrowing it to specific warning categories.
warnings.filterwarnings('ignore')

# Read in the raw 800m split data
data_200_splits = pd.read_csv('data/db_800m_splits.csv')

# Independent second copy of the raw data (name kept from the original cell)
data_400_splits = data_200_splits.copy()

# Split into two datasets: 4 splits and 2 splits.
# A row counts as having 200m splits only when BOTH lap_3 and lap_4 are
# recorded. Keying on these two columns (instead of "any NaN anywhere")
# keeps a four-lap row with a missing value in an unrelated column from
# being filed with the two-lap rows, where its 200m laps would be read
# as 400m halves.
has_200m_laps = data_200_splits[['lap_3', 'lap_4']].notna().all(axis=1)

# .copy() so later cells can add columns without writing into a slice of
# the raw frame.
splits_200m = data_200_splits[has_200m_laps].copy()

# Everything else only has lap_1 / lap_2; .drop() returns a new frame, so the
# empty lap_3 / lap_4 columns are removed without touching the raw copy.
splits_400m = data_400_splits[~has_200m_laps].drop(columns=['lap_3', 'lap_4'])

## Parse Time Columns: convert time strings to seconds

#### 200m Splits

In [36]:
# Convert each 200m lap string to seconds, then roll the laps up into two
# 400m halves. Everything is built with `.assign`, which returns a new frame:
# `splits_200m` may still be a filtered slice of the raw data at this point,
# and writing columns straight into such a slice is what pandas flags with
# SettingWithCopyWarning.
lap_cols = ['lap_1', 'lap_2', 'lap_3', 'lap_4']
lap_sec_cols = [f'{lap}_sec' for lap in lap_cols]

# One parsed column per lap, generated from the list above instead of four
# copy-pasted lines.
lap_seconds = {
    sec_col: [tp.parse_time(time) for time in splits_200m[lap]]
    for lap, sec_col in zip(lap_cols, lap_sec_cols)
}

splits_200m = (
    splits_200m
    .assign(**lap_seconds)
    .assign(
        first_400=lambda df: df[['lap_1_sec', 'lap_2_sec']].sum(axis=1),
        second_400=lambda df: df[['lap_3_sec', 'lap_4_sec']].sum(axis=1),
    )
    .assign(
        total_time_sec=lambda df: df[['first_400', 'second_400']].sum(axis=1),
        # second half time / first half time: above 1 means the second 400m
        # took longer than the first
        split_ratio=lambda df: df['second_400'] / df['first_400'],
    )
    # Only the derived columns are kept; the raw lap strings, the per-lap
    # seconds and the original total_time column are dropped.
    .drop(columns=lap_cols + lap_sec_cols + ['total_time'])
)
splits_200m

Unnamed: 0,athlete,first_400,second_400,total_time_sec,split_ratio
3,lk boulware,79.23,87.64,166.87,1.106147
4,laney mcgahey,88.06,95.47,183.53,1.084147
12,sadie lyon,80.66,87.78,168.44,1.088272
15,baden sanderford,60.13,62.66,122.79,1.042076
17,colton thigpen,64.31,70.69,135.0,1.099207
18,ethan michael,70.9,68.5,139.4,0.96615
27,mary mac collins,68.69,74.83,143.52,1.089387
28,kennae hales,77.19,82.71,159.9,1.071512
29,sophie boxmeyer,85.8,87.91,173.71,1.024592
30,preston barnes,60.38,68.34,128.72,1.131832


#### 400m Splits

In [37]:
# For these rows lap_1 / lap_2 are used directly as the two 400m halves:
# parse them to seconds, derive the total and the second/first ratio, then
# drop the raw columns and rename the parsed ones to first_400 / second_400.
splits_400m = (
    splits_400m
    .assign(
        lap_1_sec=lambda df: [tp.parse_time(raw) for raw in df['lap_1']],
        lap_2_sec=lambda df: [tp.parse_time(raw) for raw in df['lap_2']],
    )
    .assign(
        total_time_sec=lambda df: df[['lap_1_sec', 'lap_2_sec']].sum(axis = 1),
        split_ratio=lambda df: df['lap_2_sec'] / df['lap_1_sec'],
    )
    .drop(columns = ['lap_1', 'lap_2', 'total_time'])
    .rename(columns = {'lap_1_sec': 'first_400', 'lap_2_sec': 'second_400'})
)

splits_400m

Unnamed: 0,athlete,first_400,second_400,total_time_sec,split_ratio
0,shawn stephenson,60.20,63.71,123.91,1.058306
1,shawn stephenson,62.19,65.90,128.09,1.059656
2,shawn stephenson,60.81,63.79,124.60,1.049005
5,margaret rockhold,85.34,94.28,179.62,1.104757
6,kennedy richardson,83.60,93.13,176.73,1.113995
...,...,...,...,...,...
185,anglin young,59.47,68.13,127.60,1.145620
186,shawn stephenson,58.71,70.47,129.18,1.200307
187,mary mac collins,68.53,71.67,140.20,1.045819
188,lillie gould,73.97,75.31,149.28,1.018115


## Join datasets together and export

In [40]:
# Stack the rows derived from 200m splits on top of the rows derived from
# 400m splits, row-wise.
splits_all = pd.concat((splits_200m, splits_400m), axis=0)

# index=False: the row index is not written to the exported file.
splits_all.to_csv('data/db_800m_splits_processed.csv', index=False)