In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from scipy import stats

In [2]:
# import data

# Both result tables are tab-separated text files read with Latin-1 encoding
dataFemales, dataMales = [
    pd.read_csv(path, sep='\t', encoding='latin-1')
    for path in ('MA_Exer_PikesPeak_Females.txt', 'MA_Exer_PikesPeak_Males.txt')
]

In [3]:
# parse race times and convert to float

# Race-time pattern: an optional stray capital letter and whitespace (the
# tables contain values such as 'D 1:16:36'), an optional hour digit '1',
# then MM:SS.  match() only anchors at the start of the string, so trailing
# markers such as '*' are ignored.  Only a leading hour digit of '1' is
# recognised.
time = re.compile(r'[A-Z]?\s*(1)?:?([0-5][0-9]):([0-5][0-9])')

def toMinutes(timeList):
    """Convert race-time strings into a NumPy array of minutes.

    Parameters
    ----------
    timeList : iterable of str
        Race times such as '54:22', '1:12:56*' or 'D 1:16:36'.

    Returns
    -------
    numpy.ndarray
        One float per entry: 60*hours + minutes + seconds/60.

    Raises
    ------
    ValueError
        If an entry does not match the race-time pattern.
    """
    timeFloat = []
    for position, raw in enumerate(timeList):
        m = time.match(raw)
        if m is None:
            # Fail with the offending value instead of an AttributeError on None
            raise ValueError(
                'unparseable race time at position {}: {!r}'.format(position, raw)
            )
        # The hour group is None when the time is below one hour
        hours = float(m.group(1) or 0)
        minutes = float(m.group(2))
        seconds = float(m.group(3))
        timeFloat.append(60*hours + minutes + seconds/60)
    return np.array(timeFloat)

In [4]:
# convert minutes back to a time string

def toRaceString(minutesFloat):
    """Format a duration given in minutes as a race-time string.

    The value is rounded to the nearest whole second before it is split
    into fields, so floating-point error (e.g. 1 + 1/60 minutes) cannot
    drop a second and a value such as 59.9999 carries over to '1:00:00'.

    Parameters
    ----------
    minutesFloat : float
        Duration in minutes.

    Returns
    -------
    str
        'MM:SS' below one hour, otherwise 'H:MM:SS' (hours are not padded).
        Negative durations are prefixed with '-'.

    Raises
    ------
    ValueError
        If the value is NaN.
    OverflowError
        If the value is infinite.
    """
    totalSeconds = int(round(float(minutesFloat) * 60))
    # Split the magnitude so divmod never sees a negative number
    sign = '-' if totalSeconds < 0 else ''
    totalMinutes, s = divmod(abs(totalSeconds), 60)
    h, m = divmod(totalMinutes, 60)
    if h:
        return '{}{}:{:02d}:{:02d}'.format(sign, h, m, s)
    return '{}{:02d}:{:02d}'.format(sign, m, s)

In [25]:
# make a dataframe of mean, median, mode, and range by gender

def raceStats(female,male):
    """Summarise two groups of race times in a two-row DataFrame.

    Parameters
    ----------
    female, male : array-like of float
        Race times in minutes for each group.

    Returns
    -------
    pandas.DataFrame
        Columns 'Gender', 'N', 'Mean', 'Median', 'Mode' and 'Range'; the
        statistics are formatted with toRaceString.  A group with no
        entries gets N = 0 and 'n/a' in every statistic column.
    """
    def _mode(values):
        # np.unique returns sorted values and argmax picks the first maximum,
        # so ties resolve to the smallest value.  This avoids indexing the
        # result of scipy.stats.mode, whose shape differs between releases.
        uniqueValues, counts = np.unique(values, return_counts=True)
        return uniqueValues[np.argmax(counts)]

    def _spread(values):
        # Range = slowest time minus fastest time
        return np.max(values) - np.min(values)

    def _column(statistic):
        # One formatted cell per group; empty groups have no statistic
        return [toRaceString(statistic(g)) if len(g) else 'n/a' for g in (female, male)]

    return pd.DataFrame({
        'Gender': ['Female', 'Male'],
        'N': [len(female), len(male)],
        'Mean': _column(np.mean),
        'Median': _column(np.median),
        'Mode': _column(_mode),
        'Range': _column(_spread),
    })

In [6]:
# make divisions

# Inclusive [youngest, oldest] age bounds of each division
divisions = [[9,14],[15,19],[20,29],[30,39],[40,49],[50,59],[60,69],[70,79],[80,89]]

def makeDivisions(data, bounds=None, ageColumn='Ag'):
    """Split a results table into one DataFrame per age division.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing an age column.
    bounds : sequence of [low, high] pairs, optional
        Inclusive age bounds per division.  Defaults to the module-level
        ``divisions`` list.
    ageColumn : str, optional
        Name of the age column (default 'Ag').

    Returns
    -------
    list of pandas.DataFrame
        One frame per entry of ``bounds``, in the same order.  Rows whose
        age lies outside every pair appear in none of the frames.
    """
    if bounds is None:
        bounds = divisions
    ages = data[ageColumn]
    # between() is inclusive on both ends, like the >= / <= pair it replaces
    return [data.loc[ages.between(low, high)] for low, high in bounds]

# One frame per entry of `divisions`, in order; the first pair (ages 9-14) is bound to div?10
(divF10, divF15, divF20, divF30, divF40,
 divF50, divF60, divF70, divF80) = makeDivisions(dataFemales)
(divM10, divM15, divM20, divM30, divM40,
 divM50, divM60, divM70, divM80) = makeDivisions(dataMales)

In [7]:
# Male finishers whose age is between 80 and 89 inclusive
dataMales[dataMales['Ag'].between(80, 89)]

Unnamed: 0,Place,Div/Tot,Num,Name,Ag,Hometown,Gun Tim,Net Tim,Pace
1230,1231,1/2,2092,Raymond Palfrey,81.0,Springfield VA,1:18:00,1:12:56*,11:45
1236,1237,2/2,2152,Frank Pierce,84.0,Silver Spring MD,1:14:28,1:14:23*,11:59


In [8]:
# Female rows with the five smallest 'Ag' values (useful for spotting implausible ages)
dataFemales.sort_values('Ag').head(5)

Unnamed: 0,Place,Div/Tot,Num,Name,Ag,Hometown,Gun Tim,Net Tim,Pace
382,383,4/15,1917,Stefanie Merritt,-1.0,Alexandria VA,55:33,54:22*,8:45
1008,1009,15/15,1778,Susan Mackey,-1.0,North Potomac M,D 1:16:36,1:11:18*,11:29
964,965,,201,Carla Sabloff,0.0,Mclean VA,1:13:38,1:08:29,11:02
704,705,10/15,1726,Arminda Lima,1.0,Silver Spring M,D 1:03:47,1:00:07,9:41
808,809,11/15,432,Ashley Peppel,10.0,Germantown MD,1:06:44,1:03:01,10:09


In [9]:
# Male rows with the six smallest 'Ag' values (useful for spotting implausible ages)
dataMales.sort_values('Ag').head(6)

Unnamed: 0,Place,Div/Tot,Num,Name,Ag,Hometown,Gun Tim,Net Tim,Pace
1209,1210,26/28,556,Chris Barr,-1.0,Germantown MD,1:15:39,1:10:30,11:21
791,792,15/28,1555,Jon Kesler,-1.0,Silver Spring MD,59:20,54:06,8:43
525,526,,2003,Chris Nash,0.0,Brookeville MD,51:04,49:35,7:59
1225,1226,28/28,515,Gregory Aubertin,9.0,Rockville MD,1:16:21,1:12:26,11:40
1118,1119,24/28,1731,Jason Linzau,10.0,Silver Spring MD,1:06:44,1:03:01,10:09
668,669,11/28,514,Michael Aubertin,11.0,Rockville MD,54:59,51:51,8:21


In [24]:
# Net-time summary for the 9-14 division
raceStats(
    female=toMinutes(divF10['Net Tim']),
    male=toMinutes(divM10['Net Tim']),
)

Unnamed: 0,Gender,N,Mean,Median,Mode,Range
0,Female,12,58:53,58:08,49:59,20:36
1,Male,26,55:02,53:58,42:09,30:16


In [11]:
# Net-time summary for the 15-19 division
raceStats(
    female=toMinutes(divF15['Net Tim']),
    male=toMinutes(divM15['Net Tim']),
)

Unnamed: 0,Gender,N,Mean,Median,Mode,Range
0,Female,26,52:07,51:32,37:59,38:00
1,Male,45,46:39,45:58,46:15,43:29


In [12]:
# Net-time summary for the 20-29 division
raceStats(
    female=toMinutes(divF20['Net Tim']),
    male=toMinutes(divM20['Net Tim']),
)

Unnamed: 0,Gender,N,Mean,Median,Mode,Range
0,Female,228,57:14,56:30,46:22,53:31
1,Male,144,49:31,48:57,35:28,1:09:02


In [13]:
# Net-time summary for the 30-39 division
raceStats(
    female=toMinutes(divF30['Net Tim']),
    male=toMinutes(divM30['Net Tim']),
)

Unnamed: 0,Gender,N,Mean,Median,Mode,Range
0,Female,420,58:42,57:22,48:06,1:12:24
1,Male,346,51:39,51:17,45:43,52:52


In [14]:
# Net-time summary for the 40-49 division
raceStats(
    female=toMinutes(divF40['Net Tim']),
    male=toMinutes(divM40['Net Tim']),
)

Unnamed: 0,Gender,N,Mean,Median,Mode,Range
0,Female,283,57:52,57:37,46:00,1:10:17
1,Male,415,51:36,50:42,40:54,1:05:39


In [15]:
# Net-time summary for the 50-59 division
raceStats(
    female=toMinutes(divF50['Net Tim']),
    male=toMinutes(divM50['Net Tim']),
)

Unnamed: 0,Gender,N,Mean,Median,Mode,Range
0,Female,104,1:02:36,1:01:37,52:06,52:15
1,Male,205,53:55,53:05,55:16,1:06:56


In [16]:
# Net-time summary for the 60-69 division
raceStats(
    female=toMinutes(divF60['Net Tim']),
    male=toMinutes(divM60['Net Tim']),
)

Unnamed: 0,Gender,N,Mean,Median,Mode,Range
0,Female,24,1:00:00,58:22,46:07,52:55
1,Male,69,58:28,56:53,37:38,59:40


In [17]:
# Net-time summary for the 70-79 division
raceStats(
    female=toMinutes(divF70['Net Tim']),
    male=toMinutes(divM70['Net Tim']),
)

Unnamed: 0,Gender,N,Mean,Median,Mode,Range
0,Female,3,1:04:06,1:04:13,52:34,22:58
1,Male,9,59:15,56:15,45:15,38:15


In [18]:
# Net-time summary with the male 80-89 division (divM80).
# NOTE(review): the female argument is divF70 (ages 70-79), not divF80, so the
# Female row repeats the 70-79 figures. Confirm whether divF80 was intended;
# before switching, check that raceStats handles a group with no rows.
raceStats(toMinutes(divF70['Net Tim']),toMinutes(divM80['Net Tim']))

Unnamed: 0,Gender,N,Mean,Median,Mode,Range
0,Female,3,1:04:06,1:04:13,52:34,22:58
1,Male,2,1:13:39,1:13:39,1:12:56,01:27
