In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
from os.path import basename
import os

%matplotlib inline

In [2]:
#Function to fix the time (string)
def fix_time(df):
    '''This function takes the dataframe as an argument and 
    converts the string Lap Time column to be a datetime
    and then converts the datetime column to be a float.
    The function then returns the dataframe with those two
    columns appended to the right'''
    
    # TODO: Check to make sure that the 'Lap Time' column
    #       exists.
    # TODO: Check to make sure that the 'Lap Time' column
    #       is the right format
    
    df = df.assign(dtLapTime = pd.to_datetime('0:0:' + df['Lap Time'] + '0',
                                             exact = False, errors = 'ignore',
                                             format = "%H:%M:%S.%f").apply(lambda x: x.time()))
    df = df.assign(fLapTime = df.dtLapTime.apply(lambda x: ((x.hour * 60 + x.minute) * 60 + x.second) * 1000000 + x.microsecond) / 1000000)
    return df


def get_car_data(df, car_num, max_time):
    # make sure car exists
    # make a default max time val
    return df[(df['Car'] == car_num) & (df['Flag'] == 'Green') & (df['fLapTime'] < max_time)]


def get_cars_data(df, car_nums, max_time, strDfName):
    # make sure car exists
    # make a default max time val
    #df[df['A'].isin([3, 6])]
    df = df[(df['Car'].isin(car_nums)) & (df['Flag'] == 'Green') & (df['fLapTime'] < max_time)].copy()
    df['SetName'] = strDfName
    return df

def get_file_root(s_file_name):
    """trim off the file extension from the end of the path and trim off the path from the beginning of the path"""
    
    # print(os.path.splitext("path_to_file")[0])
    # now you can call it directly with basename
    #print(basename("/a/b/c.txt"))

    s_file_root = basename(os.path.splitext(s_file_name)[0]).replace(" ", "") #s_file_name.rtrim(3)
    
    return s_file_root

In [31]:
#def main():
# Set Paths for Sebring 2018 data

s_path_root = 'C:\\Users\\gator\\Documents\\GitHub\\IMSA2017\\LapData\\2018_sebring\\'

s_path_gt3_p1 = s_path_root + 'GT3 Sebring Practice 1\\GT3 Sebring Practice 1.csv'
s_path_gt3_q = s_path_root + 'GT3 Sebring Quailfying\\GT3 Sebring Quailfying.csv'
s_path_gt3_r1 = s_path_root + 'GT3 Sebring Race 1 Replay\\GT3 Sebring Race 1.csv'
s_path_gt3_r2 = s_path_root + 'GT3 Sebring Race 2\\GT3 Sebring Race 2.csv'

s_path_pc_p1 = s_path_root + 'IPC Sebring Practice 1\\IPC Sebring Practice 1.csv'
s_path_pc_p2 = s_path_root + 'IPC Sebring Practice 2\\IPC Sebring Practice 2.csv'
s_path_pc_q = s_path_root + 'IPC Sebring Qualifying\\IPC Sebring Qualifying.csv'
s_path_pc_r = s_path_root + 'IPC Sebring Race\\IPC Sebring Race.csv'

s_path_ctsc_p1 = s_path_root + 'ICTSC Sebring Practice 1\\ICTSC Sebring Practice 1.csv'
s_path_ctsc_p2 = s_path_root + 'ICTSC Sebring Practice 2\\ICTSC Sebring Practice 2.csv'
s_path_ctsc_p3 = s_path_root + 'ICTSC Sebring Practice 3\\ICTSC Sebring Practice 3.csv'
s_path_ctsc_q = s_path_root + 'ICTSC Sebring Qualifying\\ICTSC Sebring Qualifying.csv'
s_path_ctsc_r = s_path_root + 'ICTSC Sebring Race\\ICTSC Sebring Race.csv'

s_path_wsc_p1 = s_path_root + 'IWSC Sebring Practice 1\\IWSC Sebring Practice 1.csv'
s_path_wsc_p2 = s_path_root + 'IWSC Sebring Practice 2\\IWSC Sebring Practice 2.csv'
s_path_wsc_p3 = s_path_root + 'IWSC Sebring Practice 3\\IWSC Sebring Practice 3.csv'
s_path_wsc_p4 = s_path_root + 'IWSC Sebring Practice 4\\IWSC Sebring Practice 4.csv'
s_path_wsc_q = s_path_root + 'IWSC Sebring Qualifying\\IWSC Sebring Qualifying.csv'
s_path_wsc_r = s_path_root + 'IWSC Sebring Race\\IWSC Sebring Race.csv'


# read in all the data from Sebring 2018

list_of_csv_files = [s_path_gt3_p1, s_path_gt3_q, s_path_gt3_r1, s_path_gt3_r2,
                    s_path_pc_p1, s_path_pc_p2, s_path_pc_q, s_path_pc_r, 
                    s_path_ctsc_p1, s_path_ctsc_p2, s_path_ctsc_p3, s_path_ctsc_q, s_path_ctsc_r,
                    s_path_wsc_p1, s_path_wsc_p2, s_path_wsc_p3, s_path_wsc_p4, s_path_wsc_q, s_path_wsc_r]
df_list = []
for s_file_name in list_of_csv_files:
    df_temp = pd.read_csv(s_file_name)
    df_temp['Source'] = get_file_root(s_file_name)
    df_list.append(df_temp)
    
    
df = fix_time(pd.concat(df_list))

# Set up class lists
classes = ['GS', 'GT3CP', 'GT3G', 'GT3P', 'GTD', 'GTLM', 'LMP3', 'MPC', 'P', 'ST', 'TCR']
iwsc_classes = ['GTD', 'GTLM', 'P']
ictsc_classes = ['GS', 'ST', 'TCR']
ipc_classes = ['LMP3', 'MPC']
gt3_classes =['GT3CP', 'GT3G', 'GT3P']

race_series = {'all':classes, 'iwsc':iwsc_classes, 'ictsc':ictsc_classes, 'ipc':ipc_classes, 'gt3':gt3_classes}
    
    
#main()

In [32]:
df.describe()

Unnamed: 0,Car,Lap,S01,S02,S03,S04,S05,S06,S07,S08,S09,S10,SP5,SPI,fLapTime
count,23573.0,23573.0,23573.0,23573.0,23573.0,23573.0,23573.0,23573.0,23573.0,23573.0,23573.0,23573.0,23573.0,23573.0,23573.0
mean,77.972596,93.28732,15.312782,6.447251,26.535603,16.139998,28.776277,30.202785,0.604626,11.556587,1.754089,8.708084,147.699482,108.752478,150.226249
std,164.1076,100.609144,53.089786,1.761435,5.22257,2.733691,5.932442,7.845112,0.214961,2.544635,0.842739,56.610775,27.059731,24.919807,136.798256
min,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,107.076
25%,24.0,11.0,5.568,5.568,24.267,14.933,26.231,27.097,0.525,10.509,1.43,2.667,145.908,106.556,119.575
50%,51.0,38.0,5.796,5.888,25.19,15.383,27.147,28.195,0.548,10.95,1.487,2.765,154.695,116.158,124.41
75%,73.0,171.0,6.299,6.571,26.97,16.405,29.09,30.008,0.58,11.967,1.619,3.002,161.165,120.704,138.466
max,912.0,344.0,1348.109,32.526,147.112,64.562,98.031,128.834,2.683,150.793,10.157,2687.168,176.61,135.579,4683.863


In [33]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 23573 entries, 0 to 12919
Data columns (total 23 columns):
Car             23573 non-null int64
Class           23573 non-null object
Driver          23573 non-null object
Lap             23573 non-null int64
Lap Time        23573 non-null object
Session Time    23573 non-null object
Flag            23573 non-null object
Location        23561 non-null object
S01             23573 non-null float64
S02             23573 non-null float64
S03             23573 non-null float64
S04             23573 non-null float64
S05             23573 non-null float64
S06             23573 non-null float64
S07             23573 non-null float64
S08             23573 non-null float64
S09             23573 non-null float64
S10             23573 non-null float64
SP5             23573 non-null float64
SPI             23573 non-null float64
Source          23573 non-null object
dtLapTime       23573 non-null object
fLapTime        23573 non-null float64
dtypes

In [34]:
np.unique(df.Class)

array(['GS', 'GT3CP', 'GT3G', 'GT3P', 'GTD', 'GTLM', 'LMP3', 'MPC', 'P',
       'ST', 'TCR'], dtype=object)

In [35]:
race_series

{'all': ['GS',
  'GT3CP',
  'GT3G',
  'GT3P',
  'GTD',
  'GTLM',
  'LMP3',
  'MPC',
  'P',
  'ST',
  'TCR'],
 'gt3': ['GT3CP', 'GT3G', 'GT3P'],
 'ictsc': ['GS', 'ST', 'TCR'],
 'ipc': ['LMP3', 'MPC'],
 'iwsc': ['GTD', 'GTLM', 'P']}