In [None]:
#Analyzing the Effect of Pit Stop Time Duration on Race Results 

#Sydney Walker, Alana Dawson, Ishana Ram, Deborah Hong 
#Group 1
#QTM 151 - Alejandro Sanchez Becerra
#April 25th, 2023


In [None]:
#Importing Libraries 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import date, time, datetime
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col


In [None]:
# Load the raw tables; paths are relative to the notebook's working directory.
pitstops = pd.read_csv("data_raw/pit_stops.csv")
drivers = pd.read_csv("data_raw/drivers.csv")
results = pd.read_csv("data_raw/results.csv")
races = pd.read_csv("data_raw/races.csv")

# display() renders nothing unless it is given objects, so pass a short
# preview of every table to confirm that each file loaded as expected.
display(pitstops.head(), drivers.head(), results.head(), races.head())

In [None]:
# British Grand Prix: every row of `races` with that name, ordered by year.
# sort_values returns a new frame rather than sorting in place, so the sorted
# result has to be the one that is assigned and displayed.
races_British_grandprix = (
    races.query('name == "British Grand Prix"')
         .sort_values("year", ascending=True)
)
display(races_British_grandprix)

# Australian Grand Prix: same selection and ordering.
races_Australian_grandprix = (
    races.query('name == "Australian Grand Prix"')
         .sort_values("year", ascending=True)
)
display(races_Australian_grandprix)

In [None]:
# Attach pit-stop rows to each Grand Prix's race rows, then keep the 2011 edition.
# Pit stops are ordered by descending `duration` before merging.
# NOTE(review): if `duration` was read as text, this ordering is lexicographic
# rather than numeric — confirm the column dtype.

# British Grand Prix
sorted_pitstops_British = pitstops.sort_values(by="duration", ascending=False)
pitstops_merge_British = races_British_grandprix.merge(
    sorted_pitstops_British,
    how="left",   # keep every race row, even one with no recorded pit stops
    on="raceId",
)
British_grandprix_2011 = pitstops_merge_British.query("year == 2011")
display(British_grandprix_2011)

# Australian Grand Prix
sorted_pitstops_Australian = pitstops.sort_values(by="duration", ascending=False)
pitstops_merge_Australian = races_Australian_grandprix.merge(
    sorted_pitstops_Australian,
    how="left",
    on="raceId",
)
Australian_grandprix_2011 = pitstops_merge_Australian.query("year == 2011")
display(Australian_grandprix_2011)

In [None]:
# Join each race's results onto its 2011 pit-stop rows by driver.
# Columns present on both sides get pandas' default _x (results) / _y (pit-stop
# frame) suffixes, which is where `milliseconds_y` comes from.
# NOTE(review): raceId 849 / 841 are presumably the 2011 British / Australian
# races — confirm against the 2011 frames built earlier.
analysis_columns = ["driverId", "points", "stop", "duration", "milliseconds_y"]

# British Grand Prix
results_British_grandprix = results[results["raceId"] == 849]
pitstops_final_British = results_British_grandprix.merge(British_grandprix_2011, on="driverId")
pitstops_final2_British = pitstops_final_British[analysis_columns]
display(pitstops_final_British)

# Australian Grand Prix
results_Australian_grandprix = results[results["raceId"] == 841]
pitstops_final_Australian = results_Australian_grandprix.merge(Australian_grandprix_2011, on="driverId")
pitstops_final2_Australian = pitstops_final_Australian[analysis_columns]
display(pitstops_final_Australian)

In [None]:
# Collapse the per-stop rows to one row per driver:
#   total_pitstop_duration = sum of `milliseconds_y` over the driver's rows
#   total_points           = mean of `points` over the driver's rows
# NOTE(review): the mean presumably recovers the driver's single race result,
# assuming `points` repeats on every pit-stop row — confirm.
per_driver_summary = {
    "total_pitstop_duration": pd.NamedAgg(column="milliseconds_y", aggfunc="sum"),
    "total_points": pd.NamedAgg(column="points", aggfunc="mean"),
}

# British Grand Prix
british_by_driver = pitstops_final2_British.groupby(["driverId"])
pitstopduration_agg_British = (
    british_by_driver
    .agg(**per_driver_summary)
    .sort_values(by="total_pitstop_duration", ascending=False)
)
display(pitstopduration_agg_British)

# Australian Grand Prix
australian_by_driver = pitstops_final2_Australian.groupby(["driverId"])
pitstopduration_agg_Australian = (
    australian_by_driver
    .agg(**per_driver_summary)
    .sort_values(by="total_pitstop_duration", ascending=False)
)
display(pitstopduration_agg_Australian)

In [None]:
def _plot_pitstop_vs_points(ax, agg, race_label, point_color):
    """Draw one race's pit-stop-time vs. points panel on `ax`.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    agg : per-driver frame with `total_pitstop_duration` and `total_points` columns.
    race_label : legend text for the scatter points.
    point_color : matplotlib color for the scatter points.
    """
    duration = agg["total_pitstop_duration"]
    points = agg["total_points"]

    # The label is attached to the scatter itself, so the legend entry always
    # refers to the data points and never to the fit line, regardless of the
    # order in which the artists are created.
    ax.scatter(duration, points, color=point_color, label=race_label)

    # Degree-1 least-squares fit, drawn over the observed durations.
    slope, intercept = np.polyfit(duration, points, 1)
    ax.plot(duration, slope * duration + intercept, color="red")

    ax.set_xlabel("Total Pitstop Time (Milliseconds)")
    ax.set_ylabel("Points Earned In Competition")
    ax.legend(loc="upper left")


# One panel per race, side by side; both panels are built the same way.
fig, (ax_british, ax_australian) = plt.subplots(1, 2)
_plot_pitstop_vs_points(ax_british, pitstopduration_agg_British, "British", "aquamarine")
_plot_pitstop_vs_points(ax_australian, pitstopduration_agg_Australian, "Australian", "mediumslateblue")

fig.tight_layout()
plt.show()


In [None]:
# Regression Analysis
# Univariate OLS of points on total pit-stop time, fitted separately for each
# race with HC1 covariance; one summary table is printed per race.
points_on_duration = 'total_points ~ total_pitstop_duration'

british_model = smf.ols(formula=points_on_duration, data=pitstopduration_agg_British)
results_univariateBritish = british_model.fit(cov_type="HC1")
british_table = summary_col(results_univariateBritish, stars=True)
print(british_table)

australian_model = smf.ols(formula=points_on_duration, data=pitstopduration_agg_Australian)
results_univariateAustralian = australian_model.fit(cov_type="HC1")
australian_table = summary_col(results_univariateAustralian, stars=True)
print(australian_table)