In [None]:
%pylab inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from sys import platform

# Root folder of the local f1-analytics checkout; later cells build file names
# by plain string concatenation, so the value must keep its trailing slash.
# NOTE(review): these are machine-specific locations — consider deriving the
# root from the notebook's own location instead.
if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
else:
    # macOS ("darwin") and every other platform share the home-relative
    # layout. Previously `path` was left undefined outside win32/darwin, so
    # the data-loading cell failed with a NameError on e.g. Linux.
    path = '~/Documents/GitHub/f1-analytics/'

In [None]:
# Read the two CSV files under `path`: 'data/qualifying.csv' is bound to `data`
# and 'data/merged.csv' to `merged`.
data, merged = (
    pd.read_csv(path + csv_name)
    for csv_name in ('data/qualifying.csv', 'data/merged.csv')
)

In [None]:
# Keep only the rows of the 2020 season.
s20 = data.loc[data['season'] == 2020]

# describe() returns the summary statistics of the column; only the two
# extremes are printed.
d = s20.loc[:, 'qual_time'].describe()
print(f"Min: { d['min'] } Max: {d['max']}")

In [None]:
# Display the 2020 rows whose qual_time is at least 100.
s20[s20['qual_time'] >= 100]

In [None]:
# Scatter plot of every 2020 qual_time value against its round number.
s20 = data.query('season == 2020')

# qual_time grouped by round. The column is selected explicitly: the previous
# `.agg('qual_time')` passed a column name where an aggregation function is
# expected and only appeared to work because pandas resolved the string to the
# column selection. (The grouped object is not used by the plot below.)
q_times_by_round = s20.groupby('round')['qual_time']

x = s20['round'].values
y = s20['qual_time'].values

fig, axs = plt.subplots(1, figsize=(10, 5), dpi=80)

# The figure previously had an empty title; give it one so it stands alone.
axs.set_title('Qualifying times in 2020')
axs.set_xlabel('Round')
axs.set_ylabel('Time in seconds')
axs.set_xticks(np.unique(x))  # one tick per round present in the data
axs.scatter(x, y);  # trailing ';' hides the artist repr in the cell output

In [None]:
# Mean qual_time per 2021 round, drawn with an error bar of 1.96 standard
# errors (a normal-approximation 95% confidence interval for the mean).
s21 = data.query('season == 2021')
rounds = np.unique(s21['round'])

fig, axs = plt.subplots(1, figsize=(10, 5), dpi=80)

# The plot shows means with confidence intervals, not standard deviations,
# so the title states that.
axs.set_title('Mean Qualifying Time per Round (95% CI)')
axs.set_xlabel('Round')
axs.set_ylabel('Time in seconds')
axs.set_xticks(rounds)

# Iterate over the round numbers actually present instead of assuming they
# are exactly 1..N; a gap in the numbering used to shift every later lookup.
for rnd in rounds:
    times = s21.loc[s21['round'] == rnd, 'qual_time'].dropna().values
    if len(times) == 0:
        # Nothing recorded for this round: skip it rather than plot NaN.
        continue

    mean = np.mean(times)
    # np.std defaults to the population standard deviation (ddof=0).
    err = 1.96 * np.std(times) / np.sqrt(len(times))

    # A single errorbar call with a marker replaces the former plot() call,
    # which drew one point without a marker and was therefore invisible.
    axs.errorbar(rnd, mean, yerr=err, fmt='o', capsize=3, color='C0')

In [None]:
# Mean q_delta per 2021 round, drawn with an error bar of 1.96 standard
# errors (a normal-approximation 95% confidence interval for the mean).
s21 = data.query('season == 2021')
rounds = np.unique(s21['round'])

fig, axs = plt.subplots(1, figsize=(10, 5), dpi=80)

# The plot shows means with confidence intervals, not standard deviations,
# so the title states that.
axs.set_title('Mean Qualifying Delta per Round (95% CI)')
axs.set_xlabel('Round')
axs.set_ylabel('Time in seconds')
axs.set_xticks(rounds)

# Iterate over the round numbers actually present instead of assuming they
# are exactly 1..N; a gap in the numbering used to shift every later lookup.
for rnd in rounds:
    deltas = s21.loc[s21['round'] == rnd, 'q_delta'].dropna().values
    if len(deltas) == 0:
        # Nothing recorded for this round: skip it rather than plot NaN.
        continue

    mean = np.mean(deltas)
    # np.std defaults to the population standard deviation (ddof=0).
    err = 1.96 * np.std(deltas) / np.sqrt(len(deltas))

    # A single errorbar call with a marker replaces the former plot() call,
    # which drew one point without a marker and was therefore invisible.
    axs.errorbar(rnd, mean, yerr=err, fmt='o', capsize=3, color='C0')

In [None]:
### Every 2020 round except round 14, to show the difference between the
### front runners and the bottom of the pack.
# NOTE(review): the frame is called `s21` although the filter selects season 2020.
s21 = merged.query('season == 2020 & q_delta.notnull() & round != 14')

x, y = s21['round'].values, s21['q_delta'].values

fig, axs = plt.subplots(1, figsize=(10, 5), dpi=80)

axs.set(
    title='Difference in delta time in 2020',
    xlabel='Round',
    ylabel='Time in seconds',
    xticks=np.unique(s21['round']),
)
axs.scatter(x, y)

In [None]:
# 2021 rows of `merged` that have a q_delta value, plotted per round.
s21 = merged.query('season == 2021 & q_delta.notnull()')
# Disabled extra filter, kept for reference:
#  & round not in [7, 15, 17, 18, 21]
x, y = s21['round'].values, s21['q_delta'].values

fig, axs = plt.subplots(1, figsize=(10, 5), dpi=80)

axs.set(
    title='Difference in delta time in 2021',
    xlabel='Round',
    ylabel='Time in seconds',
    xticks=np.unique(s21['round']),
)
axs.scatter(x, y)

In [None]:
### Pole conversion rate: percentage of rows with grid == 1 that also have
### podium == 1.
# NOTE(review): presumably podium == 1 marks a race win — confirm against the
# column definition in merged.csv.

pole_sitters = merged.query('grid == 1')
pole_winners = pole_sitters.query('podium == 1')

if len(pole_sitters) == 0:
    # No row started from grid position 1: report that explicitly instead of
    # failing with ZeroDivisionError.
    pole_conversion = float('nan')
    print('Pole conversion: n/a (no rows with grid == 1)')
else:
    pole_conversion = round((len(pole_winners) / len(pole_sitters)) * 100, 1)
    print(f'Pole conversion: {pole_conversion}%')