In [118]:
import pandas as pd
import numpy as np
import matplotlib as plt
from datetime import datetime

# Read the six raw tables from CSV files in the working directory.
# The target names and the file names are listed in the same order.
races, qualifying, constructors, drivers, results, weather = map(
    pd.read_csv,
    (
        'races.csv',
        'qualifying.csv',
        'constructor_standings.csv',
        'driver_standings.csv',
        'results.csv',
        'weather.csv',
    ),
)


In [119]:
# Rename 'grid_pos' to 'grid' so the qualifying table shares the column name
# used as a merge key further down. Rebinding (instead of inplace=True) keeps
# the cell free of in-place mutation.
qualifying = qualifying.rename(columns={'grid_pos': 'grid'})

In [120]:
# Display the races table to inspect its columns before merging.
races

Unnamed: 0,season,round,circuit_id,country,lat,long,date,url
0,2020,1,red_bull_ring,Austria,47.2197,14.7647,2020-07-05,http://en.wikipedia.org/wiki/2020_Austrian_Gra...
1,2020,2,red_bull_ring,Austria,47.2197,14.7647,2020-07-12,http://en.wikipedia.org/wiki/2020_Styrian_Gran...
2,2020,3,hungaroring,Hungary,47.5789,19.2486,2020-07-19,http://en.wikipedia.org/wiki/2020_Hungarian_Gr...
3,2020,4,silverstone,UK,52.0786,-1.01694,2020-08-02,http://en.wikipedia.org/wiki/2020_British_Gran...
4,2020,5,silverstone,UK,52.0786,-1.01694,2020-08-09,http://en.wikipedia.org/wiki/70th_Anniversary_...
5,2020,6,catalunya,Spain,41.57,2.26111,2020-08-16,http://en.wikipedia.org/wiki/2020_Spanish_Gran...
6,2020,7,spa,Belgium,50.4372,5.97139,2020-08-30,http://en.wikipedia.org/wiki/2020_Belgian_Gran...
7,2020,8,monza,Italy,45.6156,9.28111,2020-09-06,http://en.wikipedia.org/wiki/2020_Italian_Gran...
8,2020,9,mugello,Italy,43.9975,11.3719,2020-09-13,http://en.wikipedia.org/wiki/2020_Tuscan_Grand...
9,2020,10,sochi,Russia,43.4057,39.9578,2020-09-27,http://en.wikipedia.org/wiki/2020_Russian_Gran...


In [121]:
# Inner-join the race calendar with weather, then with results, on the
# race keys; drop the columns that are not carried into the combined table.
df1 = races.merge(weather, how='inner', on=['season', 'round', 'circuit_id']).drop(
    columns=['lat', 'long', 'country', 'weather']
)
df2 = df1.merge(results, how='inner', on=['season', 'round', 'circuit_id']).drop(
    columns=['url', 'points', 'status', 'time']
)

# Left joins keep every row of the left table even when no standings row matches.
df3 = df2.merge(drivers, how='left', on=['season', 'round', 'driver'])
df4 = df3.merge(constructors, how='left', on=['season', 'round', 'constructor'])

# Attach the qualifying rows by matching on season, round and grid.
final_df = df4.merge(qualifying, how='inner', on=['season', 'round', 'grid']).drop(
    columns=['driver_x']
)

In [122]:
# Import only the name that is used; a wildcard import hides where names come
# from and can silently shadow existing variables.
from dateutil.relativedelta import relativedelta

# Parse both date columns so they can be compared as timestamps.
final_df['date'] = pd.to_datetime(final_df['date'])
final_df['date_of_birth'] = pd.to_datetime(final_df['date_of_birth'])

# Whole years between 'date_of_birth' and 'date' for each row
# (relativedelta(...).years is the completed-years component).
final_df['driver_age'] = final_df.apply(
    lambda row: relativedelta(row['date'], row['date_of_birth']).years,
    axis=1,
)

# The raw dates are no longer needed once the age has been derived.
final_df = final_df.drop(columns=['date', 'date_of_birth'])

In [123]:
# fill/drop nulls

# Missing standings values are treated as 0 and the columns stored as integers.
for col in ['driver_points', 'driver_wins', 'driver_standings_pos', 'constructor_points',
            'constructor_wins', 'constructor_standings_pos']:
    # Assign the filled column back. Calling fillna(..., inplace=True) on
    # final_df[col] acts on an intermediate object and is not guaranteed to
    # update final_df (it has no effect under pandas Copy-on-Write).
    # astype truncates toward zero, like int().
    final_df[col] = final_df[col].fillna(0).astype('int64')

# Any row that still has a null in another column is discarded.
final_df = final_df.dropna()

In [124]:
# Scratch check on a sample "m:ss.mmm" lap-time string.
# The previous lookup into final_df was overwritten on the next line and the
# float(...) result was discarded, so only the lines with an effect are kept.
x = '1:02.921'
print(x[:8])

1:02.921


In [125]:
def format_qualifying(x):
    """Convert a qualifying lap time to seconds.

    Args:
        x: A lap time, either a colon-separated string such as
            ``'1:02.921'`` (minutes:seconds) or a plain number of seconds
            given as a string or a numeric value.

    Returns:
        float: The time in seconds. A numeric ``0`` maps to ``0.0``.

    Raises:
        ValueError: If any colon-separated field cannot be parsed as a float.
    """
    # Normalise to text first: the membership test ``':' in x`` raises
    # TypeError on numeric input, so a numeric 0 could never reach a
    # zero-handling branch placed after that test.
    text = str(x).strip()

    # Fold the fields left to right, scaling the running total by 60 per
    # field: 'ss.mmm' -> seconds, 'm:ss.mmm' -> 60*m + ss.mmm, and so on.
    seconds = 0.0
    for field in text.split(':'):
        seconds = seconds * 60 + float(field)
    return seconds

In [126]:
# Convert every qualifying time with format_qualifying; the function is
# passed directly, with no wrapping lambda.
final_df['qualy_time'] = final_df['qualy_time'].map(format_qualifying)

# Summary statistics of the converted column.
final_df['qualy_time'].describe()

count    687.000000
mean      83.817259
std       14.045557
min       53.377000
25%       75.980500
50%       81.640000
75%       90.854000
max      132.909000
Name: qualy_time, dtype: float64

In [127]:
# calculate difference in qualifying times

# Keep rows with a non-zero time, order them by grid within each race, and
# store each row's gap to the first row of its (season, round) group.
# Summing consecutive differences within a group gives exactly this gap, so it
# is computed directly, with no temporary column and no in-place mutation.
final_df = (
    final_df[final_df['qualy_time'] != 0]
    .sort_values(['season', 'round', 'grid'])
    .assign(
        q_delta=lambda frame: frame['qualy_time']
        - frame.groupby(['season', 'round'])['qualy_time'].transform('first')
    )
)

In [128]:
# Preview the first rows of the combined table, including the q_delta column.
final_df.head()

Unnamed: 0,season,round,circuit_id,warm,cold,dry,wet,cloudy,nationality,constructor_x,...,driver_wins,driver_standings_pos,constructor_points,constructor_wins,constructor_standings_pos,driver_y,constructor_y,qualy_time,driver_age,q_delta
0,2020,1,red_bull_ring,True,False,False,False,False,Finnish,mercedes,...,1,1,37,1,1,Valtteri Bottas BOT,Mercedes,62.939,30,0.0
19,2020,1,red_bull_ring,True,False,False,False,False,Dutch,red_bull,...,0,20,0,0,9,Max Verstappen VER,Red Bull Racing Honda,63.477,22,0.538
2,2020,1,red_bull_ring,True,False,False,False,False,British,mclaren,...,0,3,26,0,2,Lando Norris NOR,McLaren Renault,63.626,20,0.687
12,2020,1,red_bull_ring,True,False,False,False,False,Thai,red_bull,...,0,13,0,0,9,Alexander Albon ALB,Red Bull Racing Honda,63.868,24,0.929
3,2020,1,red_bull_ring,True,False,False,False,False,British,mercedes,...,0,4,37,1,1,Lewis Hamilton HAM,Mercedes,62.951,35,0.012
