In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
from datetime import datetime as dt

In [3]:
import plotly.express as px

In [None]:
'''
Data is from Kaggle (Formula 1 race data, 1950-2017): https://www.kaggle.com/cjgdev/formula-1-race-data-19502017
'''

In [4]:
# Read every CSV in the current working directory into `dfs`,
# keyed by the file name with its extension stripped.
dfs = dict()
for entry in os.listdir():
    if not entry.endswith('.csv'):
        continue
    stem, _ = os.path.splitext(entry)
    dfs[stem] = pd.read_csv(entry)

In [None]:
'''
needed columns from each DataFrame:

circuits - circuitId, name
status - statusId, status
drivers - driverId, forename, surname
races - raceId, year, circuitId, name
constructors - constructorId, name
lapTimes - raceId, driverId, lap, position, time, milliseconds
results - 'resultId', 'raceId', 'driverId', 'constructorId', 'number', 'grid',
       'position', 'positionText', 'positionOrder', 'points', 'laps', 'time',
       'milliseconds', 'fastestLap', 'rank', 'fastestLapTime',
       'fastestLapSpeed', 'statusId'
       
'''

In [5]:
# race_info: circuits joined to races on circuitId. Both inputs carry a `name`
# column, so the merge suffixes them (_x / _y); rename those to explicit labels.
race_info = (
    dfs['circuits'][['circuitId', 'name']]
    .merge(dfs['races'][['raceId', 'year', 'circuitId', 'name']], on='circuitId')
    .rename(columns={'name_x': 'circuit_name', 'name_y': 'race_name'})
)

In [6]:
# results_driver: every results row plus the driver's forename and surname (joined on driverId)
results_driver = dfs['results'].merge(
    dfs['drivers'][['driverId', 'forename', 'surname']],
    on='driverId',
)

In [7]:
# results_driver_const: adds the constructor's `name` column (joined on constructorId)
results_driver_const = results_driver.merge(
    dfs['constructors'][['constructorId', 'name']],
    on='constructorId',
)

In [8]:
# results_status: adds the textual `status` for each result (joined on statusId)
results_status = results_driver_const.merge(
    dfs['status'][['statusId', 'status']],
    on='statusId',
)

In [9]:
# results_laptimes: results joined to lapTimes on (raceId, driverId).
# position / time / milliseconds exist on both sides; the _x columns come from
# results and keep their names, the _y columns come from lapTimes and get a lap_ prefix.
results_laptimes = (
    results_status
    .merge(
        dfs['lapTimes'][['raceId', 'driverId', 'lap', 'position', 'time', 'milliseconds']],
        on=['raceId', 'driverId'],
    )
    .rename(columns={
        'position_x': 'position',
        'time_x': 'time',
        'milliseconds_x': 'milliseconds',
        'position_y': 'lap_position',
        'time_y': 'lap_time',
        'milliseconds_y': 'lap_milliseconds',
    })
)

In [10]:
# df_final: lap-level results enriched with race and circuit info (joined on raceId)
df_final = results_laptimes.merge(race_info, on='raceId')

**How has the fastest lap time changed over time?**

In [11]:
# Keep only the columns needed to find the fastest lap of each race
fast_lap_columns = [
    'year', 'raceId', 'race_name', 'circuit_name',
    'forename', 'surname', 'name',
    'fastestLap', 'fastestLapTime',
]
df_fast_lap = df_final[fast_lap_columns]

In [90]:
# Preview the selection. Identical rows repeat because df_final was built by
# merging results with lapTimes on (raceId, driverId), and lapTimes has a `lap` column.
df_fast_lap

Unnamed: 0,year,raceId,race_name,circuit_name,forename,surname,name,fastestLap,fastestLapTime
0,2008,18,Australian Grand Prix,Albert Park Grand Prix Circuit,Lewis,Hamilton,McLaren,39.0,01:27.5
1,2008,18,Australian Grand Prix,Albert Park Grand Prix Circuit,Lewis,Hamilton,McLaren,39.0,01:27.5
2,2008,18,Australian Grand Prix,Albert Park Grand Prix Circuit,Lewis,Hamilton,McLaren,39.0,01:27.5
3,2008,18,Australian Grand Prix,Albert Park Grand Prix Circuit,Lewis,Hamilton,McLaren,39.0,01:27.5
4,2008,18,Australian Grand Prix,Albert Park Grand Prix Circuit,Lewis,Hamilton,McLaren,39.0,01:27.5
...,...,...,...,...,...,...,...,...,...
426299,1998,197,Canadian Grand Prix,Circuit Gilles Villeneuve,David,Coulthard,McLaren,,
426300,1998,197,Canadian Grand Prix,Circuit Gilles Villeneuve,David,Coulthard,McLaren,,
426301,1998,197,Canadian Grand Prix,Circuit Gilles Villeneuve,David,Coulthard,McLaren,,
426302,1998,197,Canadian Grand Prix,Circuit Gilles Villeneuve,David,Coulthard,McLaren,,


In [12]:
def get_fastest_laps(df):
    """Return the fastest-lap row(s) of every race found in ``df``.

    For each distinct ``raceId`` the duplicate rows are collapsed, rows with
    any missing value are discarded, and the row(s) whose ``fastestLapTime``
    equals the race minimum are kept. Ties are all kept, so a race can
    contribute more than one row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``raceId`` and ``fastestLapTime`` columns.
        ``fastestLapTime`` values are compared with ``min()`` as-is; for
        strings this is a lexicographic comparison, which presumably relies
        on a zero-padded ``MM:SS.f`` format -- verify against the data.

    Returns
    -------
    pandas.DataFrame
        Same columns as ``df``, fresh 0..n-1 index, races in order of first
        appearance in ``df``. Empty (columns only) when no race has a
        complete row.
    """
    # Exact duplicates always share a raceId, so de-duplicating once up front
    # is equivalent to doing it race by race, and keeps first-appearance order.
    deduped = df.drop_duplicates()

    winners = []
    # Group on the *argument* (not a notebook global) so the function works
    # for any frame it is given; sort=False keeps first-appearance order.
    for _, race_rows in deduped.groupby('raceId', sort=False):
        complete = race_rows.dropna()
        if complete.empty:
            # Race has no row with all values present: contributes nothing.
            continue
        best_time = complete['fastestLapTime'].min()
        winners.append(complete[complete['fastestLapTime'] == best_time])

    if not winners:
        return pd.DataFrame(columns=df.columns)

    # One concat at the end instead of growing a frame row by row
    # (DataFrame.append no longer exists in pandas >= 2.0).
    return pd.concat(winners, ignore_index=True)
        
        

In [13]:
# Reduce the repeated rows to the fastest-lap record(s) of each race
df_fast_lap_2 = get_fastest_laps(df_fast_lap)

In [14]:
def get_lap_seconds(x):
    """Convert a lap-time string in ``MM:SS.f`` form to seconds as a float.

    The string is parsed with ``strptime('%M:%S.%f')``, which yields a
    datetime on 1900-01-01; subtracting that date's midnight leaves only the
    elapsed time. Raises ``ValueError`` if ``x`` does not match the format.
    """
    midnight = dt(1900, 1, 1)
    parsed = dt.strptime(x, '%M:%S.%f')
    return (parsed - midnight).total_seconds()

In [15]:
# Add a numeric seconds column derived from the MM:SS.f fastest-lap strings
lap_seconds = df_fast_lap_2['fastestLapTime'].apply(get_lap_seconds)
df_fast_lap_2['fastestLapSeconds'] = lap_seconds

In [16]:
# Inspect the result. A race appears more than once when several drivers
# share the minimum fastestLapTime (get_fastest_laps keeps all ties).
df_fast_lap_2

Unnamed: 0,year,raceId,race_name,circuit_name,forename,surname,name,fastestLap,fastestLapTime,fastestLapSeconds
0,2008,18,Australian Grand Prix,Albert Park Grand Prix Circuit,Heikki,Kovalainen,McLaren,43.0,01:27.4,87.4
1,2008,19,Malaysian Grand Prix,Sepang International Circuit,Nick,Heidfeld,BMW Sauber,55.0,01:35.4,95.4
2,2008,19,Malaysian Grand Prix,Sepang International Circuit,Kimi,R�_ikk̦nen,Ferrari,37.0,01:35.4,95.4
3,2008,21,Spanish Grand Prix,Circuit de Barcelona-Catalunya,Kimi,R�_ikk̦nen,Ferrari,46.0,01:21.7,81.7
4,2008,22,Turkish Grand Prix,Istanbul Park,Lewis,Hamilton,McLaren,31.0,01:26.5,86.5
...,...,...,...,...,...,...,...,...,...,...
297,2004,98,United States Grand Prix,Indianapolis Motor Speedway,Rubens,Barrichello,Ferrari,7.0,01:10.4,70.4
298,2004,98,United States Grand Prix,Indianapolis Motor Speedway,Michael,Schumacher,Ferrari,8.0,01:10.4,70.4
299,2005,79,United States Grand Prix,Indianapolis Motor Speedway,Michael,Schumacher,Ferrari,48.0,01:11.5,71.5
300,2004,97,Canadian Grand Prix,Circuit Gilles Villeneuve,Rubens,Barrichello,Ferrari,68.0,01:13.6,73.6


In [18]:
# px.line joins points in row order, and df_fast_lap_2 is ordered by race,
# not by year. Sort by year first so each circuit's trace runs left to right
# instead of zigzagging. mergesort is stable, so rows of the same year keep
# their relative order.
fastest_by_year = df_fast_lap_2.sort_values('year', kind='mergesort')

fig = px.line(
    fastest_by_year,
    x='year',
    y='fastestLapSeconds',
    color='circuit_name',
    hover_data=['race_name', 'surname', 'name'],
    # Title and readable axis/legend labels so the figure stands on its own.
    title='Fastest race lap per circuit over time',
    labels={
        'year': 'Year',
        'fastestLapSeconds': 'Fastest lap (seconds)',
        'circuit_name': 'Circuit',
    },
)
fig.show()