In [107]:
# !pip install voila

<center><img src="formula-1-logo-5-3.png" width="400"/></center>

In [108]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

import warnings
warnings.filterwarnings("ignore")

# Directory that holds the CSV tables read by the cells below. It defaults to
# "/data"; set the F1_DATA_PATH environment variable to point the notebook at
# another location without editing the code.
data_path = os.environ.get("F1_DATA_PATH", "/data")

In [109]:
# List all available data files
# for file in os.listdir(data_path):
#     print(file)

In [110]:
# Dataset exploration: map every CSV file in data_path to its column names.
# Only the header row is parsed (nrows=0), so whole tables are not loaded just
# to list their columns, and directory entries that are not CSV files are
# skipped instead of being handed to read_csv.

files_and_columns = {}
for file in os.listdir(data_path):
    if not file.lower().endswith(".csv"):
        continue
    df = pd.read_csv(os.path.join(data_path, file), nrows=0)
    files_and_columns[file] = list(df.columns)

In [111]:
# Load the core tables from data_path; the targets on the left line up
# one-to-one, in order, with the file names on the right.
(
    df_status,
    df_laptimes,
    df_drivers,
    df_races,
    df_standings,
) = (
    pd.read_csv(os.path.join(data_path, csv_name))
    for csv_name in (
        "status.csv",
        "lap_times.csv",
        "drivers.csv",
        "races.csv",
        "driver_standings.csv",
    )
)

In [112]:
# Add a "full_name" column ("<forename> <surname>") to the drivers table.
# The concatenation is done on whole columns at once rather than row by row
# through .loc, which is both shorter and far faster.
df_drivers["full_name"] = df_drivers["forename"] + " " + df_drivers["surname"]

In [113]:
# Attach each driver's full name to the lap-time rows, matching on driverId.
# df_drivernames (driverId -> full_name lookup) is kept as its own frame.
df_drivernames = df_drivers.loc[:, ["driverId", "full_name"]]
df_laptimes = pd.merge(df_laptimes, df_drivernames, on="driverId")

In [114]:
# Attach the race name and season year to the lap-time rows, matching on raceId.
df_laptimes = pd.merge(
    df_laptimes,
    df_races.loc[:, ["raceId", "name", "year"]],
    on="raceId",
)

In [115]:
# make ipywidgets - select year, race and driver
#using ipywidgets
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual

## Race Statistics

- The first graph gives us a trend of lap times as the laps progress in a race
- The second graph gives us a trend of position in the race as the laps progress

In [116]:
# select race
def select_race(df_year, race, driver):
    """Plot one driver's lap times and positions over the laps of one race.

    Parameters
    ----------
    df_year : pandas.DataFrame
        Lap-time rows to search. The columns read here are ``name`` (race
        name), ``full_name`` (driver), ``lap``, ``time`` and ``position``.
    race : str
        Value of ``name`` to keep.
    driver : str
        Value of ``full_name`` to keep.

    Returns
    -------
    None
        A new figure with two stacked axes is created: lap time against lap
        number on top, position against lap number below. If no row matches,
        both axes are left empty.
    """
    selected = (df_year['name'] == race) & (df_year['full_name'] == driver)

    # Work on an independent, lap-ordered copy: the new column is then written
    # to our own frame (not to a slice of the caller's data) and the lines are
    # drawn in lap order regardless of the row order of the input.
    df_race_year_driver = df_year.loc[selected].sort_values("lap").copy()

    # Lap-time strings are parsed with the '%M:%S.%f' format so matplotlib can
    # place them on a time axis.
    df_race_year_driver["timestamp"] = df_race_year_driver["time"].apply(
        lambda x: datetime.strptime(x, '%M:%S.%f')
    )

    fig, axs = plt.subplots(2,1)
    axs[0].plot(df_race_year_driver["lap"], df_race_year_driver["timestamp"])
    axs[0].set_xlabel("LAP NUMBER")
    axs[0].set_ylabel("LAP TIME")
    axs[1].plot(df_race_year_driver["lap"], df_race_year_driver["position"])
    axs[1].set_xlabel("LAP NUMBER")
    axs[1].set_ylabel("POSITION")

In [117]:
@widgets.interact(Year=df_laptimes['year'].sort_values().unique())
def choose_year(Year):
    """Show race and driver selectors for the lap-time rows of season ``Year``.

    The rows of ``df_laptimes`` for the chosen year are handed to
    ``select_race`` as a fixed argument; the race and driver dropdowns are
    filled with the distinct names found in those rows.
    """
    season_laps = df_laptimes.loc[df_laptimes['year'] == Year]
    race_options = season_laps['name'].unique()
    driver_options = season_laps['full_name'].unique()

    # select race
    race_picker = interactive(
        select_race,
        df_year=fixed(season_laps),
        race=race_options,
        driver=driver_options,
    )
    display(race_picker)

interactive(children=(Dropdown(description='Year', options=(1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 20…

In [118]:
# Enrich the driver standings with the driver's full name (joined on driverId)
# and with the race name and season year (joined on raceId).
df_standings = (
    df_standings
    .merge(df_drivernames, on="driverId")
    .merge(df_races[["raceId", "name", "year"]], on="raceId")
)

## Driver Statistics

In [119]:
def select_racer(df_standings_year, racer):
    """Plot one driver's ``points`` value race by race.

    Parameters
    ----------
    df_standings_year : pandas.DataFrame
        Standings rows to search. The columns read here are ``full_name``,
        ``raceId``, ``points``, ``name`` (race name) and ``year``.
    racer : str
        Value of ``full_name`` to plot.

    Returns
    -------
    None
        A new figure is created with ``points`` on the y axis and one x tick
        per race, labelled with the race name and ordered by ``raceId``.
    """
    # Sort into a new frame instead of in place: the filter result is a slice
    # of the caller's data and should not be mutated.
    df_racer_year = (
        df_standings_year[df_standings_year['full_name'] == racer]
        .sort_values(by="raceId")
    )

    # Take the season for the title from the rows being plotted rather than
    # from a module-level global set by a widget callback, so the function
    # also works when it is called directly. With no matching rows the year
    # is simply left out of the title.
    season = df_racer_year['year'].iloc[0] if not df_racer_year.empty else ""

    fig, ax = plt.subplots(dpi=100)
    ax.plot(df_racer_year['raceId'], df_racer_year['points'])
    ax.set_xticks(df_racer_year['raceId'])
    ax.set_xticklabels(df_racer_year['name'], rotation=90);
    ax.set_xlabel("Race Name")
    ax.set_ylabel("Season Points");
    ax.set_title("{} Season Progress- {}".format(str(season), racer));

In [120]:
# making ipywidgets
@widgets.interact(Year=df_standings['year'].sort_values().unique())
def choose_year(Year):
    """Show a driver selector for the standings rows of season ``Year``.

    The selected year is also stored in the module-level ``year`` variable
    before the nested widget is built.
    """
    global year
    year = Year

    season_standings = df_standings.loc[df_standings['year'] == Year]

    # select racer
    racer_picker = interactive(
        select_racer,
        df_standings_year=fixed(season_standings),
        racer=season_standings['full_name'].unique(),
    )
    display(racer_picker)

interactive(children=(Dropdown(description='Year', options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 19…

## Comparing Driver Performance

In [121]:
def plot_versus(df_standings_year, driver_1, driver_2):
    """Overlay two drivers' ``points`` values race by race on one figure.

    Parameters
    ----------
    df_standings_year : pandas.DataFrame
        Standings rows to search. The columns read here are ``full_name``,
        ``raceId``, ``points`` and ``name`` (race name).
    driver_1, driver_2 : str
        Values of ``full_name`` to compare; each gets its own labelled line.

    Returns
    -------
    None
        A new figure is created with one x tick per race in which either
        driver has a row, labelled with the race name.
    """
    # sort_values returns new frames, so the caller's data (of which the
    # filter results are slices) is never sorted in place.
    df_standings_d1 = df_standings_year[df_standings_year['full_name'] == driver_1].sort_values('raceId')
    df_standings_d2 = df_standings_year[df_standings_year['full_name'] == driver_2].sort_values('raceId')

    # Build the tick list from both drivers' rows: taking it from driver_1
    # alone leaves races that only driver_2 has rows for without a label.
    races = (
        pd.concat([df_standings_d1, df_standings_d2])
        .drop_duplicates('raceId')
        .sort_values('raceId')
    )

    fig, ax = plt.subplots(dpi=100)
    ax.plot(df_standings_d1['raceId'], df_standings_d1['points'], label=driver_1)
    ax.plot(df_standings_d2['raceId'], df_standings_d2['points'], label=driver_2)
    ax.set_xticks(races['raceId'])
    ax.set_xticklabels(races['name'], rotation=90)
    ax.legend()
    ax.set_xlabel("Race Name")
    ax.set_ylabel("Season Points");

In [122]:
@widgets.interact(Year=df_standings['year'].sort_values().unique())
def choose_year(Year):
    """Show two driver selectors for comparing drivers within season ``Year``.

    Both dropdowns list the distinct ``full_name`` values found in the
    standings rows of the chosen year; those rows are passed to
    ``plot_versus`` as a fixed argument.
    """
    season_standings = df_standings.loc[df_standings['year'] == Year]
    driver_options = season_standings['full_name'].unique()

    versus_picker = interactive(
        plot_versus,
        df_standings_year=fixed(season_standings),
        driver_1=driver_options,
        driver_2=season_standings['full_name'].unique(),
    )
    display(versus_picker)

interactive(children=(Dropdown(description='Year', options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 19…

In [123]:
# Load the constructor (team) table and the per-race results from data_path.
df_constructors, df_results = (
    pd.read_csv(os.path.join(data_path, csv_name))
    for csv_name in ("constructors.csv", "results.csv")
)

In [124]:
# Enrich the results with the constructor name, the driver's full name and the
# race name / season year. The constructor and race tables both contribute a
# column called "name"; pandas suffixes the clash as name_x / name_y, and the
# final step gives those two columns descriptive names.
df_results = (
    df_results
    .merge(df_constructors[['constructorId', 'name']], on='constructorId')
    .merge(df_drivers[["driverId", "full_name"]], on="driverId")
    .merge(df_races[["raceId", "name", "year"]], on="raceId")
    .rename(columns={"name_x": "constructor_name", "name_y": "race_name"})
)

In [125]:
# Re-key every 2021 "Qatar Grand Prix" result row to raceId 1072 with a single
# boolean-mask assignment; the row-by-row loop this replaces built a full row
# Series twice for every row of the table.
# NOTE(review): 1072 is presumably the id under which the other tables store
# this race, so that later merges on raceId line up; confirm against the data.
is_qatar_2021 = (df_results["race_name"] == "Qatar Grand Prix") & (df_results["year"] == 2021)
df_results.loc[is_qatar_2021, "raceId"] = 1072

In [126]:
def plot_drivers(df_results_year, team):
    """Plot the standings ``points`` of every driver of one team, race by race.

    Parameters
    ----------
    df_results_year : pandas.DataFrame
        Result rows to search. The columns read here are ``constructor_name``,
        ``raceId``, ``driverId``, ``year``, ``full_name`` and ``race_name``.
    team : str
        Value of ``constructor_name`` to keep.

    Returns
    -------
    None
        A new figure is created with one labelled line per driver and one
        x tick per race, labelled with the race name.

    Notes
    -----
    Reads the module-level ``df_standings`` frame, which must already carry a
    ``year`` column in addition to ``raceId``, ``driverId`` and ``points``.
    """
    df_results_team = df_results_year[df_results_year['constructor_name'] == team]

    # Results and standings both have a "points" column; after the merge they
    # appear as points_x (from the results) and points_y (from the standings).
    # The sort is assigned back: the previous bare sort_values() calls
    # discarded their result, so rows were plotted in whatever order the merge
    # produced.
    df_results_team = (
        df_results_team
        .merge(df_standings[['raceId', 'driverId', 'points', 'year']], on=['raceId', 'driverId', 'year'])
        .rename(columns={'points_x': 'race_points', 'points_y': 'total_points'})
        .sort_values(by='raceId')
    )

    fig, ax = plt.subplots(dpi=100)
    for driver in df_results_team['full_name'].unique():
        # Rows keep the raceId order established above.
        df_driver_team = df_results_team[df_results_team['full_name'] == driver]

        ax.plot(df_driver_team['raceId'], df_driver_team['total_points'], label=driver)

    # One tick per race: the team frame holds a row per driver per race, so
    # using it directly would repeat every tick once per driver.
    races = df_results_team.drop_duplicates('raceId')
    ax.set_xticks(races['raceId'])
    ax.set_xticklabels(races['race_name'], rotation=90);
    ax.set_xlabel("Race Name")
    ax.set_ylabel("Season Points")
    ax.legend()
    # ax.set_title("2020 Season Progress- McLaren Drivers");

## Comparing driver performance within the same team

In [127]:
@widgets.interact(Year=df_results['year'].sort_values(ascending=False).unique())
def choose_year(Year):
    """Show a team selector for the result rows of season ``Year``.

    The year dropdown lists seasons newest first. The team dropdown is filled
    with the distinct ``constructor_name`` values found in the chosen year's
    rows, which are passed to ``plot_drivers`` as a fixed argument.
    """
    season_results = df_results.loc[df_results['year'] == Year]
    team_options = season_results['constructor_name'].unique()

    team_picker = interactive(
        plot_drivers,
        df_results_year=fixed(season_results),
        team=team_options,
    )
    display(team_picker)

interactive(children=(Dropdown(description='Year', options=(2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 20…

In [128]:
# Constructor analysis: load the per-race constructor results and the
# constructor standings from data_path.
df_constructors_result, df_constructors_standing = (
    pd.read_csv(os.path.join(data_path, csv_name))
    for csv_name in ("constructor_results.csv", "constructor_standings.csv")
)

In [129]:
# Enrich the constructor results with the constructor (team) name, joined on
# constructorId, and with the race name and season year, joined on raceId.
df_constructors_result = (
    df_constructors_result
    .merge(df_constructors[['constructorId', 'name']], on='constructorId')
    .merge(df_races[["raceId", "name", "year"]], on="raceId")
)

In [130]:
# Give the two suffixed "name" columns left by the merges descriptive names.
df_constructors_result = df_constructors_result.rename(
    columns={"name_x": "constructor_name", "name_y": "race_name"}
)

In [131]:
def plot_teams(df_constructors_results_years, team):
    """Draw a bar chart of one constructor's summed ``points`` per year.

    Parameters
    ----------
    df_constructors_results_years : pandas.DataFrame
        Rows to search; the columns read here are ``constructor_name``,
        ``year`` and ``points``.
    team : str
        Value of ``constructor_name`` to keep.

    Returns
    -------
    None
        Bars are drawn on the current pyplot figure, one per year.
    """
    team_rows = df_constructors_results_years.loc[
        df_constructors_results_years['constructor_name'] == team
    ]
    points_by_year = team_rows.groupby("year")["points"].sum()

    # Split the grouped Series into parallel lists for pyplot.
    seasons = points_by_year.index.tolist()
    totals = points_by_year.tolist()

    plt.bar(seasons, totals)
    # plt.title("Ferrari's Performance 2016-2021")
    plt.xlabel("Year")
    plt.ylabel("Constructor Points")
    plt.xticks(seasons)

## Team Performance

In [132]:
@widgets.interact(year_1 = df_constructors_result['year'].sort_values().unique(),
                 year_2 = df_constructors_result['year'].sort_values().unique())
def choose_year(year_1, year_2):
    """Show a team selector for the constructor results of a range of years.

    Parameters
    ----------
    year_1, year_2 : int
        Bounds of the range, both inclusive. They may be given in either
        order; previously a start year later than the end year silently
        selected no rows at all.
    """
    first_year, last_year = sorted((year_1, year_2))

    df_constructors_results_years = df_constructors_result[
        df_constructors_result['year'].between(first_year, last_year)
    ]

    team = interactive(plot_teams, df_constructors_results_years=fixed(df_constructors_results_years),
                            team = df_constructors_results_years['constructor_name'].unique())
    display(team)

interactive(children=(Dropdown(description='year_1', options=(1956, 1958, 1959, 1960, 1961, 1962, 1963, 1964, …

## Qualifying Lap Time Trends

In [133]:
# Load the qualifying results and the pit-stop table from data_path.
df_qualifying, df_pit_stops = (
    pd.read_csv(os.path.join(data_path, csv_name))
    for csv_name in ("qualifying.csv", "pit_stops.csv")
)

In [134]:
# Enrich the qualifying rows with the race name and season year (joined on
# raceId) and with the driver's full name (joined on driverId).
df_qualifying = (
    df_qualifying
    .merge(df_races[["raceId", "name", "year"]], on="raceId")
    .merge(df_drivers[["driverId", "full_name"]], on="driverId")
)

In [135]:
#df_qualifying = df_qualifying.replace(r'\\N', "0" , regex=True)

In [136]:
# Replace the literal text \N in the three qualifying-time columns with the
# parseable string "0:0.0", so the time format used when plotting accepts it.
# NOTE(review): \N presumably marks a session with no recorded time; confirm.
for session in ('q1', 'q2', 'q3'):
    df_qualifying[session] = df_qualifying[session].replace(r'\\N', "0:0.0" , regex=True)

In [137]:
def plot_quals(df_qualifying_years, driver, race):
    """Plot one driver's q1/q2/q3 times at one race across years.

    Parameters
    ----------
    df_qualifying_years : pandas.DataFrame
        Qualifying rows to search. The columns read here are ``name`` (race
        name), ``full_name`` (driver), ``year``, ``q1``, ``q2`` and ``q3``.
    driver : str
        Value of ``full_name`` to keep.
    race : str
        Value of ``name`` to keep.

    Returns
    -------
    None
        A new figure is created with one line per qualifying column, plotted
        against ``year``.
    """
    chosen = (df_qualifying_years['name'] == race) & (df_qualifying_years['full_name'] == driver)

    # Work on an independent, year-ordered copy: the parsed columns are then
    # written to our own frame (not to a slice of the caller's data) and the
    # lines run left to right whatever the row order of the input.
    df_qualifying_driver = df_qualifying_years.loc[chosen].sort_values('year').copy()

    for session in ('q1', 'q2', 'q3'):
        # errors='coerce' turns empty cells and any text that does not match
        # the format into NaT, which matplotlib draws as a gap; parsing such a
        # cell with datetime.strptime raised instead.
        df_qualifying_driver[session] = pd.to_datetime(
            df_qualifying_driver[session], format='%M:%S.%f', errors='coerce'
        )

    # create line chart: one line per qualifying session
    fig, ax = plt.subplots(dpi=100)
    ax.plot(df_qualifying_driver['year'], df_qualifying_driver['q1'], label='q1')
    ax.plot(df_qualifying_driver['year'], df_qualifying_driver['q2'], label='q2')
    ax.plot(df_qualifying_driver['year'], df_qualifying_driver['q3'], label='q3')
    ax.set_xticks(df_qualifying_driver['year']);
    ax.set_xlabel("Year")
    ax.set_ylabel("Lap Times HH:MM:SS")
    # ax.set_title("Lewis Hamilton: Qualifying Lap Times at the Australian Grand Prix")
    ax.legend();

In [138]:
@widgets.interact(year_1 = df_qualifying['year'].sort_values().unique(),
                 year_2 = df_qualifying['year'].sort_values().unique())
def choose_year(year_1, year_2):
    """Show driver and race selectors for the qualifying rows of a range of years.

    Parameters
    ----------
    year_1, year_2 : int
        Bounds of the range, both inclusive. They may be given in either
        order; previously a start year later than the end year silently
        selected no rows at all.
    """
    first_year, last_year = sorted((year_1, year_2))

    df_qualifying_years = df_qualifying[df_qualifying['year'].between(first_year, last_year)]

    team = interactive(plot_quals, df_qualifying_years=fixed(df_qualifying_years),
                            driver = df_qualifying_years['full_name'].unique(),
                              race = df_qualifying_years['name'].unique())
    display(team)

interactive(children=(Dropdown(description='year_1', options=(1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, …