In [20]:
# !pip install voila

<center><img src="formula-1-logo-5-3.png" width="400"/></center>

In [21]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

import warnings
warnings.filterwarnings("ignore")

data_path = "./data"

In [22]:
# List all available data files
# for file in os.listdir(data_path):
#     print(file)

In [23]:
# Dataset Exploration
# Map every CSV file in the data directory to the list of its column names.

files_and_columns = {}
for file in sorted(os.listdir(data_path)):
    # Skip anything that is not a CSV (hidden files, checkpoint folders, ...):
    # pd.read_csv would raise on those entries.
    if not file.lower().endswith(".csv"):
        continue
    # nrows=0 parses only the header row, so large files are not loaded in
    # full just to read their column names.
    header = pd.read_csv(os.path.join(data_path, file), nrows=0)
    files_and_columns[file] = list(header.columns)

In [24]:
# Load the tables used by the notebook; the targets on the left line up
# one-to-one with the file names in the tuple below.
df_status, df_laptimes, df_drivers, df_races, df_standings = (
    pd.read_csv(os.path.join(data_path, csv_name))
    for csv_name in (
        "status.csv",
        "lap_times.csv",
        "drivers.csv",
        "races.csv",
        "driver_standings.csv",
    )
)

In [25]:
# Build a "full_name" column as "<forename> <surname>".
# A single vectorised string concatenation replaces the former row-by-row
# .loc loop; the resulting values are the same.
df_drivers["full_name"] = df_drivers["forename"] + " " + df_drivers["surname"]

In [26]:
# Attach each driver's full name to the lap times, joining on driverId.
df_drivernames = df_drivers[["driverId", "full_name"]]
# Drop any "full_name" left over from a previous run of this cell first;
# otherwise the merge would produce full_name_x / full_name_y and later
# lookups of df_laptimes["full_name"] would fail.
df_laptimes = (
    df_laptimes
    .drop(columns=["full_name"], errors="ignore")
    .merge(df_drivernames, on="driverId")
)

In [27]:
# Attach the race name and year to the lap times, joining on raceId.
# Columns left over from a previous run of this cell are dropped first so that
# re-running it does not create name_x / name_y (or year_x / year_y) duplicates.
df_laptimes = (
    df_laptimes
    .drop(columns=["name", "year"], errors="ignore")
    .merge(df_races[["raceId", "name", "year"]], on="raceId")
)

In [28]:
# make ipywidgets - select year, race and driver
#using ipywidgets
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual

## Race Statistics

- The first graph shows how the selected driver's lap time changes from lap to lap during the race
- The second graph shows how the selected driver's position changes from lap to lap during the race

In [29]:
# select race
def _parse_lap_time(lap_time):
    """Parse a lap-time string into a ``datetime`` for plotting.

    The usual format is ``M:SS.fff``. If that fails, ``H:MM:SS.fff`` is tried
    so that laps of an hour or more do not abort the whole plot
    (assumes long laps are written with a leading hours field - TODO confirm
    against the data). The date part is strptime's default, 1900-01-01.

    Raises:
        ValueError: if the string matches neither format.
    """
    try:
        return datetime.strptime(lap_time, '%M:%S.%f')
    except ValueError:
        return datetime.strptime(lap_time, '%H:%M:%S.%f')


def select_race(df_year, race, driver):
    """Plot lap time and track position per lap for one driver in one race.

    Args:
        df_year: lap-time rows with at least the columns ``name``,
            ``full_name``, ``lap``, ``time`` and ``position``. It is not
            modified.
        race: value of the ``name`` column to select.
        driver: value of the ``full_name`` column to select.

    Returns:
        None. A figure with two stacked axes is drawn, or a short message is
        printed when the selection is empty.
    """
    wanted = (df_year['name'] == race) & (df_year['full_name'] == driver)
    # sort_values returns a new frame, so the column added below never writes
    # into a slice of the caller's data; it also guarantees the lines are
    # drawn in lap order.
    df_race_year_driver = df_year[wanted].sort_values("lap")

    # The driver list covers the whole season, so the chosen driver may not
    # have taken part in the chosen race.
    if df_race_year_driver.empty:
        print("No lap data for {} in {}".format(driver, race))
        return

    df_race_year_driver = df_race_year_driver.assign(
        timestamp=df_race_year_driver["time"].map(_parse_lap_time)
    )

    fig, axs = plt.subplots(2,1)
    axs[0].plot(df_race_year_driver["lap"], df_race_year_driver["timestamp"])
    axs[0].set_xlabel("LAP NUMBER")
    axs[0].set_ylabel("LAP TIME")
    axs[1].plot(df_race_year_driver["lap"], df_race_year_driver["position"])
    axs[1].set_xlabel("LAP NUMBER")
    axs[1].set_ylabel("POSITION")
    # Keep the upper axis label from colliding with the lower plot.
    fig.tight_layout()

In [30]:
@widgets.interact(Year = df_laptimes['year'].sort_values().unique())
def choose_year(Year):
    """Show race and driver pickers for the chosen year and plot the selection.

    The lap times are narrowed to ``Year``; the race and driver options are
    the distinct ``name`` and ``full_name`` values found in that subset.
    """
    laps_in_year = df_laptimes.loc[df_laptimes['year'] == Year]
    race_options = laps_in_year['name'].unique()
    driver_options = laps_in_year['full_name'].unique()

    # Nested widget: select_race is re-run whenever race or driver changes.
    race_picker = interactive(
        select_race,
        df_year=fixed(laps_in_year),
        race=race_options,
        driver=driver_options,
    )
    display(race_picker)

interactive(children=(Dropdown(description='Year', options=(1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 20…

In [31]:
# Attach driver full names (on driverId) and race name/year (on raceId) to the
# driver standings. Columns left over from a previous run of this cell are
# dropped first so that re-running it does not create *_x / *_y duplicates.
df_standings = (
    df_standings
    .drop(columns=["full_name", "name", "year"], errors="ignore")
    .merge(df_drivernames, on="driverId")
    .merge(df_races[["raceId", "name", "year"]], on="raceId")
)

## Driver Statistics

In [32]:
def select_racer(df_standings_year, racer):
    """Plot one driver's points after each race of a season.

    Args:
        df_standings_year: standings rows with at least the columns
            ``full_name``, ``raceId``, ``points`` and ``name``. A ``year``
            column is used for the title when present. It is not modified.
        racer: value of the ``full_name`` column to select.

    Returns:
        None. A figure is drawn, or a short message is printed when the
        driver has no rows in the given data.
    """
    # sort_values returns a new frame; nothing is sorted in place on a slice
    # of the caller's data.
    df_racer_year = df_standings_year[df_standings_year['full_name'] == racer].sort_values(by="raceId")

    if df_racer_year.empty:
        print("No standings found for {}".format(racer))
        return

    # Read the season from the data itself so the title does not depend on
    # which cell ran last. Frames without a "year" column fall back to the
    # notebook-level `year` variable.
    if 'year' in df_racer_year.columns:
        season = df_racer_year['year'].iloc[0]
    else:
        season = year

    fig, ax = plt.subplots(dpi=100)
    ax.plot(df_racer_year['raceId'], df_racer_year['points'])
    # One tick per race, labelled with the race name.
    ax.set_xticks(df_racer_year['raceId'])
    ax.set_xticklabels(df_racer_year['name'], rotation=90);
    ax.set_xlabel("Race Name")
    ax.set_ylabel("Season Points");
    ax.set_title("{} Season Progress- {}".format(str(season), racer));

In [33]:
# Year picker for the per-driver season progress plot.
@widgets.interact(Year = df_standings['year'].sort_values().unique())
def choose_year(Year):
    """Show a driver picker for the chosen year and plot that driver's season."""
    # select_racer reads the season from this notebook-level variable.
    global year
    year = Year

    standings_in_year = df_standings.loc[df_standings['year'] == Year]
    racer_options = standings_in_year['full_name'].unique()

    # Nested widget: select_racer is re-run whenever the driver changes.
    racer_picker = interactive(
        select_racer,
        df_standings_year=fixed(standings_in_year),
        racer=racer_options,
    )
    display(racer_picker)

interactive(children=(Dropdown(description='Year', options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 19…

## Comparing Driver Performance

In [34]:
def plot_versus(df_standings_year, driver_1, driver_2):
    """Plot two drivers' points after each race of a season on one chart.

    Args:
        df_standings_year: standings rows with at least the columns
            ``full_name``, ``raceId``, ``points`` and ``name``. It is not
            modified.
        driver_1: ``full_name`` of the first driver.
        driver_2: ``full_name`` of the second driver.

    Returns:
        None. A figure is drawn, or a short message is printed when neither
        driver has any rows in the given data.
    """
    # sort_values returns new frames; nothing is sorted in place on a slice of
    # the caller's data.
    df_standings_d1 = df_standings_year[df_standings_year['full_name'] == driver_1].sort_values('raceId')
    df_standings_d2 = df_standings_year[df_standings_year['full_name'] == driver_2].sort_values('raceId')

    if df_standings_d1.empty and df_standings_d2.empty:
        print("No standings found for {} or {}".format(driver_1, driver_2))
        return

    # Label every race that either driver appears in. Taking the ticks from
    # driver_1 alone left races that only driver_2 entered without a label.
    races = (
        pd.concat([df_standings_d1, df_standings_d2])[['raceId', 'name']]
        .drop_duplicates('raceId')
        .sort_values('raceId')
    )

    fig, ax = plt.subplots(dpi=100)
    ax.plot(df_standings_d1['raceId'], df_standings_d1['points'], label=driver_1)
    ax.plot(df_standings_d2['raceId'], df_standings_d2['points'], label=driver_2)
    ax.set_xticks(races['raceId'])
    ax.set_xticklabels(races['name'], rotation=90)
    ax.legend()
    ax.set_xlabel("Race Name")
    ax.set_ylabel("Season Points");

In [35]:
@widgets.interact(Year = df_standings['year'].sort_values().unique())
def choose_year(Year):
    """Show two driver pickers for the chosen year and plot their comparison."""
    standings_in_year = df_standings.loc[df_standings['year'] == Year]
    # Both dropdowns offer the same list: every driver with standings that year.
    first_options = standings_in_year['full_name'].unique()
    second_options = standings_in_year['full_name'].unique()

    # Nested widget: plot_versus is re-run whenever either driver changes.
    versus_picker = interactive(
        plot_versus,
        df_standings_year=fixed(standings_in_year),
        driver_1=first_options,
        driver_2=second_options,
    )
    display(versus_picker)

interactive(children=(Dropdown(description='Year', options=(1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 19…