In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file below the Kaggle input directory, then print
# each full path on its own line.
input_file_paths = (
    os.path.join(root, fname)
    for root, _subdirs, files in os.walk('/kaggle/input')
    for fname in files
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd 
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt
# Apply seaborn's "darkgrid" theme to every figure drawn below.
sns.set_style(style='darkgrid')

In [None]:
# Load the two Bahrain qualifying result tables (one CSV per season).
qualifying_csv_by_year = {
    2020: '../input/f1-2021-2020-bahrain-qualifying-data/2020_round_15_qualifying.csv',
    2021: '../input/f1-2021-2020-bahrain-qualifying-data/2021_round_1_qualifying.csv',
}
df_2020 = pd.read_csv(qualifying_csv_by_year[2020])
df_2021 = pd.read_csv(qualifying_csv_by_year[2021])

# Preview the first rows of the 2021 table.
df_2021.head()

In [None]:
# Count missing values per column and keep only the columns that have at least
# one. The notebook attributes missing times to drivers who did not take part
# in the corresponding qualifying session.
null_2020 = df_2020.isna().sum().loc[lambda counts: counts > 0]
null_2021 = df_2021.isna().sum().loc[lambda counts: counts > 0]

for null_counts in (null_2020, null_2021):
    print(null_counts)

In [None]:
# Print a concise summary of the 2021 frame (column names, non-null counts, dtypes).
df_2021.info()

In [None]:
# Drop the leftover 'Unnamed: 0' column from both frames.
#
# The result is re-assigned instead of mutating in place, and a missing column
# is ignored, so this cell can be re-run without raising KeyError once the
# column is already gone. `columns=` already identifies the axis, so the
# redundant `axis=1` is omitted.
df_2020 = df_2020.drop(columns=['Unnamed: 0'], errors='ignore')
df_2021 = df_2021.drop(columns=['Unnamed: 0'], errors='ignore')

# Q3 Driver Analysis

In [None]:
# Keep only the rows that have a Q3 time, i.e. discard every driver whose
# 'secs_q3' value is missing.
has_q3_time_2020 = df_2020['secs_q3'].notna()
has_q3_time_2021 = df_2021['secs_q3'].notna()

q3_2020 = df_2020[has_q3_time_2020]
q3_2021 = df_2021[has_q3_time_2021]

In [None]:
# Side-by-side bar charts of each driver's Q3 time, one panel per season.
# Both panels use the same y-range so the two years can be compared directly;
# the dashed red line marks that season's mean Q3 time.
fig, axes = plt.subplots(1, 2, figsize=(11, 4))
q3_bar_panels = [(q3_2020, "Q3 Times 2020"), (q3_2021, "Q3 Times 2021")]

for ax, (q3_frame, panel_title) in zip(axes, q3_bar_panels):
    sns.barplot(data=q3_frame, x='driver', y='secs_q3', color='darkblue', ax=ax)
    ax.set_ylim(87, 91)
    ax.set_ylabel('Q3 Times in Sec.')
    ax.axhline(q3_frame['secs_q3'].mean(), ls='--', color='red')
    ax.set_title(panel_title)

# Write the finished figure to q3_times.png in the current working directory.
fig.savefig("q3_times.png")

In [None]:
# Side-by-side box plots of the Q3 time distribution, one panel per season.
# The y-range is fixed to the same limits in both panels and the dashed red
# line marks that season's mean Q3 time.
fig, axes = plt.subplots(1, 2, figsize=(11, 4))
q3_box_panels = [(q3_2020, "Q3 Times 2020"), (q3_2021, "Q3 Times 2021")]

for ax, (q3_frame, panel_title) in zip(axes, q3_box_panels):
    q3_times = q3_frame['secs_q3']
    sns.boxplot(y=q3_times, color='lightblue', ax=ax)
    ax.set_ylabel('Q3 Times in Sec.')
    ax.set_ylim(87, 91)
    ax.axhline(q3_times.mean(), ls='--', color='red')
    ax.set_title(panel_title)

In [None]:
# Descriptive statistics for the 2020 rows that have a Q3 time.
q3_2020.describe()

In [None]:
# Descriptive statistics for the 2021 rows that have a Q3 time.
q3_2021.describe()

# Qualifying Constructor Analysis

In [None]:
# Columns holding the per-session lap times (in seconds).
SESSION_COLS = ['secs_q1', 'secs_q2', 'secs_q3']


def summarize_constructors(qualifying, session_cols=SESSION_COLS):
    """Build a per-constructor summary of qualifying times.

    Parameters
    ----------
    qualifying : pandas.DataFrame
        Frame with a 'constructor' column and the columns named in
        ``session_cols``.
    session_cols : sequence of str
        Names of the per-session time columns to aggregate.

    Returns
    -------
    pandas.DataFrame
        One row per constructor, sorted by ``best_time`` ascending, with:

        * the session columns: the constructor's minimum time in each session,
        * ``best_time``: the smallest of those per-session minima,
        * ``slow_time``: the largest of those per-session minima,
        * ``delta_constructor``: ``slow_time - best_time``,
        * ``delta_best``: gap between ``best_time`` and the overall best.
    """
    session_cols = list(session_cols)

    best_per_session = (
        qualifying[['constructor', *session_cols]]
        .groupby('constructor', as_index=False)
        .min()
    )

    # Reduce over the session columns only. Running min/max across the whole
    # frame would include the string 'constructor' column, which raises
    # TypeError on pandas >= 2.0, and would also feed the derived 'best_time'
    # column into the 'slow_time' reduction.
    session_times = best_per_session[session_cols]
    summary = best_per_session.assign(
        best_time=session_times.min(axis=1),
        slow_time=session_times.max(axis=1),
    )
    summary['delta_constructor'] = summary['slow_time'] - summary['best_time']
    summary['delta_best'] = summary['best_time'] - summary['best_time'].min()

    return summary.sort_values(by='best_time', ascending=True)


q_constructor_2020 = summarize_constructors(df_2020)
q_constructor_2021 = summarize_constructors(df_2021)
q_constructor_2021

In [None]:
# Stacked bar charts of each constructor's best qualifying time, one panel per
# season. Both panels use the same y-range, and the dashed red line marks the
# mean of the constructors' best times for that season.
#
# Each panel is titled with its season so the two charts can be told apart,
# the figure is tall enough for two stacked panels, and tight_layout() keeps
# the lower title from colliding with the upper panel's tick labels.
fig, axes = plt.subplots(2, 1, figsize=(11, 8))
constructor_panels = [
    (q_constructor_2020, "Best Qualifying Time per Constructor 2020"),
    (q_constructor_2021, "Best Qualifying Time per Constructor 2021"),
]

for ax, (constructor_frame, panel_title) in zip(axes, constructor_panels):
    sns.barplot(x='constructor', y='best_time', data=constructor_frame, color='darkblue', ax=ax)
    ax.set_ylabel('Qualifying Times in Sec.')
    ax.set_ylim(87, 93)
    ax.axhline(constructor_frame['best_time'].mean(), ls='--', color='red')
    ax.set_title(panel_title)

fig.tight_layout()

In [None]:
# Which constructor improved the most compared to last year's Bahrain qualifying?
# Each record is (constructor, best lap 2020 in seconds, best lap 2021 in seconds).
best_lap_records = [
    ("Red Bull", 87.678, 88.997),
    ("Mercedes", 87.264, 89.385),
    ("Ferrari", 89.137, 89.678),
    ("AlphaTauri", 88.448, 89.809),
    ("McLaren", 88.542, 89.927),
    ("Renault/Alpine", 88.417, 90.249),
    ("Aston Martin/RP", 88.322, 90.601),
    ("Alfa Romeo", 89.491, 90.708),
    ("Williams", 89.294, 91.316),
    ("Haas", 90.111, 92.449),
]
delta_frame = pd.DataFrame(
    best_lap_records,
    columns=["constructor", "best_2020", "best_2021"],
)
delta_frame.head()

In [None]:
# Add the year-over-year change of the best lap (2021 minus 2020, in seconds)
# and order the constructors from the smallest change to the largest.
delta_frame = (
    delta_frame
    .assign(delta=delta_frame['best_2021'] - delta_frame['best_2020'])
    .sort_values(by='delta', ascending=True)
)
delta_frame.head()

In [None]:
# Bar chart of the per-constructor change in best qualifying lap between the
# two seasons; the dashed red line marks the mean change.
fig, ax = plt.subplots(figsize=(11, 4))
sns.barplot(data=delta_frame, x='constructor', y='delta', color='darkblue', ax=ax)
ax.set_ylabel('Delta (in seconds)')
ax.axhline(delta_frame['delta'].mean(), ls='--', color='red')
ax.set_title("Delta of best Qualifying Laps 2020 vs 2021 in Bahrain compared per Constructor");