In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file
# found, so the available dataset files show up in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

import sqlite3
from sqlalchemy import create_engine
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import seaborn as sns
%matplotlib inline
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides pandas deprecation / chained-assignment
# warnings raised by later cells - confirm that is intended.
warnings.filterwarnings('ignore')
# Names of the files in the Formula 1 dataset folder; the loading cells look
# each CSV name up in this list before reading it.
input_files=os.listdir('/kaggle/input/formula-1-world-championship-1950-2020')
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# sqlite3 connection to the local f1_info.db file; the analysis cells pass it
# to pd.read_sql_query to run their SQL.
conn = sqlite3.connect("f1_info.db")

In [None]:
# SQLAlchemy engine on the same f1_info.db file; the loading cells pass it to
# DataFrame.to_sql to write their tables.
engine = create_engine("sqlite:///f1_info.db")

In [None]:
def preprocess_cat_cols(df):
    r"""Clean the text (object-dtype) columns of ``df`` in place.

    For every object column:

    * values equal to the dataset's missing-value marker ``\N`` become the
      string ``'nan'``;
    * every other string value has leading/trailing whitespace removed.

    Non-string values (for example float NaN) are left untouched instead of
    being reported one by one.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean. It is modified in place.

    Returns
    -------
    None
    """
    def _clean(value):
        # Only real strings can hold the marker or surrounding whitespace.
        if not isinstance(value, str):
            return value
        if value == '\\N':
            return 'nan'
        return value.strip()

    for col in df.select_dtypes(include='object').columns:
        # Assign the cleaned column back: the previous code discarded the
        # result of ``str.strip()`` and relied on a chained in-place replace.
        df[col] = df[col].map(_clean)
        
        
def duplicate_index(df):
    """Return the index labels of the rows of ``df`` that repeat an earlier row."""
    is_repeat = df.duplicated()
    # Boolean-mask the flag series with itself and keep only the labels.
    return is_repeat[is_repeat].index

In [None]:
# Load the circuits table, clean its text columns and store it in SQLite.
# No column is dropped here: '\N' markers are only rewritten by preprocess_cat_cols.
circuits_csv = input_files[input_files.index('circuits.csv')]
ckt = pd.read_csv('/kaggle/input/formula-1-world-championship-1950-2020/' + circuits_csv)
preprocess_cat_cols(ckt)
print(ckt.head())
ckt.to_sql(name='circuit', con=engine, if_exists='replace')

In [None]:
# Load the constructors table, clean its text columns and store it in SQLite.
constructors_csv = input_files[input_files.index('constructors.csv')]
constr = pd.read_csv('/kaggle/input/formula-1-world-championship-1950-2020/' + constructors_csv)
preprocess_cat_cols(constr)
print(constr.head())
constr.to_sql(name='constructors', con=engine, if_exists='replace')

In [None]:
# Load the per-race constructor results, clean the text columns and store them in SQLite.
constructor_results_csv = input_files[input_files.index('constructor_results.csv')]
constr_rsl = pd.read_csv('/kaggle/input/formula-1-world-championship-1950-2020/' + constructor_results_csv)
preprocess_cat_cols(constr_rsl)
print(constr_rsl.head())
constr_rsl.to_sql(name='constructor_results', con=engine, if_exists='replace')

In [None]:
# Load the constructor standings, clean the text columns and store them in SQLite.
constructor_standings_csv = input_files[input_files.index('constructor_standings.csv')]
constr_std = pd.read_csv('/kaggle/input/formula-1-world-championship-1950-2020/' + constructor_standings_csv)
preprocess_cat_cols(constr_std)
print(constr_std)
constr_std.to_sql(name='constr_std', con=engine, if_exists='replace')


In [None]:
# Load the drivers table and merge forename/surname into one `name` column
# (the SQL queries below group and join on drivers.name).
drivers_csv = input_files[input_files.index('drivers.csv')]
drivers = pd.read_csv('/kaggle/input/formula-1-world-championship-1950-2020/' + drivers_csv)

full_names = drivers['forename'] + ' ' + drivers['surname']
drivers = drivers.assign(name=full_names).drop(columns=['forename', 'surname'])

preprocess_cat_cols(drivers)
print(drivers.head())
drivers.to_sql(name='drivers', con=engine, if_exists='replace')

In [None]:
# Load the driver standings, clean the text columns and store them in SQLite.
driver_standings_csv = input_files[input_files.index('driver_standings.csv')]
drivers_std = pd.read_csv('/kaggle/input/formula-1-world-championship-1950-2020/' + driver_standings_csv)
preprocess_cat_cols(drivers_std)
print(drivers_std)
drivers_std.to_sql(name='drivers_std', con=engine, if_exists='replace')

In [None]:
# Load the lap times, clean the text columns and store them in SQLite.
lap_times_csv = input_files[input_files.index('lap_times.csv')]
lap_time = pd.read_csv('/kaggle/input/formula-1-world-championship-1950-2020/' + lap_times_csv)
preprocess_cat_cols(lap_time)
print(lap_time.head())
lap_time.to_sql(name='lap_time', con=engine, if_exists='replace')

In [None]:
# Load the pit stops, clean the text columns and store them in SQLite.
pit_stops_csv = input_files[input_files.index('pit_stops.csv')]
pit_stops = pd.read_csv('/kaggle/input/formula-1-world-championship-1950-2020/' + pit_stops_csv)
preprocess_cat_cols(pit_stops)
print(pit_stops.head())
pit_stops.to_sql(name='pit_stops', con=engine, if_exists='replace')

In [None]:
# Load the qualifying results, clean the text columns and store them in SQLite.
qualifying_csv = input_files[input_files.index('qualifying.csv')]
quali = pd.read_csv('/kaggle/input/formula-1-world-championship-1950-2020/' + qualifying_csv)
preprocess_cat_cols(quali)
print(quali.head())
quali.to_sql(name='quali', con=engine, if_exists='replace')

In [None]:
# Load the races calendar, clean the text columns and store it in SQLite.
races_csv = input_files[input_files.index('races.csv')]
races = pd.read_csv('/kaggle/input/formula-1-world-championship-1950-2020/' + races_csv)
preprocess_cat_cols(races)
print(races)
races.to_sql(name='races', con=engine, if_exists='replace')

In [None]:
# Load the race results and give every unclassified entry a numeric position.
results=pd.read_csv('/kaggle/input/formula-1-world-championship-1950-2020/'+input_files[input_files.index('results.csv')])

# `position` holds the marker '\N' for entries without a classified finish.
# Those entries receive the number of result rows of their race as position.
# This is done with a boolean mask and a single column assignment: the earlier
# chained `replace(..., inplace=True)` / `['position'].iloc[...] = ...` writes
# can silently act on a temporary copy (pandas copy-on-write).
unclassified = results['position'].astype(str) == '\\N'
field_size = results.groupby('raceId')['raceId'].transform('size')
classified_position = pd.to_numeric(results['position'].mask(unclassified))
results['position'] = classified_position.fillna(field_size).astype('int64')

preprocess_cat_cols(results)
print(results.head())
results.to_sql('results', con=engine, if_exists='replace')

In [None]:
# Race wins per constructor: counts the results rows with position = 1 for each
# constructorId, joins the constructors table for the name and orders the
# output by the win count, highest first.
query="""
        select name,r.constructorId,count(raceId) wins from constructors c
        join results r on c.constructorId=r.constructorId
        where r.position=1
        group by r.constructorId
        order by wins desc
"""

## Constructor's Analysis

To begin this analysis of the constructors, a disclaimer: because there are a lot of constructors, we will concentrate our presentation on the most famous and well-known ones, such as Ferrari, McLaren and Mercedes.

As we can see, Ferrari is the constructor with the most race wins in the whole history of modern F1.

In [None]:
# Bar chart of race wins per constructor, one bar per row of the query result.
team_race_wins = pd.read_sql_query(query, conn).copy()
n_teams = team_race_wins.shape[0]

fig, ax = plt.subplots(figsize=(15, 7))
ax = team_race_wins['wins'].plot.bar(x='name', y='wins', color='tab:blue')
# The Series plot labels bars by row position, so put the constructor names on the ticks.
ax.set_xticks(range(n_teams))
ax.set_xticklabels(team_race_wins['name'], fontsize=12)
ax.set_xlabel('Constructors that have won atleast one race', fontsize=12)
ax.set_ylabel('Number of race wins', fontsize=12);

In [None]:
# Top-scoring constructor of every season:
#   sq  - points summed per (raceId, constructorId), with the race year
#   ssq - those sums added up per (constructorId, year)
#   outer query - max(total_pts) per year, joined to constructors for the name.
# NOTE(review): cs.name is a bare column next to max(); the query relies on
# SQLite taking bare columns from the row that holds the maximum - confirm
# before running this on another database.
query="""
    select ssq.year, cs.name, max(ssq.total_pts) win_pts from
    (select sq.constructorId, sq.year, sum(sq.pts) total_pts from
    (select rs.constructorId,r.year,sum(rs.points) pts from results rs
    join races r on rs.raceId=r.raceId
    group by rs.raceId, rs.constructorId) sq
    group by sq.constructorId,sq.year) ssq
    join constructors cs
    on ssq.constructorId=cs.constructorId
    group by ssq.year
"""

Consequently, Ferrari has also won the most constructors' championships, with almost 25 titles.

In [None]:
# Count how many seasons each constructor topped the points table and plot it.
q=pd.read_sql_query(query,conn)
constr_champs_by_year=q.copy()

# value_counts() is indexed by constructor name, most championships first.
constr_champs_by_team=constr_champs_by_year.name.value_counts()

fig,ax=plt.subplots(figsize=(15,7))
constr_champs_by_team.plot.bar(ax=ax, color='tab:purple')
# Label the bars with the plotted series' own index. The ticks were previously
# taken from `team_race_wins`, which has a different length and order, so the
# bars carried the wrong constructor names.
ax.set_xticks(range(constr_champs_by_team.shape[0]))
ax.set_xticklabels(constr_champs_by_team.index, fontsize=12)
ax.set_ylabel('Number of constructors championships', fontsize=12)
ax.set_xlabel('Constructors', fontsize=15);


In [None]:
# Number of seasons in which each constructor fielded the top-scoring driver:
#   sq   - points summed per (driverId, year)
#   sqq  - the row with max(pts) per year
#   sqqq - count of those rows per constructorId, joined to constructors.
# NOTE(review): constructorId is not part of sq's GROUP BY, so for a driver
# with results for several constructors in one season the reported
# constructor is whichever row SQLite picks - verify this is acceptable.
query="""
    select cs.name, driver_champs from
    (select sqq.constructorId, count(sqq.constructorId) driver_champs from
    (select sq.year, sq.driverId, sq.constructorId, max(pts) season_pts from
    (select rs.driverId, rs.constructorId, r.year, sum(rs.points) pts from results rs
    join races r on rs.raceId=r.raceId
    group by rs.driverId, r.year) sq
    group by sq.year) sqq
    group by sqq.constructorId) sqqq join
    constructors cs on sqqq.constructorId=cs.constructorId
    order by driver_champs desc
"""

But as we can see from the drivers' championship wins, Ferrari does not have as large a lead over the second most decorated constructor, McLaren.

In [None]:
# Bar chart: seasons in which each constructor fielded the top-scoring driver.
q = pd.read_sql_query(sql=query, con=conn)
driver_champs_by_team = q.copy()

fig, ax = plt.subplots(figsize=(13, 8))
driver_champs_by_team.plot(kind='bar', x='name', y='driver_champs', ax=ax, color='darkorange')
ax.set_ylabel('Number of driver championships won by constructor', fontsize=14)
ax.set_xlabel('Constructors', fontsize=14);

In [None]:
# Seasons entered versus seasons topped, per constructor name:
#   sssq - number of years in which the constructor had the highest summed points
#   sq2  - number of distinct years in which the constructor has results rows
# The inner join keeps only constructors that appear in sssq, i.e. those that
# topped at least one season.
query="""
    select sssq.name, sq2.num_seasons ,sssq.num_champ from
    (select ssq.name, count(ssq.name) num_champ from
    (select sq.year, sq.name, max(sq.season_points) win_points from
    (select r.year, cs.name, sum(rs.points) season_points from results rs join
    races r on rs.raceId=r.raceId join
    constructors cs on rs.constructorId=cs.constructorId
    group by cs.name, r.year) sq
    group by sq.year) ssq
    group by ssq.name) sssq join 
    
    (select sq1.name, count(sq1.year) num_seasons from
    (select r.year, cs.name from results rs join
    races r on rs.raceId=r.raceId join
    constructors cs on rs.constructorId=cs.constructorId
    group by cs.name, r.year) sq1
    group by sq1.name) sq2
    
    on
    
    sssq.name=sq2.name
"""


But when we compare the number of seasons entered with the number of championships won, Ferrari is not on the podium this time. This can be explained by the fact that Ferrari is the only constructor that has participated in every season since 1950.

We can also see from these percentages that Brawn is quite a unique constructor: its 100% win rate is explained by the fact that Brawn participated in only one season.

We can also see that Mercedes is a rather effective constructor, since it has participated in 12 seasons and won 7 of them, a championship rate of more than 50%. It has been the number one constructor since 2014.

In [None]:
# Three panels per constructor: seasons entered, seasons topped and the
# percentage of entered seasons that were topped.
q=pd.read_sql_query(query,conn)
const_champs_seasons=q.copy()
const_champs_seasons['perc_season_champs']=round((const_champs_seasons['num_champ']/const_champs_seasons['num_seasons'])*100,2)
const_champs_seasons.sort_values(by=['perc_season_champs'],ascending=False,inplace=True)

fig=plt.figure(figsize=(17,17))

# Two panels side by side on the top row, one wide panel below.
ax1=fig.add_subplot(221)
ax2=fig.add_subplot(222)
ax3=fig.add_subplot(212)

const_champs_seasons.plot.barh(x='name',y='num_seasons',ax=ax1, color='purple' ,label='Number of seasons participated')
const_champs_seasons.plot.barh(x='name',y='num_champ',ax=ax2, sharey=ax1, color='tab:green' ,label='Number of championships won')
const_champs_seasons.plot.bar(x='name',y='perc_season_champs',ax=ax3, color='brown' ,label='Percentage of championship wins')

# tight_layout() only adjusts axes that already exist, so it has to run after
# the panels are drawn; calling it on the empty figure had no effect.
fig.tight_layout();

As we can see from this timeline, there are periods in Ferrari's history when it did not win the constructors' title for a long time. In these periods other constructors had their moment of glory, such as McLaren and Williams in the 1982-1998 period and Red Bull and Mercedes in the 2009-2020 period.

In [None]:
# Cumulative number of seasons topped by each constructor, season by season.
champ_teams = constr_champs_by_team.index
ohe_teams = pd.get_dummies(constr_champs_by_year.name)
# One running total per team: 1 is added in every season that team topped.
champ_teams_by_year = {team: np.cumsum(ohe_teams[team]) for team in champ_teams}

n_seasons = constr_champs_by_year.shape[0]
fig, ax = plt.subplots(figsize=(15, 9))
for team, performance in champ_teams_by_year.items():
    ax.plot(performance)
    ax.scatter(range(len(performance)), performance, label=team)
ax.set_xlabel('Season Year', fontsize=14)
ax.set_ylabel('Number of championships', fontsize=14)
# The x positions are row numbers; show the season year on each tick.
ax.set_xticks(range(n_seasons))
ax.set_xticklabels(constr_champs_by_year.year.tolist(), rotation='vertical', fontsize=15)
plt.legend();

In [None]:
# For every season, the constructor name attached to the top-scoring driver:
#   sq  - points summed per (driverId, year)
#   sqq - the row with max(pts) per year
# joined to constructors and ordered by year.
# NOTE(review): constructorId is a bare column in sq (not in its GROUP BY) -
# for a driver with several constructors in one season the value is whichever
# row SQLite picks.
query="""
    select sqq.year, cs.name from
    (select sq.year, sq.driverId, sq.constructorId, max(pts) season_pts from
    (select rs.driverId, rs.constructorId, r.year, sum(rs.points) pts from results rs
    join races r on rs.raceId=r.raceId
    group by rs.driverId, r.year) sq
    group by sq.year) sqq join
    constructors cs on
    cs.constructorId=sqq.constructorId
    order by sqq.year
"""

Just as in the timeline above, in the drivers' championship Ferrari also had periods when its drivers were not winning the title. Drivers from the same other constructors won their titles during these periods.

In [None]:
# Cumulative number of seasons in which each constructor fielded the
# top-scoring driver, season by season.
q=pd.read_sql_query(query,conn)
drivers_champs_by_team_yearly=q.copy()

# One-hot encode only the constructor name so the dummy columns ARE the team
# names. Encoding the whole frame and recovering the name with
# `split('_')[1]` truncated any name containing an underscore and depended on
# `year` being the first column.
ohe_champ_teams_for_drivers=pd.get_dummies(drivers_champs_by_team_yearly['name'])
champ_teams_for_drivers=dict()
for team in ohe_champ_teams_for_drivers.columns:
    champ_teams_for_drivers[team]=np.cumsum(ohe_champ_teams_for_drivers[team])

fig,ax=plt.subplots(figsize=(15,9))
for team,driver_champs in champ_teams_for_drivers.items():
    ax.plot(driver_champs, label=team)
    ax.scatter(range(drivers_champs_by_team_yearly.shape[0]),driver_champs)
# The x positions are row numbers; show the season year on each tick.
ax.set_xticks(range(drivers_champs_by_team_yearly.shape[0]))
ax.set_xticklabels(drivers_champs_by_team_yearly.year.tolist(), rotation='vertical', fontsize=15)
plt.legend();

In [None]:
# Seasons entered versus seasons topped, per driver name:
#   sssq - number of years in which the driver had the highest summed points
#   sq2  - number of distinct years in which the driver has results rows
# The inner join keeps only drivers that topped at least one season.
# Grouping is by dr.name, so two drivers sharing the same full name would be
# counted as one.
query="""
    select sssq.name, sq2.num_seasons ,sssq.num_champ from
    (select ssq.name, count(ssq.name) num_champ from
    (select sq.year, sq.name, max(sq.season_points) win_points from
    (select r.year, dr.name, sum(rs.points) season_points from results rs join
    races r on rs.raceId=r.raceId join
    drivers dr on rs.driverId=dr.driverId
    group by dr.name, r.year) sq
    group by sq.year) ssq
    group by ssq.name) sssq join 
    
    (select sq1.name, count(sq1.year) num_seasons from
    (select r.year, dr.name from results rs join
    races r on rs.raceId=r.raceId join
    drivers dr on rs.driverId=dr.driverId
    group by dr.name, r.year) sq1
    group by sq1.name) sq2
    
    on
    
    sssq.name=sq2.name
"""

## Drivers Analysis

As this graph shows, winning a drivers' championship is very difficult. We can see lots of big names in this plot, but not many of them won the title in more than 10% of the seasons of their entire F1 career.

In [None]:
# Three panels per driver: seasons entered, seasons topped and the percentage
# of entered seasons that were topped.
q=pd.read_sql_query(query,conn)
dr_champs_seasons=q.copy()
dr_champs_seasons['perc_season_champs']=round((dr_champs_seasons['num_champ']/dr_champs_seasons['num_seasons'])*100,2)
dr_champs_seasons.sort_values(by=['perc_season_champs'],ascending=False,inplace=True)

fig=plt.figure(figsize=(17,17))

# Two panels side by side on the top row, one wide panel below.
ax1=fig.add_subplot(221)
ax2=fig.add_subplot(222)
ax3=fig.add_subplot(212)

dr_champs_seasons.plot.barh(x='name',y='num_seasons',ax=ax1, color='navy')
# One tick per whole season count.
ax1.set_xticks(range(1,max(dr_champs_seasons['num_seasons'])+1))
ax1.set_title('Number season participated')

dr_champs_seasons.plot.barh(x='name',y='num_champ',ax=ax2, color='green')
ax2.set_title('Number of driver championships won')

dr_champs_seasons.plot.bar(x='name',y='perc_season_champs',ax=ax3, color='pink')
ax3.set_title('Percentage of championships won')

# tight_layout() only adjusts axes that already exist, so it has to run after
# the panels are drawn; calling it on the empty figure had no effect.
fig.tight_layout();

In [None]:
# The 25 driver names with the most results rows where position = 1,
# ordered by that count, highest first.
query="""
    select sq.name, count(*) num_wins from
    (select dr.name from results rs join
    drivers dr on rs.driverId=dr.driverId
    where rs.position==1) sq
    group by sq.name
    order by num_wins desc limit(25)
"""

As we can see here, Lewis Hamilton is on the verge of surpassing Michael Schumacher and taking his throne as the greatest race winner.

In [None]:
# Bar chart of the drivers returned by the race-wins query.
dr_race_wins = pd.read_sql_query(sql=query, con=conn)

fig, ax = plt.subplots(figsize=(14, 6))
dr_race_wins.plot(kind='bar', x='name', y='num_wins', ax=ax, color='firebrick')
ax.set_ylabel('Number of race wins')
ax.set_title('Drivers by number race wins');


In [None]:
# The 25 driver names with the most results rows where grid = 1 (start from
# the first grid slot), ordered by that count, highest first.
query="""
    select sq.name, count(*) num_pole_positions from
    (select dr.name from results rs join
    drivers dr on rs.driverId=dr.driverId
    where rs.grid==1) sq
    group by sq.name
    order by num_pole_positions desc limit(25)
"""


We can also see that most of the drivers with the most race wins also appear among the drivers with the most pole positions.

In [None]:
# Bar chart of the drivers returned by the pole-position query.
q = pd.read_sql_query(sql=query, con=conn)

fig, ax = plt.subplots(figsize=(14, 6))
q.plot(kind='bar', x='name', y='num_pole_positions', ax=ax, color='olive')
ax.set_ylabel('Number of pole positions')
ax.set_title('Drivers by number of pole positions');



In [None]:
def import_all(root='/kaggle/input'):
    """Read every CSV file found under ``root`` into a dict of DataFrames.

    Parameters
    ----------
    root : str, optional
        Directory that is walked recursively. Defaults to the Kaggle input
        folder, so calling ``import_all()`` with no argument still works.

    Returns
    -------
    dict
        Maps each file name without its ``.csv`` extension to the DataFrame
        read from that file. If two folders contain a file with the same
        name, the one visited last wins.
    """
    data = {}
    for dirname, _, filenames in os.walk(root):
        for filename in filenames:
            name, extension = os.path.splitext(filename)
            # Skip anything that is not a CSV instead of handing it to read_csv.
            if extension.lower() != '.csv':
                continue
            data[name] = pd.read_csv(os.path.join(dirname, filename))

    return data


def add_ids(data, key):
    """Return ``data[key]`` enriched with race, circuit, driver and team details.

    The lookups are left-joined in this order: races (on ``raceId``), circuits
    (on ``circuitId``), drivers (on ``driverId``), then constructors (on
    ``constructorId``) unless ``key`` is ``'lap_times'`` or ``'pit_stops'``,
    and finally the status table (on ``statusId``) when ``key`` is
    ``'results'``. Driver and constructor nationalities are renamed to
    ``drv_nat`` and ``cstr_nat``.

    Parameters
    ----------
    data : dict
        Table name -> DataFrame, as returned by ``import_all``.
    key : str
        Name of the table to enrich.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same number of rows as ``data[key]``.

    Raises
    ------
    ValueError
        If any join changes the number of rows.
    """
    n_rows = data[key].shape[0]

    def _left_join(frame, lookup, on):
        # Left-join `lookup` and insist that the row count is unchanged.
        joined = frame.merge(lookup, on=on, how='left')
        if joined.shape[0] != n_rows:
            raise ValueError(f'Merging {on} went wrong')
        return joined

    enriched = _left_join(
        data[key],
        data['races'][['raceId', 'year', 'round', 'circuitId', 'date', 'time']],
        'raceId',
    )
    enriched = _left_join(
        enriched,
        data['circuits'][['circuitId', 'circuitRef', 'location', 'country']],
        'circuitId',
    )

    driver_cols = ['driverId', 'driverRef', 'forename', 'surname', 'dob', 'nationality']
    enriched = _left_join(
        enriched,
        data['drivers'][driver_cols].rename(columns={'nationality': 'drv_nat'}),
        'driverId',
    )

    # These two tables are not joined to the constructors lookup.
    if key not in ('lap_times', 'pit_stops'):
        constructor_cols = ['constructorId', 'constructorRef', 'name', 'nationality']
        enriched = _left_join(
            enriched,
            data['constructors'][constructor_cols].rename(columns={'nationality': 'cstr_nat'}),
            'constructorId',
        )

    if key == 'results':
        enriched = _left_join(enriched, data['status'], 'statusId')

    return enriched

In [None]:
# Read every CSV, then enrich the four fact tables with the lookup details.
data = import_all()

res, qual, laps, pits = (
    add_ids(data, table)
    for table in ('results', 'qualifying', 'lap_times', 'pit_stops')
)

# Both the fact table and `races` carry a `time` column, so the merge suffixed
# them: time_x comes from the fact table, time_y from the race calendar.
laps = laps.rename(columns={'time_x': 'lap_time', 'time_y': 'time'})
res = res.rename(columns={'time_x': 'race_time', 'time_y': 'time'})
pits = pits.rename(columns={'time_x': 'pit_time', 'time_y': 'time'})

# Lap times and pit stops were not joined to constructors; take the team
# columns from the enriched results via (raceId, driverId).
team_by_entry = res[['raceId', 'driverId', 'constructorRef', 'name', 'cstr_nat']]
laps = laps.merge(team_by_entry, on=['raceId', 'driverId'], how='left')
pits = pits.merge(team_by_entry, on=['raceId', 'driverId'], how='left')

In [None]:
# Derived columns on the enriched results table.

# Parse 'm:ss.mmm' fastest-lap strings into milliseconds. Rows without a
# parsable time stay NaN. The earlier fillna(99) sentinel gave every such row
# the same fake time, so in races with no lap-time data every driver was
# flagged as setting the fastest lap.
lap_parts = res['fastestLapTime'].str.extract(r'^\s*(\d+):(\d+)\.(\d+)\s*$')
lap_mins = pd.to_numeric(lap_parts[0])
lap_secs = pd.to_numeric(lap_parts[1])
lap_millisecs = pd.to_numeric(lap_parts[2])

res['fastestLapTime_ms'] = (60 * lap_mins + lap_secs) * 1000 + lap_millisecs

# Best lap of each race (NaN when no driver of that race has a lap time).
res['race_fastestTime'] = res.groupby('raceId').fastestLapTime_ms.transform('min')
res['FastLap'] = (
    res['fastestLapTime_ms'].notna()
    & (res['race_fastestTime'] == res['fastestLapTime_ms'])
).astype(int)

# Re-score every race with one fixed points table keyed by finishing order;
# this overwrites the original `points` column.
points = {1: 25, 2: 18, 3: 15, 4: 12, 5: 10, 6: 8, 7: 6, 8: 4, 9: 2, 10: 1}

res['points'] = res['positionOrder'].map(points).fillna(0)
#res.loc[res.FastLap == 1, 'points'] = res['points'] + 1

res['fastestLap'] = pd.to_numeric(res['fastestLap'], errors='coerce')

res['DriverName'] = res['forename'].str[0] + '. ' + res['surname']


# Positive net_gain: the finishing order is better than the grid slot.
res['net_gain'] = -(res['positionOrder'] - res['grid'])
res['abs_gain'] = abs(res['net_gain'])

res['finished'] = np.where(res.status == 'Finished', 1, 0)

In [None]:
def plot_frame(ax):
    """Give ``ax`` a white background with black spines, ticks and axis labels.

    The bottom spine / x axis is styled first, then the left spine / y axis.
    Returns the same ``ax`` object.
    """
    ax.set_facecolor('#fff')
    for side, axis_name, axis_obj in (('bottom', 'x', ax.xaxis), ('left', 'y', ax.yaxis)):
        ax.spines[side].set_color('black')
        ax.tick_params(axis=axis_name, colors='black')
        axis_obj.label.set_color('black')
    return ax

def get_drv_ann(data, year, ax, adjust, count=False, measure='Pts.'):
    """Annotate ``ax`` with the leading driver of ``year``.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``year``, ``driverId``, ``DriverName``, ``name``,
        ``raceId`` and either ``points`` or, when ``count`` is true, ``resultId``.
    year : int
        Season to summarise.
    ax : matplotlib axes
        Axes that receives the annotation.
    adjust : tuple
        Offset of the text from the data point, in points.
    count : bool, optional
        Rank drivers by number of rows instead of summed points.
    measure : str, optional
        Unit printed after the total.

    Returns
    -------
    The same ``ax``.
    """
    season = data[data.year == year]
    per_driver = season.groupby(['driverId', 'DriverName', 'name'])
    # Only compute the ranking that is actually requested.
    totals = per_driver.resultId.count() if count else per_driver.points.sum()
    yr_data = totals.sort_values(ascending=False)

    _, drv_name, ctr_name = yr_data.index[0]
    # Positional access: `yr_data[0]` is a label lookup on the driverId level
    # and only fell back to "first row" through a deprecated pandas behaviour.
    pts = yr_data.iloc[0]
    # The annotation is placed at the leader's total divided by the number of races.
    value = pts / season.raceId.nunique()

    text = f'{drv_name}\n{ctr_name}, {year}\n{int(pts)} {measure}'

    ax.annotate(text, xy=(year, value), xycoords='data', xytext=adjust, textcoords='offset points', color='w')

    return ax


def get_ctr_ann(data, year, ax, adjust, count=False, measure='Pts.'):
    """Annotate ``ax`` with the leading constructor of ``year``.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``year``, ``name``, ``raceId`` and either ``points``
        or, when ``count`` is true, ``resultId``.
    year : int
        Season to summarise.
    ax : matplotlib axes
        Axes that receives the annotation.
    adjust : tuple
        Offset of the text from the data point, in points.
    count : bool, optional
        Rank constructors by number of rows instead of summed points.
    measure : str, optional
        Unit printed after the total.

    Returns
    -------
    The same ``ax``.
    """
    season = data[data.year == year]
    per_team = season.groupby(['name'])
    # Only compute the ranking that is actually requested.
    totals = per_team.resultId.count() if count else per_team.points.sum()
    yr_data = totals.sort_values(ascending=False)

    ctr_name = yr_data.index[0]
    # Positional access: `yr_data[0]` on a name-indexed Series relied on a
    # deprecated pandas fallback from label to position.
    pts = yr_data.iloc[0]
    # The annotation is placed at the leader's total divided by the number of races.
    value = pts / season.raceId.nunique()

    text = f'{ctr_name}, {year}\n{int(pts)} {measure}'

    ax.annotate(text, xy=(year, value), xycoords='data', xytext=adjust, textcoords='offset points', color='w')

    return ax


def plot_bars(bars, ax, color, highlight_year=2020):
    """Draw ``bars`` as a bar chart with one year highlighted.

    Parameters
    ----------
    bars : pandas.Series
        Values to plot, indexed by year.
    ax : matplotlib axes
        Axes to draw on.
    color : str
        Colour of the highlighted bar; all other bars are white.
    highlight_year : int, optional
        Index value whose bar gets ``color``. Defaults to 2020, the value
        that used to be hard-coded.

    Returns
    -------
    The same ``ax``.
    """
    colors = [color if (c == highlight_year) else 'w' for c in bars.index]
    bars.plot(color=colors, kind='bar', ax=ax)
    # White bars and a white title: this styling is meant for a dark background.
    ax.set_title(f'Top Years vs {highlight_year}', fontsize=14, color='w')
    ax.set_xticklabels(ax.get_xticklabels(), rotation=0)

    return ax


## The Best Season ##

One way of defining how interesting a season is, is to use the number of overtakes. We can approximate this indicator by observing the difference between each driver's starting and finishing positions.


In [None]:
# Average absolute position change per entry for every (year, circuit),
# summarised per season as mean / min / max.
fig, ax = plt.subplots(1, 1, figsize=(15, 7), facecolor='#fff')
fig.suptitle('Position changes per Season', fontsize=18, color='black')

per_race = res.groupby(['year', 'circuitRef'])
change_per_entry = per_race.abs_gain.sum() / per_race.size()
by_season = change_per_entry.groupby('year')

by_season.mean().plot(ax=ax, label='Mean', color='grey')
by_season.min().plot(ax=ax, label='Min', color='r')
by_season.max().plot(ax=ax, label='Max', color='b')

leg = ax.legend(facecolor="#fff")
for legend_text in leg.get_texts():
    legend_text.set_color("black")

ax = plot_frame(ax)

We can observe that 1989 was a special season, since there were a lot of position changes; after that season the number of position changes slowly decreased, and it is now much the same every year while being a lot lower than before the 90s.

But another thing to take into consideration is the number of drivers that finished the race with as many laps as the leader of the race.

In [None]:
# Share of entries with status 'Finished' for every (year, circuit),
# summarised per season as mean / max / min.
fig, ax = plt.subplots(1, 1, figsize=(15, 7), facecolor='#fff')
fig.suptitle('Proportion of Drivers that finished the race per Season', fontsize=18, color='black')

finish_rate = res.groupby(['year', 'circuitRef']).finished.mean()
by_season = finish_rate.groupby('year')

by_season.mean().plot(ax=ax, color='grey', label='Mean')
by_season.max().plot(ax=ax, color='b', label='Max')
by_season.min().plot(ax=ax, color='r', label='Min')

leg = ax.legend(facecolor="#fff")
for legend_text in leg.get_texts():
    legend_text.set_color("black")

ax = plot_frame(ax)

We can observe that, contrary to the number of position changes, the proportion of drivers that finished the race has improved since the 90s. This can be explained by the improvement in the cars' reliability, which has helped reduce the difference between the top teams and the rest.

In conclusion, we can determine from our markers that the best season was probably 1989, since it had a lot of position changes and fewer drivers who finished the race with the same number of laps as the leader. **From our data we can say that the 1989 season was pivotal and was the last of a genre of F1 we will probably never see again because of the improvement in technology.**