In [101]:
%matplotlib inline
import pandas as pd
import plotly.express as px
import plotly.io as pio

pio.templates.default = "plotly_white"

## 1. Loading of the Data set

In [102]:
df_results = pd.read_csv('../data/inputs/results.csv')
df_drivers = pd.read_csv('../data/inputs/drivers.csv')
df_constructors = pd.read_csv('../data/inputs/constructors.csv')

In [103]:
df_results.head()

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,laps,time,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId
0,1,18,1,1,22,1,1,1,1,10.0,58,1:34:50.616,5690616,39,2,1:27.452,218.3,1
1,2,18,2,2,3,5,2,2,2,8.0,58,+5.478,5696094,41,3,1:27.739,217.586,1
2,3,18,3,3,7,7,3,3,3,6.0,58,+8.163,5698779,41,5,1:28.090,216.719,1
3,4,18,4,4,5,11,4,4,4,5.0,58,+17.181,5707797,58,7,1:28.603,215.464,1
4,5,18,5,1,23,3,5,5,5,4.0,58,+18.014,5708630,43,1,1:27.418,218.385,1


In [104]:
df_drivers.head()

Unnamed: 0,driverId,driverRef,number,code,forename,surname,dob,nationality,url
0,1,hamilton,44,HAM,Lewis,Hamilton,1985-01-07,British,http://en.wikipedia.org/wiki/Lewis_Hamilton
1,2,heidfeld,\N,HEI,Nick,Heidfeld,1977-05-10,German,http://en.wikipedia.org/wiki/Nick_Heidfeld
2,3,rosberg,6,ROS,Nico,Rosberg,1985-06-27,German,http://en.wikipedia.org/wiki/Nico_Rosberg
3,4,alonso,14,ALO,Fernando,Alonso,1981-07-29,Spanish,http://en.wikipedia.org/wiki/Fernando_Alonso
4,5,kovalainen,\N,KOV,Heikki,Kovalainen,1981-10-19,Finnish,http://en.wikipedia.org/wiki/Heikki_Kovalainen


In [105]:
df_constructors.head()

Unnamed: 0,constructorId,constructorRef,name,nationality,url
0,1,mclaren,McLaren,British,http://en.wikipedia.org/wiki/McLaren
1,2,bmw_sauber,BMW Sauber,German,http://en.wikipedia.org/wiki/BMW_Sauber
2,3,williams,Williams,British,http://en.wikipedia.org/wiki/Williams_Grand_Pr...
3,4,renault,Renault,French,http://en.wikipedia.org/wiki/Renault_in_Formul...
4,5,toro_rosso,Toro Rosso,Italian,http://en.wikipedia.org/wiki/Scuderia_Toro_Rosso


## 2. Merging of the three Data set

In [106]:
df_results_v1 = df_results.merge(df_drivers[['driverId', 'forename', 'surname']], how='inner', on='driverId').merge(df_constructors[['constructorId', 'name', 'nationality']])

In [107]:
df_results_v1.head()

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,...,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,forename,surname,name,nationality
0,1,18,1,1,22,1,1,1,1,10.0,...,5690616,39,2,1:27.452,218.3,1,Lewis,Hamilton,McLaren,British
1,2,18,2,2,3,5,2,2,2,8.0,...,5696094,41,3,1:27.739,217.586,1,Nick,Heidfeld,BMW Sauber,German
2,3,18,3,3,7,7,3,3,3,6.0,...,5698779,41,5,1:28.090,216.719,1,Nico,Rosberg,Williams,British
3,4,18,4,4,5,11,4,4,4,5.0,...,5707797,58,7,1:28.603,215.464,1,Fernando,Alonso,Renault,French
4,5,18,5,1,23,3,5,5,5,4.0,...,5708630,43,1,1:27.418,218.385,1,Heikki,Kovalainen,McLaren,British


## 3. Visualisation of the data

### 3.1 Count the number of time each user won a GP
df_winners = df_results_v1[df_results_v1['rank'] == '1']
px.histogram(df_winners, x='surname', y='resultId', histfunc='count')

In [108]:
df_winners = df_results_v1[df_results_v1['rank'] == '1']
px.histogram(df_winners, x='surname', y='resultId', histfunc='count')

In [109]:
df_winners.head()

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,...,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,forename,surname,name,nationality
4,5,18,5,1,23,3,5,5,5,4.0,...,5708630,43,1,1:27.418,218.385,1,Heikki,Kovalainen,McLaren,British
27,28,19,2,2,3,5,6,6,6,3.0,...,5528388,55,1,1:35.366,209.244,1,Nick,Heidfeld,BMW Sauber,German
48,49,20,5,1,23,5,5,5,5,4.0,...,5493759,49,1,1:33.193,209.062,1,Heikki,Kovalainen,McLaren,British
66,67,21,8,6,1,1,1,1,1,10.0,...,5899051,46,1,1:21.670,205.191,1,Kimi,Räikkönen,Ferrari,Italian
90,91,22,8,6,1,4,3,3,3,6.0,...,5213722,20,1,1:26.506,222.144,1,Kimi,Räikkönen,Ferrari,Italian


In [110]:
px.histogram(df_winners, x='surname', y='resultId', histfunc='count')

Visuel évident car c'est Lewis Hamilton qui detient le record du plus grand nombre de victoires en formule 1

### 3.2 The GP winner by point

In [111]:
px.histogram(df_winners, x='surname', y='points')

### 3.3 Constructors

#### 3.3.1 Constructors number of races

In [112]:
px.histogram(df_results_v1, x='name', y='resultId', histfunc='count')

#### 3.3.2 Number of races win

In [113]:
px.histogram(df_winners, x='name', y='resultId', histfunc='count')