In [1]:
%matplotlib inline
import pandas as pd
import numpy as np

## 1. Loading the datasets

In [2]:
# Locations of the two input tables, relative to the notebook's folder.
constructors_csv = "../data/inputs/constructors.csv"
races_results_csv = "../data/outputs/02_races_results.csv"

# Constructor reference table and per-driver race results.
df_constructors = pd.read_csv(constructors_csv)
df_races_results = pd.read_csv(races_results_csv)

In [3]:
# Print the (rows, columns) shape, then preview the first five rows.
print(df_constructors.shape)
df_constructors.head()

(211, 5)


Unnamed: 0,constructorId,constructorRef,name,nationality,url
0,1,mclaren,McLaren,British,http://en.wikipedia.org/wiki/McLaren
1,2,bmw_sauber,BMW Sauber,German,http://en.wikipedia.org/wiki/BMW_Sauber
2,3,williams,Williams,British,http://en.wikipedia.org/wiki/Williams_Grand_Pr...
3,4,renault,Renault,French,http://en.wikipedia.org/wiki/Renault_in_Formul...
4,5,toro_rosso,Toro Rosso,Italian,http://en.wikipedia.org/wiki/Scuderia_Toro_Rosso


In [4]:
# Preview the first five rows of the race results table.
df_races_results.head()

Unnamed: 0,resultId,raceId,driverId,constructorId,grid,points,laps,milliseconds,fastestLap,race_rank,fastestLapTime,fastestLapSpeed,statusId,year,round,circuitId,name
0,1,18,1,1,1,10.0,58,5690616,39,2,87452,218.3,1,2008,1,1,Australian Grand Prix
1,2,18,2,2,5,8.0,58,5696094,41,3,87739,217.586,1,2008,1,1,Australian Grand Prix
2,3,18,3,3,7,6.0,58,5698779,41,5,88090,216.719,1,2008,1,1,Australian Grand Prix
3,4,18,4,4,11,5.0,58,5707797,58,7,88603,215.464,1,2008,1,1,Australian Grand Prix
4,5,18,5,1,3,4.0,58,5708630,43,1,87418,218.385,1,2008,1,1,Australian Grand Prix


## 2. Flag the active constructors

### 2.1. Remove unused columns 

In [5]:
# Drop the columns that are not needed downstream.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: re-running it after the columns are already gone no longer
# raises a KeyError.
df_constructors = df_constructors.drop(
    columns=['constructorRef', 'url'],
    errors='ignore',
)

In [6]:
# Preview the remaining columns after the drop.
df_constructors.head()

Unnamed: 0,constructorId,name,nationality
0,1,McLaren,British
1,2,BMW Sauber,German
2,3,Williams,British
3,4,Renault,French
4,5,Toro Rosso,Italian


### 2.2. Adding the flags

In [7]:
# Attach the season of every result row to its constructor.
# Inner join: constructors without any result row are left out, and a
# constructor appears once per matching result row.
results_years = df_races_results[['constructorId', 'year']]
df_transition = pd.merge(
    df_constructors,
    results_years,
    on='constructorId',
    how='inner',
)

In [8]:
# Preview the joined constructor/season rows.
df_transition.head()

Unnamed: 0,constructorId,name,nationality,year
0,1,McLaren,British,2008
1,1,McLaren,British,2008
2,1,McLaren,British,2008
3,1,McLaren,British,2008
4,1,McLaren,British,2008


In [9]:
# Season used to decide which constructors count as "active".
# Kept as a named constant so it is easy to find and update.
ACTIVE_SEASON = 2023

# Names of constructors with at least one result row in ACTIVE_SEASON,
# listed in order of first appearance.
in_active_season = df_transition['year'] == ACTIVE_SEASON
active_constructors = df_transition.loc[in_active_season, 'name'].unique().tolist()
print(active_constructors)

['McLaren', 'Williams', 'Ferrari', 'Red Bull', 'Alfa Romeo', 'Aston Martin', 'Mercedes', 'Haas F1 Team', 'AlphaTauri', 'Alpine F1 Team']


In [10]:
# Flag constructors whose name is listed in `active_constructors`
# (1 = active, 0 = not active).
# A single vectorised membership test replaces the per-name .loc loop and
# yields the same int64 column. Matching is done on the name because
# `active_constructors` holds names, not ids.
df_constructors['constructor_is_active'] = (
    df_constructors['name'].isin(active_constructors).astype('int64')
)

In [11]:
# Show only the rows flagged as active.
df_constructors.loc[df_constructors['constructor_is_active'].eq(1)]

Unnamed: 0,constructorId,name,nationality,constructor_is_active
0,1,McLaren,British,1
2,3,Williams,British,1
5,6,Ferrari,Italian,1
8,9,Red Bull,Austrian,1
49,51,Alfa Romeo,Swiss,1
115,117,Aston Martin,British,1
129,131,Mercedes,German,1
207,210,Haas F1 Team,American,1
209,213,AlphaTauri,Italian,1
210,214,Alpine F1 Team,French,1


## 3. Calculated fields

### 3.1. Calculate the number of races won by each constructor

In [12]:
# Plain Python list of every constructor id, in frame order.
ids = df_constructors['constructorId'].tolist()

In [13]:
# Count, per constructor, the result rows where race_rank == 1.
# One value_counts + map replaces the per-id filter loop, which re-scanned
# both frames for every constructor; the resulting int64 column is the same.
#
# NOTE(review): in the sample rows displayed for df_races_results, the row
# with race_rank == 1 carries 4.0 points while the 10.0-point row has
# race_rank == 2. race_rank may therefore not be the finishing position
# (possibly a fastest-lap rank). Confirm upstream before treating this count
# as race wins.
is_rank_one = df_races_results['race_rank'] == 1
rank_one_counts = df_races_results.loc[is_rank_one, 'constructorId'].value_counts()

df_constructors['constructor_races_won'] = (
    df_constructors['constructorId']
    .map(rank_one_counts)   # look each id up in the per-constructor counts
    .fillna(0)              # constructors with no matching row get 0
    .astype('int64')
)

In [14]:
# Preview the first ten rows with the new constructor_races_won column.
df_constructors.head(10)

Unnamed: 0,constructorId,name,nationality,constructor_is_active,constructor_races_won
0,1,McLaren,British,1,50
1,2,BMW Sauber,German,0,2
2,3,Williams,British,1,7
3,4,Renault,French,0,14
4,5,Toro Rosso,Italian,0,1
5,6,Ferrari,Italian,1,92
6,7,Toyota,Japanese,0,3
7,8,Super Aguri,Japanese,0,0
8,9,Red Bull,Austrian,1,92
9,10,Force India,Indian,0,5


### 3.2. Constructor points average

In [15]:
# Mean of `points` over all result rows of each constructor.
# A single groupby + map replaces the per-id filter loop. Constructors with
# no result rows get NaN, as they did with the mean of an empty selection.
mean_points_by_constructor = (
    df_races_results.groupby('constructorId')['points'].mean()
)

df_constructors['constructor_avg_point'] = (
    df_constructors['constructorId']
    .map(mean_points_by_constructor)
    .astype('float64')
)

In [16]:
# Preview the first ten rows with the new constructor_avg_point column.
df_constructors.head(10)

Unnamed: 0,constructorId,name,nationality,constructor_is_active,constructor_races_won,constructor_avg_point
0,1,McLaren,British,1,50,3.364151
1,2,BMW Sauber,German,0,2,2.2
2,3,Williams,British,1,7,2.243008
3,4,Renault,French,0,14,2.257942
4,5,Toro Rosso,Italian,0,1,0.932836
5,6,Ferrari,Italian,1,92,4.343429
6,7,Toyota,Japanese,0,3,0.994643
7,8,Super Aguri,Japanese,0,0,0.051282
8,9,Red Bull,Austrian,1,92,9.461111
9,10,Force India,Indian,0,5,2.589623


### 3.3. Number of times each constructor finished in the points zone (top 10)

In [17]:
# Count, per constructor, the result rows where race_rank <= 10.
# One value_counts + map replaces the per-id filter loop; the resulting
# int64 column is the same.
#
# NOTE(review): in the sample rows displayed for df_races_results, the row
# with race_rank == 1 carries 4.0 points while the 10.0-point row has
# race_rank == 2, so race_rank may not be the finishing position. Also
# confirm how missing ranks are encoded upstream: a placeholder such as 0
# would be counted by `<= 10`.
is_top_ten = df_races_results['race_rank'] <= 10
top_ten_counts = df_races_results.loc[is_top_ten, 'constructorId'].value_counts()

df_constructors['constructor_times_in_top_10'] = (
    df_constructors['constructorId']
    .map(top_ten_counts)    # look each id up in the per-constructor counts
    .fillna(0)              # constructors with no matching row get 0
    .astype('int64')
)

In [18]:
# Preview the first five rows with the new constructor_times_in_top_10 column.
df_constructors.head()

Unnamed: 0,constructorId,name,nationality,constructor_is_active,constructor_races_won,constructor_avg_point,constructor_times_in_top_10
0,1,McLaren,British,1,50,3.364151,1571
1,2,BMW Sauber,German,0,2,2.2,98
2,3,Williams,British,1,7,2.243008,1170
3,4,Renault,French,0,14,2.257942,576
4,5,Toro Rosso,Italian,0,1,0.932836,208


In [19]:
# Write the enriched constructor table to disk, without the row index.
processed_constructors_csv = '../data/outputs/04_constructors_processed.csv'
df_constructors.to_csv(processed_constructors_csv, index=False)