In [1]:
from pathlib import Path

import pandas as pd
from IPython.display import display

# *PATH DATI ---------------------------------------------------------
DATA_DIR = Path("data/f1")


def load_f1_raw():
    """
    Carica i 4 csv F1 "grezzi" e li restituisce come tuple di DataFrame.

    Ritorna:
        races, results, drivers, constructors
    """
    races = pd.read_csv(DATA_DIR / "races.csv")
    results = pd.read_csv(DATA_DIR / "results.csv")
    drivers = pd.read_csv(DATA_DIR / "drivers.csv")
    constructors = pd.read_csv(DATA_DIR / "constructors.csv")
    return races, results, drivers, constructors


def show_basic_info(df: pd.DataFrame, name: str) -> None:
    """
    Stampa shape, prime righe e dtypes di un DataFrame.
    Utile per far capire cosa contiene ogni csv.
    """
    print(f"=== {name} ===")
    print("shape:", df.shape)
    display(df.head())
    print("\nTipi di dato:")
    print(df.dtypes)
    print("-" * 80)


def build_f1_merged() -> pd.DataFrame:
    """
    Ricrea lo stesso df che usi nella dashboard:
    results + races + drivers + constructors.
    """
    races, results, drivers, constructors = load_f1_raw()

    # nome pilota leggibile
    drivers["driver_name"] = (
        drivers["forename"] + " " + drivers["surname"]
    )

    df = (
        results
        .merge(races[["raceId", "year", "name"]], on="raceId")
        .merge(drivers[["driverId", "driver_name"]], on="driverId")
        .merge(
            constructors[["constructorId", "name"]]
            .rename(columns={"name": "team"}),
            on="constructorId",
        )
    )
    return df


In [2]:
# 1. Vedere i singoli csv
races, results, drivers, constructors = load_f1_raw()

show_basic_info(races, "races.csv")
show_basic_info(results, "results.csv")
show_basic_info(drivers, "drivers.csv")
show_basic_info(constructors, "constructors.csv")


=== races.csv ===
shape: (1125, 18)


Unnamed: 0,raceId,year,round,circuitId,name,date,time,url,fp1_date,fp1_time,fp2_date,fp2_time,fp3_date,fp3_time,quali_date,quali_time,sprint_date,sprint_time
0,1,2009,1,1,Australian Grand Prix,2009-03-29,06:00:00,http://en.wikipedia.org/wiki/2009_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
1,2,2009,2,2,Malaysian Grand Prix,2009-04-05,09:00:00,http://en.wikipedia.org/wiki/2009_Malaysian_Gr...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
2,3,2009,3,17,Chinese Grand Prix,2009-04-19,07:00:00,http://en.wikipedia.org/wiki/2009_Chinese_Gran...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
3,4,2009,4,3,Bahrain Grand Prix,2009-04-26,12:00:00,http://en.wikipedia.org/wiki/2009_Bahrain_Gran...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
4,5,2009,5,4,Spanish Grand Prix,2009-05-10,12:00:00,http://en.wikipedia.org/wiki/2009_Spanish_Gran...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N



Tipi di dato:
raceId          int64
year            int64
round           int64
circuitId       int64
name           object
date           object
time           object
url            object
fp1_date       object
fp1_time       object
fp2_date       object
fp2_time       object
fp3_date       object
fp3_time       object
quali_date     object
quali_time     object
sprint_date    object
sprint_time    object
dtype: object
--------------------------------------------------------------------------------
=== results.csv ===
shape: (26759, 18)


Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,laps,time,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId
0,1,18,1,1,22,1,1,1,1,10.0,58,1:34:50.616,5690616,39,2,1:27.452,218.3,1
1,2,18,2,2,3,5,2,2,2,8.0,58,+5.478,5696094,41,3,1:27.739,217.586,1
2,3,18,3,3,7,7,3,3,3,6.0,58,+8.163,5698779,41,5,1:28.090,216.719,1
3,4,18,4,4,5,11,4,4,4,5.0,58,+17.181,5707797,58,7,1:28.603,215.464,1
4,5,18,5,1,23,3,5,5,5,4.0,58,+18.014,5708630,43,1,1:27.418,218.385,1



Tipi di dato:
resultId             int64
raceId               int64
driverId             int64
constructorId        int64
number              object
grid                 int64
position            object
positionText        object
positionOrder        int64
points             float64
laps                 int64
time                object
milliseconds        object
fastestLap          object
rank                object
fastestLapTime      object
fastestLapSpeed     object
statusId             int64
dtype: object
--------------------------------------------------------------------------------
=== drivers.csv ===
shape: (861, 9)


Unnamed: 0,driverId,driverRef,number,code,forename,surname,dob,nationality,url
0,1,hamilton,44,HAM,Lewis,Hamilton,1985-01-07,British,http://en.wikipedia.org/wiki/Lewis_Hamilton
1,2,heidfeld,\N,HEI,Nick,Heidfeld,1977-05-10,German,http://en.wikipedia.org/wiki/Nick_Heidfeld
2,3,rosberg,6,ROS,Nico,Rosberg,1985-06-27,German,http://en.wikipedia.org/wiki/Nico_Rosberg
3,4,alonso,14,ALO,Fernando,Alonso,1981-07-29,Spanish,http://en.wikipedia.org/wiki/Fernando_Alonso
4,5,kovalainen,\N,KOV,Heikki,Kovalainen,1981-10-19,Finnish,http://en.wikipedia.org/wiki/Heikki_Kovalainen



Tipi di dato:
driverId        int64
driverRef      object
number         object
code           object
forename       object
surname        object
dob            object
nationality    object
url            object
dtype: object
--------------------------------------------------------------------------------
=== constructors.csv ===
shape: (212, 5)


Unnamed: 0,constructorId,constructorRef,name,nationality,url
0,1,mclaren,McLaren,British,http://en.wikipedia.org/wiki/McLaren
1,2,bmw_sauber,BMW Sauber,German,http://en.wikipedia.org/wiki/BMW_Sauber
2,3,williams,Williams,British,http://en.wikipedia.org/wiki/Williams_Grand_Pr...
3,4,renault,Renault,French,http://en.wikipedia.org/wiki/Renault_in_Formul...
4,5,toro_rosso,Toro Rosso,Italian,http://en.wikipedia.org/wiki/Scuderia_Toro_Rosso



Tipi di dato:
constructorId      int64
constructorRef    object
name              object
nationality       object
url               object
dtype: object
--------------------------------------------------------------------------------


In [3]:
# 2. Vedere il df finale dopo i merge
df = build_f1_merged()
show_basic_info(df, "df finale dopo i merge")

# opzionale: qualche riga random per far vedere bene le colonne
display(df.sample(5, random_state=0))

=== df finale dopo i merge ===
shape: (26759, 22)


Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,...,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,year,name,driver_name,team
0,1,18,1,1,22,1,1,1,1,10.0,...,5690616,39,2,1:27.452,218.3,1,2008,Australian Grand Prix,Lewis Hamilton,McLaren
1,2,18,2,2,3,5,2,2,2,8.0,...,5696094,41,3,1:27.739,217.586,1,2008,Australian Grand Prix,Nick Heidfeld,BMW Sauber
2,3,18,3,3,7,7,3,3,3,6.0,...,5698779,41,5,1:28.090,216.719,1,2008,Australian Grand Prix,Nico Rosberg,Williams
3,4,18,4,4,5,11,4,4,4,5.0,...,5707797,58,7,1:28.603,215.464,1,2008,Australian Grand Prix,Fernando Alonso,Renault
4,5,18,5,1,23,3,5,5,5,4.0,...,5708630,43,1,1:27.418,218.385,1,2008,Australian Grand Prix,Heikki Kovalainen,McLaren



Tipi di dato:
resultId             int64
raceId               int64
driverId             int64
constructorId        int64
number              object
grid                 int64
position            object
positionText        object
positionOrder        int64
points             float64
laps                 int64
time                object
milliseconds        object
fastestLap          object
rank                object
fastestLapTime      object
fastestLapSpeed     object
statusId             int64
year                 int64
name                object
driver_name         object
team                object
dtype: object
--------------------------------------------------------------------------------


Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,...,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,year,name,driver_name,team
24495,24501,1024,9,3,88,19,16,16,16,0.0,...,7160691,59,18,1:46.793,170.674,1,2019,Singapore Grand Prix,Robert Kubica,Williams
10474,10475,445,160,45,24,21,9,9,9,0.0,...,\N,\N,\N,\N,\N,13,1984,British Grand Prix,Piercarlo Ghinzani,Osella
19640,19641,817,723,6,42,5,2,2,2,6.0,...,11063300,\N,\N,\N,\N,1,1952,Swiss Grand Prix,Rudi Fischer,Ferrari
20090,20091,835,731,105,12,21,\N,R,25,0.0,...,\N,\N,\N,\N,\N,70,1950,Indianapolis 500,Henry Banks,Maserati
24634,24640,1031,847,3,63,17,\N,R,15,0.0,...,\N,49,14,1:09.317,224.256,32,2020,Austrian Grand Prix,George Russell,Williams


In [4]:
# 3. Esempio: tutti i piloti di una singola gara
df[df["name"] == "Italian Grand Prix"].head()

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,...,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,year,name,driver_name,team
268,269,31,20,5,15,1,1,1,1,10.0,...,5207494,53,14,1:30.510,230.414,1,2008,Italian Grand Prix,Sebastian Vettel,Toro Rosso
269,270,31,5,1,23,2,2,2,2,8.0,...,5220006,53,13,1:30.300,230.95,1,2008,Italian Grand Prix,Heikki Kovalainen,McLaren
270,271,31,9,2,4,11,3,3,3,6.0,...,5227965,52,12,1:30.298,230.955,1,2008,Italian Grand Prix,Robert Kubica,BMW Sauber
271,272,31,4,4,5,8,4,4,4,5.0,...,5231397,51,9,1:29.961,231.82,1,2008,Italian Grand Prix,Fernando Alonso,Renault
272,273,31,2,2,3,10,5,5,5,4.0,...,5235242,53,6,1:29.807,232.217,1,2008,Italian Grand Prix,Nick Heidfeld,BMW Sauber
