##### Read all the data as required

In [7]:
from pandas import read_csv
from lib import configuration
from lib import common_functions

In [8]:
# Load drivers.csv from the configured silver API folder, give the generic
# columns driver-specific names, and keep only the four selected columns.
drivers_path = f'{configuration.silver_api_folder_path}/drivers.csv'
driver_renames = {
    "number": "driver_number",
    "name": "driver_name",
    "nationality": "driver_nationality",
}
driver_columns = ["driver_id", "driver_number", "driver_name", "driver_nationality"]
drivers_df = read_csv(drivers_path).rename(columns=driver_renames)[driver_columns]
drivers_df

Unnamed: 0,driver_id,driver_number,driver_name,driver_nationality
0,1,999,Carlo Abate,Italian
1,2,999,George Abecassis,British
2,3,999,Kenny Acheson,British
3,4,999,Philippe Adams,Belgian
4,5,999,Walt Ader,American
...,...,...,...,...
854,855,999,Emilio Zapico,Spanish
855,856,999,Guanyu Zhou,Chinese
856,857,999,Ricardo Zonta,Brazilian
857,858,999,Renzo Zorzi,Italian


In [9]:
# Load constructors.csv from the configured silver API folder; the "name"
# column becomes "team", while "nationality" keeps its original name.
constructors_path = f'{configuration.silver_api_folder_path}/constructors.csv'
constructor_columns = ["constructor_id", "team", "nationality"]
constructors_df = (
    read_csv(constructors_path)
    .rename(columns={"name": "team"})
    [constructor_columns]
)
constructors_df

Unnamed: 0,constructor_id,team,nationality
0,1,Adams,American
1,2,AFM,German
2,3,AGS,French
3,4,Alfa Romeo,Swiss
4,5,AlphaTauri,Italian
...,...,...,...
207,208,Watson,American
208,209,Wetteroth,American
209,210,Williams,British
210,211,Wolf,Canadian


In [19]:
# Load circuits.csv from the configured silver API folder and rename
# "location" to "circuit_location" before selecting the columns to keep.
circuits_path = f'{configuration.silver_api_folder_path}/circuits.csv'
circuit_columns = ["circuit_id", "circuit_name", "circuit_location"]
circuits_df = (
    read_csv(circuits_path)
    .rename(columns={"location": "circuit_location"})
    [circuit_columns]
)
circuits_df

Unnamed: 0,circuit_id,circuit_name,circuit_location
0,1,Adelaide Street Circuit,Adelaide
1,2,Ain Diab,Casablanca
2,3,Aintree,Liverpool
3,4,Albert Park Grand Prix Circuit,Melbourne
4,5,Circuit of the Americas,Austin
...,...,...,...
72,73,Yas Marina Circuit,Abu Dhabi
73,74,Korean International Circuit,Yeongam County
74,75,Circuit Park Zandvoort,Zandvoort
75,76,Zeltweg,Styria


In [11]:
# Load races.csv from the configured silver API folder, prefix the generic
# column names with "race_", and expose "race_timestamp" as "race_date".
races_path = f'{configuration.silver_api_folder_path}/races.csv'
race_renames = {
    "year": "race_year",
    "name": "race_name",
    "race_timestamp": "race_date",
}
race_columns = ["race_id", "circuit_id", "race_name", "race_date", "race_year"]
races_df = read_csv(races_path).rename(columns=race_renames)[race_columns]
races_df

Unnamed: 0,race_id,circuit_id,race_name,race_date,race_year
0,1,Silverstone Circuit,British Grand Prix,1950-05-13 06:00:00,1950
1,2,Circuit de Monaco,Monaco Grand Prix,1950-05-21 06:00:00,1950
2,3,Indianapolis Motor Speedway,Indianapolis 500,1950-05-30 06:00:00,1950
3,4,Circuit Bremgarten,Swiss Grand Prix,1950-06-04 06:00:00,1950
4,5,Circuit de Spa-Francorchamps,Belgian Grand Prix,1950-06-18 06:00:00,1950
...,...,...,...,...,...
1120,1121,Autódromo Hermanos Rodríguez,Mexico City Grand Prix,2024-10-27 06:00:00,2024
1121,1122,Autódromo José Carlos Pace,São Paulo Grand Prix,2024-11-03 06:00:00,2024
1122,1123,Las Vegas Strip Street Circuit,Las Vegas Grand Prix,2024-11-23 06:00:00,2024
1123,1124,Losail International Circuit,Qatar Grand Prix,2024-12-01 06:00:00,2024


In [14]:
# Load results.csv from the configured silver API folder and keep the three
# key columns plus grid, position and points.
# A rename of "time" to "race_time" was previously disabled here; that column
# is not part of the selection below.
results_path = f'{configuration.silver_api_folder_path}/results.csv'
result_columns = ["race_id", "driver_id", "constructor_id", "grid", "position", "points"]
results_df = read_csv(results_path)[result_columns]
results_df

Unnamed: 0,race_id,driver_id,constructor_id,grid,position,points
0,British Grand Prix1950,farina,alfa,1,1,9.0
1,British Grand Prix1950,fagioli,alfa,2,2,6.0
2,British Grand Prix1950,reg_parnell,alfa,4,3,4.0
3,British Grand Prix1950,cabantous,lago,6,4,3.0
4,British Grand Prix1950,rosier,lago,9,5,2.0
...,...,...,...,...,...,...
26434,Dutch Grand Prix1984,piquet,brabham,2,25,0.0
26435,Dutch Grand Prix1984,ghinzani,osella,21,26,0.0
26436,Dutch Grand Prix1984,alboreto,ferrari,9,27,0.0
26437,Italian Grand Prix1984,lauda,mclaren,4,1,9.0


##### Join circuits to races

In [21]:
# Attach circuit details to every race with an inner index join.
# The join matches circuits_df's "circuit_name" against races_df's
# "circuit_id" column, so races without a matching circuit name are dropped.
circuits_by_name = circuits_df.set_index("circuit_name")
races_by_circuit = races_df.set_index("circuit_id")
race_circuit_columns = ["race_id", "race_year", "race_name", "race_date", "circuit_location"]
race_circuits_df = (
    circuits_by_name
    .join(races_by_circuit, lsuffix='race_', how='inner')
    .reset_index()
    [race_circuit_columns]
)
race_circuits_df

Unnamed: 0,race_id,race_year,race_name,race_date,circuit_location
0,81,1959,German Grand Prix,1959-08-02 06:00:00,Berlin
1,420,1985,Australian Grand Prix,1985-11-03 06:00:00,Adelaide
2,436,1986,Australian Grand Prix,1986-10-26 06:00:00,Adelaide
3,452,1987,Australian Grand Prix,1987-11-15 06:00:00,Adelaide
4,468,1988,Australian Grand Prix,1988-11-13 06:00:00,Adelaide
...,...,...,...,...,...
1120,319,1979,Belgian Grand Prix,1979-05-13 06:00:00,Heusden-Zolder
1121,333,1980,Belgian Grand Prix,1980-05-04 06:00:00,Heusden-Zolder
1122,347,1981,Belgian Grand Prix,1981-05-17 06:00:00,Heusden-Zolder
1123,362,1982,Belgian Grand Prix,1982-05-09 06:00:00,Heusden-Zolder


In [None]:
def _inner_join_on(left_df, right_df, key, rsuffix):
    """Inner-join two frames on a shared key column and fail loudly on no match.

    Both frames are indexed by ``key``, joined with ``how='inner'`` and the
    index is restored as a regular column. Overlapping non-key columns get the
    ``_res`` suffix on the left side and ``rsuffix`` on the right side.

    Parameters:
        left_df: frame whose rows are being enriched.
        right_df: lookup frame providing the additional columns.
        key: name of the column present in both frames to join on.
        rsuffix: suffix applied to overlapping columns coming from ``right_df``.

    Returns:
        The joined DataFrame with ``key`` as its first column.

    Raises:
        ValueError: if both inputs contain rows but the join matches none,
            which means the two key columns do not share any values.
    """
    joined = (
        left_df.set_index(key)
        .join(right_df.set_index(key), lsuffix='_res', rsuffix=rsuffix, how='inner')
        .reset_index()
    )
    # An inner join between incompatible keys (e.g. strings on one side and
    # integers on the other) does not raise; it silently returns zero rows.
    # Stop here instead of propagating an empty frame to the gold output.
    if joined.empty and not left_df.empty and not right_df.empty:
        raise ValueError(
            f"Inner join on '{key}' matched no rows "
            f"(left key dtype: {left_df[key].dtype}, right key dtype: {right_df[key].dtype}). "
            "Check that both frames use the same identifier values for this column."
        )
    return joined


# NOTE(review): the outputs displayed for the load cells show string keys in
# results_df (e.g. "British Grand Prix1950", "farina", "alfa") but integer ids
# in the race, driver and constructor frames. If that is still the case these
# joins cannot match; confirm and align the keys in the silver data.
results_with_races_df = _inner_join_on(results_df, race_circuits_df, "race_id", rsuffix='_race')
results_with_drivers_df = _inner_join_on(results_with_races_df, drivers_df, "driver_id", rsuffix='_drive')
final_df = _inner_join_on(results_with_drivers_df, constructors_df, "constructor_id", rsuffix='_const')

# Stamp every row with the value returned by the shared ingestion-date helper.
final_df["created_date"] = common_functions.get_ingestion_date()
final_df

In [None]:
# Spot-check one race: keep only the rows for the 2020 Abu Dhabi Grand Prix
# and display them ordered from most to fewest points.
is_2020_season = final_df['race_year'] == 2020
is_abu_dhabi_gp = final_df['race_name'] == 'Abu Dhabi Grand Prix'
filtered_final_df = final_df[is_2020_season & is_abu_dhabi_gp]
filtered_final_df.sort_values(by='points', ascending=False)

In [None]:
# Write the joined race results to the configured gold folder, omitting the
# DataFrame index from the file.
race_results_path = f"{configuration.gold_folder_path}/race_results.csv"
final_df.to_csv(race_results_path, index=False)

In [None]:
# Read the exported race results back from the gold folder to inspect what
# was written.
# NOTE(review): despite its name, df_parquet is loaded from a CSV file.
exported_results_path = f'{configuration.gold_folder_path}/race_results.csv'
df_parquet = read_csv(exported_results_path)
df_parquet