### Formula 1 Races

In [35]:
from pathlib import Path

import numpy as np
import pandas as pd

In [36]:
# Read the raw sprint-result and circuit tables from the `f1db_csv/` directory.
sprints = pd.read_csv(filepath_or_buffer='f1db_csv/sprint_results.csv')
circuits = pd.read_csv(filepath_or_buffer='f1db_csv/circuits.csv')

In [37]:
# Preview the first five rows of the circuits table.
circuits.head(n=5)

Unnamed: 0,circuitId,circuitRef,name,location,country,lat,lng,alt,url
0,1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...
1,2,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.738,18,http://en.wikipedia.org/wiki/Sepang_Internatio...
2,3,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,7,http://en.wikipedia.org/wiki/Bahrain_Internati...
3,4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57,2.26111,109,http://en.wikipedia.org/wiki/Circuit_de_Barcel...
4,5,istanbul,Istanbul Park,Istanbul,Turkey,40.9517,29.405,130,http://en.wikipedia.org/wiki/Istanbul_Park


In [38]:
# Preview the first five rows of the sprint-results table.
sprints.head(n=5)

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,laps,time,milliseconds,fastestLap,fastestLapTime,statusId
0,1,1061,830,9,33,2,1,1,1,3,17,25:38.426,1538426,14,1:30.013,1
1,2,1061,1,131,44,1,2,2,2,2,17,+1.430,1539856,17,1:29.937,1
2,3,1061,822,131,77,3,3,3,3,1,17,+7.502,1545928,17,1:29.958,1
3,4,1061,844,6,16,4,4,4,4,0,17,+11.278,1549704,16,1:30.163,1
4,5,1061,846,1,4,6,5,5,5,0,17,+24.111,1562537,16,1:30.566,1


In [53]:
# Trim the sprint results down to the identifier, finishing-position and
# points columns.
# Reassigning (instead of `inplace=True`) combined with `errors='ignore'`
# makes this cell safe to re-run: columns that were already removed are
# skipped rather than raising KeyError.
sprints = sprints.drop(
    columns=[
        'number',
        'grid',
        'positionText',
        'positionOrder',
        'laps',
        'time',
        'milliseconds',
        'fastestLap',
        'fastestLapTime',
        'statusId',
    ],
    errors='ignore',
)

In [54]:
# Preview the first five sprint rows to check which columns remain.
sprints.head(n=5)

Unnamed: 0,resultId,raceId,driverId,constructorId,position,points
0,1,1061,830,9,1,3
1,2,1061,1,131,2,2
2,3,1061,822,131,3,1
3,4,1061,844,6,4,0
4,5,1061,846,1,5,0


In [39]:
# Read the raw races table from the `f1db_csv/` directory.
races = pd.read_csv(filepath_or_buffer='f1db_csv/races.csv')

In [40]:
# Preview the first five rows of the races table.
races.head(n=5)

Unnamed: 0,raceId,year,round,circuitId,name,date,time,url,fp1_date,fp1_time,fp2_date,fp2_time,fp3_date,fp3_time,quali_date,quali_time,sprint_date,sprint_time
0,1,2009,1,1,Australian Grand Prix,2009-03-29,06:00:00,http://en.wikipedia.org/wiki/2009_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
1,2,2009,2,2,Malaysian Grand Prix,2009-04-05,09:00:00,http://en.wikipedia.org/wiki/2009_Malaysian_Gr...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
2,3,2009,3,17,Chinese Grand Prix,2009-04-19,07:00:00,http://en.wikipedia.org/wiki/2009_Chinese_Gran...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
3,4,2009,4,3,Bahrain Grand Prix,2009-04-26,12:00:00,http://en.wikipedia.org/wiki/2009_Bahrain_Gran...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
4,5,2009,5,4,Spanish Grand Prix,2009-05-10,12:00:00,http://en.wikipedia.org/wiki/2009_Spanish_Gran...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N


##### Clean up race dataframe and remove unnecessary columns
- We drop the free-practice (FP) and qualifying date/time columns, along with the sprint time, as they aren't necessary for geographical visualization

In [41]:
# Remove the practice/qualifying date and time columns and the sprint time;
# `sprint_date` is intentionally kept.
# `errors='ignore'` lets the cell be re-run (or run against an export that
# lacks some of these columns) without raising KeyError.
races = races.drop(
    columns=[
        'fp1_date',
        'fp1_time',
        'fp2_date',
        'fp2_time',
        'fp3_date',
        'fp3_time',
        'quali_date',
        'quali_time',
        'sprint_time',
    ],
    errors='ignore',
)

In [42]:
# Select only the circuit columns that are merged onto the race data
# (identifiers, names, and coordinates).
circuit_subset = circuits.loc[
    :,
    ['circuitId', 'circuitRef', 'name', 'location', 'country', 'lat', 'lng'],
]

##### Merge Circuits and Race Data
- Merge on Circuit ID to match circuit information with the race data for each season
- Rename columns for easy readability

In [43]:
# Left join so every race row is kept. Both frames carry a `name` column, so
# pandas suffixes them as `name_x` (from races) and `name_y` (from circuits).
races = races.merge(right=circuit_subset, how='left', on='circuitId')

In [44]:
# Inspect the last five rows of the merged frame.
races.tail(n=5)

Unnamed: 0,raceId,year,round,circuitId,name_x,date,time,url,sprint_date,circuitRef,name_y,location,country,lat,lng
1120,1140,2024,20,32,Mexico City Grand Prix,2024-10-27,20:00:00,https://en.wikipedia.org/wiki/2024_Mexico_City...,\N,rodriguez,Autódromo Hermanos Rodríguez,Mexico City,Mexico,19.4042,-99.0907
1121,1141,2024,21,18,São Paulo Grand Prix,2024-11-03,17:00:00,https://en.wikipedia.org/wiki/2024_S%C3%A3o_Pa...,2024-11-02,interlagos,Autódromo José Carlos Pace,São Paulo,Brazil,-23.7036,-46.6997
1122,1142,2024,22,80,Las Vegas Grand Prix,2024-11-23,06:00:00,https://en.wikipedia.org/wiki/2024_Las_Vegas_G...,\N,vegas,Las Vegas Strip Street Circuit,Las Vegas,United States,36.1147,-115.173
1123,1143,2024,23,78,Qatar Grand Prix,2024-12-01,17:00:00,https://en.wikipedia.org/wiki/2024_Qatar_Grand...,2024-11-30,losail,Losail International Circuit,Al Daayen,Qatar,25.49,51.4542
1124,1144,2024,24,24,Abu Dhabi Grand Prix,2024-12-08,13:00:00,https://en.wikipedia.org/wiki/2024_Abu_Dhabi_G...,\N,yas_marina,Yas Marina Circuit,Abu Dhabi,UAE,24.4672,54.6031


In [45]:
# Switch to snake_case column names and disambiguate the two `name` columns
# produced by the merge.
races = races.rename(
    columns={
        'raceId': 'race_id',
        'circuitId': 'circuit_id',
        'name_y': 'circuit_name',
        'name_x': 'race_name',
        'location': 'city',
        'lng': 'lon',
        'circuitRef': 'circuit_ref',
    }
)

###### Replace `\N` placeholder values with actual null values

In [46]:
# The source CSV marks missing values with the literal string `\N`;
# convert every such cell to NaN.
races = races.replace(to_replace='\\N', value=np.nan)

In [50]:
# Display the transformed frame (pandas truncates the middle rows) to confirm
# that `sprint_date` now holds NaN where the `\N` placeholder used to be.
races

Unnamed: 0,race_id,year,round,circuit_id,race_name,date,time,url,sprint_date,circuit_ref,circuit_name,city,country,lat,lon
0,1,2009,1,1,Australian Grand Prix,2009-03-29,06:00:00,http://en.wikipedia.org/wiki/2009_Australian_G...,,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.84970,144.96800
1,2,2009,2,2,Malaysian Grand Prix,2009-04-05,09:00:00,http://en.wikipedia.org/wiki/2009_Malaysian_Gr...,,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.73800
2,3,2009,3,17,Chinese Grand Prix,2009-04-19,07:00:00,http://en.wikipedia.org/wiki/2009_Chinese_Gran...,,shanghai,Shanghai International Circuit,Shanghai,China,31.33890,121.22000
3,4,2009,4,3,Bahrain Grand Prix,2009-04-26,12:00:00,http://en.wikipedia.org/wiki/2009_Bahrain_Gran...,,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.03250,50.51060
4,5,2009,5,4,Spanish Grand Prix,2009-05-10,12:00:00,http://en.wikipedia.org/wiki/2009_Spanish_Gran...,,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57000,2.26111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1120,1140,2024,20,32,Mexico City Grand Prix,2024-10-27,20:00:00,https://en.wikipedia.org/wiki/2024_Mexico_City...,,rodriguez,Autódromo Hermanos Rodríguez,Mexico City,Mexico,19.40420,-99.09070
1121,1141,2024,21,18,São Paulo Grand Prix,2024-11-03,17:00:00,https://en.wikipedia.org/wiki/2024_S%C3%A3o_Pa...,2024-11-02,interlagos,Autódromo José Carlos Pace,São Paulo,Brazil,-23.70360,-46.69970
1122,1142,2024,22,80,Las Vegas Grand Prix,2024-11-23,06:00:00,https://en.wikipedia.org/wiki/2024_Las_Vegas_G...,,vegas,Las Vegas Strip Street Circuit,Las Vegas,United States,36.11470,-115.17300
1123,1143,2024,23,78,Qatar Grand Prix,2024-12-01,17:00:00,https://en.wikipedia.org/wiki/2024_Qatar_Grand...,2024-11-30,losail,Losail International Circuit,Al Daayen,Qatar,25.49000,51.45420


#### Save transformed race/circuit and sprint data

In [48]:
# Write the merged race/circuit table to the project's output folder,
# leaving out the DataFrame index.
races.to_csv(path_or_buf='transformed_data/race_data.csv', index=False)

In [49]:
# Write a second copy into the Tableau repository's data-source folder.
# The destination is resolved from the current user's home directory rather
# than a hard-coded `/Users/<name>/...` absolute path, so the notebook is not
# tied to one machine or account.
tableau_datasource_dir = (
    Path.home() / 'Documents' / 'My Tableau Repository' / 'Datasources' / 'formula1'
)
races.to_csv(tableau_datasource_dir / 'race_data.csv', index=False)

In [55]:
# Write the trimmed sprint results to the project's output folder,
# leaving out the DataFrame index.
sprints.to_csv(path_or_buf='transformed_data/sprint_data.csv', index=False)