In [32]:
import os
import pandas as pd
import numpy as np

In [33]:
drivers = pd.read_csv(os.path.join("data_raw", "drivers.csv"))
results = pd.read_csv(os.path.join("data_raw", "results.csv"))
races = pd.read_csv(os.path.join("data_raw", "races.csv"))
circuits = pd.read_csv(os.path.join("data_raw", "circuits.csv"))
status = pd.read_csv(os.path.join("data_raw", "status.csv"))
constructors = pd.read_csv(os.path.join("data_raw", "constructors.csv"))

In [34]:
drivers["driverUrl"] = drivers["url"]
results = results.merge(drivers[['driverId', 'driverRef', 'code', 'forename', 'surname', 'dob', 'nationality', 'driverUrl']], on="driverId", how="outer")
circuits["circuitUrl"] = circuits["url"]
circuits["circuit"] = circuits["name"]
races["gp"] = races["name"]
races = races.merge(circuits[["circuitId", "circuitRef", "location", "country", "circuitUrl", "circuit"]], on="circuitId", how="outer")
results = results.merge(races[["raceId","year","round","date","quali_date","quali_time","location","country","gp"]], on="raceId", how="outer")
results = results.merge(status, on="statusId", how="outer")
constructors["constructor"] = constructors["name"]
constructors["constructorUrl"] = constructors["url"]
constructors["constructorNationality"] = constructors["nationality"]
results = results.merge(constructors[["constructorId", "constructor", "constructorNationality", "constructorRef"]], on="constructorId", how="outer")

In [35]:
results["name"] = results["forename"] + " " + results["surname"]
results["date"] = pd.to_datetime(results["date"], format="%Y-%m-%d")
results["quali_date"] = pd.to_datetime(results["date"], format="%Y-%m-%d")
results["dob"] = pd.to_datetime(results["dob"], format="%Y-%m-%d")
results["number"] = results["number"].apply(pd.to_numeric, errors = "coerce").astype(np.int64, errors="ignore")
# results["age"] = results["date"] - results["dob"]
results["year"] = results["year"].apply(pd.to_numeric, errors = "coerce").astype(np.int64, errors="ignore")
results["round"] = results["round"].apply(pd.to_numeric, errors = "coerce").astype(np.int64, errors="ignore")
results["position"] = results["position"].apply(pd.to_numeric, errors = "coerce").astype(np.int64, errors="ignore")
results["fastestLapSpeed"] = results["fastestLapSpeed"].apply(pd.to_numeric, errors = "coerce").astype(np.int64, errors="ignore")

In [36]:
results.columns

Index(['resultId', 'raceId', 'driverId', 'constructorId', 'number', 'grid',
       'position', 'positionText', 'positionOrder', 'points', 'laps', 'time',
       'milliseconds', 'fastestLap', 'rank', 'fastestLapTime',
       'fastestLapSpeed', 'statusId', 'driverRef', 'code', 'forename',
       'surname', 'dob', 'nationality', 'driverUrl', 'year', 'round', 'date',
       'quali_date', 'quali_time', 'location', 'country', 'gp', 'status',
       'constructor', 'constructorNationality', 'constructorRef', 'name'],
      dtype='object')

In [37]:
results.sort_values(by=['year','round']).dropna().tail(1)['location']

12366    Las Vegas
Name: location, dtype: object

In [38]:
results.to_json(os.path.join('data','ergast-results.json'))

In [39]:
sprints = pd.read_csv(os.path.join("data_raw", "sprint_results.csv"))

In [40]:
sprints = sprints.merge(drivers[['driverId', 'driverRef', 'code', 'forename', 'surname', 'dob', 'nationality', 'driverUrl']], on="driverId", how="inner")

In [41]:
sprints

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,...,fastestLap,fastestLapTime,statusId,driverRef,code,forename,surname,dob,nationality,driverUrl
0,1,1061,830,9,33,2,1,1,1,3,...,14,1:30.013,1,max_verstappen,VER,Max,Verstappen,1997-09-30,Dutch,http://en.wikipedia.org/wiki/Max_Verstappen
1,22,1065,830,9,33,3,2,2,2,2,...,9,1:23.502,1,max_verstappen,VER,Max,Verstappen,1997-09-30,Dutch,http://en.wikipedia.org/wiki/Max_Verstappen
2,42,1071,830,9,33,1,2,2,2,2,...,9,1:12.114,1,max_verstappen,VER,Max,Verstappen,1997-09-30,Dutch,http://en.wikipedia.org/wiki/Max_Verstappen
3,61,1077,830,9,1,1,1,1,1,8,...,14,1:19.154,1,max_verstappen,VER,Max,Verstappen,1997-09-30,Dutch,http://en.wikipedia.org/wiki/Max_Verstappen
4,81,1084,830,9,1,1,1,1,1,8,...,5,1:08.455,1,max_verstappen,VER,Max,Verstappen,1997-09-30,Dutch,http://en.wikipedia.org/wiki/Max_Verstappen
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,176,1110,858,3,2,13,16,16,16,0,...,6,2:03.482,1,sargeant,SAR,Logan,Sargeant,2000-12-31,American,http://en.wikipedia.org/wiki/Logan_Sargeant
236,199,1115,858,3,2,20,\N,R,19,0,...,2,2:03.562,31,sargeant,SAR,Logan,Sargeant,2000-12-31,American,http://en.wikipedia.org/wiki/Logan_Sargeant
237,219,1116,858,3,2,20,19,19,19,0,...,6,1:41.947,1,sargeant,SAR,Logan,Sargeant,2000-12-31,American,http://en.wikipedia.org/wiki/Logan_Sargeant
238,240,1118,858,3,2,20,20,20,20,0,...,4,1:15.496,1,sargeant,SAR,Logan,Sargeant,2000-12-31,American,http://en.wikipedia.org/wiki/Logan_Sargeant


In [42]:
sprints = sprints.merge(races[["raceId","year","round","location","country","gp"]], on="raceId", how="outer")

In [43]:
sprints["name"] = sprints["forename"] + " " + sprints["surname"]
sprints["year"] = sprints["year"].apply(pd.to_numeric, errors = "coerce").astype(np.int64, errors="ignore")
sprints["round"] = sprints["round"].apply(pd.to_numeric, errors = "coerce").astype(np.int64, errors="ignore")
sprints["position"] = sprints["position"].apply(pd.to_numeric, errors = "coerce").astype(np.int64, errors="ignore")
sprints = sprints.merge(constructors[["constructorId", "constructor", "constructorNationality", "constructorRef"]], on="constructorId", how="outer")

In [44]:
sprints

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,...,driverUrl,year,round,location,country,gp,name,constructor,constructorNationality,constructorRef
0,1.0,1061.0,830.0,9.0,33.0,2.0,1.0,1,1.0,3.0,...,http://en.wikipedia.org/wiki/Max_Verstappen,2021.0,10.0,Silverstone,UK,British Grand Prix,Max Verstappen,Red Bull,Austrian,red_bull
1,20.0,1061.0,815.0,9.0,11.0,5.0,,R,20.0,0.0,...,http://en.wikipedia.org/wiki/Sergio_P%C3%A9rez,2021.0,10.0,Silverstone,UK,British Grand Prix,Sergio Pérez,Red Bull,Austrian,red_bull
2,22.0,1065.0,830.0,9.0,33.0,3.0,2.0,2,2.0,2.0,...,http://en.wikipedia.org/wiki/Max_Verstappen,2021.0,14.0,Monza,Italy,Italian Grand Prix,Max Verstappen,Red Bull,Austrian,red_bull
3,29.0,1065.0,815.0,9.0,11.0,9.0,9.0,9,9.0,0.0,...,http://en.wikipedia.org/wiki/Sergio_P%C3%A9rez,2021.0,14.0,Monza,Italy,Italian Grand Prix,Sergio Pérez,Red Bull,Austrian,red_bull
4,42.0,1071.0,830.0,9.0,33.0,1.0,2.0,2,2.0,2.0,...,http://en.wikipedia.org/wiki/Max_Verstappen,2021.0,19.0,São Paulo,Brazil,São Paulo Grand Prix,Max Verstappen,Red Bull,Austrian,red_bull
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1525,,,,206.0,,,,,,,...,,,,,,,,Marussia,Russian,marussia
1526,,,,207.0,,,,,,,...,,,,,,,,Caterham,Malaysian,caterham
1527,,,,208.0,,,,,,,...,,,,,,,,Lotus F1,British,lotus_f1
1528,,,,209.0,,,,,,,...,,,,,,,,Manor Marussia,British,manor


In [45]:
sprints.to_json(os.path.join('data','ergast-sprints.json'))