In [28]:
import pandas as pd
import numpy as np
import requests
from datetime import datetime
import json
import warnings
from unidecode import unidecode

from sys import platform

# Root folder of the project; every data file below is addressed as
# `path + 'data/<name>.csv'`, so the value must stay a string that ends
# with a slash.
if platform == "win32":
    path = 'C:/Users/olive/GitHub/F1-Fantasy-Predictor/'
elif platform == "darwin":
    path = '~/Documents/GitHub/F1-Fantasy-Predictor/'
else:
    # Any other platform (e.g. Linux) previously left `path` undefined, which
    # only surfaced later as a NameError at the first `path + ...` expression.
    # Fall back to the current working directory so the notebook can still run
    # when it is started from the repository root.
    path = './'

warnings.filterwarnings("ignore", category=RuntimeWarning) 
pd.options.mode.chained_assignment = None  # default='warn'

%matplotlib inline

In [29]:
# Seasons to request from the API: 2014 through 2023, both ends included.
years = [*range(2014, 2024)]

In [30]:
# Download the race calendar of every season in `years` and flatten it into
# one row per race: season, round, circuit id, country, coordinates and the
# URL attached to the race entry.
# NOTE(review): verify that the ergast.com endpoint is still being served
# before relying on a fresh run of this cell.

# Column-wise accumulator; kept under its own name so `races` only ever refers
# to the finished DataFrame.
races_columns = {
    'season': [],
    'round': [],
    'circuit_id': [],
    'country': [],
    'lat': [],
    'long': [],
    'url': [],
}

# The URL template does not change between iterations, so build it once.
url_template = 'https://ergast.com/api/f1/{}.json'

for year in years:
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url_template.format(year), timeout=30)
    # Fail with a clear HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    # Named `payload` so the imported `json` module is not shadowed.
    payload = response.json()

    for item in payload['MRData']['RaceTable']['Races']:
        circuit = item['Circuit']
        location = circuit['Location']

        races_columns['season'].append(int(item['season']))
        races_columns['round'].append(int(item['round']))
        races_columns['circuit_id'].append(circuit['circuitId'])
        races_columns['country'].append(location['country'])
        races_columns['lat'].append(float(location['lat']))
        races_columns['long'].append(float(location['long']))
        races_columns['url'].append(item['url'])

races = pd.DataFrame(races_columns)
races.head()

Unnamed: 0,season,round,circuit_id,country,lat,long,url
0,2014,1,albert_park,Australia,-37.8497,144.968,http://en.wikipedia.org/wiki/2014_Australian_G...
1,2014,2,sepang,Malaysia,2.76083,101.738,http://en.wikipedia.org/wiki/2014_Malaysian_Gr...
2,2014,3,bahrain,Bahrain,26.0325,50.5106,http://en.wikipedia.org/wiki/2014_Bahrain_Gran...
3,2014,4,shanghai,China,31.3389,121.22,http://en.wikipedia.org/wiki/2014_Chinese_Gran...
4,2014,5,catalunya,Spain,41.57,2.26111,http://en.wikipedia.org/wiki/2014_Spanish_Gran...


In [31]:
### Query API
# Download the calendar of every season in `years` and keep, per race, the
# season, round, circuit id and the start of the race as a datetime.

# Column-wise accumulator; kept under its own name so `schedules` only ever
# refers to the finished DataFrame.
schedule_columns = {
    'season': [],
    'round': [],
    'circuit_id': [],
    'date': [],
}

# https, to match the other calendar request in this notebook.
url_template = 'https://ergast.com/api/f1/{}.json'

for year in years:
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url_template.format(year), timeout=30)
    # Fail with a clear HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    # Named `payload` so the imported `json` module is not shadowed.
    payload = response.json()

    for item in payload['MRData']['RaceTable']['Races']:
        schedule_columns['season'].append(int(item['season']))
        schedule_columns['round'].append(int(item['round']))
        schedule_columns['circuit_id'].append(item['Circuit']['circuitId'])

        # An entry without a 'time' field used to abort the whole download
        # with a KeyError; such a race is now recorded at midnight of its date.
        start_time = item.get('time', '00:00:00Z')
        schedule_columns['date'].append(
            datetime.strptime(item['date'] + 'T' + start_time, '%Y-%m-%dT%H:%M:%SZ')
        )

schedules = pd.DataFrame(schedule_columns)
schedules.head()

Unnamed: 0,season,round,circuit_id,date
0,2014,1,albert_park,2014-03-16 06:00:00
1,2014,2,sepang,2014-03-30 08:00:00
2,2014,3,bahrain,2014-04-06 15:00:00
3,2014,4,shanghai,2014-04-20 07:00:00
4,2014,5,catalunya,2014-05-11 12:00:00


In [32]:
# Combine circuit details with race start times. The inner join keeps only
# rows whose season, round and circuit id appear in both tables.
merge_keys = ['season', 'round', 'circuit_id']
race_schedules = races.merge(schedules, how='inner', on=merge_keys)

# Persist the combined table, leaving the index out of the file.
race_schedules.to_csv(path_or_buf=path+'data/races.csv', index=False)

In [33]:
# Reload the saved race table and pair every season with its round numbers.
# `rounds` ends up as a list of [season, [round, ...]] entries, one per
# season, in the order the seasons first appear in the file.
races = pd.read_csv(path+'data/races.csv')

rounds = []
for year in races['season'].unique():
    season_rounds = races.loc[races['season'] == year, 'round']
    rounds.append([year, season_rounds.tolist()])

In [34]:
# query API
# Download the result list of every (season, round) pair in `rounds` and store
# one row per driver per race.

# Column-wise accumulator; kept under its own name so `results` only ever
# refers to the finished DataFrame.
results_columns = {
    'season': [],
    'round': [],
    'circuit_id': [],
    'driver': [],
    'date_of_birth': [],
    'nationality': [],
    'constructor': [],
    'grid': [],
    'time': [],
    'status': [],
    'points': [],
    'podium': [],
    'fastest_lap': [],
}

# https, to match the standings requests made elsewhere in this notebook.
url_template = 'https://ergast.com/api/f1/{}/{}/results.json'

for season, season_rounds in rounds:
    for race_round in season_rounds:
        # A timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(url_template.format(season, race_round), timeout=30)
        # Fail with a clear HTTP error instead of trying to parse an error page.
        response.raise_for_status()
        # Named `payload` so the imported `json` module is not shadowed.
        payload = response.json()

        race_list = payload['MRData']['RaceTable']['Races']
        if not race_list:
            # The API returned no race for this round; nothing to record.
            continue
        race = race_list[0]

        for item in race['Results']:
            driver = item['Driver']

            results_columns['season'].append(int(race['season']))
            results_columns['round'].append(int(race['round']))
            results_columns['circuit_id'].append(race['Circuit']['circuitId'])
            # Driver key: ASCII-folded, lower-case "given_family".
            results_columns['driver'].append(
                unidecode(driver['givenName'].lower()) + '_' + unidecode(driver['familyName'].lower())
            )
            results_columns['date_of_birth'].append(driver['dateOfBirth'])
            results_columns['nationality'].append(driver['nationality'])
            results_columns['constructor'].append(item['Constructor']['constructorId'])
            results_columns['grid'].append(int(item['grid']))

            # A missing or malformed race time is stored as None. Only the
            # lookup/conversion errors are caught, so an interrupt or an
            # unrelated bug is no longer silently swallowed.
            try:
                race_millis = int(item['Time']['millis'])
            except (KeyError, TypeError, ValueError):
                race_millis = None
            results_columns['time'].append(race_millis)

            results_columns['status'].append(item['status'])
            results_columns['points'].append(float(item['points']))
            # The 'podium' column holds the finishing position of the entry.
            results_columns['podium'].append(int(item['position']))

            # A missing fastest-lap entry is stored as None.
            try:
                fastest_lap = item['FastestLap']['Time']['time']
            except (KeyError, TypeError):
                fastest_lap = None
            results_columns['fastest_lap'].append(fastest_lap)


results = pd.DataFrame(results_columns)
results.head()

Unnamed: 0,season,round,circuit_id,driver,date_of_birth,nationality,constructor,grid,time,status,points,podium,fastest_lap
0,2014,1,albert_park,nico_rosberg,1985-06-27,German,mercedes,3,5578710.0,Finished,25.0,1,1:32.478
1,2014,1,albert_park,kevin_magnussen,1992-10-05,Danish,mclaren,4,5605487.0,Finished,18.0,2,1:33.066
2,2014,1,albert_park,jenson_button,1980-01-19,British,mclaren,10,5608737.0,Finished,15.0,3,1:32.917
3,2014,1,albert_park,fernando_alonso,1981-07-29,Spanish,ferrari,5,5613994.0,Finished,12.0,4,1:33.186
4,2014,1,albert_park,valtteri_bottas,1989-08-28,Finnish,williams,15,5626349.0,Finished,10.0,5,1:32.616


In [35]:
# Persist the race results table, leaving the index out of the file.
results.to_csv(path_or_buf=path+'data/results.csv', index=False)

In [37]:
# Download the driver championship table after every (season, round) pair in
# `rounds` and store one row per driver per round.

# Column-wise accumulator; kept under its own name so `driver_standings` only
# ever refers to the finished DataFrame.
driver_standings_columns = {
    'season': [],
    'round': [],
    'driver': [],
    'driver_points': [],
    'driver_wins': [],
    'driver_standings_pos': [],
}

url_template = 'https://ergast.com/api/f1/{}/{}/driverStandings.json'

for season, season_rounds in rounds:
    for race_round in season_rounds:
        # A timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(url_template.format(season, race_round), timeout=30)
        # Fail with a clear HTTP error instead of trying to parse an error page.
        response.raise_for_status()
        # Named `payload` so the imported `json` module is not shadowed.
        payload = response.json()

        standings_lists = payload['MRData']['StandingsTable']['StandingsLists']
        if not standings_lists:
            # The API returned no standings for this round; nothing to record.
            continue
        standings = standings_lists[0]

        for item in standings['DriverStandings']:
            driver = item['Driver']

            driver_standings_columns['season'].append(int(standings['season']))
            driver_standings_columns['round'].append(int(standings['round']))
            # Driver key: ASCII-folded, lower-case "given_family".
            driver_standings_columns['driver'].append(
                unidecode(driver['givenName'].lower()) + '_' + unidecode(driver['familyName'].lower())
            )
            driver_standings_columns['driver_points'].append(float(item['points']))
            driver_standings_columns['driver_wins'].append(int(item['wins']))
            driver_standings_columns['driver_standings_pos'].append(int(item['position']))

driver_standings = pd.DataFrame(driver_standings_columns)
driver_standings.tail(21)

2014 1
2014 2
2014 3
2014 4
2014 5
2014 6
2014 7
2014 8
2014 9
2014 10
2014 11
2014 12
2014 13
2014 14
2014 15
2014 16
2014 17
2014 18
2014 19
2015 1
2015 2
2015 3
2015 4
2015 5
2015 6
2015 7
2015 8
2015 9
2015 10
2015 11
2015 12
2015 13
2015 14
2015 15
2015 16
2015 17
2015 18
2015 19
2016 1
2016 2
2016 3
2016 4
2016 5
2016 6
2016 7
2016 8
2016 9
2016 10
2016 11
2016 12
2016 13
2016 14
2016 15
2016 16
2016 17
2016 18
2016 19
2016 20
2016 21
2017 1
2017 2
2017 3
2017 4
2017 5
2017 6
2017 7
2017 8
2017 9
2017 10
2017 11
2017 12
2017 13
2017 14
2017 15
2017 16
2017 17
2017 18
2017 19
2017 20
2018 1
2018 2
2018 3
2018 4
2018 5
2018 6
2018 7
2018 8
2018 9
2018 10
2018 11
2018 12
2018 13
2018 14
2018 15
2018 16
2018 17
2018 18
2018 19
2018 20
2018 21
2019 1
2019 2
2019 3
2019 4
2019 5
2019 6
2019 7
2019 8
2019 9
2019 10
2019 11
2019 12
2019 13
2019 14
2019 15
2019 16
2019 17
2019 18
2019 19
2019 20
2019 21
2020 1
2020 2
2020 3
2020 4
2020 5
2020 6
2020 7
2020 8
2020 9
2020 10
2020 11
2020 12

Unnamed: 0,season,round,driver,driver_points,driver_wins,driver_standings_pos
3888,2022,22,nico_hulkenberg,0.0,0,22
3889,2023,1,max_verstappen,25.0,1,1
3890,2023,1,sergio_perez,18.0,0,2
3891,2023,1,fernando_alonso,15.0,0,3
3892,2023,1,carlos_sainz,12.0,0,4
3893,2023,1,lewis_hamilton,10.0,0,5
3894,2023,1,lance_stroll,8.0,0,6
3895,2023,1,george_russell,6.0,0,7
3896,2023,1,valtteri_bottas,4.0,0,8
3897,2023,1,pierre_gasly,2.0,0,9


In [38]:
# Persist the driver standings table, leaving the index out of the file.
driver_standings.to_csv(path_or_buf=path+'data/driver_standings.csv', index=False)

In [39]:
# Download the constructor championship table after every (season, round)
# pair and store one row per constructor per round.

# Same season/round listing as the other downloads (an alias, not a copy).
constructor_rounds = rounds

# Column-wise accumulator; kept under its own name so `constructor_standings`
# only ever refers to the finished DataFrame.
constructor_standings_columns = {
    'season': [],
    'round': [],
    'constructor': [],
    'constructor_points': [],
    'constructor_wins': [],
    'constructor_standings_pos': [],
}

url_template = 'https://ergast.com/api/f1/{}/{}/constructorStandings.json'

for season, season_rounds in constructor_rounds:
    for race_round in season_rounds:
        # A timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(url_template.format(season, race_round), timeout=30)
        # Fail with a clear HTTP error instead of trying to parse an error page.
        response.raise_for_status()
        # Named `payload` so the imported `json` module is not shadowed.
        payload = response.json()

        standings_lists = payload['MRData']['StandingsTable']['StandingsLists']
        if not standings_lists:
            # The API returned no standings for this round; nothing to record.
            continue
        standings = standings_lists[0]

        for item in standings['ConstructorStandings']:
            constructor_standings_columns['season'].append(int(standings['season']))
            constructor_standings_columns['round'].append(int(standings['round']))
            constructor_standings_columns['constructor'].append(item['Constructor']['constructorId'])
            constructor_standings_columns['constructor_points'].append(float(item['points']))
            constructor_standings_columns['constructor_wins'].append(int(item['wins']))
            constructor_standings_columns['constructor_standings_pos'].append(int(item['position']))

constructor_standings = pd.DataFrame(constructor_standings_columns)
constructor_standings.tail(10)

Unnamed: 0,season,round,constructor,constructor_points,constructor_wins,constructor_standings_pos
1859,2023,1,red_bull,43.0,1,1
1860,2023,1,aston_martin,23.0,0,2
1861,2023,1,mercedes,16.0,0,3
1862,2023,1,ferrari,12.0,0,4
1863,2023,1,alfa,4.0,0,5
1864,2023,1,alpine,2.0,0,6
1865,2023,1,williams,1.0,0,7
1866,2023,1,alphatauri,0.0,0,8
1867,2023,1,haas,0.0,0,9
1868,2023,1,mclaren,0.0,0,10


In [40]:
# Persist the constructor standings table, leaving the index out of the file.
constructor_standings.to_csv(path_or_buf=path+'data/constructor_standings.csv', index=False)