In [None]:
import pandas as pd
import numpy as np
import requests
from datetime import datetime

from sys import platform

# Base directory of the local f1-analytics checkout. Every CSV in this
# notebook is addressed as `path + 'data/...'`, so the value must end in '/'.
if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
elif platform == "darwin":
    path = '~/Documents/GitHub/f1-analytics/'
else:
    # Any other platform (e.g. Linux) previously left `path` unbound, which
    # only surfaced as a NameError at the first to_csv call, after the API
    # download loops had already run. Fall back to the working directory.
    path = './'

In [None]:
# Seasons to download: 2014 through 2021 inclusive (range's stop is exclusive).
years = [*range(2014, 2022)]

In [None]:
# Download the race calendar for every season in `years` and collect one row
# per race: season, round, circuit id, country, coordinates and the entry's URL.
# `races` starts as a dict of column lists and ends the cell as a DataFrame.
races = {
        'season': [],
        'round': [],
        'circuit_id': [],
        'country': [],
        'lat': [],
        'long': [],
        'url': []
        }

for year in years:
    url = 'https://ergast.com/api/f1/{}.json'
    # A timeout keeps a stalled connection from hanging the cell indefinitely.
    r = requests.get(url.format(year), timeout=30)
    # Fail right here on a 4xx/5xx response rather than on an opaque
    # JSON-decoding error or KeyError further down.
    r.raise_for_status()
    payload = r.json()

    for item in payload['MRData']['RaceTable']['Races']:
        location = item['Circuit']['Location']
        races['season'].append(int(item['season']))
        races['round'].append(int(item['round']))
        races['circuit_id'].append(item['Circuit']['circuitId'])
        races['country'].append(location['country'])
        # The API delivers coordinates as strings; store them as floats.
        races['lat'].append(float(location['lat']))
        races['long'].append(float(location['long']))
        races['url'].append(item['url'])

races = pd.DataFrame(races)
races.head()

In [None]:
### Query API
# Download the calendar for every season in `years` again, this time keeping
# the start date/time of each race. The (season, round, circuit_id) columns
# are the keys used to join this table onto `races`.

schedules = {
        'season': [],
        'round': [],
        'circuit_id': [],
        'date': []
        }

for year in years:

    # https, matching the races query (this cell previously used plain http).
    url = 'https://ergast.com/api/f1/{}.json'
    # A timeout keeps a stalled connection from hanging the cell indefinitely.
    r = requests.get(url.format(year), timeout=30)
    # Surface HTTP errors here instead of as a JSON/KeyError below.
    r.raise_for_status()
    payload = r.json()

    for item in payload['MRData']['RaceTable']['Races']:
        schedules['season'].append(int(item['season']))
        schedules['round'].append(int(item['round']))
        schedules['circuit_id'].append(item['Circuit']['circuitId'])

        race_time = item.get('time')
        if race_time is not None:
            start = datetime.strptime(item['date'] + 'T' + race_time, '%Y-%m-%dT%H:%M:%SZ')
        else:
            # Entry carries no 'time' field: keep the race date (at midnight)
            # rather than aborting the whole download with a KeyError.
            start = datetime.strptime(item['date'], '%Y-%m-%d')
        schedules['date'].append(start)

schedules = pd.DataFrame(schedules)
schedules.head()

In [None]:
# Attach each race's start date to its calendar row (inner join on the shared
# season / round / circuit_id keys), then persist the combined table.
race_schedules = races.merge(schedules, on=['season', 'round', 'circuit_id'], how='inner')

race_schedules.to_csv(path + 'data/races.csv', index=False)

In [None]:
# Reload the saved calendar and build `rounds`: one [season, [round, ...]]
# pair per season, in the order the seasons first appear in the file.
races = pd.read_csv(path + 'data/races.csv')

rounds = [
    [year, list(races.loc[races.season == year, 'round'])]
    for year in np.array(races.season.unique())
]

In [None]:
# query API
# Fetch the race classification for every (season, round) pair in `rounds`
# and flatten it into one row per driver per race. `results` starts as a dict
# of column lists and ends the cell as a DataFrame.

results = {
    'season': [],
    'round':[],
    'circuit_id':[],
    'driver': [],
    'date_of_birth': [],
    'nationality': [],
    'constructor': [],
    'grid': [],
    'time': [],
    'status': [],
    'points': [],
    'podium': [],
    'fastest_lap': []
    }

for season, season_rounds in rounds:
    for race_round in season_rounds:

        # https, matching the other queries in this notebook.
        url = 'https://ergast.com/api/f1/{}/{}/results.json'
        # A timeout keeps a stalled connection from hanging the cell indefinitely.
        r = requests.get(url.format(season, race_round), timeout=30)
        # Surface HTTP errors here instead of as a JSON/KeyError below.
        r.raise_for_status()

        race_list = r.json()['MRData']['RaceTable']['Races']
        if not race_list:
            # No race entry returned for this round; skip it instead of
            # failing with IndexError on race_list[0].
            continue
        race = race_list[0]

        for item in race['Results']:

            results['season'].append(int(race['season']))
            results['round'].append(int(race['round']))
            results['circuit_id'].append(race['Circuit']['circuitId'])
            results['driver'].append(item['Driver']['driverId'])
            results['date_of_birth'].append(item['Driver']['dateOfBirth'])
            results['nationality'].append(item['Driver']['nationality'])
            results['constructor'].append(item['Constructor']['constructorId'])
            results['grid'].append(int(item['grid']))

            # Race time in milliseconds, or None when the entry has no usable
            # 'Time'/'millis' value. Only the lookup/conversion errors are
            # caught (the original used a bare `except:`).
            try:
                results['time'].append(int(item['Time']['millis']))
            except (KeyError, TypeError, ValueError):
                results['time'].append(None)

            results['status'].append(item['status'])
            results['points'].append(float(item['points']))
            # NOTE: the 'podium' column holds the finishing position of every
            # driver, not only the top three.
            results['podium'].append(int(item['position']))

            # None when the entry has no fastest-lap data. The original indexed
            # item['FastestLap'] whenever laps != '0' and raised KeyError for a
            # driver who completed laps but has no fastest lap recorded.
            results['fastest_lap'].append(
                item.get('FastestLap', {}).get('Time', {}).get('time')
            )


results = pd.DataFrame(results)
results.head()

In [None]:
# Persist the per-driver race results (row index omitted).
results.to_csv(path + 'data/results.csv', index=False)

In [None]:
# Fetch the drivers' championship table as it stood after every
# (season, round) pair in `rounds`: one row per driver per round.
# `driver_standings` starts as a dict of column lists and ends as a DataFrame.
driver_standings = {
    'season': [],
    'round':[],
    'driver': [],
    'driver_points': [],
    'driver_wins': [],
    'driver_standings_pos': []}

for season, season_rounds in rounds:
    for race_round in season_rounds:

        url = 'https://ergast.com/api/f1/{}/{}/driverStandings.json'
        # A timeout keeps a stalled connection from hanging the cell indefinitely.
        r = requests.get(url.format(season, race_round), timeout=30)
        # Surface HTTP errors here instead of as a JSON/KeyError below.
        r.raise_for_status()

        standings_lists = r.json()['MRData']['StandingsTable']['StandingsLists']
        if not standings_lists:
            # No standings returned for this round; skip it instead of
            # failing with IndexError on standings_lists[0].
            continue
        standings = standings_lists[0]

        for item in standings['DriverStandings']:
            driver_standings['season'].append(int(standings['season']))
            driver_standings['round'].append(int(standings['round']))
            driver_standings['driver'].append(item['Driver']['driverId'])
            driver_standings['driver_points'].append(float(item['points']))
            driver_standings['driver_wins'].append(int(item['wins']))
            driver_standings['driver_standings_pos'].append(int(item['position']))

driver_standings = pd.DataFrame(driver_standings)
driver_standings.tail(21)

In [None]:
# Persist the per-round driver standings (row index omitted).
driver_standings.to_csv(path + 'data/driver_standings.csv', index=False)

In [None]:
# Fetch the constructors' championship table as it stood after every
# (season, round) pair: one row per constructor per round.
# `constructor_standings` starts as a dict of column lists and ends as a DataFrame.

# Alias of `rounds` (same list object, not a copy).
constructor_rounds = rounds

constructor_standings = {'season': [],
                    'round':[],
                    'constructor': [],
                    'constructor_points': [],
                    'constructor_wins': [],
                   'constructor_standings_pos': [],
                   'constructor_nationality': []}

for season, season_rounds in constructor_rounds:
    for race_round in season_rounds:

        url = 'https://ergast.com/api/f1/{}/{}/constructorStandings.json'
        # A timeout keeps a stalled connection from hanging the cell indefinitely.
        r = requests.get(url.format(season, race_round), timeout=30)
        # Surface HTTP errors here instead of as a JSON/KeyError below.
        r.raise_for_status()

        standings_lists = r.json()['MRData']['StandingsTable']['StandingsLists']
        if not standings_lists:
            # No standings returned for this round; skip it instead of
            # failing with IndexError on standings_lists[0].
            continue
        standings = standings_lists[0]

        for item in standings['ConstructorStandings']:
            constructor_standings['season'].append(int(standings['season']))
            constructor_standings['round'].append(int(standings['round']))
            constructor_standings['constructor'].append(item['Constructor']['constructorId'])
            constructor_standings['constructor_points'].append(float(item['points']))
            constructor_standings['constructor_wins'].append(int(item['wins']))
            # Nationality is a text attribute of the nested Constructor object
            # (like constructorId above). The original read
            # int(item['nationality']), which looks the key up on the wrong
            # level and tries to convert a nationality to an integer.
            constructor_standings['constructor_nationality'].append(item['Constructor']['nationality'])
            constructor_standings['constructor_standings_pos'].append(int(item['position']))

constructor_standings = pd.DataFrame(constructor_standings)
constructor_standings.tail(10)

In [None]:
# Persist the per-round constructor standings (row index omitted).
constructor_standings.to_csv(path + 'data/constructor_standings.csv', index=False)