In [1]:
import requests
import pandas as pd
import numpy as np

API_PATH = 'http://ergast.com/api/f1'

In [2]:
def getDriversByYear(year, fmt='json', limit=1000, timeout=10):
    """Fetch the drivers that took part in a season from the Ergast API.

    Args:
        year: Season to query; interpolated into the request URL.
        fmt: Response format suffix for the URL. The body is decoded with
            ``Response.json()``, so only ``'json'`` is actually usable.
        limit: Maximum number of records to request. Ergast pages its
            responses and returns only the first 30 records when no limit
            is given, which silently truncates seasons with more drivers.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The list of driver dicts found under
        ``MRData -> DriverTable -> Drivers`` in the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    url = '{}/{}/drivers.{}'.format(API_PATH, year, fmt)
    response = requests.get(url, params={'limit': limit}, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    return response.json()['MRData']['DriverTable']['Drivers']


def getDriversByYearDf(year):
    """Return the drivers of the given season as a pandas DataFrame.

    The driver records come from ``getDriversByYear(year)`` and are turned
    into a frame with ``DataFrame.from_records``, one row per record.
    """
    return pd.DataFrame.from_records(getDriversByYear(year))
    
# Build the driver table for the 2014 season and display it as the cell output.
drivers_2014 = getDriversByYearDf(2014)
drivers_2014

Unnamed: 0,code,dateOfBirth,driverId,familyName,givenName,nationality,permanentNumber,url
0,ALO,1981-07-29,alonso,Alonso,Fernando,Spanish,14,http://en.wikipedia.org/wiki/Fernando_Alonso
1,BIA,1989-08-03,jules_bianchi,Bianchi,Jules,French,17,http://en.wikipedia.org/wiki/Jules_Bianchi
2,BOT,1989-08-29,bottas,Bottas,Valtteri,Finnish,77,http://en.wikipedia.org/wiki/Valtteri_Bottas
3,BUT,1980-01-19,button,Button,Jenson,British,22,http://en.wikipedia.org/wiki/Jenson_Button
4,CHI,1991-04-21,chilton,Chilton,Max,British,4,http://en.wikipedia.org/wiki/Max_Chilton
5,ERI,1990-09-02,ericsson,Ericsson,Marcus,Swedish,9,http://en.wikipedia.org/wiki/Marcus_Ericsson
6,GRO,1986-04-17,grosjean,Grosjean,Romain,French,8,http://en.wikipedia.org/wiki/Romain_Grosjean
7,GUT,1991-08-05,gutierrez,Gutiérrez,Esteban,Mexican,21,http://en.wikipedia.org/wiki/Esteban_Guti%C3%A...
8,HAM,1985-01-07,hamilton,Hamilton,Lewis,British,44,http://en.wikipedia.org/wiki/Lewis_Hamilton
9,HUL,1987-08-19,hulkenberg,Hülkenberg,Nico,German,27,http://en.wikipedia.org/wiki/Nico_H%C3%BClkenberg


In [3]:
def getRacesByYear(year, fmt='json', limit=100, timeout=10):
    """Fetch the race schedule of a season from the Ergast API.

    Args:
        year: Season to query; interpolated into the request URL.
        fmt: Response format suffix for the URL. The body is decoded with
            ``Response.json()``, so only ``'json'`` is actually usable.
        limit: Maximum number of records to request. Ergast returns only
            the first 30 records when no limit is given; asking explicitly
            keeps the result from being truncated without notice.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The list of race dicts found under
        ``MRData -> RaceTable -> Races`` in the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    url = '{}/{}.{}'.format(API_PATH, year, fmt)
    response = requests.get(url, params={'limit': limit}, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    return response.json()['MRData']['RaceTable']['Races']


def getRacesByYearDf(year):
    """Return the races of the given season as a pandas DataFrame.

    Each race record from ``getRacesByYear(year)`` contributes one row with
    its ``round``, its ``raceName`` and the ``circuitId`` taken from the
    nested ``Circuit`` object.
    """
    rounds, race_names, circuit_ids = [], [], []
    for race in getRacesByYear(year):
        rounds.append(race['round'])
        race_names.append(race['raceName'])
        circuit_ids.append(race['Circuit']['circuitId'])

    return pd.DataFrame({
        'round': rounds,
        'raceName': race_names,
        'circuitId': circuit_ids,
    })

# Build the race table for the 2014 season and display it as the cell output.
races_2014 = getRacesByYearDf(2014)
races_2014

Unnamed: 0,circuitId,raceName,round
0,albert_park,Australian Grand Prix,1
1,sepang,Malaysian Grand Prix,2
2,bahrain,Bahrain Grand Prix,3
3,shanghai,Chinese Grand Prix,4
4,catalunya,Spanish Grand Prix,5
5,monaco,Monaco Grand Prix,6
6,villeneuve,Canadian Grand Prix,7
7,red_bull_ring,Austrian Grand Prix,8
8,silverstone,British Grand Prix,9
9,hockenheimring,German Grand Prix,10


In [40]:
import re
from datetime import timedelta

def lapTimeToSeconds(lapTime):
    """Convert a lap time string such as ``'1:43.066'`` to seconds.

    Accepted forms are ``M:SS.fff``, ``SS.fff``, ``M:SS`` and ``SS``; the
    minutes part and the fractional part are both optional.

    Args:
        lapTime: Lap time text.

    Returns:
        The duration in seconds as a float.

    Raises:
        ValueError: If ``lapTime`` does not match one of the accepted forms.
    """
    match = re.match(r'^(?:(\d+):)?(\d+)(?:\.(\d+))?$', lapTime)
    if match is None:
        raise ValueError('unrecognised lap time: {!r}'.format(lapTime))

    minutes, seconds, fraction = match.groups()
    # The digits after the dot are a decimal fraction of a second, so pad
    # (or cut) them to microseconds: '.5' must mean 500 ms, not 5 ms.
    microseconds = int((fraction or '').ljust(6, '0')[:6])
    duration = timedelta(minutes=int(minutes or 0),
                         seconds=int(seconds),
                         microseconds=microseconds)
    return duration.total_seconds()


def fastLapTime(item):
    """Return the fastest-lap time of a result record in seconds.

    Reads ``item['FastestLap']['Time']['time']`` and converts it with
    ``lapTimeToSeconds``. A ``KeyError`` raised while doing so yields
    ``np.nan`` instead.
    """
    try:
        seconds = lapTimeToSeconds(item['FastestLap']['Time']['time'])
    except KeyError:
        seconds = np.nan
    return seconds
    

def fastLapNum(item):
    """Return the ``lap`` value of a result record's ``FastestLap`` entry.

    The value is passed through unchanged. If ``FastestLap`` or its ``lap``
    key is missing, ``np.nan`` is returned.
    """
    try:
        fastest = item['FastestLap']
        lap = fastest['lap']
    except KeyError:
        lap = np.nan
    return lap
    

def fastLapRank(item):
    """Return the ``rank`` value of a result record's ``FastestLap`` entry.

    The value is passed through unchanged. If ``FastestLap`` or its ``rank``
    key is missing, ``np.nan`` is returned.
    """
    try:
        fastest = item['FastestLap']
        rank = fastest['rank']
    except KeyError:
        rank = np.nan
    return rank


def getRaceResultsByYearRound(year, roundNum, fmt='json', limit=100, timeout=10):
    """Fetch the results of a single race from the Ergast API.

    Args:
        year: Season to query; interpolated into the request URL.
        roundNum: Round number within the season; interpolated into the URL.
        fmt: Response format suffix for the URL. The body is decoded with
            ``Response.json()``, so only ``'json'`` is actually usable.
        limit: Maximum number of records to request. Ergast returns only
            the first 30 records when no limit is given, which would drop
            entries from races with larger fields.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The first race dict under ``MRData -> RaceTable -> Races``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
        IndexError: If the response contains no race for this year/round.
    """
    url = '{}/{}/{}/results.{}'.format(API_PATH, year, roundNum, fmt)
    response = requests.get(url, params={'limit': limit}, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    # TODO Should data be converted to proper types here? Ergast returns all strings.
    races = response.json()['MRData']['RaceTable']['Races']
    if not races:
        # Same exception type as indexing the empty list, but with context.
        raise IndexError(
            'no race results found for year {} round {}'.format(year, roundNum))
    return races[0]


def getRaceResultsDf(year, roundNum):
    """Return the results of one race as a typed pandas DataFrame.

    The race is fetched with ``getRaceResultsByYearRound`` and each entry of
    its ``Results`` list becomes one row.

    Args:
        year: Season of the race.
        roundNum: Round number of the race within the season.

    Returns:
        A DataFrame with the columns ``carNum``, ``pos``, ``driverId``,
        ``constructorId``, ``grid``, ``laps``, ``status``, ``fastLapNum``,
        ``fastLapTime`` and ``fastLapRank``. The fastest-lap columns are
        float64 so that rows without a ``FastestLap`` entry can hold NaN.
    """
    race_data = getRaceResultsByYearRound(year, roundNum)
    results = race_data['Results']

    columns = ['carNum', 'pos', 'driverId', 'constructorId', 'grid', 'laps',
               'status', 'fastLapNum', 'fastLapTime', 'fastLapRank']
    # int8 tops out at 127, which is too small for lap counts of long races
    # (e.g. 200-lap Indianapolis 500 rounds) and for three-digit car numbers,
    # so those two columns use int16.
    data_types = {'carNum': np.int16, 'pos': np.int8, 'driverId': object,
                  'constructorId': object, 'grid': np.int8, 'laps': np.int16,
                  'status': object, 'fastLapNum': np.float64,
                  'fastLapTime': np.float64, 'fastLapRank': np.float64}

    data = [
        (item['number'],
         item['position'],
         item['Driver']['driverId'],
         item['Constructor']['constructorId'],
         item['grid'],
         item['laps'],
         item['status'],
         fastLapNum(item),
         fastLapTime(item),
         fastLapRank(item))
        for item in results
    ]

    # The API delivers every value as a string; cast once the frame is built.
    return pd.DataFrame(data, columns=columns).astype(data_types)

# FastestLap is not set as a key if the driver did not complete a lap.
# That is a quirk of the Ergast API, and kind of a bad one: it would be better
# if the key were either an empty object or null.
# getRaceResultsByYearRound(2014, 2)['Results'][21]
getRaceResultsDf(2014, 2)

Unnamed: 0,carNum,pos,driverId,constructorId,grid,laps,status,fastLapNum,fastLapTime,fastLapRank
0,44,1,hamilton,mercedes,1,56,Finished,53.0,103.066,1.0
1,6,2,rosberg,mercedes,3,56,Finished,55.0,103.96,2.0
2,1,3,vettel,red_bull,2,56,Finished,51.0,104.289,4.0
3,14,4,alonso,ferrari,4,56,Finished,47.0,104.165,3.0
4,27,5,hulkenberg,force_india,7,56,Finished,38.0,105.982,10.0
5,22,6,button,mclaren,10,56,Finished,47.0,106.039,11.0
6,19,7,massa,williams,13,56,Finished,44.0,104.897,6.0
7,77,8,bottas,williams,18,56,Finished,31.0,105.475,9.0
8,20,9,kevin_magnussen,mclaren,8,55,+1 Lap,44.0,105.373,8.0
9,26,10,kvyat,toro_rosso,11,55,+1 Lap,36.0,106.695,13.0
