In [1]:
import requests
import json
import os
from pathlib import Path
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
def connection(url, timeout=10):
    '''
    Try to establish a connection to the given URL and decode its JSON body.

    Parameters:
        url: address the GET request is sent to
        timeout: seconds to wait for the server before giving up
                 (without one, requests waits indefinitely)
    Return: the decoded JSON payload on success; otherwise a string that
            starts with "Error: " and describes what went wrong
    '''

    try:
        response = requests.get(url, timeout=timeout)

        # Anything outside the 2xx range counts as a failure
        if not response.status_code // 100 == 2:
            return(f"Error: Unexpected response {response}")

        return response.json()

    except requests.exceptions.RequestException as e:
        return(f"Error: {e}")
    except ValueError as e:
        # Body was not valid JSON; report it the same way as other failures
        return(f"Error: {e}")

In [3]:
def path_exist(filepath):
    '''
    Report whether filepath is present on disk.

    Prints 'Data Status: Stored' when it is and 'Data Source: Gathered'
    when it is not, then returns the matching boolean.
    '''
    found = os.path.exists(filepath)
    print('Data Status: Stored' if found else 'Data Source: Gathered')
    return found

In [4]:
def get_current_constructors():
    '''
    Returns the constructor ids of the 2022 season.

    The constructor records are read from ../data/current_constructors.json
    when that file exists; otherwise they are requested from the Ergast API
    and written to that file so later runs can skip the request.

    Return: list holding the 'constructorId' value of every record
    Raises: RuntimeError when the API request did not succeed
    '''
    filepath = Path('../data/current_constructors.json')
    # Checks if data is already stored
    if path_exist(filepath):
        with open(filepath, 'r', encoding='utf-8') as infile:
            constructors = json.load(infile)
    else:
        # Request data from API
        link = 'http://ergast.com/api/f1/2022/constructors.json?limit=1000'
        response = connection(link)
        # connection() reports failures as an "Error: ..." string instead of raising
        if isinstance(response, str):
            raise RuntimeError(f'Could not download constructors: {response}')
        constructors = response['MRData']['ConstructorTable']['Constructors']
        # Store the data under data/current_constructors.json
        filepath.parent.mkdir(parents=True, exist_ok=True)
        with open(filepath, 'w', encoding='utf-8') as outfile:
            json.dump(constructors, outfile, indent=4)

    return [constructor['constructorId'] for constructor in constructors]

In [5]:
def get_season_data(year):
    '''
    Make sure the standings files for a season are stored on disk.

    Two files are checked and, when missing, downloaded from the Ergast API:
        ../data/constructors/{year}_constructor.json  (constructor standings)
        ../data/constructors/{year}_driver.json       (driver standings)

    Parameters:
        year: season to gather
    Return: None; the function is called for its side effect on disk
    Raises: RuntimeError when a request fails, IndexError when the API has
            no standings list for the requested season
    '''
    _store_standings(
        year,
        'constructorStandings',
        'ConstructorStandings',
        Path(f'../data/constructors/{year}_constructor.json'),
    )
    _store_standings(
        year,
        'driverStandings',
        'DriverStandings',
        Path(f'../data/constructors/{year}_driver.json'),
    )


def _store_standings(year, endpoint, result_key, filepath):
    '''Download one standings table for a season unless filepath already exists.'''
    # Checks if data is already stored
    if path_exist(filepath):
        return

    # Request data from API
    link = f'http://ergast.com/api/f1/{year}/{endpoint}.json?limit=1000'
    response = connection(link)
    # connection() reports failures as an "Error: ..." string instead of raising
    if isinstance(response, str):
        raise RuntimeError(f'Could not download {endpoint} for {year}: {response}')

    standings_lists = response['MRData']['StandingsTable']['StandingsLists']
    if not standings_lists:
        raise IndexError(f'No {endpoint} available for {year}')
    standings = standings_lists[0][result_key]

    # Store the data so the next run can skip the request
    filepath.parent.mkdir(parents=True, exist_ok=True)
    with open(filepath, 'w', encoding='utf-8') as outfile:
        json.dump(standings, outfile, indent=4)

In [6]:
# Takes a string lap time ('M:SS.fff') and converts it to a number.
# NOTE: despite the name, the nanosecond total is divided by 1e9 before it is
# returned, so callers receive seconds as a float.
def time_to_nanoseconds(raw_time):
    '''
    Convert a lap time string into seconds.

    Parameters:
        raw_time: lap time formatted as minutes:seconds.fraction, e.g. '1:30.500'
    Return: the lap time in seconds as a float, or None when raw_time is not
            a string (NaN / None for a missing lap) or does not match the format
    '''
    try:
        parsed = datetime.strptime(raw_time, '%M:%S.%f').time()
    except (TypeError, ValueError):
        # TypeError: raw_time is not a string (e.g. NaN for a lap never driven)
        # ValueError: the string does not follow the M:SS.fff layout
        return None

    nanoseconds = (parsed.minute*6e10)+(parsed.second*1e9)+(parsed.microsecond*1e3)
    return nanoseconds/1e9

In [7]:
# Signed percent difference between a driver's time and the reference time
def percent_difference(driver_time,average_time):
    '''
    Symmetric percent difference between two times.

    The magnitude is |driver - average| divided by the mean of the two values,
    times 100. The result is negative when driver_time is the larger value
    and non-negative otherwise.
    '''
    pair_mean = (driver_time + average_time)/2
    magnitude = abs((driver_time - average_time)/pair_mean)*100
    return -magnitude if driver_time > average_time else magnitude

In [8]:
def sort_scores(score_list):
    '''
    Order (score, ...) entries from highest to lowest score.

    Entries whose score prints as 'nan' are not ranked; they are appended
    after the ranked entries in the order they were given.
    '''
    entries = list(score_list)

    def _has_nan_score(entry):
        return str(entry[0]) == 'nan'

    ranked = [entry for entry in entries if not _has_nan_score(entry)]
    unranked = [entry for entry in entries if _has_nan_score(entry)]

    ranked.sort(key=lambda entry: entry[0], reverse=True)
    return ranked + unranked

In [9]:
def get_driver_team(year,round):
    '''
    Build a driver -> constructor table for a season.

    Reads ../data/constructors/{year}_driver.json and pairs every driver id
    with the id of the first constructor listed for that driver.

    Parameters:
        year: season whose stored driver standings are read
        round: currently unused; kept so existing calls keep working
    Return: DataFrame with the columns 'driver' and 'constructor'
    '''
    filepath = Path(f'../data/constructors/{year}_driver.json')
    with open(filepath, 'r', encoding='utf-8') as infile:
        standings = json.load(infile)

    # Collect all rows first and build the frame once; DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    records = [
        {
            'driver': entry['Driver']['driverId'],
            'constructor': entry['Constructors'][0]['constructorId'],
        }
        for entry in standings
    ]
    return pd.DataFrame(records, columns=['driver','constructor'])


Incorporate a way to weigh in the team score so that we get a new score.

In [10]:
# Scores every driver of one race against the per-lap median lap time.
# Intended to become: def get_team_score(year,round,current_drivers)
year = 2012
_round = 1  # underscore keeps the builtin round() usable when printing below

filepath = Path(f'../data/races/{year}/{_round}.csv')
if path_exist(filepath):
    # Lap-time table was already built by an earlier run
    original_df = pd.read_csv(filepath)
else:
    filepath = Path(f'../data/races/{year}/{_round}.json')
    with open(filepath, 'r', encoding='utf-8') as infile:
        jsondata = json.load(infile)

    # Gets list of all drivers at the start of the race, sorts them alphabetically
    drivers = sorted(timing['driverId'] for timing in jsondata['Laps'][0]['Timings'])

    # One row per driver, one "Lap N" column per lap; a driver without a
    # timing on a lap gets None for that lap
    lap_table = {'Drivers': drivers}
    for lap in jsondata['Laps']:
        time_by_driver = {}
        for timing in lap['Timings']:
            # setdefault keeps the first timing listed for a driver
            time_by_driver.setdefault(timing.get('driverId'), timing.get('time'))
        lap_table[f"Lap {lap['number']}"] = [time_by_driver.get(name) for name in drivers]

    original_df = pd.DataFrame(lap_table)
    # Cache the table once, after every lap column is in place
    original_df.to_csv(Path(f'../data/races/{year}/{_round}.csv'),index=False)

# Add constructor column
# NOTE: get_driver_team reads ../data/constructors/{year}_driver.json, the file
# get_season_data(year) writes; that file has to exist before this cell runs.
working_df = original_df.copy()
drivers = working_df['Drivers'].values.tolist()
driver_team = get_driver_team(year,_round)

team_by_driver = {}
for driver_id, constructor_id in zip(driver_team['driver'], driver_team['constructor']):
    # first listed constructor wins when a driver appears more than once
    team_by_driver.setdefault(driver_id, constructor_id)

unmatched = [name for name in drivers if name not in team_by_driver]
if unmatched:
    raise LookupError(f'No constructor found in the {year} standings for: {unmatched}')

# Fill constructor with a drivers respective team
working_df.insert(loc=1,column='Constructors',value=[team_by_driver[name] for name in drivers])

# Convert each string lap time to its numeric value (time_to_nanoseconds
# divides by 1e9 before returning, so the values are seconds)
lap_cols = working_df.columns[2:]
for col in lap_cols:
    working_df[col] = working_df[col].apply(time_to_nanoseconds)

# The median of a lap is the same for every driver, so compute it once per lap
lap_medians = {lap: working_df[lap].median() for lap in lap_cols}

median_total = []

for driver in drivers:
    score_med = 0
    driver_row = working_df.loc[working_df['Drivers'] == driver]
    # A single missing lap makes the running sum NaN, so drivers who did not
    # complete every lap end up with a NaN score
    for lap in lap_cols:
        score_med += percent_difference(driver_row[lap].values[0],lap_medians[lap])
    median_total.append((score_med,driver))

median_total = sort_scores(median_total)

for score, driver_id in median_total:
    print(f'{driver_id: <20}Score:{round(score,5): >15}')
        


Data Status: Stored
button              Score:       17.76838
vettel              Score:       15.65909
hamilton            Score:        8.89086
webber              Score:        6.02738
alonso              Score:      -17.72023
raikkonen           Score:      -33.16688
perez               Score:      -36.74718
kobayashi           Score:      -38.41618
ricciardo           Score:      -39.16071
vergne              Score:       -39.5187
resta               Score:      -43.24014
rosberg             Score:      -54.65684
bruno_senna         Score:            nan
glock               Score:            nan
grosjean            Score:            nan
kovalainen          Score:            nan
maldonado           Score:            nan
massa               Score:            nan
michael_schumacher  Score:            nan
petrov              Score:            nan
pic                 Score:            nan


Map each driver in a given season and round to a team.