In [37]:
import requests
import json
import os
from pathlib import Path
import pandas as pd
import numpy as np
from datetime import datetime
import statistics

In [38]:
def path_exist(filepath):
    '''Report whether `filepath` points at an existing filesystem entry.

    Thin wrapper around `os.path.exists`; the result is a plain bool.
    '''
    return os.path.exists(filepath)

In [39]:
# Takes a string lap time and converts it to seconds (the function name is historical)
def time_to_nanoseconds(raw_time):
    '''Convert a lap-time string into a number of seconds.

    Despite the name, the value handed back is in seconds: the time is first
    expressed in nanoseconds and then divided by 1e9.

    Parameters
    ----------
    raw_time : str or any
        Lap time formatted as minutes:seconds.fraction (``'%M:%S.%f'``),
        e.g. ``'1:23.456'``. Non-string values (NaN, None) are tolerated.

    Returns
    -------
    float
        Seconds, when `raw_time` parses as ``'%M:%S.%f'``.
    None
        When `raw_time` instead parses as ``'%I:%M:%S.%f'``
        (a time that carries an hour component).
    object
        `raw_time` itself, unchanged, when it matches neither format or is
        not a string (this is how NaN / None pass through).
    '''
    try:
        parsed = datetime.strptime(raw_time, '%M:%S.%f').time()
    except (TypeError, ValueError):
        # TypeError: not a string (NaN / None); ValueError: format mismatch.
        # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        pass
    else:
        nanoseconds = (parsed.minute*6e10)+(parsed.second*1e9)+(parsed.microsecond*1e3)
        return nanoseconds/1e9

    try:
        datetime.strptime(raw_time, '%I:%M:%S.%f')
    except (TypeError, ValueError):
        return raw_time
    # Parsed with an hour component: reported as None rather than a duration
    return None

In [40]:
# Signed symmetric percent difference between a driver's time and a reference time
def percent_difference(driver_time,median_time):
    '''Return the percent difference of `driver_time` relative to `median_time`.

    The magnitude is |driver - reference| divided by the midpoint of the two
    values, times 100. The sign is negative when `driver_time` is the larger
    of the two and non-negative otherwise.
    '''
    midpoint = (driver_time + median_time)/2
    magnitude = abs((driver_time - median_time)/midpoint)*100
    return -magnitude if driver_time > median_time else magnitude

In [41]:
def get_round(year,circuitId):
    '''Look up the round at which `circuitId` appears in the `year` schedule.

    Reads ``../data/scheduled/{year}.json`` (relative to the working
    directory) and returns the ``'round'`` value of the first entry whose
    ``['Circuit']['circuitId']`` equals `circuitId`, or None when no entry
    matches.
    '''
    schedule_path = Path(f'../data/scheduled/{year}.json')
    with open(schedule_path, 'r', encoding='utf-8') as infile:
        schedule = json.load(infile)

    matching_rounds = (
        race['round']
        for race in schedule
        if race['Circuit']['circuitId'] == circuitId
    )
    return next(matching_rounds, None)

In [42]:
def _load_lap_times(year, round):
    '''Return the raw lap-time table for one race, creating the CSV cache if absent.

    The table has a 'Drivers' column followed by one 'Lap N' column per lap;
    cells hold the lap-time strings as stored in the race data (missing laps
    are None / NaN).
    '''
    csv_path = Path(f'../data/races/{year}/{round}.csv')
    if path_exist(csv_path):
        return pd.read_csv(csv_path)

    json_path = Path(f'../data/races/{year}/{round}.json')
    with open(json_path, 'r', encoding='utf-8') as infile:
        jsondata = json.load(infile)

    # Rows are the drivers that have a timing on the first lap, alphabetically
    drivers = sorted(timing['driverId'] for timing in jsondata['Laps'][0]['Timings'])

    # Collect every column first and build the frame once, instead of
    # inserting columns one at a time
    columns = {'Drivers': drivers}
    for lap in jsondata['Laps']:
        lap_times = []
        for driver_id in drivers:
            # First timing entry for this driver on this lap, if any
            timing = next(
                (item for item in lap['Timings'] if item.get('driverId') == driver_id),
                None,
            )
            lap_times.append(timing.get('time') if timing is not None else None)
        columns[f"Lap {lap['number']}"] = lap_times

    lap_table = pd.DataFrame(columns)
    # Write the cache once, after every lap has been collected, so a failure
    # part-way through never leaves a truncated CSV to be trusted later
    lap_table.to_csv(csv_path, index=False)
    return lap_table


def get_score(year,round,current_drivers):
    '''Score each driver in `current_drivers` for the race `year`/`round`.

    Lap times are loaded from ``../data/races/{year}/{round}.csv`` when that
    file exists, otherwise from the matching ``.json`` file (which is then
    cached as CSV). Each lap time is converted with `time_to_nanoseconds`.
    A driver's score is the sum, over all laps, of
    `percent_difference(driver_lap_time, mean_lap_time_of_all_rows)`.

    Parameters
    ----------
    year, round
        Identify the race file to read.
    current_drivers : list
        driverId values to score; the output follows this order.

    Returns
    -------
    list
        One score per entry of `current_drivers`. Drivers that do not appear
        in the race table score 0. Laps without a usable time for the driver
        contribute nothing to the sum.
    '''
    working_df = _load_lap_times(year, round)
    lap_columns = working_df.columns[1:]

    # Convert each lap-time string to its numeric equivalent
    for col in lap_columns:
        working_df[col] = working_df[col].apply(time_to_nanoseconds)

    lap_means = {}  # per-lap field mean, computed once and shared by all drivers
    score_list = []
    for driver in current_drivers:
        score = 0
        driver_row = working_df.loc[working_df['Drivers'] == driver]
        if not driver_row.empty:
            for lap in lap_columns:
                if lap not in lap_means:
                    lap_means[lap] = working_df[lap].mean()
                delta = percent_difference(driver_row[lap].values[0], lap_means[lap])
                # Skip laps with no time instead of resetting the running
                # total: a single NaN lap must not erase the laps already summed
                if not np.isnan(delta):
                    score += delta
        score_list.append(score)
    return score_list
        


In [43]:
def get_current_circuits():
    '''Return the circuits listed in the 2022 schedule.

    Reads ``../data/scheduled/2022.json`` (relative to the working directory)
    and returns one ``[circuitId, raceName]`` pair per schedule entry, in
    file order.
    '''
    schedule_path = Path('../data/scheduled/2022.json')
    with open(schedule_path, 'r', encoding='utf-8') as infile:
        schedule = json.load(infile)

    return [[race['Circuit']['circuitId'], race['raceName']] for race in schedule]

In [44]:
def get_current_drivers():
    '''Return the driverId of every entry in the current-drivers file.

    Reads ``../data/current_drivers.json`` (relative to the working
    directory) and returns the ``'driverId'`` values in file order.
    '''
    roster_path = Path('../data/current_drivers.json')
    with open(roster_path, 'r', encoding='utf-8') as infile:
        roster = json.load(infile)

    return [entry['driverId'] for entry in roster]

In [45]:
# Load the 2022 circuit list and the current driver ids; the driver ids are
# kept in sorted order for everything that follows
current_circuits = get_current_circuits()
current_drivers = sorted(get_current_drivers())

In [47]:

# For every current circuit, write ../data/circuits/{circuitId}_ps.csv holding
# one row per current driver and one 'Season YYYY' score column for each
# season (2012-2021) in which the circuit appears on the schedule.
for circuit_id, _race_name in current_circuits:

    season_scores = {'Drivers': current_drivers}

    for year in range(2012,2022):
        # `race_round` rather than `round`, so the builtin is not shadowed
        race_round = get_round(year, circuit_id)
        if race_round is None:
            # Circuit not found in that year's schedule: no column for it
            continue
        # One call already scores every driver; it does not need to be
        # repeated per driver
        season_scores[f'Season {year}'] = get_score(year, race_round, current_drivers)

    pd.DataFrame(season_scores).to_csv(Path(f'../data/circuits/{circuit_id}_ps.csv'),index=False)