In [3]:
from urllib.request import urlopen
import numpy as np
import pandas as pd
import csv
import torch, torch.nn as nn, torch.nn.functional as F, torch.optim as optim
import math
import os
from tqdm.auto import tqdm
from io import BytesIO
from datetime import datetime, timedelta

# Data Preparation

Create a CSV file of information for one race, each row contains info for each lap
- 0th lap will be the driver's best qualifying lap

In [18]:
res = urlopen('https://api.openf1.org/v1/sessions?csv=true')
data = pd.read_csv(BytesIO(res.read()))
data.to_csv("LapData/Sessions.csv", index=False)

In [19]:
Sessions = pd.read_csv("Sessions.csv")
races = Sessions.query("session_type == 'Race' and session_name != 'Sprint'")
qualis = Sessions.query("session_type == 'Qualifying' and session_name == 'Qualifying'")



For each Track:
- Need to find all data for qualifying and race (Every lap they put in)
- Create a dataframe of every driver and all the laps they did including whether they pit, retired, etc
- Each row needs to have Lap time

In [23]:
QualiAndRaceSessions= Sessions[Sessions['session_name'].isin(['Qualifying', 'Race'])]
print(QualiAndRaceSessions.loc[QualiAndRaceSessions['location']=='Marina Bay'])
print(QualiAndRaceSessions)

    circuit_key circuit_short_name country_code  country_key country_name  \
75           61          Singapore          SGP          157    Singapore   
76           61          Singapore          SGP          157    Singapore   

                     date_end                 date_start gmt_offset  \
75  2023-09-16 14:00:00+00:00  2023-09-16 13:00:00+00:00   08:00:00   
76  2023-09-17 14:00:00+00:00  2023-09-17 12:00:00+00:00   08:00:00   

      location  meeting_key  session_key session_name session_type  year  
75  Marina Bay         1219         9161   Qualifying   Qualifying  2023  
76  Marina Bay         1219         9165         Race         Race  2023  
     circuit_key circuit_short_name country_code  country_key   country_name  \
6             63             Sakhir          BRN           36        Bahrain   
7             63             Sakhir          BRN           36        Bahrain   
11           149             Jeddah          KSA          153   Saudi Arabia   
12       

In [21]:
def addRow(circuitkey, driver_ids, currentposition, pits, laps,status):
    row = {}
    row['CircuitID'] = circuitkey
    for j in range(len(driver_ids)):
        row[f'driver_ID_{j+1}'] = driver_ids[j]
        row[f'position{j+1}'] = currentposition[j]
        row[f'inPit{j+1}'] = pits[j]
        row[f'status{j+1}'] = status[j]
        row[f'laptime{j+1}'] = laps[j]

    return row

In [30]:
# Loop over each track per year
for i in tqdm(range(0,len(QualiAndRaceSessions),2)):
    location = QualiAndRaceSessions.iloc[i]['location']
    year = QualiAndRaceSessions.iloc[i]['year']
    circuitkey = QualiAndRaceSessions.iloc[i]['circuit_key']
    QualifyingKey = QualiAndRaceSessions.iloc[i]['session_key']
    RaceKey = QualiAndRaceSessions.iloc[i+1]['session_key']
    
    print(location,year, QualifyingKey, RaceKey)
    # Gather all data pertaining to qualifying from session_key for each driver
    res = urlopen("https://api.openf1.org/v1/laps?csv=true&session_key=" + str(QualifyingKey))
    QualiLaps = pd.read_csv(BytesIO(res.read()))

    res = urlopen("https://api.openf1.org/v1/pit?csv=true&session_key="+str(QualifyingKey))
    QualiPits = pd.read_csv(BytesIO(res.read()))

    res = urlopen("https://api.openf1.org/v1/race_control?csv=true&session_key="+str(QualifyingKey))
    QualiRaceControl = pd.read_csv(BytesIO(res.read()))

    res = urlopen("https://api.openf1.org/v1/stints?csv=true&session_key="+str(QualifyingKey))
    QualiStints = pd.read_csv(BytesIO(res.read()))
    # Create a DataFrame with each row having for each driver: Driver_ID, position, inpit, status, laptime
    # Each Row needs to have Driver_ID, Driver Name, Position(Maybe?), whether they pitted that lap, status, laptime, tire age, compound, If there was a flag,
        
    # Gather all data pertaining to race from session_key for each driver
    res = urlopen("https://api.openf1.org/v1/position?csv=true&session_key=" + str(RaceKey))
    RacePositions = pd.read_csv(BytesIO(res.read()))
    RaceStartPos = RacePositions[:20].sort_values(by='position', ascending=True)
    
    res = urlopen("https://api.openf1.org/v1/laps?csv=true&session_key=" + str(RaceKey))
    RaceLaps = pd.read_csv(BytesIO(res.read()))

    res = urlopen("https://api.openf1.org/v1/pit?csv=true&session_key="+str(RaceKey))
    RacePits = pd.read_csv(BytesIO(res.read()))

    res = urlopen("https://api.openf1.org/v1/race_control?csv=true&session_key="+str(RaceKey))
    RaceControl = pd.read_csv(BytesIO(res.read()))

    res = urlopen("https://api.openf1.org/v1/stints?csv=true&session_key="+str(RaceKey))
    RaceStints = pd.read_csv(BytesIO(res.read()))

    res = urlopen("https://api.openf1.org/v1/sessions?csv=true&session_key="+str(RaceKey))
    RaceMeeting = pd.read_csv(BytesIO(res.read()))

    RaceStartTime = RaceMeeting.iloc[0]['date_start']
    RaceStartDict = RaceStartPos.to_dict('records')

    QualiLaps_sorted = QualiLaps.sort_values(by='driver_number', ascending=False)
    R_NumLaps = RaceLaps.sort_values(by='lap_number',ascending = False).iloc[0]['lap_number']
    
    PositionChanges = RacePositions
    PositionChanges['date'] = pd.to_datetime(PositionChanges['date'], format='ISO8601')
    
    totaltimes = {}
    startinggrid = {}
    crossfinishtime = []
    currentposition = []
    for i in RaceStartDict:
        startinggrid[i['driver_number']]= i['position']
        totaltimes[i['driver_number']] = 0
        crossfinishtime.append(RaceStartTime)

    driver_ids = []
    for driver_number in QualiLaps.sort_values(by='driver_number',ascending=False)['driver_number'].unique():
        driver_ids.append(int(driver_number))    
    if len(driver_ids)<20:
        driver_ids.append(2)
        driver_ids.sort()

    laplist = []
    status = [1 for _ in range(len(driver_ids))]
    
    laps = []
    pits = []
    currentposition = []
    lastavailablepos = 20
    # 0th Lap (Qualifying Lap)
    for driver_number in driver_ids:
        try:
            currentposition.append(int(RaceStartPos.loc[RaceStartPos['driver_number'] == driver_number]['position'].iloc[0]))
        except:
            currentposition.append(lastavailablepos)
            lastavailablepos -=1
        q_driver_laps = QualiLaps_sorted.loc[QualiLaps_sorted['driver_number'] == driver_number]
        if not q_driver_laps.empty:
            fastest_lap = q_driver_laps.sort_values(by='lap_duration', ascending=True).iloc[0]['lap_duration']
            laps.append(float(fastest_lap))
            pits.append(False)
        else:
            print("No laps found")
            pits.append(True)
            laps.append(0)
    
    laplist.append(addRow(circuitkey=circuitkey,
                          driver_ids=driver_ids,
                          currentposition=currentposition,
                          pits=pits,
                          laps=laps,
                          status = status))
    # 1st Lap (Including Formation lap and countdown)
    SecondLapStarts = RaceLaps[RaceLaps['lap_number']==2]
    FirstLapTimes = [-1 for _ in range(len(driver_ids))]
    for i in range(len(driver_ids)):
        LapStartTime = SecondLapStarts[SecondLapStarts['driver_number']==driver_ids[i]]
        if not LapStartTime.empty:
            LapStartTime = LapStartTime.iloc[0]['date_start']
            crossfinishtime[i] = (datetime.fromisoformat(LapStartTime))
            FirstLapTimes[i] = ((datetime.fromisoformat(LapStartTime)-datetime.fromisoformat(RaceStartTime)).total_seconds())
        else:
            status[i] = 0
    
    laps = []
    pits = []
    for driver_idx in range(len(driver_ids)):
        pits.append(False)
        laps.append(FirstLapTimes[driver_idx])
    
    laplist.append(addRow(circuitkey=circuitkey,
                          driver_ids=driver_ids,
                          currentposition=currentposition,
                          pits=pits,
                          laps=laps,
                          status=status))
    # Rest of race
    lap_num = 2
    retired = {}
    # Current position value being used from starting grid - only needs to be updated each lap to see if anyone has made a change
    while (lap_num < R_NumLaps+1):
        laps = []
        pits = []
        for driver_idx in range(len(driver_ids)):
            r_driver_laps =RaceLaps.loc[RaceLaps['driver_number'] == driver_ids[driver_idx]]
            if not r_driver_laps.empty:
                lapdata = r_driver_laps.loc[r_driver_laps['lap_number']==lap_num]
                if not lapdata.empty:
                    laptime = lapdata.iloc[0]['lap_duration']
                    pitted = lapdata.iloc[0]['is_pit_out_lap']
                    if not laptime:
                        print("No laps found")
                    if(math.isnan(laptime)):
                        laps.append(0)
                        pits.append(False)
                    else:
                        laps.append(laptime)
                        pits.append(pitted)
                        totaltimes[driver_ids[driver_idx]] += laptime
                        crossfinishtime[driver_idx] += timedelta(seconds = laptime)
                        # Check if their position has changed
                        filteredpositions = PositionChanges[(PositionChanges['date']<=(crossfinishtime[driver_idx]))&(PositionChanges['driver_number']==driver_ids[driver_idx])]
                        if not filteredpositions.empty:
                            closestrow = filteredpositions.iloc[-1]
                            currentposition[driver_idx] = closestrow['position']
                else:# Driver Retired
                    laps.append(-1)
                    pits.append(False)
                    status[driver_idx] = 0
    
            else: #Driver did not start
                laps.append(-1)
                pits.append(False)
                retired[driver_ids[driver_idx]] = len(retired)
                status[driver_idx] = 0
    
        laplist.append(addRow(circuitkey=circuitkey,
                              driver_ids=driver_ids,
                              currentposition=currentposition,
                              pits=pits,
                              laps=laps,
                              status=status))
    
        lap_num += 1
    
    df = pd.DataFrame(laplist)
    df.to_csv("/Users/theebankumaresan/Documents/Programming/Python/f1lapbylap/f1LapbyLap/LapData/" + f'{year}/{location}.csv',index = False)



  0%|          | 0/36 [00:00<?, ?it/s]

Sakhir 2023 7768 7953
Jeddah 2023 7775 7779
Melbourne 2023 7783 7787
Baku 2023 9064 9070
Miami 2023 9074 9078
Monaco 2023 9090 9094
Barcelona 2023 9098 9102
Montréal 2023 9106 9110
Spielberg 2023 9112 9118
Silverstone 2023 9122 9126
Budapest 2023 9129 9133
Spa-Francorchamps 2023 9135 9141
Zandvoort 2023 9145 9149
Monza 2023 9153 9157
Marina Bay 2023 9161 9165
Suzuka 2023 9169 9173
Lusail 2023 9215 9221
Austin 2023 9207 9213
Mexico City 2023 9177 9181
São Paulo 2023 9304 9205
Las Vegas 2023 9314 9189
Yas Island 2023 9193 9197
Sakhir 2024 9468 9472
Jeddah 2024 9476 9480
Melbourne 2024 9484 9488
No laps found
Suzuka 2024 9492 9496
Shanghai 2024 9664 9673
Miami 2024 9498 9507
Imola 2024 9511 9515
Monaco 2024 9519 9523
Montréal 2024 9527 9531
Barcelona 2024 9535 9539
Spielberg 2024 9541 9550
Silverstone 2024 9554 9558
Budapest 2024 9562 9566
Spa-Francorchamps 2024 9570 9574


In [24]:
QualifyingKey = 9165

Mres = urlopen("https://api.openf1.org/v1/laps?csv=true&session_key=" + str(QualifyingKey))
MQualiLaps = pd.read_csv(BytesIO(Mres.read())) 



In [29]:
# print(MQualiLaps.loc[MQualiLaps['driver_number']==22])
print(MQualiLaps[:20])

                          date_start  driver_number  duration_sector_1  \
0                                NaN              1                NaN   
1                                NaN              2                NaN   
2                                NaN              4                NaN   
3                                NaN             10                NaN   
4                                NaN             11                NaN   
5                                NaN             14                NaN   
6                                NaN             16                NaN   
7                                NaN             20                NaN   
8                                NaN             22                NaN   
9                                NaN             23                NaN   
10                               NaN             24                NaN   
11                               NaN             27                NaN   
12                               NaN  

In [5]:
path = "/Users/theebankumaresan/Documents/Programming/Python/f1lapbylap/f1LapbyLap/LapData/"
filespath = sorted(os.listdir(path+f'{2023}/'))[1:]
for i in filespath:
    res = pd.read_csv(path + "2023/"+f'{i}')
    if len(res.columns) != 101:
        print(len(res.columns))
        driver_id_columns = [col for col in res.columns if col.startswith('driver_ID')]
        print(i)
        print(res.loc[0, driver_id_columns])

96
Lusail.csv
driver_ID_1     81
driver_ID_2     77
driver_ID_3     63
driver_ID_4     44
driver_ID_5     40
driver_ID_6     31
driver_ID_7     27
driver_ID_8     24
driver_ID_9     23
driver_ID_10    22
driver_ID_11    20
driver_ID_12    18
driver_ID_13    16
driver_ID_14    14
driver_ID_15    11
driver_ID_16    10
driver_ID_17     4
driver_ID_18     2
driver_ID_19     1
Name: 0, dtype: object
96
Marina Bay.csv
driver_ID_1     81
driver_ID_2     77
driver_ID_3     63
driver_ID_4     55
driver_ID_5     44
driver_ID_6     40
driver_ID_7     31
driver_ID_8     27
driver_ID_9     24
driver_ID_10    23
driver_ID_11    22
driver_ID_12    20
driver_ID_13    16
driver_ID_14    14
driver_ID_15    11
driver_ID_16    10
driver_ID_17     4
driver_ID_18     2
driver_ID_19     1
Name: 0, dtype: object
96
Monza.csv
driver_ID_1     81
driver_ID_2     77
driver_ID_3     63
driver_ID_4     55
driver_ID_5     44
driver_ID_6     40
driver_ID_7     31
driver_ID_8     27
driver_ID_9     24
driver_ID_10    

Tracks that do not have correct amount:

- <strike> Lusail - Sainz #55 Quali 20 drivers </strike>
- <strike> Marina Bay - Stroll #18 Quali 20 drivers </strike>
- <strike> Monza - Tsunoda Quali 20 drivers </strike>
- <strike> Sao Paulo - Leclerc #16 Quali 20 drivers </strike>
- <strike> Melbourne - Sargeant #2 Practice 1 20 drivers </strike>
- <strike> Monaco - magnussen #20, hulkenburg #27, perez #11, ocon #31 (but hes there?) Quali 20 drivers </strike>
- <strike> Silverstone - gasly #10 Quali 20 drivers </strike>
- <strike> Suzuka - albon #23, ricciardo #3 Quali 20 drivers </strike> 


Status - 1 is running, 0 is retired

Make a Dataset:

- Helper functions for getting information from each id

In [231]:
drivers = pd.read_csv('Drivers.csv')
Sessions = pd.read_csv('Sessions.csv')

def driver_info(id):
    _drivers = drivers
    _d = _drivers.query(f'driver_number == {id}')
    if _d.empty:
        return None, None, None, None, None, None
    _number = int(_d.iloc[0]['driver_number'])
    _code = _d.iloc[0]['name_acronym']
    _firstname = _d.iloc[0]['first_name']
    _lastname = _d.iloc[0]['last_name']
    _nationality = _d.iloc[0]['country_code']
    return _number, _code, _firstname, _lastname, _nationality
    
def circuit_info(circuit_key):
    _sessions = Sessions
    _s = _sessions.query(f'circuit_key=={circuit_key}')
    _name = _s.iloc[0]['circuit_short_name']
    _country = _s.iloc[0]['country_name']
    _location = _s.iloc[0]['location']
    return _name, _country, _location

DataSet Class:

- <strike> Initializes with a directory containing race data files organized by year </strike>
- <strike> 3 methods for setting current year, round and next round to automatically load appropriate file </strike>
- <strike> method for processing race data for a given index, transforming into tensors </strike>
- <strike> method for returning length of dataset, accounts for fact that last lap is a label </strike>


In [57]:
class RaceDataSet(torch.utils.data.Dataset):
    
    def __init__(self, dir):
        self.year = 2023
        self.dir = dir
        self.currentyear = sorted(os.listdir(self.dir+f'{self.year}/'))
        self.round = 2
        
        if(self.round<len(self.currentyear)):
            self.currentrace = pd.read_csv(self.dir+f'{self.year}/{self.currentyear[self.round-1]}')
        else:
            self.currentrace = pd.read_csv(self.dir+f'{self.year}/{self.currentyear[-1]}')
        
    def set_year(self, year):
        self.year = year
        self.currentyear = sorted(os.listdir(self.dir+f'{self.year}/'))
        
    def set_round(self, newround):
        self.round = newround
        if(self.year<2023 or self.year >2024):
            return
        if(self.round<len(self.currentyear)):
            self.currentrace = pd.read_csv(self.dir+f'{self.year}/{self.currentyear[self.round-1]}')
        else:
            self.currentrace = pd.read_csv(self.dir+f'{self.year}/{self.currentyear[-1]}')

    def nextround(self):
        self.round +=1 
        if(self.round < len(self.currentyear)):
            self.set_round(self.round)
        else:         
            self.round = 2
            self.set_year(self.year+1)
            self.set_round(self.round)

    def __len__(self):
        return len(self.currentrace)-1

    # method for processing race data for a given index, transforming into tensors
    # - Handles NaN values by replacing them with 0 and scales features like driver IDs and lap times
    # - prepares sequences of lap times up to current lap for model input
    # - extracts and scales relevant features from dataset to create input-output pairs for model training
    
    def __getitem__(self, i):            
        for j in range(i+1):
            cur = torch.tensor(self.currentrace.iloc[j].values,dtype=torch.float64)
            cur[cur != cur] = 0
            for k in range(1,len(cur),5):
                cur[k] =cur[k]/100
                cur[k+4] = cur[k+4]/100
            if j==0:
                cur_ret = cur.clone()
            elif j==1:
                cur_ret = cur_ret.unsqueeze(0)
                cur_ret = torch.cat((cur_ret,cur.clone().unsqueeze(0)), dim = 0)
            else:
                cur_ret = torch.cat((cur_ret,cur.clone().unsqueeze(0)),dim = 0)
                
            next = torch.tensor(self.currentrace.iloc[i+1].values,dtype=torch.float64)
            next_exp = torch.cat((next[0:1],next[2:4],next[5:6]),0)
            for k in range(1,20):
                next_exp = torch.cat((
                    next_exp,
                    next[2+5*k:4+5*k],
                    next[5+5*k:6+5*k]
                ),0)
            next_exp[next_exp!=next_exp] = 0
            for k in range(0,len(next_exp)-1,3):
                next_exp[k+2] = next_exp[k+2]/10
            if j == 0:
                next_ret = next_exp.clone()
            elif j == 1:
                next_ret = next_ret.unsqueeze(0)
                next_ret = torch.cat((next_ret,next_exp.clone().unsqueeze(0)),dim=0)
            else:
                next_ret = torch.cat((next_ret,next_exp.clone().unsqueeze(0)),dim=0)
        return (cur_ret, next_ret)

In [253]:
ds = RaceDataSet("/Users/theebankumaresan/Documents/Programming/Python/f1lapbylap/f1LapbyLap/LapData/")

In [116]:
ds.nextround()
ds.year

2023

In [117]:
ds.round

8

Helper functions for displaying input/output tensors in english
- converts tensor into a chart which shows: driver name, position, inpit, laptime and status

In [35]:
def pos_df(lap_in, out):
    _df = []
    _lap = lap_in.detach().clone()
    _o = out.detach().clone()
    print(_lap)
    for i in range(1, 101, 5):
        _lap[i] = _lap[i] * 100
        _lap[i+4] = _lap[i+4] * 100
    # for i in range(1, 61, 3):
        # _o[i+2] = _o[i+2] *10
    _name, _loc, _country = circuit_info(int(_lap[0].item()))
    j = 1
    for i in range(1, 101, 5):
        _num, _code, _fn, _ln, _ = driver_info(int(_lap[i].item()))
        _time = _o[j+2].item()
        _pos = _o[j].item()
        _pitting = _o[j+1].item()*10
        _df.append({
            'code': f'{_code}',
            'driver': f'{_fn} {_ln}',
            'position': _pos,
            'pitting': _pitting,
            'laptime': _time
        })
        j += 3
    df = pd.DataFrame(_df)
    df = df.sort_values(by=['position', 'laptime'])
    return _name, _loc, _country, df

In [36]:
print(len(ds))

NameError: name 'ds' is not defined

In [37]:
lap_num = 9

if(lap_num==0):
    _, _, _, df = pos_df(ds[lap_num][0], ds[lap_num][1])
else:
    _, _, _, df = pos_df(ds[lap_num][0][0], ds[lap_num][1][0])


NameError: name 'ds' is not defined

In [38]:
df

Unnamed: 0,CircuitID,driver_ID_1,position1,inPit1,status1,laptime1,driver_ID_2,position2,inPit2,status2,...,driver_ID_19,position19,inPit19,status19,laptime19,driver_ID_20,position20,inPit20,status20,laptime20
0,7,81,5,False,1,114.027,77,14,False,1,...,2,18,False,1,117.23,1,11,False,1,113.159
1,7,81,5,False,1,347.508,77,14,False,1,...,2,18,False,1,355.287,1,11,False,1,350.225
2,7,81,4,False,1,110.164,77,13,False,1,...,2,20,False,1,112.288,1,9,False,1,109.828
3,7,81,4,False,1,109.971,77,13,False,1,...,2,20,False,1,111.366,1,8,False,1,110.141
4,7,81,4,False,1,109.847,77,13,False,1,...,2,19,False,1,110.772,1,8,False,1,110.055
5,7,81,4,False,1,110.085,77,13,False,1,...,2,19,False,1,111.115,1,8,False,1,110.138
6,7,81,4,False,1,109.963,77,13,False,1,...,2,19,False,1,111.282,1,8,False,1,110.073
7,7,81,4,False,1,110.03,77,13,False,1,...,2,19,False,1,111.662,1,8,False,1,110.146
8,7,81,4,False,1,110.062,77,12,False,1,...,2,18,False,1,115.636,1,8,False,1,110.094
9,7,81,4,False,1,110.481,77,11,False,1,...,2,19,True,1,123.886,1,8,False,1,110.609


Creating a tensor of size in but content of out

In [39]:
# cur, n = ds[lap_num][0][0]
# cur
print(ds[lap_num][0][-1])
print(ds[lap_num][1][-1])

NameError: name 'ds' is not defined

In [40]:
def out_to_in_size(in_,out_):
    _ret = in_.detach().clone()
    print(_ret, out_)
    try:
        _ret = [0][0][2] = out_[0][0][1]
        _ret = [0][0][3] = out_[0][0][2]
        _ret = [0][0][5] = out_[0][0][3]
        for i in range(1,20):
            _ret = [0][0][2+i*5] = out_[0][0][i*3]
            _ret = [0][0][3+i*5] = out_[0][0][i*3+1]
            _ret = [0][0][5+i*5] = out_[0][0][i*3+2]
        return _ret.squeeze().squeeze()
    except:
        _ret[0][2] = out_[0][1]
        _ret[0][3] = out_[0][2]
        _ret[0][5] = out_[0][3]
        for i in range(1,20):
            _ret[0][2+i*5] = out_[0][i*3]
            _ret[0][3+i*5] = out_[0][i*3+1]
            _ret[0][5+i*5] = out_[0][i*3+2]
        return _ret

In [41]:
prev, n = ds[2]
df = out_to_in_size(prev, n)
df

print(len(prev[0]))

NameError: name 'ds' is not defined

Creating the LSTM Model

- Input size is 101
- Output size is 61
- lstm_hids is 101
- lstm_layers try 2
- try dropout of 0.2

In [42]:
class RacePredictor(nn.Module):
    def __init__(self, input_size, output_size, lstm_hids, lstm_layers, dropout):
        super(RacePredictor, self).__init__()
        
        self.input_size = input_size
        self.lstm_hids = lstm_hids
        self.lstm_layers = lstm_layers
        
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=lstm_hids, num_layers=lstm_layers, dropout=dropout, batch_first=True)
        
        self.fc = nn.Linear(in_features=lstm_hids, out_features=output_size)
        
        nn.init.xavier_uniform_(self.fc.weight.data)
        
        for name,params in self.named_parameters():
            if name[:6] == 'weight':
                nn.init.xavier_uniform_(params)
            elif name[:4] == 'bias':
                nn.init.constant_(params, 0.0)
                
    def forward(self, ins, prev_states = None):
        lstm_outs, next_states = self.lstm(ins, prev_states)
        outs = self.fc(lstm_outs)
        return outs, next_states
    
    def zero_states(self):
        hidden_state = torch.zeros(self.lstm_layers,1,self.lstm_hids)
        cell_state = torch.zeros(self.lstm_layers,1,self.lstm_hids)
        return(hidden_state, cell_state)

Helper functions for training the LSTM model
- training method, testing method and method to do both

In [43]:
device = torch.device( "cuda" if torch.cuda.is_available() else "cpu")

In [62]:
def run_train(model, dataset, lossfn, optimizer, scheduler):
    model.train()
    tLoss = 0
    tCount = 0
    dataset.set_year(2023)
    dataset.set_round(2)
    
    while (dataset.year<2025):

        hidden_state, cell_state = model.zero_states()
        states = hidden_state.to(device), cell_state.to(device)
        
        i = len(dataset)-1
        optimizer.zero_grad()
        lap_in,lap_exp = dataset[i]
        
        if i ==0:
            lap_in = lap_in.unsqueeze(0)
            lap_exp = lap_exp.unsqueeze(0)
        lap_in = lap_in.unsqueeze(0).float()
        lap_exp = lap_exp.unsqueeze(0).float()
        
        lap_in = lap_in.to(device)
        lap_exp = lap_exp.to(device)
        lap_out,states = model(lap_in, states)
        loss = lossfn(lap_out, lap_exp)
        
        for state in states:
            state.detach_()
        if (math.isnan(loss)):
            print('Loss is nan')
            print(f'Year: {ds.year}')
            print(f'round: {ds.round}')
            print(f'lap: {i}')
            print(lap_in)
            input()
        optimizer.step()
        tLoss += loss.item() *1
        tCount += 1        
        dataset.nextround()
    scheduler.step()
    return tLoss/tCount
def run_test(model, dataset, lossfn):
    model.eval()
    tLoss = 0
    tCount = 0
    
    dataset.set_year(2023)
    dataset.set_round(2)
    hidden_state, cell_state = model.zero_states()
    states = hidden_state.to(device), cell_state.to(device)
    
    i = len(dataset)-1
    lap_in, lap_exp = dataset[i]
    if(i==0):
        lap_in = lap_in.unsqueeze(0)
        lap_exp = lap_exp.unsqueeze(0)
    lap_in = lap_in.unsqueeze(0).float()
    lap_exp = lap_exp.unsqueeze(0).float()
    lap_in,lap_exp = lap_in.to(device), lap_exp.to(device)
    lap_out, states = model(lap_in, states)
    loss = lossfn(lap_out, lap_exp)
    tLoss += loss.item()-1
    tCount += 1
    
    return tLoss/tCount

def train_and_test(model, dataset, lossfn, optimizer, scheduler, vID, nEpochs = 10):
    
    for epoch in tqdm(range(nEpochs), desc='Epochs', unit = 'ep'):
        train_loss = run_train(model, dataset, lossfn, optimizer, scheduler)
        test_loss = run_test(model, dataset, lossfn)
        tqdm.write(f'Epoch: {epoch} | Train Loss {train_loss:.6f} | Test Loss {test_loss:.6f}' "\n")
    return
    

In [63]:
model = RacePredictor(101,61,101,2,0.2)
model.to(device)
criterion = nn.MSELoss().to(device)
opt = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(opt,3,gamma=0.1)
ds = RaceDataSet("/Users/theebankumaresan/Documents/Programming/Python/f1lapbylap/f1LapbyLap/LapData/")



In [59]:
run_train(model,ds,criterion,opt,scheduler)

5691.905296998865

In [64]:
train_and_test(model, ds, criterion, opt, scheduler, 25, 10)

Epochs:   0%|          | 0/10 [00:00<?, ?ep/s]

Epoch: 0 | Train Loss 5693.869711 | Test Loss 2241.072998

Epoch: 1 | Train Loss 5693.869590 | Test Loss 2241.072998

Epoch: 2 | Train Loss 5693.851196 | Test Loss 2241.072998

Epoch: 3 | Train Loss 5693.835336 | Test Loss 2241.072998

Epoch: 4 | Train Loss 5693.797713 | Test Loss 2241.072998

Epoch: 5 | Train Loss 5693.865456 | Test Loss 2241.072998

Epoch: 6 | Train Loss 5693.819950 | Test Loss 2241.072998

Epoch: 7 | Train Loss 5693.774982 | Test Loss 2241.072998

Epoch: 8 | Train Loss 5693.885969 | Test Loss 2241.072998

Epoch: 9 | Train Loss 5693.818561 | Test Loss 2241.072998

