In [52]:
import requests
import pandas as pd
import numpy as np
from IPython.display import display
import matplotlib.pyplot as plt

In [3]:
# used to get lap data on a desired car and no of laps
def get_lapData(session, car_no, start, end):
    print(f'Lap data for car No. {car_no} is on its way!!')
    data_list = []

    if session == '':
        session = 'latest'
    
    for i in range(start, end):
        response = requests.get(f'https://api.openf1.org/v1/laps?session_key={session}&driver_number={car_no}&lap_number={i}')

        if response.status_code != 200:
            return 'API request error now. Try again!'
        
        data = response.json()
        
        if len(data) > 0:
            data_list.append(data[0])

    return data_list

# function used to clean lap data
def clean_lapData(df):
    df.drop(columns=['date_start', 'meeting_key', 'session_key', 'date_start'])
    df.rename(columns={"duration_sector_1": "S1", "duration_sector_2": "S2", "duration_sector_3" : 'S3'}, inplace=True)
    return df


In [4]:
# used to convert the list data into a df
def to_df(data):
    return pd.DataFrame(data)

# used to add data to csv
def into_csv(df):
    return df.to_csv('Data/SPA.csv', index=False)
    

In [5]:
# used to get info on stints
def get_stint(session, car_no, tyre_age):
    print(f'Stint data for car No. {car_no} is incoming!!')

    data_list = []

    if session == '':
        session = 'latest'
    
    response = requests.get(f'https://api.openf1.org/v1/stints?session_key={session}&tyre_age_at_start>={tyre_age}')

    if response.status_code != 200:
        return 'API request error currently. Review info and try again!!'
    
    data = response.json()

    for i in range(0, len(data)):
        if data[i]['driver_number'] == car_no: 
            data_list.append(data[i])
            
    return data_list

# function used to clean stint data
def clean_stintData(df): 
    df.drop(columns=['date_start', 'meeting_key', 'session_key'])
    return df

In [6]:
# used to get data relating to car performance 
def get_carData(session, car_no):
    print(f'Car data for car No. {car_no} is on its way!!')

    data_list = []

    if session == '':
        session = 'latest'

    response = requests.get(f'https://api.openf1.org/v1/car_data?driver_number={car_no}&session_key={session}&speed>=330')

    if response.status_code != 200:
        return 'API request error currently. Review info and try again!!'
    
    data = response.json()

    for i in range(0, len(data)):
        if data[i]['driver_number'] == car_no: 
            data_list.append(data[i])
            
    return data_list

In [7]:
# used to get session key and correlate fruther to retrive lap data
def get_session(country, year, type):
    # type options: Race, Sprint, Qualifying, Pratice (1, 2, 3)
    print(f'Getting session info for {country} {year}, {type}!!')

    response = requests.get(f'https://api.openf1.org/v1/sessions?country_name={country}&session_name={type}&year={year}')

    if response.status_code != 200:
        return 'API request error currently. Review info and try again!!'

    data = response.json()

    data_list = {
        'session': data[0]['session_key'],
        'meeting': data[0]['meeting_key']
    }

    return data_list

def clean_session(df):
    df = df.drop(columns=['meeting_key'])
    return df

In [None]:
# Full code example in use, i.e. 1

# 1. State the session you want to get information for!
session  = get_session('Belgium', '2024', 'Race')

# 2. Get stint data for the race, with session key just found...
stint_data = get_stint(session['session'], 44, 0)

# 3. Unpack the desired stint data to retrive the laps
start = stint_data[0]['lap_start']
end = stint_data[0]['lap_end'] + 1

# 4. feed the stint lap data, into the full get lap data method
lap_data = get_lapData(session['session'], 44, start + 1, end)  # +1 is needed to avoid api error

# 5. conver the lap data into a df for futher anylsis
lap_df = to_df(lap_data)

# 5.5 apply the needed methods to clean the lap df
lap_df = clean_lapData(lap_df)

lap_df

In [None]:
# Full code example in use, i.e. 2
data = []
car_no = [44, 63]

# 1. State the session you want to get information for!
session  = get_session('Belgium', '2024', 'Race')

for car in car_no:
    # 2. Get stint data for the race, with session key just found...
    stint_data = get_stint(session['session'], car, 0)

    # 3. Unpack the desired stint data to retrive the laps
    start = stint_data[len(stint_data) - 1]['lap_start']
    end = stint_data[len(stint_data) - 1]['lap_end'] + 1

    # 4. feed the stint lap data, into the full get lap data method
    lap_data = get_lapData(session['session'], car, start, end)  # +1 is needed to avoid api error, for initial stint

    # 5. conver the lap data into a df for futher anylsis
    lap_df = to_df(lap_data)

    # 5.5 apply the needed methods to clean the lap df
    lap_df = clean_lapData(lap_df)

    # 6. add it to a csv
    into_csv(lap_df)

    # 7. display the df's
    display(lap_df)

In [128]:
# imports for machine learning and k-means clustering
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [36]:
# full code example using machine learning (pt.1)
# Step 1: Get the session id pratice
prac_sessions = [1, 2, 3]
car_no = 44
session_list = []

for s in prac_sessions:
    session = get_session('Belgium', '2024', f'Practice {s}')
    session_list.append(session['session'])

# Step 2: get the stint data for the sessions found
stint_data = []
for s in session_list:
    stint = get_stint(s, car_no, 0)
    df = to_df(stint)
    stint_data.append(df)

stint = pd.concat(stint_data, ignore_index=True)
stint = clean_session(stint[stint['compound'] != 'INTERMEDIATE'])

stint['lap_diff'] = stint['lap_end'] - stint['lap_start']

# refresh the session list to note any changes after cleaning
session_list =  stint['session_key'].unique().tolist()

# Step 3: get the lap data for each stint in the sessions found
data = []
for s in session_list:
    # Filter the DataFrame for the current session
    session_df = stint.loc[stint['session_key'] == s]
    
    # Get the minimum lap start and maximum lap end for the filtered session
    start = session_df['lap_start'].min()
    end = session_df['lap_end'].max()

    # Fetch lap data using the start and end values
    lap = get_lapData(s, car_no, start, end)
    lap = to_df(lap)
    lap = lap[lap['is_pit_out_lap']==False]

    data.append(lap)

# Step 4: concat all togther to make a larger df, for pratice
if data:
    practice_data = pd.concat(data, ignore_index=True)



Getting session info for Belgium 2024, Practice 1!!
Getting session info for Belgium 2024, Practice 2!!
Getting session info for Belgium 2024, Practice 3!!
Stint data for car No. 44 is incoming!!
Stint data for car No. 44 is incoming!!
Stint data for car No. 44 is incoming!!
Lap data for car No. 44 is on its way!!
Lap data for car No. 44 is on its way!!


In [149]:

# Perform the merge on lap start and end
merged_data = []

for _, stint_row in stint.iterrows():
    start_lap = stint_row['lap_start']
    end_lap = stint_row['lap_end']

    # Filter lap data that falls within the stint lap range
    filtered_laps = practice_data[(practice_data['lap_number'] >= start_lap) & (practice_data['lap_number'] <= end_lap)]
    if not filtered_laps.empty:
        # Add stint data to each filtered lap
        filtered_laps = filtered_laps.assign(
                lap_start=stint_row['lap_start'],
                lap_end=stint_row['lap_end'],
                compound=stint_row['compound'],
                tyre_age_at_start=stint_row['tyre_age_at_start'], 
        )

        if 'lap_diff' in stint_row:
            filtered_laps['lap_diff'] = stint_row['lap_diff']

    merged_data.append(filtered_laps)


df_merged = pd.concat(merged_data, ignore_index=True)
df_merged = df_merged[(df_merged['lap_diff'] >= 5)].sort_values(by='session_key', ascending=False)
df_merged = df_merged[df_merged['segments_sector_3'].apply(lambda x: len(x) > 0)]
df_merged = df_merged.drop(columns=['date_start', 'meeting_key', 'driver_number', 'lap_start', 'lap_end', 'is_pit_out_lap'])

df_merged.head()

Unnamed: 0,session_key,i1_speed,i2_speed,st_speed,lap_duration,duration_sector_1,duration_sector_2,duration_sector_3,segments_sector_1,segments_sector_2,segments_sector_3,lap_number,compound,tyre_age_at_start,lap_diff
66,9568,322.0,201,298.0,109.684,31.82,48.445,29.419,"[None, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",17,SOFT,5,11
48,9568,335.0,210,,103.742,30.491,44.752,28.499,"[None, 2048, 2049, 2049, 2049, 2049, 2049, 2049]","[2048, 2049, 2049, 2051, 2048, 2051, 2049, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2064]",7,SOFT,0,8
47,9568,333.0,92,310.0,135.978,30.704,54.101,51.173,"[2049, 2048, 2049, 2049, 2049, 2049, 2048, 2048]","[2049, 2048, 2048, 2049, 2048, 2048, 2048, 204...","[2049, 2049, 2049, 2049, 2049, 2049, 2048]",6,SOFT,0,8
46,9568,155.0,101,108.0,234.937,67.745,115.374,51.818,"[2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2049]",5,SOFT,0,8
45,9568,335.0,211,309.0,104.194,30.617,44.958,28.619,"[2049, 2048, 2048, 2049, 2049, 2049, 2049, 2049]","[2049, 2048, 2048, 2049, 2049, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",4,SOFT,0,8


In [144]:
# full code example using machine learning (pt.3)
# make a df of all the soft data in the table
df_soft = df_merged[df_merged['compound'] == 'SOFT'].sort_values(by="lap_diff")

# for the first stint
s1 = df_soft[df_soft['lap_diff'] == 8].fillna(0).sort_values(by="lap_number")
# s1 = s1[(s1['i1_speed'] >= 315) & (s1['i2_speed'] >= 150)] 

# define the model, this time with it being a RandomForestRegressor model
rf_model = RandomForestRegressor()

# loop through both the session with data to make assesments
for session in s1['session_key'].unique().tolist():

    session_data = s1[s1['session_key'] == session]

    last = session_data.iloc[-1, :]


    X = session_data[['i1_speed' ,'i2_speed', 'st_speed', 'duration_sector_1', 'duration_sector_2', 'duration_sector_3']]
    y = session_data['lap_duration']

    # prep training data, not including the last lap
    X_train = X.iloc[:-1, :]
    y_train = y.iloc[:-1]

    # fit the model
    rf_model = rf_model.fit(X_train, y_train)

    # Prepare the test data (features of the last lap)
    X_test = last[['i1_speed', 'i1_speed', 'st_speed', 'duration_sector_1', 'duration_sector_2', 'duration_sector_3']].values.reshape(1, -1)

    # Predict the lap duration for the last lap
    y_pred = rf_model.predict(X_test)

    # Display the predicted lap duration
    print("Predicted: ", y_pred[0], " | Actual: ", y.iloc[-1])


Predicted:  181.15551  | Actual:  247.853
Predicted:  114.71757999999994  | Actual:  114.651




In [175]:
# full code example using machine learning (pt.4)
df_soft = df_merged[df_merged['compound'] == 'SOFT'].sort_values(by="lap_diff")

# for the second stint on softs
s2 = df_soft[df_soft['lap_diff'] == 11].fillna(0).sort_values(by="lap_number")
s2 = s2[(s2['i1_speed'] >= 315) & (s2['i2_speed'] >= 150)] 

# define the model, this time with it being a linear regression model
lr_model = RandomForestRegressor()

# loop through both the session with data to make assesments
for session in s2['session_key'].unique().tolist():

    session_data = s2[s2['session_key'] == session]

    display(session_data)

    last = session_data.iloc[-1, :]

    X = session_data[['i1_speed' ,'i2_speed', 'st_speed', 'duration_sector_1', 'duration_sector_2', 'duration_sector_3']]
    y = session_data['lap_duration']

    # prep training data, not including the last lap
    X_train = X.iloc[:-1, :]
    y_train = y.iloc[:-1]

    # fit the model
    lr_model = lr_model.fit(X_train, y_train)

    # Prepare the test data (features of the last lap)
    X_test = last[['i1_speed', 'i1_speed', 'st_speed', 'duration_sector_1', 'duration_sector_2', 'duration_sector_3']].values.reshape(1, -1)

    # Predict the lap duration for the last lap
    y_pred = lr_model.predict(X_test)

    # Display the predicted lap duration
    print("Predicted: ", y_pred[0], " | Actual: ", y.iloc[-1])

Unnamed: 0,session_key,i1_speed,i2_speed,st_speed,lap_duration,duration_sector_1,duration_sector_2,duration_sector_3,segments_sector_1,segments_sector_2,segments_sector_3,lap_number,compound,tyre_age_at_start,lap_diff
64,9568,318.0,202,298.0,109.143,31.804,47.945,29.394,"[2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",15,SOFT,5,11
65,9568,317.0,203,299.0,109.394,31.836,48.083,29.475,"[None, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",16,SOFT,5,11
66,9568,322.0,201,298.0,109.684,31.82,48.445,29.419,"[None, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",17,SOFT,5,11
68,9568,321.0,199,298.0,110.035,31.911,48.556,29.568,"[None, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",19,SOFT,5,11
69,9568,323.0,195,300.0,112.737,31.87,49.347,31.52,"[None, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",20,SOFT,5,11
70,9568,322.0,195,0.0,111.105,31.952,49.473,29.68,"[None, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",21,SOFT,5,11


Predicted:  109.88617000000002  | Actual:  111.105




Unnamed: 0,session_key,i1_speed,i2_speed,st_speed,lap_duration,duration_sector_1,duration_sector_2,duration_sector_3,segments_sector_1,segments_sector_2,segments_sector_3,lap_number,compound,tyre_age_at_start,lap_diff
59,9567,321.0,199,304.0,110.338,31.776,49.018,29.544,"[2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",17,SOFT,5,11
60,9567,322.0,195,309.0,111.118,31.81,49.687,29.621,"[None, 2048, 2048, 2048, 2049, 2049, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",18,SOFT,5,11
61,9567,334.0,196,313.0,111.927,31.604,50.713,29.61,"[None, 2048, 2048, 2049, 2049, 2049, 2049, 2049]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",19,SOFT,5,11


Predicted:  110.80600000000008  | Actual:  111.927




In [164]:
# full code example using machine learning (pt.5)
# make a df of all the medium data in the table
df_medium = df_merged[df_merged['compound'] == 'MEDIUM'].sort_values(by="lap_diff")

sf = df_medium.fillna(0).sort_values(by="lap_number")
sf = sf[(sf['i1_speed'] >= 315) & (sf['i2_speed'] >= 150)] 

rf_model = RandomForestRegressor()

for session in sf['session_key'].unique().tolist():

    session_data = sf[sf['session_key'] == session]

    display(session_data)

    last = session_data.iloc[-1, :]

    X = session_data[['i1_speed' ,'i2_speed', 'st_speed', 'duration_sector_1', 'duration_sector_2', 'duration_sector_3']]
    y = session_data['lap_duration']

    X_train = X.iloc[:-1, :]
    y_train = y.iloc[:-1]

    rf_model = rf_model.fit(X_train, y_train)

    X_test = last[['i1_speed', 'i1_speed', 'st_speed', 'duration_sector_1', 'duration_sector_2', 'duration_sector_3']].values.reshape(1, -1)

    y_pred = rf_model.predict(X_test)

    print("Predicted: ", y_pred[0], " | Actual: ", y.iloc[-1])

Unnamed: 0,session_key,i1_speed,i2_speed,st_speed,lap_duration,duration_sector_1,duration_sector_2,duration_sector_3,segments_sector_1,segments_sector_2,segments_sector_3,lap_number,compound,tyre_age_at_start,lap_diff
5,9568,335.0,214,308.0,104.134,30.635,44.923,28.576,"[2049, 2049, 2049, 2049, 2049, 2049, 2049, 2049]","[2049, 2049, 2049, 2049, 2049, 2051, 2049, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2049]",2,MEDIUM,0,6
7,9568,335.0,211,309.0,104.194,30.617,44.958,28.619,"[2049, 2048, 2048, 2049, 2049, 2049, 2049, 2049]","[2049, 2048, 2048, 2049, 2049, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",4,MEDIUM,0,6
10,9568,335.0,210,0.0,103.742,30.491,44.752,28.499,"[None, 2048, 2049, 2049, 2049, 2049, 2049, 2049]","[2048, 2049, 2049, 2051, 2048, 2051, 2049, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2064]",7,MEDIUM,0,6
30,9568,322.0,201,298.0,109.684,31.82,48.445,29.419,"[None, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",17,MEDIUM,11,5
32,9568,321.0,199,298.0,110.035,31.911,48.556,29.568,"[None, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",19,MEDIUM,11,5
33,9568,323.0,195,300.0,112.737,31.87,49.347,31.52,"[None, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",20,MEDIUM,11,5
34,9568,322.0,195,0.0,111.105,31.952,49.473,29.68,"[None, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",21,MEDIUM,11,5


Predicted:  108.93240999999995  | Actual:  111.105




Unnamed: 0,session_key,i1_speed,i2_speed,st_speed,lap_duration,duration_sector_1,duration_sector_2,duration_sector_3,segments_sector_1,segments_sector_2,segments_sector_3,lap_number,compound,tyre_age_at_start,lap_diff
0,9567,332.0,211,304.0,105.888,30.929,46.331,28.628,"[2049, 2049, 2049, 2049, 2049, 2049, 2049, 2049]","[2049, 2049, 2049, 2049, 2049, 2049, 2049, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",2,MEDIUM,0,6
3,9567,331.0,204,306.0,105.207,30.868,45.732,28.607,"[2049, 2048, 2049, 2048, 2048, 2048, 2048, 2048]","[2048, 2049, 2049, 2049, 2048, 2049, 2049, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2064]",5,MEDIUM,0,6
25,9567,321.0,199,304.0,110.338,31.776,49.018,29.544,"[2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",17,MEDIUM,11,5
26,9567,322.0,195,309.0,111.118,31.81,49.687,29.621,"[None, 2048, 2048, 2048, 2049, 2049, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",18,MEDIUM,11,5
27,9567,334.0,196,313.0,111.927,31.604,50.713,29.61,"[None, 2048, 2048, 2049, 2049, 2049, 2049, 2049]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",19,MEDIUM,11,5


Predicted:  107.48023000000018  | Actual:  111.927




In [178]:
# full code example using machine learning (pt.6)
# make a df of all the medium data in the table
df_medium = df_merged[df_merged['compound'] == 'MEDIUM'].sort_values(by="lap_diff")

s3 = df_medium[df_medium['lap_diff'] == 5].fillna(0).sort_values(by="lap_number")
s3 = s3[(s3['i1_speed'] >= 315) & (s3['i2_speed'] >= 150)] 

rf_model = RandomForestRegressor()

for session in s3['session_key'].unique().tolist():

    session_data = s3[s3['session_key'] == session]

    display(session_data)

    last = session_data.iloc[-1, :]

    X = session_data[['i1_speed' ,'i2_speed', 'st_speed', 'duration_sector_1', 'duration_sector_2', 'duration_sector_3']]
    y = session_data['lap_duration']

    X_train = X.iloc[:-1, :]
    y_train = y.iloc[:-1]

    rf_model = rf_model.fit(X_train, y_train)

    X_test = last[['i1_speed', 'i1_speed', 'st_speed', 'duration_sector_1', 'duration_sector_2', 'duration_sector_3']].values.reshape(1, -1)

    y_pred = rf_model.predict(X_test)

    print("Predicted: ", y_pred[0], " | Actual: ", y.iloc[-1])

Unnamed: 0,session_key,i1_speed,i2_speed,st_speed,lap_duration,duration_sector_1,duration_sector_2,duration_sector_3,segments_sector_1,segments_sector_2,segments_sector_3,lap_number,compound,tyre_age_at_start,lap_diff
30,9568,322.0,201,298.0,109.684,31.82,48.445,29.419,"[None, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",17,MEDIUM,11,5
32,9568,321.0,199,298.0,110.035,31.911,48.556,29.568,"[None, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",19,MEDIUM,11,5
33,9568,323.0,195,300.0,112.737,31.87,49.347,31.52,"[None, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",20,MEDIUM,11,5
34,9568,322.0,195,0.0,111.105,31.952,49.473,29.68,"[None, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",21,MEDIUM,11,5


Predicted:  110.45956999999987  | Actual:  111.105




Unnamed: 0,session_key,i1_speed,i2_speed,st_speed,lap_duration,duration_sector_1,duration_sector_2,duration_sector_3,segments_sector_1,segments_sector_2,segments_sector_3,lap_number,compound,tyre_age_at_start,lap_diff
25,9567,321.0,199,304.0,110.338,31.776,49.018,29.544,"[2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",17,MEDIUM,11,5
26,9567,322.0,195,309.0,111.118,31.81,49.687,29.621,"[None, 2048, 2048, 2048, 2049, 2049, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",18,MEDIUM,11,5
27,9567,334.0,196,313.0,111.927,31.604,50.713,29.61,"[None, 2048, 2048, 2049, 2049, 2049, 2049, 2049]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",19,MEDIUM,11,5


Predicted:  110.85280000000013  | Actual:  111.927




In [186]:
# full code example using machine learning (pt.7)
# make a df of all the medium data in the table
df_medium = df_merged[df_merged['compound'] == 'MEDIUM'].sort_values(by="lap_diff")

s4 = df_medium[df_medium['lap_diff'] == 5].fillna(0).sort_values(by="lap_number")
s4 = s4[(s4['i1_speed'] >= 315) & (s4['i2_speed'] >= 150)] 

rf_model = RandomForestRegressor()

for session in s4['session_key'].unique().tolist():

    session_data = s4[s4['session_key'] == session]

    display(session_data)

    last = session_data.iloc[-1, :]

    X = session_data[['i1_speed' ,'i2_speed', 'st_speed', 'duration_sector_1', 'duration_sector_2', 'duration_sector_3']]
    y = session_data['lap_duration']

    X_train = X.iloc[:-1, :]
    y_train = y.iloc[:-1]

    rf_model = rf_model.fit(X_train, y_train)

    X_test = last[['i1_speed', 'i1_speed', 'st_speed', 'duration_sector_1', 'duration_sector_2', 'duration_sector_3']].values.reshape(1, -1)

    y_pred = rf_model.predict(X_test)

    print("Predicted: ", y_pred[0], " | Actual: ", y.iloc[-1])

Unnamed: 0,session_key,i1_speed,i2_speed,st_speed,lap_duration,duration_sector_1,duration_sector_2,duration_sector_3,segments_sector_1,segments_sector_2,segments_sector_3,lap_number,compound,tyre_age_at_start,lap_diff
30,9568,322.0,201,298.0,109.684,31.82,48.445,29.419,"[None, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",17,MEDIUM,11,5
32,9568,321.0,199,298.0,110.035,31.911,48.556,29.568,"[None, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",19,MEDIUM,11,5
33,9568,323.0,195,300.0,112.737,31.87,49.347,31.52,"[None, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",20,MEDIUM,11,5
34,9568,322.0,195,0.0,111.105,31.952,49.473,29.68,"[None, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",21,MEDIUM,11,5


Predicted:  110.60870999999989  | Actual:  111.105




Unnamed: 0,session_key,i1_speed,i2_speed,st_speed,lap_duration,duration_sector_1,duration_sector_2,duration_sector_3,segments_sector_1,segments_sector_2,segments_sector_3,lap_number,compound,tyre_age_at_start,lap_diff
25,9567,321.0,199,304.0,110.338,31.776,49.018,29.544,"[2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",17,MEDIUM,11,5
26,9567,322.0,195,309.0,111.118,31.81,49.687,29.621,"[None, 2048, 2048, 2048, 2049, 2049, 2048, 2048]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",18,MEDIUM,11,5
27,9567,334.0,196,313.0,111.927,31.604,50.713,29.61,"[None, 2048, 2048, 2049, 2049, 2049, 2049, 2049]","[2048, 2048, 2048, 2048, 2048, 2048, 2048, 204...","[2048, 2048, 2048, 2048, 2048, 2048, 2048]",19,MEDIUM,11,5


Predicted:  110.81380000000011  | Actual:  111.927


