# Racing Profile

Thinking about all the features that would go into a single observation. 
Observation unit: driver
Dependent variable: Grand Prix finishing position

Independent variables:??

    * Model 1: Features from Practice 1-3 and Qualifying
    * Model 2: Model 1 + features from first half of race
    * Model 3: Model 2 + features from past performance

What Features?

Model 1: Features from Practice 1-3 and Qualifying - Following features for each session (x4)

    * Min/Max/Avg lap times
    * Number of stints (stint_number max)
    * Sum of # of practice laps?
    * Summary stats for each lap? or stint? (break, rpm, throttle, speed, drs) (min/max/avg)
        * max break, min/max/avg rpm, max/avg throttle, min/max/avg speed
    * (Maybe) Number of gear changes (n_gear) 
    * Avg Pit duration
    * Number of Pits?


In [7]:
#| label: import
from urllib.request import urlopen
import pandas as pd
import json
from datetime import datetime

query_base = "https://api.openf1.org/v1/"

In [14]:
query_meetings = query_base+"meetings?year=2023"

response = urlopen(query_meetings)
data = json.loads(response.read().decode('utf-8'))
meetings_df = pd.json_normalize(data)

print(len(meetings_df))

23


In [None]:
meeting_list = meetings_df['meeting_key'].to_list()

# print(meeting_list)

for meeting in meeting_list:
    query_sessions = query_base+"sessions?meeting_key="+str(meeting)
    print(meeting)

    # response = urlopen(query_sessions)
    # data = json.loads(response.read().decode('utf-8'))
    # sessions_df = pd.json_normalize(data)

    # sessions = list(sessions_df['session_key'])

    meeting_sessions_df.loc[meeting_sessions_df['meeting_key'] == meeting, 'sessions_list'] = [['p1','p2','p3','q','r']]


print(meeting_sessions_df.head())

1140


ValueError: Must have equal len keys and value when setting with an ndarray

In [None]:
# get session numbers for practice and qualifiying, preloaded meeting_number for now
query_sessions = query_base+"sessions?meeting_key=1229"

response = urlopen(query_sessions)
data = json.loads(response.read().decode('utf-8'))
sessions_df = pd.json_normalize(data)

# get just the session keys 
sessions = list(sessions_df['session_key'])
# remove the real race
del sessions[-1]
print(sessions)

[9465, 9466, 9467, 9468, 9472]


In [1]:
# I'll just do one driver for now
driver_number = str(16)

# create features data frame
X = pd.DataFrame()

# create list of features for a individual driver
driver_feats = [driver_number]

session_sub = sessions[0]


for session in session_sub:
    # creat list to store all data for this session
    session_feats = []

    # LAPS QUERY
    query_laps = query_base+"laps?driver_number="+driver_number+"&session_key="+str(session)

    # call api and convert to df
    response = urlopen(query_laps)
    data = json.loads(response.read().decode('utf-8'))
    laps_df = pd.json_normalize(data)

    # extract lap infor for current session
    min_lap = laps_df['lap_duration'].min()
    max_lap = laps_df['lap_duration'].max()
    avg_lap = float(round(laps_df['lap_duration'].mean(),3))
    num_laps = laps_df['lap_number'].max()

    # PARSE THE LAPS DATA BY TIME
    # laps_times_df will be used for car_data queries
    lap_times = laps_df[['lap_number','date_start','lap_duration']]

    # Convert date_start to datetime if it's not already in datetime format
    lap_times['date_start'] = pd.to_datetime(lap_times['date_start'])

    # use the next lap start as the end time exept for the last lap, which will be calculated with lap duration
    lap_times['date_end'] = lap_times['date_start'].shift(-1).fillna(lap_times['date_start'] + pd.to_timedelta(lap_times['lap_duration'], unit='s'))

    # convert back to string
    lap_times['date_start'] = lap_times['date_start'].dt.strftime('%Y-%m-%dT%H:%M:%S.%f') + lap_times['date_start'].dt.strftime('%z').str[:3] + ':' + lap_times['date_start'].dt.strftime('%z').str[3:]
    lap_times['date_end'] = lap_times['date_end'].dt.strftime('%Y-%m-%dT%H:%M:%S.%f') + lap_times['date_end'].dt.strftime('%z').str[:3] + ':' + lap_times['date_end'].dt.strftime('%z').str[3:]

    # find the lap number for their best lap
    min_lap_num = laps_df[laps_df['lap_duration']==min_lap]['lap_number'].to_list()[0]


    # CONDUCT A CAR DATA QUERY ON THE MINIMUM LAP
    # base car data query for time filter to be added to
    query_car_base = query_base+"car_data?driver_number="+driver_number+"&session_key="+str(session)

    # create staret and end time for car data query
    start_time = lap_times[lap_times['lap_number']==min_lap_num]['date_start'].to_list()[0]
    end_time = lap_times[lap_times['lap_number']==min_lap_num]['date_end'].to_list()[0]

    # query for lap specific times
    query_car = query_car_base + "&date>="+str(start_time)+"&date<="+str(end_time)

    # call api for car data with lap time query
    response = urlopen(query_car)
    data = json.loads(response.read().decode('utf-8'))
    car_df = pd.json_normalize(data)

    # get summary stats for the lap
    max_break = car_df['brake'].max()
    max_rpm = car_df['rpm'].max()
    min_rpm = car_df['rpm'].min()
    avg_rpm = round(car_df['rpm'].mean())
    max_throttle = car_df['throttle'].max()
    avg_throttle = round(car_df['throttle'].mean())
    min_speed = car_df['speed'].min()
    max_speed = car_df['speed'].max()
    avg_speed = round(car_df['speed'].mean())

    # create list of car_data stats per lap
    min_lap_stats = [max_break, min_rpm, max_rpm, avg_rpm, max_throttle, avg_throttle, min_speed, max_speed, avg_speed]


    # STINTS QUERY FOR NUMBER OF STINTS
    query_stints = query_base + "stints?driver_number="+driver_number+"&session_key="+str(session)

    # call api and convert to df
    response = urlopen(query_stints)
    data = json.loads(response.read().decode('utf-8'))
    stints_df = pd.json_normalize(data)

    # extract max stint number
    num_stints = stints_df['stint_number'].max()


    # PTIS QUERY
    query_pits = query_base + "pit?driver_number="+driver_number+"&session_key="+str(session)

    # call api and convert to df
    response = urlopen(query_pits)
    data = json.loads(response.read().decode('utf-8'))
    pits_df = pd.json_normalize(data)

    # extract num of pits and avg pit duration
    num_pits = len(pits_df)
    avg_pit_time = float(round(pits_df['pit_duration'].mean(),1))

    # WEATHER QUERY
    query_wx = query_base + "weather?&session_key="+str(session)

    # call api and convert to df
    response = urlopen(query_wx)
    data = json.loads(response.read().decode('utf-8'))
    weather_df = pd.json_normalize(data)

    ### parse weather data

    # max_rain
    # max_wind
    # avg_air_temp
    # avg_track_temp

    wx_stats = ['max_rain', 'max_wind', 'avg_air_temp', 'avg_track_temp']


    # RADIO QUERY
    query_radio = query_base + "team_radio?driver_number="+driver_number+"&session_key="+str(session)

    # call api and convert to df
    response = urlopen(query_radio)
    data = json.loads(response.read().decode('utf-8'))
    radio_df = pd.json_normalize(data)

    # numer of radio calls during the session
    num_radios = len(radio_df)


    # AFTER ALL QUERIES PER SESSION
    # append to driver features list
    session_feats = session_feats + [min_lap, max_lap, avg_lap, num_laps, num_stints] + [num_pits, avg_pit_time] + min_lap_stats + wx_stats + num_radios


print(driver_feats)


NameError: name 'pd' is not defined

In [None]:
print(len(driver_feats))

# colnames = ['min_lap_p1', 'max_lap_p1', 'avg_lap_p1', 'num_laps_p1, 'num_stints_p1', 'num_pits_p1', 'avg_pit_time_p1',
#            'max_break_p1', 'min_rpm_p1', 'max_rpm_p1', 'avg_rpm_p1', 'max_throttle_p1', 'avg_throttle_p1', 'min_speed_p1',
#            'max_speed_p1', 'avg_speed_p1', 'max_rain_p1', 'max_wind_p1', 'avg_air_temp_p1', 'avg_track_temp_p1', 'num_radios_p1',
#            'min_lap_p2', 'max_lap_p2', 'avg_lap_p2', 'num_laps_p2, 'num_stints_p2', 'num_pits_p2', 'avg_pit_time_p2',
#            'max_break_p2', 'min_rpm_p2', 'max_rpm_p2', 'avg_rpm_p2', 'max_throttle_p2', 'avg_throttle_p2', 'min_speed_p2',
#            'max_speed_p2', 'avg_speed_p2', 'max_rain_p2', 'max_wind_p2', 'avg_air_temp_p2', 'avg_track_temp_p2', 'num_radios_p2',
#            'min_lap_p3', 'max_lap_p3', 'avg_lap_p3', 'num_laps_p3, 'num_stints_p3', 'num_pits_p3', 'avg_pit_time_p3',
#            'max_break_p3', 'min_rpm_p3', 'max_rpm_p3', 'avg_rpm_p3', 'max_throttle_p3', 'avg_throttle_p3', 'min_speed_p3',
#            'max_speed_p3', 'avg_speed_p3','max_rain_p3', 'max_wind_p3', 'avg_air_temp_p3', 'avg_track_temp_p3', 'num_radios_p3',
#            'min_lap_q', 'max_lap_q', 'avg_lap_q', 'num_laps_q, 'num_stints_q','num_pits_q', 'avg_pit_time_q',
#            'max_break_q', 'min_rpm_q', 'max_rpm_q', 'avg_rpm_q', 'max_throttle_q', 'avg_throttle_q', 'min_speed_q', 
#            'max_speed_q', 'avg_speed_q','max_rain_q', 'max_wind_q', 'avg_air_temp_q', 'avg_track_temp_q', 'num_radios_q',

65


Looking at car data for individual laps
* max break, min/max/avg rpm, max/avg throttle, min/max/avg speed

In [182]:
lap_num = 7

# base car data query for time filter to be added to
query_car_base = query_base+"car_data?driver_number="+driver_number+"&session_key="+str(session)

# create staret and end time for car data query
start_time = lap_times[lap_times['lap_number']==lap_num]['date_start'].to_list()[0]
end_time = lap_times[lap_times['lap_number']==lap_num]['date_end'].to_list()[0]

# query for lap specific times
query_car = query_car_base + "&date>="+str(start_time)+"&date<="+str(end_time)

# call api for car data with lap time query
response = urlopen(query_car)
data = json.loads(response.read().decode('utf-8'))
car_df = pd.json_normalize(data)
print(len(car_df))

max_break = car_df['brake'].max()
max_rpm = car_df['rpm'].max()
min_rpm = car_df['rpm'].min()
avg_rpm = round(car_df['rpm'].mean())
max_throttle = car_df['throttle'].max()
avg_throttle = round(car_df['throttle'].mean())
min_speed = car_df['speed'].min()
max_speed = car_df['speed'].max()
avg_speed = round(car_df['speed'].mean())

car_data_list = [max_break, min_rpm, max_rpm, avg_rpm, max_throttle, avg_throttle, min_speed, max_speed, avg_speed]

print(car_data_list)


2758
[104, 0, 12084, 2014, 104, 79, 0, 276, 26]
