In [1]:
import pandas as pd
import datetime
import matplotlib.pyplot as plt 
import seaborn as sns 
import numpy as np

from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.preprocessing import MinMaxScaler

In [5]:
import os
import glob

def find_csv_files(root_dir):
    # This will hold all the paths to the files
    csv_files = []

    # Walk through all the directories and subdirectories
    for dirpath, dirnames, filenames in os.walk(root_dir):
        # Use glob to find files ending with 'test.csv' in the current directory
        for file in glob.glob(os.path.join(dirpath, '*test.csv')):
            csv_files.append(file)
    
    return csv_files

# Replace 'your_directory_path' with the path to your directory
root_directory = 'data/RaceMultiOutputModelRandomized/'
files = find_csv_files(root_directory)
print(files)

['data/RaceMultiOutputModelRandomized/categorical/position/Lap7/1copies_X_test.csv', 'data/RaceMultiOutputModelRandomized/categorical/position/Lap7/1copies_y_test.csv', 'data/RaceMultiOutputModelRandomized/categorical/position/Lap9/1copies_X_test.csv', 'data/RaceMultiOutputModelRandomized/categorical/position/Lap9/1copies_y_test.csv', 'data/RaceMultiOutputModelRandomized/categorical/position/Lap8/1copies_X_test.csv', 'data/RaceMultiOutputModelRandomized/categorical/position/Lap8/1copies_y_test.csv', 'data/RaceMultiOutputModelRandomized/categorical/position/Lap6/1copies_X_test.csv', 'data/RaceMultiOutputModelRandomized/categorical/position/Lap6/1copies_y_test.csv', 'data/RaceMultiOutputModelRandomized/categorical/position/Lap1/1copies_X_test.csv', 'data/RaceMultiOutputModelRandomized/categorical/position/Lap1/1copies_y_test.csv', 'data/RaceMultiOutputModelRandomized/categorical/position/Lap1/3copies_X_test.csv', 'data/RaceMultiOutputModelRandomized/categorical/position/Lap1/3copies_y_te

In [8]:
race_ids = pd.DataFrame()
for file in files:
    if '1copies' in file:
        race_ids[file] = pd.read_csv(file)['copy_id']

In [37]:
def model(X):
    return np.argmin(X.values, axis=1)

def process_Y(y):
    return np.argmin(y.values, axis=1)

# Baseline Model -- argmin lap n
Use positions at nth lap as prediction of finishing position (predicting first place only)

In [62]:
def calculate_fp_accuracy(X, y):
    y_pred = model(X)
    y_true = process_Y(y)
    return np.mean(y_pred == y_true)

In [81]:
fp_accuracy = pd.DataFrame(columns=['lap','train','valid','test'])
for lap_data in ['Lap{}'.format(i) for i in range(1,11)]:

    X_train = pd.read_csv('data/RaceMultiOutputModelRandomized/categorical/position/{}/1copies_X_train.csv'.format(lap_data)).drop(columns=['copy_id'],axis=1)
    y_train = pd.read_csv('data/RaceMultiOutputModelRandomized/categorical/position/{}/1copies_y_train.csv'.format(lap_data)).drop(columns=['copy_id'],axis=1)
    X_valid = pd.read_csv('data/RaceMultiOutputModelRandomized/categorical/position/{}/1copies_X_valid.csv'.format(lap_data)).drop(columns=['copy_id'],axis=1)
    y_valid = pd.read_csv('data/RaceMultiOutputModelRandomized/categorical/position/{}/1copies_y_valid.csv'.format(lap_data)).drop(columns=['copy_id'],axis=1)
    X_test = pd.read_csv('data/RaceMultiOutputModelRandomized/categorical/position/{}/1copies_X_test.csv'.format(lap_data)).drop(columns=['copy_id'],axis=1)
    y_test = pd.read_csv('data/RaceMultiOutputModelRandomized/categorical/position/{}/1copies_y_test.csv'.format(lap_data)).drop(columns=['copy_id'],axis=1)

    train_accuracy = calculate_fp_accuracy(X_train, y_train)
    valid_accuracy = calculate_fp_accuracy(X_valid, y_valid)
    test_accuracy = calculate_fp_accuracy(X_test, y_test)
    
    fp_accuracy.loc[len(fp_accuracy)] = [lap_data, train_accuracy, valid_accuracy, test_accuracy]
fp_accuracy

Unnamed: 0,lap,train,valid,test
0,Lap1,0.563725,0.470588,0.509804
1,Lap2,0.552826,0.54902,0.54902
2,Lap3,0.555283,0.568627,0.568627
3,Lap4,0.55774,0.568627,0.588235
4,Lap5,0.577396,0.568627,0.607843
5,Lap6,0.577396,0.588235,0.607843
6,Lap7,0.570025,0.588235,0.607843
7,Lap8,0.567568,0.588235,0.607843
8,Lap9,0.574939,0.568627,0.607843
9,Lap10,0.58231,0.588235,0.588235


# Other

In [66]:
def top_n_accuracy(y_true, y_pred, n=3):
    """
    Calculate the Top-N accuracy of the predictions.

    :param y_true: NumPy array of actual winner indices.
    :param y_pred: NumPy array of predicted rankings (each row contains indices ranked from most likely to least likely to win).
    :param n: Top N positions to consider for accuracy.
    :return: Top-N accuracy as a float.
    """

    array = []


    zero_off = 0
    one_off = 0
    two_off = 0

    for i, race in enumerate(y_true):
        race_1st = np.argpartition(race, 0)[0] #race.argsort()[0]##np.argpartition(race, 0)[0]
        #race_1st = race.argsort().argsort()[0]
        #assert race_1st==race_1st2
        #array.append(race_1st)
        race_2nd = np.argpartition(race, 1)[1]
        race_3rd = np.argpartition(race, 2)[2]

        pred = y_pred[i]
        if pred == race_1st:
            zero_off +=1
        elif pred == race_2nd:
            one_off += 1
        elif pred == race_3rd:
            two_off +=1

    print(zero_off, one_off, two_off)

    zero_off_acc = zero_off/len(y_pred)
    one_off_acc = (zero_off + one_off)/len(y_pred)
    two_off_acc = (zero_off + one_off + two_off)/len(y_pred)

    return zero_off_acc, one_off_acc, two_off_acc

In [67]:
top_n_accuracy(y_train.values, preds, n=3)

235 67 20


(0.5773955773955773, 0.742014742014742, 0.7911547911547911)

In [94]:
X_train

Unnamed: 0,driver1_lap1,driver2_lap1,driver3_lap1,driver4_lap1,driver5_lap1,driver6_lap1,driver7_lap1,driver8_lap1,driver9_lap1,driver10_lap1,...,driver15_lap1,driver16_lap1,driver17_lap1,driver18_lap1,driver19_lap1,driver20_lap1,driver21_lap1,driver22_lap1,driver23_lap1,driver24_lap1
0,4,3,5,9,2,1,6,19,12,17,...,11,14,10,15,13,25,25,25,25,25
1,5,13,3,10,1,2,14,9,17,18,...,15,8,19,12,11,16,20,25,25,25
2,2,10,3,4,1,9,8,5,15,21,...,22,7,13,17,19,16,18,20,25,25
3,4,9,13,19,5,1,6,2,14,15,...,17,12,20,11,16,18,25,25,25,25
4,3,8,7,5,2,21,13,1,15,14,...,17,10,16,4,18,19,20,25,25,25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
449,1,2,3,4,5,6,7,8,9,10,...,15,16,17,18,19,20,25,25,25,25
450,1,2,3,4,5,6,7,8,9,10,...,15,16,17,18,19,20,25,25,25,25
451,1,2,3,4,5,6,7,8,9,10,...,15,16,17,18,19,25,25,25,25,25
452,1,2,3,4,5,6,7,8,9,10,...,15,16,17,18,25,25,25,25,25,25


In [69]:
preds

array([ 5,  4,  4,  5,  7,  3,  5,  4,  3,  5,  3,  0,  5,  4,  4,  3,  1,
        2,  2,  2,  3,  2,  4,  4,  2,  7,  4,  2,  6, 12,  8,  2,  4,  7,
        7,  6,  6,  7,  7,  6,  2,  7,  7,  7,  7,  6,  7,  2,  7,  0,  0,
        0,  2,  0,  2, 14,  0,  1,  0,  0,  1,  0,  6,  2,  0,  0,  0,  0,
        3,  2,  0,  2,  2,  2,  0,  0,  0,  0,  1,  1,  0,  0,  0,  0,  0,
        4,  0,  5,  0,  0,  0,  3,  0,  5,  0,  0,  5,  0,  0,  1,  1,  0,
        0,  0,  1,  2,  1,  1,  5,  4,  0,  1,  0,  4,  0,  0,  0,  7,  1,
        3,  0,  0,  3,  3,  5,  3,  6,  1,  7,  0,  5,  2,  0,  0,  0,  8,
        6,  6,  0,  3,  1,  7,  5,  0,  0,  6,  1,  1,  0,  5,  5,  4,  4,
        8,  6,  6,  7,  5,  0,  4,  9,  8,  0,  8,  9,  9,  4, 14,  4,  4,
        1,  8,  4,  0,  0,  0,  0,  0,  3,  4,  2,  2,  5,  2,  0,  5,  5,
        5,  0,  4,  4,  0,  1,  1,  4,  1,  5,  1,  4,  0,  5,  1,  1,  4,
       15,  1,  1,  0,  1,  1, 18,  0, 17, 18,  1,  1, 18, 13,  1,  3, 18,
        1, 17, 13, 13,  9

In [51]:
y_train

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,15,16,17,18,19,20,21,22,23,24
0,25,3,25,4,1,2,5,25,7,10,...,25,11,6,25,25,25,25,25,25,25
1,3,7,2,25,1,25,4,25,6,8,...,25,25,9,5,10,11,25,25,25,25
2,25,5,3,25,1,2,25,7,8,25,...,25,6,25,25,25,25,25,10,25,25
3,2,25,25,9,4,1,8,3,25,10,...,11,25,25,25,12,13,25,25,25,25
4,2,4,6,3,1,11,8,25,25,7,...,25,25,25,25,9,25,10,25,25,25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
449,2,25,1,3,25,5,15,4,8,7,...,12,9,14,11,13,25,25,25,25,25
450,9,2,3,6,7,1,10,15,5,17,...,8,25,16,14,11,25,25,25,25,25
451,1,2,18,3,7,8,6,9,4,10,...,13,14,15,12,11,25,25,25,25,25
452,9,8,4,5,7,3,11,2,12,10,...,16,17,14,1,25,25,25,25,25,25


In [73]:
x = pd.DataFrame({'actuals':actuals,'array':array})
x

Unnamed: 0,actuals,array
0,4,16
1,4,2
2,4,10
3,5,1
4,4,1
...,...,...
449,2,1
450,5,8
451,0,0
452,17,8


In [57]:
(preds==actuals)

array([False,  True,  True,  True, False, False, False,  True,  True,
        True, False, False, False, False, False,  True, False,  True,
        True, False,  True,  True,  True,  True,  True,  True, False,
       False, False, False, False, False, False,  True,  True, False,
        True,  True,  True, False,  True, False,  True,  True, False,
        True, False, False,  True, False,  True, False,  True,  True,
       False, False, False, False, False,  True,  True, False, False,
       False,  True, False, False, False, False, False, False, False,
        True, False,  True, False,  True,  True,  True, False, False,
       False,  True,  True, False,  True,  True, False,  True, False,
        True, False, False, False,  True,  True,  True, False,  True,
       False, False,  True,  True,  True, False,  True, False,  True,
       False, False,  True,  True,  True, False, False,  True, False,
       False, False, False,  True,  True, False, False, False,  True,
       False,  True,