# Final position in variable interval


## Dependencies


The dependencies used are as follows:


In [1]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

from xgboost import XGBClassifier, XGBRegressor, XGBRanker

import sys

# Add the parent directory to the import path so the project-local `utils`
# package imported just below can be resolved from this notebook's folder.
sys.path.append("..")

from utils.visualization import model_prediction

import pandas as pd
import numpy as np

# Disable pandas' display truncation: every column and every row is rendered.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import warnings

# NOTE(review): this silences *all* warnings for the rest of the notebook,
# including deprecation/convergence/chained-assignment warnings raised by the
# libraries used below — consider narrowing the filter.
warnings.simplefilter("ignore")

## Initialization


First, we run the prediction for the model that predicts each driver's final position within a ±1 interval.


In [2]:
# Training rows: only the seasons from 2019 onwards are kept.
df_train = pd.read_csv("../assets/data/processed/final_model.csv")
df_train = df_train[df_train["raceYear"] >= 2019]

# Feature matrix: take the last len(df_train) rows, i.e. the rows that are
# expected to correspond to the filtered seasons. An explicit copy is taken so
# that columns added to X_train later are written to an independent frame
# rather than to a slice of the full feature table.
X_train = pd.read_csv("../assets/data/processed/final_model_X.csv")
X_train = X_train.iloc[-len(df_train) :].copy()

# The tail slice is only valid if the selected feature rows are exactly the
# rows kept in df_train (same index labels, same order). Fail loudly instead
# of training on misaligned features/targets; this also catches the empty
# filter case, where `iloc[-0:]` would return every row.
assert X_train.index.equals(df_train.index), (
    "final_model_X.csv rows are not aligned with the filtered final_model.csv rows"
)

# Target: the finishing position of each driver.
y_train = df_train["positionFinal"]

In [3]:
# Test split: the rows from testing.csv, the feature matrix from testing_X.csv
# and the target column taken from the former.
df_test = pd.read_csv("../assets/data/processed/testing.csv")
X_test = pd.read_csv("../assets/data/processed/testing_X.csv")
y_test = df_test["positionFinal"]

# Identifier columns passed to `model_prediction` through its `columns` argument.
columns = {key: df_test[key] for key in ("raceRound", "driverRef")}

## K-Nearest Neighbors


In [4]:
# Feature subset used by the k-nearest-neighbours model (order preserved).
attributes = [
    "positionGrid", "driverRef", "raceYear", "raceRound",
    "driverWins", "constructorWins", "driverAgeToday",
    "weatherCold", "weatherDry", "weatherWet",
    "circuitLaps", "driverExpYear", "driverExpRace",
    "driverPos2", "driverPos3", "driverPos4", "driverPos5",
    "driverPos8", "driverPos9", "driverPos12", "driverPos13",
    "driverPos17", "driverPos18",
    "driverRac", "driverPac",
]

# 16 neighbours, Manhattan distance, votes weighted by inverse distance.
knn = KNeighborsClassifier(
    n_neighbors=16,
    weights="distance",
    metric="manhattan",
)

# Fit on the training split and report predictions for the test split.
model_prediction(
    knn, X_train[attributes], y_train, X_test[attributes], y_test,
    columns=columns, multiclass=True,
)

Unnamed: 0,raceRound,driverRef,positionFinal,positionPred
0,1,max_verstappen,1,2
1,1,perez,2,1
2,1,sainz,3,8
3,1,leclerc,4,9
4,1,russell,5,9
5,1,norris,6,14
6,1,hamilton,7,8
7,1,piastri,8,16
8,1,alonso,9,8
9,1,stroll,10,14


## Decision Tree


In [5]:
# Feature subset used by the decision-tree model.
attributes = [
    "positionGrid",
    "driverNationality",
    "raceYear",
    "grandPrix",
    "raceTime",
    "circuitRef",
    "circuitLocation",
    "circuitCountry",
    "circuitLat",
    "circuitLng",
    "circuitAlt",
    "constructorWins",
    "q1",
    "q2",
    "q3",
    "weather",
    "weatherWarm",
    "weatherCold",
    "weatherDry",
    "weatherWet",
    "weatherCloudy",
    "circuitType",
    "circuitDirection",
    "circuitLength",
    "circuitLaps",
    "circuitDist",
    "qMin",
    "qMax",
    "qAvg",
    "driverAgeStarted",
    "driverPodiums",
    "driverPos3",
    "driverPos4",
    "driverPos5",
    "driverPos8",
    "driverPos10",
    "driverPos11",
    "driverPos12",
    "driverPos15",
    "driverPos17",
    "driverPos18",
    "driverPac",
]
# random_state is fixed so that tie-breaking between equally good splits is
# deterministic and the cell gives the same result on every re-run.
tree = DecisionTreeClassifier(
    max_depth=4, criterion="gini", splitter="best", random_state=42
)
# Fit on the training split and report predictions for the test split.
model_prediction(
    tree,
    X_train[attributes],
    y_train,
    X_test[attributes],
    y_test,
    columns=columns,
    multiclass=True,
)

Unnamed: 0,raceRound,driverRef,positionFinal,positionPred
0,1,max_verstappen,1,2
1,1,perez,2,2
2,1,sainz,3,4
3,1,leclerc,4,4
4,1,russell,5,4
5,1,norris,6,6
6,1,hamilton,7,7
7,1,piastri,8,7
8,1,alonso,9,6
9,1,stroll,10,7


## Random Forest


In [6]:
# Feature subset used by the random-forest model.
attributes = [
    "positionGrid",
    "constructorRef",
    "raceYear",
    "circuitCountry",
    "driverWins",
    "weatherDry",
    "circuitType",
    "qMax",
    "driverExpYear",
    "driverPos3",
    "driverPos5",
    "driverPos7",
    "driverPos8",
    "driverPos13",
]
# random_state is fixed so that bootstrap sampling and per-split feature
# sampling are reproducible; without it every run yields a different forest.
random_forest = RandomForestClassifier(
    max_depth=7, n_estimators=292, criterion="entropy", random_state=42
)
# Fit on the training split and report predictions for the test split.
model_prediction(
    random_forest,
    X_train[attributes],
    y_train,
    X_test[attributes],
    y_test,
    columns=columns,
    multiclass=True,
)

Unnamed: 0,raceRound,driverRef,positionFinal,positionPred
0,1,max_verstappen,1,1
1,1,perez,2,4
2,1,sainz,3,3
3,1,leclerc,4,3
4,1,russell,5,4
5,1,norris,6,6
6,1,hamilton,7,7
7,1,piastri,8,8
8,1,alonso,9,4
9,1,stroll,10,13


## Multilayer Perceptron


In [7]:
# Feature subset used by the multilayer-perceptron model.
attributes = [
    "positionGrid",
    "driverNumber",
    "constructorNationality",
    "circuitCountry",
    "circuitLat",
    "circuitLng",
    "driverWins",
    "weatherWarm",
    "weatherCloudy",
    "circuitLaps",
    "qMin",
    "driverPodiums",
    "driverPos3",
    "driverPos6",
    "driverPos9",
    "driverPos15",
    "driverPos17",
    "driverRac",
    "driverAwa",
]
# Two hidden layers (47 and 25 units) with logistic activation. random_state
# is fixed so that weight initialisation and batch shuffling are reproducible.
nn = MLPClassifier(
    activation="logistic", hidden_layer_sizes=(47, 25), random_state=42
)
# Fit on the training split and report predictions for the test split.
model_prediction(
    nn,
    X_train[attributes],
    y_train,
    X_test[attributes],
    y_test,
    columns=columns,
    multiclass=True,
)

Unnamed: 0,raceRound,driverRef,positionFinal,positionPred
0,1,max_verstappen,1,1
1,1,perez,2,6
2,1,sainz,3,2
3,1,leclerc,4,4
4,1,russell,5,1
5,1,norris,6,6
6,1,hamilton,7,6
7,1,piastri,8,9
8,1,alonso,9,6
9,1,stroll,10,14


## XGBClassifier


In [8]:
# Feature subset used by the XGBoost classifier (order preserved).
attributes = [
    "carNumber", "positionGrid", "driverRef", "driverNumber",
    "constructorRef", "constructorNationality",
    "raceYear", "raceRound", "grandPrix", "raceTime",
    "circuitRef", "circuitLat", "circuitLng", "circuitAlt",
    "driverWins", "constructorWins",
    "q3", "driverAgeAtRace",
    "weatherWarm", "weatherDry",
    "circuitType", "circuitDirection", "circuitLength",
    "qMin", "qMax", "qAvg",
    "driverPodiums",
    "driverPos2", "driverPos5", "driverPos7", "driverPos8", "driverPos9",
    "driverPos10", "driverPos15", "driverPos17",
    "driverExp", "driverRac", "driverOvr",
]

# Hyper-parameters are kept verbatim (presumably the result of a prior search).
model = XGBClassifier(
    objective="multi:softmax",
    n_estimators=334,
    max_depth=14,
    learning_rate=0.08582939395916275,
    min_child_weight=1,
    gamma=0.2007647640582077,
    subsample=0.8028533603905642,
    colsample_bytree=0.7154227055164297,
    reg_alpha=4,
    reg_lambda=55,
)

# Targets are shifted by -1 on both splits (presumably so the 1-based positions
# become the 0-based class labels the classifier expects); the reported
# positions are therefore 0-based as well.
model_prediction(
    model, X_train[attributes], y_train - 1, X_test[attributes], y_test - 1,
    columns=columns, multiclass=True,
)

Unnamed: 0,raceRound,driverRef,positionFinal,positionPred
0,1,max_verstappen,0,0
1,1,perez,1,3
2,1,sainz,2,3
3,1,leclerc,3,2
4,1,russell,4,2
5,1,norris,5,5
6,1,hamilton,6,4
7,1,piastri,7,5
8,1,alonso,8,5
9,1,stroll,9,12


## XGBRegressor


In [9]:
# Feature subset used by the XGBoost regressor.
attributes = [
    "positionGrid",
    "driverRef",
    "weatherWet",
    "circuitType",
    "driverPodiums",
    "driverPos2",
    "driverPos3",
    "driverPos5",
    "driverPos6",
    "driverPos7",
    "driverPos8",
    "driverPos10",
    "driverPos12",
    "driverPos14",
    "driverPos15",
    "driverPos17",
    "driverPac",
    "driverOvr",
]
# "reg:squarederror" is the current name of the squared-error objective; the
# previous "reg:linear" spelling is deprecated in XGBoost and only worked
# because the resulting warning was silenced. The loss itself is unchanged.
model = XGBRegressor(
    objective="reg:squarederror",
    learning_rate=0.18517350997324947,
    n_estimators=395,
    max_depth=16,
    min_child_weight=94,
    gamma=0.16242064862534417,
    subsample=0.6071469551618005,
    colsample_bytree=0.7848869553625191,
    reg_alpha=64,
    reg_lambda=100,
)
# Targets are shifted by -1 on both splits, so the reported positions are
# 0-based. NOTE(review): a regressor does not need this shift — it appears to
# be kept for parity with the classifier cell; confirm before changing.
model_prediction(
    model,
    X_train[attributes],
    y_train - 1,
    X_test[attributes],
    y_test - 1,
    columns=columns,
    multiclass=True,
)

Unnamed: 0,raceRound,driverRef,positionFinal,positionPred
0,1,max_verstappen,0,3
1,1,perez,1,6
2,1,sainz,2,5
3,1,leclerc,3,5
4,1,russell,4,5
5,1,norris,5,7
6,1,hamilton,6,7
7,1,piastri,7,8
8,1,alonso,8,6
9,1,stroll,9,10


## XGBRanker


In [10]:
# Build a query id ("qid") that is unique per race across all seasons.
# Highest round number of each training season, indexed by year.
rounds_per_year = df_train.groupby("raceYear")["raceRound"].max()
# Offset of a season = total rounds of all earlier seasons (0 for the first).
# Keying the offsets by year avoids the hard-coded first-season lookup and
# stays correct if the training window changes or a season is missing.
season_offset = rounds_per_year.cumsum().shift(1, fill_value=0)

# The assignment aligns on the row index shared by df_train and X_train.
X_train["qid"] = df_train["raceRound"] + df_train["raceYear"].map(season_offset)
# Test races continue the numbering right after the last training race.
X_test["qid"] = df_test["raceRound"] + X_train["qid"].max()

# Feature subset used by the XGBoost ranker; "qid" carries the race grouping.
attributes = [
    "carNumber",
    "positionGrid",
    "driverRef",
    "driverNumber",
    "driverNationality",
    "constructorNationality",
    "raceRound",
    "raceTime",
    "circuitAlt",
    "driverWins",
    "constructorWins",
    "q1",
    "q2",
    "weather",
    "weatherWarm",
    "weatherDry",
    "weatherWet",
    "weatherCloudy",
    "circuitType",
    "circuitLength",
    "circuitLaps",
    "circuitDist",
    "qMin",
    "qMax",
    "driverAgeStarted",
    "driverExpYear",
    "driverExpRace",
    "driverPodiums",
    "driverPos2",
    "driverPos3",
    "driverPos4",
    "driverPos5",
    "driverPos6",
    "driverPos7",
    "driverPos8",
    "driverPos9",
    "driverPos10",
    "driverPos11",
    "driverPos14",
    "driverPos16",
    "driverPos18",
    "driverExp",
    "driverAwa",
    "driverPac",
    "qid",
]
model = XGBRanker(
    objective="rank:pairwise",
    learning_rate=0.04189820511305815,
    n_estimators=192,
    max_depth=9,
    min_child_weight=54,
    gamma=0.8661594450075584,
    subsample=0.8739552860337986,
    colsample_bytree=0.7763941630015024,
    reg_alpha=7,
    reg_lambda=14,
)
# rank=True tells the project helper that this is a ranking model.
model_prediction(
    model,
    X_train[attributes],
    y_train,
    X_test[attributes],
    y_test,
    columns=columns,
    rank=True,
    multiclass=True,
)

Unnamed: 0,raceRound,driverRef,positionFinal,positionPred
0,1,max_verstappen,1,1
1,1,perez,2,2
2,1,sainz,3,5
3,1,leclerc,4,4
4,1,russell,5,3
5,1,norris,6,6
6,1,hamilton,7,8
7,1,piastri,8,9
8,1,alonso,9,7
9,1,stroll,10,10


## Results


We can also observe that, in general, the percentage is met. Moreover, as the races go by, the hits become more consistent, which may be because several attributes provide more information as the season progresses. Similarly, the first positions are also predicted more consistently, probably because the drivers at the front keep their positions with respect to the grid more than those at the back.
