In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import os
from pathlib import Path
from sklearn.preprocessing import StandardScaler

In [2]:
from config import *
import psycopg2

In [3]:
# Open a PostgreSQL connection. DATABASE, USER, PASSWORD, HOST and PORT are
# presumably supplied by the `from config import *` cell.
conn = psycopg2.connect(
    database=DATABASE,
    user=USER,
    password=PASSWORD,
    host=HOST,
    port=PORT,
)
cursor = conn.cursor()

# A zero-row query is enough to populate cursor.description; the first field
# of each description entry is the column name.
cursor.execute("Select * FROM results_final LIMIT 0")
colnames = [column[0] for column in cursor.description]

In [4]:
# Fetch every row of results_final and wrap the rows in a DataFrame labelled
# with the column names gathered from the cursor description.
cursor.execute("SELECT * FROM results_final")
data = cursor.fetchall()
df = pd.DataFrame(data=data, columns=colnames)
df.head()

Unnamed: 0,index,resultId,raceId,name,year,circuitId,driverId,constructorId,starting_position,finishing_status,train_test,ending_position
0,20323,20323,337,Bahrain Grand Prix,2010,3,4,6,3,1,1,1
1,20324,20324,337,Bahrain Grand Prix,2010,3,13,6,2,1,1,2
2,20325,20325,337,Bahrain Grand Prix,2010,3,1,1,4,1,1,3
3,20326,20326,337,Bahrain Grand Prix,2010,3,20,9,1,1,1,4
4,20327,20327,337,Bahrain Grand Prix,2010,3,3,131,5,1,1,5


In [5]:
# Alternative to the database query above: load the data from a CSV file instead.
# df = pd.read_csv(Path('../Resources/PythonExport/results_final.csv'))
# df.head()

In [6]:
# Keep only the rows where finishing_status equals 1, i.e. the driver
# finished the race.
df1 = df.loc[df["finishing_status"].eq(1)]

In [7]:
# Split the finishers into train and test sets using the train_test column
# (1 selects the training rows, 2 selects the test rows).
# The masks are built from df1 itself so the boolean index always matches the
# frame being filtered, instead of relying on pandas to align a mask taken
# from the larger, unfiltered df.
df_train = df1.loc[df1["train_test"] == 1]
df_test = df1.loc[df1["train_test"] == 2]

In [8]:
# Training inputs (circuit, driver, constructor and starting position) and the
# training target (ending position).
X_train = df_train.loc[:, ['circuitId', 'driverId', 'constructorId', 'starting_position']]
y_train = df_train.loc[:, 'ending_position']

In [9]:
# Fit the scaler on the training features only, then standardise the training
# features with the statistics it learned.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)

In [10]:
from sklearn.linear_model import LogisticRegression

# Multinomial logistic regression: each distinct ending position is treated as
# its own class. Uses the lbfgs solver with max_iter set to 1000.
classifier = LogisticRegression(
    solver='lbfgs',
    multi_class='multinomial',
    max_iter=1000,
)
classifier

LogisticRegression(max_iter=1000, multi_class='multinomial')

In [11]:
# Train the classifier on the standardised training features; the fitted
# estimator is the cell's displayed output.
classifier.fit(X=X_train_scaled, y=y_train)

LogisticRegression(max_iter=1000, multi_class='multinomial')

In [12]:
# Test inputs and target, using the same feature columns as the training set.
X_test = df_test.loc[:, ['circuitId', 'driverId', 'constructorId', 'starting_position']]
y_test = df_test.loc[:, 'ending_position']

In [13]:
# Standardise the test features with the scaler that was fitted on the
# training data, so train and test share the same scale and no test-set
# statistics influence the transformation. No separate scaler is fitted on
# X_test: one fitted here would be unused by the prediction step and would
# only suggest that the test data is scaled independently.
X_test_scaled = scaler.transform(X_test)

In [14]:
# Predict an ending position for every test row and place the predictions next
# to the true values; Z takes its index from y_test.
y_pred = classifier.predict(X_test_scaled)
Z = pd.DataFrame(data={"Prediction": y_pred, "Actual": y_test})

In [15]:
from sklearn.metrics import accuracy_score

# Share of test rows whose predicted ending position matches the actual one
# exactly.
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.15833333333333333


In [16]:
# Merge the Z dataframe into the df_test dataframe on their shared row index,
# adding the Prediction and Actual columns to the test rows.
df_test1 = df_test.merge(Z, how='outer', left_index=True, right_index=True)

In [17]:
# Number of races, counted as the number of test rows whose ending_position is
# "1" (assumes exactly one such row per race).
race_number = int(df_test1["ending_position"].eq("1").sum())
print (f"There were {race_number} races in the 2019 season ")

There were 21 races in the 2019 season 


In [18]:
# Predicting the race winner: count rows where Prediction = 1 and
# ending_position = 1.
# NOTE(review): positions are compared as the string "1"; this appears to
# match how the labels are stored in this dataset — confirm the column dtype.
first_prediction = len(df_test1.loc[(df_test1["Prediction"] == "1") & (df_test1["ending_position"] == "1")])
# Report against the computed race count rather than a hard-coded 21, so the
# message stays correct if the test data changes.
print(f"The model predicted {first_prediction} first positions accurately out of {race_number}")

The model predicted 15 first positions accurately out of 21


In [19]:
# Predicting second place: count rows where Prediction = 2 and
# ending_position = 2.
# NOTE(review): positions are compared as the string "2"; this appears to
# match how the labels are stored in this dataset — confirm the column dtype.
second_prediction = len(df_test1.loc[(df_test1["Prediction"] == "2") & (df_test1["ending_position"] == "2")])
# Report against the computed race count rather than a hard-coded 21, so the
# message stays correct if the test data changes.
print(f"The model predicted {second_prediction} second positions accurately out of {race_number}")

The model predicted 7 second positions accurately out of 21


In [20]:
# Predicting third place: count rows where Prediction = 3 and
# ending_position = 3.
# NOTE(review): positions are compared as the string "3"; this appears to
# match how the labels are stored in this dataset — confirm the column dtype.
third_prediction = len(df_test1.loc[(df_test1["Prediction"] == "3") & (df_test1["ending_position"] == "3")])
# Report against the computed race count rather than a hard-coded 21, so the
# message stays correct if the test data changes.
print(f"The model predicted {third_prediction} third positions accurately out of {race_number}")

The model predicted 5 third positions accurately out of 21
