In [1]:
import pandas as pd

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [3]:
models = []

window_sizes = [5,10,15,18,20]
for window in window_sizes:
    print('='*60)
    print(f'Training Logistic Regression Model with window size={window} games')
    window_df = pd.read_excel(f'data/train_{window}.xlsx')
    
    X = window_df.drop(columns=['Target'])
    y = window_df['Target']
    
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X.drop(columns=['Date']))
    
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, shuffle=False)
    
    lr_model = LogisticRegression()
    lr_model.fit(X_train, y_train)
    
    y_pred = lr_model.predict(X_test)
    lr_acc = accuracy_score(y_test, y_pred)
    models.append({'Window': window, 'Model': lr_model, 'Scaler': scaler, 'Accuracy': lr_acc})
    
    print(f'Model Accuracy: {lr_acc}')
    print('Report:\n', classification_report(y_test, y_pred))
    print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
    print('='*60)

Training Logistic Regression Model with window size=5 games
Model Accuracy: 0.5397849462365591
Report:
               precision    recall  f1-score   support

           0       0.54      0.58      0.56       232
           1       0.54      0.50      0.52       233

    accuracy                           0.54       465
   macro avg       0.54      0.54      0.54       465
weighted avg       0.54      0.54      0.54       465

Confusion Matrix:
 [[135  97]
 [117 116]]
Training Logistic Regression Model with window size=10 games
Model Accuracy: 0.5563218390804597
Report:
               precision    recall  f1-score   support

           0       0.55      0.59      0.57       217
           1       0.56      0.52      0.54       218

    accuracy                           0.56       435
   macro avg       0.56      0.56      0.56       435
weighted avg       0.56      0.56      0.56       435

Confusion Matrix:
 [[128  89]
 [104 114]]
Training Logistic Regression Model with window size=1

In [4]:
lr_models = pd.DataFrame(models)
lr_models = lr_models.sort_values(by=['Accuracy'], ascending=False).reset_index(drop=True)
lr_models

Unnamed: 0,Window,Model,Scaler,Accuracy
0,18,LogisticRegression(),StandardScaler(),0.589147
1,20,LogisticRegression(),StandardScaler(),0.576
2,15,LogisticRegression(),StandardScaler(),0.567901
3,10,LogisticRegression(),StandardScaler(),0.556322
4,5,LogisticRegression(),StandardScaler(),0.539785


In [5]:
import joblib

In [6]:
model = lr_models.iloc[0]['Model']
scaler = lr_models.iloc[0]['Scaler']
print('Best Model by Accuracy')
print(f'Window: {lr_models.iloc[0]["Window"]}')
print(f'Accuracy: {lr_models.iloc[0]["Accuracy"]}')

Best Model by Accuracy
Window: 18
Accuracy: 0.5891472868217055


In [7]:
file = 'TrainedModels/LR_Model.pkl'
scaler_file = 'Scalers/LR.bin'
joblib.dump(model, file)
joblib.dump(scaler, scaler_file, compress=True)

['Scalers/LR.bin']