In [1]:
import pandas as pd

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [3]:
models = []

window_sizes = [5,10,15,18,20]
for window in window_sizes:
    print('='*60)
    print(f'Training Logistic Regression Model with window size={window} games')
    window_df = pd.read_excel(f'data/train_{window}.xlsx')
    
    X = window_df.drop(columns=['Target'])
    y = window_df['Target']
    
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X.drop(columns=['Date']))
    
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, shuffle=False)
    
    lr_model = LogisticRegression()
    lr_model.fit(X_train, y_train)
    
    y_pred = lr_model.predict(X_test)
    lr_acc = accuracy_score(y_test, y_pred)
    models.append({'Window': window, 'Model': lr_model, 'Scaler': scaler, 'Accuracy': lr_acc})
    
    print(f'Model Accuracy: {lr_acc}')
    print('Report:\n', classification_report(y_test, y_pred))
    print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
    print('='*60)

Training Logistic Regression Model with window size=5 games
Model Accuracy: 0.546236559139785
Report:
               precision    recall  f1-score   support

           0       0.54      0.57      0.56       232
           1       0.55      0.52      0.54       233

    accuracy                           0.55       465
   macro avg       0.55      0.55      0.55       465
weighted avg       0.55      0.55      0.55       465

Confusion Matrix:
 [[132 100]
 [111 122]]
Training Logistic Regression Model with window size=10 games
Model Accuracy: 0.5655172413793104
Report:
               precision    recall  f1-score   support

           0       0.57      0.56      0.56       217
           1       0.57      0.57      0.57       218

    accuracy                           0.57       435
   macro avg       0.57      0.57      0.57       435
weighted avg       0.57      0.57      0.57       435

Confusion Matrix:
 [[121  96]
 [ 93 125]]
Training Logistic Regression Model with window size=15

In [4]:
lr_models = pd.DataFrame(models)
lr_models

Unnamed: 0,Window,Model,Scaler,Accuracy
0,5,LogisticRegression(),StandardScaler(),0.546237
1,10,LogisticRegression(),StandardScaler(),0.565517
2,15,LogisticRegression(),StandardScaler(),0.57284
3,18,LogisticRegression(),StandardScaler(),0.596899
4,20,LogisticRegression(),StandardScaler(),0.586667
