In [1]:
import pandas as pd

In [2]:
# Read the training workbook and the hold-out workbook into DataFrames,
# in that order, with a single unpacking assignment.
teams_df, current_df = (
    pd.read_excel(workbook) for workbook in ('train.xlsx', 'test.xlsx')
)

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
# Remove the 'Date' column from both frames before building features.
# The frames are rebound to their own names, so a second execution of this
# cell would see frames that no longer have 'Date'. errors='ignore' turns
# that re-run into a no-op instead of a KeyError, keeping the cell idempotent.
teams_df = teams_df.drop(columns=['Date'], errors='ignore')
current_df = current_df.drop(columns=['Date'], errors='ignore')

In [5]:
# Separate the 'Target' label column from the remaining feature columns
# of the training workbook.
y = teams_df['Target']
X = teams_df.drop(columns=['Target'])

# Same separation for the hold-out workbook.
y_test = current_df['Target']
X_test = current_df.drop(columns=['Target'])

# Reserve 20% of the training rows for validation. shuffle=False means the
# split follows the existing row order rather than a random permutation.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

In [6]:
from sklearn.preprocessing import StandardScaler

In [7]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to the training, validation and hold-out features.
scaler = StandardScaler().fit(X_train)

X_train, X_val, X_test = (
    scaler.transform(features) for features in (X_train, X_val, X_test)
)

In [8]:
from sklearn.model_selection import GridSearchCV

In [9]:
# Candidate hyper-parameter values handed to the grid search
# (3 x 3 x 3 = 27 combinations).
param_grid = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.01, 0.1, 0.5],
    max_depth=[3, 5, 7],
)

In [10]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [11]:
# Exhaustive search over param_grid using 3-fold cross-validation on the
# training split.
# - random_state pins the estimator's internal randomness so that a
#   Restart & Run All selects the same model and reproduces the reports.
# - n_jobs=-1 runs the independent CV fits on all available cores; it does
#   not change which candidate wins.
grid_search = GridSearchCV(
    GradientBoostingClassifier(random_state=42),
    param_grid,
    cv=3,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

In [12]:
# Take the best estimator found by the grid search and score it on the
# validation split.
best_gbm_model = grid_search.best_estimator_
y_val_pred = best_gbm_model.predict(X_val)

val_report = classification_report(y_val, y_val_pred)
print('Report:\n', val_report)
val_matrix = confusion_matrix(y_val, y_val_pred)
print('Confusion Matrix:\n', val_matrix)

Report:
               precision    recall  f1-score   support

           0       0.58      0.58      0.58       912
           1       0.58      0.59      0.58       912

    accuracy                           0.58      1824
   macro avg       0.58      0.58      0.58      1824
weighted avg       0.58      0.58      0.58      1824

Confusion Matrix:
 [[527 385]
 [378 534]]


In [13]:
import joblib

In [14]:
# Persist the tuned model to disk.
import os

file = 'TrainedModels/GB_Model.pkl'
# joblib.dump does not create missing parent folders; make sure the target
# directory exists so this cell also works on a fresh checkout.
os.makedirs(os.path.dirname(file), exist_ok=True)
# NOTE(review): the model was fitted on features transformed by `scaler`,
# but only the model is written here. Anything that loads this file outside
# the notebook needs the same fitted scaler; consider persisting it too.
joblib.dump(best_gbm_model, file)

['TrainedModels/GB_Model.pkl']

In [15]:
# Read the model back from the file written above, so the hold-out
# evaluation exercises the persisted artifact rather than the in-memory one.
gb_model = joblib.load(filename=file)

In [16]:
# Score the reloaded model on the hold-out workbook.
# NOTE(review): the recorded outputs show hold-out accuracy (0.72) well above
# validation accuracy (0.58); worth confirming the two datasets are
# comparable and that nothing leaks between them.
y_test_pred = gb_model.predict(X_test)

test_report = classification_report(y_test, y_test_pred)
print('Report:\n', test_report)
test_matrix = confusion_matrix(y_test, y_test_pred)
print('Confusion Matrix:\n', test_matrix)

Report:
               precision    recall  f1-score   support

           0       0.70      0.75      0.72       700
           1       0.73      0.69      0.71       700

    accuracy                           0.72      1400
   macro avg       0.72      0.72      0.72      1400
weighted avg       0.72      0.72      0.72      1400

Confusion Matrix:
 [[522 178]
 [220 480]]
