In [4]:
import pandas as pd
import numpy as np

from metrics import print_metrics

from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier

import matplotlib.pyplot as plt

from sklearn.model_selection import ParameterGrid

# Seed passed as `random_state` to each XGBClassifier built in the grid search,
# so repeated runs draw the same row/column subsamples.
random_seed = 42

In [2]:
# Read the feature-engineered train / validation splits from disk.
train = pd.read_csv("../data/featured/train.csv", sep=',')
val = pd.read_csv("../data/featured/val.csv", sep=',')

# Only the training dates are parsed to datetimes: they feed the recency
# weights computed later, whereas the validation 'date' column is simply
# dropped before modelling.
train = train.assign(date=pd.to_datetime(train['date']))

# Separate the binary target from the feature columns for both splits.
target_col = 'teamA_win'
Xtrain, ytrain = train.drop(columns=[target_col]), train[target_col]
Xval, yval = val.drop(columns=[target_col]), val[target_col]

# Sanity check: both feature matrices should have the same number of columns.
display(Xtrain.shape, Xval.shape)

(4830, 55)

(209, 55)

In [3]:
# Recency weighting: map each training row's date linearly onto [0, 1], so the
# oldest row gets weight 0 (it is effectively ignored by the fit) and the
# newest row gets weight 1.
# NOTE(review): assumes the training set spans more than one distinct date;
# otherwise the denominator is a zero timedelta.
train_dates = Xtrain['date']
min_date, max_date = train_dates.min(), train_dates.max()
sample_weight = (train_dates - min_date) / (max_date - min_date)

# The raw date is not used as a model feature. Because the column is removed
# here, this cell cannot be re-run without first re-running the loading cell.
Xtrain, Xval = (frame.drop(columns=['date']) for frame in (Xtrain, Xval))

In [9]:
# Hyper-parameter grid for the XGBoost classifier. ParameterGrid expands it
# into the Cartesian product of the listed values (2*2*2*1*1*2*2 = 32 fits).
param_grid = {
    'n_estimators': [300, 500],
    'max_depth': [6, 8],
    'learning_rate': [0.05, 0.1],
    'subsample': [0.8],
    'colsample_bytree': [0.8],
    'min_child_weight': [1, 3],
    'gamma': [0, 0.1],
}

# Start below any achievable accuracy so the first candidate is always
# accepted and `best_params` can never stay None for a non-empty grid.
best_accuracy = float('-inf')
best_params = None
# Fitted estimator that produced `best_accuracy`. After the loop `clf` is just
# the LAST grid point, so use `best_model` for anything downstream.
best_model = None
# One record per candidate (its params plus validation accuracy), so the whole
# grid can be inspected afterwards, e.g. pd.DataFrame(grid_results).
grid_results = []

for params in ParameterGrid(param_grid):
    clf = XGBClassifier(random_state=random_seed, n_jobs=-1, **params)
    # `sample_weight` up-weights recent matches. `eval_set` only records
    # per-round validation metrics on the fitted model; no early stopping is
    # configured here, so all `n_estimators` rounds are always trained.
    clf.fit(
        Xtrain, ytrain, sample_weight=sample_weight,
        eval_set=[(Xval, yval)],
        verbose=False
    )

    yval_pred = clf.predict(Xval)
    accuracy = accuracy_score(yval, yval_pred)
    grid_results.append({**params, 'val_accuracy': accuracy})

    # Strict '>' keeps the earliest candidate when several tie on accuracy.
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_params = params
        best_model = clf

# NOTE: the configuration is selected on the same validation set whose
# accuracy is reported here, so this figure is an optimistic estimate of
# out-of-sample performance.
print("Best Parameters:", best_params)
print("Best Validation Accuracy:", best_accuracy)

Best Parameters: {'colsample_bytree': 0.8, 'gamma': 0.1, 'learning_rate': 0.1, 'max_depth': 6, 'min_child_weight': 3, 'n_estimators': 300, 'subsample': 0.8}
Best Validation Accuracy: 0.6889952153110048
