In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

In [None]:
# Read the four saved NumPy arrays from the current working directory.
# Each feature array is loaded together with its label array so the pairing
# is visible at a glance; evaluation order is still train first, then test.
# NOTE(review): the "_fixed" label files are presumably a corrected version
# of the original labels — confirm where they are produced.
X_train, y_train = np.load('./X_train.npy'), np.load('./y_train_fixed.npy')
X_test, y_test = np.load('./X_test.npy'), np.load('./y_test_fixed.npy')

In [None]:
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import classification_report

# Fail fast: the search below is expensive, so catch mismatched inputs now
# instead of after every candidate has been fitted (a bad test split would
# otherwise only surface at the final predict/score step).
assert len(X_train) == len(y_train), "X_train and y_train have different lengths"
assert len(X_test) == len(y_test), "X_test and y_test have different lengths"
assert X_train.shape[1:] == X_test.shape[1:], "train/test feature shapes differ"

# Pin the seed explicitly: the grid samples rows/columns (subsample and
# colsample_bytree < 1.0), so the result should not depend on library defaults.
# NOTE(review): 0 is the documented default of XGBoost's `seed` parameter, so
# this is expected to leave the fitted models unchanged — confirm against the
# installed xgboost version.
xgb = XGBClassifier(random_state=0)

# 4 * 3 * 3 * 3 * 3 = 324 candidate settings.
param_grid = {
    'max_depth': [3, 5, 7, 9],
    'learning_rate': [0.01, 0.1, 0.2],
    'subsample': [0.8, 0.9, 1.0],
    'colsample_bytree': [0.8, 0.9, 1.0],
    'n_estimators': [50, 100, 200]
}

# Exhaustive search: 324 candidates x 5 folds = 1620 fits, selected on mean
# cross-validated accuracy.
grid = GridSearchCV(
    estimator=xgb,
    param_grid=param_grid,
    scoring="accuracy",
    cv=5)

grid.fit(X_train, y_train)

print("Best params:", grid.best_params_)
print("Best CV accuracy:", grid.best_score_)

# Estimator exposed by the search for the winning parameter combination.
best_model = grid.best_estimator_

# Evaluate once on the held-out test arrays.
y_pred = best_model.predict(X_test)

# Accuracy
test_acc = accuracy_score(y_test, y_pred)
print("Test Accuracy:", test_acc)

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)

# Overall accuracy can look acceptable while some classes are never predicted,
# so also report the mean per-class recall and the per-class breakdown.
test_balanced_acc = balanced_accuracy_score(y_test, y_pred)
print("Test Balanced Accuracy:", test_balanced_acc)

# zero_division=0 reports 0 (instead of emitting a warning) for classes that
# never appear in y_pred, where precision is otherwise undefined.
print("Classification Report:\n",
      classification_report(y_test, y_pred, zero_division=0))

Best params: {'colsample_bytree': 0.9, 'learning_rate': 0.01, 'max_depth': 7, 'n_estimators': 200, 'subsample': 0.8}
Best CV accuracy: 0.37755555555555553
Test Accuracy: 0.379
Confusion Matrix:
 [[292  14  10   9   2   1   0   0   0   0]
 [ 97  17  15  12   3   0   0   0   0   0]
 [ 40  11  44   5   0   1   0   0   0   0]
 [ 66   8   3  23   0   1   0   0   0   0]
 [ 74   1   2   5   2   0   0   0   0   0]
 [ 47   6  20   6   0   0   0   0   0   0]
 [ 59   5   2   3   0   0   0   0   0   0]
 [ 22   8   7   4   1   0   0   1   0   0]
 [ 25   2   1   3   0   0   0   0   0   0]
 [ 18   0   1   1   0   0   0   0   0   0]]
