In [None]:
import torch
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

In [None]:
# Training split: each .pt file is loaded with torch and converted straight to NumPy.
X = torch.load('../data/X_ensemble_train.pt').numpy()
y = torch.load('../data/y_ensemble_train.pt').numpy()

# Validation split: features are converted directly; the label file is passed
# through torch.cat along dim 0 first (presumably it stores a list of
# per-batch label tensors -- TODO confirm) and then converted to NumPy.
X_val = torch.load('../data/X_ensemble_val.pt').numpy()
y_val = torch.cat(torch.load('../data/y_val.pt'), dim=0).numpy()

In [None]:
# Base estimators handed to the grid searches. Both share one fixed seed so
# repeated runs of the notebook build the same models.
SEED = 42
rf = RandomForestClassifier(random_state=SEED)
xgb_classif = xgb.XGBClassifier(random_state=SEED)

### Performing a grid search

In [None]:
# Candidate hyperparameter values for the first search.
rf_param_grid = {
    'n_estimators': [10, 50, 100],
    'max_depth': [None, 5, 10],
}
xgb_param_grid = {
    'learning_rate': [0.1, 0.01],
    'max_depth': [3, 5, 7],
    'n_estimators': [10, 50, 100],
}

# Exhaustive 5-fold cross-validated search over each grid, fitted on the
# training arrays. `fit` returns the fitted search object, so it is chained.
rf_grid_search = GridSearchCV(rf, param_grid=rf_param_grid, cv=5).fit(X, y)
xgb_grid_search = GridSearchCV(xgb_classif, param_grid=xgb_param_grid, cv=5).fit(X, y)

# Report the winning parameter combination of each search.
print("Random Forest Best Hyperparameters:", rf_grid_search.best_params_, sep="\n")
print("XGBoost Best Hyperparameters:", xgb_grid_search.best_params_, sep="\n")

# Score the best estimator of each search on the held-out validation arrays
# (X_val / y_val).
rf_best = rf_grid_search.best_estimator_
xgb_best = xgb_grid_search.best_estimator_

y_pred_rf = rf_best.predict(X_val)
y_pred_xgb = xgb_best.predict(X_val)

print("Accuracy of best rf classifier:", accuracy_score(y_val, y_pred_rf), sep="\n")
print("Accuracy of best xgb classifier:", accuracy_score(y_val, y_pred_xgb), sep="\n")

### Performing a second grid search with adjusted parameter ranges

In [None]:
# Parameter grids for the second search: the random-forest grid covers
# n_estimators 75-125 and max_depth 10-20; the XGBoost grid also tries
# max_depth=10.
rf_param_grid = {'n_estimators': [75, 100, 125],
                 'max_depth': [10, 15, 20]}

xgb_param_grid = {'learning_rate': [0.1, 0.01],
                  'max_depth': [3, 5, 7, 10],
                  'n_estimators': [10, 50, 100]}

# 5-fold cross-validated grid search for each classifier, running up to
# 8 candidate fits in parallel (n_jobs=8).
rf_grid_search = GridSearchCV(rf, param_grid=rf_param_grid, cv=5, n_jobs=8)
rf_grid_search.fit(X, y)

# Pass the XGBClassifier instance (`xgb_classif`), not `xgb`: `xgb` is the
# xgboost module alias, which is not an estimator and cannot be cloned or
# fitted by GridSearchCV.
xgb_grid_search = GridSearchCV(xgb_classif, param_grid=xgb_param_grid, cv=5, n_jobs=8)
xgb_grid_search.fit(X, y)

# Print the best hyperparameters for each classifier
print("Random Forest Best Hyperparameters:")
print(rf_grid_search.best_params_)
print("XGBoost Best Hyperparameters:")
print(xgb_grid_search.best_params_)

# Evaluate the best classifiers on the validation set (X_val / y_val).
# best_estimator_ is the winning configuration refit on all of X, y
# (GridSearchCV's default refit=True).
rf_best = rf_grid_search.best_estimator_
xgb_best = xgb_grid_search.best_estimator_

y_pred_rf = rf_best.predict(X_val)
y_pred_xgb = xgb_best.predict(X_val)

print("Accuracy of best rf classifier:")
print(accuracy_score(y_val, y_pred_rf))
print("Accuracy of best xgb classifier:")
print(accuracy_score(y_val, y_pred_xgb))
