In [8]:
import torch
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

In [3]:
# Load the ensemble training features and their labels from the saved torch files.
X = torch.load('../data/X_ensemble_train.pt')
y = torch.load('../data/y_ensemble_train.pt')

In [4]:
# Convert the loaded tensors to NumPy arrays for scikit-learn / XGBoost.
# The isinstance guards make this cell safe to re-run: once X and y are
# already ndarrays, calling `.numpy()` on them again would raise AttributeError.
if isinstance(X, torch.Tensor):
    X = X.numpy()
if isinstance(y, torch.Tensor):
    y = y.numpy()

In [5]:
# Hyperparameter grids explored by the first round of grid searches.
# Random forest: 3 x 3 = 9 combinations (max_depth=None means unlimited depth).
rf_param_grid = dict(
    n_estimators=[10, 50, 100],
    max_depth=[None, 5, 10],
)
# XGBoost: 2 x 3 x 3 = 18 combinations.
xgb_param_grid = dict(
    learning_rate=[0.1, 0.01],
    max_depth=[3, 5, 7],
    n_estimators=[10, 50, 100],
)

In [6]:
# Base estimators handed to the grid searches below.
# Both are seeded with the same fixed random_state so repeated runs are comparable.
xgb_classif = xgb.XGBClassifier(random_state=42)
rf = RandomForestClassifier(random_state=42)

In [9]:
# 5-fold cross-validated grid search for each classifier, fit on the training arrays.
# Random forest first, then XGBoost.
rf_grid_search = GridSearchCV(estimator=rf, param_grid=rf_param_grid, cv=5)
rf_grid_search.fit(X, y)

xgb_grid_search = GridSearchCV(estimator=xgb_classif, param_grid=xgb_param_grid, cv=5)
xgb_grid_search.fit(X, y)

# Report the winning parameter combination of each search (label line, then the dict).
print("Random Forest Best Hyperparameters:", rf_grid_search.best_params_, sep="\n")
print("XGBoost Best Hyperparameters:", xgb_grid_search.best_params_, sep="\n")

TypeError: estimator should be an estimator implementing 'fit' method, <module 'xgboost' from '/home/lucasc/.local/share/virtualenvs/cil-project-E9C2ZiPG/lib/python3.9/site-packages/xgboost/__init__.py'> was passed

In [10]:
# Show the parameter combination selected by the random-forest grid search.
print("Random Forest Best Hyperparameters:", rf_grid_search.best_params_, sep="\n")

Random Forest Best Hyperparameters:
{'max_depth': 10, 'n_estimators': 100}


In [11]:
# Take the random forest selected by the first grid search; the following cells
# evaluate it on the validation arrays (X_val / y_val), not on a test set.
rf_best = rf_grid_search.best_estimator_


In [12]:
# Load the validation features and labels from the saved torch files.
X_val = torch.load('../data/X_ensemble_val.pt')
y_val = torch.load('../data/y_val.pt')

In [15]:
# Prepare the validation data as NumPy arrays.
# y_val is passed to torch.cat, so it is presumably loaded as a sequence of
# label tensors (TODO confirm how y_val.pt was written); join them along dim 0.
# Every step is guarded by type so the cell can be re-run safely: the unguarded
# version fails on a second run because y_val is by then already an ndarray.
if not isinstance(y_val, (torch.Tensor, np.ndarray)):
    y_val = torch.cat(y_val, dim=0)
if isinstance(y_val, torch.Tensor):
    y_val = y_val.numpy()
if isinstance(X_val, torch.Tensor):
    X_val = X_val.numpy()

In [16]:
# Predict validation labels with the random forest currently bound to rf_best.
y_pred = rf_best.predict(X_val)

In [17]:
# Validation accuracy of the predictions currently held in y_pred.
accuracy_score(y_val, y_pred)

0.9025228145590359

In [20]:
# Refined random-forest grid around the first search's winner
# (max_depth=10, n_estimators=100). Note: this rebinds rf_param_grid.
rf_param_grid = dict(
    n_estimators=[75, 100, 125],
    max_depth=[10, 15, 20],
)

# Same 5-fold search as before, now run with 8 parallel jobs.
rf_grid_search2 = GridSearchCV(estimator=rf, param_grid=rf_param_grid, cv=5, n_jobs=8)
rf_grid_search2.fit(X, y)

GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42), n_jobs=8,
             param_grid={'max_depth': [10, 15, 20],
                         'n_estimators': [75, 100, 125]})

In [25]:
# Re-point rf_best at the estimator selected by the refined search and
# overwrite y_pred with its predictions on the validation features.
rf_best = rf_grid_search2.best_estimator_
y_pred = rf_best.predict(X_val)

In [33]:
# Parameter combination selected by the refined random-forest search.
rf_grid_search2.best_params_

{'max_depth': 10, 'n_estimators': 125}

In [26]:
# Validation accuracy of the predictions currently held in y_pred.
accuracy_score(y_val, y_pred)

0.9025007927839047

In [23]:
from xgboost import XGBClassifier

# XGBoost grid for this search: 2 x 4 x 3 = 24 combinations.
xgb_param_grid = {'learning_rate': [0.1, 0.01],
                  'max_depth': [3, 5, 7, 10],
                  'n_estimators': [10, 50, 100]}

# Bind the estimator to `xgb_classif`, not `xgb`: `xgb` is the alias of the
# xgboost module imported at the top of the notebook, and rebinding it to a
# classifier instance breaks any later use of `xgb.<attr>` (e.g. re-running
# the cell that calls `xgb.XGBClassifier`).
xgb_classif = XGBClassifier(random_state=42)

# 5-fold cross-validated search with 8 parallel jobs, fit on the training arrays.
xgb_grid_search = GridSearchCV(xgb_classif, param_grid=xgb_param_grid, cv=5, n_jobs=8)
xgb_grid_search.fit(X, y)

GridSearchCV(cv=5,
             estimator=XGBClassifier(base_score=None, booster=None,
                                     callbacks=None, colsample_bylevel=None,
                                     colsample_bynode=None,
                                     colsample_bytree=None,
                                     early_stopping_rounds=None,
                                     enable_categorical=False, eval_metric=None,
                                     feature_types=None, gamma=None,
                                     gpu_id=None, grow_policy=None,
                                     importance_type=None,
                                     interaction_constraints=None,
                                     learning_rate=None, max_bin=None,
                                     max_cat_threshold=None,
                                     max_cat_to_onehot=None,
                                     max_delta_step=None, max_depth=None,
                                     max

In [27]:
# Estimator selected by the XGBoost grid search.
xgb_best = xgb_grid_search.best_estimator_

In [28]:
# Predict validation labels with the selected XGBoost model (overwrites y_pred).
y_pred = xgb_best.predict(X_val)

In [29]:
# Validation accuracy of the predictions currently held in y_pred.
accuracy_score(y_val, y_pred)

0.9026153060145872

In [32]:
# Parameter combination selected by the XGBoost grid search.
xgb_grid_search.best_params_

{'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 50}

In [34]:
# Narrowed XGBoost grid around the previous winner
# (learning_rate=0.1, max_depth=7, n_estimators=50): 2 x 3 x 3 = 18 combinations.
xgb_param_grid = {'learning_rate': [0.1, 0.05],
                  'max_depth': [6, 7, 8],
                  'n_estimators': [30, 50, 70]}

# Bind the estimator to `xgb_classif`, not `xgb`: `xgb` is the alias of the
# xgboost module imported at the top of the notebook, and rebinding it to a
# classifier instance breaks any later use of `xgb.<attr>`.
xgb_classif = XGBClassifier(random_state=42)

# Only construct the search here; it is fit in the next cell.
xgb_grid_search = GridSearchCV(xgb_classif, param_grid=xgb_param_grid, cv=5, n_jobs=8)

In [35]:
# Fit the narrowed XGBoost grid search on the training arrays.
xgb_grid_search.fit(X, y)

GridSearchCV(cv=5,
             estimator=XGBClassifier(base_score=None, booster=None,
                                     callbacks=None, colsample_bylevel=None,
                                     colsample_bynode=None,
                                     colsample_bytree=None,
                                     early_stopping_rounds=None,
                                     enable_categorical=False, eval_metric=None,
                                     feature_types=None, gamma=None,
                                     gpu_id=None, grow_policy=None,
                                     importance_type=None,
                                     interaction_constraints=None,
                                     learning_rate=None,..._bin=None,
                                     max_cat_threshold=None,
                                     max_cat_to_onehot=None,
                                     max_delta_step=None, max_depth=None,
                                     max_

In [36]:
# Estimator selected by the narrowed XGBoost grid search.
xgb_best = xgb_grid_search.best_estimator_

In [37]:
# Predict validation labels with the newly selected XGBoost model (overwrites y_pred).
y_pred = xgb_best.predict(X_val)

In [38]:
# Validation accuracy of the predictions currently held in y_pred.
accuracy_score(y_val, y_pred)

0.9026153060145872

In [39]:
# Parameter combination selected by the narrowed XGBoost grid search.
xgb_grid_search.best_params_

{'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 50}