In [1]:
# NOTE: the wildcard import is kept first so the explicit imports below take precedence
from src.utilities.sklearn_grid_search import *

import numpy as np
import pandas as pd

# torch / nn are used by the cells below; import them explicitly instead of
# relying on the wildcard import above to bring them into scope
import torch
from torch import nn

from sklearn.ensemble import GradientBoostingRegressor, HistGradientBoostingRegressor, RandomForestRegressor
from sklearn.ensemble import GradientBoostingClassifier, HistGradientBoostingClassifier, RandomForestClassifier

In [2]:
# choose the compute device: use the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [3]:
# create synthetic binary-classification data

# Seed the global torch RNG so the generated data (and everything computed from it)
# is identical on every Restart & Run All. 18 matches the random_state given to the model.
torch.manual_seed(18)

n_samples, n_features = 1000, 10                      # number of samples & number of features

x = torch.randn(n_samples, n_features)                # standard-normal features, shape (n_samples, n_features)
weights = torch.randn(n_features) * 5                 # standard-normal weights scaled by 5, shape (n_features,)

# linear combination of features & weights, one value per sample
logits = x @ weights

# add normal noise whose scale is half of the mean absolute signal
logits = logits + torch.randn(n_samples) * logits.abs().mean() * 0.5

# squash through a sigmoid and round to get float labels in {0., 1.}
y = torch.sigmoid(logits).round()

In [4]:
# estimator whose hyperparameters will be tuned (fixed random_state for repeatable fits)
model = RandomForestClassifier(random_state=18)

# search space: maps each hyperparameter name to the list of candidate values to try
# NOTE(review): max_features includes 12, which is larger than the 10 features generated
# for the synthetic data -- confirm the installed scikit-learn accepts
# max_features > n_features, or cap the value.
param_dict = {
    'max_depth': [2, 3, 4, 5, 6, 10, 12, 16, None],
    'max_features': ['sqrt', 'log2', 2, 3, 5, 7, 9, 12, None],
    'n_estimators': [5, 10, 25, 50, 100, 150, 200, 300],
}

In [None]:
# Estimate the best hyperparameter combinations via sparse tensor completion:
# only a subset of the grid is evaluated directly and the rest is estimated from it.
best_estimated_params = return_best_k_params(
    model=model,                             # ML model to hyperparameter tune
    param_dict=param_dict,                   # hyperparameter names -> candidate values
    X=x,
    Y=y,
    num_top_combinations=10,                 # how many of the best estimated combinations to return
    cv_splits=5,                             # number of cross-validation folds
    tensor_training_portion=25,              # fraction/number of combinations computed directly
                                             # (the run log reports 25 combinations for this value)
    tensor_completion_model='cpd.smooth',    # sparse tensor completion model
    metric='f1',                             # evaluation metric
    rank=5,                                  # rank of the decomposition used for tensor completion
    device=device,                           # device to run this on
    verbose=True,
)

25/648 total combinations in sparse tensor.
1/25 param_combinations done.
2/25 param_combinations done.
3/25 param_combinations done.
4/25 param_combinations done.
5/25 param_combinations done.
6/25 param_combinations done.
7/25 param_combinations done.
8/25 param_combinations done.
9/25 param_combinations done.
10/25 param_combinations done.
11/25 param_combinations done.
12/25 param_combinations done.
13/25 param_combinations done.
14/25 param_combinations done.
15/25 param_combinations done.
16/25 param_combinations done.
17/25 param_combinations done.
18/25 param_combinations done.
19/25 param_combinations done.
20/25 param_combinations done.
21/25 param_combinations done.
22/25 param_combinations done.
23/25 param_combinations done.
24/25 param_combinations done.
25/25 param_combinations done.

Running sparse tensor completion...
Done with sparse tensor completion!

Evaluating predicted best parameters.
Done!


In [6]:
# print every returned parameter combination followed by its actual evaluation metric
for entry in best_estimated_params:
    params, score = entry[0], entry[1]
    print(f"{params} \nactual eval = {score:.5f}\n")

{'max_depth': 12, 'max_features': 5, 'n_estimators': 200} 
actual eval = 0.81595

{'max_depth': 6, 'max_features': 3, 'n_estimators': 25} 
actual eval = 0.80705

{'max_depth': 6, 'max_features': 5, 'n_estimators': 25} 
actual eval = 0.80387

{'max_depth': 6, 'max_features': 5, 'n_estimators': 10} 
actual eval = 0.80096

{'max_depth': 3, 'max_features': 3, 'n_estimators': 25} 
actual eval = 0.79592

{'max_depth': 3, 'max_features': 5, 'n_estimators': 25} 
actual eval = 0.78195

{'max_depth': 6, 'max_features': 3, 'n_estimators': 10} 
actual eval = 0.77302

{'max_depth': 3, 'max_features': 12, 'n_estimators': 25} 
actual eval = 0.76612

{'max_depth': 3, 'max_features': 12, 'n_estimators': 10} 
actual eval = 0.76205

{'max_depth': 3, 'max_features': 3, 'n_estimators': 10} 
actual eval = 0.75967

