In [5]:
# NOTE: the wildcard import stays first so the explicit imports below take
# precedence over any same-named objects it exports.
from notebooks.utilities.STC_grid_search import *

import numpy as np
import pandas as pd
import torch
from sklearn.ensemble import RandomForestClassifier

In [6]:
# Select the device to run on: use the GPU when torch reports a usable CUDA
# runtime, otherwise fall back to the CPU. `torch` is an explicit import so this
# cell does not depend on whatever the wildcard import happens to export.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [7]:
# Estimator whose hyperparameters are being searched; random_state is fixed at 18.
model = RandomForestClassifier(random_state=18)

# Hyperparameter grid: each keyword is an estimator parameter name and each list
# holds the candidate values to test for it. Key order matches the original grid.
param_dict = dict(
    max_depth=[2, 3, 4, 5, 6, 10, None],
    max_features=['sqrt', 'log2', 1, 3, 5, 7, 9, None],
    n_estimators=[5, 10, 25, 50, 100],
    min_samples_split=[2, 4, 8, 16, 32, 64, 128, 256],
)

In [9]:
# Load the CSV and convert the resulting frame to a NumPy array.
data = np.array(pd.read_csv("classification_datasets/alzheimers_disease_data.csv"))

# Every column except the last is a feature; the last column is the target.
x, y = data[:, :-1], data[:, -1]

# The combined array is no longer needed once it has been split.
del data

# Optionally keep only a portion of the rows (portion = 1.0 requests all of them).
x, y = get_subset(x=x, y=y, portion=1.0, random_state=18)

print(f"X Shape: {x.shape}; Y Shape: {y.shape}")

X Shape: (2149, 32); Y Shape: (2149,)


In [10]:
# Approximate the best parameter combinations using Sparse Tensor Completion (STC).
best_estimated_params = return_best_k_params(
    model=model,
    param_dict=param_dict,
    X=x,
    Y=y,
    # number of best estimated combinations to return
    num_top_combinations=15,
    # how many CV folds
    cv_splits=5,
    # proportion of total combinations to compute in order to estimate all of them
    portion_of_combinations=0.05,
    # sparse tensor completion model
    STC_model_type='costco',
    # rank of the decomposition used for tensor completion
    rank=25,
    device=device,
    verbose=False,
)

Done!


In [11]:
# Display each of the best estimated parameter combinations with its actual
# evaluation metric. Each entry is indexed as [0] = parameter dict, [1] = score.
#
# The loop variable is deliberately NOT named `x`: a `for` target stays bound
# after the loop ends, so reusing `x` would overwrite the feature matrix and
# break any later re-run of the search cell that passes `X = x`.
for combo in best_estimated_params:
    print(f"{combo[0]} \nactual eval = {combo[1]:.4f}\n")

{'max_depth': None, 'max_features': 9, 'n_estimators': 100, 'min_samples_split': 8} 
actual eval = 0.9296

{'max_depth': None, 'max_features': 9, 'n_estimators': 100, 'min_samples_split': 2} 
actual eval = 0.9284

{'max_depth': 10, 'max_features': None, 'n_estimators': 100, 'min_samples_split': 8} 
actual eval = 0.9277

{'max_depth': None, 'max_features': 9, 'n_estimators': 50, 'min_samples_split': 8} 
actual eval = 0.9276

{'max_depth': None, 'max_features': 9, 'n_estimators': 25, 'min_samples_split': 8} 
actual eval = 0.9268

{'max_depth': None, 'max_features': None, 'n_estimators': 100, 'min_samples_split': 8} 
actual eval = 0.9263

{'max_depth': 10, 'max_features': 9, 'n_estimators': 100, 'min_samples_split': 8} 
actual eval = 0.9261

{'max_depth': None, 'max_features': 9, 'n_estimators': 50, 'min_samples_split': 2} 
actual eval = 0.9255

{'max_depth': None, 'max_features': 9, 'n_estimators': 10, 'min_samples_split': 8} 
actual eval = 0.9252

{'max_depth': None, 'max_features': Non