In [1]:
from src.utilities.MLP_grid_search import *

import torch
from torch import nn
import pandas as pd
import numpy as np

In [2]:
# pick the compute device: use the GPU when PyTorch reports CUDA as available,
# otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [3]:
# create synthetic data for a binary classification task:
# labels come from a noisy linear model passed through a sigmoid and rounded

SEED = 0                                        # fixed seed so the same dataset is drawn on every run
torch.manual_seed(SEED)                         # seeds torch's global RNG (used by the randn calls below)

n_samples, n_features = 1000, 10                # number of samples & number of features

x = torch.randn(n_samples, n_features)          # randomly generated features
weights = torch.randn(n_features) * 5           # randomly generated weights

signal = x @ weights                            # linear combinations of random features & weights
noise = torch.randn(n_samples) * signal.abs().mean()   # normal random error, scaled by the mean |signal|

y = torch.sigmoid(signal + noise).round()       # sigmoid, then round to 0/1 labels for binary classification

In [4]:
# sanity check: report the dimensions of the feature matrix and the target vector
print(
    f"X Shape: {x.shape}\nY Shape: {y.shape}"
)

X Shape: torch.Size([1000, 10])
Y Shape: torch.Size([1000])


In [5]:
# search space handed to the grid search: maps each hyperparameter name
# to the value(s) that should be tried

param_dict = {
    # tuned hyperparameters -> give a list of candidate values
    "batch_size": [16, 32, 64, 128],
    "lr": [1e-3, 1e-2, 1e-1],
    "activation": ["relu", "tanh", None],
    "hidden_dims": [[16], [32], [64]],
    "dropouts": [0.0, 0.1, 0.2],
    "num_epochs": [5, 10, 25, 50],

    # constant hyperparameters -> give a single value
    "wd": 1e-3,
    "last_act": "sigmoid",
    "loss_fn": "bce",
    "stop_early": False,
}

In [6]:
# approximate the best parameter combinations with Sparse Tensor Completion:
# a subset of the grid is actually computed and the remaining entries are inferred

best_estimated_params = return_best_k_params(
    # NOTE(review): 'hyperameter_dict' looks like a typo, but this is the keyword the
    # recorded run used -- confirm against the function signature before renaming
    hyperameter_dict=param_dict,            # dictionary of parameters (search space)
    X=x,                                    # features
    Y=y,                                    # target
    num_top_combinations=10,                # how many of the best estimated combinations to return
    tensor_training_entries=25,             # grid-search entries to compute; the rest are inferred
    training_values=100,                    # training entries for each grid-search run
    # NOTE(review): 10_000 is larger than n_samples (1000) -- confirm how the library handles this
    testing_values=10_000,                  # testing values for each grid-search run
    num_tests=5,                            # repeated tests per grid-search entry (average is used)
    tensor_completion_model='cpd.smooth',   # sparse tensor completion model
    task_type='classification',             # MLP task
    metric='accuracy',                      # metric recorded for each grid-search entry
    rank=16,                                # rank of the decomposition used for tensor completion
    device=device,                          # device
    verbose=True,                           # print intermediate progress when True
)

25/1296 total combinations in sparse tensor.

1/25 param_combinations done.
2/25 param_combinations done.
3/25 param_combinations done.
4/25 param_combinations done.
5/25 param_combinations done.
6/25 param_combinations done.
7/25 param_combinations done.
8/25 param_combinations done.
9/25 param_combinations done.
10/25 param_combinations done.
11/25 param_combinations done.
12/25 param_combinations done.
13/25 param_combinations done.
14/25 param_combinations done.
15/25 param_combinations done.
16/25 param_combinations done.
17/25 param_combinations done.
18/25 param_combinations done.
19/25 param_combinations done.
20/25 param_combinations done.
21/25 param_combinations done.
22/25 param_combinations done.
23/25 param_combinations done.
24/25 param_combinations done.
25/25 param_combinations done.

Running sparse tensor completion...
Done with sparse tensor completion!

Evaluating predicted best parameters.
Done!


In [7]:
# store the second element of every result (presumably its evaluation score -- confirm)
# inside the first element (the parameter mapping) under the 'Evaluation' key
for entry in best_estimated_params:
    entry[0]['Evaluation'] = entry[1]

# keys of the first parameter mapping, now including 'Evaluation'
hyperparams = [*best_estimated_params[0][0]]

In [8]:
# Tabulate the returned combinations: one row per parameter set, one column per
# name in `hyperparams`.
# Values keep their native types instead of being converted with str() and routed
# through a NumPy string array, so numeric columns (e.g. 'lr', 'Evaluation') stay
# numeric and can be sorted or aggregated directly. pandas builds the frame from
# the nested list itself, so list-valued entries are stored as-is.
df = pd.DataFrame(
    [[param_set[0][p] for p in hyperparams] for param_set in best_estimated_params],
    columns=hyperparams,
)
df

Unnamed: 0,batch_size,lr,activation,hidden_dims,dropouts,num_epochs,Evaluation
0,32,0.01,tanh,[32],0.0,25,0.7555555555555555
1,32,0.01,tanh,[32],0.0,10,0.7515555555555555
2,64,0.01,tanh,[32],0.0,10,0.7488888888888889
3,64,0.01,tanh,[32],0.1,10,0.7442222222222222
4,32,0.01,,[32],0.0,25,0.7386666666666667
5,32,0.01,tanh,[32],0.2,25,0.735111111111111
6,64,0.01,tanh,[32],0.0,25,0.7337777777777778
7,32,0.01,tanh,[32],0.0,5,0.7315555555555555
8,32,0.01,tanh,[64],0.0,25,0.7248888888888889
9,16,0.01,tanh,[32],0.0,25,0.7162222222222222
