In [53]:
import sys
sys.path.append('../src')

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from models.linear_models import LogisticModel
from models.ensemble_models import RandomForestModel
from optimizers.grid_search import GridSearchOptimizer
from optimizers.random_search import RandomSearchOptimizer
from optimizers.bayesian import BayesSearchOptimizer, HyperoptOptimizer
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from optimizers.darts_en import DARTSOptimizer
from hyperopt.pyll.base import scope
from utils.data_loader import load_data

%load_ext autoreload
%autoreload 2

import warnings
warnings.filterwarnings('ignore')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Setup

In [2]:
def optimize_model(model, parameters, X_train, y_train):
    optimizer = GridSearchOptimizer(model, parameters)
    best_params = optimizer.optimize(X_train, y_train)
    
    results = optimizer.cv_results_

    best_score = max(results['mean_test_score'])
    
    worst_index = results['mean_test_score'].argmin()
    worst_score = results['mean_test_score'][worst_index]
    worst_params = {key: results['param_' + key][worst_index] for key in parameters}
    
    return best_params, best_score, worst_params, worst_score

In [3]:
data = pd.read_csv('/Users/christopherpuglisi/Library/CloudStorage/GoogleDrive-cpuglisi@netrias.com/Shared drives/DTRA_CARES/data/CARES/data/protein_exp_normalized_cleaned_labeled_cp_12222023_v2.csv', comment='#', low_memory=False, index_col=0)

In [4]:
feature = list(data.columns)[:-3]
target = ['Threat']

In [5]:
X_train, X_test, y_train, y_test = train_test_split(data[feature], data[target], test_size=0.2, random_state=0)

In [6]:
logistic_model = LogisticModel()
rf_model = RandomForestModel()

## Grid Search

In [85]:
logistic_params = {'C': [0.1, 1, 10], 'max_iter': [500, 1000], 'penalty': ['none','l1','l2']}
rf_params = {'n_estimators': [50, 250, 500], 'max_depth': [50, 250, 500]}

In [86]:
best_params_logistic, best_score_logistic, worst_params_logistic, worst_score_logistic = optimize_model(logistic_model, logistic_params, X_train, y_train)
print("Best parameters for Logistic Regression:", best_params_logistic)
print("Best score for Logistic Regression:", best_score_logistic)

Best parameters for Logistic Regression: {'C': 0.1, 'max_iter': 500, 'penalty': 'l2'}
Best score for Logistic Regression: 0.9846153846153847


In [87]:
best_params_rf, best_score_rf, worst_params_rf, worst_score_rf = optimize_model(rf_model, rf_params, X_train, y_train)
print("Best parameters for Random Forest:", best_params_rf)
print("Best score for Random Forest:", best_score_rf)
print("Worst parameters for Random Forest:", worst_params_rf)
print("Worst score for Random Forest:", worst_score_rf)

Best parameters for Random Forest: {'max_depth': 50, 'n_estimators': 250}
Best score for Random Forest: 0.9342383107088988
Worst parameters for Random Forest: {'n_estimators': 500, 'max_depth': 500}
Worst score for Random Forest: 0.9225490196078431


## Bayesian

In [88]:
rf_model = RandomForestModel()
rf_params = {'n_estimators': [50, 100, 200], 'max_depth': [10, 50, 100]}

# Bayes Optimization
bayes_optimizer = BayesSearchOptimizer(rf_model, rf_params)
best_params_bayes, best_score_bayes = bayes_optimizer.optimize(X_train, y_train)
print("Best Parameters:", best_params_bayes)
print("Best Score:", best_score_bayes)

Best Parameters: OrderedDict([('max_depth', 10), ('n_estimators', 100)])
Best Score: 0.9419306184012065


In [89]:
rf_param_space = {
    'n_estimators': scope.int(hp.quniform('n_estimators', 50, 500, 50)),
    'max_depth': scope.int(hp.quniform('max_depth', 50, 500, 50))
}

In [90]:
hyperopt_optimizer = HyperoptOptimizer(rf_model, rf_param_space)
best_params_hyperopt, best_score_hyperopt = hyperopt_optimizer.optimize(X_train, y_train)
print("Best Parameters:", best_params_hyperopt)
print("Best Score:", best_score_hyperopt)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

100%|██████████| 50/50 [02:38<00:00,  3.16s/trial, best loss: -0.9380844645550527]
Best Parameters: {'max_depth': 400.0, 'n_estimators': 450.0}
Best Score: 0.9380844645550527


## NAS

In [54]:
darts_optimizer = DARTSOptimizer(input_size=X_train.shape[1])
best_architecture, best_score, best_accuracy = darts_optimizer.optimize(X_train, y_train, X_test, y_test)
print("Best Architecture:", best_architecture)
print("Best Score:", best_score)
print("Best Accuracy:", best_accuracy)

100%|██████████| 50/50 [01:11<00:00,  1.43s/trial, best loss: -0.9076923076923077]
Best Architecture: {'dropout_rate': <hyperopt.pyll.base.Apply object at 0x3b02a4c40>, 'learning_rate': <hyperopt.pyll.base.Apply object at 0x3af8ca0d0>, 'num_layers': <hyperopt.pyll.base.Apply object at 0x3cb934c40>, 'num_nodes': <hyperopt.pyll.base.Apply object at 0x3cb934df0>}
Best Score: 0.9076923076923077
Best Accuracy: 0.9076923076923077
