# Hyperparameter Tuning

In [None]:
# Run this in Jupyter's Terminal
# pip install hyperopt

In [None]:
from hyperopt import tpe
from hyperopt import STATUS_OK
from hyperopt import Trials
from hyperopt import hp
from hyperopt import fmin

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

import pandas as pd
import mlflow

from data_utils import get_train_test_split_for_stock
from config import *

# Retrieve data

In [None]:
# Load the train/test partitions produced by the project helper for the
# data file configured in PATH_TO_DATA_FILE.
X_train, X_test, y_train, y_test = get_train_test_split_for_stock(PATH_TO_DATA_FILE)

# Last expression of the cell: display the shape of every partition.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

# Define objective function

In [None]:
"""
Hyperopt optimizes by minimization, but in our case we want the highest
possible F1 score. We therefore define the loss (the function to minimize) as
the complement of the F1 score, loss = 1 - F1, so that minimizing the loss
maximizes the F1 score.
"""



def objective(params, X_train=X_train, y_train=y_train, random_state=RANDOM_STATE, n_folds=N_FOLDS):
    """
    Objective function for Logistic Regression hyperparameter tuning.

    Builds a LogisticRegression from ``params``, evaluates it with
    cross-validation scored by macro-averaged F1, and records the evaluation
    in a nested MLflow run.

    Args:
        params: Keyword arguments forwarded to ``LogisticRegression``.
        X_train: Training features passed to ``cross_val_score``.
        y_train: Training labels passed to ``cross_val_score``.
        random_state: Seed forwarded to ``LogisticRegression``.
        n_folds: Value passed as ``cv`` to ``cross_val_score``.

    Returns:
        A dict with the keys ``'loss'`` (1 - mean cross-validated F1),
        ``'params'`` (the evaluated hyperparameters) and ``'status'``
        (``STATUS_OK``), as expected by ``hyperopt.fmin``.
    """

    # Enable MLflow's scikit-learn autologging before the run is started.
    mlflow.sklearn.autolog()

    with mlflow.start_run(nested=True):

        # Cross-validate the candidate model, scoring each fold with macro F1.
        clf = LogisticRegression(**params, random_state=random_state, verbose=0)
        scores = cross_val_score(clf, X_train, y_train, cv=n_folds, scoring='f1_macro')

        # Summarize the folds with their mean. Taking the best single fold
        # would reward configurations that got lucky on one split and give an
        # optimistically biased estimate of the model's quality.
        mean_score = float(scores.mean())

        # Loss must be minimized, so use the complement of the F1 score.
        loss = 1 - mean_score

        # Dictionary with information for evaluation
        return {'loss': loss, 'params': params, 'status': STATUS_OK}

# Define hyperparameter space

In [None]:
# Search space for the LogisticRegression hyperparameters. Every entry is a
# hyperopt expression whose label matches the keyword it is passed as.
space = dict(
    # Boolean switches, sampled as categorical choices.
    warm_start=hp.choice('warm_start', [True, False]),
    fit_intercept=hp.choice('fit_intercept', [True, False]),
    # Continuous parameters, sampled uniformly within the given bounds.
    tol=hp.uniform('tol', 0.00001, 0.0001),
    C=hp.uniform('C', 0.05, 3),
    # Candidate optimization algorithms.
    solver=hp.choice('solver', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']),
    # Iteration budget, chosen among the integers 5..999.
    max_iter=hp.choice('max_iter', range(5, 1000)),
)

# Create experiment

In [None]:
# Select "SP_EXP_HyperParam_Tuning" as the active MLflow experiment; the runs
# started later in this notebook are recorded under it.
mlflow.set_experiment("SP_EXP_HyperParam_Tuning")

# Run optimization

In [None]:
# Imported here so that this cell brings its own dependency into scope.
from hyperopt import space_eval

# Search algorithm: Tree-structured Parzen Estimator
tpe_algorithm = tpe.suggest

# Trials object to track progress
bayes_trials = Trials()

# Parent run; every call to `objective` opens a nested run below it.
with mlflow.start_run(run_name="Hyperopt optimization"):

    best = fmin(fn=objective,
                space=space,
                algo=tpe_algorithm,
                max_evals=MAX_EVALS,
                trials=bayes_trials)

    # fmin reports hp.choice parameters as the index of the selected option
    # (e.g. solver=2 rather than 'liblinear'). Decode the result against the
    # search space so that the logged parameters are the actual values.
    best_params = space_eval(space, best)
    mlflow.log_param("Best params", best_params)

In [None]:
# Display the dictionary returned by fmin. Note that for hp.choice parameters
# hyperopt reports the index of the selected option rather than the option
# itself; hyperopt.space_eval(space, best) converts it back to actual values.
best

# Show
- nested runs in the MLflow UI