# Import variables

In [1]:
!pip install hyperopt



In [3]:
from hyperopt import tpe
from hyperopt import STATUS_OK
from hyperopt import Trials
from hyperopt import hp
from hyperopt import fmin
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
import pandas
import mlflow

# Load Data

In [4]:
pandas_df = pandas.read_csv("training_data.csv")
X=pandas_df.iloc[:,:-1]
y=pandas_df.iloc[:,-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4284, stratify=y)

# Define objective function

In [5]:
N_FOLDS = 4
MAX_EVALS = 10

def objective(params, n_folds = N_FOLDS):
    """Objective function for Logistic Regression Hyperparameter Tuning"""

    # Perform n_fold cross validation with hyperparameters
    # Use early stopping and evaluate based on ROC AUC
    mlflow.sklearn.autolog()
    with mlflow.start_run(nested=True):
        clf = LogisticRegression(**params,random_state=0,verbose =0)
        scores = cross_val_score(clf, X_train, y_train, cv=5, scoring='f1_macro')

        # Extract the best score
        best_score = max(scores)

        # Loss must be minimized
        loss = 1 - best_score

        # Dictionary with information for evaluation
        return {'loss': loss, 'params': params, 'status': STATUS_OK}

# Define parameter space

In [6]:

space = {
    'warm_start' : hp.choice('warm_start', [True, False]),
    'fit_intercept' : hp.choice('fit_intercept', [True, False]),
    'tol' : hp.uniform('tol', 0.00001, 0.0001),
    'C' : hp.uniform('C', 0.05, 3),
    'solver' : hp.choice('solver', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']),
    'max_iter' : hp.choice('max_iter', range(5,1000))
}

# Create experiment

In [7]:
mlflow.set_experiment("Hyperopt_Optimization")

INFO: 'Hyperopt_Optimization' does not exist. Creating a new experiment


# Define Optimization Trials

In [8]:
# Algorithm
tpe_algorithm = tpe.suggest

# Trials object to track progress
bayes_trials = Trials()




with mlflow.start_run():
    best = fmin(fn = objective, space = space, algo = tpe.suggest, max_evals = MAX_EVALS, trials = bayes_trials)

  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]









 10%|█         | 1/10 [00:07<01:06,  7.41s/trial, best loss: 0.48037676609105184]









 20%|██        | 2/10 [00:13<00:51,  6.49s/trial, best loss: 0.4753470735734817] 









 30%|███       | 3/10 [00:19<00:43,  6.16s/trial, best loss: 0.4753470735734817]









 40%|████      | 4/10 [00:25<00:36,  6.12s/trial, best loss: 0.4753470735734817]









 50%|█████     | 5/10 [00:32<00:32,  6.48s/trial, best loss: 0.4753470735734817]









 60%|██████    | 6/10 [00:37<00:24,  6.20s/trial, best loss: 0.4753470735734817]









 70%|███████   | 7/10 [00:43<00:18,  6.12s/trial, best loss: 0.4753470735734817]









 80%|████████  | 8/10 [00:49<00:12,  6.06s/trial, best loss: 0.4753470735734817]









 90%|█████████ | 9/10 [00:56<00:06,  6.43s/trial, best loss: 0.4753470735734817]









100%|██████████| 10/10 [01:03<00:00,  6.35s/trial, best loss: 0.4753470735734817]


In [9]:
best

{'C': 1.5701165348547805,
 'fit_intercept': 0,
 'max_iter': 29,
 'solver': 1,
 'tol': 7.24663364805058e-05,
 'warm_start': 0}