# Import variables

In [1]:
from hyperopt import tpe
from hyperopt import STATUS_OK
from hyperopt import Trials
from hyperopt import hp
from hyperopt import fmin
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
import pandas
import mlflow

# Load Data

In [2]:
pandas_df = pandas.read_csv("training_data.csv")
X=pandas_df.iloc[:,:-1]
y=pandas_df.iloc[:,-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4284, stratify=y)

# Define objective function

In [3]:
N_FOLDS = 4
MAX_EVALS = 10

def objective(params, n_folds = N_FOLDS):
    """Objective function for Logistic Regression Hyperparameter Tuning"""

    # Perform n_fold cross validation with hyperparameters
    # Use early stopping and evaluate based on ROC AUC
    mlflow.sklearn.autolog()
    with mlflow.start_run(nested=True):
        clf = LogisticRegression(**params,random_state=0,verbose =0)
        scores = cross_val_score(clf, X_train, y_train, cv=5, scoring='f1_macro')

        # Extract the best score
        best_score = max(scores)

        # Loss must be minimized
        loss = 1 - best_score

        # Dictionary with information for evaluation
        return {'loss': loss, 'params': params, 'status': STATUS_OK}

# Define parameter space

In [4]:

space = {
    'warm_start' : hp.choice('warm_start', [True, False]),
    'fit_intercept' : hp.choice('fit_intercept', [True, False]),
    'tol' : hp.uniform('tol', 0.00001, 0.0001),
    'C' : hp.uniform('C', 0.05, 3),
    'solver' : hp.choice('solver', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']),
    'max_iter' : hp.choice('max_iter', range(5,1000))
}

# Create experiment

In [5]:
mlflow.set_experiment("Hyperopt_Optimization")

INFO: 'Hyperopt_Optimization' does not exist. Creating a new experiment


# Define Optimization Trials

In [6]:
# Algorithm
tpe_algorithm = tpe.suggest

# Trials object to track progress
bayes_trials = Trials()




with mlflow.start_run():
    best = fmin(fn = objective, space = space, algo = tpe.suggest, max_evals = MAX_EVALS, trials = bayes_trials)

  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]

2021/03/12 03:14:34 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'd71f9aa76ba748e7af385410b216c83e'
2021/03/12 03:14:36 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'd71f9aa76ba748e7af385410b216c83e'
2021/03/12 03:14:36 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'd71f9aa76ba748e7af385410b216c83e'
2021/03/12 03:14:37 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 10%|█         | 1/10 [00:04<00:37,  4.15s/trial, best loss: 0.5013998735598075]

2021/03/12 03:14:38 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'fab46e4c9cb44f0eb529c77f4c2b690a'
2021/03/12 03:14:39 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'fab46e4c9cb44f0eb529c77f4c2b690a'
2021/03/12 03:14:40 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'fab46e4c9cb44f0eb529c77f4c2b690a'
2021/03/12 03:14:40 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 20%|██        | 2/10 [00:07<00:30,  3.84s/trial, best loss: 0.4753470735734817]

2021/03/12 03:14:42 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '4d9f900f91144fa0b66a629a3cc50ac2'
2021/03/12 03:14:43 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '4d9f900f91144fa0b66a629a3cc50ac2'
2021/03/12 03:14:43 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '4d9f900f91144fa0b66a629a3cc50ac2'
2021/03/12 03:14:44 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 30%|███       | 3/10 [00:11<00:25,  3.67s/trial, best loss: 0.4753470735734817]

2021/03/12 03:14:45 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'b8501c97f0234eaaae143560039b9749'
2021/03/12 03:14:46 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'b8501c97f0234eaaae143560039b9749'
2021/03/12 03:14:47 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'b8501c97f0234eaaae143560039b9749'
2021/03/12 03:14:47 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 40%|████      | 4/10 [00:14<00:21,  3.56s/trial, best loss: 0.4753470735734817]

2021/03/12 03:14:49 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'c20a4e7bb63d4915a6eef819d207822d'
2021/03/12 03:14:49 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'c20a4e7bb63d4915a6eef819d207822d'
2021/03/12 03:14:50 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'c20a4e7bb63d4915a6eef819d207822d'
2021/03/12 03:14:50 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 50%|█████     | 5/10 [00:18<00:17,  3.50s/trial, best loss: 0.4753470735734817]

2021/03/12 03:14:52 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '960affdbf53e4d3f8a42d75a354b99d6'
2021/03/12 03:14:53 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '960affdbf53e4d3f8a42d75a354b99d6'
2021/03/12 03:14:53 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '960affdbf53e4d3f8a42d75a354b99d6'
2021/03/12 03:14:54 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 60%|██████    | 6/10 [00:21<00:13,  3.37s/trial, best loss: 0.4753470735734817]

2021/03/12 03:14:55 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '184507691817492a82f749472ca28416'
2021/03/12 03:14:56 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '184507691817492a82f749472ca28416'
2021/03/12 03:14:57 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '184507691817492a82f749472ca28416'
2021/03/12 03:14:57 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 70%|███████   | 7/10 [00:24<00:10,  3.44s/trial, best loss: 0.4753470735734817]

2021/03/12 03:14:59 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'f239c60b3bbf4ff49b572ab9d9848a2a'
2021/03/12 03:14:59 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'f239c60b3bbf4ff49b572ab9d9848a2a'
2021/03/12 03:15:00 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'f239c60b3bbf4ff49b572ab9d9848a2a'
2021/03/12 03:15:01 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 80%|████████  | 8/10 [00:28<00:06,  3.48s/trial, best loss: 0.4753470735734817]

2021/03/12 03:15:03 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '719221d3ea7943458bef6b622dc66970'
2021/03/12 03:15:03 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '719221d3ea7943458bef6b622dc66970'
2021/03/12 03:15:04 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '719221d3ea7943458bef6b622dc66970'
2021/03/12 03:15:04 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 90%|█████████ | 9/10 [00:32<00:03,  3.67s/trial, best loss: 0.4753470735734817]

2021/03/12 03:15:06 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '02e90781386142708b01ecc4d437b12b'
2021/03/12 03:15:07 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '02e90781386142708b01ecc4d437b12b'
2021/03/12 03:15:08 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '02e90781386142708b01ecc4d437b12b'
2021/03/12 03:15:08 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

100%|██████████| 10/10 [00:35<00:00,  3.58s/trial, best loss: 0.4753470735734817]


In [8]:
print(best)

{'C': 2.713769314962313, 'fit_intercept': 0, 'max_iter': 499, 'solver': 4, 'tol': 6.961466308748119e-05, 'warm_start': 1}
