# Tracking experiments with MLflow Tracking

## Importing dependencies

In [1]:
# Importing dependencies
import mlflow
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn import datasets

## Breaking runs down into experiments

In [2]:
# Creating an experiment
experiment_id = mlflow.create_experiment(name="test")

In [3]:
# Selecting an existing experiment
mlflow.set_experiment(experiment_name="test")

In [4]:
# Deleting an experiment
mlflow.delete_experiment(experiment_id=experiment_id)

## Logging experiments manually

In [5]:
# Setting an experiment for manual logging
mlflow.set_experiment(experiment_name="manual_logging")

INFO: 'manual_logging' does not exist. Creating a new experiment


In [6]:
# Checking if the script is executed directly
if __name__ == "__main__":
    # Loading data
    data = datasets.load_breast_cancer()
    
    # Splitting the data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(data.data, 
                                                        data.target,
                                                        stratify=data.target)
    
    # Selecting a parameter range to try out
    C = list(range(1, 10))
    
    # Starting a tracking run
    with mlflow.start_run(run_name="PARENT_RUN"):
        # For each value of C, running a child run
        for param_value in C:
            with mlflow.start_run(run_name="CHILD_RUN", nested=True):
                # Instantiating and fitting the model
                model = LogisticRegression(C=param_value, max_iter=1000)            
                model.fit(X=X_train, y=y_train)
                
                # Logging the current value of C
                mlflow.log_param(key="C", value=param_value)
                
                # Logging the test performance of the current model                
                mlflow.log_metric(key="Score", value=model.score(X_test, y_test)) 
                
                # Saving the model as an artifact
                mlflow.sklearn.log_model(sk_model=model, artifact_path="model")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

## Logging experiments automatically

In [7]:
# Setting an experiment for automatic logging
mlflow.set_experiment(experiment_name="auto_logging")

INFO: 'auto_logging' does not exist. Creating a new experiment


In [8]:
# Checking if the script is executed directly
if __name__ == "__main__":
    # Enabling automatic logging for scikit-learn runs
    mlflow.sklearn.autolog()
    
    # Loading data
    data = datasets.load_breast_cancer()
    
    # Setting hyperparameter values to try
    params = {"C": [1, 2, 3, 4, 5, 6, 7, 8, 9]}
    
    # Instantiating LogisticRegression and GridSearchCV
    log_reg = LogisticRegression(max_iter=1000)
    grid_search = GridSearchCV(estimator=log_reg, param_grid=params)
    
    # Starting a logging run
    with mlflow.start_run() as run:
        # Fitting GridSearchCV
        grid_search.fit(X=data.data, y=data.target)
            
    # Disabling autologging
    mlflow.sklearn.autolog(disable=True)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt