# Tracking experiments with MLflow Tracking

## Importing dependencies

In [1]:
# Importing dependencies
import mlflow
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn import datasets
from pathlib import Path


* 'schema_extra' has been renamed to 'json_schema_extra'


## Breaking runs down into experiments

In [2]:
# Creating an experiment
# The value returned by create_experiment is kept in experiment_id so that the
# deletion cell further down can refer to this exact experiment.
# NOTE(review): presumably this raises if an experiment named "test" already
# exists (e.g. when the cell is re-run) — confirm against the MLflow docs.
experiment_id = mlflow.create_experiment(name="test")

In [3]:
# Selecting an existing experiment
# Makes "test" the experiment selected for subsequent tracking calls. The call
# is the last expression of the cell, so the Experiment object it returns is
# rendered as the cell output.
mlflow.set_experiment(experiment_name="test")

<Experiment: artifact_location='file:///Users/parulpandey/Documents/MLOps/notebooks/mlruns/175161063379229255', creation_time=1698862094821, experiment_id='175161063379229255', last_update_time=1698862094821, lifecycle_stage='active', name='test', tags={}>

In [4]:
# Deleting an experiment
# Uses the ID captured when "test" was created. "test" is also the experiment
# most recently passed to set_experiment; the cells that follow select a
# different experiment before anything is logged.
mlflow.delete_experiment(experiment_id=experiment_id)

## Logging experiments manually

In [5]:
# Optional tracking-URI setup, currently disabled: every line below is
# commented out, so this cell does nothing when executed. If enabled, it would
# create a local directory, build a file:// URI from it, register that URI
# with mlflow.set_tracking_uri and print the result.
# NOTE(review): the path is an absolute, machine-specific location; replace it
# with a configurable/relative path before re-enabling.
# MODEL_REGISTRY = Path("/Users/parulpandey/Documents/MLOps/artifacts/mlflow")
# Path(MODEL_REGISTRY).mkdir(parents=True, exist_ok=True)
# MLFLOW_TRACKING_URI = 'file://'+"/Users/parulpandey/Documents/MLOps/artifacts/mlflow"#str(MODEL_REGISTRY.absolute())
# mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
# print (mlflow.get_tracking_uri())

In [6]:
# Setting an experiment for manual logging
# Selects "manual_logging" for subsequent tracking calls. As the captured log
# output shows, MLflow creates the experiment when no experiment with that
# name exists yet. The returned Experiment object is displayed as the cell
# output.
mlflow.set_experiment(experiment_name="manual_logging")

2023/11/01 11:08:14 INFO mlflow.tracking.fluent: Experiment with name 'manual_logging' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///Users/parulpandey/Documents/MLOps/notebooks/mlruns/887022714727992340', creation_time=1698862094841, experiment_id='887022714727992340', last_update_time=1698862094841, lifecycle_stage='active', name='manual_logging', tags={}>

In [2]:
# Checking if the script is executed directly
# (inside a notebook kernel __name__ is "__main__", so the guard passes here;
# it only matters if this code is exported to a module and imported.)
if __name__ == "__main__":
    # Loading data
    data = datasets.load_breast_cancer()

    # Splitting the data into train and test sets.
    # A fixed random_state makes the split identical on every
    # Restart & Run All, so logged scores are comparable between executions.
    X_train, X_test, y_train, y_test = train_test_split(
        data.data,
        data.target,
        stratify=data.target,
        random_state=42,
    )

    # Selecting a parameter range to try out
    C = list(range(1, 10))

    # Looking up the target experiment and creating it only when it is missing.
    # get_experiment_by_name returns None for an unknown name, so test for None
    # explicitly instead of waiting for an AttributeError on `.experiment_id`.
    experiment = mlflow.get_experiment_by_name('oof')
    if experiment is None:
        experiment_id = mlflow.create_experiment('oof')
    else:
        experiment_id = experiment.experiment_id
        print(experiment_id)

    # Starting a tracking run.
    # This block used to be indented under the `except` branch, which meant
    # runs were only recorded the first time, when the experiment had just
    # been created. It now executes on every run of the cell.
    with mlflow.start_run(run_name="PARENT_RUN", experiment_id=experiment_id):
        # For each value of C, running a child run
        for param_value in C:
            # NOTE(review): experiment_id is passed explicitly because a nested
            # run presumably falls back to the currently active experiment
            # rather than the parent's one — confirm against the MLflow docs.
            with mlflow.start_run(
                run_name="CHILD_RUN",
                experiment_id=experiment_id,
                nested=True,
            ):
                # Instantiating and fitting the model
                model = LogisticRegression(C=param_value, max_iter=1000)
                model.fit(X=X_train, y=y_train)

                # Logging the current value of C
                mlflow.log_param(key="C", value=param_value)

                # Logging the test performance of the current model
                # (mean accuracy on the held-out split, not a placeholder).
                mlflow.log_metric(key="Score", value=model.score(X_test, y_test))

## Logging experiments automatically

In [8]:
# Setting an experiment for automatic logging
# Selects "auto_logging" for subsequent tracking calls. As the captured log
# output shows, MLflow creates the experiment when no experiment with that
# name exists yet. The returned Experiment object is displayed as the cell
# output.
mlflow.set_experiment(experiment_name="auto_logging")

2023/11/01 11:08:14 INFO mlflow.tracking.fluent: Experiment with name 'auto_logging' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///Users/parulpandey/Documents/MLOps/notebooks/mlruns/667360824054492315', creation_time=1698862094942, experiment_id='667360824054492315', last_update_time=1698862094942, lifecycle_stage='active', name='auto_logging', tags={}>