In [2]:
import mlflow
# import torch

In [3]:
# set mlflow tracking uri (location to store exp and run data)
mlflow.set_tracking_uri(uri="../../mlruns")

In [4]:
client = mlflow.MlflowClient()

## Creating and Setting Experiments

In [5]:
exp_name = "creating-experiments"

experiment = client.get_experiment_by_name(exp_name)

if experiment is None:
   exp_id = client.create_experiment("creating-experiments", tags={
      "topic": "experiment-management",
      "version": "v1"
   })

   print(f"Experiment created with ID: {exp_id}")

else:
   exp_id = experiment.experiment_id
   print(f"Experiment already exists with ID: {exp_id}")

Experiment already exists with ID: 243891594606546977


In [6]:
# Setting the current experiment as the one we created above
experiment = mlflow.set_experiment(experiment_id=exp_id)

# Here, if an experiment with the given exp_name doesn't exist, mlflow will on its own create a new experiment and set it to current with the given exp_name. The drawback here is we cant pass artifact locations or tags directly in set_experiment() function

# But this wont work when we are passing exp_id to set_experiment(). If exp with exp_id doesn't exist then it will show err.

# Get experiment details
experiment.to_proto()

experiment_id: "243891594606546977"
name: "creating-experiments"
artifact_location: "/home/nakul/devfiles/Python/mlflow-tut/src/mlflow_tut/../../mlruns/243891594606546977"
lifecycle_stage: "active"
last_update_time: 1749488386349
creation_time: 1749488386349
tags {
  key: "version"
  value: "v2"
}
tags {
  key: "new-tag"
  value: "updated-value"
}
tags {
  key: "mlflow.note.content"
  value: "This is the description for the experiment"
}
tags {
  key: "topic"
  value: "experiment-management"
}

## Updating an Experiment

In [7]:
# Update the current experiment
mlflow.set_experiment_tag("new-tag", "new-value")
client.set_experiment_tag(exp_id, "new-tag", "newer-value")
mlflow.set_experiment_tags({
   "new-tag":"updated-value",
   "version": "v2"
})

## Special Tags

In [8]:
# This tag sets the description for the experiment which can be viewed in mlflow ui
mlflow.set_experiment_tag("mlflow.note.content", "This is the description for the experiment")

## Starting a New Run

In [9]:
# Or we can create a run object
run = mlflow.start_run()

In [10]:
print(type(run).__name__)

ActiveRun


In [11]:
run.info.to_proto()

run_uuid: "c5f27d40fea14f5c8e4a45c54b6727fe"
experiment_id: "243891594606546977"
run_name: "adventurous-carp-264"
user_id: "nakul"
status: RUNNING
start_time: 1749533095553
artifact_uri: "/home/nakul/devfiles/Python/mlflow-tut/src/mlflow_tut/../../mlruns/243891594606546977/c5f27d40fea14f5c8e4a45c54b6727fe/artifacts"
lifecycle_stage: "active"
run_id: "c5f27d40fea14f5c8e4a45c54b6727fe"

In [12]:
run.data.to_dictionary()

{'metrics': {},
 'params': {},
 'tags': {'mlflow.source.name': '/home/nakul/.cache/pypoetry/virtualenvs/mlflow-tut-87emS356-py3.12/lib/python3.12/site-packages/ipykernel_launcher.py',
  'mlflow.source.type': 'LOCAL',
  'mlflow.runName': 'adventurous-carp-264',
  'mlflow.user': 'nakul'}}

In [13]:
# We can't have two runs running together, so to end a run do the following
mlflow.end_run()

## Using Runs as Context Managers

In [14]:
# Now we will start a new run in the experiment
with mlflow.start_run() as run:
   print(f"Active run_id: {run.info.run_id}")
   print(f"Active exp_id: {run.info.experiment_id}")  

Active run_id: 1a8f3c94759f4004a34da7a421e85769
Active exp_id: 243891594606546977


## Using MLFlow Client to Manage Runs

In [21]:
client = mlflow.MlflowClient()

run = client.create_run(experiment_id=exp_id, run_name="new-run")

print(type(run).__name__)

Run


## Set tags for Runs

In [23]:
client.set_tag(run.info.run_id, "my-tag", "my-tag-value") 
client.set_tag(run.info.run_id, "mlflow.note.content", "This is a description provided using the client") 

## Nested Runs using Parent Run

In [24]:
with mlflow.start_run(run_name="parent") as parent_run:
   print("Parent run_id: ", parent_run.info.run_id)
   mlflow.log_param("parent param1", 2)

with mlflow.start_run(run_name="child", parent_run_id=parent_run.info.run_id) as child_run:
   print("Child run_id: ", child_run.info.run_id)
   mlflow.log_param("child param1", 23)

with mlflow.start_run(run_name="grandchild", parent_run_id=child_run.info.run_id) as grandchild_run:
   print("Grandchild run_id: ", grandchild_run.info.run_id)
   mlflow.log_param("grandchild param1", 234)



Parent run_id:  fe414cce82304c6dbc18cdb9604d26d9
Child run_id:  cf1de562977e41b88b43ac42673d71b0
Grandchild run_id:  e6a6d5a505b14716a4ab5cbc3c761013


## Logging Metrics

In [29]:
metrics = {
   "m1" : 1,
   "m2" : 2,
   "m3" : 3,
}

with mlflow.start_run(run_name="logging_metrics") as run:
   mlflow.log_metrics(metrics, run_id=run.info.run_id)
   print(f"Logged metrics: {metrics}")

run.to_proto()

Logged metrics: {'m1': 1, 'm2': 2, 'm3': 3}


info {
  run_uuid: "50c23ab84a8b4ecdbb611e54501cd51b"
  experiment_id: "243891594606546977"
  run_name: "logging_metrics"
  user_id: "nakul"
  status: RUNNING
  start_time: 1749539146584
  artifact_uri: "/home/nakul/devfiles/Python/mlflow-tut/src/mlflow_tut/../../mlruns/243891594606546977/50c23ab84a8b4ecdbb611e54501cd51b/artifacts"
  lifecycle_stage: "active"
  run_id: "50c23ab84a8b4ecdbb611e54501cd51b"
}
data {
  tags {
    key: "mlflow.source.name"
    value: "/home/nakul/.cache/pypoetry/virtualenvs/mlflow-tut-87emS356-py3.12/lib/python3.12/site-packages/ipykernel_launcher.py"
  }
  tags {
    key: "mlflow.source.type"
    value: "LOCAL"
  }
  tags {
    key: "mlflow.runName"
    value: "logging_metrics"
  }
  tags {
    key: "mlflow.user"
    value: "nakul"
  }
}

## Logging Artifacts

In [35]:
run = client.get_run(run_id=run.info.run_id)

print(run.info.run_name)

logging_metrics


In [36]:
client.log_artifacts(run.info.run_id, local_dir="../../data/experiments", artifact_path="my_artifacts")

## Logging Models with Example Data

In [37]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris

In [38]:
rfc = RandomForestClassifier(n_estimators=2)

In [39]:
iris = load_iris(as_frame=True)
x = iris.data
y = iris.target

In [40]:
x

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [41]:
input_example = x.iloc[:10]
input_example

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
5,5.4,3.9,1.7,0.4
6,4.6,3.4,1.4,0.3
7,5.0,3.4,1.5,0.2
8,4.4,2.9,1.4,0.2
9,4.9,3.1,1.5,0.1


In [44]:
rfc.__class__.__name__

'RandomForestClassifier'

In [46]:
import mlflow.sklearn

with mlflow.start_run(run_name="logging_model") as run:
   mlflow.sklearn.log_model(
      sk_model=rfc,
      artifact_path=rfc.__class__.__name__,
      input_example=input_example
   )

  "dataframe_split": {
    "columns": [
      "sepal length (cm)",
      "sepal width (cm)",
      "petal length (cm)",
      "petal width (cm)"
    ],
    "data": [
      [
        5.1,
        3.5,
        1.4,
        0.2
      ],
      [
        4.9,
        3.0,
        1.4,
        0.2
      ],
      [
        4.7,
        3.2,
        1.3,
        0.2
      ],
      [
        4.6,
        3.1,
        1.5,
        0.2
      ],
      [
        5.0,
        3.6,
        1.4,
        0.2
      ],
      [
        5.4,
        3.9,
        1.7,
        0.4
      ],
      [
        4.6,
        3.4,
        1.4,
        0.3
      ],
      [
        5.0,
        3.4,
        1.5,
        0.2
      ],
      [
        4.4,
        2.9,
        1.4,
        0.2
      ],
      [
        4.9,
        3.1,
        1.5,
        0.1
      ]
    ]
  }
}. Alternatively, you can avoid passing input example and pass model signature instead when logging the model. To ensure the input example is val