# MLflow Client API

In [75]:
from mlflow import MlflowClient
from mlflow.entities import Experiment

from typing import List
import logging

import tools

In [60]:
# Configure the root logger at INFO level, then obtain a logger named
# after the current module for use by the cells below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [61]:
# Client bound to the tracking server at http://localhost:8080; the
# experiment queries and creation calls below go through this object.
client: MlflowClient = MlflowClient(tracking_uri="http://localhost:8080")

In [62]:
# Query the tracking server for experiments, passing no filter arguments.
all_experiments: List[Experiment] = client.search_experiments()

In [63]:
# Summarise the experiment named "Default" as a plain dict holding its name
# and lifecycle stage. The value is a dict (not an Experiment), so it is
# annotated as such. next() stops at the first match instead of building a
# full list just to index element 0.
default_experiment: dict = next(
    (
        {
            'experiment_name': experiment.name,
            'experiment_lifecycle': experiment.lifecycle_stage,
        }
        for experiment in all_experiments
        if experiment.name == "Default"
    ),
    None,
)

# Keep raising IndexError (as the original `[...][0]` did) when nothing
# matches, but with a message that says what was actually missing.
if default_experiment is None:
    raise IndexError('No experiment named "Default" was found in all_experiments')

In [72]:
# Emit the "Default" experiment summary at INFO level.
logger.info(default_experiment)

INFO:__main__:{'experiment_name': 'Default', 'experiment_lifecycle': 'active'}


## Creating experiments

In [65]:
# Free-text note describing the experiment; it is attached to the tags
# below under the "mlflow.note.content" key.
experiment_description: str = (
    "This is the grocery forecasting project. "
    "This experiment contains the produce models for apples."
)

# Searchable key/value tags for the experiment. The identifier-like keys are
# given as keyword arguments; the dotted note key is added afterwards so the
# insertion order stays the same as before.
experiment_tags: dict = dict(
    project_name="grocery-forecasting",
    store_dept="produce",
    team="stores-ml",
    project_quarter="Q3-2023",
)
experiment_tags["mlflow.note.content"] = experiment_description

In [66]:
# Experiment names must be unique on the tracking server, so calling
# create_experiment again for "Apple_Models" fails on a re-run of the
# notebook. Look the experiment up first and reuse its ID when it is already
# there; only create it (with the tags) when the lookup returns None.
_existing_apples_experiment = client.get_experiment_by_name("Apple_Models")
produce_apples_experiment: str = (
    _existing_apples_experiment.experiment_id
    if _existing_apples_experiment is not None
    else client.create_experiment(name="Apple_Models", tags=experiment_tags)
)

In [71]:
# Log the experiment ID held in produce_apples_experiment.
logger.info(f"The experiment ID is: {produce_apples_experiment}")

INFO:__main__:The experiment ID is: 800746094367988606


## Searching experiments

In [None]:
# Find experiments whose `project_name` tag equals 'grocery-forecasting'.
# search_experiments returns a list of matches (the logged result is a
# one-element list), so the annotation is List[Experiment] rather than a
# single Experiment.
apples_experiment: List[Experiment] = client.search_experiments(
    filter_string="tags.`project_name` = 'grocery-forecasting'"
)

In [None]:
# Log the search result (a list of matching experiments) at INFO level.
logger.info(apples_experiment)

INFO:__main__:[<Experiment: artifact_location='mlflow-artifacts:/800746094367988606', creation_time=1733996915446, experiment_id='800746094367988606', last_update_time=1733996915446, lifecycle_stage='active', name='Apple_Models', tags={'mlflow.note.content': 'This is the grocery forecasting project. This '
                        'experiment contains the produce models for apples.',
 'project_name': 'grocery-forecasting',
 'project_quarter': 'Q3-2023',
 'store_dept': 'produce',
 'team': 'stores-ml'}>]


## Create a dataset about apples

In [76]:
# Generate the apple-sales dataset with the project-local helper
# (presumably a pandas DataFrame, judging by the rendered table -- confirm
# against tools.py).
data = tools.generate_apple_sales_data_with_promo_adjustment(
    n_rows=1_000,
    base_demand=1_000,
)

# Show the last 20 rows.
data.tail(20)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["previous_days_demand"].fillna(
  df["previous_days_demand"].fillna(


Unnamed: 0,date,average_temperature,rainfall,weekend,holiday,price_per_kg,promo,demand,previous_days_demand
980,2024-11-23 11:02:21.164866,34.130183,1.454065,1,0,1.449177,0,1289.802447,1001.085782
981,2024-11-24 11:02:21.164865,32.353643,9.462859,1,0,2.856503,0,1136.951553,1289.802447
982,2024-11-25 11:02:21.164864,18.816833,0.39147,0,0,1.326429,0,963.352029,1136.951553
983,2024-11-26 11:02:21.164863,34.533012,2.120477,0,0,0.970131,0,1039.385504,963.352029
984,2024-11-27 11:02:21.164863,23.057202,2.365705,0,0,1.049931,0,991.427049,1039.385504
985,2024-11-28 11:02:21.164862,34.810165,3.089005,0,0,2.035149,0,974.971149,991.427049
986,2024-11-29 11:02:21.164861,29.208905,3.673292,0,0,2.518098,0,1056.249547,974.971149
987,2024-11-30 11:02:21.164861,16.428676,4.077782,1,0,1.268979,0,1381.118915,1056.249547
988,2024-12-01 11:02:21.164860,32.067512,2.734454,1,0,0.762317,0,1358.492007,1381.118915
989,2024-12-02 11:02:21.164859,31.938203,13.883486,0,0,1.153301,0,967.04047,1358.492007


## Logging our first runs with MLflow

In [77]:
import mlflow
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import numpy as np

In [78]:
# Point the fluent mlflow API at the same tracking server the MlflowClient
# above was created with, instead of hard-coding a second spelling of the
# address (localhost vs 127.0.0.1) that could drift out of sync.
mlflow.set_tracking_uri(client.tracking_uri)

In [79]:
# Run name for this training iteration. Without one, MLflow auto-generates
# a unique name for the run.
run_name = "apples_rf_test"

# Artifact path the model will be saved to.
artifact_path = "rf_apples"

# Make "Apple_Models" the active experiment; the call returns the
# Experiment metadata, kept here for reference.
apple_experiment = mlflow.set_experiment("Apple_Models")

In [80]:
# Split the data into features and target and drop irrelevant date field and target field
X = data.drop(columns=["date", "demand"])
y = data["demand"]

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

params = {
    "n_estimators": 100,
    "max_depth": 6,
    "min_samples_split": 10,
    "min_samples_leaf": 4,
    "bootstrap": True,
    "oob_score": False,
    "random_state": 888,
}

# Train the RandomForestRegressor
rf = RandomForestRegressor(**params)

# Fit the model on the training data
rf.fit(X_train, y_train)

# Predict on the validation set
y_pred = rf.predict(X_val)

# Calculate error metrics
mae = mean_absolute_error(y_val, y_pred)
mse = mean_squared_error(y_val, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_val, y_pred)

# Assemble the metrics we're going to write into a collection
metrics = {"mae": mae, "mse": mse, "rmse": rmse, "r2": r2}

# Initiate the MLflow run context
with mlflow.start_run(run_name=run_name) as run:
    # Log the parameters used for the model fit
    mlflow.log_params(params)

    # Log the error metrics that were calculated during validation
    mlflow.log_metrics(metrics)

    # Log an instance of the trained model for later use
    mlflow.sklearn.log_model(
        sk_model=rf, input_example=X_val, artifact_path=artifact_path
    )



🏃 View run apples_rf_test at: http://127.0.0.1:8080/#/experiments/800746094367988606/runs/3a7b933cbf364aff8db03afba8aae731
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/800746094367988606
