<a href="https://colab.research.google.com/github/nyp-sit/iti105/blob/main/MLFlow_on_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install mlflow

In [None]:
from google.colab.output import eval_js

# The notebook runs on a remote Colab VM, so a server listening on port 5000
# there cannot be opened directly from the browser. Evaluate Colab's
# `proxyPort` helper for port 5000 (the port `mlflow ui` uses by default)
# and print what it returns -- presumably the browser-reachable URL that
# forwards to that port.
ui_proxy_url = eval_js("google.colab.kernel.proxyPort(5000)")
print(ui_proxy_url)

# Auto Logging

In [None]:
import mlflow

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor

# Enable MLflow autologging for the supported ML libraries. With no run
# active, the fit() call below is recorded in a run that autologging
# creates (and closes) by itself.
mlflow.autolog()

db = load_diabetes()

# Seed the split and the forest so that re-running the notebook logs the
# same parameters and metrics; 42 matches the seed used by the later cells.
X_train, X_test, y_train, y_test = train_test_split(
    db.data, db.target, random_state=42
)

# Create and train the model.
rf = RandomForestRegressor(
    n_estimators=100, max_depth=6, max_features=3, random_state=42
)
rf.fit(X_train, y_train)

# Use the model to make predictions on the test dataset.
predictions = rf.predict(X_test)

In [None]:
# `!mlflow ui` runs the server in the foreground and blocks the kernel until
# it is interrupted, so none of the following cells could run. Launch it as a
# background process instead, and skip the launch when something is already
# listening on the port (for example when this cell is re-run).
import socket

with socket.socket() as probe:
    ui_running = probe.connect_ex(("127.0.0.1", 5000)) == 0

if not ui_running:
    # Port 5000 is the port requested from Colab's proxy earlier in the notebook.
    get_ipython().system_raw("mlflow ui --port 5000 &")

In [None]:
from mlflow.utils.model_utils import Model
from xgboost.sklearn import XGBClassifier
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss
import xgboost as xgb
import matplotlib as mpl


import mlflow
import mlflow.xgboost

# Load the iris data set and hold out 20% of it for evaluation.
iris = datasets.load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Switch on MLflow autologging for XGBoost.
mlflow.xgboost.autolog()

with mlflow.start_run():
    # Settings handed to the classifier.
    params = dict(
        objective="multi:softprob",
        num_class=3,
        learning_rate=0.3,
        eval_metric="mlogloss",
        colsample_bytree=1.0,
        subsample=1.0,
        seed=42,
    )
    model = xgb.XGBClassifier(**params)
    model.fit(X_train, y_train)

    # Score the fitted model on the held-out split.
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    # Record the test accuracy by hand in the same run.
    mlflow.log_metric("accuracy", acc)



# Explicit Logging

In [None]:
import time
from sklearn.ensemble import RandomForestClassifier

# Experiment under which the explicitly logged runs are grouped.
EXPERIMENT_NAME = "my_experiment"

# set_experiment() makes this the active experiment and returns the
# experiment object; keep its id so runs can be attached to it by id.
experiment = mlflow.set_experiment(experiment_name=EXPERIMENT_NAME)
EXPERIMENT_ID = experiment.experiment_id

In [None]:
# Autologging was switched on earlier in the notebook and is still active.
# Turn it off so the fit() calls below, which run outside any MLflow run, do
# not each spawn an extra autologged run next to the explicit one.
mlflow.autolog(disable=True)

iris = datasets.load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


for depth in [6, 8]:
    # Create and train the model. The forest is seeded so the accuracy logged
    # for a given depth is the same on every execution of the notebook.
    rf = RandomForestClassifier(
        n_estimators=100, max_depth=depth, max_features=3, random_state=42
    )
    rf.fit(X_train, y_train)

    # Use the model to make predictions on the test dataset.
    predictions = rf.predict(X_test)
    acc = accuracy_score(y_test, predictions)

    # Despite the variable name this is the run's display name, not its id.
    # The timestamp only has one-second resolution, so both iterations would
    # usually get the same name; the depth suffix keeps the names distinct.
    RUN_ID = time.strftime("run_%Y_%m_%d-%H_%M_%S") + f"_depth_{depth}"

    with mlflow.start_run(experiment_id=EXPERIMENT_ID, run_name=RUN_ID) as run:
        # log parameters
        mlflow.log_param("max_depth", depth)
        # log metrics
        mlflow.log_metric("accuracy", acc)


In [None]:
# `!mlflow ui` runs the server in the foreground and blocks the kernel until
# it is interrupted. Launch it as a background process instead, and skip the
# launch when a server is already listening on the port, since a second
# instance could not bind to it.
import socket

with socket.socket() as probe:
    ui_running = probe.connect_ex(("127.0.0.1", 5000)) == 0

if not ui_running:
    # Port 5000 is the port requested from Colab's proxy earlier in the notebook.
    get_ipython().system_raw("mlflow ui --port 5000 &")