In [1]:
# Print the local .env file via an IPython shell escape to show the project settings.
# NOTE(review): the file's contents are saved in this cell's output — confirm it
# holds no secrets before sharing or committing the notebook.
!cat .env

PROJECT_ID=winged-quanta-472908-n1
LOCATION=us-central1
BUCKET_URI=gs://mlops-course-winged-quanta-472908-n1-unique
BUCKET_NAME=mlops-course-winged-quanta-472908-n1-unique


In [1]:
from pprint import pprint

import mlflow
import requests
from mlflow import MlflowClient
from mlflow.entities import ViewType
from mlflow.models import infer_signature

# Ask api.ipify.org for the caller's public IP address; the tracking URI below is
# built from it (presumably the MLflow server runs on this same host — confirm).
# The timeout stops the cell from hanging forever if the lookup service is
# unreachable, and raise_for_status() prevents an HTTP error body from being
# used as a host name.
response = requests.get('https://api.ipify.org', timeout=10)
response.raise_for_status()

# Strip stray whitespace so the host part of the URI is clean.
mlflow.set_tracking_uri(f"http://{response.text.strip()}:8100")

# One client bound to the tracking URI configured above.
client = MlflowClient(mlflow.get_tracking_uri())
all_experiments = client.search_experiments()

In [2]:
# List every experiment known to the tracking server, including deleted ones.
# ViewType.ALL is the documented enum value for this argument, used here instead
# of the bare string "ALL".
experiments = client.search_experiments(view_type=ViewType.ALL)
for exp in experiments:
    print(f"ID={exp.experiment_id} | Name={exp.name} | Stage={exp.lifecycle_stage}")

ID=2 | Name=IRIS classifier: Quickstart-v2 | Stage=active
ID=1 | Name=IRIS classifier: Quickstart | Stage=deleted
ID=0 | Name=Default | Stage=active


In [4]:
# Display the tracking URI MLflow is currently configured with.
mlflow.get_tracking_uri()

'http://34.135.48.47:8100'

In [14]:
# Make 'IRIS classifier: Quickstart-v2' the active experiment; the returned
# Experiment object is shown as the cell output.
mlflow.set_experiment('IRIS classifier: Quickstart-v2')

<Experiment: artifact_location='gs://mlops-course-winged-quanta-472908-n1-unique/2', creation_time=1761387321002, experiment_id='2', last_update_time=1761387321002, lifecycle_stage='active', name='IRIS classifier: Quickstart-v2', tags={'mlflow.experimentKind': 'custom_model_development'}>

In [126]:
# Restore the experiment held in `exp` if it is currently marked as deleted.
# NOTE(review): `exp` is not assigned in this cell. It appears to be whatever an
# earlier executed cell left in the kernel (for example the loop variable of the
# experiment listing) — confirm the intended experiment is looked up explicitly
# before relying on this cell.
if exp is not None and exp.lifecycle_stage == "deleted":
    client.restore_experiment(exp.experiment_id)
    print(f"Restored experiment: {exp.name}")
else:
    # Reached when `exp` is None or its lifecycle stage is anything other than "deleted".
    print("Experiment not found or already active.")

Experiment not found or already active.


In [13]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,precision_score, recall_score, f1_score
import joblib
from datetime import datetime
import os

# Load the train/test feature and label CSVs from the "data" folder under the
# current working directory.
script_dir = os.getcwd()
data_path = os.path.join(script_dir, "data")

# Features stay as DataFrames.
X_train = pd.read_csv(os.path.join(data_path, "X_train.csv"))
X_test = pd.read_csv(os.path.join(data_path, "X_test.csv"))

# Labels are read as DataFrames and flattened to 1-D arrays.
y_train = pd.read_csv(os.path.join(data_path, "y_train.csv")).values.ravel()
y_test = pd.read_csv(os.path.join(data_path, "y_test.csv")).values.ravel()

In [11]:
# Hyperparameters passed to the decision tree and logged with the MLflow run.
params = dict(max_depth=3, random_state=42, min_samples_split=2)

In [15]:
# Build the decision tree from `params` and fit it on the training split.
# fit() returns the estimator itself, so construction and training are chained.
clf = DecisionTreeClassifier(**params).fit(X_train, y_train)

# Predict labels for the test features.
y_pred = clf.predict(X_test)

In [16]:
# Score the test-set predictions: plain accuracy plus weighted-average
# precision, recall and F1.
weighted_scorers = {
    "precision": precision_score,
    "recall": recall_score,
    "f1_score": f1_score,
}
metrics = {
    "accuracy": accuracy_score(y_test, y_pred),
    **{
        metric_name: scorer(y_test, y_pred, average="weighted")
        for metric_name, scorer in weighted_scorers.items()
    },
}

In [17]:
# Show the metrics computed on the test split.
print(metrics)

{'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1_score': 1.0}


In [18]:
# Record this training run in MLflow: hyperparameters, evaluation metrics, a
# descriptive tag, and the fitted model, which is also registered under
# "IRIS-classifier-dt".
with mlflow.start_run():
    mlflow.log_params(params)
    mlflow.log_metrics(metrics)
    mlflow.set_tag("Training info", "Decision Tree model IRIS data")

    # Infer the model's input/output schema from the first ten test rows.
    signature_inputs = X_test[:10]
    signature = infer_signature(signature_inputs, clf.predict(signature_inputs))

    # The stored input example is the first ten training rows.
    model_info = mlflow.sklearn.log_model(
        sk_model=clf,
        name="iris_model",
        signature=signature,
        input_example=X_train[:10],
        registered_model_name="IRIS-classifier-dt",
    )

Registered model 'IRIS-classifier-dt' already exists. Creating a new version of this model...
2025/10/26 17:45:47 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: IRIS-classifier-dt, version 3


🏃 View run defiant-frog-792 at: http://136.113.100.212:8100/#/experiments/2/runs/14b5a5def4ce48daa3dc1c98dfd0ea5e
🧪 View experiment at: http://136.113.100.212:8100/#/experiments/2


Created version '3' of model 'IRIS-classifier-dt'.


In [3]:
# Pick the registered version of the model whose originating run logged the
# highest "accuracy" metric, then load that version from the model registry.
model_name = "IRIS-classifier-dt"
versions = client.search_model_versions(f"name='{model_name}'")

best_version = None
# None rather than 0, so a version whose accuracy is exactly 0.0 can still be selected.
best_accuracy = None
# Reset explicitly so a model loaded by an earlier execution of this cell cannot
# linger in the kernel when no candidate is found this time.
best_model = None

for v in versions:
    # A version that is not linked to a run has no metrics to compare.
    if not v.run_id:
        continue

    run = client.get_run(v.run_id)
    acc = run.data.metrics.get("accuracy")
    if acc is None:
        continue

    # Strict ">" keeps the first version encountered when accuracies tie.
    if best_accuracy is None or acc > best_accuracy:
        best_accuracy = acc
        best_version = v

if best_version is not None:
    print(f"Best model version: {best_version.version}")
    print(f"Run ID: {best_version.run_id}")
    print(f"Accuracy: {best_accuracy}")
    print(f"Stage: {best_version.current_stage}")

    # Load the best model directly
    best_model_uri = f"models:/{model_name}/{best_version.version}"
    best_model = mlflow.sklearn.load_model(best_model_uri)
    print(f"Loaded best model from registry: {best_model_uri}")

elif versions:
    # Versions exist, but none of their runs logged an "accuracy" metric.
    print(f"No version of '{model_name}' has a logged 'accuracy' metric.")
else:
    print("No versions found for this model.")


Best model version: 3
Run ID: 14b5a5def4ce48daa3dc1c98dfd0ea5e
Accuracy: 1.0
Stage: None


  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 51.22it/s]


Loaded best model from registry: models:/IRIS-classifier-dt/3


In [21]:
# Read the validation set and split it into labels ('species') and features
# (every other column).
test_data = pd.read_csv(os.path.join(data_path, "validate.csv"))
y_test = test_data["species"].to_numpy()
X_test = test_data.drop(columns="species")

# Score the model loaded from the registry on the validation features.
y_pred = best_model.predict(X_test)

# Plain accuracy plus weighted-average precision, recall and F1.
weighted_scorers = {
    "precision": precision_score,
    "recall": recall_score,
    "f1_score": f1_score,
}
metrics = {
    "accuracy": accuracy_score(y_test, y_pred),
    **{
        metric_name: scorer(y_test, y_pred, average="weighted")
        for metric_name, scorer in weighted_scorers.items()
    },
}

In [22]:
# Show the metrics computed on the validation data.
print(metrics)

{'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1_score': 1.0}
