In [1]:
import numpy as np
import scipy.sparse
import pickle


In [2]:
from azureml.core import Workspace, Experiment, Run, Model
from sklearn.metrics import root_mean_squared_error
import os

In [3]:

# Read the test split from the local artifacts directory:
# targets come from a NumPy .npy file, features from a SciPy sparse .npz archive.
y_test = np.load(file="artifacts/y_test.npy")
X_test = scipy.sparse.load_npz(file="artifacts/X_test.npz")

print("✅ Loaded test data.")


✅ Loaded test data.


In [4]:
# Connect to the workspace (resolved from the local Azure ML config) and open the experiment.
ws = Workspace.from_config()
experiment = Experiment(ws, name="nyc-taxi-mlops")

# Step 1: Find best run by RMSE (lower is better).
runs = list(experiment.get_runs())
best_run = None
best_rmse = float("inf")

for run in runs:
    metrics = run.get_metrics()
    if "rmse" not in metrics:
        # This run never logged the metric, so it cannot be ranked.
        continue

    rmse = metrics["rmse"]
    # A metric logged more than once within a run comes back as a list rather
    # than a scalar; comparing a list to a float would raise TypeError.
    # NOTE(review): the last logged value is used here — confirm that is the
    # intended "final" score for such runs.
    if isinstance(rmse, list):
        if not rmse:
            continue
        rmse = rmse[-1]

    if rmse < best_rmse:
        best_rmse = rmse
        best_run = run

# Fail loudly here instead of with an AttributeError on `best_run.id` below
# (and instead of letting later cells operate on None).
if best_run is None:
    raise RuntimeError(
        f"No run in experiment {experiment.name!r} logged a usable 'rmse' metric; "
        "cannot select a best run."
    )

print(f"🏆 Best run ID: {best_run.id}, RMSE: {best_rmse}")


🏆 Best run ID: dbd54cb3-7897-4def-b6e6-f44c84fc310d, RMSE: 5.913929461984396


In [5]:
# Step 2: Download model and preprocessor from best run
model_files = best_run.get_file_names()

# Take the first file matching each naming pattern; `None` means the run has no such file.
model_file = next(
    (f for f in model_files if f.startswith("models/") and f.endswith(".pkl")),
    None,
)
dv_file = next(
    (f for f in model_files if f.startswith("preprocessor/") and f.endswith(".b")),
    None,
)

# Report exactly which artifact is missing rather than failing with a bare
# "list index out of range".
if model_file is None:
    raise FileNotFoundError(
        f"Run {best_run.id} has no model file matching 'models/*.pkl'."
    )
if dv_file is None:
    raise FileNotFoundError(
        f"Run {best_run.id} has no preprocessor file matching 'preprocessor/*.b'."
    )

# Both artifacts are saved under fixed local names so later cells do not depend
# on the run-specific file names.
os.makedirs("downloaded", exist_ok=True)
best_run.download_file(name=model_file, output_file_path="downloaded/model.pkl")
best_run.download_file(name=dv_file, output_file_path="downloaded/dv.b")


In [6]:

# Step 3: Deserialize the two downloaded artifacts into memory.
# NOTE: unpickling can execute arbitrary code embedded in the file, so these
# files must only ever come from a trusted run.
with open("downloaded/model.pkl", mode="rb") as model_fh:
    best_model = pickle.load(model_fh)

with open("downloaded/dv.b", mode="rb") as dv_fh:
    dv = pickle.load(dv_fh)


In [7]:

# Step 4: Score the downloaded model on the test features and compare its
# predictions with the test targets.
y_pred = best_model.predict(X_test)
test_rmse = root_mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"🧪 Test RMSE: {test_rmse:.3f}")


🧪 Test RMSE: 5.940


In [8]:
# Step 5: Register model + preprocessor
# Tag values are passed as strings (the SDK documents tags as str -> str), and
# the originating run ID is recorded so the registered model can be traced back
# to the run it was downloaded from.
model = Model.register(
    workspace=ws,
    model_path="downloaded/model.pkl",
    model_name="nyc-taxi-best-model",
    tags={
        "source": "mlops-demo",
        "validated_rmse": str(round(float(test_rmse), 3)),
        "source_run_id": best_run.id,
    },
)

# Register the preprocessor separately (optional), tagged with both the name and
# the exact version of the model registered above so the pair can be matched later.
Model.register(
    workspace=ws,
    model_path="downloaded/dv.b",
    model_name="nyc-taxi-preprocessor",
    tags={
        "related_model": model.name,
        "related_model_version": str(model.version),
    },
)

print(f"✅ Registered model: {model.name} (version: {model.version})")


Registering model nyc-taxi-best-model
Registering model nyc-taxi-preprocessor
✅ Registered model: nyc-taxi-best-model (version: 1)
