In [1]:
import os
import pickle

import numpy as np
import scipy.sparse

In [6]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import root_mean_squared_error
from azureml.core import Workspace, Experiment

In [2]:
# Feature matrices, stored on disk in SciPy's sparse .npz format.
X_val = scipy.sparse.load_npz("artifacts/X_val.npz")
X_train = scipy.sparse.load_npz("artifacts/X_train.npz")

# Target arrays that pair with the matrices above.
y_val = np.load("artifacts/y_val.npy")
y_train = np.load("artifacts/y_train.npy")

# Preprocessor object saved alongside the data (presumably the fitted
# vectorizer that produced X_train / X_val -- confirm against the
# preprocessing step). pickle can execute arbitrary code while loading,
# so only point this at artifacts produced by a trusted pipeline.
with open("artifacts/dv.b", "rb") as dv_file:
    dv = pickle.load(dv_file)


In [7]:
# Connect to Azure ML
ws = Workspace.from_config()
experiment = Experiment(ws, "nyc-taxi-mlops")

# Local staging directory for pickled artifacts before they are uploaded.
OUTPUT_DIR = "outputs"
# Fixed seed so the stochastic ensemble models yield a repeatable RMSE
# when the notebook is re-run from a fresh kernel.
RANDOM_STATE = 42

# open(..., "wb") does not create parent directories, so make sure the
# staging directory exists; otherwise a fresh checkout fails on first save.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# The preprocessor is the same object for every model, so serialize it once
# and reuse the bytes inside the loop instead of re-pickling per iteration.
dv_bytes = pickle.dumps(dv)

# Models to train
model_classes = [
    RandomForestRegressor,
    GradientBoostingRegressor,
    LinearRegression
]

for model_class in model_classes:
    model_name = model_class.__name__

    # One Azure ML run per model; metrics and files below attach to `run`.
    with experiment.start_logging(snapshot_directory=None) as run:
        print(f"🔄 Training {model_name}")

        # Log model name
        run.log("model_name", model_name)

        # Train. Seed only the estimators that expose `random_state`
        # (LinearRegression does not), leaving all other defaults untouched.
        model = model_class()
        if "random_state" in model.get_params():
            model.set_params(random_state=RANDOM_STATE)
        model.fit(X_train, y_train)

        # Predict + evaluate on the validation split
        y_pred = model.predict(X_val)
        rmse = root_mean_squared_error(y_val, y_pred)
        run.log("rmse", rmse)
        print(f"✅ {model_name} RMSE: {rmse}")

        # Save model locally, then attach it to the run
        model_path = f"{OUTPUT_DIR}/{model_name}_model.pkl"
        with open(model_path, "wb") as f_out:
            pickle.dump(model, f_out)

        run.upload_file(f"models/{model_name}_model.pkl", model_path)

        # Save and upload preprocessor (same bytes for every model; kept
        # per-model so each run carries its own copy under the same names)
        dv_path = f"{OUTPUT_DIR}/{model_name}_dv.b"
        with open(dv_path, "wb") as f_out:
            f_out.write(dv_bytes)

        run.upload_file(f"preprocessor/{model_name}_dv.b", dv_path)

        print(f"📦 {model_name} model + preprocessor saved and uploaded.\n")


🔄 Training RandomForestRegressor
✅ RandomForestRegressor RMSE: 5.930336383124501
📦 RandomForestRegressor model + preprocessor saved and uploaded.

🔄 Training GradientBoostingRegressor
✅ GradientBoostingRegressor RMSE: 5.913929461984396
📦 GradientBoostingRegressor model + preprocessor saved and uploaded.

🔄 Training LinearRegression
✅ LinearRegression RMSE: 7.258011597927111
📦 LinearRegression model + preprocessor saved and uploaded.

