In [5]:
!pip install mlflow



In [6]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
import mlflow
import mlflow.sklearn

In [12]:
def train_model(alpha=1.0):
    """
    Train a Ridge Regression model and log parameters, metrics, and the model using MLflow.

    The California Housing dataset is split 80/20 (fixed ``random_state=42``),
    the features are standardized with statistics fitted on the training
    split only, and a ``Ridge`` model is fitted on the standardized features.
    Everything is recorded inside a single MLflow run.

    Args:
        alpha: Regularization strength passed to ``Ridge`` and logged as the
            ``alpha`` run parameter. Defaults to 1.0.

    Returns:
        None. The test-set MSE and the fitted coefficients are printed, and
        the parameter, metric and model are logged to the active MLflow
        tracking location.

    Note:
        Only the Ridge estimator is logged; the fitted ``StandardScaler`` is
        not part of the logged artifact. Anyone loading the model must apply
        the same standardization before calling ``predict``.
    """
    # Step 1: Load the California Housing dataset
    data = fetch_california_housing()
    X, y = data.data, data.target

    # Step 2: Split the data into training and testing sets (80% train, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Step 3: Scale the features (important for Ridge Regression).
    # The scaler is fitted on the training split only so that no test-set
    # statistics leak into training.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Step 4: Start an MLflow run
    with mlflow.start_run():
        # Log the regularization parameter (alpha)
        mlflow.log_param("alpha", alpha)

        # Initialize and fit the Ridge Regression model
        ridge_model = Ridge(alpha=alpha)
        ridge_model.fit(X_train_scaled, y_train)

        # Step 5: Define an input example.
        # The model was fitted on standardized features, so the example must
        # come from the scaled matrix; raw rows would document an input the
        # logged model cannot meaningfully score.
        input_example = X_train_scaled[:5]

        # Step 6: Log the trained Ridge Regression model with an input example
        mlflow.sklearn.log_model(
            sk_model=ridge_model,
            artifact_path="ridge_model",
            input_example=input_example  # Auto-infer the signature
        )

        # Step 7: Make predictions on the test set
        y_pred = ridge_model.predict(X_test_scaled)

        # Step 8: Evaluate the model using Mean Squared Error (MSE)
        mse = mean_squared_error(y_test, y_pred)

        # Log the MSE metric
        mlflow.log_metric("mse", mse)

        # Print results for verification
        print(f"Mean Squared Error (MSE) on the test set: {mse:.4f}")
        print("Ridge Regression Coefficients:")
        print(ridge_model.coef_)


In [13]:
# Regularization strengths to compare, spanning four orders of magnitude.
alpha_values = [
    0.01,
    0.1,
    1,
    10,
]

# One train_model call per strength, in the order listed above.
for alpha in alpha_values:
    train_model(alpha)

Mean Squared Error (MSE) on the test set: 0.5559
Ridge Regression Coefficients:
[ 0.85438247  0.12254702 -0.29440814  0.33925697 -0.00230747 -0.04082914
 -0.89692126 -0.86983406]
Mean Squared Error (MSE) on the test set: 0.5559
Ridge Regression Coefficients:
[ 0.85437741  0.12255402 -0.29439016  0.33923433 -0.00230517 -0.0408295
 -0.89685268 -0.8697646 ]
Mean Squared Error (MSE) on the test set: 0.5559
Ridge Regression Coefficients:
[ 0.85432679  0.12262397 -0.29421036  0.33900794 -0.00228221 -0.04083302
 -0.89616759 -0.86907074]
Mean Squared Error (MSE) on the test set: 0.5555
Ridge Regression Coefficients:
[ 0.85381377  0.12331557 -0.2924118   0.33674955 -0.0020552  -0.04086734
 -0.88938399 -0.86219926]


In [14]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.8-py3-none-any.whl.metadata (10 kB)
Downloading pyngrok-7.2.8-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.8


In [15]:
import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: never commit the ngrok auth token to the notebook. Read it from
# the NGROK_AUTHTOKEN environment variable, or prompt for it without echoing.
# Any token that was previously hardcoded here should be revoked in the ngrok
# dashboard, since it has been exposed.
auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")
if not auth_token:
    raise ValueError("An ngrok auth token is required to open the tunnel.")
ngrok.set_auth_token(auth_token)

# Expose local port 5000 over HTTP through a public ngrok URL.
ngrok_tunnel = ngrok.connect(addr="5000", proto="http")
print("MLFLOW UI ", ngrok_tunnel.public_url)

MLFLOW UI  https://b3e9-34-141-195-229.ngrok-free.app


In [None]:
# Launch the MLflow tracking UI. The captured server log shows it listening on
# http://127.0.0.1:5000, which is the port the ngrok tunnel was connected to.
# NOTE(review): this shell command appears to run in the foreground (the cell
# has no execution count), so later cells will not run until it is stopped.
!mlflow ui

[2025-05-28 11:06:15 +0000] [12309] [INFO] Starting gunicorn 23.0.0
[2025-05-28 11:06:15 +0000] [12309] [INFO] Listening at: http://127.0.0.1:5000 (12309)
[2025-05-28 11:06:15 +0000] [12309] [INFO] Using worker: sync
[2025-05-28 11:06:15 +0000] [12310] [INFO] Booting worker with pid: 12310
[2025-05-28 11:06:15 +0000] [12311] [INFO] Booting worker with pid: 12311
[2025-05-28 11:06:15 +0000] [12316] [INFO] Booting worker with pid: 12316
[2025-05-28 11:06:15 +0000] [12317] [INFO] Booting worker with pid: 12317
