#### Load the model via the MLflow registry (direct pickle load from the GCP bucket is commented out below)

In [1]:
import pandas as pd

from google.cloud import storage
import pickle
import io

import mlflow
from mlflow.tracking import MlflowClient

# Name of the model in the MLflow registry.
model_name = "dtc_persona_clustering_model"

# Point MLflow at the local tracking server first, then create the client.
mlflow.set_tracking_uri("http://localhost:5050")
client = MlflowClient()


In [2]:
# GCP project used to build the storage client.
project_id = "tough-processor-312510"
storage_client = storage.Client(project=project_id)

# Access check: print the name of every bucket the client can list.
# Any failure is reported as a one-line message rather than a traceback.
try:
    buckets = storage_client.list_buckets()
    print("Buckets:")
    for bucket in buckets:
        print("-", bucket.name)
except Exception as err:
    print("An error occurred: {}".format(err))

Buckets:
- mmotl_mlflow_artifacts


In [3]:
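# Disabled alternative: load the pickled model straight from the bucket.
# All statements sit at the same comment depth so the snippet can be
# re-enabled as a whole. Only unpickle files from a trusted source:
# pickle.load can execute arbitrary code.

# # Get the bucket object
# bucket = storage_client.bucket("mmotl_mlflow_artifacts")

# # Path to the model file in the bucket
# blob = bucket.blob("5/ba49d88bf2224b13a4643ac0104f12ad/artifacts/model/model.pkl")

# # Download the model file as bytes
# model_bytes = blob.download_as_bytes()

# # Load the model using pickle
# with io.BytesIO(model_bytes) as f:
#     model = pickle.load(f)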

In [4]:
# --- Configuration ---
MLFLOW_TRACKING_URI = "http://localhost:5050"
MODEL_NAME = "dtc_persona_clustering_model"
MODEL_STAGE = "Production"
# NOTE(review): stage-based URIs ("models:/<name>/<stage>") are reported as
# deprecated in recent MLflow releases in favour of aliases — confirm against
# the installed MLflow version.
MODEL_URI = f"models:/{MODEL_NAME}/{MODEL_STAGE}"

# Holds the loaded model; stays None if loading fails.
loaded_model = None

try:
    # --- Step 1: Set the tracking URI ---
    # This tells the MLflow client where to send requests.
    print(f"Connecting to MLflow Tracking Server at: {MLFLOW_TRACKING_URI}")
    mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

    # --- Step 2: Load the model ---
    # This is the primary operation that might fail if the server is unreachable
    # or the model/stage does not exist.
    print(f"Loading model '{MODEL_NAME}' from stage '{MODEL_STAGE}'...")
    loaded_model = mlflow.pyfunc.load_model(model_uri=MODEL_URI)

# Catch specific MLflow exceptions for better error messages
except mlflow.exceptions.RestException as e:
    print("\n--- MLFLOW ERROR ---")
    print("Failed to load the model due to an MLflow REST API error.")
    print("This commonly happens if:")
    print(f"  1. The MLflow server is not running or accessible at '{MLFLOW_TRACKING_URI}'.")
    print(f"  2. The model named '{MODEL_NAME}' does not exist in the registry.")
    print(f"  3. The model does not have a version assigned to the '{MODEL_STAGE}' stage.")
    print(f"\nOriginal Error: {e}")
    # Stop with exit code 1, as the application cannot continue.
    # `raise SystemExit(1)` is what `sys.exit(1)` does, but it does not depend
    # on `sys` being imported (it is not imported in this notebook, so the
    # previous `sys.exit(1)` raised NameError and hid the real failure).
    raise SystemExit(1) from e

# Catch any other unexpected exceptions during the process
except Exception as e:
    print("\n--- AN UNEXPECTED ERROR OCCURRED ---")
    print(f"An error occurred while trying to load the model: {e}")
    raise SystemExit(1) from e

# The 'else' block runs only if the 'try' block completes successfully
else:
    print("\n✅ Model loaded successfully!")
    # The 'loaded_model' object can now be used for predictions.
    # Print its signature when metadata is available.
    if hasattr(loaded_model, 'metadata'):
        print(f"Model Signature: {loaded_model.metadata.signature}")

Connecting to MLflow Tracking Server at: http://localhost:5050
Loading model 'dtc_persona_clustering_model' from stage 'Production'...


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

 - lz4 (current: uninstalled, required: lz4==4.4.4)
 - pandas (current: 2.2.3, required: pandas==1.5.3)
 - psutil (current: 7.0.0, required: psutil==5.9.8)
 - scipy (current: 1.13.1, required: scipy==1.15.2)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.



✅ Model loaded successfully!
Model Signature: inputs: 
  ['x1': double (required), 'x2': double (required), 'x3': double (required), 'x4': double (required), 'x5': double (required), 'x6': double (required), 'x7': double (required), 'x8': double (required), 'x9': double (required), 'x10': double (required)]
outputs: 
  [Tensor('int32', (-1,))]
params: 
  None



In [8]:
# Display the loaded model's summary as the cell output.
loaded_model

mlflow.pyfunc.loaded_model:
  artifact_path: model
  flavor: mlflow.sklearn
  run_id: 935478b640754c528a5635b6a14ffbc9

#### deployment with flask

In [14]:
# Thin wrapper around the module-level `loaded_model`: forwards the input to
# its predict() method and hands back whatever that returns.

def predict_labels(X):
    """
    Return the cluster labels predicted for ``X``.

    The input is passed unchanged to ``loaded_model.predict``; the model is
    read from the module-level ``loaded_model`` variable, which must already
    be bound when this function is called.

    Parameters:
        X (array-like): Input data for prediction.

    Returns:
        array: Predicted cluster labels, exactly as returned by the model.
    """
    labels = loaded_model.predict(X)
    return labels

In [15]:
# Example input for prediction: three rows of ten feature values each.
# The first two rows are identical.
features = [
    [
        5.267844592017922,
        9.038527987090323,
        3.506074024159338,
        1.9479126171645216,
        -3.836098637534244,
        -7.881183180600871,
        -6.6307668078106285,
        3.0319909038843242,
        4.00735755812744,
        -1.1632153990676501,
    ],
    [
        5.267844592017922,
        9.038527987090323,
        3.506074024159338,
        1.9479126171645216,
        -3.836098637534244,
        -7.881183180600871,
        -6.6307668078106285,
        3.0319909038843242,
        4.00735755812744,
        -1.1632153990676501,
    ],
    [
        4.499870969600433,
        -10.104167280185836,
        -7.327319616485084,
        -5.869221088718748,
        -5.388042708088642,
        3.7703252138116436,
        -3.527628901910611,
        3.677010471509409,
        0.8067997068103117,
        -3.83049661092741,
    ],
]

In [16]:
# Convert the features list to a pandas DataFrame for prediction.
# The columns are labelled x1..x10 at construction (the input names shown in
# the model signature), so the frame is valid model input as soon as this cell
# has run instead of depending on a later cell relabelling it in place.
features_df = pd.DataFrame(features, columns=[f"x{i}" for i in range(1, 11)])

In [19]:
# Column labels x1..x10, matching the input names in the model signature.
column_names = [f'x{i}' for i in range(1, 11)]

# Relabel the columns of the existing DataFrame; the data is unchanged.
features_df.columns = column_names

In [20]:
# Predict a cluster label for each row of the example frame.
predict_labels(features_df)

array([2, 2, 1], dtype=int32)

The cells below exercise `load_model()` as defined in the script.

In [21]:
def load_model():
    """
    Load the registered model from the MLflow Model Registry into the global ``model``.

    Resolves ``models:/dtc_persona_clustering_model/Production`` against the
    tracking server at ``http://localhost:5050`` with
    ``mlflow.pyfunc.load_model`` and binds the result to the module-level
    ``model`` variable. Progress and error messages are printed to stdout.

    Intended to be called once when the script is loaded.

    Returns:
        None. The loaded model is exposed through the global ``model``.

    Raises:
        SystemExit: with exit code 1 if the model cannot be loaded, since the
            application cannot continue without it.
    """
    global model

    # --- Configuration ---
    MLFLOW_TRACKING_URI = "http://localhost:5050"
    MODEL_NAME = "dtc_persona_clustering_model"
    MODEL_STAGE = "Production"
    # NOTE(review): stage-based URIs ("models:/<name>/<stage>") are reported as
    # deprecated in recent MLflow releases in favour of aliases — confirm
    # against the installed MLflow version.
    MODEL_URI = f"models:/{MODEL_NAME}/{MODEL_STAGE}"

    # Reset the global so a failed load never leaves a stale model behind.
    model = None

    try:
        # --- Step 1: Set the tracking URI ---
        # This tells the MLflow client where to send requests.
        print(f"Connecting to MLflow Tracking Server at: {MLFLOW_TRACKING_URI}")
        mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

        # --- Step 2: Load the model ---
        # This is the primary operation that might fail if the server is unreachable
        # or the model/stage does not exist.
        print(f"Loading model '{MODEL_NAME}' from stage '{MODEL_STAGE}'...")
        model = mlflow.pyfunc.load_model(model_uri=MODEL_URI)

    # Catch specific MLflow exceptions for better error messages
    except mlflow.exceptions.RestException as e:
        print("\n--- MLFLOW ERROR ---")
        print("Failed to load the model due to an MLflow REST API error.")
        print("This commonly happens if:")
        print(
            f"  1. The MLflow server is not running or accessible at '{MLFLOW_TRACKING_URI}'."
        )
        print(f"  2. The model named '{MODEL_NAME}' does not exist in the registry.")
        print(
            f"  3. The model does not have a version assigned to the '{MODEL_STAGE}' stage."
        )
        print(f"\nOriginal Error: {e}")
        # Exit with an error code, as the application cannot continue.
        # `raise SystemExit(1)` is what `sys.exit(1)` does, but it does not
        # depend on `sys` being imported; the previous `sys.exit(1)` raised
        # NameError here and hid the real failure.
        raise SystemExit(1) from e

    # Catch any other unexpected exceptions during the process
    except Exception as e:
        print("\n--- AN UNEXPECTED ERROR OCCURRED ---")
        print(f"An error occurred while trying to load the model: {e}")
        raise SystemExit(1) from e

    # The 'else' block runs only if the 'try' block completes successfully
    else:
        print("\n✅ Model loaded successfully!")
        # The 'model' object can now be used for predictions.
        # Print its signature when metadata is available.
        if hasattr(model, "metadata"):
            print(f"Model Signature: {model.metadata.signature}")

In [22]:
# Populate the global `model`, then reference it to confirm it is bound.
load_model()
model



Connecting to MLflow Tracking Server at: http://localhost:5050
Loading model 'dtc_persona_clustering_model' from stage 'Production'...


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

 - lz4 (current: uninstalled, required: lz4==4.4.4)
 - pandas (current: 2.2.3, required: pandas==1.5.3)
 - psutil (current: 7.0.0, required: psutil==5.9.8)
 - scipy (current: 1.13.1, required: scipy==1.15.2)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.



✅ Model loaded successfully!
Model Signature: inputs: 
  ['x1': double (required), 'x2': double (required), 'x3': double (required), 'x4': double (required), 'x5': double (required), 'x6': double (required), 'x7': double (required), 'x8': double (required), 'x9': double (required), 'x10': double (required)]
outputs: 
  [Tensor('int32', (-1,))]
params: 
  None



mlflow.pyfunc.loaded_model:
  artifact_path: model
  flavor: mlflow.sklearn
  run_id: 935478b640754c528a5635b6a14ffbc9

In [23]:
# Display the model bound by load_model() as the cell output.
model

mlflow.pyfunc.loaded_model:
  artifact_path: model
  flavor: mlflow.sklearn
  run_id: 935478b640754c528a5635b6a14ffbc9

In [24]:
# Build the labels x1..x10 expected as model input names.
column_names = ["x" + str(i) for i in range(1, 11)]

# Apply the labels to the existing DataFrame's columns; the data is unchanged.
features_df.columns = column_names

In [25]:
# Predict directly with the global `model` on the same example frame.
model.predict(features_df)

array([2, 2, 1], dtype=int32)