In [1]:
# # Install MLflow and its dependencies
# !pip install mlflow==2.9.0
# !pip install lz4==4.3.3
# !pip install numpy==1.26.4
# !pip install pandas==1.3.0
# !pip install scikit-learn==1.6.0
# !pip install scipy==1.11.4
# !pip install xgboost==2.1.3

In [2]:
import pandas as pd
from loguru import logger

import mlflow
from mlflow.tracking import MlflowClient

# Configure logging
logger.add("logs/inference.log")

# Set MLflow tracking URI
mlflow.set_tracking_uri("http://localhost:5001")


def load_latest_model_and_mappings(model_name: str):
    """Load the latest model version and its category mappings"""
    client = MlflowClient()

    try:
        # Get latest model version using aliases instead of stages
        logger.info(f"Attempting to load latest version of model: {model_name}")
        latest_version = client.get_model_version_by_alias(model_name, "current")
        run_id = latest_version.run_id
        logger.info(f"Found model version: {latest_version.version} with run_id: {run_id}")

        if not run_id:
            logger.error("No run_id found for the model version")
            return None, None

        # Load the model
        model = mlflow.pyfunc.load_model(f"models:/{model_name}@current")
        logger.info("Model loaded successfully")

        # Get category mappings from the same run
        try:
            # List artifacts to debug
            logger.info(f"Listing artifacts for run_id: {run_id}")
            artifacts = client.list_artifacts(run_id)
            logger.info(f"Available artifacts: {[art.path for art in artifacts]}")
            
            # Try to load the mappings
            category_mappings = mlflow.artifacts.load_dict(
                f"runs:/{run_id}/category_mappings.json"
            )
            logger.info("Category mappings loaded successfully")
            logger.debug(f"Mappings content: {category_mappings}")
            
            return model, category_mappings

        except Exception as e:
            logger.error(f"Error loading category mappings: {e}")
            return None, None

    except Exception as e:
        logger.error(f"Error loading model: {e}")
        return None, None

In [3]:
# Model name
model_name = "purchase_prediction_model"

# Load model and mappings
model, category_mappings = load_latest_model_and_mappings(model_name)

[32m2024-12-24 23:27:25.156[0m | [1mINFO    [0m | [36m__main__[0m:[36mload_latest_model_and_mappings[0m:[36m20[0m - [1mAttempting to load latest version of model: purchase_prediction_model[0m
[32m2024-12-24 23:27:25.180[0m | [1mINFO    [0m | [36m__main__[0m:[36mload_latest_model_and_mappings[0m:[36m23[0m - [1mFound model version: 6 with run_id: 58c3a14211e74bb3acad6f6d7443163a[0m
 - mlflow (current: 2.9.0, required: mlflow==2.19.0)
 - xgboost (current: 2.1.3, required: xgboost==2.0.3)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.
[32m2024-12-24 23:27:25.797[0m | [1mINFO    [0m | [36m__main__[0m:[36mload_latest_model_and_mappings[0m:[36m31[0m - [1mModel loaded successfully[0m
[32m2024-12-24 23:27:25.798[0m | [1mINFO    [0m | [36m__main__[0m:[36mload_latest_model_and_mappings[0m:[36m36[0m - [1mListing artifacts for 

In [4]:
# Prepare inference data
data = [
    {
        "brand": "sumsung",
        "price": 130.76,
        "event_weekday": 2,
        "category_code_level1": "electronics",
        "category_code_level2": "smartphone",
        "activity_count": 1,
    },
    {
        "brand": "video",
        "price": 130.76,
        "event_weekday": 2,
        "category_code_level1": "electronics",
        "category_code_level2": "smartphone",
        "activity_count": 1,
    },
]

# Convert to DataFrame
df = pd.DataFrame(data)
logger.info(f"Input data shape: {df.shape}")

[32m2024-12-24 23:27:25.841[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m23[0m - [1mInput data shape: (2, 6)[0m


In [5]:
# Encode categorical columns using saved mappings
for col in ["brand", "category_code_level1", "category_code_level2"]:
    mapping = category_mappings[col]
    # Map values using the saved mapping, with -1 for unseen categories
    df[col] = df[col].map(mapping).fillna(-1)
    logger.info(f"Encoded column {col}")

[32m2024-12-24 23:27:25.847[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mEncoded column brand[0m
[32m2024-12-24 23:27:25.848[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mEncoded column category_code_level1[0m
[32m2024-12-24 23:27:25.849[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mEncoded column category_code_level2[0m


In [6]:
# Make predictions
predictions = model.predict(df)
logger.info(f"Predictions: {predictions}")

[32m2024-12-24 23:27:25.890[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m3[0m - [1mPredictions: [0.5839172 0.5839172][0m


In [7]:
df

Unnamed: 0,brand,price,event_weekday,category_code_level1,category_code_level2,activity_count
0,-1.0,130.76,2,3,12,1
1,-1.0,130.76,2,3,12,1
