## <span style="color:#ff5f27">👨🏻‍🏫 Create Deployment </span>

In this notebook, you'll create a deployment for your recommendation system.

**NOTE Currently the transformer scripts are not implemented.**

In [None]:
import time

# Start the timer: wall-clock timestamp that the final cell subtracts from its
# own reading to report the notebook's total execution time
notebook_start_time = time.time()

## <span style="color:#ff5f27">📝 Imports </span>

In [None]:
import os

## <span style="color:#ff5f27">🔮 Connect to Hopsworks Feature Store </span>

In [None]:
import hopsworks

# Log in to Hopsworks; the returned project handle is what the following cells
# use to reach the model registry and the dataset API
project = hopsworks.login()

In [None]:
# Connect to Hopsworks Model Registry
mr = project.get_model_registry()

# Dataset API handle, used further down to upload the transformer and predictor scripts
dataset_api = project.get_dataset_api()

## <span style="color:#ff5f27">🚀 Ranking Model Deployment </span>


You start by deploying your ranking model. Since it is a CatBoost model, you need to implement a `Predict` class that tells Hopsworks how to load the model and how to use it.

In [None]:
# Select the registered "ranking_model" version that ranks best on the "fscore" metric (direction="max").
# NOTE(review): the ranking transformer script written below fetches version=1 of
# "ranking_model" to obtain its feature view — confirm that matches the version selected here.
ranking_model = mr.get_best_model(
    name="ranking_model", 
    metric="fscore", 
    direction="max",
)
# Bare expression so the notebook displays the selected model as the cell output
ranking_model

In [None]:
%%writefile ranking_transformer.py

import logging

import hopsworks
import pandas as pd


class Transformer(object):
    """Pre- and post-processing for the ranking deployment.

    ``preprocess`` turns a request (customer id, query embedding, month
    sine/cosine) into one feature row per candidate article for the ranking
    model. ``postprocess`` orders the predictor's scores into a ranking.
    """

    def __init__(self):
        # Connect to Hopsworks
        project = hopsworks.login()
        self.fs = project.get_feature_store()

        # 'transactions' feature group: read in preprocess to find the
        # articles a customer has already bought.
        self.transactions_fg = self.fs.get_feature_group("transactions", 1)

        # 'articles' feature view: source of per-article feature vectors.
        self.articles_fv = self.fs.get_feature_view(
            name="articles",
            version=1,
        )

        # Feature names of the articles view; used as DataFrame column names
        # for the vectors fetched in preprocess.
        self.articles_features = [feat.name for feat in self.articles_fv.schema]

        # 'customers' feature view: source of the customer's age.
        self.customer_fv = self.fs.get_feature_view(
            name="customers",
            version=1,
        )

        self.customer_fv.init_serving(1)

        # 'candidate_embeddings' feature view: queried for nearest neighbours
        # of the incoming query embedding.
        self.candidate_index = self.fs.get_feature_view(
            name="candidate_embeddings",
            version=1,
        )

        # Retrieve ranking model.
        # NOTE(review): version 1 is hard-coded here — confirm it is the
        # version that actually gets deployed.
        mr = project.get_model_registry()
        model = mr.get_model(
            name="ranking_model",
            version=1,
        )

        self.ranking_fv = model.get_feature_view(init=False)
        self.ranking_fv.init_batch_scoring(1)

        # Names (and order) of the features handed to the ranking model;
        # the 'label' column is excluded.
        self.ranking_model_feature_names = [
            feature.name
            for feature in self.ranking_fv.schema
            if feature.name != "label"
        ]

    def preprocess(self, inputs):
        """Build the ranking-model input rows for one request.

        Args:
            inputs: dict with an ``"instances"`` list; only the first instance
                is used. It must provide ``customer_id``, ``query_emb``,
                ``month_sin`` and ``month_cos``.

        Returns:
            ``{"inputs": [{"ranking_features": rows, "article_ids": ids}]}``
            where ``rows`` is a list of feature lists ordered like
            ``self.ranking_model_feature_names`` and ``ids`` are the candidate
            article ids (as strings) that survived the already-bought filter.
        """
        # Extract the input instance
        inputs = inputs["instances"][0]

        # Extract customer_id from inputs
        customer_id = inputs["customer_id"]

        # Search for candidate items; the first element of each neighbour is
        # used as the article id.
        neighbors = self.candidate_index.find_neighbors(
            inputs["query_emb"],
            k=100,
        )
        neighbors = [neighbor[0] for neighbor in neighbors]

        # Get IDs of items already bought by the customer
        bought_df = (
            self.transactions_fg.select("article_id")
            .filter(self.transactions_fg.customer_id == customer_id)
            .read(dataframe_type="pandas")
        )
        # A set gives O(1) membership tests, and normalising to str makes the
        # comparison below like-for-like whatever dtype the column is stored as.
        already_bought_items_ids = {
            str(bought_id) for bought_id in bought_df.values.reshape(-1).tolist()
        }

        # Filter candidate items to exclude those already bought by the customer
        candidate_ids = (str(item_id) for item_id in neighbors)
        item_id_list = [
            item_id
            for item_id in candidate_ids
            if item_id not in already_bought_items_ids
        ]
        item_id_df = pd.DataFrame({"article_id": item_id_list})

        # Retrieve Article data for candidate items
        articles_data = [
            self.articles_fv.get_feature_vector({"article_id": item_id})
            for item_id in item_id_list
        ]

        logging.info("✅ Articles Data Retrieved!")

        articles_df = pd.DataFrame(
            data=articles_data,
            columns=self.articles_features,
        )

        # Join candidate items with their features
        ranking_model_inputs = item_id_df.merge(
            articles_df,
            on="article_id",
            how="inner",
        )

        logging.info("✅ Inputs are almost ready!")

        # Add customer features
        customer_features = self.customer_fv.get_feature_vector(
            {"customer_id": customer_id},
            return_type="pandas",
        )

        # The same customer/month values are broadcast to every candidate row.
        ranking_model_inputs["age"] = customer_features.age.values[0]
        ranking_model_inputs["trans_month_sin"] = inputs["month_sin"]
        ranking_model_inputs["trans_month_cos"] = inputs["month_cos"]

        # Select only the features required by the ranking model
        ranking_model_inputs = ranking_model_inputs[self.ranking_model_feature_names]

        logging.info("✅ Inputs are ready!")

        return {
            "inputs": [
                {
                    "ranking_features": ranking_model_inputs.values.tolist(),
                    "article_ids": item_id_list,
                }
            ]
        }

    def postprocess(self, outputs):
        """Pair each score with its article id and sort best-first.

        Args:
            outputs: dict with parallel ``"scores"`` and ``"article_ids"`` lists.

        Returns:
            ``{"ranking": [(score, article_id), ...]}`` sorted in descending
            order (ties on score fall back to comparing the article ids).
        """
        logging.info("✅ Predictions are ready!")

        # Merge prediction scores and corresponding article IDs into a list of tuples
        ranking = list(zip(outputs["scores"], outputs["article_ids"]))

        # Sort the ranking list by score in descending order
        ranking.sort(reverse=True)

        # Return the sorted ranking list
        return {
            "ranking": ranking,
        }

In [None]:
import posixpath

# Copy transformer file into Hopsworks File System
uploaded_file_path = dataset_api.upload(
    "ranking_transformer.py",    # File name to be uploaded
    "Resources",                 # Destination directory in Hopsworks File System
    overwrite=True,              # Overwrite the file if it already exists
)

# Construct the path to the uploaded transformer script.
# posixpath (rather than os.path) keeps the separators as "/" even when the
# notebook runs on Windows, since this is a path inside the Hopsworks project,
# not on the local disk.
transformer_script_path = posixpath.join(
    "/Projects",                 # Root directory for projects in Hopsworks
    project.name,                # Name of the current project
    uploaded_file_path,          # Path to the uploaded file within the project
)

In [None]:
%%writefile ranking_predictor.py

import os
import joblib
import numpy as np

import logging


class Predict(object):
    """Predictor for the ranking deployment: scores candidate articles with the stored model."""

    def __init__(self):
        # The model artifact is loaded from the directory named by the
        # MODEL_FILES_PATH environment variable.
        self.model = joblib.load(os.environ["MODEL_FILES_PATH"] + "/ranking_model.pkl")

    def predict(self, inputs):
        """Score the candidate articles of one request.

        Args:
            inputs: list whose first element is a dict with
                ``"ranking_features"`` (one feature list per candidate) and
                ``"article_ids"`` (the matching article ids).

        Returns:
            ``{"scores": [...], "article_ids": [...]}`` where each score is the
            value in column 1 of ``predict_proba`` (the positive class). Both
            lists are empty-safe: no candidates yields no scores.
        """
        logging.info(f"✅ Inputs: {inputs}")

        # Extract ranking features and article IDs from the inputs.
        # Plain lookups are used so the caller's payload is left unmodified.
        request = inputs[0]
        features = request["ranking_features"]
        article_ids = request["article_ids"]

        # Log the extracted features
        logging.info("predict -> " + str(features))

        # Log the extracted article ids
        logging.info(f'Article IDs: {article_ids}')

        # Nothing to score (e.g. every candidate was filtered out upstream):
        # return an empty result instead of handing the model an empty batch.
        if len(features) == 0:
            return {
                "scores": [],
                "article_ids": article_ids,
            }

        logging.info("🚀 Predicting...")

        # Predict class probabilities and keep the positive-class column
        probabilities = np.asarray(self.model.predict_proba(features))
        scores = probabilities[:, 1].tolist()

        # Return the predicted scores along with the corresponding article IDs
        return {
            "scores": scores,
            "article_ids": article_ids,
        }

In [None]:
import posixpath

# Upload predictor file to Hopsworks
uploaded_file_path = dataset_api.upload(
    "ranking_predictor.py",
    "Resources",
    overwrite=True,
)

# Construct the path to the uploaded script.
# posixpath (rather than os.path) keeps the separators as "/" even when the
# notebook runs on Windows, since this is a path inside the Hopsworks project,
# not on the local disk.
predictor_script_path = posixpath.join(
    "/Projects",
    project.name,
    uploaded_file_path,
)

With that in place, you can finally deploy your model.

In [None]:
from hsml.transformer import Transformer

# The query model transformer script looks this deployment up by the literal
# name "rankingdeployment", so the two must be kept in sync.
ranking_deployment_name = "rankingdeployment"

# Define transformer (runs the uploaded ranking_transformer.py script)
# NOTE(review): num_instances=0 presumably enables scale-to-zero — confirm
ranking_transformer=Transformer(
    script_file=transformer_script_path,
    resources={"num_instances": 0},
)

# Deploy ranking model, with the uploaded predictor script as script_file and
# the transformer defined above attached to it
ranking_deployment = ranking_model.deploy(
    name=ranking_deployment_name,
    description="Deployment that search for item candidates and scores them based on customer metadata",
    script_file=predictor_script_path,
    resources={"num_instances": 0},
    transformer=ranking_transformer,
)

In [None]:
# Start the deployment and wait for it to be in a running state for up to 300 seconds
# The test request in the cell below needs this deployment to be running
ranking_deployment.start(await_running=300)

In [None]:
def get_top_recommendations(ranked_candidates, k=3):
    """Return the last element (the article id) of each of the first ``k`` ranking entries.

    Args:
        ranked_candidates: mapping with a ``"ranking"`` sequence of
            ``(score, article_id)``-style entries, best first.
        k: maximum number of entries to take from the head of the ranking.

    Returns:
        List with at most ``k`` items, in ranking order.
    """
    head = ranked_candidates["ranking"][:k]
    top_ids = []
    for entry in head:
        top_ids.append(entry[-1])
    return top_ids

In [None]:
# Define a test input example
test_ranking_input = [
        {
            "customer_id": "d327d0ad9e30085a436933dfbb7f77cf42e38447993a078ed35d93e3fd350ecf",
            "month_sin": 1.2246467991473532e-16,
            "query_emb": [
                0.214135289,
                0.571055949,
                0.330709577,
                -0.225899458,
                -0.308674961,
                -0.0115124583,
                0.0730511621,
                -0.495835781,
                0.625569344,
                -0.0438038409,
                0.263472944,
                -0.58485353,
                -0.307070434,
                0.0414443575,
                -0.321789205,
                0.966559,
            ],
            "month_cos": -1.0,
        }
    ]

# Test ranking deployment
ranked_candidates = ranking_deployment.predict(inputs=test_ranking_input)

# Retrieve article ids of the top recommended items
recommendations = get_top_recommendations(ranked_candidates["predictions"], k=3)
recommendations

In [None]:
# Check logs in case of failure
# ranking_deployment.get_logs(component="transformer",tail=200)

## <span style="color:#ff5f27">🚀 Query Model Deployment </span>

Next, you'll deploy your query model.

In [None]:
# Retrieve the 'query_model' from the Model Registry
query_model = mr.get_model(
    name="query_model",
    version=1,
)

In [None]:
%%writefile querymodel_transformer.py

import os
import numpy as np
import pandas as pd
from datetime import datetime
import logging
import hopsworks


class Transformer(object):
    """Pre- and post-processing for the query model deployment.

    ``preprocess`` enriches a request (customer id + transaction date) with
    the customer's age and the month sine/cosine features. ``postprocess``
    forwards the model output to the ranking deployment and returns its
    response.
    """

    def __init__(self):
        # Connect to the Hopsworks
        project = hopsworks.login()
        ms = project.get_model_serving()

        # Retrieve the 'customers' feature view
        fs = project.get_feature_store()
        self.customer_fv = fs.get_feature_view(
            name="customers",
            version=1,
        )

        # Retrieve  the "ranking" feature view and initialize the batch scoring server.
        self.ranking_fv = fs.get_feature_view(name="ranking", version=1)
        self.ranking_fv.init_batch_scoring(1)

        # Retrieve the ranking deployment (looked up by its deployment name)
        self.ranking_server = ms.get_deployment("rankingdeployment")

    def preprocess(self, inputs):
        """Turn a raw request into the query model's input instance.

        Args:
            inputs: either ``{"instances": [instance, ...]}`` or a bare list of
                instances; only the first instance is used. The instance must
                contain ``customer_id`` and an ISO-8601 ``transaction_date``
                (fractional seconds are optional).

        Returns:
            ``{"instances": [instance]}`` where ``transaction_date`` has been
            removed and ``customers_age``, ``month_sin`` and ``month_cos``
            have been added. The instance dict is modified in place.
        """
        # Check if the input data contains a key named "instances"
        # and extract the actual data if present
        inputs = inputs["instances"] if "instances" in inputs else inputs
        inputs = inputs[0]

        # Extract customer_id from the inputs
        customer_id = inputs["customer_id"]

        # Remove transaction_date from the instance and parse it once.
        # fromisoformat accepts timestamps with or without fractional seconds,
        # whereas a fixed "%Y-%m-%dT%H:%M:%S.%f" pattern rejects the latter.
        transaction_date = inputs.pop("transaction_date")
        month_of_purchase = datetime.fromisoformat(transaction_date).month

        # Get customer features
        customer_features = self.customer_fv.get_feature_vector(
            {"customer_id": customer_id},
            return_type="pandas",
        )

        # Enrich inputs with customer age
        inputs["customers_age"] = customer_features.age.values[0]

        # Calculate the sine and cosine components for the month_of_purchase using on-demand transformation present in "ranking" feature view.
        # NOTE(review): this goes through the private _batch_scoring_server attribute — confirm it is stable across hsfs versions.
        feature_vector = self.ranking_fv._batch_scoring_server.compute_on_demand_features(
            feature_vectors=pd.DataFrame([inputs]),
            request_parameters={"trans_month": month_of_purchase},
        ).to_dict(orient="records")[0]

        inputs["month_sin"] = feature_vector["trans_month_sin"]
        inputs["month_cos"] = feature_vector["trans_month_cos"]

        return {"instances": [inputs]}

    def postprocess(self, outputs):
        """Send the query model output to the ranking deployment and return its response."""
        # Return ordered ranking predictions
        return self.ranking_server.predict(inputs=outputs)

In [None]:
import posixpath

# Copy transformer file into Hopsworks File System
uploaded_file_path = dataset_api.upload(
    "querymodel_transformer.py",
    "Models",
    overwrite=True,
)

# Construct the path to the uploaded script.
# posixpath (rather than os.path) keeps the separators as "/" even when the
# notebook runs on Windows, since this is a path inside the Hopsworks project,
# not on the local disk.
transformer_script_path = posixpath.join(
    "/Projects",
    project.name,
    uploaded_file_path,
)

In [None]:
from hsml.transformer import Transformer

query_model_deployment_name = "querydeployment"

# Define transformer (runs the uploaded querymodel_transformer.py script)
# NOTE(review): num_instances=0 presumably enables scale-to-zero — confirm
query_model_transformer=Transformer(
    script_file=transformer_script_path, 
    resources={"num_instances": 0},
)

# Deploy the query model
# Unlike the ranking deployment, no predictor script_file is passed here; only
# the transformer is attached.
query_model_deployment = query_model.deploy(
    name=query_model_deployment_name,
    description="Deployment that generates query embeddings from customer and item features using the query model",
    resources={"num_instances": 0},
    transformer=query_model_transformer,
)

At this point, you have registered your deployment. To start it up, you need to run:

In [None]:
# Start the deployment and wait for it to be in a running state for up to 300 seconds
# The prediction request further down needs this deployment to be running
query_model_deployment.start(await_running=300)

In [None]:
# Test request for the query deployment. The keys are the ones the query
# transformer's preprocess reads: customer_id and an ISO-formatted transaction_date.
data = [
    {
        "customer_id": "d327d0ad9e30085a436933dfbb7f77cf42e38447993a078ed35d93e3fd350ecf",
        "transaction_date": "2022-11-15T12:16:25.330916",
    }
]

In [None]:
# Send the request through the query deployment. Its transformer's postprocess
# forwards the query model output to the ranking deployment, so the response
# is read below exactly like the ranking deployment's response.
ranked_candidates = query_model_deployment.predict(inputs=data)

# Retrieve article ids of the top recommended items
recommendations = get_top_recommendations(ranked_candidates["predictions"], k=3)
recommendations

In [None]:
# Check logs in case of failure
# query_model_deployment.get_logs(component="transformer",tail=200)

Stop the deployment when you're not using it.

In [None]:
# Stop the ranking model deployment
ranking_deployment.stop()

# Stop the query model deployment
query_model_deployment.stop()

---

In [None]:
# End the timer
notebook_end_time = time.time()

# Calculate and print the execution time: seconds elapsed since
# notebook_start_time was recorded in the first cell, shown with two decimals
notebook_execution_time = notebook_end_time - notebook_start_time
print(f"⌛️ Notebook Execution time: {notebook_execution_time:.2f} seconds")

---