## <span style="color:#ff5f27">👨🏻‍🏫 Create Deployment </span>

In this notebook, you'll create a deployment for your recommendation system.

**NOTE: Currently the transformer scripts are not implemented.**

## <span style="color:#ff5f27">📝 Imports </span>

In [None]:
# !pip install -r requirements.txt

In [None]:
import os

## <span style="color:#ff5f27">🔮 Connect to Hopsworks Feature Store </span>

In [None]:
import hopsworks

project = hopsworks.login()

# Connect to Hopsworks Model Registry
mr = project.get_model_registry()

dataset_api = project.get_dataset_api()

## <span style="color:#ff5f27">🚀 Ranking Model Deployment </span>


You start by deploying your ranking model. Since it is a CatBoost model you need to implement a `Predict` class that tells Hopsworks how to load the model and how to use it.

In [None]:
ranking_model = mr.get_best_model(
    name="ranking_model", 
    metric="fscore", 
    direction="max",
)
ranking_model

In [None]:
ranking_model.model_schema["input_schema"]["columnar_schema"]

In [None]:
%%writefile ranking_transformer.py

import os
import pandas as pd

import hopsworks
from opensearchpy import OpenSearch

import logging


class Transformer(object):
    """Transformer script for the ranking deployment.

    ``preprocess`` turns a request (user ID, month encodings and a query
    embedding) into one feature row per candidate video for the ranking
    model. ``postprocess`` orders the scored candidates from best to worst.
    """

    def __init__(self):
        # Connect to Hopsworks
        project = hopsworks.connection().get_project()
        self.fs = project.get_feature_store()

        # Retrieve the 'videos' feature view
        self.videos_fv = self.fs.get_feature_view(
            name="videos",
            version=1,
        )

        # Names of the video features, in feature-view schema order
        self.video_features = [feat.name for feat in self.videos_fv.schema]

        # Retrieve the 'users' feature view
        self.users_fv = self.fs.get_feature_view(
            name="users",
            version=1,
        )

        # Retrieve the 'candidate_embeddings' feature view, used for the
        # nearest-neighbour candidate search
        self.candidate_index = self.fs.get_feature_view(
            name="candidate_embeddings",
            version=1,
        )

        # Retrieve the ranking model to read its input schema.
        # NOTE(review): the version is pinned to 1 here; confirm it matches
        # the model version that is actually deployed.
        mr = project.get_model_registry()
        model = mr.get_model(
            name="ranking_model",
            version=1,
        )

        # Extract input schema from the model
        input_schema = model.model_schema["input_schema"]["columnar_schema"]

        # Names (and order) of the features expected by the ranking model
        self.ranking_model_feature_names = [feat["name"] for feat in input_schema]

    @staticmethod
    def _escape_sql_literal(value):
        """Escape *value* for use inside a single-quoted SQL string literal."""
        # Backslashes first, so the quotes doubled below are not re-escaped.
        return str(value).replace("\\", "\\\\").replace("'", "''")

    def preprocess(self, inputs):
        """Build the ranking-model input for the first request instance.

        Args:
            inputs: Request payload; ``inputs["instances"][0]`` must provide
                ``user_id``, ``month_sin``, ``month_cos`` and ``query_emb``.

        Returns:
            ``{"inputs": [{"ranking_features": rows, "video_ids": ids}]}``
            where ``rows[i]`` holds the ranking features of ``ids[i]``.
        """
        # Extract the input instance
        instance = inputs["instances"][0]

        # Extract the user ID and the month encodings from the instance
        user_id = instance["user_id"]
        month_sin = instance["month_sin"]
        month_cos = instance["month_cos"]

        # Search for candidate videos close to the query embedding; the first
        # element of each neighbour is used as the candidate video ID
        neighbors = self.candidate_index.find_neighbors(
            instance["query_emb"],
            k=100,
        )
        candidate_ids = [neighbor[0] for neighbor in neighbors]

        # IDs of videos the user has already interacted with. The user ID
        # comes from the request, so it is escaped before being embedded in
        # the query string.
        safe_user_id = self._escape_sql_literal(user_id)
        seen_rows = self.fs.sql(
            f"SELECT video_id from interactions_1 WHERE user_id = '{safe_user_id}'",
            online=True,
        )
        # A set gives constant-time membership tests in the filter below
        already_seen_video_ids = set(seen_rows.values.reshape(-1).tolist())

        # Keep only the candidates the user has not seen yet
        video_id_list = [
            video_id
            for video_id in candidate_ids
            if video_id not in already_seen_video_ids
        ]

        # Retrieve the video features of every remaining candidate
        videos_data = [
            self.videos_fv.get_feature_vector({"video_id": video_id})
            for video_id in video_id_list
        ]

        ranking_model_inputs = pd.DataFrame(
            data=videos_data,
            columns=self.video_features,
        )

        # Attach the candidate IDs to their feature rows
        ranking_model_inputs["video_id"] = video_id_list

        # Add the user features (the same values for every candidate row)
        user_features = self.users_fv.get_feature_vector(
            {"user_id": user_id},
            return_type="pandas",
        )

        ranking_model_inputs["gender"] = user_features["gender"].values[0]
        ranking_model_inputs["age"] = user_features["age"].values[0]
        ranking_model_inputs["country"] = user_features["country"].values[0]
        ranking_model_inputs["month_sin"] = month_sin
        ranking_model_inputs["month_cos"] = month_cos

        # Select only the features required by the ranking model, in the
        # order given by its input schema
        ranking_model_inputs = ranking_model_inputs[self.ranking_model_feature_names]

        return {
            "inputs": [
                {
                    "ranking_features": ranking_model_inputs.values.tolist(),
                    "video_ids": video_id_list,
                }
            ]
        }

    def postprocess(self, outputs):
        """Sort the scored candidates by score, highest first.

        Args:
            outputs: Predictor response; ``outputs["predictions"]`` must hold
                the parallel lists ``scores`` and ``video_ids``.

        Returns:
            ``{"ranking": [(score, video_id), ...]}`` in descending order.
        """
        # Extract predictions from the outputs
        preds = outputs["predictions"]

        # Pair each score with its video ID and sort in descending order
        ranking = sorted(zip(preds["scores"], preds["video_ids"]), reverse=True)

        return {
            "ranking": ranking,
        }

In [None]:
# Copy transformer file into Hopsworks File System 
uploaded_file_path = dataset_api.upload(
    "ranking_transformer.py",    # File name to be uploaded
    "Resources",                 # Destination directory in Hopsworks File System 
    overwrite=True,              # Overwrite the file if it already exists
) 

# Construct the path to the uploaded transformer script
transformer_script_path = os.path.join(
    "/Projects",                 # Root directory for projects in Hopsworks
    project.name,                # Name of the current project
    uploaded_file_path,          # Path to the uploaded file within the project
)

In [None]:
%%writefile ranking_predictor.py

import os
import joblib
import numpy as np

import logging

class Predict(object):
    """Predictor script that scores candidate videos with the ranking model."""

    def __init__(self):
        # The model artifact is read from the directory named by the
        # ARTIFACT_FILES_PATH environment variable
        model_path = f'{os.environ["ARTIFACT_FILES_PATH"]}/ranking_model.pkl'
        self.model = joblib.load(model_path)

    def predict(self, inputs):
        """Score the candidates carried by the first element of *inputs*.

        The ``ranking_features`` and ``video_ids`` entries are popped from
        ``inputs[0]``, so the caller's dict is modified in place.

        Returns:
            A dict with the positive-class ``scores`` and the matching
            ``video_ids``.
        """
        request = inputs[0]

        # Pull the feature rows and the video IDs out of the request
        features = request.pop("ranking_features")
        video_ids = request.pop("video_ids")

        # Log the extracted features
        logging.info(f"predict -> {features}")

        # Class probabilities for every candidate row
        probability_rows = self.model.predict_proba(features).tolist()

        # Column 1 holds the probability of the positive class
        positive_scores = np.asarray(probability_rows)[:, 1].tolist()

        # Return the scores together with the video IDs they belong to
        return {"scores": positive_scores, "video_ids": video_ids}

In [None]:
# Upload predictor file to Hopsworks
uploaded_file_path = dataset_api.upload(
    "ranking_predictor.py", 
    "Resources", 
    overwrite=True,
)

# Construct the path to the uploaded script
predictor_script_path = os.path.join(
    "/Projects", 
    project.name, 
    uploaded_file_path,
)

With that in place, you can finally deploy your model.

In [None]:
from hsml.transformer import Transformer

ranking_deployment_name = "rankingdeployment"

# Define transformer
ranking_transformer=Transformer(
    script_file=transformer_script_path, 
    resources={"num_instances": 1},
)

# Deploy ranking model
ranking_deployment = ranking_model.deploy(
    name=ranking_deployment_name,
    description="Deployment that search for video candidates and scores them based on user metadata",
    script_file=predictor_script_path,
    resources={"num_instances": 1},
    transformer=ranking_transformer,
)

In [None]:
# Start the deployment
ranking_deployment.start()

In [None]:
# Check logs in case of failure
#ranking_deployment.get_logs(component="predictor", tail=200)

In [None]:
def get_top_recommendations(ranked_candidates, k=3):
    """Return the last element of each of the first *k* ranked candidates.

    ``ranked_candidates['ranking']`` is expected to be a sequence of
    candidates; the last item of each candidate is what gets returned.
    """
    top_candidates = ranked_candidates['ranking'][:k]
    recommendations = []
    for candidate in top_candidates:
        recommendations.append(candidate[-1])
    return recommendations

In [None]:
# Define a test input example
test_ranking_input = {"instances": [{
    "user_id": "ED267E",
    "month_sin": 1.2246467991473532e-16,
    "month_cos": -1.0,
    "query_emb": [0.214135289,
     0.571055949,
     0.330709577,
     -0.225899458,
     -0.308674961,
     -0.0115124583,
     0.0730511621,
     -0.495835781,
     0.625569344,
     -0.0438038409,
     0.263472944,
     -0.58485353,
     -0.307070434,
     0.0414443575,
     -0.321789205,
     0.966559],
}]}

In [None]:
# Test ranking deployment
ranked_candidates = ranking_deployment.predict(test_ranking_input)

# Retrieve the video IDs of the top recommended items
recommendations = get_top_recommendations(ranked_candidates, k=3)
recommendations

In [None]:
# Check logs in case of failure
#ranking_deployment.get_logs(component="transformer",tail=200)

---

## <span style="color:#ff5f27">🚀 Query Model Deployment </span>

Next, you'll deploy your query model.

In [None]:
# Retrieve the 'query_model' from the Model Registry
query_model = mr.get_model(
    name="query_model",
    version=1,
)

In [None]:
%%writefile querymodel_transformer.py

import os
import numpy as np
import pandas as pd
from datetime import datetime

import hopsworks

import logging
import json


# On-demand features: the sine and cosine of the month of the interaction date
def month_sine(interaction_date):
    """Return the sine component of the cyclical month encoding.

    Args:
        interaction_date: Object exposing a ``month`` attribute (1-12),
            e.g. a ``datetime``.

    Returns:
        ``sin(2 * pi * month / 12)`` as a Python float.
    """
    # One full sine period spans the 12 months of the year. The coefficient
    # must be a constant: a random one would give a different value for the
    # same month on every call.
    # NOTE(review): confirm the offline feature pipeline uses the same
    # constant, so that training and serving features agree.
    coef = 2 * np.pi / 12

    month_of_interaction = interaction_date.month

    return float(np.sin(month_of_interaction * coef))

def month_cosine(interaction_date):
    """Return the cosine component of the cyclical month encoding.

    Args:
        interaction_date: Object exposing a ``month`` attribute (1-12),
            e.g. a ``datetime``.

    Returns:
        ``cos(2 * pi * month / 12)`` as a Python float.
    """
    # One full cosine period spans the 12 months of the year. The coefficient
    # must be a constant: a random one would give a different value for the
    # same month on every call.
    # NOTE(review): confirm the offline feature pipeline uses the same
    # constant, so that training and serving features agree.
    coef = 2 * np.pi / 12

    month_of_interaction = interaction_date.month

    return float(np.cos(month_of_interaction * coef))

    
class Transformer(object):
    """Transformer script for the query-model deployment.

    ``preprocess`` enriches a request with user features and month
    encodings before it reaches the query model. ``postprocess`` forwards
    the query model's predictions to the ranking deployment and returns its
    response.
    """

    def __init__(self):
        # Connect to Hopsworks
        project = hopsworks.connection().get_project()
        ms = project.get_model_serving()

        # Retrieve the 'users' feature view
        fs = project.get_feature_store()
        self.users_fv = fs.get_feature_view(
            name="users",
            version=1,
        )

        # Retrieve the ranking deployment
        self.ranking_server = ms.get_deployment("rankingdeployment")

        self.logger = logging.getLogger(__name__)

    @staticmethod
    def _extract_instance(inputs):
        """Return the single request instance carried by *inputs*.

        Accepts ``{"instances": {...}}``, the list form
        ``{"instances": [{...}]}`` (as read by the ranking transformer) and
        a bare instance dict without the ``"instances"`` wrapper.

        Raises:
            ValueError: If the instance list is empty.
        """
        instance = inputs["instances"] if "instances" in inputs else inputs
        if isinstance(instance, (list, tuple)):
            if not instance:
                raise ValueError("Request contains no instances")
            instance = instance[0]
        return instance

    def preprocess(self, inputs):
        """Add user features and month encodings to the request instance.

        The instance dict is modified in place: ``interaction_date`` is
        removed and the derived features are added.

        Args:
            inputs: Request payload providing ``user_id`` and
                ``interaction_date`` (formatted ``"%Y-%m-%d %H:%M:%S"``).

        Returns:
            ``{"instances": [instance]}`` with the enriched instance.
        """
        inputs = self._extract_instance(inputs)

        # Extract the user ID and the interaction date from the instance
        user_id = inputs["user_id"]
        interaction_date = inputs.pop("interaction_date")

        # Get the user features
        user_features = self.users_fv.get_feature_vector(
            {"user_id": user_id},
            return_type="pandas",
        )

        # Enrich the instance with the user's gender and age
        inputs["gender"] = user_features['gender'].values[0]
        inputs["age"] = user_features['age'].values[0]

        # Parse the interaction date so its month can be encoded
        interaction_date = datetime.strptime(interaction_date, "%Y-%m-%d %H:%M:%S")

        # Sine and cosine components of the interaction month
        inputs["month_sin"] = month_sine(interaction_date)
        inputs["month_cos"] = month_cosine(interaction_date)

        # Remaining user features
        inputs["country"] = user_features['country'].values[0]
        inputs["user_dislike_count"] = user_features['user_dislike_count'].values[0]
        inputs["user_like_count"] = user_features['user_like_count'].values[0]
        inputs["user_total_watch_time"] = user_features['user_total_watch_time'].values[0]
        inputs["user_view_count"] = user_features['user_view_count'].values[0]

        return {
            "instances": [inputs]
        }

    def postprocess(self, outputs):
        """Send the query model's predictions to the ranking deployment.

        Returns:
            ``{"predictions": <response of the ranking deployment>}``.
        """
        ranking_response = self.ranking_server.predict(
            {"instances": outputs["predictions"]}
        )
        return {
            "predictions": ranking_response,
        }

In [None]:
# Copy transformer file into Hopsworks File System
uploaded_file_path = dataset_api.upload(
    "querymodel_transformer.py", 
    "Models", 
    overwrite=True,
)

# Construct the path to the uploaded script
transformer_script_path = os.path.join(
    "/Projects", 
    project.name, 
    uploaded_file_path,
)

In [None]:
from hsml.transformer import Transformer

query_model_deployment_name = "querydeployment"

# Define transformer
query_model_transformer=Transformer(
    script_file=transformer_script_path, 
    resources={"num_instances": 1},
)

# Deploy the query model
query_model_deployment = query_model.deploy(
    name=query_model_deployment_name,
    description="Deployment that generates query embeddings from user and video features using the query model",
    resources={"num_instances": 1},
    transformer=query_model_transformer,
)

At this point, you have registered your deployment. To start it up you need to run:

In [None]:
# Start the deployment
query_model_deployment.start()

In [None]:
# Check logs in case of failure
# query_model_deployment.get_logs(component="transformer", tail=20)

In [None]:
# Define a test input example
#data = {"instances": {"user_id": "ED267E"}}

# Define a test input example
data = {"instances": {"user_id": "ED267E", "interaction_date": "2024-02-10 15:33:11"}}


# Test the deployment
ranked_candidates = query_model_deployment.predict(data)

# Retrieve the video IDs of the top recommended items
recommendations = get_top_recommendations(
    ranked_candidates['predictions'], 
    k=3,
)
recommendations


In [None]:
# Check logs in case of failure
#query_model_deployment.get_logs(component="transformer",tail=200)

In [None]:
#ranking_deployment.get_logs(component="transformer",tail=200)

Stop the deployments when you're not using them.

In [None]:
# Stop the ranking model deployment
ranking_deployment.stop()

# Stop the query model deployment
query_model_deployment.stop()

In [None]:
# `data["instances"]` is a single dict (see the test input defined above),
# not a list, so it must not be indexed with [0]
inputs = data["instances"]

# Extract the user ID and the interaction date from the inputs
user_id = inputs["user_id"]
interaction_date = inputs["interaction_date"]


---