## <span style="color:#ff5f27">👨🏻‍🏫 Create Deployment </span>

In this notebook, you'll create a deployment for your recommendation system.


In [None]:
import time

# Start the timer: remember when the notebook began so the final cell can
# report the total execution time
notebook_start_time = time.time()

## <span style="color:#ff5f27">📝 Imports </span>

In [None]:
import os

## <span style="color:#ff5f27">🔮 Connect to Hopsworks Feature Store </span>

In [None]:
import hopsworks

# Log in to Hopsworks; the returned project handle is used below to reach the
# model registry and the dataset API
project = hopsworks.login()

In [None]:
# Connect to Hopsworks Model Registry
mr = project.get_model_registry()

# Dataset API handle, used further down to upload the transformer script
dataset_api = project.get_dataset_api()

In [None]:
# Retrieve version 1 of the 'query_model' from the Model Registry;
# this is the model that gets deployed later in the notebook
query_model = mr.get_model(
    name="query_model",
    version=1,
)

## <span style="color:#ff5f27">🚀 Recommender Model Deployment </span>

📝 The deployment performs the following steps:
1. Generate query embedding from customer data.
2. Find the candidate items closest to the customer embedding using similarity search.
3. Filter out items the customer has already bought.
4. Retrieve article features for remaining candidate items.
5. Score candidate items using the ranking model.
6. Sort items by prediction scores in descending order.
7. Return the ranked recommendations.


In [None]:
%%writefile recommender_transformer.py
import os
from datetime import datetime
import numpy as np
import pandas as pd
import joblib
import logging
import hopsworks


class Transformer:
    """Pre- and post-processing steps that run around the model prediction.

    ``preprocess`` enriches the raw request with the customer's age and the
    sine/cosine encoding of the purchase month. ``postprocess`` reads the
    query embedding from the model output, looks up candidate articles,
    drops the ones the customer already bought, scores the remaining ones
    with the ranking model and returns them ordered by score.
    """

    def __init__(self):
        """Connect to Hopsworks and load every resource used at request time."""
        # Connect to Hopsworks
        project = hopsworks.login()
        # NOTE(review): `ms` is not used anywhere below; the call is kept in
        # case it initialises serving state -- confirm before removing.
        ms = project.get_model_serving()
        mr = project.get_model_registry()

        # Retrieve the 'customers' feature view
        self.fs = project.get_feature_store()
        self.customer_fv = self.fs.get_feature_view(
            name="customers",
            version=1,
        )
        # Prepare the feature view for single-vector lookups
        # (the argument is presumably a training dataset version -- TODO confirm)
        self.customer_fv.init_serving(1)

        # Retrieve 'transactions' feature group.
        self.transactions_fg = self.fs.get_feature_group(
            name="transactions",
            version=1,
        )

        # Retrieve the 'articles' feature view
        self.articles_fv = self.fs.get_feature_view(
            name="articles",
            version=1,
        )

        # Get list of feature names for articles
        self.articles_features = [feat.name for feat in self.articles_fv.schema]

        # Retrieve the 'candidate_embeddings' feature view
        self.candidate_index = self.fs.get_feature_view(
            name="candidate_embeddings",
            version=1,
        )

        model = mr.get_model(
            name="ranking_model",
            version=1,
        )

        # Download the saved model files to a local directory
        saved_model_dir = model.download()

        # NOTE: joblib.load unpickles the file, so the registry content must
        # be trusted.
        self.model = joblib.load(os.path.join(saved_model_dir, "ranking_model.pkl"))

        self.ranking_fv = model.get_feature_view(init=False)
        self.ranking_fv.init_batch_scoring(1)

        # Get the names of features expected by the ranking model
        self.ranking_model_feature_names = [
            feature.name
            for feature in self.ranking_fv.schema
            if feature.name != "label"
        ]

    def preprocess(self, inputs):
        """Enrich a raw request with customer age and month features.

        Args:
            inputs: Either ``{"instances": [request]}`` or ``[request]``,
                where ``request`` is a dict holding ``customer_id`` and an
                ISO-8601 ``date`` string. The request dict is modified in
                place.

        Returns:
            ``{"instances": [request]}`` where ``date`` has been removed and
            ``age``, ``month_sin`` and ``month_cos`` have been added.

        Raises:
            ValueError: If ``date`` is not a valid ISO-8601 timestamp.
        """
        # Check if the input data contains a key named "instances"
        # and extract the actual data if present
        inputs = inputs["instances"] if "instances" in inputs else inputs
        inputs = inputs[0]

        customer_id = inputs["customer_id"]

        # Parse the date exactly once. fromisoformat accepts timestamps with
        # and without fractional seconds; "date" is removed from the request
        # because only the derived month features are passed on.
        month_of_purchase = datetime.fromisoformat(inputs.pop("date")).month

        # Get customer features
        customer_features = self.customer_fv.get_feature_vector(
            {"customer_id": customer_id},
            return_type="pandas",
        )

        # Enrich inputs with customer age
        inputs["age"] = customer_features.age.values[0]

        # Calculate the sine and cosine components of the month using the
        # on-demand transformation attached to the ranking feature view.
        feature_vector = self.ranking_fv._batch_scoring_server.compute_on_demand_features(
            feature_vectors=pd.DataFrame([inputs]),
            request_parameters={"month": month_of_purchase},
        ).to_dict(orient="records")[0]

        inputs["month_sin"] = feature_vector["month_sin"]
        inputs["month_cos"] = feature_vector["month_cos"]

        return {"instances": [inputs]}

    def postprocess(self, query_outputs):
        """Turn the model output into a ranked list of recommendations.

        Args:
            query_outputs: Sequence whose first element is a mapping with the
                keys ``customer_id``, ``query_emb``, ``month_sin`` and
                ``month_cos``.

        Returns:
            ``{"ranking": [(score, article_id), ...]}`` sorted by descending
            score. The list is empty when no unseen candidate is left.
        """
        inputs = query_outputs[0]

        # Extract customer_id from inputs
        customer_id = inputs["customer_id"]

        # Search for candidate items
        neighbors = self.candidate_index.find_neighbors(
            inputs["query_emb"],
            k=100,
        )
        neighbors = [neighbor[0] for neighbor in neighbors]

        # Get IDs of items already bought by the customer. They are kept as a
        # set of strings so the membership test below is O(1) and compares
        # like with like regardless of the stored id type.
        already_bought_items_ids = {
            str(article_id)
            for article_id in self.transactions_fg.select("article_id")
            .filter(self.transactions_fg.customer_id == customer_id)
            .read(dataframe_type="pandas")
            .values.reshape(-1)
            .tolist()
        }

        # Filter candidate items to exclude those already bought by the customer
        item_id_list = [
            str(item_id)
            for item_id in neighbors
            if str(item_id) not in already_bought_items_ids
        ]

        # Nothing left to score: skip the feature lookups and the model call
        if not item_id_list:
            logging.info("No unseen candidate items to rank.")
            return {"ranking": []}

        item_id_df = pd.DataFrame({"article_id": item_id_list})

        # Retrieve Article data for candidate items
        articles_data = [
            self.articles_fv.get_feature_vector({"article_id": item_id})
            for item_id in item_id_list
        ]

        logging.info("✅ Articles Data Retrieved!")

        articles_df = pd.DataFrame(
            data=articles_data,
            columns=self.articles_features,
        )

        # Join candidate items with their features
        ranking_model_inputs = item_id_df.merge(
            articles_df,
            on="article_id",
            how="inner",
        )

        logging.info("✅ Inputs are almost ready!")

        # Add customer features
        customer_features = self.customer_fv.get_feature_vector(
            {"customer_id": customer_id},
            return_type="pandas",
        )

        ranking_model_inputs["age"] = customer_features.age.values[0]
        ranking_model_inputs["month_sin"] = inputs["month_sin"]
        ranking_model_inputs["month_cos"] = inputs["month_cos"]

        # Read the ids from the joined frame (not from item_id_list) so every
        # score is paired with the article of its own feature row even if the
        # inner join dropped a candidate.
        article_ids = ranking_model_inputs["article_id"].tolist()
        if not article_ids:
            logging.info("No unseen candidate items to rank.")
            return {"ranking": []}

        # Select only the features required by the ranking model
        ranking_model_inputs = ranking_model_inputs[self.ranking_model_feature_names]

        logging.info("✅ Inputs are ready!")

        features = ranking_model_inputs.values.tolist()

        # Log the extracted features
        logging.info("predict -> %s", features)

        # Log the extracted article ids
        logging.info("Article IDs: %s", article_ids)

        logging.info("🚀 Predicting...")

        # Predict probabilities and keep the column of the positive class
        scores = np.asarray(self.model.predict_proba(features))[:, 1].tolist()

        logging.info("✅ Predictions are ready!")

        # Merge prediction scores and corresponding article IDs into a list of tuples
        ranking = list(zip(scores, article_ids))

        # Sort the ranking list by score in descending order
        ranking.sort(reverse=True)

        # Return the sorted ranking list
        return {
            "ranking": ranking,
        }


In [None]:
# Copy transformer file into Hopsworks File System
# (the script written by the %%writefile cell goes into the "Models" dataset;
# the value returned by upload() is kept to build the script path below)
uploaded_file_path = dataset_api.upload(
    "recommender_transformer.py", 
    "Models", 
    overwrite=True,
)

# Construct the path to the uploaded script:
# "/Projects/<project name>/<uploaded file path>"
transformer_script_path = os.path.join(
    "/Projects", 
    project.name, 
    uploaded_file_path,
)

In [None]:
from hsml.transformer import Transformer

# Name under which the deployment is created
recommender_deployment_name = "recommenderdeployment"

# Define transformer: points at the uploaded recommender_transformer.py script
# NOTE(review): num_instances=0 presumably lets the component scale to zero
# when idle -- confirm against the Hopsworks serving documentation
recommender_transformer=Transformer(
    script_file=transformer_script_path, 
    resources={"num_instances": 0},
)

# Deploy the query model with the transformer attached
recommender_deployment = query_model.deploy(
    name=recommender_deployment_name,
    description="Recommender deployment that generates query embeddings from customer and item features using the query model",
    resources={"num_instances": 0},
    transformer=recommender_transformer,
    environment="pandas-inference-pipeline",
)

In [None]:
# Start the deployment and wait up to 300 seconds for it to reach a running state
recommender_deployment.start(await_running=300)

In [None]:
# Uncomment the next line if you want to see the logs
# recommender_deployment.get_logs(component='transformer')

In [None]:
# Example request: one customer id together with an ISO-8601 timestamp
data = [
    {
        "customer_id": "d327d0ad9e30085a436933dfbb7f77cf42e38447993a078ed35d93e3fd350ecf",
        "date": "2022-11-15T12:16:25.330916",
    }
]

# Send the request to the running deployment
ranked_candidates = recommender_deployment.predict(inputs=data)
# Show the first 10 entries of the returned ranking
ranked_candidates["predictions"]["ranking"][:10]

In [None]:
def get_top_recommendations(ranked_candidates, k=3):
    """Return the last element of each of the first ``k`` ranking entries.

    ``ranked_candidates`` is a mapping with a ``"ranking"`` sequence; the last
    element of every entry is taken as the recommended item.
    """
    top_ranked = ranked_candidates["ranking"][:k]
    return [entry[-1] for entry in top_ranked]


# Retrieve article ids of the top 3 recommended items from the prediction response
recommendations = get_top_recommendations(ranked_candidates["predictions"], k=3)
recommendations

In [None]:
# Stop the deployment now that the example prediction has been made
recommender_deployment.stop()

---

In [None]:
# End the timer
notebook_end_time = time.time()

# Calculate and print the execution time
# (seconds elapsed since notebook_start_time was recorded in the first cell)
notebook_execution_time = notebook_end_time - notebook_start_time
print(f"⌛️ Notebook Execution time: {notebook_execution_time:.2f} seconds")

---