## <span style="color:#ff5f27">👨🏻‍🏫 Create Deployment </span>

In this notebook, we'll create a deployment for our recommendation system.

**NOTE: Currently the transformer scripts are not implemented.**

## <span style="color:#ff5f27">📝 Imports </span>

In [None]:
import os

## <span style="color:#ff5f27">🔮 Connect to Hopsworks Feature Store </span>

In [None]:
import hopsworks

# log in to Hopsworks; the returned object is used below as the project handle
project = hopsworks.login()

In [None]:
# connect to Hopsworks Model Registry
mr = project.get_model_registry()

# dataset API handle, used below to upload the transformer and predictor scripts
dataset_api = project.get_dataset_api()

## <span style="color:#ff5f27">🚀 Ranking Model Deployment </span>


First, we'll deploy our ranking model. Since it is a CatBoost model, we need to implement a `Predict` class that tells Hopsworks how to load the model and how to use it.

In [None]:
# fetch the "ranking_model" version selected on the "fscore" metric with direction "max"
ranking_model = mr.get_best_model("ranking_model", "fscore", "max")

In [None]:
%%writefile ranking_transformer.py

import os
import pandas as pd

import hopsworks
from opensearchpy import OpenSearch

import logging


class Transformer(object):
    """Transformer of the ranking deployment.

    ``preprocess`` turns a request holding a customer id and a query
    embedding into the feature rows expected by the ranking model, and
    ``postprocess`` orders the scored candidates by descending score.
    """

    def __init__(self):
        # connect to Hopsworks
        project = hopsworks.connection().get_project()

        # get feature views
        self.fs = project.get_feature_store()
        self.articles_fv = self.fs.get_feature_view("articles", 1)
        self.articles_features = [feat.name for feat in self.articles_fv.schema]
        self.customer_fv = self.fs.get_feature_view("customers", 1)

        # create opensearch client
        opensearch_api = project.get_opensearch_api()
        self.os_client = OpenSearch(**opensearch_api.get_default_py_config())
        self.candidate_index = opensearch_api.get_project_index("candidate_index")

        # get ranking model feature names
        mr = project.get_model_registry()
        model = mr.get_model(os.environ["MODEL_NAME"], os.environ["MODEL_VERSION"])
        input_schema = model.model_schema["input_schema"]["columnar_schema"]

        self.ranking_model_feature_names = [feat["name"] for feat in input_schema]

    def preprocess(self, inputs):
        """Build the ranking-model input for the first request instance.

        Args:
            inputs: dict with an "instances" list; the first instance must
                provide "customer_id", "query_emb", "month_sin" and
                "month_cos".

        Returns:
            ``{"inputs": [{"ranking_features": rows, "article_ids": ids}]}``
            where ``rows[i]`` holds the features of ``ids[i]``.

        Raises:
            ValueError: if the customer id contains a quote or a backslash.
        """
        request = inputs["instances"][0]
        customer_id = request["customer_id"]

        # The id is interpolated into a SQL string below and comes straight
        # from the request, so refuse values able to break out of the literal.
        customer_id_text = str(customer_id)
        if "'" in customer_id_text or "\\" in customer_id_text:
            raise ValueError("customer_id contains characters that are not allowed")

        # search for candidates
        hits = self.search_candidates(request["query_emb"], k=100)

        # get already bought items
        already_bought_items_ids = self.fs.sql(
            f"SELECT article_id from transactions_1 WHERE customer_id = '{customer_id}'"
        ).values.reshape(-1).tolist()

        # Candidate ids are compared as strings, so normalise the bought ids
        # to strings as well; otherwise non-string ids would never match.
        exclude_set = {str(article_id) for article_id in already_bought_items_ids}
        item_id_list = [
            item_id
            for item_id in (str(hit["_id"]) for hit in hits)
            if item_id not in exclude_set
        ]
        item_id_df = pd.DataFrame({"article_id": item_id_list})

        # get articles feature vectors
        articles_data = []
        for article_id in item_id_list:
            try:
                article_features = self.articles_fv.get_feature_vector({"article_id": article_id})
            except Exception:
                # article might have been removed from catalogue
                logging.info("-- not found:%s", article_id)
            else:
                articles_data.append(article_features)
        articles_df = pd.DataFrame(data=articles_data, columns=self.articles_features)

        # join candidates with item features
        ranking_model_inputs = item_id_df.merge(articles_df, on="article_id", how="inner")

        # Candidates without a feature vector are dropped by the inner join,
        # so take the ids from the joined frame to keep ids and feature rows
        # aligned one-to-one for postprocess.
        scored_article_ids = ranking_model_inputs["article_id"].tolist()

        # add customer features
        customer_features = self.customer_fv.get_feature_vector({"customer_id": customer_id})
        # NOTE(review): position 1 is assumed to be the customer's age - confirm
        # against the "customers" feature view schema.
        ranking_model_inputs["age"] = customer_features[1]
        ranking_model_inputs["month_sin"] = request["month_sin"]
        ranking_model_inputs["month_cos"] = request["month_cos"]
        ranking_model_inputs = ranking_model_inputs[self.ranking_model_feature_names]

        return {
            "inputs": [
                {
                    "ranking_features": ranking_model_inputs.values.tolist(),
                    "article_ids": scored_article_ids,
                }
            ]
        }

    def postprocess(self, outputs):
        """Pair every score with its article id and sort by descending score."""
        preds = outputs["predictions"]
        ranking = list(zip(preds["scores"], preds["article_ids"]))  # merge lists
        ranking.sort(reverse=True)  # sort by score (descending)
        return {"ranking": ranking}

    def search_candidates(self, query_emb, k=100):
        """Return the raw hits of a k-NN search on the candidate index.

        Args:
            query_emb: query embedding matched against the "my_vector1" field.
            k: number of neighbours requested; also used as the result size.
        """
        query = {
            "size": k,
            "query": {
                "knn": {
                    "my_vector1": {
                        "vector": query_emb,
                        "k": k,
                    }
                }
            },
        }
        return self.os_client.search(body=query, index=self.candidate_index)["hits"]["hits"]

In [None]:
# copy transformer file into Hopsworks File System
uploaded_file_path = dataset_api.upload("ranking_transformer.py", "Resources", overwrite=True)
# project-scoped path, later passed as the transformer's script_file
transformer_script_path = os.path.join("/Projects", project.name, uploaded_file_path)

In [None]:
%%writefile ranking_predictor.py

import os
import joblib
import numpy as np


class Predict(object):
    """Predictor that scores candidate articles with the pickled ranking model."""

    def __init__(self):
        # NOTE: joblib.load unpickles the artifact; only load files that come
        # from a trusted model registry.
        self.model = joblib.load(
            os.path.join(os.environ["ARTIFACT_FILES_PATH"], "ranking_model.pkl")
        )

    def predict(self, inputs):
        """Score the candidates of the first input element.

        Args:
            inputs: list whose first element is a dict holding
                "ranking_features" (feature rows) and "article_ids".

        Returns:
            dict with "scores" (second column of ``predict_proba``, one value
            per feature row) and the unchanged "article_ids".
        """
        features = inputs[0].pop("ranking_features")
        article_ids = inputs[0].pop("article_ids")

        # Nothing to score: skip the model call, which cannot handle an
        # empty batch, and return an empty ranking.
        if len(features) == 0:
            return {"scores": [], "article_ids": article_ids}

        probabilities = np.asarray(self.model.predict_proba(features))
        scores = probabilities[:, 1].tolist()  # get scores of positive class

        return {"scores": scores, "article_ids": article_ids}

In [None]:
# upload predictor file to Hopsworks
uploaded_file_path = dataset_api.upload("ranking_predictor.py", "Resources", overwrite=True)
# project-scoped path, later passed to deploy() as script_file
predictor_script_path = os.path.join("/Projects", project.name, uploaded_file_path)

With that in place, we can finally deploy our model.

In [None]:
from hsml.transformer import Transformer

# must match the name looked up by the query-model transformer via get_deployment("rankingdeployment")
ranking_deployment_name = "rankingdeployment"

# define transformer
# NOTE(review): num_instances=0 presumably enables scale-to-zero - confirm against the Hopsworks docs
ranking_transformer=Transformer(
    script_file=transformer_script_path, 
    resources={"num_instances": 0},
)

# deploy ranking model
# the predictor script and the transformer defined above are attached to the deployment
ranking_deployment = ranking_model.deploy(
    name=ranking_deployment_name,
    description="Deployment that search for item candidates and scores them based on customer metadata",
    script_file=predictor_script_path,
    resources={"num_instances": 0},
    transformer=ranking_transformer,
)

In [None]:
# start the ranking deployment registered above
ranking_deployment.start()

In [None]:
# fetch the deployment logs (no component specified)
ranking_deployment.get_logs()

In [None]:
# fetch the transformer component's logs; tail=20 presumably limits output to the last 20 lines
ranking_deployment.get_logs(component="transformer", tail=20)

In [None]:
# # test ranking deployment
# test_ranking_input = {'instances': [{'customer_id': '048962db9aca38ca4b98c70880a44b60f12562c8d4df5e34457401c14ec0dcbe',
#    'month_sin': 1.2246467991473532e-16,
#    'query_emb': [0.214135289,
#     0.571055949,
#     0.330709577,
#     -0.225899458,
#     -0.308674961,
#     -0.0115124583,
#     0.0730511621,
#     -0.495835781,
#     0.625569344,
#     -0.0438038409,
#     0.263472944,
#     -0.58485353,
#     -0.307070434,
#     0.0414443575,
#     -0.321789205,
#     0.966559],
#    'month_cos': -1.0}]}

# # test ranking
# ranking_deployment.predict(test_ranking_input)

In [None]:
# ranking_deployment.stop()

## <span style="color:#ff5f27">🚀 Query Model Deployment </span>

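Next, we deploy our query model. Its transformer forwards the query model's output to the ranking deployment created in the previous section.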

In [None]:
# fetch version 1 of the "query_model" from the model registry
query_model = mr.get_model(
    name="query_model",
    version=1,
)

In [None]:
%%writefile querymodel_transformer.py

import os
import numpy as np
from datetime import datetime

import hopsworks

import logging


class Transformer(object):
    """Transformer of the query-model deployment.

    Enriches an incoming request with customer and purchase-date features
    before the query model runs, then forwards the model output to the
    ranking deployment.
    """

    def __init__(self):
        # open a Hopsworks connection and resolve the current project
        hw_project = hopsworks.connection().get_project()

        # customer feature view plus the two month transformation functions
        feature_store = hw_project.get_feature_store()
        self.customer_fv = feature_store.get_feature_view("customers", 1)
        sin_function = feature_store.get_transformation_function("month_sin")
        self.month_to_sin = sin_function.transformation_fn
        cos_function = feature_store.get_transformation_function("month_cos")
        self.month_to_cos = cos_function.transformation_fn

        # handle to the ranking deployment, called in postprocess
        model_serving = hw_project.get_model_serving()
        self.ranking_server = model_serving.get_deployment("rankingdeployment")

    def preprocess(self, inputs):
        """Return the request wrapped as ``{"instances": [enriched_request]}``.

        The request is read from ``inputs["instances"]`` when that key is
        present, otherwise ``inputs`` itself is used. "month_of_purchase" is
        removed from it and "age", "month_sin" and "month_cos" are added.
        """
        payload = inputs
        if "instances" in payload:
            payload = payload["instances"]

        # parse the ISO timestamp and drop it from the payload
        purchase_date = datetime.fromisoformat(payload.pop("month_of_purchase"))

        # look up the customer using the remaining payload as the entry
        customer_vector = self.customer_fv.get_feature_vector(payload)

        # add the derived features to the payload
        payload["age"] = customer_vector[1]
        sin_value = self.month_to_sin(purchase_date)
        payload["month_sin"] = sin_value
        cos_value = self.month_to_cos(purchase_date)
        payload["month_cos"] = cos_value

        return {"instances": [payload]}

    def postprocess(self, outputs):
        """Send the model predictions to the ranking deployment and wrap its reply."""
        ranking_request = {"instances": outputs["predictions"]}
        ranked = self.ranking_server.predict(ranking_request)
        return {"predictions": ranked}

In [None]:
# copy transformer file into Hopsworks File System
# NOTE(review): this script goes to "Models" while the ranking scripts go to "Resources" - confirm this is intended
uploaded_file_path = dataset_api.upload("querymodel_transformer.py", "Models", overwrite=True)
# project-scoped path, later passed as the transformer's script_file
transformer_script_path = os.path.join("/Projects", project.name, uploaded_file_path)

In [None]:
from hsml.transformer import Transformer

query_model_deployment_name = "querydeployment"

# define transformer
# NOTE(review): num_instances=0 presumably enables scale-to-zero - confirm against the Hopsworks docs
query_model_transformer=Transformer(
    script_file=transformer_script_path, 
    resources={"num_instances": 0},
)

# deploy query model
# no predictor script_file is passed here, only the transformer
query_model_deployment = query_model.deploy(
    name=query_model_deployment_name,
    description="Deployment that generates query embeddings from customer and item features using the query model",
    resources={"num_instances": 0},
    transformer=query_model_transformer,
)

At this point, we have registered our deployment. To start it up we need to run:

In [None]:
# start the query-model deployment registered above
query_model_deployment.start()

In [None]:
# fetch the transformer component's logs; tail=20 presumably limits output to the last 20 lines
query_model_deployment.get_logs(component="transformer", tail=20)

We can test the deployment by making a prediction on the input example we registered together with the model.

In [None]:
# data = {"instances": {"customer_id": "048962db9aca38ca4b98c70880a44b60f12562c8d4df5e34457401c14ec0dcbe", "month_of_purchase": "2022-11-15T12:16:25.330916"}}
# # data = {"customer_id": "048962db9aca38ca4b98c70880a44b60f12562c8d4df5e34457401c14ec0dcbe", "month_of_purchase": "2022-11-15T12:16:25.330916"}

# res = query_model_deployment.predict(data)

## <span style="color:#ff5f27">🔮 Get ranking of recommendations by customer </span>

In [None]:
# install gradio for the demo UI below (no version pin, so the latest release is installed)
!pip install gradio --quiet
# !pip install typing-extensions==4.3.0

In [None]:
import gradio as gr
from datetime import datetime

def parse_output(ranking):
    """Format a ranking as one text line per recommendation.

    Args:
        ranking: iterable of records where index 0 is the score and index 1
            is the item id.

    Returns:
        A string with one "Item ID: ... score: ..." line per record, each
        terminated by a newline; empty when the ranking is empty.
    """
    # join once instead of growing the string with += on every iteration
    return "".join(
        f"Item ID: {rec[1]}\t\t--\t\t score: {rec[0]}\n" for rec in ranking
    )

def recommender(c_id, dt):
    """Return the formatted recommendations for one customer.

    Args:
        c_id: customer id sent to the query deployment.
        dt: ISO-format timestamp of the purchase, or the literal "now" to use
            the current local time.

    Returns:
        The ranking returned by the query deployment, formatted by
        ``parse_output``.
    """
    mop = datetime.now().isoformat() if dt == "now" else dt
    data = {"instances": {"customer_id": c_id, "month_of_purchase": mop}}
    res = query_model_deployment.predict(data)
    return parse_output(res["predictions"]["ranking"])

# Build the demo UI. Components are created with gr.Textbox(value=...): the
# gr.inputs / gr.outputs namespaces and their `default=` argument are
# deprecated and no longer exist in current Gradio releases.
demo = gr.Interface(
    fn=recommender,
    title="Fashion Items Recommender",
    description="Get recommendations of fashion items based on customer history and item features.",
    allow_flagging="never",
    inputs=[
        gr.Textbox(value="048962db9aca38ca4b98c70880a44b60f12562c8d4df5e34457401c14ec0dcbe", label="For which customer?"),
        gr.Textbox(value="now", label="When to purchase?"),
    ],
    outputs=gr.Textbox(label="Recommendations"),
)

# NOTE: share=True publishes the app under a temporary public URL
demo.launch(share=True)

Let's stop the deployment when we're not using it.

In [None]:
# query_model_deployment.stop()

In [None]:
# example request payload for the query deployment; evaluating this cell only displays the dict
{"customer_id": "048962db9aca38ca4b98c70880a44b60f12562c8d4df5e34457401c14ec0dcbe", "month_of_purchase": "2022-11-15T12:16:25.330916"}

---