In [None]:
from loguru import logger

from recsys import hopsworks_integration
from recsys.config import settings


In [None]:
# Obtain the Hopsworks project handle and its feature store through the
# project's own `hopsworks_integration` helper; both are reused by later cells.
project, fs = hopsworks_integration.get_feature_store()

# Deploy Ranking Inference

In [None]:
from hsml.transformer import Transformer
import os

In [None]:
class HopsworksRankingModel:
    """Deploys the registered ranking model together with its transformer and predictor scripts."""

    # Name given to the serving deployment.
    deployment_name = "ranking"
    # Name under which the ranking model is looked up in the model registry.
    model_name = "cb_ranking_model"

    def __init__(self, model) -> None:
        self._model = model

    @classmethod
    def deploy(cls, project):
        """Create the ranking deployment from the highest registered model version.

        Args:
            project: Hopsworks project handle; must expose
                ``get_model_registry()``, ``get_dataset_api()`` and ``name``.

        Returns:
            The deployment object returned by ``model.deploy(...)``.

        Raises:
            RuntimeError: If the registry holds no model named ``model_name``.
        """
        mr = project.get_model_registry()
        dataset_api = project.get_dataset_api()

        models = mr.get_models(name=cls.model_name)
        if not models:
            # The message names the model that was actually looked up.
            raise RuntimeError(
                f"No '{cls.model_name}' found in Hopsworks model registry."
            )
        model = max(models, key=lambda m: m.version)

        # Copy the transformer script into the Hopsworks File System.
        transformer_script_path = cls._upload_script(
            project, dataset_api, "ranking_transformer.py"
        )
        ranking_transformer = Transformer(
            script_file=transformer_script_path,
            resources={"num_instances": 0},
        )

        # Copy the predictor script into the Hopsworks File System.
        predictor_script_path = cls._upload_script(
            project, dataset_api, "ranking_predictor.py"
        )

        # Deploy the ranking model with both scripts attached.
        return model.deploy(
            name=cls.deployment_name,
            description="Deployment that search for item candidates and scores them based on customer query",
            resources={"num_instances": 0},
            script_file=predictor_script_path,
            transformer=ranking_transformer,
        )

    @staticmethod
    def _upload_script(project, dataset_api, filename):
        """Upload one file from the local ``inference`` directory and return its project path."""
        uploaded_file_path = dataset_api.upload(
            str(settings.RECSYS_DIR / "inference" / filename),  # Local file to upload
            "Resources",  # Destination directory in Hopsworks File System
            overwrite=True,  # Replace the file if it already exists
        )
        # Prefix the path returned by the upload with the project root.
        return os.path.join("/Projects", project.name, uploaded_file_path)

In [None]:
# Create the ranking deployment, then start it; `await_running=180` is handed
# straight to `start` (presumably a timeout in seconds — confirm against hsml).
ranking_deployment = HopsworksRankingModel.deploy(project=project)
ranking_deployment.start(await_running=180)

In [None]:
def get_top_recommendations(ranked_candidates, k=3):
    """Return the last element of each of the first ``k`` entries under ``"ranking"``.

    Args:
        ranked_candidates: Mapping with a ``"ranking"`` sequence of indexable entries.
        k: Number of leading entries to keep.

    Returns:
        A list holding ``entry[-1]`` for each of the first ``k`` ranking entries.
    """
    top_entries = ranked_candidates["ranking"][:k]
    item_ids = []
    for entry in top_entries:
        item_ids.append(entry[-1])
    return item_ids

In [None]:
# Single-instance request used to smoke-test the ranking deployment.
# NOTE(review): the ranking Transformer cell in this notebook reads
# inputs["user_id"], while this payload sends "customer_id" — confirm which key
# the deployed ranking_transformer.py expects.
test_ranking_input = [
    {
        # "user_id": "256843",
        "customer_id": "256843",
        "query_emb": [
            1.0040990114212036,
            0.02099212259054184,
            0.6753973364830017,
            -1.018194556236267,
            1.0765583515167236,
            0.8201298117637634,
            0.1209947019815445,
            0.5214401483535767,
            -1.350378394126892,
            -0.1466677188873291,
            -0.19946162402629852,
            -0.004622574429959059,
            -0.9494196176528931,
            -0.16883370280265808,
            -0.3382653594017029,
            1.305509328842163,
        ],
    }
]

# Send the request to the running ranking deployment.
ranked_candidates = ranking_deployment.predict(inputs=test_ranking_input)

# Keep the ids of the three best-scored items.
recommendations = get_top_recommendations(ranked_candidates["predictions"], k=3)
recommendations

In [None]:
# Dump the raw response of the ranking deployment for inspection.
print(f"{ranked_candidates=}")

In [None]:
# Fetch the logs of the ranking deployment (useful when the prediction above fails).
ranking_deployment.get_logs()

In [None]:
# Feature group holding the item records.
items_fg = fs.get_feature_group(name="items", version=1)

# Feature view over every column of the items feature group
# (created on first use, fetched afterwards).
items_fv = fs.get_or_create_feature_view(
    name="items",
    version=1,
    description="Books feature view",
    query=items_fg.select_all(),
)

# Look up the feature vectors of the recommended items, keyed by isbn.
recommendations_df = items_fv.get_feature_vectors(
    [{"isbn": isbn} for isbn in recommendations],
    return_type="pandas",
)
recommendations_df

In [None]:
from IPython.display import HTML, display

# Render the large cover images of the recommended items in a 3-column grid.
image_urls = recommendations_df["image_url_l"].to_list()

grid_html = (
    '<div style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px; max-width: 900px;">'
    + "".join(
        f'<img src="{url}" style="width: 100%; height: auto;">' for url in image_urls
    )
    + "</div>"
)

display(HTML(grid_html))


In [None]:
import logging

import hopsworks
import pandas as pd

import nest_asyncio
nest_asyncio.apply()

class Transformer(object):
    """Pre/post-processing around the ranking predictor.

    ``preprocess`` turns a request holding a user id and a query embedding into
    the feature rows to score; ``postprocess`` pairs the returned scores with
    their item ids and sorts them best-first.

    NOTE(review): this class reuses the name of ``hsml.transformer.Transformer``
    imported elsewhere in the notebook; running this cell shadows that import
    until it is imported again.
    """

    def __init__(self):
        # Connect to Hopsworks
        project = hopsworks.login()
        self.fs = project.get_feature_store()

        # todo: get from rating feature view, but pop the ratings
        # Columns (and their order) handed to the ranking model.
        self.rating_features = ["user_id", "isbn", "age", "year_of_publication"]

        # Retrieve the 'candidate_embeddings' feature view
        self.candidate_index = self.fs.get_feature_view(
            version=1,
            name="candidate_embeddings",
        )

        # Retrieve the 'ratings' feature group
        self.ratings_fg = self.fs.get_feature_group(
            version=1,
            name="ratings",
        )

        self.items_fg = self.fs.get_feature_group(name="items", version=1)
        self.users_fg = self.fs.get_feature_group(name="users", version=1)

        self.users_fv = self.fs.get_or_create_feature_view(
            version=1,
            name="users",
            query=self.users_fg.select_all(),
            description="users_feature_view",
        )

    def preprocess(self, inputs):
        """Build the ranking-model payload for the first request instance.

        Args:
            inputs: Mapping with an ``"instances"`` list; the first instance
                must carry ``"user_id"`` and ``"query_emb"``.

        Returns:
            ``{"inputs": [{"ranking_features": rows, "item_ids": ids}]}`` where
            ``ids[i]`` is the isbn of ``rows[i]``.
        """
        # Extract the input instance
        instance = inputs["instances"][0]

        # Extract user_id from inputs
        user_id = instance["user_id"]

        # Search for neighbors in the candidate index
        neighbors = self.candidate_index.find_neighbors(
            instance["query_emb"],
            k=100,
        )
        # assumes the first element of each neighbor is a bytes-encoded isbn — TODO confirm
        neighbors = [neighbor[0].decode("utf-8") for neighbor in neighbors]

        # Get IDs of items already rated by the user. They are normalised to
        # str and kept in a set so the membership test below compares like with
        # like and stays O(1).
        already_bought_items_ids = {
            str(item_id)
            for item_id in (
                self.ratings_fg.select("isbn")
                .filter(self.ratings_fg.user_id == user_id)
                .read(dataframe_type="pandas")
                .values.reshape(-1)
                .tolist()
            )
        }

        # Filter candidate items to exclude those already rated by the user
        item_id_list = [
            str(item_id)
            for item_id in neighbors
            if str(item_id) not in already_bought_items_ids
        ]

        # Get item features for the candidate items
        ranking_model_inputs_df = (
            self.items_fg.select_all()
            .filter(self.items_fg.isbn.isin(item_id_list))
            .read(dataframe_type="pandas")
        )

        logging.info("✅ Articles Data Retrieved!")

        # Add user features
        user_features = self.users_fv.get_feature_vector(
            {"user_id": user_id},
            return_type="pandas",
        )

        ranking_model_inputs_df["user_id"] = user_id
        ranking_model_inputs_df = ranking_model_inputs_df.merge(
            user_features,
            on="user_id",
            how="inner",
        )

        # The feature-group read and the inner merge decide which rows exist
        # and in what order, so the ids returned alongside the features must be
        # taken from the frame itself, not from the neighbour list. Otherwise
        # postprocess() would zip scores onto the wrong items.
        scored_item_ids = ranking_model_inputs_df["isbn"].astype(str).tolist()

        ranking_model_inputs_df = ranking_model_inputs_df[self.rating_features]

        logging.info("✅ Inputs are ready!")

        return {
            "inputs": [
                {
                    "ranking_features": ranking_model_inputs_df.values.tolist(),
                    "item_ids": scored_item_ids,
                }
            ]
        }

    def postprocess(self, outputs):
        """Pair each score with its item id and sort the pairs in descending order.

        Args:
            outputs: Mapping with parallel ``"scores"`` and ``"item_ids"`` sequences.

        Returns:
            ``{"ranking": [(score, item_id), ...]}`` sorted best-first.
        """
        logging.info("✅ Predictions are ready!")

        # Merge prediction scores and corresponding item IDs into a list of tuples
        ranking = list(zip(outputs["scores"], outputs["item_ids"]))

        # Sort the ranking list by score in descending order
        ranking.sort(reverse=True)

        # Return the sorted ranking list
        return {
            "ranking": ranking,
        }


In [None]:
# # Test the transformer
# test_ranking_input = [
#         {
#             "user_id": "256843",
#             "query_emb":[1.0040990114212036, 0.02099212259054184, 0.6753973364830017, 
#                          -1.018194556236267, 1.0765583515167236, 0.8201298117637634, 
#                          0.1209947019815445, 0.5214401483535767, -1.350378394126892, 
#                          -0.1466677188873291, -0.19946162402629852, -0.004622574429959059, 
#                          -0.9494196176528931, -0.16883370280265808, -0.3382653594017029, 
#                          1.305509328842163
#                          ],
#         }
#     ]
# inputs = {"instances": test_ranking_input}

# transformer = Transformer()
# preprocessed_inputs = transformer.preprocess(inputs)
# preprocessed_inputs

In [None]:
from catboost import CatBoostRegressor

class Predict(object):
    """Scores candidate feature rows with a CatBoost model loaded from disk."""

    def __init__(self, model_path="./ranking_model/model.cbm"):
        """Load the CatBoost model.

        Args:
            model_path: Location of the ``.cbm`` file; the default is the path
                this class always used, so ``Predict()`` behaves as before.
        """
        # self.model = load(os.environ["MODEL_FILES_PATH"] + "/ranking_model.pkl")
        self.model = CatBoostRegressor()
        self.model.load_model(model_path)  # os.environ["MODEL_FILES_PATH"] +

    def predict(self, inputs):
        """Score the candidates of the first input payload.

        Args:
            inputs: Sequence whose first element is a mapping with
                ``"ranking_features"`` (rows to score) and ``"item_ids"``.

        Returns:
            ``{"item_ids": item_ids, "scores": scores}`` with one score per row.
        """
        logging.info(f"✅ Inputs: {inputs}")

        # Read, do not pop: the caller's payload is left intact, so the same
        # object can be scored again (e.g. when re-running a notebook cell).
        payload = inputs[0]
        features = payload["ranking_features"]
        item_ids = payload["item_ids"]

        # Nothing to score: skip the model call instead of passing it an empty batch.
        if not features:
            return {"item_ids": item_ids, "scores": []}

        # Make predictions
        scores = self.model.predict(features).tolist()

        return {"item_ids": item_ids, "scores": scores}


In [None]:
# # Test the predictor
# predictor = Predict()
# outputs = predictor.predict(preprocessed_inputs["inputs"])

# ranking = list(zip(outputs["scores"], outputs["item_ids"]))

# # Sort the ranking list by score in descending order
# ranking.sort(reverse=True)
# ranking[:3]


# Deploy Query Pipeline

In [None]:
from typing import Literal
import hopsworks

In [None]:
from hsml.transformer import Transformer

class HopsworksQueryModel:
    """Deploys the registered query model together with its transformer script."""

    # Name given to the serving deployment.
    deployment_name = "query"
    # Secret through which the query transformer learns which ranking deployment to call.
    secret_name = "RANKING_MODEL_TYPE"

    def __init__(self, model) -> None:
        self.model = model

    @classmethod
    def deploy(
        cls,
        ranking_model_type: Literal["ranking", "llmranking"] = "ranking",
        project=None,
    ):
        """Create the query deployment.

        Args:
            ranking_model_type: Value stored in the ``RANKING_MODEL_TYPE``
                secret before deploying.
            project: Optional Hopsworks project handle to reuse. When omitted,
                ``hopsworks.login()`` is called, as before.

        Returns:
            The deployment object returned by ``query_model.deploy(...)``.
        """
        if project is None:
            project = hopsworks.login()

        # Prepare secrets (the ranking deployment name) used in the deployment
        cls._prepare_secrets(ranking_model_type)

        mr = project.get_model_registry()
        dataset_api = project.get_dataset_api()

        # Retrieve the 'query_model' from the Model Registry
        query_model = mr.get_model(
            name="query_model",
            version=1,
        )

        # Upload input-Transformer script
        #   Copy transformer file into Hopsworks File System
        uploaded_file_path = dataset_api.upload(
            str(settings.RECSYS_DIR / "inference" / "query_transformer.py"),
            "Models",
            overwrite=True,
        )

        #   Construct the path to the uploaded script
        transformer_script_path = os.path.join(
            "/Projects",
            project.name,
            uploaded_file_path,
        )

        query_model_transformer = Transformer(
            script_file=transformer_script_path,
            resources={"num_instances": 0},
        )

        # Deploy the query model
        query_model_deployment = query_model.deploy(
            name=cls.deployment_name,
            description="Generates query embeddings from customer and further push it through the ranking deployment",
            resources={"num_instances": 0},
            transformer=query_model_transformer,
        )

        return query_model_deployment

    @classmethod
    def _prepare_secrets(cls, ranking_model_type: Literal["ranking", "llmranking"]):
        """Replace the ``RANKING_MODEL_TYPE`` secret with ``ranking_model_type``.

        Logs in with the API key from ``settings``, deletes any existing secret
        of that name, then creates it again scoped to the project.
        """
        project = hopsworks.login(
            hostname_verification=False,
            api_key_value=settings.HOPSWORKS_API_KEY.get_secret_value(),
        )
        secrets_api = hopsworks.get_secrets_api()

        # Delete an existing secret through the public `Secret.delete()` rather
        # than the private `SecretsApi._delete`.
        for secret in secrets_api.get_secrets():
            if secret.name == cls.secret_name:
                secret.delete()

        # Create the secret with the requested value.
        secrets_api.create_secret(
            cls.secret_name,
            ranking_model_type,
            project=project.name,
        )


In [None]:
# Create the query deployment, storing "ranking" in the RANKING_MODEL_TYPE secret,
# then start it.
query_model_deployment = HopsworksQueryModel.deploy(ranking_model_type="ranking")
query_model_deployment.start(await_running=180) # presumably a timeout in seconds — confirm against hsml

In [None]:
# Single-instance request used to smoke-test the query deployment.
test_query_input = [
        {
            "user_id": "251843",
        }
    ]

# NOTE(review): this rebinds `ranked_candidates`, which the ranking test cell
# above also assigns; re-running earlier cells afterwards will see this value.
ranked_candidates = query_model_deployment.predict(inputs=test_query_input)
# NOTE(review): the query Transformer cell's postprocess forwards the model output
# to the ranking deployment, so the value printed here may be a ranking rather
# than raw embeddings — confirm against the deployed query_transformer.py.
print(f"Query embeddings: {ranked_candidates['predictions']}")


# # Retrieve article ids of the top recommended items
# recommendations = get_top_recommendations(ranked_candidates["predictions"], k=3)
# recommendations

In [None]:
# Fetch the logs of the query deployment (useful when the prediction above fails).
query_model_deployment.get_logs()

In [None]:
class Transformer():
    """Pre/post-processing around the query model.

    ``preprocess`` enriches the request with the user's age; ``postprocess``
    forwards the model output to the ranking deployment named by the
    ``RANKING_MODEL_TYPE`` secret and returns that deployment's response.

    NOTE(review): this class reuses the name of ``hsml.transformer.Transformer``
    imported elsewhere in the notebook; running this cell shadows that import
    until it is imported again.
    """

    def __init__(self) -> None:
        # Connect to the Hopsworks
        project = hopsworks.login()
        fs = project.get_feature_store()

        # Retrieve the deployed-ranking-model
        ms = project.get_model_serving()
        self._retrieve_secrets()
        print(f"{self.ranking_model_type=}")
        self.ranking_server = ms.get_deployment(self.ranking_model_type)

        # Retrieve the 'users' feature view
        self.customer_fv = fs.get_feature_view(
            version=1,
            name="users",
        )

        # # Retrieve  the "ranking" feature view and initialize the batch scoring server.
        # self.ranking_fv = fs.get_feature_view(name="ranking", version=1)
        # self.ranking_fv.init_batch_scoring(1)

    def _retrieve_secrets(self):
        """Set ``self.ranking_model_type`` from the ``RANKING_MODEL_TYPE`` secret.

        Falls back to ``"ranking"`` when the secret cannot be read. Relies on
        the login performed by ``__init__`` immediately before this call, so no
        second login is done here.
        """
        secrets_api = hopsworks.get_secrets_api()
        try:
            self.ranking_model_type = secrets_api.get_secret("RANKING_MODEL_TYPE").value
        except Exception as e:
            # Deliberate best-effort: a missing or unreadable secret must not
            # stop the transformer from starting.
            logging.error(e)
            logging.error("Could not retrieve secret RANKING_MODEL_TYPE, defaulting to 'ranking'")
            self.ranking_model_type = "ranking"

    def preprocess(self, inputs):
        """Return the first request instance enriched with the user's age.

        Args:
            inputs: Either a mapping with an ``"instances"`` list or the list
                of instances itself; the first instance must carry ``"user_id"``.

        Returns:
            ``{"instances": [instance]}`` where ``instance`` is a copy of the
            first input instance with an added ``"age"`` entry.
        """
        # Check if the input data contains a key named "instances"
        # and extract the actual data if present
        inputs = inputs["instances"] if "instances" in inputs else inputs
        # Work on a shallow copy so the caller's request object is not modified.
        instance = dict(inputs[0])

        # Extract user_id from the inputs
        user_id = instance["user_id"]
        # transaction_date = inputs["transaction_date"]

        # # Extract month from the transaction_date
        # month_of_purchase = datetime.fromisoformat(inputs.pop("transaction_date"))

        # Get user features
        customer_features = self.customer_fv.get_feature_vector(
            {"user_id": user_id},
            return_type="pandas",
        )

        # Enrich inputs with the user's age
        # todo: add other features!!
        instance["age"] = customer_features.age.values[0]

        # # on-demand transformation
        # # Calculate the sine and cosine of the month_of_purchase
        # month_of_purchase = datetime.strptime(
        #     transaction_date, "%Y-%m-%dT%H:%M:%S.%f"
        # ).month

        # # Calculate the sine and cosine components for the month_of_purchase using on-demand transformation present in "ranking" feature view.
        # feature_vector = self.ranking_fv._batch_scoring_server.compute_on_demand_features(
        #     feature_vectors=pd.DataFrame([inputs]), request_parameters={"month": month_of_purchase}
        # ).to_dict(orient="records")[0]

        # inputs["month_sin"] = feature_vector["month_sin"]
        # inputs["month_cos"] = feature_vector["month_cos"]

        return {"instances": [instance]}

    def postprocess(self, outputs):
        """Send the query model output to the ranking deployment and return its response."""
        # Return ordered ranking predictions
        return self.ranking_server.predict(inputs=outputs)


In [None]:
# transformer = Transformer()
# preprocessed_inputs = transformer.preprocess(test_query_input)
# preprocessed_inputs

# Stop Deployment

In [None]:
# Stop both deployments created in this notebook.
ranking_deployment.stop()
query_model_deployment.stop()