In [15]:
import hopsworks

project = hopsworks.login()

2025-01-07 15:17:22,313 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-01-07 15:17:22,320 INFO: Initializing external client
2025-01-07 15:17:22,321 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-01-07 15:17:23,833 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1159324


In [16]:
# get Hopsworks Model Registry handle
mr = project.get_model_registry()

# get Hopsworks Model Serving handle
ms = project.get_model_serving()

## Upload feature engineering script

In [17]:
dataset_api = project.get_dataset_api()
uploaded_file_path = dataset_api.upload("utils/feature_engineering.py", "Resources", overwrite=True)

Uploading: 0.000%|          | 0/5948 elapsed<00:00 remaining<?

## Create Predict class

In [25]:
%%writefile my_predictor.py
import os
import hopsworks
from catboost import CatBoostRegressor

class Predict:
    def __init__(self):
        """ Initialization code goes here:
            - Download the model artifact
            - Load the model
        """
        project = hopsworks.login()
        mr = project.get_model_registry()
        
        dataset_api = project.get_dataset_api()
        
        # Download model
        my_model = mr.get_best_model("grailed_prediction_catboost_model", "RMSLE", 'max')
        model_dir = my_model.download()
        
        # Download feature engineering file
        current_dir = os.getcwd()
        feature_engineering_file = dataset_api.download(
            "Resources/feature_engineering.py", overwrite=True
        )
        # Load the feature engineering file into the current path
        import sys
        sys.path.append(current_dir)
        
        # Load the feature engineering file
        from feature_engineering import engineering_all_features
        self.engineering_all_features = engineering_all_features
        # Print the feature engineering parameters
        print(self.engineering_all_features)
        
        # Load the saved model
        self.model = CatBoostRegressor()
        self.model.load_model(f"{model_dir}/model.json")
        
        # Define the embedding features (same as in training)
        self.embedding_features = ['designer_names', 'hashtags', 'description', 'title']
        
    def predict(self, inputs):
        """ Serve predictions using the trained model"""
        """
        Expects inputs as a dictionary with the same features used in training:
        - designer_names (list of strings, in original API it's just a string)
        - category_path (string)
        - color (string)
        - condition (string)
        - followerno (int)
        - hashtags (list of strings)
        - description (string)
        - title (string)
        """
        labels = ["designer_names", "category_path", "color", "condition", "followerno", "hashtags", "description", "title", "size"]
        transformed_features = self.engineering_all_features([inputs], labels) 
        polars_df = transformed_features[0]
        pandas_df = polars_df.to_pandas()
        print(f"{pandas_df}=")

        # Drop size column
        pandas_df = pandas_df.drop(columns=["size"])

        # Make prediction
        prediction = self.model.predict(pandas_df)
        
        return float(prediction[0])  # Return single prediction value

Overwriting my_predictor.py


### Test it

In [5]:
from utils.feature_engineering import get_latest_listed_products, transform_features
product = get_latest_listed_products(no_of_hits=1)[0]

In [6]:
predictor = Predict()

NameError: name 'Predict' is not defined

In [None]:
predictor.predict(product), product['price']

## (Doesn't work ATM) Deploy the predictor 

In [26]:
import os

### Upload it

In [27]:
uploaded_file_path = dataset_api.upload("my_predictor.py", "Resources", overwrite=True)
predictor_script_path = os.path.join("/Projects", project.name, uploaded_file_path)

Uploading: 0.000%|          | 0/2488 elapsed<00:00 remaining<?

### Deployment

In [28]:
my_model = mr.get_best_model("grailed_prediction_catboost_model", "RMSLE", 'max')
my_model

Model(name: 'grailed_prediction_catboost_model', version: 2)

In [29]:
my_predictor = ms.create_predictor(my_model,
   # optional
   # model_server="PYTHON",
   serving_tool="KSERVE",
   script_file=predictor_script_path
)

## Create an entire deployment

In [None]:
my_deployment = ms.create_deployment(my_predictor, name="grailed05")
my_deployment.save()

In [None]:
my_deployment.get_state()

In [34]:
deps = ms.get_deployments()

In [37]:
deps[1].stop()

  0%|          | 0/2 [00:00<?, ?it/s]

RestAPIError: Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/1159324/serving/353360). Server response: 
HTTP code: 400, HTTP reason: Bad Request, body: b'{"errorCode":240003,"usrMsg":"Deployment is starting. Please, try again later","errorMsg":"Serving instance could not be started/stopped"}', error code: 240003, error msg: Serving instance could not be started/stopped, user msg: Deployment is starting. Please, try again later