In [None]:
import os
import joblib
import numpy as np
import pandas as pd
import xgboost as xgb

import matplotlib.pyplot as plt


import hopsworks

from hsml.schema import Schema
from hsml.model_schema import ModelSchema

In [None]:
# Connection settings for the Hopsworks cluster (project name and host are not secret).
# NOTE(review): presumably consumed by hopsworks.login() in the next cell - confirm.
os.environ['HOPSWORKS_PROJECT']="ondemad_feature"
os.environ['HOPSWORKS_HOST']="8c7943b0-a2d3-11ed-8cee-11db34df42ef.cloud.hopsworks.ai"

# The API key is a credential and must never be stored in the notebook.
# Use a key already exported in the environment; otherwise prompt for it without echo.
# NOTE(review): the key that used to be hard-coded here should be revoked.
if not os.environ.get('HOPSWORKS_API_KEY'):
    from getpass import getpass  # local import keeps this cell self-contained

    os.environ['HOPSWORKS_API_KEY'] = getpass("Hopsworks API key: ")

In [None]:
# Open a session with Hopsworks; `project` is the entry point for every later API handle.
project = hopsworks.login()

# Feature store handle, used below for feature groups, views and transformations.
fs = project.get_feature_store()

In [None]:
# Handle to version 1 of the "housing_fg" feature group (looked up or created, per the API name).
# Rows are keyed by "houseid" and the group is online-enabled.
profile_fg = fs.get_or_create_feature_group(
    name="housing_fg",
    version=1,
    primary_key=["houseid"],
    online_enabled=True,
    description="housing data",
)

In [None]:
# Query selecting every feature of the housing feature group; it backs the feature view below.
query = profile_fg.select_all()

In [None]:
# Fetch the "label_encoder" transformation function from the feature store.
label_encoder = fs.get_transformation_function(name="label_encoder")

# Feature name -> transformation function; only "ocean_proximity" is encoded.
transformation_functions = dict(ocean_proximity=label_encoder)

In [None]:
# Feature view "housing_fv" v1 over the full housing query.
# "median_house_value" is the label; the transformation mapping is attached to the view.
feature_view = fs.get_or_create_feature_view(
    name="housing_fv",
    version=1,
    query=query,
    labels=["median_house_value"],
    transformation_functions=transformation_functions,
)

In [None]:
# Materialise a 70/30 random train/test split of the feature view as CSV.
# wait_for_job is True so that the materialisation job has finished before the
# next cell reads the split back; returning early left that read racing the job.
# Returns the new training dataset version and the job handle.
td_version, td_job = feature_view.create_train_test_split(
    description = 'housing_training_dataset_random_splitted',
    data_format = 'csv',
    test_size = 0.3,
    write_options = {'wait_for_job': True},
    coalesce = True,
)

In [None]:
# Read back split version 1: feature frames (train_x/test_x) and label frames (train_y/test_y).
# The same version number is passed to init_serving() in the predictor script.
(
    train_x,
    test_x,
    train_y,
    test_y,
) = feature_view.get_train_test_split(1)

In [None]:
# Remove the primary-key column before training: it identifies a row, it is not a feature.
# Rebinding with errors="ignore" (instead of inplace=True) keeps this cell safe to re-run;
# a second execution no longer raises KeyError once the column is already gone.
train_x = train_x.drop(columns=["houseid"], errors="ignore")
test_x = test_x.drop(columns=["houseid"], errors="ignore")

In [None]:
# XGBoost regressor fitted on the training features against the training labels.
# Despite the name `clf`, this is a regressor; the name is kept because later cells use it.
clf = xgb.XGBRegressor(verbosity=0)
clf.fit(X=train_x, y=train_y)

In [None]:
# Model registry handle for the current project; used below to register the trained model.
mr = project.get_model_registry()

In [None]:
# Local staging directory for the serialized model.
model_dir = "housing_model"
# exist_ok=True makes this a no-op on re-runs and avoids the check-then-create race.
os.makedirs(model_dir, exist_ok=True)

# The file name must stay "housing_model.pkl": the predictor script loads it by that name.
pkl_file_name = os.path.join(model_dir, "housing_model.pkl")

joblib.dump(clf, pkl_file_name)

# Describe the model's inputs and outputs from the training frames.
input_schema = Schema(train_x)
output_schema = Schema(train_y)
model_schema = ModelSchema(input_schema=input_schema, output_schema=output_schema)

# Create the registry entry; a single random training row serves as the input example.
model = mr.python.create_model(
    name="housingmodel",
    description="housing Model",
    input_example=train_x.sample(),
    model_schema=model_schema,
)

# Upload the pickled model as the artifact of this registry entry.
model.save(pkl_file_name)

In [None]:
%%writefile predict_example.py
import os
import numpy as np
import hsfs
import joblib

from features import coord_to_postcode

class Predict(object):
    """Predictor script: enriches a request with feature-store data and runs the model."""

    def __init__(self):
        """Set up the serving state: feature view handle and the trained model."""
        # Connect to the feature store.
        connection = hsfs.connection()
        self.fs = connection.get_feature_store()

        # Feature view "housing_fv" version 1, prepared for serving.
        # NOTE(review): the argument to init_serving is presumably the training
        # dataset version - confirm against the hsfs documentation.
        self.fv = self.fs.get_feature_view("housing_fv", 1)
        self.fv.init_serving(1)

        # The pickled model sits next to the other deployment artifacts.
        model_path = os.environ["ARTIFACT_FILES_PATH"] + "/housing_model.pkl"
        self.model = joblib.load(model_path)
        print("Initialization Complete")

    def predict(self, inputs):
        """Serve a prediction request using the trained model.

        `inputs` is indexed positionally: [0] houseid, [1] longitude, [2] latitude.
        Returns the model output converted to a plain list.
        """
        # Derive the zipcode on demand from the request coordinates.
        longitude, latitude = inputs[1], inputs[2]
        zipcode = coord_to_postcode.coord2zipcode({'longitude': longitude, 'latitude': latitude})

        # Look up the stored features by primary key and pass the computed zipcode in.
        raw_vector = self.fv.get_feature_vector(
            {"houseid": inputs[0]},
            passed_features={"zipcode": zipcode},
        )

        # Position 9 is not fed to the model.
        # NOTE(review): presumably the "houseid" column dropped before training - confirm.
        skipped_positions = {9}
        model_input = [
            value
            for position, value in enumerate(raw_vector)
            if position not in skipped_positions
        ]

        # The model expects a single row; numpy arrays are not JSON serializable, hence tolist().
        prediction = self.model.predict(np.asarray(model_input).reshape(1, -1))
        return prediction.tolist()


In [None]:
# Upload the generated predictor script to the project's "Models" dataset,
# replacing any previous copy.
dataset_api = project.get_dataset_api()
uploaded_file_path = dataset_api.upload(
    "predict_example.py",
    "Models",
    overwrite=True,
)

# Path of the uploaded script under /Projects/<project name>/, as passed to model.deploy().
predictor_script_path = os.path.join(
    "/Projects",
    project.name,
    uploaded_file_path,
)

In [None]:
# Fetch version 1 of the registered model.
mr = project.get_model_registry()
model = mr.get_model(name="housingmodel",version=1)

# Reuse the deployment when it already exists; otherwise create it.
# Only Exception is caught: the previous bare `except:` also swallowed
# KeyboardInterrupt and SystemExit, so interrupting the cell triggered a deploy.
try:
    ms = project.get_model_serving()
    deployment = ms.get_deployment("housingmodeldeployment")
except Exception:
    deployment = None

# The lookup failed, or returned None instead of raising: create the deployment.
if deployment is None:
    deployment = model.deploy(
        name="housingmodeldeployment", 
        serving_tool="KSERVE",
        script_file=predictor_script_path
    )

In [None]:
# Start the deployment unless it already reports the "Running" status.
state = deployment.get_state()

if state.status == "Running":
    print("Deployment already running")
else:
    deployment.start()
    deployment.describe()

In [None]:
# Show the deployment state again, after the start request above.
deployment.get_state()

In [None]:
# Smoke-test the endpoint. The predictor script indexes the inputs positionally:
# [houseid, longitude, latitude].
deployment.predict(
    {"inputs": [0,-122.23,37.88]}
)