## <span style='color:#ff5f27'> 📝 Imports </span>

In [None]:
import joblib
import os
import datetime
import pandas as pd
import numpy as np
from matplotlib import pyplot

from sklearn.metrics import mean_absolute_error
from prophet import Prophet
from prophet.serialize import model_to_json

# Mute warnings
import warnings
warnings.filterwarnings("ignore")

## <span style="color:#ff5f27;"> 📡 Connecting to Hopsworks Feature Store </span>

In [None]:
import hopsworks

# Log in to Hopsworks; the returned project handle is reused further down
# for the feature store and the model registry.
project = hopsworks.login()

# Handle to the project's feature store
fs = project.get_feature_store()

### <span style="color:#ff5f27;"> 🔪 Feature Selection </span>

You will start by selecting all the features you want to include for model training/inference.

In [None]:
# Fetch version 1 of the three feature groups this notebook joins together:
# patient details, medical details and transplant-compatibility details.
patient_info_fg = fs.get_feature_group(name="patient_info", version=1)
medical_info_fg = fs.get_feature_group(name="medical_info", version=1)
transplant_compatibility_fg = fs.get_feature_group(name="transplant_compatibility", version=1)

In [None]:
# Select features for training data.
# `select_all()` does not take a list of feature names, so it is called
# without arguments: every feature of `patient_info` (including "id" and
# "date") is selected. The other two groups exclude "id" and "date",
# presumably so the joined query carries those columns only once.
selected_features = (
    patient_info_fg.select_all()
    .join(medical_info_fg.select_except(["id", "date"]))
    .join(transplant_compatibility_fg.select_except(["id", "date"]))
)

In [None]:
# Preview the first 5 rows of the selected features
selected_features.show(5)

## <span style="color:#ff5f27;"> ⚙️ Transformation Functions </span>


In [None]:
# List the names of the transformation functions returned by the feature store
[f.name for f in fs.get_transformation_functions()]

In [None]:
# Look up the two transformation functions by name; they are mapped to
# individual features below and passed to the feature view.
label_encoder = fs.get_transformation_function(name="label_encoder")

standard_scaler = fs.get_transformation_function(name="standard_scaler")

In [None]:
# Categorical features; every one of them is mapped to the label encoder.
features_category = [
    'gender',
    'age_cat',
    'blood_gp',
    'underlying_disease',
    'gestation',
    'prior_transplant',
    'if_transplanted',
]

transformation_functions_category = dict.fromkeys(features_category, label_encoder)

In [None]:
# Numerical features; every one of them is mapped to the standard scaler.
features_numerical = [
    'age_at_list_registration',
    'dialysis_duration',
    'number_prior_transplant',
    'cpra',
    'hla_a1',
    'hla_a2',
    'hla_b1',
    'hla_b2',
    'hla_dr1',
    'hla_dr2',
]

transformation_functions_numerical = dict.fromkeys(features_numerical, standard_scaler)

In [None]:
# Combine the categorical and numerical mappings into a single dictionary
# (on a duplicate key the numerical entry would win, as it is applied last).
transformation_functions = {
    **transformation_functions_category,
    **transformation_functions_numerical,
}

## <span style="color:#ff5f27;"> ⚙️ Feature View Creation </span>


In [None]:
# Get or create the 'medical_features' feature view.
# The view is built from the joined feature query, uses "duration" as the
# label column, and attaches the per-feature transformation functions.
feature_view = fs.get_or_create_feature_view(
    name='medical_features',
    version=1,
    query=selected_features,
    labels=["duration"],
    transformation_functions=transformation_functions,
)

## <span style="color:#ff5f27;"> 🏋️ Training Dataset Creation</span>


In [None]:
# Read the patient_info feature group; its 'date' column is used below to
# derive the boundaries of a percentage-based chronological split.
df = patient_info_fg.read()

def split_dfs(df, train_fraction=0.8):
    """Compute the date boundaries of a chronological train/test split.

    The frame is sorted by its ``date`` column. The first ``train_fraction``
    share of the rows (by count) forms the training window and the remaining
    rows form the test window.

    Note that the boundaries are calendar dates: when several rows share the
    day on which the split falls, ``train_end`` and ``test_start`` can be the
    same date.

    Args:
        df: DataFrame with a ``date`` column holding datetime-like values.
        train_fraction: Share of rows assigned to the training window.
            Defaults to 0.8.

    Returns:
        dict with the keys ``train_start``, ``train_end``, ``test_start`` and
        ``test_end``; each value is the result of calling ``.date()`` on the
        corresponding boundary timestamp.

    Raises:
        ValueError: If the training or the test window would contain no rows.
    """
    ordered_dates = df.sort_values(by='date')['date']
    # Compute the positional cut once so both windows share the same boundary.
    cutoff = int(len(ordered_dates) * train_fraction)
    train_dates = ordered_dates.iloc[:cutoff]
    test_dates = ordered_dates.iloc[cutoff:]

    if train_dates.empty or test_dates.empty:
        raise ValueError(
            f"Cannot split {len(ordered_dates)} row(s) with "
            f"train_fraction={train_fraction}: both windows need at least one row."
        )

    return {
        'train_start': train_dates.min().date(),
        'train_end': train_dates.max().date(),
        'test_start': test_dates.min().date(),
        'test_end': test_dates.max().date(),
    }

# Derive the train/test date boundaries from the patient_info rows
split_dict = split_dfs(df)

In [None]:
# Inspect the computed split boundaries
split_dict

In [None]:
# Build the train/test splits from the date boundaries computed above.
# NOTE(review): event_time=True presumably keeps the event-time ("date")
# column in the returned features; later cells sort by that column and
# convert it into Prophet's `ds` column. Confirm against the Hopsworks docs.
X_train, X_test, y_train, y_test = feature_view.train_test_split(
    train_start=split_dict['train_start'],
    train_end=split_dict['train_end'],
    test_start=split_dict['test_start'],
    test_end=split_dict['test_end'],
    event_time=True,
)
X_train.head(3)

In [None]:
# Preview the first 3 training labels
y_train.head(3)

In [None]:
# Sort the X_train DataFrame by its "date" column in ascending order
X_train = X_train.sort_values("date")
# Reindex y_train so its rows follow the order of the sorted X_train
y_train = y_train.reindex(X_train.index)

# Sort the X_test DataFrame by its "date" column in ascending order
X_test = X_test.sort_values("date")
# Reindex y_test so its rows follow the order of the sorted X_test
y_test = y_test.reindex(X_test.index)

In [None]:
# Shape the training frame for Prophet: `y` holds the target and `ds` holds
# the "date" values parsed to timestamps with any timezone info removed.
X_train['y'] = y_train
X_train['ds'] = pd.to_datetime(X_train['date']).map(
    lambda stamp: stamp.replace(tzinfo=None)
)
# "date" is now redundant with `ds`
X_train.drop(columns=["date"], inplace=True)

In [None]:
# Shape the test frame the same way as the training frame: `ds` holds the
# "date" values parsed to timestamps with any timezone info removed.
X_test['ds'] = pd.to_datetime(X_test['date']).map(
    lambda stamp: stamp.replace(tzinfo=None)
)
# "date" is now redundant with `ds`
X_test.drop(columns=["date"], inplace=True)

## <span style="color:#ff5f27;"> 🧬 Modeling</span>


In [None]:
# Columns of the training frame that Prophet uses as additional regressors
additional_regressors = [
    'age_at_list_registration',
    'cpra',
    'hla_a1',
    'hla_a2',
    'hla_b1',
    'hla_b2',
    'hla_dr1',
    'hla_dr2',
]

# Prophet model with yearly and weekly seasonality enabled and daily disabled
model = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
)

# Custom additive monthly seasonality; 30.5 days approximates the average
# length of a month
model.add_seasonality(name='monthly', period=30.5, fourier_order=5, mode='additive')

# Register the regressors in list order
for regressor in additional_regressors:
    model.add_regressor(regressor)

# Fit on the prepared training frame (needs the `ds` and `y` columns)
model.fit(X_train)

In [None]:
# Predict on the prepared test frame
forecast = model.predict(X_test)

# Summarize the forecast: timestamp, point estimate and lower/upper bounds
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].head())

# Plot the forecast; `fig` is kept because it is saved to disk further down
fig = model.plot(forecast)

pyplot.show()

In [None]:
# Plot the individual components of the forecast
model.plot_components(forecast)

In [None]:
# Calculate the MAE between the expected (y_test) and predicted (yhat) values
y_pred = forecast['yhat']
mae = mean_absolute_error(y_test, y_pred)
print('MAE: %.3f' % mae)

# Metrics dictionary that is attached to the model when it is registered
metrics = {
    "mae": round(mae,2)
}
metrics

### <span style="color:#ff5f27;">⚙️ Model Schema</span>


In [None]:
from hsml.schema import Schema
from hsml.model_schema import ModelSchema

# Define the input schema using the values of X_test.
# NOTE(review): `.values` hands over a plain NumPy array, so column names are
# not available to the schema - confirm this is intended rather than passing
# the DataFrame itself.
input_schema = Schema(X_test.values)

# Define the output schema using y_train
output_schema = Schema(y_train)

# Create a ModelSchema object specifying the input and output schemas
model_schema = ModelSchema(
    input_schema=input_schema, 
    output_schema=output_schema,
)

# Convert the model schema to a dictionary for further inspection or serialization
model_schema.to_dict()

## <span style="color:#ff5f27;">📝 Register model</span>


In [None]:
# Directory that collects the model artifacts; it is handed to
# `forecast_model.save()` when the model is registered.
model_dir = "forecast_model"

# Create the directory if it does not exist yet. `exist_ok=True` replaces the
# separate existence check, so there is no gap between checking and creating.
os.makedirs(model_dir, exist_ok=True)

# Serialize the fitted Prophet model to JSON
with open(os.path.join(model_dir, 'serialized_model.json'), 'w') as fout:
    fout.write(model_to_json(model))

# Save the forecast plot as an image file next to the serialized model
fig.savefig(os.path.join(model_dir, "forecast.png"))

In [None]:
# Get the model registry
mr = project.get_model_registry()

# Create a new model in the model registry
forecast_model = mr.python.create_model(
    name="waiting_time_forecast_model",   # Name for the model
    metrics=metrics,                      # Metrics used for evaluation
    model_schema=model_schema,            # Schema defining the model's input and output
    input_example=X_test.sample(),        # One randomly sampled test row as example input
    description="Waiting time for a deceased donor kidney transplant forecasting model",  # Description of the model
)

# Register the model together with the artifacts stored in model_dir.
# NOTE(review): per the Hopsworks model registry docs, save() uploads the
# contents of the given directory - confirm for the installed version.
forecast_model.save(model_dir)

---