# Vehicle Sales Price Predictions Workshop - Part 3 of 3


## Inference Pipeline

In order to make a machine learning system from this dataset, we have structured the service into 3 pipelines:

1. feature engineering pipeline notebook (see Part 1)
2. training pipeline notebook (see Part 2)
3. inferencing pipeline notebook (this Part 3)

This notebook outlines the third step, i.e. the inference pipeline.

In [None]:
# We need to install a library to deploy the model. This install throws an error in colab, but will still work.
# The requirement below installs `hsfs` (with its `python` extra) directly from the
# `master` branch of the logicalclocks/feature-store-api GitHub repository.
!pip install --quiet "hsfs[python] @ git+https://github.com/logicalclocks/feature-store-api@master#subdirectory=python"

In [None]:
# We will use the Hopsworks Model Registry to instantiate the model

import hopsworks
import joblib
import torch
import torch.nn as nn

# Log in to Hopsworks and get handles to the project's feature store and model registry
proj = hopsworks.login()
fs = proj.get_feature_store()
mr = proj.get_model_registry()

# Version 1 of the "car_prices" feature view; it is queried for batch data further down
feature_view = fs.get_feature_view("car_prices", version=1)

# Version 1 of the "car_prices" entry in the model registry
model = mr.get_model(
    "car_prices",
    version=1,
)

# Download the model directory from the Model Registry
model_dir = model.download()

# Load the label encoders with joblib from the downloaded model directory
# NOTE(review): joblib.load unpickles the file, so only load artifacts from a registry you trust
label_encoders = joblib.load(model_dir + "/label_encoders.pkl")


# Definition of the model
class DeepRegressor(nn.Module):
    """Fully connected regressor: two 64-unit ReLU hidden layers and one linear output unit."""

    def __init__(self, input_size):
        super().__init__()
        # Layers are created in the order fc1 -> fc2 -> fc3 and keep these
        # attribute names, which are also the prefixes of the state-dict keys.
        self.fc1 = nn.Linear(in_features=input_size, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=1)

    def forward(self, x):
        """Return the un-activated output of the last layer for the input batch ``x``."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden)

# Load the file of the model onto the CPU.
# The loaded object is later called as a model (it is passed to predict_selling_price),
# so the file is expected to hold a pickled nn.Module rather than a bare state dict.
# PyTorch 2.6+ defaults to weights_only=True, which refuses to unpickle custom classes
# such as DeepRegressor, so full unpickling is requested explicitly.
# NOTE(review): weights_only=False executes pickle code; only load files from a registry you trust.
model_data = torch.load(
    model_dir + '/regression_model.pth',
    map_location=torch.device('cpu'),
    weights_only=False,
)

In [None]:
# Locate the state dict: either stored under 'model_state_dict' in a checkpoint
# dictionary, or obtained from the model object itself
if isinstance(model_data, dict) and 'model_state_dict' in model_data:
    state_dict = model_data['model_state_dict']
elif isinstance(model_data, torch.nn.Module):
    state_dict = model_data.state_dict()
else:
    raise ValueError("The file does not contain a valid PyTorch model or the format is unexpected.")

# Dump every state-dict entry: its name, its shape, then its values
print("Show the weights of the different layers of the model:")
for param_name, param in state_dict.items():
    print(f"\nLayer: {param_name}", f"Weights: {param.shape}", param, sep="\n")

if isinstance(model_data, dict):
    # Checkpoint dictionary: describe every entry other than the state dict itself
    print("\nComplete structure of the saved model:")
    for entry_key, entry in model_data.items():
        if entry_key == 'model_state_dict':
            continue
        print(f"\nStructure of the key '{entry_key}':")
        if isinstance(entry, dict):
            print("  - Dictionnary with keys :")
            for nested_key in entry:
                print(f"    - {nested_key}")
        elif isinstance(entry, torch.Tensor):
            print(f"  - Tensor with shape : {entry.shape}")
        else:
            print(f"  - Type : {type(entry)}")
elif isinstance(model_data, torch.nn.Module):
    # The file held the model object itself: show its architecture
    print("\nThe saved model is a direct PyTorch model.")
    print("Architecture of the model :")
    print(model_data)

In [None]:
# Fetch batch data from the feature view, starting at 2015-07-01 00:00;
# it is used further down as the input of the prediction function
test_data = feature_view.get_batch_data(start_time="2015-07-01 00:00")
# Last expression of the cell: displays the retrieved data
test_data

In [None]:
from sklearn.preprocessing import LabelEncoder

def encode_categorical_data(dataset, label_encoders):
    """Integer-encode every ``object``-dtype column of ``dataset`` in place.

    A column that already has an entry in ``label_encoders`` is encoded with
    that fitted encoder, so encoders loaded from an earlier run (for example
    the ones downloaded with the model) keep their label -> integer mapping
    instead of being re-fitted on the new data. Labels such an encoder has
    never seen are encoded as ``-1`` rather than raising.

    A column without an entry gets a freshly fitted ``LabelEncoder``, which is
    then stored in ``label_encoders``.

    Args:
        dataset: pandas DataFrame to encode; its categorical columns are
            overwritten.
        label_encoders: dict mapping column name -> fitted encoder exposing
            ``classes_``; updated in place for newly fitted columns.

    Returns:
        The same ``dataset`` object, with categorical columns replaced by
        integer codes.
    """
    for column in dataset.columns:
        # Only 'object' (categorical) columns are encoded; the rest pass through
        if dataset[column].dtype == 'object':
            encoder = label_encoders.get(column)
            if encoder is None:
                # No encoder known for this column: fit one and remember it
                encoder = LabelEncoder()
                dataset[column] = encoder.fit_transform(dataset[column])
                label_encoders[column] = encoder
            else:
                # A fitted LabelEncoder encodes each label as its position in
                # classes_; rebuilding that lookup lets unseen labels fall
                # back to -1 instead of raising.
                codes = {label: code for code, label in enumerate(encoder.classes_)}
                dataset[column] = dataset[column].map(codes).fillna(-1).astype(int)
    return dataset


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd

def predict_selling_price(data, label_encoders, model):
    """Run ``model`` on ``data`` and attach the result as ``predicted_sale_price``.

    Args:
        data: pandas DataFrame of features. It is modified in place: its
            categorical columns are encoded by ``encode_categorical_data`` and
            a ``predicted_sale_price`` column is added (or overwritten).
        label_encoders: dict of per-column encoders, forwarded to
            ``encode_categorical_data``.
        model: PyTorch module; it is switched to eval mode and called on a
            float32 tensor built from the feature columns.

    Returns:
        The same ``data`` object, including the ``predicted_sale_price`` column.
    """
    # Preprocess the data
    processed_data = encode_categorical_data(data, label_encoders)

    # A previous call leaves 'predicted_sale_price' in the frame (e.g. when the
    # notebook cell is re-run); it must not be fed back to the model as a feature.
    feature_columns = [c for c in processed_data.columns if c != 'predicted_sale_price']

    # Convert data to PyTorch tensors
    X_test = torch.tensor(processed_data[feature_columns].values.astype(np.float32))

    # Pass data to model for prediction
    model.eval()
    with torch.no_grad():
        predictions = model(X_test).numpy()

    # Assign the flat array positionally. Going through an intermediate
    # DataFrame/Series would align on index labels and produce NaN or misplaced
    # values whenever `data` does not have a 0..n-1 index.
    data['predicted_sale_price'] = predictions.reshape(-1)
    return data

# Score the batch retrieved above. predict_selling_price adds a 'predicted_sale_price'
# column to test_data itself and returns that same DataFrame, so df_encoded and
# test_data refer to one object.
df_encoded = predict_selling_price(test_data, label_encoders, model_data)
# Last expression of the cell: displays the scored data
df_encoded

### 8. Use your trained model to make price estimates

Write a prediction function that uses the trained model saved in the regression_model.pth file, then test it on data that the user enters manually. As a reminder, the categorical data was label-encoded and the encoders were saved in the label_encoders.pkl file. There are also numeric variables that the user must provide.

In [None]:
# Install Gradio, which is used below to build the interactive input form
!pip install --quiet gradio

In [None]:
import gradio as gr
from functools import partial

# Define the method that predicts the sale price for the values entered
def print_values(year, make, model, trim, body, transmission, condition, odometer, color, interior, model_data, label_encoders):
    """Predict the sale price of a single vehicle described by the form fields.

    Args:
        year, make, model, trim, body, transmission, condition, odometer,
            color, interior: the ten values entered in the form; they become
            the columns of a one-row DataFrame, in this order.
        model_data: the PyTorch model forwarded to ``predict_selling_price``.
        label_encoders: dict of per-column encoders forwarded to
            ``predict_selling_price``.

    Returns:
        The predicted sale price as a plain Python ``float``.
    """
    data = {
        "Year": [year],
        "Make": [make],
        "Model": [model],
        "Trim": [trim],
        "Body": [body],
        "Transmission": [transmission],
        "Condition": [condition],
        "Odometer": [odometer],
        "Color": [color],
        "Interior": [interior]
    }
    df = pd.DataFrame(data)
    df_encoded = predict_selling_price(df, label_encoders, model_data)
    # The frame holds exactly one row, at position 0 (position 1 does not exist).
    # float() turns the NumPy scalar into a value any JSON encoder accepts.
    return float(df_encoded.iloc[0]['predicted_sale_price'])

# Bind the loaded model and label encoders, so that the callback only needs the ten form fields
print_values_partial = partial(print_values, model_data=model_data, label_encoders=label_encoders)


# Create the Gradio interface
# NOTE(review): the component variables below are notebook globals; `model` rebinds the
# model-registry handle created earlier in the notebook.
with gr.Blocks() as demo:
    with gr.Row():
        year = gr.Number(label="Year", value=2014)
        make = gr.Textbox(label="Make", value="9")
        model = gr.Textbox(label="Model", value="88")
        trim = gr.Textbox(label="Trim", value="200")
        body = gr.Textbox(label="Body", value="14")

    with gr.Row():
        transmission = gr.Textbox(label="Transmission", value="0")
        condition = gr.Number(label="Condition", value=45.0)
        odometer = gr.Number(label="Odometer", value=33761.0)
        color = gr.Textbox(label="Color", value="14")
        interior = gr.Textbox(label="Interior", value="1")

    submit_button = gr.Button("Submit")
    output = gr.JSON(label="Entered Values")

    # The handler receives only the ten inputs listed here, so it must be the partial:
    # print_values itself also requires model_data and label_encoders and would
    # raise a TypeError on every click.
    submit_button.click(print_values_partial,
                        inputs=[year, make, model, trim, body, transmission, condition, odometer, color, interior],
                        outputs=output)

# Launch the interface
demo.launch(share=True)


This completes the feature engineering, training and inference pipelines, and with them the delivery of an end-to-end machine learning system.