In [None]:
import pickle
import pandas as pd
import numpy as np
import keras

In [2]:
# Name of the column the model predicts; used below to split features from labels
# and to look up the matching scaler.
PREDICT_TARGET = "price"
# Folder the saved model, test set and scalers are loaded from.
SAVE_FOLDER = "new_model"

In [None]:
# Trained Keras model saved under SAVE_FOLDER.
model = keras.saving.load_model(f"{SAVE_FOLDER}/model.keras")

# Test set (features plus the target column) stored as parquet.
test_data = pd.read_parquet(f'{SAVE_FOLDER}/X_test.parquet')

# Mapping of column name -> fitted scaler, used later to undo the scaling.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open(f'{SAVE_FOLDER}/scalers.pkl', 'rb') as scalers_file:
    scalers = pickle.load(scalers_file)

In [None]:
# Split the test set into the target column (labels) and everything else (features).
test_labels = test_data[[PREDICT_TARGET]]
test_features = test_data.drop(columns=[PREDICT_TARGET])

print("\n Evaluate the new model against the test set:")
# Kept as the last expression so the value returned by evaluate() is shown as cell output.
model.evaluate(x=test_features, y=test_labels)

In [5]:
# Get predictions for the whole test set.
# model.predict() runs inference in batches and returns a NumPy array, which is the
# recommended path for a full dataset (calling the model directly does a single
# forward pass over everything and returns a backend tensor).
prediction = model.predict(test_features)

# Work on a copy so test_data keeps the true (scaled) labels.
predicted_df = test_data.copy()
predicted_df[[PREDICT_TARGET]] = prediction  # Replace the target column with the predicted one

In [6]:
# Undo the scaling for every column of predicted_df that has a scaler stored for it.
# NOTE: this overwrites predicted_df in place, so running the cell twice applies the
# inverse transform twice.
for column_name, scaler in scalers.items():
    if column_name not in predicted_df.columns:
        continue
    scaled_column = predicted_df[[column_name]]
    predicted_df[[column_name]] = scaler.inverse_transform(scaled_column)

In [7]:
def undummify(df, prefix_sep="_"):
    """Collapse one-hot ("dummy") columns back into one categorical column per prefix.

    Each column name is split on the first ``prefix_sep``: the part before it is the
    name of the output column, the part after it is the category value. For every row
    the category is taken from the dummy column holding the largest value
    (``idxmax``). Columns whose name does not contain ``prefix_sep`` are passed
    through unchanged.

    Args:
        df: DataFrame with string column names.
        prefix_sep: Separator between the prefix and the category in dummy column names.

    Returns:
        A DataFrame with one column per distinct prefix (in order of first
        appearance) and the same index as ``df``. A frame without columns yields an
        empty frame with that index.
    """
    cols2collapse = {
        item.split(prefix_sep)[0]: (prefix_sep in item) for item in df.columns
    }
    series_list = []
    for col, needs_to_collapse in cols2collapse.items():
        if needs_to_collapse:
            # Match on the exact "<prefix><sep>" start of the name. A substring match
            # (DataFrame.filter(like=...)) would also pick up unrelated columns that
            # merely contain the prefix, e.g. prefix "a" matching "ba_z".
            dummy_prefix = col + prefix_sep
            dummy_cols = [name for name in df.columns if name.startswith(dummy_prefix)]
            undummified = (
                df[dummy_cols]
                .idxmax(axis=1)
                .apply(lambda x: x.split(prefix_sep, maxsplit=1)[1])
                .rename(col)
            )
            series_list.append(undummified)
        else:
            series_list.append(df[col])
    if not series_list:
        # pd.concat raises on an empty list; return an empty frame on the same index.
        return pd.DataFrame(index=df.index)
    undummified_df = pd.concat(series_list, axis=1)
    return undummified_df

# Every column whose name contains "category" is treated as a one-hot category column.
categores = list(filter(lambda name: "category" in name, predicted_df.columns))

# Collapse the one-hot columns into one column and swap it in for the originals.
concat_categories = undummify(predicted_df[categores])
predicted_df = predicted_df.drop(columns=categores)
predicted_df["category"] = concat_categories

In [None]:
# See where the predicted target (price) is below 0.
# Uses PREDICT_TARGET rather than a hard-coded column name, like the rest of the notebook.
predicted_df[predicted_df[PREDICT_TARGET] < 0]

In [None]:
# NOTE(review): notebook convention is to keep all imports in the first cell.
from IPython.display import YouTubeVideo

# Embed the YouTube video with id NlWCLw75XnE in the cell output (800x300).
YouTubeVideo('NlWCLw75XnE', width=800, height=300)

Let's look at the worst prediction the model makes, i.e. the test sample with the largest absolute error.

In [None]:
# True target values, mapped back to the original scale with the target's scaler.
unscaled_labels = scalers[PREDICT_TARGET].inverse_transform(test_labels)

# Absolute error per test row; predictions are reshaped to a column so they line up
# with the labels.
predicted_column = predicted_df[PREDICT_TARGET].values.reshape(-1, 1)
absolute_errors = np.abs(predicted_column - unscaled_labels)

# Position of the row with the largest absolute error.
worst_prediction = np.argmax(absolute_errors)
predicted_df.iloc[[worst_prediction]]

In [None]:
# Predicted and true value for the worst row. iloc is positional, so no reset_index()
# is needed, and each value is looked up once instead of twice.
worst_predicted_value = predicted_df[PREDICT_TARGET].iloc[worst_prediction]
worst_true_value = unscaled_labels[worst_prediction][0]

comparison = {
    "prediction": worst_predicted_value,
    "target": worst_true_value,
    "error": worst_predicted_value - worst_true_value,  # signed: prediction minus target
}
pd.DataFrame([comparison])