In [None]:
import numpy as np
import pandas as pd
import configparser
import sys
import tensorflow as tf
# Load project-level settings from 'project.env' (resolved relative to the
# current working directory).
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so fail loudly here rather than with an opaque
# KeyError on the 'global' section below.
if not config.read('project.env'):
    raise FileNotFoundError(
        "Could not read 'project.env' from the current working directory"
    )
module_path = config['global']['MODULE_PATH']
# Make the project's own packages importable. The membership check keeps
# sys.path free of duplicate entries when this cell is re-run.
if module_path not in sys.path:
    sys.path.append(module_path)

In [None]:
from trained_models.baseline import TrainedBaseline

In [None]:
data_prefix = "all_data"
model_prefix = "standardised_all_data"


def _with_trailing_separator(dir_path):
    """Return ``dir_path`` ending in a path separator, appending "/" if it has none."""
    if dir_path[-1] in ("\\", "/"):
        return dir_path
    return dir_path + "/"


###########
# DATASETS
###########
# Directories come from the project config; normalise them so the file name
# can simply be appended to the directory string.
test_dir_path = _with_trailing_separator(config["global"]["TESTING_DATASETS_PATH"])
saved_objects_dir_path = _with_trailing_separator(config["global"]["SAVED_OBJECTS_PATH"])

# Complete test split
test_x = np.load("{}{}_test_x.npy".format(test_dir_path, data_prefix))
test_y = np.load("{}{}_test_y.npy".format(test_dir_path, data_prefix))

# "low" subset
low_test_x = np.load("{}{}_low_test_x.npy".format(test_dir_path, data_prefix))
low_test_y = np.load("{}{}_low_test_y.npy".format(test_dir_path, data_prefix))

# "medium" subset
medium_test_x = np.load("{}{}_medium_test_x.npy".format(test_dir_path, data_prefix))
medium_test_y = np.load("{}{}_medium_test_y.npy".format(test_dir_path, data_prefix))

# "high" subset
high_test_x = np.load("{}{}_high_test_x.npy".format(test_dir_path, data_prefix))
high_test_y = np.load("{}{}_high_test_y.npy".format(test_dir_path, data_prefix))

# Wrap the saved scaler and the saved Keras network in the project's
# TrainedBaseline helper.
model = TrainedBaseline("{}_baseline".format(model_prefix))
scaler_file = "{}{}_scaler.sav".format(saved_objects_dir_path, data_prefix)
model.set_scaler(scaler_file)
network_file = "{}{}_baseline.hdf5".format(saved_objects_dir_path, model_prefix)
model.set_model(tf.keras.models.load_model(network_file))

In [None]:
from tqdm import tqdm
from matplotlib import pyplot as plt

In [None]:
# Evaluate on the complete test split. These are aliases of the loaded arrays,
# not copies, so in-place edits to x also affect test_x.
x = test_x
y = test_y
# Feature names as reported by the scaler attached to the model.
features = model.get_scaler().get_feature_names_out()

In [None]:
# Quick sanity check: show the prediction and the target for the sample at
# index 1, one per line.
k = model.predict(x)
print(k[1], y[1], sep="\n")

In [None]:
def get_mape(true, predicted, epsilon=50):
    """Mean absolute percentage error, in percent, with an offset denominator.

    Computes ``mean(|true - predicted| / (true + epsilon)) * 100``.

    Both inputs are passed through ``np.squeeze`` before being compared.
    Squeezing only ``true`` (as the previous version did) turns an ``(N, 1)``
    target into ``(N,)``; subtracting an ``(N, 1)`` prediction from that
    broadcasts to an ``(N, N)`` matrix and silently yields a wrong score.

    Parameters
    ----------
    true : array-like
        Ground-truth values.
    predicted : array-like
        Predicted values; must match ``true`` in shape once length-1 axes are
        removed.
    epsilon : float, default 50
        Constant added to ``true`` in the denominator.

    Returns
    -------
    float or str
        The MAPE in percent, or the string ``"n/a"`` if the computation raised
        (the exception is printed, not re-raised).
    """
    true = np.squeeze(true)
    denominator = true + epsilon
    try:
        # Squeezed inside the try so malformed predictions still take the
        # "n/a" fallback path rather than raising.
        predicted = np.squeeze(predicted)
        mape = np.mean(np.abs((true - predicted) / denominator)) * 100
    except Exception as e:
        print(e)
        mape = "n/a"
    return mape

In [None]:
def get_mse(true, predicted):
    """Mean squared error between ``true`` and ``predicted``.

    Both inputs are passed through ``np.squeeze`` first. Squeezing only
    ``true`` would make an ``(N,)`` target broadcast against an ``(N, 1)``
    prediction into an ``(N, N)`` matrix and silently give a wrong score.

    Parameters
    ----------
    true : array-like
        Ground-truth values.
    predicted : array-like
        Predicted values; must match ``true`` in shape once length-1 axes are
        removed.

    Returns
    -------
    float
        Mean of the squared element-wise differences.
    """
    residuals = np.squeeze(true) - np.squeeze(predicted)
    mse = np.mean(np.power(residuals, 2))
    return mse

In [None]:
def get_mae(true, predicted):
    """Mean absolute error between ``true`` and ``predicted``.

    Both inputs are passed through ``np.squeeze`` first. Squeezing only
    ``true`` would make an ``(N,)`` target broadcast against an ``(N, 1)``
    prediction into an ``(N, N)`` matrix and silently give a wrong score.

    Parameters
    ----------
    true : array-like
        Ground-truth values.
    predicted : array-like
        Predicted values; must match ``true`` in shape once length-1 axes are
        removed.

    Returns
    -------
    float
        Mean of the absolute element-wise differences.
    """
    residuals = np.squeeze(true) - np.squeeze(predicted)
    mae = np.mean(np.abs(residuals))
    return mae

In [None]:
# Permutation feature importance: for each feature (last axis of x), shuffle
# that feature's values across samples, re-score the model, and record how the
# error metrics change relative to the unshuffled baseline.
results = []
print("Computing feature importance")

# Reference scores on the untouched data.
baseline_preds = model.predict(x)
baseline_mape = get_mape(y, baseline_preds)
baseline_mse = get_mse(y, baseline_preds)
baseline_mae = get_mae(y, baseline_preds)
results.append({"feature":"BASELINE", "mape":baseline_mape, "mse":baseline_mse, "mae":baseline_mae})

# Seeded generator so the shuffles (and hence the ranking) are reproducible
# across kernel restarts.
PERMUTATION_SEED = 42
rng = np.random.default_rng(PERMUTATION_SEED)

# NOTE: features[feature_idx] assumes the scaler's feature names line up
# one-to-one with the last axis of x -- TODO confirm.
for feature_idx in tqdm(range(x.shape[2])):
    saved_column = x[:, :, feature_idx].copy()
    try:
        # Permute along the first (sample) axis only: each sample receives
        # another sample's complete slice for this feature.
        x[:, :, feature_idx] = saved_column[rng.permutation(x.shape[0])]
        permuted_preds = model.predict(x)
    finally:
        # x aliases the loaded test array; always restore it, even if predict
        # raises or the cell is interrupted, so later cells see intact data.
        x[:, :, feature_idx] = saved_column
    results.append({
        "feature": features[feature_idx],
        "mape": get_mape(y, permuted_preds),
        "mse": get_mse(y, permuted_preds),
        "mae": get_mae(y, permuted_preds),
    })

result_df = pd.DataFrame(results)

In [None]:
# Horizontal bar chart of MAPE per permuted feature (plus the BASELINE row),
# sorted ascending, with the baseline score drawn as a dashed reference line.
result_df = result_df.sort_values("mape")
# Size the axis from the frame actually being plotted rather than from
# len(features) + 1, so the chart stays valid even if the scaler's feature
# list and the number of result rows disagree.
n_bars = len(result_df)
bar_positions = np.arange(n_bars)
plt.figure(figsize=(10,20))
plt.barh(bar_positions, result_df["mape"])
plt.yticks(bar_positions, result_df["feature"].values)
plt.title('Feature Importance',size=16)
plt.ylim((-1, n_bars))
plt.plot([baseline_mape,baseline_mape],[-1, n_bars], '--', color='orange',
            label=f'Baseline OOF\nMAPE={baseline_mape:.3f}')
plt.xlabel('MAPE',size=14)
plt.ylabel('Feature',size=14)
plt.legend()
plt.show()

In [None]:
# Horizontal bar chart of MSE per permuted feature (plus the BASELINE row),
# sorted ascending, with the baseline score drawn as a dashed reference line.
result_df = result_df.sort_values("mse")
# Size the axis from the frame actually being plotted rather than from
# len(features) + 1, so the chart stays valid even if the scaler's feature
# list and the number of result rows disagree.
n_bars = len(result_df)
bar_positions = np.arange(n_bars)
plt.figure(figsize=(10,20))
plt.barh(bar_positions, result_df["mse"])
plt.yticks(bar_positions, result_df["feature"].values)
plt.title('LSTM Feature Importance',size=16)
plt.ylim((-1, n_bars))
plt.plot([baseline_mse,baseline_mse],[-1, n_bars], '--', color='orange',
            label=f'Baseline OOF\nmse={baseline_mse:.3f}')
plt.xlabel('Test mse with feature permuted',size=14)
plt.ylabel('Feature',size=14)
plt.legend()
plt.show()

In [None]:
# Horizontal bar chart of MAE per permuted feature (plus the BASELINE row),
# sorted ascending, with the baseline score drawn as a dashed reference line.
result_df = result_df.sort_values("mae")
# Size the axis from the frame actually being plotted rather than from
# len(features) + 1, so the chart stays valid even if the scaler's feature
# list and the number of result rows disagree.
n_bars = len(result_df)
bar_positions = np.arange(n_bars)
plt.figure(figsize=(10,20))
plt.barh(bar_positions, result_df["mae"])
plt.yticks(bar_positions, result_df["feature"].values)
plt.title('LSTM Feature Importance',size=16)
plt.ylim((-1, n_bars))
plt.plot([baseline_mae,baseline_mae],[-1, n_bars], '--', color='orange',
            label=f'Baseline OOF\nmae={baseline_mae:.3f}')
plt.xlabel('Test mae with feature permuted',size=14)
plt.ylabel('Feature',size=14)
plt.legend()
plt.show()