In [11]:
import os.path

# Names of the per-model result files; each is read from the model's directory
# with np.loadtxt by the loading helper further down.
PREDS_FILENAME = 'generated_preds.csv'
TRUE_FILENAME = 'generated_true.csv'

# Inspection config
START_FROM_IDX = 2200  # first row index to inspect; earlier rows are sliced off before summarizing
# Column index of the single blood vessel passed to plot_vascular_pred.
EXPLORE_BLOOD_VESSEL = 33


In [12]:
# Registry of the models to inspect: display name -> absolute path of the
# directory that holds the model's logs.
# !!! ADD HERE MORE MODELS !!!
models = {
    "BEST_DELAYED_RNN": r'PATH/TO/MODEL_LOGS/',
}

In [13]:
# ResultSumm code:
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

class ResultsSummarizer:
    """
    Creates summaries and visualizations from a model's results and predictions.

    The methods of this class only read ``y`` and ``y_pred`` and index them as
    2-D arrays whose rows are time samples and whose columns are blood vessels.
    The remaining attributes are stored as given but are not used by any
    method defined here.
    """

    # Number of vessels plotted by `plot_vascular_pred` when no index is given
    # (kept small so plotly is not overloaded with traces).
    _DEFAULT_N_VESSELS = 5

    def __init__(self,
                 x, x_train, x_test, y, y_train, y_test, y_pred, y_pred_train, y_pred_test
                 ):
        """ Initialize the summarizer with all the actual / predicted data. """
        self.x = x
        self.x_train = x_train
        self.x_test = x_test

        # Y stands for vascular-activity actual/predicted
        self.y = y
        self.y_train = y_train
        self.y_test = y_test

        self.y_pred = y_pred
        self.y_pred_train = y_pred_train
        self.y_pred_test = y_pred_test

    @staticmethod
    def _resolve_vessel_idx(idx, n_vessels=None):
        """
        Normalize a vessel selection into a 1-D integer index array.

        :param idx: None, an int, a slice, a boolean mask, or a sequence/array
            of column indices.
        :param n_vessels: total number of vessel columns; needed to expand a
            slice and used to clip the default selection.
        :return: 1-D numpy array of column indices.
        :raises ValueError: if `idx` is a slice and `n_vessels` is not given.
        """
        n_default = ResultsSummarizer._DEFAULT_N_VESSELS
        if n_vessels is not None:
            n_default = min(n_default, n_vessels)

        if idx is None:
            return np.arange(n_default)
        if isinstance(idx, slice):
            if n_vessels is None:
                raise ValueError("n_vessels is required to resolve a slice")
            return np.arange(n_vessels)[idx]

        # `idx or default` is not usable here: it raises on multi-element
        # arrays and treats 0 / array([0]) as "not given".
        idx = np.atleast_1d(np.asarray(idx))
        if idx.size == 0:
            # An empty selection falls back to the default, as before.
            return np.arange(n_default)
        if idx.dtype == bool:
            return np.flatnonzero(idx)
        return idx

    def plot_vascular_pred(self, idx=None):
        """
        Plots actual and predicted vascular activity over time.

        :param idx: which vessels (columns) to plot: an int, a slice, or a
            sequence/array of indices. Defaults to the first 5 vessels.

        Traces are labelled with the real vessel index. Only the actual
        series of the first selected vessel is shown initially; every other
        trace can be enabled from the legend.
        """
        vessel_ids = self._resolve_vessel_idx(idx, n_vessels=self.y.shape[1])
        df = self.get_true_and_pred_to_df(
            self.y[:, vessel_ids], self.y_pred[:, vessel_ids], vessel_ids=vessel_ids
        )

        fig = px.line(df)
        fig.update_xaxes(rangeslider_visible=True)
        # TODO add vertical lines for train, test, etc
        fig.update_layout(
            title="Vascular-Activity (Actual and Predicted) by Time",
            legend_title_text="Vessels List",
            xaxis_title='Time',
            yaxis_title='Vascular Activity',
        )

        # Hide every trace except the first one (the actual series of the
        # first selected vessel).
        first_trace_name = df.columns[0] if len(df.columns) else None

        def _hide_unless_first(trace):
            if trace.name != first_trace_name:
                trace.update(visible="legendonly")

        fig.for_each_trace(_hide_unless_first)

        fig.show()

    def plot_mse_per_vessel(self):
        """
        Plots the normalized RMSE of each blood vessel and returns it.

        Computed over the full ``y`` / ``y_pred`` arrays (not the test split).
        Each vessel's RMSE is divided by the mean of its actual values, so a
        vessel whose mean is zero or negative gives a non-finite or negative
        score.

        :return: 1-D array holding one NRMSE value per vessel.
        """
        y, y_pred = self.y.T, self.y_pred.T
        vessels_mse = np.array(
            [
                np.sqrt(mean_squared_error(y[i], y_pred[i])) / np.mean(y[i])
                for i in range(y.shape[0])
            ],
            dtype=float,
        )

        fig = px.scatter(vessels_mse)
        fig.update_layout(
            title="Normalized RMSE for each blood vessel",
            xaxis_title='#Blood-Vessel',
            yaxis_title='NRMSE',
        )
        fig.show()

        print("Average of NRMSE", vessels_mse.mean())
        return vessels_mse

    def describe(self):
        """
        Prints MSE, R^2 and MAE between ``y`` and ``y_pred``.

        NOTE(review): the printed label says "Training", but the metrics are
        computed on whatever ``y`` / ``y_pred`` were passed to the constructor.
        """
        _mse = mean_squared_error(self.y, self.y_pred)
        _mae = mean_absolute_error(self.y, self.y_pred)
        _r2 = r2_score(self.y, self.y_pred)
        print(f">> Training: MSE={_mse}, R^2={_r2}, MAE={_mae}")

    def sanity_checks_plot(self):
        """ Placeholder (not implemented yet). """
        # TODO plot MSE as function of diameter
        # TODO plot MSE as function of (vessel) angle
        pass

    def plot_probing(self):
        """ Placeholder (not implemented yet). """
        # TODO plot the probing of the model (for example the weights)
        pass

    @staticmethod
    def get_true_and_pred_to_df(y_true, y_pred, time_vector=None, vessel_ids=None):
        """
        Returns a dataframe built from blood-vessel time-series.

        :param y_true: actual values, rows are time samples and columns are
            vessels; a 1-D array is treated as a single vessel.
        :param y_pred: predicted values, same layout as `y_true`.
        :param time_vector: currently ignored (the frame keeps its default index).
        :param vessel_ids: optional labels, one per column, used in the column
            names instead of the column position.
        :return: DataFrame with interleaved columns ``vessel_true_<id>`` and
            ``vessel_pred_<id>``.
        :raises ValueError: if `vessel_ids` does not have one entry per vessel.
        """
        # After the transpose each row is one vessel; atleast_2d keeps a
        # single 1-D series as one row instead of iterating over its samples.
        true_rows = np.atleast_2d(np.asarray(y_true).T)
        pred_rows = np.atleast_2d(np.asarray(y_pred).T)

        if vessel_ids is None:
            vessel_ids = range(len(true_rows))
        elif len(vessel_ids) != len(true_rows):
            raise ValueError("vessel_ids must have one entry per vessel column")

        # Build dict to feed dataframe
        data = {}
        for pos, vessel_id in enumerate(vessel_ids):
            data[f"vessel_true_{vessel_id}"] = true_rows[pos]
            data[f"vessel_pred_{vessel_id}"] = pred_rows[pos]

        return pd.DataFrame(data)

In [14]:
# Utils
import os

def _load(model_dir):
    """
    Read a model's ground-truth and prediction files from `model_dir`.

    Both files are parsed with np.loadtxt using its default settings.

    :param model_dir: directory containing TRUE_FILENAME and PREDS_FILENAME.
    :return: tuple ``(y, y_pred)`` of the loaded arrays.
    """
    true_path = os.path.join(model_dir, TRUE_FILENAME)
    preds_path = os.path.join(model_dir, PREDS_FILENAME)
    actual = np.loadtxt(true_path)
    predicted = np.loadtxt(preds_path)
    return actual, predicted

def _inspect(y, y_pred):
    """
    Run the standard inspection on one model's actual / predicted arrays.

    Prints the summary metrics, plots the per-vessel NRMSE, and plots the
    actual vs. predicted activity of vessel EXPLORE_BLOOD_VESSEL.

    :param y: actual vascular activity.
    :param y_pred: predicted vascular activity.
    :return: the per-vessel NRMSE array produced by `plot_mse_per_vessel`.
    """
    # Only the full actual / predicted arrays are available here, so every
    # other constructor argument is left as None.
    summarizer = ResultsSummarizer(
        x=None, x_train=None, x_test=None,
        y=y, y_train=None, y_test=None,
        y_pred=y_pred, y_pred_train=None, y_pred_test=None,
    )
    summarizer.describe()
    per_vessel_nrmse = summarizer.plot_mse_per_vessel()
    summarizer.plot_vascular_pred([EXPLORE_BLOOD_VESSEL])
    return per_vessel_nrmse

In [15]:
# Run the inspection once per registered model. The loop variables
# (model_name, model_path, y, y_pred, mse_list_control) remain defined in the
# kernel namespace after the cell finishes, holding the last model's values.
for model_name, model_path in models.items():
    print(f">>> Inspecting: {model_name} Model <<<")
    y, y_pred = _load(model_dir=model_path)
    # Drop the first START_FROM_IDX rows of both arrays before summarizing.
    mse_list_control = _inspect(y[START_FROM_IDX:], y_pred[START_FROM_IDX:])
    # Indices of the five vessels with the smallest NRMSE, in ascending order.
    print(mse_list_control.argsort()[:5])

>>> Inspecting: BEST_DELAYED_RNN Model <<<
>> Training: MSE=11.280131805270623, R^2=0.26492852618421675, MAE=2.5219539970736187


Average of NRMSE 0.15567920778362712


[ 67  11 317  73 205]
