In [1]:
%load_ext autoreload
%autoreload 2

import torch
from torch import optim
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.metrics import confusion_matrix
import os
import random

from models.CAE import CAE
from train import Trainer

from dataloader import ElectricDevices, normalize
from utils import baseline

from scipy.ndimage import gaussian_filter1d

from pingouin import distance_corr # Szekely and Rizzo

import hydra
from hydra.experimental import initialize, initialize_config_dir, compose

from utils import get_shapley_values, get_layer_attrs, get_neuron_attrs

# Seed every random number generator used in this notebook (PyTorch, NumPy and
# the standard-library `random` module) with one shared value so that runs are
# repeatable.
SEED = 4444
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Create Dataset

In [2]:
# Compose the Hydra configuration named "config" from the local "configs"
# directory (resolved to an absolute path first) and echo it for reference.
config_dir = os.path.abspath("configs")
with initialize_config_dir(config_dir=config_dir):
    cfg = compose(config_name="config")
print(cfg)

{'model': {'M': 4, 'Lf': 3, 'length': 96, 'bottleneck_nn': 24, 'lmd': 1.0979131326372968e-05, 'alpha': 0.05}, 'train': {'verbose': True, 'shuffle': True, 'batch_size': 64, 'iters': 400, 'early_stopping_rounds': 23, 'lr': 0.001706322160346846}}


## Load observations

In [3]:
# Load the three ElectricDevices splits and keep the raw versions around
# (the *_ori tensors are used later to report original min/max values).
data_train_ori, data_valid_ori, data_test_ori = ElectricDevices()

# Normalise each split independently, in train / valid / test order.
data_train, data_valid, data_test = (
    normalize(split) for split in (data_train_ori, data_valid_ori, data_test_ori)
)

# Along the last axis, everything but the final entry is the model input and
# the final entry is taken as the target.
X_train = data_train[:, :, :-1]
y_train = data_train[:, :, -1]
X_valid = data_valid[:, :, :-1]
y_valid = data_valid[:, :, -1]
X_test = data_test[:, :, :-1]
y_test = data_test[:, :, -1]

# Data Exploration

# Compare to baseline

In [None]:
%%script false

# Run the project's `baseline` helper (imported from utils) on the normalised
# train / validation / test splits.
# NOTE(review): the `%%script false` magic at the top of this cell prevents it
# from executing; remove that magic to actually run the baseline.
baseline(data_train, data_valid, data_test)

# Model

In [4]:
# Instantiate the CAE from the `model` section of the Hydra config.
# NOTE(review): num_classes=7 presumably matches the seven ElectricDevices
# classes — confirm against the dataset.
model = CAE(cfg.model, num_classes=7)

In [None]:
%%script false

# Train `model` with the settings from the `train` section of the config and
# keep the loss histories returned by `fit` for the plot in the next cell.
# NOTE(review): this cell is disabled by `%%script false`; the notebook loads
# saved weights further down instead of retraining.
trainer = Trainer(cfg.train)
train_losses, valid_losses = trainer.fit(model, data_train, data_valid)

In [None]:
%%script false

# Plot the training and validation loss curves on one figure.
# Requires `train_losses` / `valid_losses` from the (currently disabled)
# training cell, which is why this cell is disabled as well.
plt.title("Cost")
plt.plot(train_losses, label="train")
plt.plot(valid_losses, label="validation")
plt.legend()

## Load/Save model

In [5]:
# Restore previously trained weights instead of retraining (the training cell
# above is disabled). The commented-out line below is the matching save call.
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
#torch.save(model.state_dict(), "../saved_models/mod")
model.load_state_dict(torch.load("../saved_models/mod"))

<All keys matched successfully>

# Shapley Values

## Shapley value of input with respect to output

In [None]:
%%script false

# For two test samples, compute input attributions against five different
# baselines and save each result as a heatmap under ../plots/:
#   j == 0: Gaussian-smoothed copy of the sample (sigma=2)
#   j == 1: the sample mirrored as 1 - x
#   j == 2: all ones
#   j == 3: all zeros
#   j == 4: 64 uniform-random baselines, with the sample repeated to match
for focus in [1, 3]:
    sample = X_test[focus]
    for j in range(5):
        # By default the sample itself is explained; only the random-baseline
        # variant needs it tiled to one copy per baseline.
        inp = sample
        if j == 0:
            smoothed = gaussian_filter1d(X_test[focus, 0], sigma=2)
            baselines = torch.tensor(smoothed).reshape((1, 1, 96))
        elif j == 1:
            baselines = 1 - sample.reshape((-1, 1, 96))
        elif j == 2:
            baselines = torch.ones((1, 1, 96))
        elif j == 3:
            baselines = torch.zeros((1, 1, 96))
        else:
            reps = 64
            inp = sample.repeat(reps, 1, 1)
            baselines = torch.rand((reps, 1, 96))

        input_attrs = get_shapley_values(inp, model, range(96), baselines)

        # Close the previous figure so heatmaps do not pile up on one canvas.
        plt.close()
        heatmap_ax = sns.heatmap(input_attrs, cmap="coolwarm")
        heatmap_ax.get_figure().savefig(f"../plots/hm_{focus}-{j}.png")
        print()
        print()

In [None]:
# Pick the test samples shown in the attribution grids below.
# Alternative kept for reference: one random sample per class.
#selected = [np.random.choice(np.where(y_test == i)[0]) for i in range(7)]
# Current choice: eight random draws (with replacement) among the test samples
# whose label equals 0.
class0_rows = np.where(y_test == 0)[0]
selected = [np.random.choice(class0_rows) for _ in range(8)]

## Layer: bottleneck with respect to output

In [None]:
#sns.heatmap(get_layer_attrs(X_test[22], model, model.full_conv_bn, range(96)), cmap="coolwarm") # for row i: attribution of neuron (24) to output i

# One heatmap per selected sample: attribution of the `full_conv_bn` layer with
# respect to each of the 96 outputs, laid out on a 2x4 grid.
fig, axs = plt.subplots(nrows=2, ncols=4, figsize=(25,5))
for i, x in enumerate(selected):
    layer_attrs = get_layer_attrs(X_test[x], model, model.full_conv_bn, range(96))
    sns.heatmap(layer_attrs, ax=axs.flat[i], cmap="coolwarm")
# Remove only the panels that were not drawn into. Unconditionally deleting the
# last panel would throw away the eighth heatmap whenever `selected` fills the
# whole grid.
for unused_ax in axs.flat[len(selected):]:
    fig.delaxes(unused_ax)

## Input with respect to bottleneck

In [None]:
#sns.heatmap(get_neuron_attrs(X_test[22], model, model.full_conv_bn, range(24)), cmap="coolwarm")

# One heatmap per selected sample: attribution of the input with respect to
# each of the 24 neurons of `full_conv_bn`, laid out on a 2x4 grid.
fig, axs = plt.subplots(nrows=2, ncols=4, figsize=(25,5))
for i, x in enumerate(selected):
    neuron_attrs = get_neuron_attrs(X_test[x], model, model.full_conv_bn, range(24))
    sns.heatmap(neuron_attrs, ax=axs.flat[i], cmap="coolwarm")
# Remove only the panels that were not drawn into. Unconditionally deleting the
# last panel would throw away the eighth heatmap whenever `selected` fills the
# whole grid.
for unused_ax in axs.flat[len(selected):]:
    fig.delaxes(unused_ax)

## Reconstruction quality: distance correlation (mean and std) and NRMSE

In [None]:
# Run the model on the whole test set and report reconstruction quality.
# Note that y_test is rebound here as a NumPy array; later cells rely on that.
X_test, y_test = data_test[:,:,:-1], data_test[:,:,-1].numpy()
X_testp_tensor, outclass_testp, bn = model(X_test)

# Keep the reconstruction in both forms: a detached tensor for torch-side
# metrics and a NumPy array for pingouin / matplotlib. Mixing a tensor and an
# ndarray in one arithmetic expression depends on implicit coercion between the
# two libraries, so each metric below stays within a single library.
X_testp_tensor = X_testp_tensor.detach()
X_testp = X_testp_tensor.numpy()
X_test_np = X_test.detach().numpy()

# Predicted class = arg-max of the softmax over the class output.
probs_testp = torch.nn.functional.softmax(outclass_testp, dim=1)
y_testp = torch.argmax(probs_testp, dim=1).detach().numpy()

# avg and std of cors (one distance correlation per test sample, original vs reconstruction)
cors = [distance_corr(X_testp[i,0], X_test_np[i,0], n_boot=None) for i in range(X_test.shape[0])]
print("Distance Correlation avg and std:", np.mean(cors), np.std(cors))

# Root-mean-square error over all elements, normalised by the value range of X_test.
rmse = torch.sqrt(torch.mean(torch.square(X_test - X_testp_tensor)))
value_range = torch.max(X_test) - torch.min(X_test)
print("NRMSE:", (rmse / value_range).item())
print()

In [None]:
#selected = [0, 1, 2, 3, 97]

# Top row: normalised original signal of each selected test sample, titled with
# its class and the min/max of the un-normalised signal.
# Bottom row: the model's reconstruction, titled with the predicted class.
# squeeze=False keeps `axs` two-dimensional even for a single selected sample.
fig, axs = plt.subplots(nrows=2, ncols=len(selected), figsize=(25,5), squeeze=False)
for i, x in enumerate(selected):
    vals = data_test_ori[x,0,:-1]
    min_v, max_v = torch.min(vals), torch.max(vals)
    axs[0,i].set_title(f"C: {int(y_test[x][0])} (min={min_v:.2f}, max={max_v:.2f})")
    axs[0,i].plot(X_test[x,0])
    axs[0,i].axis("off")
    axs[0,i].set_ylim((0,1))

    axs[1,i].set_title(f"Pred: {int(y_testp[x])}")
    axs[1,i].plot(X_testp[x,0])
    axs[1,i].axis("off")
    axs[1,i].set_ylim((0,1))

    # Index with the sample id `x`, not the panel counter `i`, so the printed
    # correlation belongs to the sample that is plotted. The original signal is
    # converted to NumPy to match the reconstruction's type.
    print("cor:", distance_corr(X_testp[x,0], X_test[x,0].detach().numpy(), n_boot=None))

In [None]:
# Confusion matrix of true vs predicted test labels, drawn as an annotated
# heatmap (rows: true label, columns: predicted label).
cm = confusion_matrix(y_test, y_testp)
cm_ax = sns.heatmap(cm, annot=True, cmap="Blues")
cm_ax.set_xlabel("Predicted label")
cm_ax.set_ylabel("True label")

# Accuracy = correctly classified samples (the diagonal) over all samples.
correct = np.trace(cm)
total = np.sum(cm)
print("Accuracy:", correct/total)

# Feature Visualization

In [23]:
def feature_visualization(neuron): # https://pytorch.org/tutorials/advanced/neural_style_tutorial.html
    """Synthesise an input that maximises one bottleneck neuron.

    Starts from a uniform-random tensor of shape (1, 1, 96), squashes it
    through a sigmoid before feeding it to the notebook-level `model`, and runs
    100 L-BFGS steps minimising the negated activation of bottleneck entry
    `[0, neuron]`.

    Args:
        neuron: index of the bottleneck neuron to maximise.

    Returns:
        The optimised input after the sigmoid, as a flat NumPy array.
    """
    raw_input = torch.rand((1, 1, 96), requires_grad=True)
    lbfgs = optim.LBFGS([raw_input])

    def negated_activation():
        # L-BFGS re-evaluates the objective itself, so gradients are cleared
        # and recomputed on every call.
        lbfgs.zero_grad()
        model.zero_grad()
        _, _, bottleneck = model(torch.sigmoid(raw_input))
        objective = -bottleneck[0, neuron]
        objective.backward()
        return objective

    for _ in range(100):
        lbfgs.step(negated_activation)

    optimised = torch.sigmoid(raw_input)
    return optimised.detach().numpy().flatten()

# Disabled batch export: flip the condition to render the optimised input of
# each of the 24 bottleneck neurons and save one PNG per neuron in ../plots/.
if False:
    for i in range(24):
        a1 = feature_visualization(i)
        plt.plot(a1)
        plt.title(f"Neuron {i}")
        # Small margin around [0, 1] so the curve is not clipped at the borders.
        plt.ylim(-0.05, 1.05)
        plt.savefig(f"../plots/{i}.png")
        # Close the figure so consecutive neurons are not drawn on top of each other.
        plt.close()

# Checking important filters

In [None]:
# Summarise how strongly each bottleneck neuron weights each filter: every
# weight row of `full_conv_bn` is cut into `num_filter` consecutive chunks of
# `w_per_filter` weights, and each chunk is reduced to its mean absolute value.
M = model.M
num_filter = model.k*M
w_per_filter = model.length
num_neurons = model.bottleneck_nn

bn_weight = model.full_conv_bn.weight
w = np.empty((num_neurons, num_filter))
for neuron_idx in range(num_neurons):
    for filter_idx in range(num_filter):
        start = filter_idx*w_per_filter
        chunk = bn_weight[neuron_idx, start:start + w_per_filter]
        w[neuron_idx, filter_idx] = torch.mean(torch.abs(chunk)).item()

# Column labels: filter index plus the dilation of the group it belongs to.
x_axis_labels = [f"{i}-d:{model.dilation[i//M]}" for i in range(w.shape[1])]
_ = sns.heatmap(w, cmap="coolwarm", xticklabels=x_axis_labels)