In [None]:
import os
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt

import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as utils
import pickle as pkl

from sklearn.metrics import classification_report, precision_score, roc_auc_score, accuracy_score
from sklearn.model_selection import KFold, StratifiedShuffleSplit
from sklearn.preprocessing import OneHotEncoder
from torch.utils.data import DataLoader

from captum.attr import IntegratedGradients, GradientShap, NoiseTunnel

import utils.visualization as vis
from utils.data import LocalDataLoader,  data_summary
from scripts.models.resnet import ResNetBaseline 
from scripts.train_dnn import Trainer

%load_ext autoreload
%autoreload 2

In [None]:
# Names of the synthetic datasets: every prefix in list_1 combined with
# every suffix in list_2 (e.g. 'SmallMiddle_CAR').
import itertools
from itertools import product

# Name prefixes.
list_1 = ['SmallMiddle_', 'RareTime_']

# Name suffixes.
list_2 = ['CAR', 'NARMA', 'Harmonic', 'PseudoPeriodic', 'GaussianProcess']

# All (prefix, suffix) pairs; the prefix varies slowest.
output = list(product(list_1, list_2))

# Concatenate each pair into a single dataset name.
ds_list = [prefix + suffix for prefix, suffix in output]

# Directory passed to the data helpers in the cells below.
ds_dir = 'data'

ds_list


In [None]:
# Print a summary of every dataset named in `ds_list`.
# To summarise a single dataset instead, override the list first, e.g.:
# ds_list = ['BeepTest2']
ds_dir = 'data'

for ds in ds_list:
    print(ds)
    # `data_summary` is a project helper imported from utils.data; what it
    # reports is not visible from this notebook.
    data_summary(ds_dir,ds)
    # Visual separator between datasets.
    print('------------------------------')

In [None]:
# Train model
#
# Fits one ResNetBaseline classifier per dataset in `ds_list` and prints the
# result of `trainer.evaluate()`. The trained model is NOT written to disk in
# this cell; `savepath` is only computed.

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

ds_list = ['BeepTest007']
for ds in ds_list:
    print('Dataset %s' % ds)

    data = LocalDataLoader(datapath=ds_dir, dataset=ds)
    X_train, y_train, X_test, y_test = data.get_X_y(onehot_label=False)

    # Pull a single batch only to report the tensor shapes.
    train_loader, test_loader = data.get_loaders(mode='train')
    train_features, train_labels = next(iter(train_loader))
    print(f"Feature batch shape: {train_features.size()}")
    print(f"Labels batch shape: {train_labels.size()}")

    # Number of output units = number of distinct training labels.
    nb_classes = len(np.unique(y_train))

    # Train model
    model = ResNetBaseline(in_channels=1, num_pred_classes=nb_classes)
    model = model.to(device)
    trainer = Trainer(model=model, datapath=ds_dir, ds=ds)
    # NOTE(review): patience (100) exceeds num_epochs (50); if patience is an
    # early-stopping window it can never trigger -- confirm against Trainer.fit.
    trainer.fit(num_epochs=50, learning_rate=0.001, patience=100)

    savepath = './model/%s_best.pkl' % ds
    print(trainer.evaluate())
    print('---------------------------')



In [None]:
# Save the model trained in the previous cell.
#
# `ds` is the dataset name left behind by the training loop, so the file is
# named after the data the model was actually trained on. This cell used to
# overwrite `ds` with 'BeepTest018', which stored the model under the wrong
# dataset name and left the file expected by the verification cell unwritten.
os.makedirs('./model', exist_ok=True)  # torch.save does not create directories
savepath = './model/%s_best.pkl' % ds
torch.save(model, savepath)
        

In [None]:
# Verify saved model
#
# Reloads each saved model and prints its accuracy on the test loader.
datapath = ds_dir
ds_list = ['BeepTest007']

for ds in ds_list:
    print(ds)
    savepath = './model/%s_best.pkl' % ds

    # The file is a fully pickled model object: only load files you trust.
    # map_location='cpu': the model may have been saved from a CUDA device,
    # while the batches below are fed as-is and converted with .numpy().
    model = torch.load(savepath, map_location='cpu')
    # Inference mode for layers such as BatchNorm/Dropout.
    model.eval()

    data = LocalDataLoader(datapath=datapath, dataset=ds)
    X_train, y_train, X_test, y_test = data.get_X_y(onehot_label=False)

    test_loader, _ = data.get_loaders(mode='test')

    true_list, preds_list = [], []
    test_results = {}
    with torch.no_grad():
        for x, y in test_loader:
            true_list.append(y.detach().numpy())
            preds = torch.softmax(model(x), dim=-1)
            preds_list.append(preds.detach().numpy())

    true_np, preds_np = np.concatenate(true_list), np.concatenate(preds_list)
    # Class index = position of the largest probability.
    preds_np = np.argmax(preds_np, axis=-1)
    # Assumes the loader yields one-hot label vectors -- TODO confirm.
    true_np = np.argmax(true_np, axis=-1)

    test_results['accuracy_score'] = accuracy_score(true_np, preds_np)
    print('Accuracy score %2.5f' % test_results['accuracy_score'])


In [None]:
# Gradient SHAP and IG
#
# For every dataset in `ds_list`, compute GradientShap and IntegratedGradients
# attributions for each test sample and store them as comma-separated text
# under ./exp_weights/. A file that already exists is treated as a cache and
# is not recomputed.


def _attribute_test_set(explainer, model, loader, make_baseline):
    """Return attributions for the predicted class of every sample in ``loader``.

    Args:
        explainer: Captum attribution object exposing ``attribute``.
        model: classifier wrapped by ``explainer``; used to pick the target class.
        loader: iterable of ``(input, label)`` pairs; the labels are ignored.
        make_baseline: callable mapping an input tensor to the ``baselines``
            argument passed to ``explainer.attribute``.

    Returns:
        ``np.ndarray`` stacking the squeezed attribution of each sample.
    """
    rows = []
    for sample, _ in loader:
        # The explained target is the class the model itself predicts.
        with torch.no_grad():
            pred = torch.softmax(model(sample), dim=-1)
            pred = torch.argmax(pred, dim=-1)
        attr = explainer.attribute(sample, target=pred,
                                   baselines=make_baseline(sample))
        rows.append(np.squeeze(attr.detach().numpy()))
    return np.array(rows)


os.makedirs('./exp_weights', exist_ok=True)  # np.savetxt does not create directories

for ds in ds_list:
    gs_file = './exp_weights/weights_GradientShap_%s.txt' % ds
    ig_file = './exp_weights/weights_IG_%s.txt' % ds
    need_gs = not os.path.isfile(gs_file)
    need_ig = not os.path.isfile(ig_file)

    if need_gs or need_ig:
        # Load the model and data once, for whichever explanation is missing.
        # They used to be loaded only in the GradientShap branch, so a cached
        # GradientShap file made the IG branch run on stale or undefined
        # `model`/`data`.
        savepath = './model/%s_best.pkl' % ds
        # Fully pickled model: only load trusted files. Force CPU because the
        # batches are fed as-is and results are converted with .numpy().
        model = torch.load(savepath, map_location='cpu')
        # Inference mode, so layers such as BatchNorm/Dropout do not switch
        # behaviour on the batches the explainers build internally.
        model.eval()
        data = LocalDataLoader(datapath=datapath, dataset=ds)
        X_train, y_train, X_test, y_test = data.get_X_y(onehot_label=False)

    print('Dataset %s' % ds)
    filename = gs_file
    if not need_gs:
        print('calculated.')
    else:
        # Sanity check: accuracy of the loaded model on the test loader.
        test_loader, _ = data.get_loaders(mode='test', batch_size=1)
        true_list, preds_list = [], []
        test_results = {}
        with torch.no_grad():
            for x, y in test_loader:
                true_list.append(y.detach().numpy())
                preds = torch.softmax(model(x), dim=-1)
                preds_list.append(preds.detach().numpy())

        true_np, preds_np = np.concatenate(true_list), np.concatenate(preds_list)
        preds_np = np.argmax(preds_np, axis=-1)
        # Assumes the loader yields one-hot label vectors -- TODO confirm.
        true_np = np.argmax(true_np, axis=-1)

        test_results['accuracy_score'] = accuracy_score(true_np, preds_np)
        print(f'Accuracy score: {round(test_results["accuracy_score"], 4)}')

        # FOR GRADIENTSHAP
        print('GradientSHAP result:')
        test_loader, _ = data.get_loaders(mode='test', batch_size=1)
        explainer = GradientShap(model)
        # Baseline set handed to GradientShap: an all-zero copy of the sample
        # and the sample itself.
        exp = _attribute_test_set(
            explainer, model, test_loader,
            lambda sample: torch.cat([0 * sample, 1 * sample]),
        )
        savepath = gs_file
        np.savetxt(savepath, exp, delimiter=',')

    print('Dataset %s' % ds)
    filename = ig_file
    if not need_ig:
        print('calculated.')
    else:
        # FOR IG
        print('IG result:')
        test_loader, _ = data.get_loaders(mode='test', batch_size=1)
        explainer = IntegratedGradients(model)
        # Single all-zero baseline with the same shape as the sample.
        exp = _attribute_test_set(
            explainer, model, test_loader,
            lambda sample: sample * 0,
        )
        savepath = ig_file
        np.savetxt(savepath, exp, delimiter=',')


In [None]:
# Visualise the explanation of every test sample.
# `X_test`, `true_np`, `preds_np`, `exp` and `ds` are whatever the previously
# executed cells left in the kernel.
# The upper bound used to be len(X_test) + 1, which indexed one position past
# the last test sample on the final iteration.
for i in range(len(X_test)):
    print('Index %d ; True Class %d; Pred Class: %d' % (i, true_np[i], preds_np[i]))
    vis.visualize_explanation(i, X_test, exp, ds=ds)
        

In [None]:

# Plot one raw test sample per dataset.
# Index of the test sample to plot (the same sample inspected in the
# explanation cells).
sample_idx = 18

for ds in ds_list:
    print('Dataset %s' % ds)
    # Only the data is needed here; the saved model is no longer unpickled
    # just to be discarded.
    data = LocalDataLoader(datapath=datapath, dataset=ds)
    X_train, y_train, X_test, y_test = data.get_X_y(onehot_label=False)
    # Drop singleton axes so a row can be plotted directly.
    X_test = np.squeeze(X_test)
    plt.plot(X_test[sample_idx, :])
    plt.title('%s - test sample %d' % (ds, sample_idx))
    plt.show()

In [None]:
# Load GS and IG
#
# Reloads the saved model to report test accuracy, then reads the cached
# GradientShap and IG attribution files and visualises one test sample.

# Index of the test sample to visualise.
sample_idx = 18

for ds in ds_list:
    print('Dataset %s' % ds)
    savepath = './model/%s_best.pkl' % ds
    # Fully pickled model: only load trusted files. Force CPU because the
    # batches are fed as-is and results are converted with .numpy().
    model = torch.load(savepath, map_location='cpu')
    # Inference mode for layers such as BatchNorm/Dropout.
    model.eval()
    data = LocalDataLoader(datapath=datapath, dataset=ds)
    X_train, y_train, X_test, y_test = data.get_X_y(onehot_label=False)
    test_loader, _ = data.get_loaders(mode='test', batch_size=1)

    true_list, preds_list = [], []
    test_results = {}
    with torch.no_grad():
        for x, y in test_loader:
            true_list.append(y.detach().numpy())
            preds = torch.softmax(model(x), dim=-1)
            preds_list.append(preds.detach().numpy())

    true_np, preds_np = np.concatenate(true_list), np.concatenate(preds_list)
    preds_np = np.argmax(preds_np, axis=-1)
    # Assumes the loader yields one-hot label vectors -- TODO confirm.
    true_np = np.argmax(true_np, axis=-1)

    test_results['accuracy_score'] = accuracy_score(true_np, preds_np)
    print(f'Accuracy score: {round(test_results["accuracy_score"], 4)}')

    # Same steps for both attribution methods: read the cached weights and
    # visualise the chosen sample.
    for header, path_template in (
        ('GradientSHAP result:', './exp_weights/weights_GradientShap_%s.txt'),
        ('IG result:', './exp_weights/weights_IG_%s.txt'),
    ):
        print(header)
        savepath = path_template % ds
        exp = np.genfromtxt(savepath, delimiter=',')

        # Direct, bounds-checked access replaces the old scan over
        # range(len(X_test) + 1) that only acted on i == 18 and indexed out of
        # range when the test set had exactly 18 samples.
        if sample_idx < len(X_test):
            print('Index %d ; True Class %d; Pred Class: %d'
                  % (sample_idx, true_np[sample_idx], preds_np[sample_idx]))
            vis.visualize_explanation(sample_idx, X_test, exp, ds=ds)
        

In [None]:
# Write the array currently held in `exp` to the IG weights file of the
# current `ds`.
# NOTE(review): `exp` and `ds` are whatever the last executed cell left in the
# kernel; if `exp` holds GradientShap weights at that point, this overwrites
# the IG file with them -- confirm before running.
savepath = './exp_weights/weights_IG_%s.txt' %ds
np.savetxt(savepath, exp, delimiter=',') 

In [None]:
# Display the path most recently assigned to `savepath`.
savepath