In [None]:
# Reload edited modules automatically so changes to project code are picked up live.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook, using the 'retina' figure format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report
# Suppress scientific notation when NumPy prints small floating point values.
np.set_printoptions(suppress=True)

In [None]:
import sys
# Put preprocess/ at the front of the module search path so its modules import by bare name.
sys.path.insert(0, 'preprocess/')
# NOTE(review): vectorizer is never referenced by name in the visible cells; presumably it is
# imported so that pickle can resolve the class of the object unpickled later — TODO confirm.
import vectorizer

import pickle

In [None]:
# Load the preprocessed data object; later cells read its vocab_size, word_dim,
# seq_text and label attributes.
# The context manager closes the file handle, which a bare open(...) passed
# straight to pickle.load never does explicitly.
# NOTE(security): pickle.load can execute arbitrary code — only load files you trust.
with open('preprocess/sim_data.p', 'rb') as sim_data_file:
    vec = pickle.load(sim_data_file)

In [None]:
# Project-local model module; Model is aliased at notebook level so the helper
# functions defined in later cells can refer to it directly.
import model.Attn_Sim_Pert as AttnModel
Model = AttnModel.Model

In [None]:
# Model dimensions are taken from the loaded data object.
vocab_size, embed_size = vec.vocab_size, vec.word_dim

In [None]:
# Sequences (X*) and labels (y*) for the two splits; the "t" suffix marks the test split.
X = vec.seq_text['train']
Xt = vec.seq_text['test']
y = vec.label['train']
yt = vec.label['test']

In [None]:
# Drop every sequence of length <= 2 from both splits, keeping labels aligned
# with their sequences. Membership is tested against a set (O(1) per lookup)
# rather than the index list, which made the filtering quadratic in the
# number of dropped sequences.
ind_1 = [i for i, x in enumerate(X) if len(x) <= 2]
short_train = set(ind_1)
X = [x for i, x in enumerate(X) if i not in short_train]
y = [x for i, x in enumerate(y) if i not in short_train]
# Number of training sequences that were removed.
print(len(ind_1))
ind_1 = [i for i, x in enumerate(Xt) if len(x) <= 2]
short_test = set(ind_1)
Xt = [x for i, x in enumerate(Xt) if i not in short_test]
yt = [x for i, x in enumerate(yt) if i not in short_test]

In [None]:
def train(name='') :
    """Train a fresh model for 20 epochs, logging every epoch to ``epoch.txt``.

    After each epoch the model is evaluated on the test split (``Xt`` / ``yt``),
    the loss and classification report are printed, the model is saved through
    ``model.save_values(add_name=name)`` and the epoch index, loss and report are
    appended to ``<dirname>/epoch.txt``, where ``dirname`` is the path returned
    by ``save_values``.

    Parameters
    ----------
    name : str
        Forwarded to ``model.save_values`` as ``add_name``.

    Returns
    -------
    Model
        The model instance after the final epoch.
    """
    model = Model(vocab_size, embed_size, 32, dirname='sim', hidden_size=6)
    for i in tqdm_notebook(range(20)) :
        loss = model.train(X, y)
        print(loss)
        o, he = model.evaluate(Xt)
        o = np.array(o)
        # Threshold the outputs at 0.5 to obtain binary predictions.
        rep = classification_report(yt, (o > 0.5))
        print(rep)
        stmt = '%s, %s' % (i, loss)
        dirname = model.save_values(add_name=name)
        # The context manager closes the log even if one of the writes raises.
        with open(dirname + '/epoch.txt', 'a') as f :
            f.write(stmt + '\n')
            f.write(rep + '\n')

    return model

In [None]:
import shutil

# Remove any previous 'outputs/attn_sim_pert_sim' tree (missing directory is
# ignored), then train 20 models, each tagged with its own 'experiments_<i>' name.
shutil.rmtree('outputs/attn_sim_pert_sim', ignore_errors=True)
for i in range(20) :
    model = train(name='experiments_{}'.format(i))

In [None]:
def load_model(dirname) :
    """Build a model and restore its saved values from ``dirname``.

    The model's ``dirname`` attribute is overwritten with the given directory
    before ``load_values(dirname)`` is called.

    NOTE(review): the constructor receives ``dirname='sst'`` while ``train`` uses
    ``'sim'``; this looks like a copy-paste leftover. It is overwritten right
    after construction, but confirm the constructor has no side effects tied to it.
    """
    restored = Model(vocab_size, embed_size, 32, hidden_size=6, dirname='sst')
    restored.dirname = dirname
    restored.load_values(dirname)
    return restored

In [None]:
import os

# Collect the saved experiment directories (entries whose name contains
# 'experiments'). os.listdir returns entries in arbitrary order, so the result
# is sorted to keep later positional lookups such as exps[1] reproducible
# across runs and machines.
exps = sorted(e for e in os.listdir('outputs/attn_sim_pert_sim/') if 'experiments' in e)

In [None]:
from sklearn.metrics import accuracy_score

# Reload each saved experiment, score it on the test split, print the accuracy
# and keep only the models whose accuracy exceeds 0.7, keyed by experiment name.
model_normal_list = {}
for e in exps :
    dirname_normal = 'outputs/attn_sim_pert_sim/%s' % e
    model = load_model(dirname_normal)
    o, he = model.evaluate(Xt)
    o = np.array(o)
    rep = accuracy_score(yt, o > 0.5)
    print(rep)
    if not rep > 0.7 :
        continue
    model_normal_list[e] = model

In [None]:
# Narrow exps down to the experiments that passed the accuracy filter, then display it.
exps = list(model_normal_list)
exps

Evaluation
==========

In [None]:
# Aliases for the test split used throughout the evaluation cells.
Xtest, ytest = Xt, yt

In [None]:
def save_grads(model) :
    """Evaluate ``model`` on ``Xtest`` with ``sample=True`` and cache the results on it.

    ``model.evaluate`` must return four values; the first (the predictions) is
    discarded and the other three are stored, in order, as ``model.attn``,
    ``model.perts_predict`` and ``model.perts_attn``. Returns ``None``.
    """
    outputs = model.evaluate(Xtest, sample=True)
    _, model.attn, model.perts_predict, model.perts_attn = outputs

In [None]:
# Cache the sampled evaluation outputs on every retained model.
for e in model_normal_list :
    model = model_normal_list[e]
    save_grads(model)

In [None]:
# For every retained model, histogram the absolute z-score of the mask-selected
# values against the mean/std taken over axis 2 of the perturbation array.
# plt.show() is not called inside the loop, so the semi-transparent (alpha=0.5)
# histograms are presumably overlaid on one figure.
# NOTE(review): the literals 15, (100, 10, 10) and 7 hard-code sizes; presumably a
# vocabulary of 15 ids and 100 test sequences of length 10, i.e. perts_attn shaped
# (100, 10, 15, 10) — TODO confirm.
for e in model_normal_list :
    attn = np.abs(np.array(model_normal_list[e].perts_attn))
    # One-hot boolean mask built from Xtest (15 classes) picks entries of attn,
    # which are then reshaped to (100, 10, 10).
    actual = attn[np.eye(15)[np.array(Xtest)].astype('bool')].reshape((100, 10, 10))
    m = attn.mean(2)
    s = attn.std(2)

    # Only the paired entries [k, k] for k = 0..6 of the last two axes are plotted.
    plt.hist(np.abs((actual - m)/s)[:, np.arange(7), np.arange(7)].flatten(), bins=30, alpha=0.5)

In [None]:
# For every retained model: plot the cumulative KDE of the median (over axis 2 of
# attn1) of the paired entries, then draw matrix plots for up to five indices
# whose median exceeds 0.5 somewhere along axis 0.
# NOTE(review): 15, 10 and (100, 10, 10) are hard-coded sizes; presumably perts_attn
# is shaped (100, 10, 15, 10) — TODO confirm.
for e in model_normal_list :
    attn = np.abs(np.array(model_normal_list[e].perts_attn))
    # Pairs index k on axis 1 with index k on axis 3 for k = 0..9.
    attn1 = attn[:, np.arange(10), :, np.arange(10)]
    med = np.median(attn1, 2)
    sns.kdeplot(med.flatten(), cumulative=True)
    # Indices along med's second axis where any entry along axis 0 exceeds 0.5.
    idx = np.where((med > 0.5).any(axis=0))[0]
    # attn is recomputed here with the same expression as above.
    attn = np.abs(np.array(model_normal_list[e].perts_attn))
    # One-hot boolean mask built from Xtest (15 classes) picks entries of attn,
    # which are then reshaped to (100, 10, 10).
    actual = attn[np.eye(15)[np.array(Xtest)].astype('bool')].reshape((100, 10, 10))
    
    # Only the first five selected indices are visualised.
    for i in idx[:5] :
        # Stack the axis-0/axis-2 diagonal of attn[i] on top of row 0 of actual[i], transposed.
        plt.matshow(np.vstack([np.diagonal(attn[i], 0, 0, 2), actual[i, 0:1, :]]).T, cmap='PuRd', vmin=0, vmax=1)
    plt.show()
    print("================================")

In [None]:
# For every retained model, draw one matrix plot for index 0 only: the
# axis-0/axis-2 diagonal of attn[0] stacked on top of row 0 of actual[0]
# (not transposed here).
# NOTE(review): 15 and (100, 10, 10) are hard-coded sizes — TODO confirm they
# match the shapes of Xtest and perts_attn.
for e in model_normal_list :
    attn = np.abs(np.array(model_normal_list[e].perts_attn))
    # One-hot boolean mask built from Xtest (15 classes) picks entries of attn,
    # which are then reshaped to (100, 10, 10).
    actual = attn[np.eye(15)[np.array(Xtest)].astype('bool')].reshape((100, 10, 10))
    plt.matshow(np.vstack([np.diagonal(attn[0], 0, 0, 2), actual[0, 0:1, :]]), cmap='PuRd', vmin=0, vmax=1)

In [None]:
# Histogram of the standard deviation (over axis 1 of the diagonal view) for a
# single model: the one at position 1 of exps.
# NOTE(review): exps[1] is a positional pick and raises IndexError when fewer than
# two models passed the accuracy filter — confirm this is the intended model.
attn = np.abs(np.array(model_normal_list[exps[1]].perts_attn))
# Take the diagonal across axes 1 and 3; np.diagonal appends it as the last axis.
attn = np.diagonal(attn, 0, 1, 3)
plt.hist(attn.std(1).flatten(), bins=30)