In [None]:
# Reload edited project modules automatically before each cell runs.
%load_ext autoreload
%autoreload 2
# Draw matplotlib figures inline, using the high-resolution 'retina' format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
import numpy as np
import torch
import pandas as pd
from tqdm import tqdm_notebook
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report
# Print floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

In [None]:
import sys
sys.path.insert(0, 'preprocess/')
import vectorizer

import pickle

In [None]:
# Load the pickled `vec` object; later cells read vocab_size, word_dim,
# seq_text, label and idx2word from it.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open('preprocess/SST/sst.p', 'rb') as vec_file:
    vec = pickle.load(vec_file)

In [None]:
# Alias the Model class of the project's Attn_Word_Pert module; train() and
# load_model() below instantiate it.
import model.Attn_Word_Pert as AttnModel
Model = AttnModel.Model

In [None]:
# First two positional arguments of Model(...), read from the loaded vectorizer.
vocab_size, embed_size = vec.vocab_size, vec.word_dim

In [None]:
# Token-index sequences and labels for the train and test splits.
X = vec.seq_text['train']
Xt = vec.seq_text['test']
y = vec.label['train']
yt = vec.label['test']

In [None]:
# Drop sequences with two or fewer tokens, together with their labels, from
# both splits.  The indices to drop are held in a set: the original list made
# every `i not in ind_1` test a linear scan, i.e. quadratic work overall.
ind_1 = {i for i, x in enumerate(X) if len(x) <= 2}
X = [x for i, x in enumerate(X) if i not in ind_1]
y = [x for i, x in enumerate(y) if i not in ind_1]
print(len(ind_1))  # number of training examples removed

ind_1 = {i for i, x in enumerate(Xt) if len(x) <= 2}
Xt = [x for i, x in enumerate(Xt) if i not in ind_1]
yt = [x for i, x in enumerate(yt) if i not in ind_1]

In [None]:
def train(name='', n_epochs=10) :
    """Train a fresh ``Model`` on (X, y), evaluating and saving after every epoch.

    Each iteration calls ``model.train(X, y)``, evaluates on ``Xt``, prints the
    loss and the classification report, saves the model through
    ``model.save_values(add_name=name)`` and appends the epoch index, loss and
    report to ``epoch.txt`` inside the directory returned by ``save_values``.

    Parameters
    ----------
    name : str
        Forwarded to ``model.save_values`` as ``add_name``.
    n_epochs : int
        Number of train / evaluate / save iterations (previously fixed at 10).

    Returns
    -------
    The trained ``Model`` instance.
    """
    model = Model(vocab_size, embed_size, 32, dirname='sst', hidden_size=128)
    for i in tqdm_notebook(range(n_epochs)) :
        loss = model.train(X, y)
        print(loss)
        o, he = model.evaluate(Xt)
        o = np.array(o)
        # Outputs above 0.5 are treated as the positive class.
        rep = classification_report(yt, (o > 0.5))
        print(rep)
        stmt = '%s, %s' % (i, loss)
        dirname = model.save_values(add_name=name)
        # The context manager closes the log even if one of the writes raises.
        with open(dirname + '/epoch.txt', 'a') as f :
            f.write(stmt + '\n')
            f.write(rep + '\n')

    return model

In [None]:
# import shutil 
# shutil.rmtree('outputs/attn_sim_pert_sst', ignore_errors=True)
# for i in tqdm_notebook(range(20)) :
#     model = train(name='experiments_'+str(i))

In [None]:
def load_model(dirname) :
    """Build a ``Model`` and restore the values saved under ``dirname``.

    The model is constructed with fixed arguments (third positional argument
    100, ``hidden_size=128``), its ``dirname`` attribute is set to the given
    directory, ``load_values(dirname)`` is called, and finally the encoder's
    ``gen_cells()`` is run before the model is returned.
    """
    restored = Model(vocab_size, embed_size, 100, dirname='sst', hidden_size=128)
    restored.dirname = dirname
    restored.load_values(dirname)
    restored.encoder.gen_cells()
    return restored

In [None]:
import os

# Select up to five saved experiment runs.  os.listdir returns entries in
# arbitrary order, so the names are sorted to make the choice reproducible, and
# the 'experiments' filter runs before truncation so that unrelated directory
# entries cannot use up the five slots.
exps = sorted(e for e in os.listdir('outputs/attn_sim_pert_sst/') if 'experiments' in e)[:5]

In [None]:
from sklearn.metrics import accuracy_score

# Load each selected run, score it on the test split, and keep only the runs
# whose accuracy is above 0.7, keyed by experiment name.
model_normal_list = {}
for e in exps :
    dirname_normal = 'outputs/attn_sim_pert_sst/' + e
    model = load_model(dirname_normal)
    o, he = model.evaluate(Xt)
    o = np.array(o)
    rep = accuracy_score(yt, o > 0.5)
    print(rep)
    if not rep > 0.7 :
        continue
    model_normal_list[e] = model

In [None]:
# Names of the runs that passed the accuracy filter.
exps = [*model_normal_list]
exps

EVALUATION
==========

In [None]:
# The evaluation cells refer to the test split as Xtest / ytest.
Xtest, ytest = Xt, yt

Evaluation -- Perturbation
--------------------------

In [None]:
def save_pertubations(model) :
    """Run ``model.sampling`` on the test set and pickle its three results.

    The results are written to ``save_pertubations.p`` inside ``model.dirname``
    as a dict with the keys ``'perts_predict'``, ``'perts_attn'`` and
    ``'words_sampled'``.  An existing file is overwritten; its presence is only
    reported by printing the directory.  The three attributes are set on the
    model while dumping and reset to ``None`` afterwards.
    """
    out_path = model.dirname + '/save_pertubations.p'
    if os.path.exists(out_path) :
        print(model.dirname)

    model.vec = vec
    # The outputs are not used here; the call is kept because the original
    # always evaluated before sampling (sampling may rely on it -- unverified).
    model.evaluate(Xtest)
    perts_predict, perts_attn, words_sampled = model.sampling(Xtest)

    model.perts_predict = perts_predict
    model.perts_attn = perts_attn
    model.words_sampled = words_sampled

    # The context manager guarantees the pickle is flushed and closed.
    with open(out_path, 'wb') as out_file :
        pickle.dump({'perts_predict' : model.perts_predict,
                     'perts_attn' : model.perts_attn,
                     'words_sampled' : model.words_sampled },
                    out_file)

    model.perts_attn = None
    model.perts_predict = None
    model.words_sampled = None

In [None]:
def load_pertubations(model) :
    """Attach the results pickled by ``save_pertubations`` to ``model``.

    On success sets ``model.attn`` (attention from a fresh
    ``model.evaluate(Xtest)``) and ``model.perts_predict``,
    ``model.perts_attn`` and ``model.words_sampled`` from the pickle.  If the
    file exists but cannot be read, the error is printed and the function
    returns before attaching any of those results.

    Raises
    ------
    FileNotFoundError
        If ``save_pertubations.p`` does not exist in ``model.dirname``.
    """
    pert_path = model.dirname + '/save_pertubations.p'
    if not os.path.exists(pert_path) :
        # The original `raise("...")` produced a TypeError, because a str is
        # not an exception; raise a real exception carrying the same message.
        raise FileNotFoundError("Error : No Pertubations")

    print(model.dirname)
    model.vec = vec
    _, attn_test = model.evaluate(Xtest)
    try :
        # NOTE(review): pickle.load can execute arbitrary code -- trusted files only.
        with open(pert_path, 'rb') as pert_file :
            d = pickle.load(pert_file)
    except Exception as err :
        # Best effort, as before, but report what went wrong and let
        # KeyboardInterrupt / SystemExit propagate.
        print("Error", repr(err))
        return
    model.attn = attn_test
    model.perts_predict = d['perts_predict']
    model.perts_attn = d['perts_attn']
    model.words_sampled = d['words_sampled']

In [None]:
def save_median_pertubation(model) :
    """Reduce each test example's perturbed attention to a per-position median.

    For every entry of ``model.perts_attn`` the diagonal over axes 0 and 2 is
    taken, truncated along its last axis to the example's length in ``Xtest``,
    and reduced with a median over axis 0.  The medians are collected in
    ``model.medians`` and the equally truncated ``model.attn`` entries in
    ``model.attn_list``.  The raw perturbation attributes are then set to
    ``None``.
    """
    model.medians, model.attn_list = [], []
    for idx, pert_attn in enumerate(model.perts_attn) :
        n_tokens = len(Xtest[idx])
        diag = np.diagonal(pert_attn, offset=0, axis1=0, axis2=2)[:, :n_tokens]
        model.medians.append(np.median(diag, axis=0))
        model.attn_list.append(model.attn[idx][:n_tokens])
    # Drop the references to the raw perturbation outputs.
    model.words_sampled = None
    model.perts_attn = None
    model.perts_predict = None

In [None]:
# for e, model in tqdm_notebook(list(model_normal_list.items())):
#     save_pertubations(model)

In [None]:
# Attach the saved perturbations to every retained model and reduce them to
# medians.  The keys are snapshotted into a list before iterating.
for e in tqdm_notebook(list(model_normal_list)) :
    model = model_normal_list[e]
    load_pertubations(model)
    save_median_pertubation(model)

In [None]:
# For every test example, heat-map the model's own attention (first column)
# next to the first 50 columns of the stored `perts_predict` entry.
for e, model in tqdm_notebook(model_normal_list.items()):
    # save_median_pertubation() resets perts_predict to None, so hasattr() alone
    # lets None through and len(None) raises; test the value instead.
    perts_predict = getattr(model, "perts_predict", None)
    if perts_predict is None :
        continue

    for i in range(len(perts_predict)) :
        n_tokens = len(Xtest[i])
        attn = perts_predict[i][:n_tokens]
        actual = model.attn[i][:n_tokens]
        input_sentence = [vec.idx2word[x] for x in Xtest[i]]

        fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
        attn1 = np.hstack([actual[:, None], attn[:, :50]])
        axes.matshow(attn1, cmap='PuRd', vmin=0, vmax=1)
        axes.set_yticks(np.arange(len(input_sentence)))
        axes.set_yticklabels(input_sentence, fontdict={'fontweight':10})

        # Show and release each figure straight away; the original kept one
        # open figure per test example until the inner loop had finished.
        plt.show()
        plt.close(fig)

    print("================================")

In [None]:
# Per model: cumulative KDE of the median perturbed attention, pooled over all
# positions of all test examples.
for e, model in tqdm_notebook(model_normal_list.items()):
    medians = []
    # save_median_pertubation() resets perts_attn to None, so hasattr() alone
    # lets None through and len(None) raises; test the value instead.
    perts_attn = getattr(model, "perts_attn", None)
    if perts_attn is None :
        continue

    for i in range(len(perts_attn)) :
        attn1 = np.diagonal(perts_attn[i], 0, 0, 2)[:, :len(Xtest[i])]
        medians.extend(np.median(attn1, 0))

    sns.kdeplot(medians, cumulative=True)
    plt.show()
    print("================================")

In [None]:
# Per model: the percentage of positions with attention above 0.5 whose median
# perturbed attention is also above 0.5, followed by a regression plot of the
# medians against the attention.
for e, model in tqdm_notebook(model_normal_list.items()):
    if hasattr(model, "medians") :
        medians = np.array([value for per_example in model.medians for value in per_example])
        attn = np.array([value for per_example in model.attn_list for value in per_example])
        assert len(medians) == len(attn)

        high_attn = attn > 0.5
        filt = np.logical_and(medians > 0.5, high_attn)
        n_high = np.sum(high_attn)
        # Avoid 0/0 (NaN plus a RuntimeWarning) when no attention exceeds 0.5.
        print(np.sum(filt) / n_high * 100 if n_high else float('nan'))

        # x and y must be passed by keyword: seaborn >= 0.12 no longer accepts
        # them positionally.  The keyword form also works on older releases.
        g = sns.regplot(x=attn, y=medians)
        g.figure.set_size_inches(16.5, 9.5)
        plt.show()
        print('='*30)

Evaluation -- Gradient
----------------------

In [None]:
def save_gradients(model) :
    """Compute and store gradient-based scores for the test set on ``model``.

    Calls ``model.evaluate``, ``model.gradient`` and ``model.zero_H_run`` on
    ``Xtest`` and sets:

    * ``model.gradients_output`` -- the ``model.gradient`` result where, for
      every key and example, the absolute values are summed over axis 1 and the
      resulting vector is divided by its own sum;
    * ``model.zero_diff`` -- the ``model.zero_H_run`` result, unchanged;
    * ``model.idxs`` -- per key, the indices of the examples whose summed
      gradient is non-zero;
    * ``model.attn`` -- the evaluated attention, if the model has none yet;
      otherwise the stored attention is asserted to equal the fresh one.

    Examples whose summed gradient is zero have their index printed and keep
    an all-zero vector.
    """
    model.vec = vec
    _, attn_test = model.evaluate(Xtest)
    grad_test = model.gradient(Xtest)
    diff_test = model.zero_H_run(Xtest)

    idxs = {}
    for k in grad_test :
        idxs[k] = []
        for i in range(len(grad_test[k])) :
            magnitude = np.sum(np.abs(grad_test[k][i]), axis=1)
            total = np.sum(magnitude)
            if total != 0 :
                idxs[k].append(i)
                magnitude = magnitude / total
            else :
                # The original still divided by zero here, turning the vector
                # into NaNs; leave the all-zero vector untouched instead.
                print(i)
            grad_test[k][i] = magnitude

    if hasattr(model, 'attn') :
        # Evaluation is expected to reproduce the stored attention exactly.
        assert len(model.attn) == len(attn_test)
        for i in range(len(attn_test)) :
            assert (attn_test[i] == model.attn[i]).all()
    else :
        model.attn = attn_test

    model.gradients_output = grad_test
    model.zero_diff = diff_test
    model.idxs = idxs

In [None]:
# Compute the gradient-based scores for every retained model.
for e in tqdm_notebook(model_normal_list) :
    model = model_normal_list[e]
    save_gradients(model)

In [None]:
import matplotlib.ticker as ticker
from mpl_toolkits.axes_grid1 import make_axes_locatable

def showAttention(fig, ax, input_sentence, model, n):
    """Draw a heat-map comparing the per-token scores of test example ``n``.

    Rows, top to bottom: ``model.attn[n]``; one row per key of
    ``model.gradients_output``; one row per key of ``model.zero_diff``;
    ``model.medians[n]``; and, when ``model.copy_gradients`` is available,
    rows ``1:len`` of ``model.copy_gradients['diff'][n]`` labelled with
    ``input_sentence[1:]``.  All rows are truncated to ``len(Xtest[n])``
    columns, and a colour bar is appended to the right of ``ax``.

    Parameters
    ----------
    fig, ax : matplotlib figure and axes to draw on.
    input_sentence : list of str used for the x tick labels.
    model : model carrying the attributes listed above.
    n : index of the example in ``Xtest``.
    """
    n_tokens = len(Xtest[n])

    # Rows and their labels are built together so the two cannot drift apart.
    labels = ['attention']
    rows = [model.attn[n][:n_tokens]]
    for k in model.gradients_output :
        labels.append(k)
        rows.append(model.gradients_output[k][n][:n_tokens])
    for k in model.zero_diff :
        labels.append(k)
        rows.append(model.zero_diff[k][n][:n_tokens])
    labels.append('median')
    rows.append(model.medians[n])

    # copy_gradients is only set by save_copies(), which the notebook runs in a
    # later cell; skip these rows instead of failing when it is not there yet.
    copies = getattr(model, 'copy_gradients', None)
    if copies is not None :
        rows += list(copies['diff'][n][1:n_tokens, :n_tokens])
        labels += list(input_sentence[1:])

    attentions = np.stack(rows, axis=0)

    img = ax.imshow(attentions, cmap='PuRd', vmin=0, vmax=1, interpolation='none')
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="10%", pad=0.05)
    fig.colorbar(img, cax=cax, ax=ax)

    # Set up axes
    ax.set_xticks(np.arange(len(input_sentence)))
    ax.set_xticklabels(input_sentence, rotation=85)

    ax.set_yticks(np.arange(len(labels)))
    ax.set_yticklabels(labels)

In [None]:
# For each model, plot the test example with the highest maximum median among
# the examples that have at least five positions.
for e, model in tqdm_notebook(model_normal_list.items()):
    # (The original's `n = 30` was overwritten before use and has been dropped.)
    meds = np.array([max(x) for x in model.medians])
    lens = np.array([len(x) for x in model.medians])
    # Push examples with fewer than five positions out of the argmax.
    meds[lens < 5] = -100
    n = np.argmax(meds)
    input_sentence = [vec.idx2word[x] for x in Xtest[n]]
    fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10,50))
    showAttention(fig, axes, input_sentence, model, n)

Evaluation -- Hidden
--------------------

In [None]:
def save_copies(model) :
    """Store the output of ``model.copy_H_run(Xtest)`` as ``model.copy_gradients``.

    The model is also evaluated on ``Xtest``.  If it has no ``attn`` attribute
    yet, the evaluated attention is stored there; otherwise the stored
    attention is asserted to match the fresh one in length and element-wise.
    """
    model.vec = vec
    _, attn_now = model.evaluate(Xtest)
    copy_out = model.copy_H_run(Xtest)

    if not hasattr(model, 'attn') :
        model.attn = attn_now
    else :
        assert len(model.attn) == len(attn_now)
        for fresh, stored in zip(attn_now, model.attn) :
            assert (fresh == stored).all()

    model.copy_gradients = copy_out

In [None]:
# Run the hidden-state copy experiment for every retained model.
for e in tqdm_notebook(model_normal_list) :
    model = model_normal_list[e]
    save_copies(model)

In [None]:
# Per model: at the position of maximum attention, compare the attention with
# the diagonal of the 'diff' matrix; heat-map the examples where both exceed
# 0.6 and finish with a scatter plot over all examples longer than four tokens.
for e, model in tqdm_notebook(model_normal_list.items()):
    xp, yp = [], []
    # `testidx` is not defined anywhere in this notebook (NameError on a fresh
    # kernel).  Every index below is into Xtest / model.attn, so iterate over
    # the test set itself.
    for i in range(len(Xtest)) :
        n_tokens = len(Xtest[i])
        attn = model.attn[i][:n_tokens]
        # NOTE(review): save_gradients() sums each gradients_output entry over
        # axis 1, while showAttention() reads a 2-D 'diff' matrix from
        # model.copy_gradients.  This lookup may be meant to use
        # model.copy_gradients['diff'] -- confirm before relying on the plot.
        new_attn = np.diagonal(model.gradients_output['diff'][i])[:n_tokens]
        m = np.argmax(attn)
        if n_tokens > 4 :
            xp.append(attn[m])
            yp.append(new_attn[m])
            if attn[m] > 0.6 and new_attn[m] > 0.6 :
                fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5,25))
                attn = model.attn[i][:n_tokens][None, :]
                diff = model.gradients_output['diff'][i][:n_tokens, :n_tokens]
                # 'diff' rows on top, the model's own attention as the last row.
                ax.matshow(np.vstack([diff, attn]), cmap='PuRd', vmin=0, vmax=1)
                input_sentence = [vec.idx2word[x] for x in Xtest[i]]
                ax.set_xticks(np.arange(len(input_sentence)))
                ax.set_xticklabels(input_sentence, rotation=85)

                ax.set_yticks(np.arange(len(input_sentence)+1))
                ax.set_yticklabels(input_sentence+['actual'])
                plt.show()

    plt.scatter(xp, yp, s=5)
    plt.show()
    print('='*20)