In [1]:
import keras
from keras.datasets import mnist
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Dropout, Flatten, Embedding, regularizers
from keras.layers import Conv1D, MaxPooling1D
from keras.layers.merge import Concatenate
from keras import optimizers
from keras.preprocessing import sequence
from keras import backend as K
from w2v import train_word2vec 
from keras.utils import np_utils
import pickle, datetime
import numpy as np
import pandas as pd
from scipy.signal import convolve2d

import tensorflow as tf
from gensim import corpora
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize 
from nltk.stem import SnowballStemmer
from w2v import train_word2vec 
import pickle, datetime
import difflib

Using TensorFlow backend.


### Loading Data

In [2]:
df = pd.read_csv('../../Datasets/SST1_dataset/Processed_SST1.tsv', sep='\t')

raw_docs_train      = df[df.split_ind == 1]['Phrases'].values
sentiment_train     = df[df.split_ind == 1]['Label'].values
raw_docs_test       = df[df.split_ind == 2]['Phrases'].values
sentiment_test      = df[df.split_ind == 2]['Label'].values
num_labels          = len(np.unique(sentiment_train))

In [3]:
fname = '../../Datasets/SST1_dataset/sst_data'
with open(fname, 'rb') as file:
    data = pickle.load(file)
    
x_train, y_train, x_test, y_test, dictionary = data
seq_len = dictionary.seq_len
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

(9645, 37) (9645, 5) (2210, 37) (2210, 5)


### Loading pretrained model 

In [7]:
def load_model(fname):
    model = keras.models.model_from_json(open(fname + '.json').read())
    model.load_weights(fname + '_weights.h5')
    model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=["accuracy"])
    return model


def save_model(model, fname):    
    json_string = model.to_json()
    open(fname + '.json', 'w').write(json_string)
    model.save_weights(fname + '_weights.h5', overwrite=True)
    with open( fname + '_history', 'wb') as output:
        pickle.dump([model.history.history], output, pickle.HIGHEST_PROTOCOL)

In [10]:
fname = 'dodo_cnn_non_static5'
model = load_model(fname)

In [11]:
scores = model.evaluate(x_train, y_train, verbose=0)
print("Train Accuracy: %.2f%%" % (scores[1]*100))
scores = model.evaluate(x_test, y_test, verbose=0)
print("Test Accuracy: %.2f%%" % (scores[1]*100))

Train Accuracy: 98.64%
Test Accuracy: 42.08%


### Occlusion Method

In [113]:
pad_ind = 11991

def occlude_sent(sent, ind):
    while ind < seq_len-1:
        sent[0,ind] = sent[0,ind+1]
        ind += 1 
    sent[0,seq_len-1] = pad_ind
    return sent

In [211]:
sent_id = 333
print(raw_docs_train[sent_id])

x_inp = x_train[sent_id, :]
x_inp = x_inp.reshape((1,seq_len))

y_inp = y_train[sent_id, :]
y_inp = y_inp.reshape((1,num_labels))

print('\n', x_inp, '\n\n', y_inp)
print(x_inp.shape, y_inp.shape)

the story and structure are well honed

 [[   29    65    31   986   182   242  1938 11991 11991 11991 11991 11991
  11991 11991 11991 11991 11991 11991 11991 11991 11991 11991 11991 11991
  11991 11991 11991 11991 11991 11991 11991 11991 11991 11991 11991 11991
  11991]] 

 [[ 0.  0.  0.  1.  0.]]
(1, 37) (1, 5)


In [212]:
print('Original Prediction:', model.predict(sent), '\n')

for ind in range(0,seq_len):
    if(x_inp[0,ind] == pad_ind):
        break
    
    sent = x_inp.copy()
    sent = occlude_sent(sent, ind)
    pred = model.predict(sent)
    
    print(dictionary.id2token[x_inp[0,ind]], np.argmax(model.predict(sent), axis=1))
    print(pred)

Original Prediction: [[ 0.0282236   0.15557611  0.3540647   0.38555646  0.07657916]] 

the [3]
[[ 0.08516022  0.16722625  0.19886796  0.44121018  0.10753546]]
stori [3]
[[ 0.08998668  0.17700012  0.19637123  0.42389011  0.11275184]]
and [3]
[[ 0.05646797  0.21363001  0.26372549  0.40625682  0.05991968]]
structur [3]
[[ 0.06134174  0.23766007  0.27704486  0.30417785  0.11977552]]
are [3]
[[ 0.03414585  0.16855714  0.31971237  0.37483883  0.10274588]]
well [1]
[[ 0.06075473  0.31963122  0.30222267  0.2577638   0.05962766]]
hone [3]
[[ 0.07638688  0.19144639  0.29416445  0.34355244  0.0944498 ]]


In [214]:
for sent_id in range(x_train.shape[0]):
    x_inp = x_train[sent_id, :]
    x_inp = x_inp.reshape((1,seq_len))

    y_inp = y_train[sent_id, :]
    y_inp = y_inp.reshape((1,num_labels))
    
    y_actual = np.argmax(y_inp)
                         
    for ind in range(0,seq_len):
        if(x_inp[0,ind] == pad_ind):
            break

        sent = x_inp.copy()
        sent = occlude_sent(sent, ind)
        pred = model.predict(sent)
                         
        y_pred = np.argmax(model.predict(sent), axis=1)
        
        if(y_actual != y_pred):
            print(sent_id, dictionary.id2token[x_inp[0,ind]], '|||', raw_docs_train[sent_id])
#             break

## Deconvolution

In [50]:
count = 0
for layer in model.layers:
    print(count, layer.name, layer.input_shape, layer.output_shape)
    count += 1
    wts = layer.get_weights()
    for wt in wts:
        print(wt.shape)
    print('****************************************')

0 input_1 (None, 37) (None, 37)
****************************************
1 embedding (None, 37) (None, 37, 300)
(11992, 300)
****************************************
2 dropout_1 (None, 37, 300) (None, 37, 300)
****************************************
3 conv1d_1 (None, 37, 300) (None, 35, 100)
(3, 300, 100)
(100,)
****************************************
4 conv1d_2 (None, 37, 300) (None, 34, 100)
(4, 300, 100)
(100,)
****************************************
5 conv1d_3 (None, 37, 300) (None, 33, 100)
(5, 300, 100)
(100,)
****************************************
6 max_pooling1d_1 (None, 35, 100) (None, 1, 100)
****************************************
7 max_pooling1d_2 (None, 34, 100) (None, 1, 100)
****************************************
8 max_pooling1d_3 (None, 33, 100) (None, 1, 100)
****************************************
9 flatten_1 (None, 1, 100) (None, 100)
****************************************
10 flatten_2 (None, 1, 100) (None, 100)
****************************************
11 

### Entire model in tensorflow

In [15]:
ftr_sz = [3,4,5]

inp = tf.placeholder(tf.int32, shape=(None, seq_len))
lbl = tf.placeholder(tf.float32, shape=(None, num_labels))

wts = model.get_layer('embedding').get_weights()
wEmb = tf.constant(wts[0])

emb = tf.nn.embedding_lookup(wEmb, inp)

# First convolution layer
wts = model.get_layer('conv1d_1').get_weights()
wConv1 = tf.constant(wts[0])
bConv1  = tf.constant(wts[1])

conv1 = tf.nn.conv1d(emb, wConv1, stride = 1, padding='VALID')
bias1 = conv1 + bConv1
relu1 = tf.nn.relu(bias1)

pool1 = tf.nn.max_pool([relu1], ksize = [1, 1, seq_len - ftr_sz[0] + 1, 1],
                       strides = [1,1,1,1], padding = 'VALID')

# Second convolution layer
wts = model.get_layer('conv1d_2').get_weights()
wConv2 = tf.constant(wts[0])
bConv2  = tf.constant(wts[1])

conv2 = tf.nn.conv1d(emb, wConv2, stride = 1, padding='VALID')
bias2 = conv2 + bConv2
relu2 = tf.nn.relu(bias2)

pool2 = tf.nn.max_pool([relu2], ksize = [1, 1, seq_len - ftr_sz[1] + 1, 1],
                       strides = [1,1,1,1], padding = 'VALID')

# Third convolution layer
wts = model.get_layer('conv1d_3').get_weights()
wConv3 = tf.constant(wts[0])
bConv3  = tf.constant(wts[1])

conv3 = tf.nn.conv1d(emb, wConv3, stride = 1, padding='VALID')
bias3 = conv3 + bConv3
relu3 = tf.nn.relu(bias3)

pool3 = tf.nn.max_pool([relu3], ksize = [1, 1, seq_len - ftr_sz[2] + 1, 1],
                       strides = [1,1,1,1], padding = 'VALID')


flat = tf.concat([pool1[0,:,0,:], pool2[0,:,0,:], pool3[0,:,0,:]], axis = 1)

wts = model.get_layer('dense_1').get_weights()
wDen1 = tf.constant(wts[0])
bDen1 = tf.constant(np.reshape(wts[1], (1, wts[1].shape[0],)))

den1 = tf.matmul(flat, wDen1)
den1f = tf.add(bDen1, den1)
den1f = tf.nn.relu(den1f)

wts = model.get_layer('dense_2').get_weights()
wDen2 = tf.constant(wts[0])
bDen2 = tf.constant(np.reshape(wts[1], (1, wts[1].shape[0],)))

den2 = tf.matmul(den1f, wDen2)
den2f = tf.add(bDen2, den2)

final = tf.nn.softmax_cross_entropy_with_logits(labels=den2f, logits=lbl)

In [20]:
sess = tf.Session()
with sess.as_default():
    res = den2f.eval(feed_dict={inp:x_inp, lbl:y_inp})
res

array([[-0.69845384, -0.02364065,  0.14965281,  0.94653302, -0.4651677 ]], dtype=float32)

## Visualization

In [26]:
def maxpool_arg(inp):
    ch, r, c = inp.shape
    out_shape = (ch, c)
    max_args = np.empty(out_shape, np.int32)

    for k in range(ch):
        for i in range(c):
            max_args[k, i] = np.argmax(inp[k,:,i])
    return max_args

def unpool(ind_mat, val_mat, out_shape):
    out_mat = np.zeros(out_shape, np.float32)
    b, r,c,ch = out_shape
        
    for k in range(ch):
        c1 = 0
        for i in range(0, r-2, 2):
            c2 = 0
            for j in range(0, c-2, 2):
                ind = ind_mat[0, c1, c2, k]
                val = val_mat[0, c1, c2, k]
                coord = np.unravel_index(ind, (3,3))                
                out_mat[0, i + coord[0] , j + coord[1],k] = val
                
                c2 += 1
            c1 +=1
            
    return out_mat

In [74]:
sent_id = 439
print(raw_docs_train[sent_id])

x_inp = x_train[sent_id, :]
x_inp = x_inp.reshape((1,seq_len))

y_inp = y_train[sent_id, :]
y_inp = y_inp.reshape((1,num_labels))

print('\n', x_inp, '\n\n', y_inp)
print(x_inp.shape, y_inp.shape)

it 's fun lite

 [[  134     7   149  2274 11991 11991 11991 11991 11991 11991 11991 11991
  11991 11991 11991 11991 11991 11991 11991 11991 11991 11991 11991 11991
  11991 11991 11991 11991 11991 11991 11991 11991 11991 11991 11991 11991
  11991]] 

 [[ 0.  0.  0.  1.  0.]]
(1, 37) (1, 5)


In [75]:
sess = tf.InteractiveSession()
flat_res_fwd = flat.eval(feed_dict={inp:x_inp, lbl:y_inp})

pool1_ind = maxpool_arg(relu1.eval(feed_dict={inp:x_inp, lbl:y_inp}))[0,:]
pool2_ind = maxpool_arg(relu2.eval(feed_dict={inp:x_inp, lbl:y_inp}))[0,:]
pool3_ind = maxpool_arg(relu3.eval(feed_dict={inp:x_inp, lbl:y_inp}))[0,:]

pool1_res_fwd = pool1.eval(feed_dict={inp:x_inp, lbl:y_inp})[0,0,0,:]
pool2_res_fwd = pool2.eval(feed_dict={inp:x_inp, lbl:y_inp})[0,0,0,:]
pool3_res_fwd = pool3.eval(feed_dict={inp:x_inp, lbl:y_inp})[0,0,0,:]

print(flat_res_fwd.shape, pool1_ind.shape, pool2_ind.shape, pool3_ind.shape, 
      pool1_res_fwd.shape, pool2_res_fwd.shape, pool3_res_fwd.shape)

(1, 300) (100,) (100,) (100,) (100,) (100,) (100,)


In [76]:
y_inp = np.float32(np.reshape(y_test[0,:], (1,5), ))
den2_out = y_inp - bDen2
den1_out = tf.matmul(den2_out, wDen2, transpose_b=True)

den1_out = tf.nn.relu(den1_out)
den1_out = den1_out - bDen1
flat_out = tf.matmul(den1_out, wDen1, transpose_b=True)

flat_res_bwd = flat_out.eval(feed_dict={inp:x_inp, lbl:y_inp})

pool1_res_bwd = flat_res_bwd[0,0:100]
pool2_res_bwd = flat_res_bwd[0,100:200]
pool3_res_bwd = flat_res_bwd[0,200:300]

print(pool1_res_bwd.shape, pool2_res_bwd.shape, pool3_res_bwd.shape)

(100,) (100,) (100,)


### Back propagation

In [77]:
def calc_imp(t1,t2,t3):
    tsum = np.sum(t1) + np.sum(t2) + np.sum(t3)
    contri = np.zeros((seq_len,), np.float32)

    for i in range(100):
        for j in range(3):
            contri[pool1_ind[i] + j] += 1/3*t1[i]/tsum

    for i in range(100):
        for j in range(4):
            contri[pool2_ind[i] + j] += 1/4*t2[i]/tsum

    for i in range(100):
        for j in range(5):
            contri[pool3_ind[i] + j] += 1/5*t3[i]/tsum

    wrds = []
    sent = raw_docs_train[sent_id]
    wrd_prob = dict()

    counter = 0
    for i in x_inp[0,:]:
        wrd_prob[dictionary.id2token[i]] = contri[counter]
        counter += 1

    import operator
    # srt_list = sorted(wrd_prob.items())
    sorted_x = sorted(wrd_prob.items(), key=operator.itemgetter(1), reverse=True)
    return sorted_x

In [78]:
t1 = pool1_res_fwd * pool1_res_bwd
t2 = pool2_res_fwd * pool2_res_bwd
t3 = pool3_res_fwd * pool3_res_bwd

calc_imp(t1,t2,t3)

[('lite', 0.30395886),
 ('fun', 0.26758641),
 ("'s", 0.16971132),
 ('it', 0.094893411),
 ('<PAD/>', 0.0)]

### Guided backpropagation

In [79]:
t1 = pool1_res_fwd * np.abs(pool1_res_bwd)
t2 = pool2_res_fwd * np.abs(pool2_res_bwd)
t3 = pool3_res_fwd * np.abs(pool3_res_bwd)
calc_imp(t1,t2,t3)

[('fun', 0.29317465),
 ('lite', 0.27859962),
 ("'s", 0.1769509),
 ('it', 0.067789622),
 ('<PAD/>', 0.0)]

### Deconvnet

In [80]:
t1 = np.abs(pool1_res_bwd)
t2 = np.abs(pool2_res_bwd)
t3 = np.abs(pool3_res_bwd)
calc_imp(t1,t2,t3)

[('fun', 0.25750187),
 ("'s", 0.250965),
 ('it', 0.24174465),
 ('lite', 0.16361547),
 ('<PAD/>', 0.0)]