In [None]:
import os
import sys
import codecs
import operator
import numpy as np
import re
from time import time

In [None]:
import _pickle as cPickle

In [None]:
data_path = 'data/doc_level-sentiment/doc_level'

### Reading preprocessed data

In [None]:
def read_pickle(data_path, file_name):
    """Load and return the object pickled in ``data_path/file_name``.

    # Arguments
        data_path: directory that contains the pickle file.
        file_name: name of the pickle file inside `data_path`.
    # Returns
        The unpickled Python object.
    """
    # NOTE(security): unpickling can execute arbitrary code; only load files
    # that come from a trusted source.
    # The context manager closes the file even if unpickling raises.
    with open(os.path.join(data_path, file_name), 'rb') as f:
        return cPickle.load(f)

def save_pickle(data_path, file_name, data):
    """Pickle `data` into ``data_path/file_name`` and print the target path.

    # Arguments
        data_path: directory in which the file is written (must already exist).
        file_name: name of the pickle file to create or overwrite.
        data: picklable Python object to store.
    """
    target = os.path.join(data_path, file_name)
    # The context manager closes (and flushes) the file even if dumping raises.
    with open(target, 'wb') as f:
        cPickle.dump(data, f)
    print(" file saved to: %s"%(target))

In [None]:
words_idx = read_pickle(data_path, 'words_idx.pkl')

In [None]:
idx_words = read_pickle(data_path, 'idx_words.pkl')

In [None]:
data = read_pickle(data_path, 'data.pkl')

In [None]:
label = read_pickle(data_path, 'label.pkl')

### Preparing training and validation set

In [None]:
from keras.preprocessing import sequence
from keras.utils.np_utils import to_categorical


In [None]:
# Shuffle examples and labels with one shared permutation, then carve out
# fixed-size test and dev splits and keep the remainder for training.
# A dedicated, seeded generator makes the split identical across kernel
# restarts without touching NumPy's global random state.
# NOTE: run this cell once per kernel session. It overwrites `data` and
# `label`, so re-running it would reshuffle and one-hot encode the labels again.
split_rng = np.random.RandomState(1234)
rand_idx = split_rng.permutation(len(data))

data = data[rand_idx]
label = to_categorical(label)[rand_idx]

data_size = len(data)
# Fail loudly instead of silently producing an empty training set.
assert data_size > 5000, "need more than 5000 examples for the test/dev/train split"

# First 1000 shuffled examples: test set.
test_x = data[0:1000]
test_y = label[0:1000]

# Next 4000 examples: development (validation) set.
dev_x = data[1000:5000]
dev_y = label[1000:5000]

# Everything else: training set.
train_x = data[5000:]
train_y = label[5000:]


In [None]:
# Common sequence length used for padding below.
# NOTE(review): this is the longest document in the dev split only; longer
# train/test documents will be cut to this length by pad_sequences - confirm
# this is intended.
maxlen = np.max([len(d) for d in dev_x])

import operator
# Turn the loaded mapping into a list of its keys ordered by their values
# (presumably word -> index, giving the vocabulary in index order).
# The guard keeps the cell re-runnable: once `words_idx` is already a list it
# has no .items() and the conversion is skipped instead of raising.
if hasattr(words_idx, 'items'):
    words_idx = [x for (x, _) in sorted(words_idx.items(), key=operator.itemgetter(1))]

In [None]:
# Bring every split to the same length (`maxlen`) so each one can be stored
# as a single 2-D integer array.
train_x_ = sequence.pad_sequences(train_x, maxlen=maxlen)
dev_x_ = sequence.pad_sequences(dev_x, maxlen=maxlen)
test_x_ = sequence.pad_sequences(test_x, maxlen=maxlen)

In [None]:
# Make sure inputs and labels of every split are NumPy arrays
# (np.array also copies data that already is an array).
train_x_, train_y = np.array(train_x_), np.array(train_y)
dev_x_, dev_y = np.array(dev_x_), np.array(dev_y)
test_x_, test_y = np.array(test_x_), np.array(test_y)

### Data iterator

In [None]:
class Dataiterator():
    '''
    Mini-batch iterator over a dataset held in NumPy arrays.

      1) Iterate over shuffled mini-batches with next() or a for-loop. Once all
         examples have been returned, StopIteration is raised and the iterator
         reshuffles itself, so it can be looped over again for the next epoch;
         reset() can also be called explicitly.
      2) Access the entire (unshuffled) dataset using all().

    Every batch is a tuple (batch_X, batch_y, batch_p0). The last batch of an
    epoch may hold fewer than `batch_size` examples.
    '''

    def __init__(self, X, y, seq_length=32, decoder_dim=300, batch_size=32):
        '''
        # Arguments
            X: array of inputs, indexable with an integer index array.
            y: array of targets aligned with X along the first axis.
            seq_length: unused; kept so existing calls stay valid.
            decoder_dim: unused; kept so existing calls stay valid.
            batch_size: maximum number of examples per batch (must be > 0).
        # Raises
            ValueError: if `batch_size` is not positive.
        '''
        if batch_size <= 0:
            # A non-positive batch size would make iteration never terminate.
            raise ValueError('batch_size must be a positive integer')
        self.X = X
        self.y = y
        # All-zero companion array, one row of width 3 per example.
        # NOTE(review): presumably the dummy query fed to the model's `p0`
        # input; the width 3 is hard-coded - confirm it matches the model.
        self.p0 = np.zeros((len(X), 3))
        self.num_data = len(X) # total number of examples
        self.batch_size = batch_size # batch size
        self.reset() # initial: shuffle examples and set index to 0

    def __iter__(self): # the object is its own iterator
        return self

    def reset(self):
        '''Rewind to the first example and draw a fresh random order.'''
        self.idx = 0
        self.order = np.random.permutation(self.num_data) # shuffled example ids

    def __next__(self):
        '''Return the next (batch_X, batch_y, batch_p0) tuple.

        # Raises
            StopIteration: when every example of the current epoch has been
                returned; the iterator is reshuffled first so that it can be
                iterated again.
        '''
        if self.idx >= self.num_data:
            # Epoch finished (or dataset empty): prepare the next epoch.
            self.reset()
            raise StopIteration()

        # Slicing clips at the end of `order`, so the final batch is simply
        # shorter instead of being thrown away.
        X_ids = self.order[self.idx:self.idx + self.batch_size]
        self.idx += len(X_ids)

        batch_X = self.X[X_ids]
        batch_y = self.y[X_ids]
        batch_p0 = self.p0[X_ids]

        return batch_X, batch_y, batch_p0

    def all(self):
        '''Return the complete dataset as (X, y, p0), in original order.'''
        return self.X, self.y, self.p0

### LSTM model for document-level sentiment classification

In [None]:
from keras.layers import Dense, Dropout, Activation, Embedding, LSTM, Input, RepeatVector, Dot, Concatenate, Reshape
from keras.models import Model

In [None]:
import keras.backend as K
from keras.engine.topology import Layer
from keras import initializers
from keras import regularizers
from keras import constraints

### Attention Network

In [None]:
maxlen

In [None]:
vocab_size = len(words_idx) 
vocab_size

In [None]:
def custom_softmax(x, axis=1):
    """Softmax that can normalise along an arbitrary axis.

    # Arguments
        x: Tensor with at least two dimensions.
        axis: Integer, axis along which the softmax normalisation is applied
            when `x` has more than two dimensions.
    # Returns
        Tensor, output of the softmax transformation.
    # Raises
        ValueError: In case `dim(x) == 1`.
    """
    rank = K.ndim(x)

    # 2-D input: delegate to the backend implementation.
    if rank == 2:
        return K.softmax(x)

    # Higher rank: explicit softmax along `axis`; the maximum is subtracted
    # before exponentiating for numerical stability.
    if rank > 2:
        shifted = x - K.max(x, axis=axis, keepdims=True)
        exps = K.exp(shifted)
        return exps / K.sum(exps, axis=axis, keepdims=True)

    raise ValueError('Cannot apply softmax to a tensor that is 1D')

In [None]:
# Layer instances used by attention(). They are created once at module level,
# so every call to attention() reuses the same layers (and their weights).
repeator = RepeatVector(maxlen, name='repeator_att')  # repeats the query maxlen times
concatenator = Concatenate(axis=-1, name='concator_att')  # joins keys and repeated query on the last axis
densor1 = Dense(300, activation = "tanh", name='densor1_att')  # hidden scoring layer
densor2 = Dense(1, activation = "relu", name='densor2_att')  # one score per position
activator = Activation(custom_softmax, name='attention_weights')  # custom_softmax normalises along axis 1 by default
dotor = Dot(axes = 1, name='dotor_att')  # contracts attention weights with keys over axis 1

## What are the attention inputs (keys, query) in this sentiment classification task?

In [None]:
def attention(keys, query):
    """Compute an attention-weighted summary of `keys` for a given `query`.

    Uses the module-level layers `repeator`, `concatenator`, `densor1`,
    `densor2`, `activator` and `dotor`:
      1. repeat `query` `maxlen` times,
      2. concatenate it with `keys` along the last axis,
      3. score every position with Dense(300, tanh) -> Dense(1, relu),
      4. normalise the scores with `custom_softmax`,
      5. take the dot product of the weights and `keys` over axis 1.

    The static shape of every intermediate tensor is printed as a debugging aid.

    # Arguments
        keys: tensor attended over.
            Assumes shape (batch, maxlen, features) - TODO confirm.
        query: tensor repeated along a new time axis.
            Assumes shape (batch, query_dim) - TODO confirm.
    # Returns
        Tuple (context, alphas): the weighted combination of `keys` and the
        attention weights that produced it.
    """
    # K.int_shape is the public accessor for the static shape; it replaces
    # direct reads of the private `_keras_shape` attribute.
    query = repeator(query)
    print("query shape: %s" %str(K.int_shape(query)))
    concat = concatenator([keys, query])
    print("concat shape: %s" %str(K.int_shape(concat)))
    e1 = densor1(concat)
    print("e1 shape: %s" %str(K.int_shape(e1)))
    e2 = densor2(e1)
    print("e2 shape: %s" %str(K.int_shape(e2)))
    alphas = activator(e2)
    print("alphas shape: %s" %str(K.int_shape(alphas)))
    context = dotor([alphas, keys])
    print("context shape: %s" %str(K.int_shape(context)))

    return context, alphas

### Input Layer

### 1. Sentence / document input

In [None]:
### YOUR CODE HERE


### 2. Query input for attention network

In [None]:
# UNCOMMENT AND REPLACE '?' WITH THE CORRECT NUMBER OF DIMENSIONS

#p0 = Input(shape=(?,), name='p0')
#probs = [p0]

### Layer to train embedding weights of words

In [None]:
### YOUR CODE HERE

### RNN-based layer 

In [None]:
### YOUR CODE HERE

### Get attention weights and attention-based context

In [None]:
### UNCOMMENT AND REPLACE KEYS?, QUERY? WITH THE CORRESPONDING TENSORS AS ATTENTION KEYS AND QUERY

#print ('use content attention to get term weights')
#att_context, att_weights = attention(KEYS?, QUERY?)

### Prediction layer

In [None]:
# UNCOMMENT AND REPLACE '?' WITH THE CORRECT NUMBER OF DIMENSIONS

#att_context = Reshape((?,))(att_context)

In [None]:
### YOUR CODE HERE


In [None]:
# Wrap the prediction tensor in a list so it can be passed as the model's
# `outputs`. `probs` must already be defined by the exercise cell above.
# NOTE: not idempotent - re-running this cell nests the list one level deeper.
probs = [probs] # Output from last softmax activation layer

### Construct the model

In [None]:
### YOUR CODE HERE

# Assemble the model from its two inputs and the prediction output.
# `sentence_input` and `p0` must have been created in the input-layer exercise
# cells above; a fresh kernel raises NameError here until those are filled in.
model = Model(inputs=[sentence_input, p0], outputs=probs)

In [None]:
import keras.optimizers as opt

In [None]:
# RMSprop with gradient-norm clipping at 10.
# NOTE(review): clipvalue=0 - confirm that the installed Keras version treats 0
# as "no value clipping" rather than clipping every gradient to zero.
optimizer = opt.RMSprop(lr=0.001, rho=0.9, epsilon=1e-06, clipnorm=10, clipvalue=0)
# Multi-class setup: categorical cross-entropy loss, categorical accuracy as metric.
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [None]:
model.summary()

### Training with batch generator

In [None]:
batch_size = 50

In [None]:
# Number of batches that make up one pass over the training data, as an
# integer (rounded up so a trailing partial batch is counted).
train_steps_epoch = int(np.ceil(len(train_x_) / batch_size))
# batch_size must be passed by keyword: the third positional parameter of
# Dataiterator is `seq_length`, so a positional value would leave the
# iterator at its default batch size.
batch_train_iter = Dataiterator(train_x_, train_y, batch_size=batch_size)

In [None]:
# Number of batches that make up one pass over the validation data, as an
# integer (rounded up so a trailing partial batch is counted).
val_steps_epoch = int(np.ceil(len(dev_x_) / batch_size))
# batch_size must be passed by keyword: the third positional parameter of
# Dataiterator is `seq_length`, so a positional value would leave the
# iterator at its default batch size.
batch_val_iter = Dataiterator(dev_x_, dev_y, batch_size=batch_size)

In [None]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

def train_generator(model, batch_train_iter, batch_val_iter, epochs=20):
    """Train `model` from mini-batch iterators and return the training history.

    The step counts per epoch are read from the module-level variables
    `train_steps_epoch` and `val_steps_epoch`, which must be defined before
    this function is called.

    # Arguments
        model: compiled Keras model taking the two inputs [X, p0].
        batch_train_iter: re-iterable object yielding (X, y, p0) training
            batches; it is iterated again for every epoch.
        batch_val_iter: same, for the validation data.
        epochs: maximum number of epochs to train.
    # Returns
        The object returned by `model.fit_generator`.
    """
    # Stop once val_loss has not improved for 10 epochs, and write a weights
    # checkpoint to the working directory after every epoch.
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=10),
        ModelCheckpoint(filepath=os.path.join('./', '{epoch:02d}-{loss:.2f}.check'),
                        monitor='val_loss', save_best_only=False,
                        save_weights_only=True),
    ]

    def _endless_batches(batch_iter):
        # fit_generator expects a generator that never ends, so restart the
        # iterator whenever it is exhausted. Batches are streamed one at a
        # time (not collected per epoch) and yielded as (inputs, targets).
        while True:
            for X, y, p0 in batch_iter:
                yield ([X, p0], y)

    history = model.fit_generator(_endless_batches(batch_train_iter),
                                  validation_data=_endless_batches(batch_val_iter),
                                  validation_steps=val_steps_epoch,
                                  steps_per_epoch=train_steps_epoch,
                                  epochs=epochs, callbacks=callbacks)
    return history
      

In [None]:
train_generator(model, batch_train_iter, batch_val_iter)