In [1]:
import os
import sys
import codecs
import operator
import numpy as np
import re
from time import time

In [2]:
import _pickle as cPickle

In [3]:
# Directory holding the pickled aspect-level files that are loaded with read_pickle() below.
aspect_path = 'data/aspect_level-sentiment/aspect_level'

In [4]:
# Directory for the document-level data.
# NOTE(review): not referenced by any cell visible in this notebook (the pretrain pickles
# are read from aspect_path) — confirm whether it is still needed.
doc_path = 'data/doc_level-sentiment/doc_level'

### Reading preprocessed data

In [5]:
def read_pickle(data_path, file_name):
    """Load and return the object pickled in ``data_path/file_name``.

    # Arguments
        data_path: Directory containing the pickle file.
        file_name: Name of the pickle file inside `data_path`.
    # Returns
        The unpickled Python object.
    # Raises
        OSError: If the file cannot be opened.
        Any exception raised by the unpickler for a corrupt or empty file.
    """
    # NOTE(security): unpickling can execute arbitrary code; only load files you trust.
    # The context manager closes the handle even when unpickling fails.
    with open(os.path.join(data_path, file_name), 'rb') as f:
        return cPickle.load(f)

def save_pickle(data_path, file_name, data):
    """Pickle `data` to ``data_path/file_name`` and print the destination.

    # Arguments
        data_path: Directory to write into (must already exist).
        file_name: Name of the pickle file to create or overwrite.
        data: Any picklable Python object.
    # Raises
        OSError: If the file cannot be opened for writing.
        Any exception raised by the pickler for an unpicklable object.
    """
    target = os.path.join(data_path, file_name)
    # The context manager closes (and flushes) the handle even when pickling fails.
    with open(target, 'wb') as f:
        cPickle.dump(data, f)
    print(" file saved to: %s"%(target))

In [6]:
# Vocabulary object; its length is later used as the embedding input size.
vocab = read_pickle(aspect_path, 'all_vocab.pkl')

# Aspect-level inputs, labels and aspect terms, one object per split (train / dev / test).
train_x, dev_x, test_x = (
    read_pickle(aspect_path, name) for name in ('train_x.pkl', 'dev_x.pkl', 'test_x.pkl')
)
train_y, dev_y, test_y = (
    read_pickle(aspect_path, name) for name in ('train_y.pkl', 'dev_y.pkl', 'test_y.pkl')
)
train_aspect, dev_aspect, test_aspect = (
    read_pickle(aspect_path, name)
    for name in ('train_aspect.pkl', 'dev_aspect.pkl', 'test_aspect.pkl')
)

# Data and labels for the auxiliary pretrain task (stored in the same directory).
pretrain_data, pretrain_label = (
    read_pickle(aspect_path, name) for name in ('pretrain_data.pkl', 'pretrain_label.pkl')
)

### Batch generator and data iterator 

In [7]:
class Dataiterator():
    '''
    Mini-batch iterator over aspect-level data paired with document-level data.

      1) Iteration over minibatches using next(); the example order is reshuffled by
         reset(), which is also called automatically when a pass over the data ends
      2) Access to the entire dataset using all()

    Only complete batches are produced: a trailing remainder smaller than
    `batch_size` is dropped at the end of each pass.
    '''

    def __init__(self, aspect_data, doc_data, seq_length=32, decoder_dim=300, batch_size=32):
        '''
        # Arguments
            aspect_data: Sequence (X, y, aspect_terms); each element must support
                indexing with an integer array (e.g. NumPy arrays).
            doc_data: Sequence (X, y) with the same indexing requirement.
            seq_length: Unused; kept for backward compatibility.
            decoder_dim: Unused; kept for backward compatibility.
            batch_size: Number of examples per batch; must be positive.
        # Raises
            ValueError: If `batch_size` is not positive.
        '''
        if batch_size <= 0:
            raise ValueError('batch_size must be a positive integer')

        self.X_aspect = aspect_data[0]
        self.y_aspect = aspect_data[1]
        self.aspect_terms = aspect_data[2]

        # NOTE: the document-level arrays are indexed with the same ids as the
        # aspect-level arrays, so they need at least `num_data` rows and only their
        # first `num_data` rows are ever sampled.
        self.X_doc = doc_data[0]
        self.y_doc = doc_data[1]

        self.num_data = len(self.X_aspect)
        self.batch_size = batch_size # batch size
        self.reset() # initial: shuffling examples and set index to 0

    def __iter__(self): # iterates data
        return self

    def reset(self): # initials
        '''Rewind to the first batch and draw a new random example order.'''
        self.idx = 0
        self.order = np.random.permutation(self.num_data) # shuffling examples by providing randomized ids

    def __next__(self): # return model inputs - outputs per batch
        '''
        Return the next batch as a tuple
        (X_aspect, y_aspect, aspect_terms, X_doc, y_doc).

        # Raises
            StopIteration: When fewer than `batch_size` examples remain; the
                iterator is reset first so it can be iterated again.
        '''
        start = self.idx
        stop = start + self.batch_size
        # Test for exhaustion BEFORE building a batch: a batch that ends exactly on
        # the last example is complete and must still be returned.
        if stop > self.num_data:
            self.reset()
            raise StopIteration()

        batch_ids = self.order[start:stop] # random ids from the current shuffling
        self.idx = stop

        return (self.X_aspect[batch_ids],
                self.y_aspect[batch_ids],
                self.aspect_terms[batch_ids],
                self.X_doc[batch_ids],
                self.y_doc[batch_ids])

    def all(self): # return all data examples
        '''Return the full, unshuffled arrays in the same order as a batch tuple.'''
        return self.X_aspect, self.y_aspect, self.aspect_terms, self.X_doc, self.y_doc

### Model

In [8]:
from keras.models import Model
from keras.layers import Input, Embedding, Dense, Lambda, Dropout, LSTM
from keras.layers import Reshape, Activation, RepeatVector, concatenate, Concatenate, Dot, Multiply

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [9]:
import keras.backend as K
from keras.engine.topology import Layer
from keras import initializers
from keras import regularizers
from keras import constraints

### Attention Network

In [10]:
# Fixed sequence lengths used for the sentence and aspect-term inputs below.
# NOTE(review): presumably the padded lengths of the preprocessed pickles — confirm.
overal_maxlen = 82
overal_maxlen_aspect = 7

In [11]:
def custom_softmax(x, axis=1):
    """Softmax that can normalise along an arbitrary axis.

    # Arguments
        x: Tensor of rank 2 or higher.
        axis: Integer axis to normalise over. Only used when the rank is
            greater than 2; rank-2 input is delegated to `K.softmax`.
    # Returns
        Tensor, output of the softmax transformation.
    # Raises
        ValueError: If `x` has fewer than two dimensions.
    """
    rank = K.ndim(x)
    if rank == 2:
        return K.softmax(x)
    if rank > 2:
        # Subtracting the maximum along `axis` leaves the result unchanged
        # mathematically but keeps the exponentials from overflowing.
        exps = K.exp(x - K.max(x, axis=axis, keepdims=True))
        return exps / K.sum(exps, axis=axis, keepdims=True)
    raise ValueError('Cannot apply softmax to a tensor that is 1D')

In [12]:
# Layers used by attention(). They are created once at module level, so every call to
# attention() reuses the same layer objects (and therefore the same weights).
repeator = RepeatVector(overal_maxlen, name='repeator_att')  # repeat the query overal_maxlen times
concatenator = Concatenate(axis=-1, name='concator_att')  # join keys and repeated query on the last axis
densor1 = Dense(300, activation = "tanh", name='densor1_att')  # hidden scoring layer
densor2 = Dense(1, activation = "relu", name='densor2_att')  # one unnormalised score per position
activator = Activation(custom_softmax, name='attention_weights')  # custom_softmax normalises over axis 1 by default
dotor = Dot(axes = 1, name='dotor_att')  # weighted sum of the keys over axis 1

In [13]:
def attention(keys, query):
    """Compute an attention-weighted summary of `keys` conditioned on `query`.

    The query is repeated along the sequence axis, concatenated with the keys,
    scored by the two shared dense layers, normalised by the softmax activation,
    and the resulting weights are used to combine the keys. The shape of every
    intermediate tensor is printed.

    # Arguments
        keys: Sequence tensor to attend over.
        query: Tensor that is repeated by `repeator` to align with `keys`.
    # Returns
        Tuple `(context, alphas)`: the weighted combination of the keys and the
        attention weights.
    """
    tiled_query = repeator(query)
    print("query shape: %s" %str(tiled_query._keras_shape))

    joined = concatenator([keys, tiled_query])
    print("concat shape: %s" %str(joined._keras_shape))

    hidden = densor1(joined)
    print("e1 shape: %s" %str(hidden._keras_shape))

    scores = densor2(hidden)
    print("e2 shape: %s" %str(scores._keras_shape))

    alphas = activator(scores)
    print("alphas shape: %s" %str(alphas._keras_shape))

    context = dotor([alphas, keys])
    print("context shape: %s" %str(context._keras_shape))

    return context, alphas

In [14]:
class Average(Layer):
    """Average a sequence tensor over axis 1, optionally ignoring masked steps.

    # Arguments
        mask_zero: If True and an input mask is available, masked positions are
            excluded from the average. If False, or if no mask reaches the
            layer, a plain mean over axis 1 is returned.
    """

    def __init__(self, mask_zero=True, **kwargs):
        super(Average, self).__init__(**kwargs)
        self.mask_zero = mask_zero
        # Set after the base constructor so that the flag is not reset by it.
        self.supports_masking = True

    def call(self, x, mask=None):
        # Without an incoming mask there is nothing to exclude, so fall back to
        # the plain mean instead of failing on K.cast(None, ...).
        if self.mask_zero and mask is not None:
            mask = K.cast(mask, K.floatx())
            mask = K.expand_dims(mask)
            x = x * mask
            # epsilon guards against division by zero for fully masked rows
            return K.sum(x, axis=1) / (K.sum(mask, axis=1) + K.epsilon())
        return K.mean(x, axis=1)

    def compute_output_shape(self, input_shape):
        # Axis 1 is reduced away: keep the first and the last dimension.
        return (input_shape[0], input_shape[-1])

    def compute_mask(self, x, mask):
        # The output has no sequence axis, so no mask is propagated downstream.
        return None

    def get_config(self):
        # Include the constructor argument so the layer can be re-created from config.
        config = super(Average, self).get_config()
        config['mask_zero'] = self.mask_zero
        return config

### Main model

In [15]:
# Hyperparameters used when building the model below.
dropout = 0.5     # input dropout passed to the shared LSTM
recurrent_dropout = 0.1  # recurrent dropout passed to the shared LSTM
vocab_size = len(vocab)  # input size of the shared embedding layer
num_outputs = 3 # number of output labels; unit count of both Dense output layers

### Inputs: How many inputs do you need for the current task?

In [16]:
##### Inputs #####
# Three integer inputs: the sentence (fixed length overal_maxlen), the aspect term
# (fixed length overal_maxlen_aspect) and the pretrain input (variable length).
sentence_input = Input(shape=(overal_maxlen,), dtype='int32', name='sentence_input')
aspect_input = Input(shape=(overal_maxlen_aspect,), dtype='int32', name='aspect_input')
pretrain_input = Input(shape=(None,), dtype='int32', name='pretrain_input')

### Word-level embedding (shareable between all model inputs)

In [17]:
##### construct word embedding layer #####
# One 300-dimensional embedding layer applied to all three inputs below, so they share
# weights; mask_zero=True makes index 0 a masked (padding) position.
word_emb = Embedding(vocab_size, 300, mask_zero=True, name='word_emb')

### Aspect-level representation (averaged)

In [18]:
### represent aspect as averaged word embedding ###
print ('use average term embs as aspect embedding')
# Embed each aspect token, then average over the token axis while ignoring masked positions.
aspect_term_embs = word_emb(aspect_input)
aspect_embs = Average(mask_zero=True, name='aspect_emb')(aspect_term_embs)

use average term embs as aspect embedding
Instructions for updating:
Colocations handled automatically by placer.


In [19]:
# Sanity check: display the shape of the averaged aspect embedding.
aspect_embs.shape

TensorShape([Dimension(None), Dimension(300)])

### Sentence-level representation from two domains

In [20]:
### sentence representation ###
# Both inputs go through the same embedding layer, so the two domains share word vectors.
sentence_embs = word_emb(sentence_input) # from aspect-level domain
pretrain_embs = word_emb(pretrain_input) # from document-level domain

### LSTM layer (shared between the two sentence-level representations)

In [21]:
# Single LSTM layer object; return_sequences=True keeps one 300-dimensional output per time step.
rnn = LSTM(300, return_sequences=True, dropout=dropout, recurrent_dropout=recurrent_dropout, name='lstm')

In [22]:
# Apply the same LSTM layer to both embedded inputs so its weights are shared.
sentence_lstm = rnn(sentence_embs)
pretrain_lstm = rnn(pretrain_embs)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [23]:
# Attend over the per-token sentence LSTM outputs (keys), using the averaged aspect
# embedding as the query. att_context is consumed by the output layer in the next cell.
att_context, att_weights = attention(sentence_lstm, aspect_embs)

In [None]:
# Pretrain branch: mask-aware average of the LSTM outputs over time.
pretrain_avg = Average(mask_zero=True)(pretrain_lstm)

# Separate output layers per task. att_context is expected to come from the
# attention() call in the preceding cell.
sentence_output = Dense(num_outputs, name='dense_1')(att_context)
pretrain_output = Dense(num_outputs, name='dense_2')(pretrain_avg)

In [None]:
# Reshape the aspect-level output to (batch, num_outputs).
# NOTE(review): presumably removes a singleton axis left by the attention Dot layer — confirm.
sentence_output = Reshape((num_outputs,))(sentence_output)

In [None]:
# Softmax over the labels for each task. The layer names are the keys used for the
# losses, loss weights and metrics in model.compile().
aspect_probs = Activation('softmax', name='aspect_model')(sentence_output)
doc_probs = Activation('softmax', name='pretrain_model')(pretrain_output)

In [None]:
# Multi-task model: three inputs, two outputs (aspect-level and pretrain predictions).
model = Model(inputs=[sentence_input, aspect_input, pretrain_input], outputs=[aspect_probs, doc_probs])

In [None]:
import keras.optimizers as opt

# RMSprop with gradient-norm clipping at 10.
# NOTE(review): clipvalue=0 is presumably meant to disable value clipping — confirm
# against the installed Keras version.
optimizer = opt.RMSprop(lr=0.001, rho=0.9, epsilon=1e-06, clipnorm=10, clipvalue=0)

In [None]:
# Print the layer-by-layer overview of the model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
sentence_input (InputLayer)     (None, 82)           0                                            
__________________________________________________________________________________________________
aspect_input (InputLayer)       (None, 7)            0                                            
__________________________________________________________________________________________________
word_emb (Embedding)            multiple             3000900     aspect_input[0][0]               
                                                                 sentence_input[0][0]             
                                                                 pretrain_input[0][0]             
__________________________________________________________________________________________________
aspect_emb

In [None]:
# Both outputs use categorical cross-entropy; the pretrain loss is weighted 0.1
# against 1 for the aspect loss. Dict keys are the names of the two output layers.
model.compile(optimizer=optimizer,
              loss={'aspect_model': 'categorical_crossentropy', 'pretrain_model': 'categorical_crossentropy'},
              loss_weights = {'aspect_model': 1, 'pretrain_model': 0.1},
              metrics = {'aspect_model': 'categorical_accuracy', 'pretrain_model': 'categorical_accuracy'})

### Training

In [None]:
# Mini-batch size used for the step counts and the data iterators below.
batch_size = 32

In [None]:
# Number of whole mini-batches per pass over the training data. Integer division is
# used because the iterator only yields complete batches and a step count is an integer.
train_steps_epoch = len(train_x) // batch_size
# batch_size is passed by keyword: the third positional parameter of Dataiterator is
# seq_length, so a positional argument would never reach batch_size.
batch_train_iter = Dataiterator([train_x, train_y, train_aspect],
                                [pretrain_data, pretrain_label], batch_size=batch_size)

In [None]:
# Number of whole mini-batches per pass over the validation data. Integer division is
# used because the iterator only yields complete batches and a step count is an integer.
val_steps_epoch = len(dev_x) // batch_size
# batch_size is passed by keyword: the third positional parameter of Dataiterator is
# seq_length, so a positional argument would never reach batch_size.
batch_val_iter = Dataiterator([dev_x, dev_y, dev_aspect],
                              [pretrain_data, pretrain_label], batch_size=batch_size)

In [None]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

def train_generator(model, batch_train_iter, batch_val_iter):
    """Train `model` with `fit_generator` from two batch iterators.

    Runs for up to 20 epochs with early stopping on `val_loss` (patience 10) and
    writes a weights-only checkpoint after every epoch. The step counts are read
    from the module-level `train_steps_epoch` and `val_steps_epoch`.

    # Arguments
        model: Compiled Keras model with inputs [sentence, aspect, pretrain] and
            outputs [aspect labels, pretrain labels].
        batch_train_iter: Re-iterable yielding
            (X, y, aspect, pretrain_X, pretrain_y) training batches.
        batch_val_iter: Same as `batch_train_iter`, for validation.
    # Returns
        The History object returned by `model.fit_generator`.
    # Raises
        ValueError: Inside the generator, if an iterator yields no batch in a
            full pass (it would otherwise spin forever without producing data).
    """
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=10),
        ModelCheckpoint(filepath=os.path.join('./','{epoch:02d}-{loss:.2f}.check'),
                        monitor='val_loss', save_best_only=False,
                        save_weights_only=True),
    ]

    def _endless_batches(batch_iter):
        # Loop over the iterator forever, regrouping each batch into ([inputs], [targets]).
        while True:
            produced = False
            for X, y, aspect, pretrain_X, pretrain_y in batch_iter:
                produced = True
                yield [[X, aspect, pretrain_X], [y, pretrain_y]]
            if not produced:
                raise ValueError('batch iterator produced no batches')

    history = model.fit_generator(_endless_batches(batch_train_iter),
                                  validation_data=_endless_batches(batch_val_iter),
                                  validation_steps=val_steps_epoch,
                                  steps_per_epoch=train_steps_epoch,
                                  epochs=20, callbacks=callbacks)
    return history
      

In [None]:
# Start training; progress is reported per epoch in the cell output.
train_generator(model, batch_train_iter, batch_val_iter)

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Epoch 1/20
 5/57 [=>............................] - ETA: 7:16 - loss: 1.2199 - aspect_model_loss: 1.1142 - pretrain_model_loss: 1.0571 - aspect_model_categorical_accuracy: 0.3750 - pretrain_model_categorical_accuracy: 0.7312