In [0]:
# Mount Google Drive at /drive so the dataset paths used below resolve.
# Colab-only: on first run this prompts for an authorization code.
from google.colab import drive
drive.mount('/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /drive


In [0]:
import keras.backend as K
from keras.engine.topology import Layer
from keras import initializers
from keras import regularizers
from keras import constraints
import tensorflow as tf


from keras.models import Model
from keras.layers import Input, Embedding, Dense, Lambda, Dropout, LSTM
from keras.layers import Reshape, Activation, RepeatVector, concatenate, Concatenate, Dot, Multiply

In [0]:
# You will need to change the following directory path according to your own path
#cd drive/My Drive/Recsys-2019/sequence_classifier

In [0]:
import os
import sys
import codecs
import operator
import numpy as np
import re
from time import time

In [0]:
import _pickle as cPickle

In [0]:
# Directory from which every pickle in this notebook is read (vocabulary, train/dev/test
# splits, aspect terms and the pretraining data). Adjust to your own Drive layout.
aspect_path = '/drive/My Drive/Deep Learing Course/practice-5-data/aspect_level-sentiment/aspect_level/'

In [0]:
# Directory of the document-level data.
# NOTE(review): not referenced anywhere else in the visible notebook -- the pretraining
# pickles are read from aspect_path instead; confirm whether this is still needed.
doc_path = '/drive/My Drive/Deep Learing Course/practice-5-data/doc_level-sentiment/doc_level/'

In [0]:
### Reading the preprocessed data

In [0]:
def read_pickle(data_path, file_name):
    """Load and return the object pickled in ``data_path/file_name``.

    Args:
        data_path: directory containing the pickle file.
        file_name: name of the pickle file inside ``data_path``.

    Returns:
        The unpickled Python object.

    Raises:
        OSError: if the file cannot be opened.
    """
    # NOTE: unpickling can execute arbitrary code -- only load files you trust.
    # The context manager closes the handle even when load() raises.
    with open(os.path.join(data_path, file_name), 'rb') as f:
        return cPickle.load(f)

def save_pickle(data_path, file_name, data):
    """Pickle ``data`` into ``data_path/file_name`` and print the target path.

    Args:
        data_path: directory to write into (must already exist).
        file_name: name of the file to create or overwrite.
        data: any picklable object.

    Raises:
        OSError: if the file cannot be opened for writing.
    """
    target = os.path.join(data_path, file_name)
    # The context manager closes (and flushes) the file even if dump() raises,
    # so the confirmation below is only printed once the data is on disk.
    with open(target, 'wb') as f:
        cPickle.dump(data, f)
    print(" file saved to: %s"%(target))

In [0]:
# Vocabulary; its length is later used as the embedding input size.
vocab = read_pickle(aspect_path, 'all_vocab.pkl')

# Aspect-level data, loaded split by split as (sentences, labels, aspect terms).
train_x, train_y, train_aspect = (
    read_pickle(aspect_path, name)
    for name in ('train_x.pkl', 'train_y.pkl', 'train_aspect.pkl')
)
dev_x, dev_y, dev_aspect = (
    read_pickle(aspect_path, name)
    for name in ('dev_x.pkl', 'dev_y.pkl', 'dev_aspect.pkl')
)
test_x, test_y, test_aspect = (
    read_pickle(aspect_path, name)
    for name in ('test_x.pkl', 'test_y.pkl', 'test_aspect.pkl')
)

# Document-level (pretraining) sentences and their labels.
pretrain_data, pretrain_label = (
    read_pickle(aspect_path, name)
    for name in ('pretrain_data.pkl', 'pretrain_label.pkl')
)

### Batch generator and data iterator 

In [0]:
class Dataiterator():
    '''
      Mini-batch iterator that pairs aspect-level examples with randomly drawn
      document-level examples.

      1) Iteration over minibatches using next() or a for-loop; every pass visits each
         aspect-level example exactly once in a fresh random order. The last batch of a
         pass may be smaller than batch_size.
      2) Access to the entire dataset using all()

      All five data containers are indexed with integer index arrays, so they are
      expected to support numpy-style fancy indexing.
    '''

    def __init__(self, aspect_data, doc_data, seq_length=32, decoder_dim=300, batch_size=32):
        '''
        aspect_data: [X, y, aspect_terms] for the aspect-level task (same length each)
        doc_data:    [X, y] for the document-level task
        seq_length, decoder_dim: unused; kept so existing positional calls keep working
        batch_size:  number of aspect-level examples per batch (must be >= 1)
        '''
        if batch_size < 1:
            raise ValueError("batch_size must be >= 1, got %r" % (batch_size,))

        self.X_aspect = aspect_data[0]
        self.y_aspect = aspect_data[1]
        self.aspect_terms = aspect_data[2]

        self.X_doc = doc_data[0]
        self.y_doc = doc_data[1]

        self.num_data = len(self.X_aspect)
        self.len_doc_data = len(self.X_doc)
        self.batch_size = batch_size # batch size
        self.reset() # initial: shuffling examples and set index to 0

    def __iter__(self): # the object is its own iterator
        return self

    def reset(self): # start a new pass
        self.idx = 0
        self.order = np.random.permutation(self.num_data) # shuffling examples by providing randomized ids

    def __next__(self): # return model inputs - outputs per batch
        # Exhaustion is detected *before* building a batch, so the batch that consumes
        # the final examples (full or partial) is still returned instead of discarded.
        if self.idx >= self.num_data:
            self.reset() # reshuffle so the next for-loop starts a new pass
            raise StopIteration()

        stop = min(self.idx + self.batch_size, self.num_data)
        X_ids = self.order[self.idx:stop] # shuffled ids of this batch
        self.idx = stop

        batch_X_aspect = self.X_aspect[X_ids]
        batch_y_aspect = self.y_aspect[X_ids]
        batch_aspect_terms = self.aspect_terms[X_ids]

        # Document-level examples are sampled with replacement, one per aspect example,
        # so both tasks always contribute the same number of rows to a batch.
        indices_2 = np.random.choice(self.len_doc_data, len(X_ids))
        batch_X_doc = self.X_doc[indices_2]
        batch_y_doc = self.y_doc[indices_2]

        return batch_X_aspect, batch_y_aspect, batch_aspect_terms, batch_X_doc, batch_y_doc

    def all(self): # return all data examples
        return self.X_aspect, self.y_aspect, self.aspect_terms, self.X_doc, self.y_doc

In [0]:
overal_maxlen = 82   # maximum sentence length; fixes the time dimension of sentence_input and of the attention RepeatVector
overal_maxlen_aspect = 7   # maximum number of words in an aspect term; fixes the length of aspect_input

### Attention Network

In [0]:
# Custom layer: softmax over the time axis (axis=1) that can ignore masked (padded) steps
class Custom_softmax(Layer):
    """Softmax along axis 1 with optional masking of padded time steps.

    With ``mask_zero=True`` and an incoming Keras mask, the exponentiated scores of
    masked positions are zeroed before normalising, so padded steps receive weight 0.
    Without a mask (or with ``mask_zero=False``) this is a plain softmax over axis 1.

    The layer consumes the mask: ``compute_mask`` returns None, so layers after it
    see no mask.
    """

    def __init__(self, mask_zero=True, **kwargs):
        super(Custom_softmax, self).__init__(**kwargs)
        # Set after the base constructor, which initialises supports_masking itself.
        self.mask_zero = mask_zero
        self.supports_masking = True

    def call(self, x, mask=None):
        # Only take the masked path when a mask was actually propagated; previously a
        # missing mask reached K.cast(None, ...) and failed.
        if self.mask_zero and mask is not None:
            a = K.exp(x)
            mask = K.cast(mask, K.floatx())  # 1.0 for real steps, 0.0 for padded ones
            mask = K.expand_dims(mask)       # add a trailing axis so it broadcasts against x
            a = a * mask
            # epsilon guards against division by zero when every step is masked
            return a / (K.sum(a, axis=1, keepdims=True) + K.epsilon())
        return K.softmax(x, axis=1)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[1], 1)

    def compute_mask(self, x, mask=None):
        return None

    def get_config(self):
        # Expose mask_zero so the layer can be re-created from a saved config.
        config = super(Custom_softmax, self).get_config()
        config['mask_zero'] = self.mask_zero
        return config


### Attention function
$t$: query, the averaged word embedding of the aspect term

$h$: keys, the LSTM output sequence for the sentence

$score(t, h_i) = v^T \tanh(W[t; h_i])$

$\alpha_i = \mathrm{softmax}_i(score(t, h_i))$

$context = \sum_i \alpha_i h_i$

In [0]:
# Instantiate the attention sub-layers once at module level; attention() below applies these same instances
repeator = RepeatVector(overal_maxlen, name='repeator_att')  # tile the query along time: [None, emb_dim] -> [None, overal_maxlen, emb_dim]
concatenator = Concatenate(axis=-1, name='concator_att')     # concatenate keys and tiled query on the feature axis: [t;h]
densor1 = Dense(300, activation = "tanh", name='densor1_att') # tanh(W[t;h])
densor2 = Dense(1, activation = "relu", name='densor2_att')   # projects to one score per time step (the v^T term); NOTE(review): a relu is applied on top, which the score formula in the text does not mention -- confirm intended
activator = Custom_softmax(mask_zero=True,name='softmax_att') # masked softmax over time steps -> attention weights alpha
dotor = Dot(axes = 1, name='dotor_att')                       # weighted sum of the LSTM output sequence using the attention weights alpha

In [0]:
########################################################################################################################################
#### Shape of keys: [batch-size, overal-maxlen, Dimension_output_lstm]; shape of query: [batch-size, Dimension-output-wordEmbedding] ###
########################################################################################################################################
def attention(keys, query):
    """Attend over `keys` with `query` and return the context vector and the weights.

    Uses the module-level layers repeator, concatenator, densor1, densor2, activator
    and dotor, and prints the static shape of every intermediate tensor.

    Args:
        keys: 3-D tensor, one vector per time step (here the LSTM output sequence).
        query: 2-D tensor, one vector per example (here the averaged aspect embedding);
            it is tiled along the time axis before being concatenated with `keys`.

    Returns:
        (context, alphas): the attention-weighted sum of `keys` and the attention
        weights over the time steps.
    """
    # K.int_shape is the public way to read a static shape; it replaces the private
    # `_keras_shape` attribute and prints the same tuple.
    query = repeator(query)                # tile the aspect vector to overal_maxlen steps
    print("query shape: %s" %str(K.int_shape(query)))
    concat = concatenator([keys, query])   # [t;h] for every time step
    print("concat shape: %s" %str(K.int_shape(concat)))
    e1 = densor1(concat)                   # tanh(W[t;h])
    print("e1 shape: %s" %str(K.int_shape(e1)))
    e2 = densor2(e1)                       # one score per time step
    print("e2 shape: %s" %str(K.int_shape(e2)))
    alphas = activator(e2)                 # masked softmax over time steps
    print("alphas shape: %s" %str(K.int_shape(alphas)))
    context = dotor([alphas, keys])        # weighted sum over the time axis
    print("context shape: %s" %str(K.int_shape(context)))

    return context, alphas

In [0]:
# Custom layer: average a tensor over axis=1, optionally ignoring masked (padded) steps
class Average(Layer):
    """Mean over axis 1 with optional masking of padded time steps.

    With ``mask_zero=True`` and an incoming Keras mask, masked positions are zeroed
    and the sum is divided by the number of unmasked steps. Without a mask (or with
    ``mask_zero=False``) this is a plain mean over axis 1.

    The layer consumes the mask: ``compute_mask`` returns None, so layers after it
    see no mask.
    """

    def __init__(self, mask_zero=True, **kwargs):
        super(Average, self).__init__(**kwargs)
        # Set after the base constructor, which initialises supports_masking itself.
        self.mask_zero = mask_zero
        self.supports_masking = True

    def call(self, x, mask=None):
        # Only take the masked path when a mask was actually propagated; previously a
        # missing mask reached K.cast(None, ...) and failed.
        if self.mask_zero and mask is not None:
            mask = K.cast(mask, K.floatx())  # 1.0 for real steps, 0.0 for padded ones
            mask = K.expand_dims(mask)       # add a trailing axis so it broadcasts against x
            x = x * mask
            # epsilon guards against division by zero when every step is masked
            return K.sum(x, axis=1) / (K.sum(mask, axis=1) + K.epsilon())
        return K.mean(x, axis=1)

    def compute_output_shape(self, input_shape):
        # axis 1 is reduced away: (batch, ..., features) -> (batch, features)
        return (input_shape[0], input_shape[-1])

    def compute_mask(self, x, mask=None):
        return None

    def get_config(self):
        # Expose mask_zero so the layer can be re-created from a saved config.
        config = super(Average, self).get_config()
        config['mask_zero'] = self.mask_zero
        return config

# Create computation Graph

In [0]:
dropout = 0.5     # passed as the `dropout` argument of the shared LSTM
recurrent_dropout = 0.1  # passed as the `recurrent_dropout` argument of the shared LSTM
vocab_size = len(vocab)  # input dimension of the shared embedding layer
num_outputs = 3 # number of classes predicted by each of the two output heads

### Define inputs

In [0]:
##### create the input tensors (integer word indices) #####
sentence_input = Input(shape=(overal_maxlen,), dtype='int32', name='sentence_input')  # sentence from the aspect-level data, fixed length overal_maxlen
aspect_input = Input(shape=(overal_maxlen_aspect,), dtype='int32', name='aspect_input') # words of the aspect term, fixed length overal_maxlen_aspect
pretrain_input = Input(shape=(None,), dtype='int32', name='pretrain_input')             # sentence from the document-level data, variable length

### Word embedding layer

In [0]:
##### create word embedding layer #####
# One 300-dimensional embedding layer applied to all three inputs, so aspect terms,
# aspect-level sentences and document-level sentences share the same word vectors.
# mask_zero=True makes the layer emit a mask that treats index 0 as padding.
word_emb = Embedding(vocab_size, 300, mask_zero=True, name='word_emb')

### Aspect-level representation (averaged)

In [0]:
### represent the aspect term as the average of its word embeddings ###
print ('use average term embs as aspect embedding')
aspect_term_embs = word_emb(aspect_input)
aspect_embs = Average(mask_zero=True, name='aspect_emb')(aspect_term_embs)  # an aspect term may span several words; average their embeddings, skipping masked (padded) positions

use average term embs as aspect embedding


### Sentence-level representation from two domains

In [0]:
### sentence representation: both domains go through the same embedding layer ###
sentence_embs = word_emb(sentence_input) # from aspect-level domain
pretrain_embs = word_emb(pretrain_input) # from document-level domain

### LSTM layer (shared between the aspect-level and document-level sentence representations)

In [0]:
# Single LSTM instance (300 units, returning the full output sequence); it is applied to
# both sentence inputs below, so both domains are encoded with the same weights.
rnn = LSTM(300, return_sequences=True, dropout=dropout, recurrent_dropout=recurrent_dropout, name='lstm')

In [0]:
### sentence representation: per-time-step LSTM outputs from the shared LSTM ###
sentence_lstm = rnn(sentence_embs)    # from aspect-level domain
pretrain_lstm = rnn(pretrain_embs)     # from document-level domain
print(sentence_lstm.shape)

(None, 82, 300)


### Attention layer

In [0]:
att_context,att_weights=attention(sentence_lstm,aspect_embs)  # build the attention layer: keys = LSTM output sequence of the sentence, query = averaged word embedding of the aspect term

query shape: (None, 82, 300)
concat shape: (None, 82, 600)
e1 shape: (None, 82, 300)
e2 shape: (None, 82, 1)
alphas shape: (None, 82, 1)
context shape: (None, 1, 300)


### Prediction layer

In [0]:
#prediction layer: one dense head per task on top of the shared encoder
pretrain_avg = Average(mask_zero=True)(pretrain_lstm)               # document-level sentence vector: masked mean of the LSTM outputs over time
sentence_output = Dense(num_outputs, name='dense_1')(att_context)  # aspect-level class scores from the attention context
pretrain_output = Dense(num_outputs, name='dense_2')(pretrain_avg)  # document-level class scores
sentence_output = Reshape((num_outputs,))(sentence_output)          # squeeze the shape from (None,1,3) to (None,3)

In [0]:
# Softmax over the class scores of each head. The layer names ('aspect_model',
# 'pretrain_model') are the keys used for losses, loss weights and metrics in model.compile.
aspect_probs = Activation('softmax', name='aspect_model')(sentence_output)
doc_probs = Activation('softmax', name='pretrain_model')(pretrain_output)

### Build the model

In [0]:
# Two-headed model: inputs are [sentence, aspect term, document-level sentence];
# outputs are [aspect-level class probabilities, document-level class probabilities].
model = Model(inputs=[sentence_input, aspect_input, pretrain_input], outputs=[aspect_probs, doc_probs])

In [0]:
import keras.optimizers as opt

# RMSprop with gradient-norm clipping at 10.
# The former `clipvalue=0` argument was dropped: this Keras generation only clips by value
# when clipvalue > 0, so it had no effect here, while newer optimizer APIs presumably
# apply any non-None clipvalue as-is, which would clamp every gradient to zero.
optimizer = opt.RMSprop(lr=0.001, rho=0.9, epsilon=1e-06, clipnorm=10)  #define optimizer

In [0]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
sentence_input (InputLayer)     (None, 82)           0                                            
__________________________________________________________________________________________________
aspect_input (InputLayer)       (None, 7)            0                                            
__________________________________________________________________________________________________
word_emb (Embedding)            multiple             3000900     aspect_input[0][0]               
                                                                 sentence_input[0][0]             
                                                                 pretrain_input[0][0]             
____________________________________________________________________________________________

In [0]:
# Joint objective: aspect-level cross-entropy (weight 1) plus the document-level
# cross-entropy down-weighted to 0.1. The dictionary keys are the names of the two
# output Activation layers. categorical_crossentropy expects one-hot encoded targets.
model.compile(optimizer=optimizer,
              loss={'aspect_model': 'categorical_crossentropy', 'pretrain_model': 'categorical_crossentropy'},
              loss_weights = {'aspect_model': 1, 'pretrain_model': 0.1},
              metrics = {'aspect_model': 'categorical_accuracy', 'pretrain_model': 'categorical_accuracy'})

### Training

In [0]:
batch_size = 32  # mini-batch size; used for the steps-per-epoch counts and the data iterators below

In [0]:
#for train set
# Number of generator steps that make up one epoch. Rounded up to an integer
# (fit_generator documents steps_per_epoch as an integer); the previous float
# len(train_x)/batch_size resulted in the same number of steps.
train_steps_epoch = int(np.ceil(len(train_x) / batch_size))
# batch_size must be passed by keyword: the third positional parameter of Dataiterator
# is seq_length, so a positional batch_size was silently ignored (it only appeared to
# work because the default batch size is also 32).
batch_train_iter = Dataiterator([train_x, train_y, train_aspect],
                                [pretrain_data, pretrain_label], batch_size=batch_size)  # data iterator

In [0]:
#for validation set
# Number of generator steps per validation run. Rounded up to an integer
# (fit_generator documents validation_steps as an integer); the previous float
# len(dev_x)/batch_size resulted in the same number of steps.
val_steps_epoch = int(np.ceil(len(dev_x) / batch_size))
# batch_size must be passed by keyword: the third positional parameter of Dataiterator
# is seq_length, so a positional batch_size was silently ignored (it only appeared to
# work because the default batch size is also 32).
batch_val_iter = Dataiterator([dev_x, dev_y, dev_aspect],
                              [pretrain_data, pretrain_label], batch_size=batch_size)

In [0]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

def train_generator(model, batch_train_iter, batch_val_iter):
    """Train `model` with batches streamed from the two data iterators.

    Every batch produced by an iterator, (X, y, aspect, pretrain_X, pretrain_y), is
    re-packed as [[X, aspect, pretrain_X], [y, pretrain_y]], i.e. [inputs, targets] in
    the order of the model's inputs and outputs. Weights are checkpointed to the
    current directory after every epoch.

    The step counts are read from the module-level `train_steps_epoch` and
    `val_steps_epoch`.

    Args:
        model: compiled Keras model with three inputs and two outputs.
        batch_train_iter: iterable yielding training batches as described above.
        batch_val_iter: iterable yielding validation batches as described above.

    Returns:
        The History object returned by `model.fit_generator`.
    """
    # NOTE(review): patience (10) equals the number of epochs (10), so EarlyStopping
    # cannot trigger within a single call; lower the patience or raise the epochs.
    earlystop_callbacks = [EarlyStopping(monitor='val_loss', patience=10),
                     ModelCheckpoint(filepath=os.path.join('./','{epoch:02d}-{loss:.2f}.check'), \
                                     monitor='val_loss', save_best_only=False, \
                                     save_weights_only=True)
                     ]

    def _endless_batches(batch_iter):
        # Stream batches one at a time (instead of materialising a whole pass in a
        # list first) and start a new pass whenever the iterator is exhausted.
        while True:
            produced = False
            for X, y, aspect, pretrain_X, pretrain_y in batch_iter:
                produced = True
                yield [[X, aspect, pretrain_X], [y, pretrain_y]]
            if not produced:
                # Without this guard an iterator that never yields would make the
                # generator spin forever and training would hang silently.
                raise ValueError("data iterator produced no batches")

    history = model.fit_generator(_endless_batches(batch_train_iter),
                                  validation_data=_endless_batches(batch_val_iter),
                                  validation_steps=val_steps_epoch, steps_per_epoch=train_steps_epoch,
                                  epochs = 10, callbacks = earlystop_callbacks)
    return history
      

In [0]:
train_generator(model, batch_train_iter, batch_val_iter)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/10

KeyboardInterrupt: ignored