In [1]:
import os
# GPU selection. These variables are set ahead of the Keras/TensorFlow imports
# that follow (NOTE(review): presumably they must be in place before CUDA is
# first initialised -- confirm).
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2"     # comma-separated GPU ids to expose, e.g. "0,1,2,3" for four GPUs
# Number of replicas handed to multi_gpu_model. It is derived from the device
# list above so the two values cannot drift apart when the list is edited.
gpu_num = len([dev for dev in os.environ["CUDA_VISIBLE_DEVICES"].split(",") if dev.strip()])
from keras.utils import multi_gpu_model

import pickle
import keras
from keras.models import Sequential, Model
from keras.layers.core import Dense, Dropout, Activation, Flatten, Lambda, Masking
from keras.layers.embeddings import Embedding
from keras.layers import Convolution1D, MaxPooling1D, GlobalMaxPooling1D, Input, Dense, Reshape, LSTM, GRU, Bidirectional, TimeDistributed
from keras.layers.merge import concatenate, add
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils
from keras import initializers
from keras.engine.topology import Layer
from keras import backend as K
import tensorflow as tf
import numpy as np
from util.util_functions import getWordIdx
from sklearn.metrics import classification_report, roc_auc_score

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# loading processed data

In [2]:
# Load the preprocessed corpora, vocabulary, embedding matrix and labels from
# the pickle files written by the preprocessing step.
#
# Every file is opened in a `with` block so its handle is closed as soon as the
# object has been read; previously only the last of the six handles was closed.
#
# NOTE(review): pickle.load can execute arbitrary code while deserialising.
# Only load files that were produced by this project itself.
with open('pickle_data/train_copus_pad.pickle', 'rb') as file:
    train_copus_padded = pickle.load(file)

with open('pickle_data/test_copus_pad.pickle', 'rb') as file:
    test_copus_padded = pickle.load(file)

with open('pickle_data/vocab_train.pickle', 'rb') as file:
    vocab_to_int_train = pickle.load(file)

with open('pickle_data/embedding_matrix', 'rb') as file:
    embedding_matrix = pickle.load(file)

with open('pickle_data/train_label.pickle', 'rb') as file:
    train_label = pickle.load(file)

with open('pickle_data/test_label.pickle', 'rb') as file:
    test_label = pickle.load(file)

In [3]:
# Report the shapes of the arrays that were just loaded
print('train test data shape:', train_copus_padded.shape, test_copus_padded.shape)
print('embedding_matrix shape:', embedding_matrix.shape)

# Number of entries in the training vocabulary
vocab_size = len(vocab_to_int_train)
print('vocabulary size:', vocab_size)

# Axis 1 of the padded corpus is the sentence slot within a review,
# axis 2 is the word slot within a sentence.
MAX_SENTS, MAX_SENT_LENGTH = train_copus_padded.shape[1], train_copus_padded.shape[2]
print('max sent number in a review:', MAX_SENTS, '\nmax words in a sentence:', MAX_SENT_LENGTH)

train test data shape: (25000, 30, 235) (25000, 30, 235)
embedding_matrix shape: (106180, 300)
vocabulary size: 106180
max sent number in a review: 30 
max words in a sentence: 235


# keras model building

In [4]:
# attention layer

class AttLayer(Layer):
    """Attention pooling over the middle (time) axis of a 3-D tensor.

    For an input ``x`` of shape ``(batch, steps, features)`` the layer computes

        u_t = tanh(x_t W + b)
        a_t = exp(u_t . u) / (sum_s exp(u_s . u) + epsilon)
        out = sum_t a_t * x_t

    and returns ``out`` with shape ``(batch, features)``.  When a mask is
    supplied, masked steps receive zero weight.  No mask is propagated to the
    following layer.

    Args:
        attention_dim: width of the hidden attention projection.
        **kwargs: forwarded to the Keras ``Layer`` constructor (e.g. ``name``).
    """

    def __init__(self, attention_dim, **kwargs):
        # Run the base constructor first: it initialises the layer's own
        # bookkeeping attributes (including `supports_masking` in Keras 2.x),
        # so values assigned before it would be overwritten.
        super(AttLayer, self).__init__(**kwargs)
        self.init = initializers.get('normal')
        self.supports_masking = True
        self.attention_dim = attention_dim

    def build(self, input_shape):
        """Create W (features, attention_dim), b (attention_dim,) and u (attention_dim, 1)."""
        assert len(input_shape) == 3
        # add_weight registers the variables with the layer, so they are
        # tracked as trainable weights without assigning `trainable_weights`.
        self.W = self.add_weight(name='W',
                                 shape=(input_shape[-1], self.attention_dim),
                                 initializer=self.init,
                                 trainable=True)
        self.b = self.add_weight(name='b',
                                 shape=(self.attention_dim,),
                                 initializer=self.init,
                                 trainable=True)
        self.u = self.add_weight(name='u',
                                 shape=(self.attention_dim, 1),
                                 initializer=self.init,
                                 trainable=True)
        super(AttLayer, self).build(input_shape)

    def compute_mask(self, inputs, mask=None):
        """Return None: the time axis is reduced away, so no mask is passed on."""
        return None

    def call(self, x, mask=None):
        """Return the attention-weighted sum of ``x`` over axis 1.

        Args:
            x: tensor of shape (batch, steps, features).
            mask: optional (batch, steps) mask; masked steps get zero weight.
        """
        # uit = tanh(xW + b)                      -> (batch, steps, attention_dim)
        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        # unnormalised score per step             -> (batch, steps)
        ait = K.squeeze(K.dot(uit, self.u), -1)
        ait = K.exp(ait)

        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            ait *= K.cast(mask, K.floatx())
        # Normalise over the step axis; epsilon keeps a fully masked row finite
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        # Broadcast the weights over the feature axis and pool over steps
        ait = K.expand_dims(ait)
        weighted_input = x * ait
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        """Output drops the step axis: (batch, features)."""
        return (input_shape[0], input_shape[-1])

    def get_config(self):
        """Include ``attention_dim`` so the layer can be re-created from its config."""
        config = super(AttLayer, self).get_config()
        config['attention_dim'] = self.attention_dim
        return config

In [5]:
# hyper-parameters

# -- model size --
gru_dim = 100         # units of each GRU wrapped by the Bidirectional layers
atten_dim = 100       # attention_dim passed to every AttLayer
dense_dim = 50        # units of the tanh Dense layer ahead of the output layer
dropout_rate = 0.3    # used for GRU dropout, GRU recurrent dropout and the Dropout layer

# -- training schedule --
batch_size = 100
epoch_num = 10

# True  -> one-hot targets, 2-unit softmax output
# False -> raw targets, 1-unit sigmoid output
categorical_label = True

if categorical_label:
    # Only the training labels are converted; evaluation compares predicted
    # class indices against the raw test_label, so no test-side conversion.
    train_label_cat = np_utils.to_categorical(train_label)

In [6]:
# Layers shared by the model-building cells

# Frozen embedding lookup whose weights are taken from embedding_matrix
embedding_layer = Embedding(
    input_dim=vocab_size,
    output_dim=embedding_matrix.shape[1],
    input_length=MAX_SENT_LENGTH,
    weights=[embedding_matrix],
    trainable=False,
)

# Bidirectional GRU applied to the words of a sentence
rnn_layer1 = Bidirectional(
    layer=GRU(units=gru_dim,
              dropout=dropout_rate,
              recurrent_dropout=dropout_rate,
              return_sequences=True)
)
# Bidirectional GRU applied to the sentences of a review
rnn_layer2 = Bidirectional(
    layer=GRU(units=gru_dim,
              dropout=dropout_rate,
              recurrent_dropout=dropout_rate,
              return_sequences=True)
)
# rnn_layer = GRU(gru_dim, dropout=dropout_rate, recurrent_dropout=dropout_rate, return_sequences=True)


# cnn_layer = Convolution1D(nb_filter=50,filter_length=3,border_mode='same',activation='tanh',subsample_length=1)
# max_pooling_layer = GlobalMaxPooling1D()

In [7]:
# Sentence encoder: word ids -> embeddings -> BiGRU -> attention-pooled vector
sentence_input = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')

# (MAX_SENT_LENGTH,) ids become (MAX_SENT_LENGTH, embed dimension) vectors
sent_embedding = embedding_layer(sentence_input)

# Mask out padding tokens.
# NOTE(review): this relies on padding ids mapping to an all-zero embedding
# row -- confirm against how embedding_matrix was built.
sent_embedding = Masking(
    mask_value=0.,
    input_shape=(MAX_SENT_LENGTH, embedding_matrix.shape[1]),
)(sent_embedding)

# Per-word hidden states, then one attention-pooled vector per sentence
sent_lstm = rnn_layer1(sent_embedding)
sent_att = AttLayer(atten_dim)(sent_lstm)

sentEncoder = Model(inputs=sentence_input, outputs=sent_att)
sentEncoder.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 235)               0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 235, 300)          31854000  
_________________________________________________________________
masking_1 (Masking)          (None, 235, 300)          0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 235, 200)          240600    
_________________________________________________________________
att_layer_1 (AttLayer)       (None, 200)               20200     
Total params: 32,114,800
Trainable params: 260,800
Non-trainable params: 31,854,000
_________________________________________________________________


In [8]:
# build document encoder model
review_input = Input(shape=(MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')
# Apply the sentence encoder to every sentence slot of the review
review_encoder = TimeDistributed(sentEncoder)(review_input)   # out shape: (None, MAX_SENTS, gru_dim*2)
# mask out padding sentences
# NOTE(review): assumes an all-padding sentence is encoded as an all-zero
# vector (AttLayer zeroes the weights of masked steps) -- confirm.
review_encoder = Masking(mask_value=0., input_shape=(MAX_SENTS, gru_dim*2))(review_encoder)

rnn_out = rnn_layer2(review_encoder) # (batch_size, timesteps, gru_dimx2)

att_out = AttLayer(atten_dim)(rnn_out)
# att_out = Dropout(dropout_rate)(att_out)

dense = Dense(dense_dim, activation='tanh')(att_out)
dense = Dropout(dropout_rate)(dense)

# The two label encodings differ only in the output layer, the loss and the
# metric alias, so choose those here and build/compile the model once.
if categorical_label:
    out_units, out_activation = 2, 'softmax'       # categorical output
    loss_name, metric_names = 'categorical_crossentropy', ['acc']
else:
    out_units, out_activation = 1, 'sigmoid'
    loss_name, metric_names = 'binary_crossentropy', ['accuracy']

preds = Dense(out_units, activation=out_activation)(dense)

# Keep a reference to the single-device model: it is the object to use for
# saving, and it would be lost if `model` were simply overwritten by the
# multi-GPU replica below.
template_model = Model(review_input, preds)
# summary() prints the table itself; wrapping it in print() added a stray "None".
template_model.summary()

# Replicates the template model on multiple GPUs.
# This assumes that your machine has `gpu_num` available GPUs.
model = multi_gpu_model(template_model, gpus=gpu_num)
model.compile(loss=loss_name, optimizer='Adam', metrics=metric_names)

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 30, 235)           0         
_________________________________________________________________
time_distributed_1 (TimeDist (None, 30, 200)           32114800  
_________________________________________________________________
masking_2 (Masking)          (None, 30, 200)           0         
_________________________________________________________________
bidirectional_2 (Bidirection (None, 30, 200)           180600    
_________________________________________________________________
att_layer_2 (AttLayer)       (None, 200)               20200     
_________________________________________________________________
dense_1 (Dense)              (None, 50)                10050     
_________________________________________________________________
dropout_1 (Dropout)          (None, 50)                0         
__________

In [9]:
print('Training...')
# The target array depends only on the label encoding, so pick it once
train_target = train_label_cat if categorical_label else train_label

for i in range(epoch_num):
    print('Training for epoch {}/{}'.format(i+1,epoch_num))
    # Fit a single epoch per iteration so the test set can be scored after each one
    model.fit(train_copus_padded, train_target, batch_size=batch_size, epochs=1)

    print('Evaluating...')
    pred_test_prob = model.predict(test_copus_padded, batch_size=batch_size, verbose=True)

    # Turn the model output into class labels
    if pred_test_prob.shape[-1] <= 1:
        # single output unit: threshold at 0.5, then flatten to one label per sample
        pred_test = (pred_test_prob > 0.5).astype('int32')
        pred_test = pred_test.reshape(pred_test.shape[0])
    else:
        # several output units: take the index of the largest one
        pred_test = pred_test_prob.argmax(axis=-1)

    # Fraction of test samples whose predicted label equals the reference label
    acc = np.sum(pred_test == test_label) / float(len(test_label))

    print("Accuracy: %.4f" % (acc))
    print(classification_report(test_label, pred_test, digits=5, labels=[0, 1]))

Training...
Training for epoch 1/10
Epoch 1/1
Evaluating...
Accuracy: 0.8854
             precision    recall  f1-score   support

          0    0.88267   0.88888   0.88576     12500
          1    0.88809   0.88184   0.88496     12500

avg / total    0.88538   0.88536   0.88536     25000

Training for epoch 2/10
Epoch 1/1
Evaluating...
Accuracy: 0.8954
             precision    recall  f1-score   support

          0    0.88896   0.90368   0.89626     12500
          1    0.90206   0.88712   0.89453     12500

avg / total    0.89551   0.89540   0.89539     25000

Training for epoch 3/10
Epoch 1/1
Evaluating...
Accuracy: 0.8995
             precision    recall  f1-score   support

          0    0.89409   0.90632   0.90016     12500
          1    0.90502   0.89264   0.89879     12500

avg / total    0.89955   0.89948   0.89948     25000

Training for epoch 4/10
Epoch 1/1
Evaluating...
Accuracy: 0.8944
             precision    recall  f1-score   support

          0    0.93307   0.84