In [1]:
"""
Date : 14/06/2018
Time : 10:35
Version : 1.1
Dataset : IMDB dataset(https://www.kaggle.com/c/word2vec-nlp-tutorial/data)
          Glove 6B(https://nlp.stanford.edu/projects/glove/)
Description : Performing Text Classification using Keras for different deep neural architectures.
"""

'\nDate : 14/06/2018\nTime : 10:35\nVersion : 1.1\nDataset : IMDB dataset(https://www.kaggle.com/c/word2vec-nlp-tutorial/data)\n          Glove 6B(https://nlp.stanford.edu/projects/glove/)\nDescription : Performing Text Classification using Keras for different deep neural architectures.\n'

In [2]:
# Installing dependencies
"""
! pip install numpy
! pip install pandas
! apt-get install python3-bs4 
! easy_install beautifulsoup4
! pip install beautifulsoup4
! python setup.py install
! pip install keras
! pip install theano; python -c "import theano"
"""

'\n! pip install numpy\n! pip install pandas\n! apt-get install python3-bs4 \n! easy_install beautifulsoup4\n! pip install beautifulsoup4\n! python setup.py install\n! pip install keras\n! pip install theano; python -c "import theano"\n'

In [35]:
# Importing libraries
import numpy as np
import pandas as pd
import pickle
from collections import defaultdict
import re
import types
import tempfile

from bs4 import BeautifulSoup

import sys
import os

os.environ['KERAS_BACKEND']='theano'

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import to_categorical

from keras.layers import Embedding
from keras.layers import Dense, Input, Flatten
from keras.layers import Conv1D, MaxPooling1D, Embedding, Merge, Dropout
from keras.layers import LSTM, GRU, Bidirectional
from keras.models import Model,load_model
from keras.models import Sequential
from keras.preprocessing import sequence

from keras import backend as K
from keras.engine.topology import Layer, InputSpec
from keras import initializers

import warnings
warnings.filterwarnings("ignore")

# Text classification using CNN (https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf)

In [34]:
# Setting the hyper-parameters
MAX_SEQUENCE_LENGTH = 1000
MAX_NB_WORDS = 20000
EMBEDDING_DIM = 100
SPLIT_RATIO = 0.25
DROPOUT_MARGIN = 0.2

In [68]:
# Using BeautifulSoup to remove some html tags and remove some unwanted characters.
def clean_str(string):
    """
    Tokenization/string cleaning for dataset
    Every dataset is lower cased except
    """
    string = re.sub(r"\\", "", string)    
    string = re.sub(r"\'", "", string)    
    string = re.sub(r"\"", "", string)    
    return string.strip().lower()

In [6]:
# Importing the training data
train_data = pd.read_csv("labeledTrainData.tsv",sep="\t")
print("Shape of the training dataset:",train_data.shape)
train_data.head()

Shape of the training dataset: (25000, 3)


Unnamed: 0,id,sentiment,review
0,5814_8,1,With all this stuff going down at the moment w...
1,2381_9,1,"\The Classic War of the Worlds\"" by Timothy Hi..."
2,7759_3,0,The film starts with a manager (Nicholas Bell)...
3,3630_4,0,It must be assumed that those who praised this...
4,9495_8,1,Superbly trashy and wondrously unpretentious 8...


In [7]:
# Change the valuesof texts and labels as per the features and labels of your dataset 
texts,labels = ([] for i in range(2))
for idx in range(train_data.review.shape[0]):
    text = BeautifulSoup(train_data.review[idx])
    texts.append(clean_str(text.get_text()))
    labels.append(train_data.sentiment[idx])

In [8]:
# Performing text-processing
# Initializing Tokenizer from keras
tokenizer = Tokenizer(nb_words=MAX_NB_WORDS)
# Fitting text on the tokenizer
tokenizer.fit_on_texts(texts)
# Converting text to sequence
sequences = tokenizer.texts_to_sequences(texts)

# Finding unique tokenizer
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))

# Padding sequences to the length of MAX_SEQUENCE_LENGTH
data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)

Found 81501 unique tokens.


In [9]:
# Getting the labels and features data
labels = to_categorical(np.asarray(labels))
print('Shape of data tensor:', data.shape)
print('Shape of label tensor:', labels.shape)

Shape of data tensor: (25000, 1000)
Shape of label tensor: (25000, 2)


In [10]:
# Getting the splitting index for training and testing data
indices = np.arange(data.shape[0])
np.random.shuffle(indices)
data = data[indices]
labels = labels[indices]
validation_samples = int(SPLIT_RATIO * data.shape[0])

In [11]:
# Getting the testing and training dataset
X_train = data[:-validation_samples]
y_train = labels[:-validation_samples]
X_test = data[-validation_samples:]
y_test = labels[-validation_samples:]

In [12]:
# Using Google Glove 6B vector 100d and randomizing the vector
"""
Glove is an unsupervised learning algorithm for obtaining vector representations for words. 
Training is performed on aggregated global word-word co-occurrence statistics from a corpus, and the resulting 
representations showcase interesting linear substructures of the word vector space.
"""
current_directory = os.getcwd()
glove_directory = current_directory + "/glove.6B/glove.6B.100d.txt"
embeddings_index = {}
f = open(glove_directory)
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()

print('Total %s word vectors in Glove 6B 100d.' % len(embeddings_index))

Total 400000 word vectors in Glove 6B 100d.


In [13]:
# Getting vector for unique words in the corpus
embedding_matrix = np.random.random((len(word_index) + 1, EMBEDDING_DIM))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector

# Initializing the embedded layer
embedding_layer = Embedding(len(word_index) + 1,EMBEDDING_DIM,weights=[embedding_matrix],input_length=MAX_SEQUENCE_LENGTH,trainable=True)

# Using Simple Convolution Neural Network (Accuracy = 96.82%)
# (Filters = 128, Size = 5, and Maximum Pooling of 5 and 35)

In [14]:
# Un-comment the below mentioned code to train your model
"""
# Configuring the neural network
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
l_cov1= Conv1D(128, 5, activation='relu')(embedded_sequences)
l_pool1 = MaxPooling1D(5)(l_cov1)
l_cov2 = Conv1D(128, 5, activation='relu')(l_pool1)
l_pool2 = MaxPooling1D(5)(l_cov2)
l_cov3 = Conv1D(128, 5, activation='relu')(l_pool2)
l_pool3 = MaxPooling1D(35)(l_cov3)  # global max pooling
l_flat = Flatten()(l_pool3)
l_dense = Dense(128, activation='relu')(l_flat)
preds = Dense(2, activation='softmax')(l_dense)
"""

"\n# Configuring the neural network\nsequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')\nembedded_sequences = embedding_layer(sequence_input)\nl_cov1= Conv1D(128, 5, activation='relu')(embedded_sequences)\nl_pool1 = MaxPooling1D(5)(l_cov1)\nl_cov2 = Conv1D(128, 5, activation='relu')(l_pool1)\nl_pool2 = MaxPooling1D(5)(l_cov2)\nl_cov3 = Conv1D(128, 5, activation='relu')(l_pool2)\nl_pool3 = MaxPooling1D(35)(l_cov3)  # global max pooling\nl_flat = Flatten()(l_pool3)\nl_dense = Dense(128, activation='relu')(l_flat)\npreds = Dense(2, activation='softmax')(l_dense)\n"

In [15]:
"""
# Compiling the model for cross entropy loss
model_simple_cnn = Model(sequence_input, preds)
model_simple_cnn.compile(loss='categorical_crossentropy',optimizer='rmsprop',metrics=['acc'])
print("model fitting - simplified convolutional neural network")
model_simple_cnn.summary()
# Training the model
model_simple_cnn.fit(X_train, y_train, validation_data=(X_test, y_test),nb_epoch=10, batch_size=128)
# Saving the model
model_simple_cnn.save("simple_cnn.h5")
"""

'\n# Compiling the model for cross entropy loss\nmodel_simple_cnn = Model(sequence_input, preds)\nmodel_simple_cnn.compile(loss=\'categorical_crossentropy\',optimizer=\'rmsprop\',metrics=[\'acc\'])\nprint("model fitting - simplified convolutional neural network")\nmodel_simple_cnn.summary()\n# Training the model\nmodel_simple_cnn.fit(X_train, y_train, validation_data=(X_test, y_test),nb_epoch=10, batch_size=128)\n# Saving the model\nmodel_simple_cnn.save("simple_cnn.h5")\n'

In [49]:
# Loading the pre-trained model
model_simple_cnn = load_model("simple_cnn.h5")

In [50]:
# Checking the accuracy
accuracy_simple_cnn = model_simple_cnn.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (accuracy_simple_cnn[1]*100))

Accuracy: 96.82%


In [17]:
# Making predictions in terms of probabilities for each class
prediction_simple_cnn = model_simple_cnn.predict(X_test,batch_size=10,verbose=0)

# Using Deeper Convolutional neural network with multiple filter sizes (Accuracy = 97.87%)

In [18]:
# Un-comment the below mentioned code to train your model
"""
convs = []
filter_sizes = [3,4,5]
"""

'\nconvs = []\nfilter_sizes = [3,4,5]\n'

In [19]:
"""
# Configuring the neural network
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
"""

"\n# Configuring the neural network\nsequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')\nembedded_sequences = embedding_layer(sequence_input)\n"

In [20]:
"""
for fsz in filter_sizes:
    l_conv = Conv1D(nb_filter=128,filter_length=fsz,activation='relu')(embedded_sequences)
    l_pool = MaxPooling1D(5)(l_conv)
    convs.append(l_pool)
"""

"\nfor fsz in filter_sizes:\n    l_conv = Conv1D(nb_filter=128,filter_length=fsz,activation='relu')(embedded_sequences)\n    l_pool = MaxPooling1D(5)(l_conv)\n    convs.append(l_pool)\n"

In [21]:
"""
l_merge = Merge(mode='concat', concat_axis=1)(convs)
l_cov1= Conv1D(128, 5, activation='relu')(l_merge)
l_pool1 = MaxPooling1D(5)(l_cov1)
l_cov2 = Conv1D(128, 5, activation='relu')(l_pool1)
l_pool2 = MaxPooling1D(30)(l_cov2)
l_cov3 = Conv1D(128, 5, activation='relu')(l_pool2)
l_pool3 = MaxPooling1D(30)(l_cov3)
l_flat = Flatten()(l_pool3)
l_dense = Dense(128, activation='relu')(l_flat)
preds = Dense(2, activation='softmax')(l_dense)
"""

"\nl_merge = Merge(mode='concat', concat_axis=1)(convs)\nl_cov1= Conv1D(128, 5, activation='relu')(l_merge)\nl_pool1 = MaxPooling1D(5)(l_cov1)\nl_cov2 = Conv1D(128, 5, activation='relu')(l_pool1)\nl_pool2 = MaxPooling1D(30)(l_cov2)\nl_flat = Flatten()(l_pool2)\nl_dense = Dense(128, activation='relu')(l_flat)\npreds = Dense(2, activation='softmax')(l_dense)\n"

In [22]:
"""
# Compiling the Model for cross entropy loss
model_deep_cnn = Model(sequence_input, preds)
model_deep_cnn.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])

print("model fitting - more complex convolutional neural network")
model_deep_cnn.summary()
# Training the model
model_deep_cnn.fit(X_train, y_train, validation_data=(X_test, y_test),nb_epoch=20, batch_size=50)
# Saving the model
model_deep_cnn.save("deep_cnn.h5")
"""

'\n# Compiling the Model for cross entropy loss\nmodel_deep_cnn = Model(sequence_input, preds)\nmodel_deep_cnn.compile(loss=\'categorical_crossentropy\',\n              optimizer=\'rmsprop\',\n              metrics=[\'acc\'])\n\nprint("model fitting - more complex convolutional neural network")\nmodel_deep_cnn.summary()\n# Training the model\nmodel_deep_cnn.fit(X_train, y_train, validation_data=(X_test, y_test),nb_epoch=20, batch_size=50)\n# Saving the model\nmodel_deep_cnn.save("deep_cnn.h5")\n'

In [23]:
# Loading the pre-trained model
model_deep_cnn = load_model("deep_cnn.h5")

In [48]:
# Checking the accuracy
accuracy_deep_cnn = model_deep_cnn.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (accuracy_deep_cnn[1]*100))

Accuracy: 97.87%


In [24]:
# Making predictions in terms of probabilities for each class
prediction_deep_cnn = model_deep_cnn.predict(X_test,batch_size=10,verbose=0)

# Text classification using Bidirectional LSTM encoders without adding dropout(Accuracy = 96.82%)
# (https://arxiv.org/pdf/1801.06261.pdf)

In [25]:
"""
By using LSTM encoder, we intent to encode all information of the text in the last output of recurrent neural 
network. Bidirectional LSTM is used in the below mentioned code and the last output of both LSTM are 
concatenated at the end.
"""

'\nBy using LSTM encoder, we intent to encode all information of the text in the last output of recurrent neural \nnetwork. Bidirectional LSTM is used in the below mentioned code and the last output of both LSTM are \nconcatenated at the end.\n'

In [27]:
# Un-comment the below mentioned code to train your model

"""
# Configuring the neural network
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
l_lstm = Bidirectional(LSTM(100))(embedded_sequences)
preds = Dense(2, activation='softmax')(l_lstm)
model_bidirectional_lstm = Model(sequence_input, preds)
model_bidirectional_lstm.compile(loss='categorical_crossentropy',optimizer='rmsprop',metrics=['acc'])

print("model fitting - Bidirectional LSTM")
model_bidirectional_lstm.summary()
model_bidirectional_lstm.fit(X_train, y_train, validation_data=(X_test, y_test),nb_epoch=10, batch_size=50)
# Saving the model
model_bidirectional_lstm.save("lstm_encoder_without_dropout_layer.h5")
"""

'\n# Configuring the neural network\nsequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype=\'int32\')\nembedded_sequences = embedding_layer(sequence_input)\nl_lstm = Bidirectional(LSTM(100))(embedded_sequences)\npreds = Dense(2, activation=\'softmax\')(l_lstm)\nmodel_lstm = Model(sequence_input, preds)\nmodel_lstm.compile(loss=\'categorical_crossentropy\',optimizer=\'rmsprop\',metrics=[\'acc\'])\n\nprint("model fitting - Bidirectional LSTM")\nmodel_lstm.summary()\nmodel_lstm.fit(X_train, y_train, validation_data=(X_test, y_test),nb_epoch=10, batch_size=50)\n# Saving the model\nmodel_lstm.save("lstm_encoder.h5")\n'

In [37]:
# Loading the pre-trained model
model_bidirectional_lstm = load_model("simple_cnn.h5")

In [51]:
# Checking the accuracy
accuracy_bidirection_lstm = model_bidirectional_lstm.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (accuracy_bidirection_lstm[1]*100))

Accuracy: 96.82%


In [38]:
# Making predictions in terms of probabilities for each class
prediction_bidirectional_lstm = model_bidirectional_lstm.predict(X_test,batch_size=10,verbose=0)

# Text classification using LSTM with dropout(Accuracy = 88.02%)

In [61]:
# Un-comment the below mentioned code to train your model
# dropout is applied to the input and recurrent connections of the memory units with the LSTM precisely and 
# separately.

# Configuring the neural network
"""
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
model_lstm_with_dropout = Sequential()
model_lstm_with_dropout.add(Embedding(MAX_NB_WORDS,EMBEDDING_DIM,input_length = MAX_SEQUENCE_LENGTH))
model_lstm_with_dropout.add(LSTM(100,dropout=DROPOUT_MARGIN, recurrent_dropout=DROPOUT_MARGIN))
model_lstm_with_dropout.add(Dense(2, activation='softmax'))
model_lstm_with_dropout.compile(loss='categorical_crossentropy',optimizer='rmsprop',metrics=['acc'])

print("model fitting - LSTM with dropout")
model_lstm_with_dropout.summary()
model_lstm_with_dropout.fit(X_train, y_train, validation_data=(X_test, y_test),nb_epoch=10, batch_size=50)
# Saving the model
model_lstm_with_dropout.save("lstm_encoder_with_dropout_layer.h5")
"""

'\nsequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype=\'int32\')\nembedded_sequences = embedding_layer(sequence_input)\nmodel_lstm_with_dropout = Sequential()\nmodel_lstm_with_dropout.add(Embedding(MAX_NB_WORDS,EMBEDDING_DIM,input_length = MAX_SEQUENCE_LENGTH))\nmodel_lstm_with_dropout.add(LSTM(100,dropout=DROPOUT_MARGIN, recurrent_dropout=DROPOUT_MARGIN))\nmodel_lstm_with_dropout.add(Dense(2, activation=\'softmax\'))\nmodel_lstm_with_dropout.compile(loss=\'categorical_crossentropy\',optimizer=\'rmsprop\',metrics=[\'acc\'])\n\nprint("model fitting - LSTM with dropout")\nmodel_lstm_with_dropout.summary()\nmodel_lstm_with_dropout.fit(X_train, y_train, validation_data=(X_test, y_test),nb_epoch=10, batch_size=50)\n# Saving the model\nmodel_lstm_with_dropout.save("lstm_encoder_with_dropout_layer.h5")\n'

In [55]:
# Loading the pre-trained model
model_lstm_with_dropout = load_model("lstm_encoder_with_dropout_layer.h5")

In [56]:
# checking the accuracy
accuracy_lstm_with_dropout = model_lstm_with_dropout.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (accuracy_lstm_with_dropout[1]*100))

Accuracy: 88.02%


In [57]:
# Making predictions in terms of probabilities for each class
prediction_lstm_with_dropout = model_lstm_with_dropout.predict(X_test,batch_size=10,verbose=0)

# Text classification using CNN and LSTM on the top of each other with dropout(http://www.aclweb.org/anthology/C16-1258, Accuracy = 99.40%)

In [58]:
"""
Adding an one-dimensional CNN and max pooling layers after the Embedding layer which is then feed the 
consolidated features to the LSTM.
"""

'\nAdding an one-dimensional CNN and max pooling layers after the Embedding layer which is then feed the \nconsolidated features to the LSTM.\n'

In [66]:
# Un-comment the below mentioned code to train your model

"""
# Configuring the neural network
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
model_cnn_lstm = Sequential()
model_cnn_lstm.add(Embedding(MAX_NB_WORDS,EMBEDDING_DIM,input_length = MAX_SEQUENCE_LENGTH))
model_cnn_lstm.add(Conv1D(filters=128, kernel_size=5, padding='same', activation='relu'))
model_cnn_lstm.add(MaxPooling1D(pool_size=2))
model_cnn_lstm.add(LSTM(100,dropout=DROPOUT_MARGIN, recurrent_dropout=DROPOUT_MARGIN))

model_cnn_lstm.add(Dense(2, activation='softmax'))
model_cnn_lstm.compile(loss='categorical_crossentropy',optimizer='rmsprop',metrics=['acc'])

print("model fitting - CNN and LSTM")
model_cnn_lstm.summary()
model_cnn_lstm.fit(X_train, y_train, validation_data=(X_test, y_test),nb_epoch=10, batch_size=50)
# Saving the model
model_cnn_lstm.save("lstm_cnn.h5")
"""

'\n# Configuring the neural network\nsequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype=\'int32\')\nembedded_sequences = embedding_layer(sequence_input)\nmodel_cnn_lstm = Sequential()\nmodel_cnn_lstm.add(Embedding(MAX_NB_WORDS,EMBEDDING_DIM,input_length = MAX_SEQUENCE_LENGTH))\nmodel_cnn_lstm.add(Conv1D(filters=128, kernel_size=5, padding=\'same\', activation=\'relu\'))\nmodel_cnn_lstm.add(MaxPooling1D(pool_size=2))\nmodel_cnn_lstm.add(LSTM(100,dropout=DROPOUT_MARGIN, recurrent_dropout=DROPOUT_MARGIN))\n\nmodel_cnn_lstm.add(Dense(2, activation=\'softmax\'))\nmodel_cnn_lstm.compile(loss=\'categorical_crossentropy\',optimizer=\'rmsprop\',metrics=[\'acc\'])\n\nprint("model fitting - CNN and LSTM")\nmodel_cnn_lstm.summary()\nmodel_cnn_lstm.fit(X_train, y_train, validation_data=(X_test, y_test),nb_epoch=10, batch_size=50)\n# Saving the model\nmodel_cnn_lstm.save("lstm_cnn.h5")\n'

In [63]:
# Loading the pre-trained model
model_cnn_lstm = load_model("lstm_cnn.h5")

In [64]:
# checking the accuracy
accuracy_lstm_cnn = model_cnn_lstm.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (accuracy_lstm_cnn[1]*100))

Accuracy: 88.11%


In [65]:
# Making predictions in terms of probabilities for each class
prediction_lstm_cnn = model_cnn_lstm.predict(X_test,batch_size=10,verbose=0)

# Text classification using Attentional based GRU network
# (http://colinraffel.com/publications/iclr2016feed.pdf)

In [30]:
"""
The below mentioned code illustrates a simplified model of attention which is applicable to feed-forward neural
networks and demonstrate that the resulting model can solve the synthetic “addition” and “multiplication” 
long-term memory problems for longer sequence lengths.
"""

'\nThe below mentioned code illustrates a simplified model of attention which is applicable to feed-forward neural\nnetworks and demonstrate that the resulting model can solve the synthetic “addition” and “multiplication” \nlong-term memory problems for longer sequence lengths.\n'

In [31]:
# Building a custom Keras layer to implement attention layer.
"""
The following code can only strictly run on Theano backend since tensorflow matrix dot product doesn’t behave 
the same as np.dot. I don’t know how to get a 2D tensor by dot product of 3D tensor of recurrent layer output 
and 1D tensor of weight.
"""

class AttLayer(Layer):
    def __init__(self, **kwargs):
        self.init = initializers.get('normal')
        #self.input_spec = [InputSpec(ndim=3)]
        super(AttLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape)==3
        #self.W = self.init((input_shape[-1],1))
        self.W = self.init((input_shape[-1],))
        #self.input_spec = [InputSpec(shape=input_shape)]
        self.trainable_weights = [self.W]
        super(AttLayer, self).build(input_shape)

    def call(self, x, mask=None):
        eij = K.tanh(K.dot(x, self.W))
        
        ai = K.exp(eij)
        weights = ai/K.sum(ai, axis=1).dimshuffle(0,'x')
        
        weighted_input = x*weights.dimshuffle(0,1,'x')
        return weighted_input.sum(axis=1)

    def get_output_shape_for(self, input_shape):
        return (input_shape[0], input_shape[-1])

In [32]:
embedding_matrix = np.random.random((len(word_index) + 1, EMBEDDING_DIM))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector
        
embedding_layer = Embedding(len(word_index) + 1,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=True)

In [69]:
# Un-comment the below mentioned code to train your model

# Configuring the neural network
"""sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
l_gru = Bidirectional(GRU(100, return_sequences=True))(embedded_sequences)
l_att = AttLayer()(l_gru)
preds = Dense(2, activation='softmax')(l_att)
model_attension_gru = Model(sequence_input, preds)
model_attension_gru.compile(loss='categorical_crossentropy',optimizer='rmsprop',metrics=['acc'])

print("model fitting - attention GRU network")
model_attension_gru.summary()
model_attension_gru.fit(X_train, y_train, validation_data=(X_test, y_test),nb_epoch=10, batch_size=50)
# Saving the model
model_attension_gru.save("attension-gru.h5")"""

'sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype=\'int32\')\nembedded_sequences = embedding_layer(sequence_input)\nl_gru = Bidirectional(GRU(100, return_sequences=True))(embedded_sequences)\nl_att = AttLayer()(l_gru)\npreds = Dense(2, activation=\'softmax\')(l_att)\nmodel_attension_gru = Model(sequence_input, preds)\nmodel_attension_gru.compile(loss=\'categorical_crossentropy\',optimizer=\'rmsprop\',metrics=[\'acc\'])\n\nprint("model fitting - attention GRU network")\nmodel_attension_gru.summary()\nmodel_attension_gru.fit(X_train, y_train, validation_data=(X_test, y_test),nb_epoch=10, batch_size=50)\n# Saving the model\nmodel_attension_gru.save("attension-gru.h5")'

In [None]:
# Loading the pre-trained model
model_attension_gru = load_model("attension-gru.h5")

In [None]:
# Making predictions in terms of probabilities for each class
prediction_attension_gru = model_attension_gru.predict(X_test,batch_size=10,verbose=0)

# Text Classification using Hierarchical attention network
# (https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf)

In [None]:
# Constructing the data input as 3D numpy arrays
tokenizer = Tokenizer(nb_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(texts)

data = np.zeros((len(texts), MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')

for i, sentences in enumerate(reviews):
    for j, sent in enumerate(sentences):
        if j< MAX_SENTS:
            wordTokens = text_to_word_sequence(sent)
            #update 1/10/2017 - bug fixed - set max number of words
            k=0
            for _, word in enumerate(wordTokens):
                if k<MAX_SENT_LENGTH and tokenizer.word_index[word]<MAX_NB_WORDS:
                    data[i,j,k] = tokenizer.word_index[word]
                    k=k+1               