# Assignment 3.2

# Sequence classification model

In [1]:
from __future__ import print_function

## 1. Preprocess data

#### Tasks:

* Create word vocabulary index (use 10.000 most frequent words)
* Preprocess training and validation set for model input. Use vocabulary index as lookup dictionary to transform raw text into integer sequences. You may use code from Practical 5.

In [6]:
from google.colab import files
uploaded = files.upload()
for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))

Saving indices_words.npy to indices_words (1).npy
Saving words_indices.npy to words_indices (1).npy
Saving X_train_word.npy to X_train_word (1).npy
Saving X_valid_word.npy to X_valid_word.npy
Saving y_train_word.npy to y_train_word.npy
Saving y_valid_word.npy to y_valid_word.npy
User uploaded file "indices_words.npy" with length 1204992 bytes
User uploaded file "words_indices.npy" with length 1204992 bytes
User uploaded file "X_train_word.npy" with length 50000128 bytes
User uploaded file "X_valid_word.npy" with length 50000128 bytes
User uploaded file "y_train_word.npy" with length 100128 bytes
User uploaded file "y_valid_word.npy" with length 100128 bytes


In [2]:
import os
import sys
import numpy as np
import pandas as pd
import re
import nltk

from keras.models import Model, Sequential, load_model
from keras.layers import Dense, Input, Dropout, Lambda, LSTM, GRU, Bidirectional
from keras.utils import HDF5Matrix
import keras.callbacks

import tensorflow as tf

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
#### USE FOR JUPYTER NOTEBOOK
DATA_PATH = "data"

# YOUR CODE HERE 
words_indices_nparray = np.load(os.path.join(DATA_PATH,'words_indices.npy'))
words_indices = dict(np.ndarray.tolist(words_indices_nparray))
# In words_indices, the 10000 most common words are stored, together with '<start>' (10001), '<end>' (10002) and '<unk>'(100003)
# '<unk>' means original word did not belong to 10000 most common ones

indices_words_nparray = np.load(os.path.join(DATA_PATH,'indices_words.npy'))

# provide model inputs (sequences from training and validation set) and outputs (sentiment labels)
X_train = np.load(os.path.join(DATA_PATH, "X_train_word.npy"))
Y_train = np.load(os.path.join(DATA_PATH, "Y_train_word.npy"))
X_validation = np.load(os.path.join(DATA_PATH, "X_valid_word.npy"))
Y_validation = np.load(os.path.join(DATA_PATH, "Y_valid_word.npy"))

#trim train and validation samples to speed up training
X_train = X_train[:5000]
Y_train = Y_train[:5000]

X_validation = X_validation[5000:6000]
Y_validation = Y_validation[5000:6000]

In [0]:
#### USE FOR GOOGLE COLAB
# DATA_PATH = "data"

# YOUR CODE HERE 
words_indices_nparray = np.load('words_indices.npy')
words_indices = dict(np.ndarray.tolist(words_indices_nparray))
# In words_indices, the 10000 most common words are stored, together with '<start>' (10001), '<end>' (10002) and '<unk>'(100003)
# '<unk>' means original word did not belong to 10000 most common ones

indices_words_nparray = np.load('indices_words.npy')

# provide model inputs (sequences from training and validation set) and outputs (sentiment labels)
X_train = np.load("X_train_word (1).npy")
Y_train = np.load("y_train_word.npy")
X_validation = np.load( "X_valid_word.npy")
Y_validation = np.load("y_valid_word.npy")

#trim train and validation samples to speed up training
X_train = X_train[:5000]
Y_train = Y_train[:5000]

X_validation = X_validation[5000:6000]
Y_validation = Y_validation[5000:6000]

## 2. Sequence classification model

#### Tasks: 

* Create a RNN model to classify sequence of words on a binary classification task
* Compare four (4) different gate memory units
* Plot model performance (loss and accuracy) of these 4 models
* Present the performance (loss and accuarcy) of these 4 models in a comparison table

In [4]:
# YOUR CODE HERE
num_words = len(words_indices)
max_sequence_length = 500
rnn_dim = 32

def binarize(x, sz=num_words):
    return tf.to_float(tf.one_hot(x, sz, on_value=1, off_value=0, axis=-1))

def binarize_outshape(in_shape):
    return in_shape[0], in_shape[1], num_words

# decide number of epochs and batch_size (used same settings as in practical 5.2)
num_epochs = 5 
batch_size = 64 

### Model 1 : LSTM

In [0]:
# YOUR CODE HERE
# MODEL-1: LSTM

# construct architecture
input_layer = Input(shape=(max_sequence_length, ), name='input_layer', dtype='int32')
word_embedding = Lambda(binarize, output_shape=binarize_outshape,name='word_embedding')(input_layer)
lstm_layer = LSTM(rnn_dim, name='lstm_layer')(word_embedding)
output_layer = Dense(1, name='prediction_layer', activation='sigmoid')(lstm_layer)

# define and load model
lstm_model = Model(inputs=input_layer, outputs=output_layer)
lstm_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_layer (InputLayer)     (None, 500)               0         
_________________________________________________________________
word_embedding (Lambda)      (None, 500, 10004)        0         
_________________________________________________________________
lstm_layer (LSTM)            (None, 32)                1284736   
_________________________________________________________________
prediction_layer (Dense)     (None, 1)                 33        
Total params: 1,284,769
Trainable params: 1,284,769
Non-trainable params: 0
_________________________________________________________________


### Model 2: GRU

In [0]:
# YOUR CODE HERE
# MODEL-2: GRU

# construct the model
input_layer = Input(shape=(max_sequence_length, ), name='input_layer', dtype='int32')
word_embedding = Lambda(binarize, output_shape=binarize_outshape,name='word_embedding')(input_layer)
gru_layer = GRU(rnn_dim, name='gru_layer')(word_embedding)
output_layer = Dense(1, name='prediction_layer', activation='sigmoid')(gru_layer)

# define and load model
gru_model = Model(inputs=input_layer, outputs=output_layer)
gru_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_layer (InputLayer)     (None, 500)               0         
_________________________________________________________________
word_embedding (Lambda)      (None, 500, 10004)        0         
_________________________________________________________________
gru_layer (GRU)              (None, 32)                963552    
_________________________________________________________________
prediction_layer (Dense)     (None, 1)                 33        
Total params: 963,585
Trainable params: 963,585
Non-trainable params: 0
_________________________________________________________________


### Model 3 : Bidirectional LSTM

In [0]:
# YOUR CODE HERE
# MODEL-3: Bidirectional LSTM

# construct the model

# construct architecture
input_layer = Input(shape=(max_sequence_length, ), name='input_layer', dtype='int32')
word_embedding = Lambda(binarize, output_shape=binarize_outshape,name='word_embedding')(input_layer)
bilstm_layer = Bidirectional(LSTM(rnn_dim, name='bilstm_layer'))(word_embedding)
output_layer = Dense(1, name='prediction_layer', activation='sigmoid')(bilstm_layer)

# define and load model
bilstm_model = Model(inputs=input_layer, outputs=output_layer)
bilstm_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_layer (InputLayer)     (None, 500)               0         
_________________________________________________________________
word_embedding (Lambda)      (None, 500, 10004)        0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 64)                2569472   
_________________________________________________________________
prediction_layer (Dense)     (None, 1)                 65        
Total params: 2,569,537
Trainable params: 2,569,537
Non-trainable params: 0
_________________________________________________________________


### Model 4 : Bidirectional GRU

In [0]:
# YOUR CODE HERE
# MODEL-4: Bidirectional GRU

# construct the model
input_layer = Input(shape=(max_sequence_length, ), name='input_layer', dtype='int32')
word_embedding = Lambda(binarize, output_shape=binarize_outshape,name='word_embedding')(input_layer)
bigru_layer = Bidirectional(GRU(rnn_dim, name='gru_layer'))(word_embedding)
output_layer = Dense(1, name='prediction_layer', activation='sigmoid')(bigru_layer)

# define and load model
bigru_model = Model(inputs=input_layer, outputs=output_layer)
bigru_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_layer (InputLayer)     (None, 500)               0         
_________________________________________________________________
word_embedding (Lambda)      (None, 500, 10004)        0         
_________________________________________________________________
bidirectional_2 (Bidirection (None, 64)                1927104   
_________________________________________________________________
prediction_layer (Dense)     (None, 1)                 65        
Total params: 1,927,169
Trainable params: 1,927,169
Non-trainable params: 0
_________________________________________________________________


### Compile models

In [0]:
# YOUR CODE HERE 
# decide loss function, and optimizer
lstm_model.compile(loss='binary_crossentropy', optimizer='RMSprop', metrics=['accuracy'])

In [0]:
# YOUR CODE HERE 
# decide loss function, and optimizer
gru_model.compile(loss='binary_crossentropy', optimizer='RMSprop', metrics=['accuracy'])

In [0]:
# YOUR CODE HERE 
# decide loss function, and optimizer
bilstm_model.compile(loss='binary_crossentropy', optimizer='RMSprop', metrics=['accuracy'])

In [0]:
# YOUR CODE HERE 
# decide loss function, and optimizer
bigru_model.compile(loss='binary_crossentropy', optimizer='RMSprop', metrics=['accuracy'])

### Train models

In [0]:
# YOUR CODE HERE 
# add callback history during the training stage of the model
lstm_model.fit(X_train, Y_train, validation_data=(X_validation, Y_validation), \
               epochs=num_epochs, batch_size=batch_size)

Train on 5000 samples, validate on 1000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fc183c6a9e8>

In [0]:
# YOUR CODE HERE 
# add callback history during the training stage of the model
gru_model.fit(X_train, Y_train, validation_data=(X_validation, Y_validation), \
               epochs=num_epochs, batch_size=batch_size)

Train on 5000 samples, validate on 1000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fc17e927710>

In [0]:
# YOUR CODE HERE 
# add callback history during the training stage of the model
bilstm_model.fit(X_train, Y_train, validation_data=(X_validation, Y_validation), \
               epochs=num_epochs, batch_size=batch_size)

Train on 5000 samples, validate on 1000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fc17e2701d0>

In [0]:
# YOUR CODE HERE 
# add callback history during the training stage of the model
bigru_model.fit(X_train, Y_train, validation_data=(X_validation, Y_validation), \
               epochs=num_epochs, batch_size=batch_size)

Train on 5000 samples, validate on 1000 samples
Epoch 1/5
  64/5000 [..............................] - ETA: 5:23 - loss: 0.6928 - acc: 0.4531

KeyboardInterrupt: ignored

### Save models and weight parameters

In [0]:
# Install the PyDrive wrapper & import libraries.
# This only needs to be done once in a notebook.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once in a notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)


In [0]:
# Save model
lstm_model.save('lstm_model.h5')
# Save trained weight parameters
lstm_model.save_weights('weights_lstm_model.hdf5')

# Save model
gru_model.save('gru_model.h5')
# Save trained weight parameters
gru_model.save_weights('weights_gru_model.hdf5')

# Save model
bilstm_model.save('bilstm_model.h5')
# Save trained weight parameters
bilstm_model.save_weights('weights_bilstm_model.hdf5')

# Save model
bigru_model.save('bigru_model.h5')
# Save trained weight parameters
bigru_model.save_weights('weights_bigru_model.hdf5')

In [0]:
# Create & upload a file.
uploaded = drive.CreateFile({'title': 'lstm_model.h5'})
uploaded.SetContentFile('lstm_model.h5')
uploaded.Upload()
print('Uploaded file with ID {}'.format(uploaded.get('id')))

Uploaded file with ID 1AcGxWbPIFGa89GXT4Rra9ZoC4xwUwY23


In [0]:
# Create & upload a file.
uploaded = drive.CreateFile({'title': 'weights_lstm_model.hdf5'})
uploaded.SetContentFile('weights_lstm_model.hdf5')
uploaded.Upload()
print('Uploaded file with ID {}'.format(uploaded.get('id')))

Uploaded file with ID 1s7DSEsDIOyJRSkN8-nu7pyTM-AkEjRhP


In [0]:
# Create & upload a file.
uploaded = drive.CreateFile({'title': 'gru_model.h5'})
uploaded.SetContentFile('gru_model.h5')
uploaded.Upload()
print('Uploaded file with ID {}'.format(uploaded.get('id')))
# Create & upload a file.
uploaded = drive.CreateFile({'title': 'weights_gru_model.hdf5'})
uploaded.SetContentFile('weights_gru_model.hdf5')
uploaded.Upload()
print('Uploaded file with ID {}'.format(uploaded.get('id')))

Uploaded file with ID 1fEMk94w2xZctcH39eB3q6MfaIfWvUVIU
Uploaded file with ID 1krICAPfGlO12qwM9R6sRA09coWs9ntEz


In [0]:
# Create & upload a file.
uploaded = drive.CreateFile({'title': 'bilstm_model.h5'})
uploaded.SetContentFile('bilstm_model.h5')
uploaded.Upload()
print('Uploaded file with ID {}'.format(uploaded.get('id')))
# Create & upload a file.
uploaded = drive.CreateFile({'title': 'weights_bilstm_model.hdf5'})
uploaded.SetContentFile('weights_bilstm_model.hdf5')
uploaded.Upload()
print('Uploaded file with ID {}'.format(uploaded.get('id')))

Uploaded file with ID 1fPwZg5SlKFp0QYatY_mNwxMQM_SLi2it
Uploaded file with ID 1eEU96NoyPifMdkinZf5WzXu3r_ectBeX


In [0]:
# Create & upload a file.
uploaded = drive.CreateFile({'title': 'bigru_model.h5'})
uploaded.SetContentFile('bigru_model.h5')
uploaded.Upload()
print('Uploaded file with ID {}'.format(uploaded.get('id')))
# Create & upload a file.
uploaded = drive.CreateFile({'title': 'weights_bigru_model.hdf5'})
uploaded.SetContentFile('weights_bigru_model.hdf5')
uploaded.Upload()
print('Uploaded file with ID {}'.format(uploaded.get('id')))

#### Plot model performance (loss, accuracy)

Present two (2) plots for each model 

In [9]:
import tensorflow as tf

MODEL_PATH = "models"
lstm_model = load_model(os.path.join(MODEL_PATH, 'lstm_model.h5'))

NameError: name 'tf' is not defined

In [18]:
from google.colab import files
uploaded = files.upload()
for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))

Saving lstm_model.h5 to lstm_model.h5
Saving weights_lstm_model.hdf5 to weights_lstm_model.hdf5
User uploaded file "lstm_model.h5" with length 10299232 bytes
User uploaded file "weights_lstm_model.hdf5" with length 5153776 bytes


In [39]:
import tensorflow as tf
from keras import backend as K

load_model('lstm_model.h5')

NameError: ignored

In [27]:
# lstm_model = load_model('lstm_model.h5')
weights_lstm_model = HDF5Matrix('weights_lstm_model.hdf5', 'weights_lstm_model') #x_data = HDF5Matrix('input/file.hdf5', 'data')
# import h5py
# filename = '.hdf5'
# f = h5py.File(filename, 'r')

KeyError: ignored

In [0]:
# YOUR CODE HERE 
# Fit the model
history = model.fit(X, Y, validation_split=0.33, epochs=150, batch_size=10, verbose=0)
# list all data in history
print(history.history.keys())
# summarize history for accuracy
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

#### Present table comparison of model performance

#### Answer:

=== write your answer here ===

(can also be presented as figure of table)

### 3. Feature extraction

#### Tasks: 

* Choose one model with the best performance and use the model to produce ”neural codes” (document embedding) of raw text (5000 instances of unseen validation set) from RNN layer.
*  Use tSNE to reduce the dimension of extracted text features (encoded version of 5000 documents) into two (2) dimensions and visualize it towards their sentiment labels.


In [0]:
# load the trained model
from keras.models import load_model

# YOUR CODE HERE
# choose the best model

model = load_model('')
model.load_weights('')

#### Produce Neural codes of text reviews

In [0]:
# YOUR CODE HERE

# define sub model to retrieve "neural codes" (document embedding) from recurrent layer
encoder_model =

# generate "neural codes" (document embedding) from the model
doc_embedding =

#### tSNE visualization

In [0]:
# YOUR CODE HERE

# use tSNE to plot document embedding on its sentiment label

### 4. One shot learning on multi-class classification

#### Tasks: 

* Use the RNN model that has been trained on binary classification task to extract features from different data set (Amazon product reviews)
* Define and implement an approach to assign labels on unlabelled set of reviews, by using the concept of "one shot learning"
* Compute accuracy, given ground truth labels
* Discuss the results 

Explain how the approach works.


#### Answer:

=== write your answer here ===

In [0]:
# YOUR CODE HERE

# one shot learning task


# compute overall accuracy, given ground truth labels
accuracy = 

Discuss the result. Why it works -- or why it does not work.

#### Answer:

=== write your answer here ===