# Embedding, LSTM, GRU and Conv1D
Cognitive Systems for Health Technology Applications<br>
8.3.2019, Sakari Lukkarinen<br>
[Helsinki Metropolia University of Applied Sciences](https://www.metropolia.fi/en)


## Objectives
The aim of this notebook is to show how to use Keras embedding layers together with LSTM, GRU and Conv1D layers to classify reviews that have been converted to sequences.

The examples follow the structure given in the book: Chollet, Deep Learning with Python, Ch. 6, "Deep learning for text and sequences". The examples are not optimized; the aim is to show how to configure the model architectures.

## Data preparation

In [2]:
# Read basic libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Show which files are present in the data directory
print(os.listdir(path = "../../case3_datas"))


['drugsComTest_raw.csv', 'drugsComTrain_raw.csv']


In [3]:
# Load the raw CSV files into two dataframes: `test` first, then `train`
test = pd.read_csv(filepath_or_buffer = r"../../case3_datas/drugsComTest_raw.csv")
train = pd.read_csv(filepath_or_buffer = r"../../case3_datas/drugsComTrain_raw.csv")

# Print the training dataframe
print(train)

        uniqueID                            drugName  \
0         206461                           Valsartan   
1          95260                          Guanfacine   
2          92703                              Lybrel   
3         138000                          Ortho Evra   
4          35696            Buprenorphine / naloxone   
5         155963                              Cialis   
6         165907                      Levonorgestrel   
7         102654                        Aripiprazole   
8          74811                              Keppra   
9          48928  Ethinyl estradiol / levonorgestrel   
10         29607                          Topiramate   
11         75612                      L-methylfolate   
12        191290                             Pentasa   
13        221320                    Dextromethorphan   
14         98494                           Nexplanon   
15         81890                         Liraglutide   
16         48188                        Trimetho

In [4]:
# Preview the first five rows of the training data
train.head(n = 5)

Unnamed: 0,uniqueID,drugName,condition,review,rating,date,usefulCount
0,206461,Valsartan,Left Ventricular Dysfunction,"""It has no side effect, I take it in combinati...",9,20-May-12,27
1,95260,Guanfacine,ADHD,"""My son is halfway through his fourth week of ...",8,27-Apr-10,192
2,92703,Lybrel,Birth Control,"""I used to take another oral contraceptive, wh...",5,14-Dec-09,17
3,138000,Ortho Evra,Birth Control,"""This is my first time using any form of birth...",8,3-Nov-15,10
4,35696,Buprenorphine / naloxone,Opiate Dependence,"""Suboxone has completely turned my life around...",9,27-Nov-16,37


In [5]:
# Derive a three-level label from the rating, based on the original article: Grässer et al. (2018)
#   rating <= 4 -> -1,  rating >= 7 -> +1,  anything in between -> 0
r = train['rating']
labels = (r >= 7).astype(int) - (r <= 4).astype(int)

# Store the labels as a new column of the training data
train['label'] = labels

# Inspect the extended dataframe
train.head()

NameError: name 'data' is not defined

In [None]:
# Check ratings to labels conversion
import matplotlib.pyplot as plt
# Scatter plot of the derived label against the original rating
train.plot.scatter(x = 'rating', y = 'label')
plt.show()

In [None]:
# Histogram of the label column; the bin edges -1, 0, 1, 2 put each label value (-1, 0, +1) in its own bin
train.hist(column = 'label', bins = np.array([-1, 0, 1, 2]), align = 'left');

## Convert reviews to padded sequences

In [None]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# Take the first 10000 reviews as the raw text for the training set
samples = train['review'].head(10000)

# Fit a tokenizer (num_words = 1000) on those reviews
tokenizer = Tokenizer(num_words = 1000)
tokenizer.fit_on_texts(samples)

# Encode each review as a sequence and pad the sequences with maxlen = 500 (x_train)
sequences = tokenizer.texts_to_sequences(samples)
x_train = pad_sequences(sequences, maxlen = 500)

## Convert labels to one-hot-categories

In [None]:
from keras.utils import to_categorical

# Convert the labels to one-hot category vectors.
# to_categorical expects class indices 0 .. num_classes - 1, but the labels are
# -1, 0 and +1. Shift them by one so that no negative index is passed:
#   column 0 <- label -1,  column 1 <- label 0,  column 2 <- label +1
one_hot_labels = to_categorical(labels[:10000] + 1, num_classes = 3)

## Check the shapes

In [None]:
# Display the shapes of the padded input sequences and of the one-hot labels
(x_train.shape, one_hot_labels.shape)

## Helper functions

In [None]:
# We use the same plotting commands several times, so create a function for that purpose
def plot_history(history):
    """Plot training and validation curves side by side.

    The left panel shows accuracy, the right panel shows loss. Training
    values are drawn as blue dots, validation values as a blue line.

    Parameters
    ----------
    history : object whose `history` attribute is a mapping with the keys
        'acc', 'val_acc', 'loss' and 'val_loss', each holding one value
        per epoch.
    """
    _, (accuracy_axis, loss_axis) = plt.subplots(1, 2, figsize = (16, 7))

    # Read all four curves up front, in the same order as they are plotted
    record = history.history
    acc_train, acc_val = record['acc'], record['val_acc']
    loss_train, loss_val = record['loss'], record['val_loss']

    # Epochs are numbered from 1 on the x axis
    epochs = range(1, len(acc_train) + 1)

    panels = (
        (accuracy_axis, acc_train, 'Training acc', acc_val, 'Validation acc',
         'Training and validation accuracy'),
        (loss_axis, loss_train, 'Training loss', loss_val, 'Validation loss',
         'Training and validation loss'),
    )
    for axis, train_curve, train_label, val_curve, val_label, heading in panels:
        axis.plot(epochs, train_curve, 'bo', label = train_label)
        axis.plot(epochs, val_curve, 'b', label = val_label)
        axis.set_title(heading)
        axis.legend()

    plt.show()

In [None]:
# Similarly create a function for model training, for demonstration purposes we use constant values
def train_model(model, x, y, e = 10, bs = 32, v = 1, vs = 0.25):
    """Fit `model` on (x, y) and return whatever `model.fit` returns.

    Parameters
    ----------
    model : object providing a `fit` method
    x, y : training inputs and targets, handed to `fit` unchanged
    e : passed to `fit` as `epochs`
    bs : passed to `fit` as `batch_size`
    v : passed to `fit` as `verbose`
    vs : passed to `fit` as `validation_split`
    """
    fit_options = dict(epochs = e, batch_size = bs, verbose = v, validation_split = vs)
    return model.fit(x, y, **fit_options)

## Example 1 - Embedding and Flatten

In [None]:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense

# First model: Embedding layer -> Flatten -> Dense classifier
# Embedding arguments: 1000 = num_words, 64 = embedding vector size, 500 = sequence length
m0 = Sequential([
    Embedding(1000, 64, input_length = 500),
    Flatten(),
    Dense(32, activation = 'relu'),
    Dense(3, activation = 'softmax'),   # one output per label category
])
m0.compile(optimizer = 'rmsprop', loss = 'categorical_crossentropy', metrics = ['acc'])
m0.summary()

In [None]:
# Fit the first model with the default training settings, then plot its learning curves
h0 = train_model(model = m0, x = x_train, y = one_hot_labels)
plot_history(history = h0)

## Example 2 - Embedding and LSTM

In [None]:
from keras.layers import LSTM

# Second model: Embedding -> LSTM -> Dense classifier
m1 = Sequential([
    Embedding(1000, 8, input_length = 500),
    LSTM(8),
    Dense(3, activation = 'softmax'),   # one output per label category
])
m1.compile(optimizer = 'rmsprop', loss = 'categorical_crossentropy', metrics = ['acc'])
m1.summary()

In [None]:
# Fit the second model with the default training settings, then plot its learning curves
h1 = train_model(model = m1, x = x_train, y = one_hot_labels)
plot_history(history = h1)

## Example 3 - Embedding and GRU

In [None]:
from keras.layers import GRU

# Third model: Embedding -> GRU -> Dense classifier
m2 = Sequential([
    Embedding(1000, 8, input_length = 500),
    GRU(8),
    Dense(3, activation = 'softmax'),   # one output per label category
])
m2.compile(optimizer = 'rmsprop', loss = 'categorical_crossentropy', metrics = ['acc'])
m2.summary()

In [None]:
# Fit the third model with the default training settings, then plot its learning curves
h2 = train_model(model = m2, x = x_train, y = one_hot_labels)
plot_history(history = h2)

## Example 4 - Embedding and GRU with dropout

In [None]:
# Fourth model: Embedding -> GRU with dropouts -> Dense classifier
# Same layout as the third model, but the GRU uses dropout and recurrent_dropout of 0.2
m3 = Sequential([
    Embedding(1000, 8, input_length = 500),
    GRU(8, dropout = 0.2, recurrent_dropout = 0.2),
    Dense(3, activation = 'softmax'),   # one output per label category
])
m3.compile(optimizer = 'rmsprop', loss = 'categorical_crossentropy', metrics = ['acc'])
m3.summary()

In [None]:
# Fit the fourth model with the default training settings, then plot its learning curves
h3 = train_model(model = m3, x = x_train, y = one_hot_labels)
plot_history(history = h3)

## Example 5 - Embedding and stack of GRUs

In [None]:
# Fifth model: Embedding -> Stack of GRU layers -> Dense classifier
# The first GRU is configured with return_sequences = True and feeds the second GRU
m4 = Sequential([
    Embedding(1000, 8, input_length = 500),
    GRU(8, dropout = 0.1, recurrent_dropout = 0.5, return_sequences = True),
    GRU(8, activation = 'relu', dropout = 0.1, recurrent_dropout = 0.5),
    Dense(3, activation = 'softmax'),   # one output per label category
])
m4.compile(optimizer = 'rmsprop', loss = 'categorical_crossentropy', metrics = ['acc'])
m4.summary()

In [None]:
# Fit the fifth model with the default training settings, then plot its learning curves
h4 = train_model(model = m4, x = x_train, y = one_hot_labels)
plot_history(history = h4)

## Example 6 - Embedding and Conv1D

In [None]:
from keras.layers import Conv1D, MaxPooling1D, GlobalMaxPooling1D

# Sixth model: Embedding -> Conv1D & MaxPooling1D -> Dense classifier
# Two Conv1D layers (32 filters, kernel size 7) with MaxPooling1D(5) in between,
# followed by GlobalMaxPooling1D before the classifier
m5 = Sequential([
    Embedding(1000, 32, input_length = 500),
    Conv1D(32, 7, activation = 'relu'),
    MaxPooling1D(5),
    Conv1D(32, 7, activation = 'relu'),
    GlobalMaxPooling1D(),
    Dense(3, activation = 'softmax'),   # one output per label category
])
m5.compile(optimizer = 'rmsprop', loss = 'categorical_crossentropy', metrics = ['acc'])
m5.summary()

In [None]:
# Fit the sixth model with the default training settings, then plot its learning curves
h5 = train_model(model = m5, x = x_train, y = one_hot_labels)
plot_history(history = h5)

## Example 7 - Embedding and mixed Conv1D and GRU

In [None]:
# Seventh model: Embedding -> 2 x Conv1D + MaxPooling -> GRU -> Dense
# The convolution/pooling layers come first; a GRU with dropout then feeds the classifier
m6 = Sequential([
    Embedding(1000, 32, input_length = 500),
    Conv1D(32, 5, activation = 'relu'),
    MaxPooling1D(5),
    Conv1D(32, 7, activation = 'relu'),
    GRU(8, dropout = 0.1, recurrent_dropout = 0.5),
    Dense(3, activation = 'softmax'),   # one output per label category
])
m6.compile(optimizer = 'rmsprop', loss = 'categorical_crossentropy', metrics = ['acc'])
m6.summary()

# Fit the seventh model with the default training settings, then plot its learning curves
h6 = train_model(model = m6, x = x_train, y = one_hot_labels)
plot_history(history = h6)

In [None]:
# Train the seventh model and plot the history.
# The two statements must be on separate lines; fused on one line they are a SyntaxError.
# NOTE(review): m6 is already trained in the previous cell; running this cell calls
# train_model on the same model object again — confirm that the repeat is intended.
h6 = train_model(m6, x_train, one_hot_labels)
plot_history(h6)