# Building a neural-network with Keras
**Citation: https://github.com/slundberg/shap**

(Colab is recommended)

First, import the prerequisites.

In [2]:
import numpy as np
import matplotlib.pyplot as plt

from __future__ import print_function

from keras.datasets import imdb

from keras.models import Model
from keras.layers import Dense, Input, Dropout, LSTM, Activation
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.models import Sequential

from keras.utils import np_utils

Words are indexed by overall frequency in the dataset, so that, for instance, the integer "3" encodes the 3rd most frequent word in the data.

In [15]:
# Vocabulary size: passed as `num_words` to `imdb.load_data` and as the first
# argument of the `Embedding` layer further down.
max_features = 20000

In [16]:
# Load the IMDB reviews as (sequences, labels) pairs for the train and test
# splits, with the vocabulary limited through `num_words=max_features`.
(x_train_original, y_train_original), (x_test_original, y_test_original) = imdb.load_data(
    num_words=max_features
)

# Report the shape of each split before any padding is applied.
print('the original shape of x_train: ', x_train_original.shape, sep='')
print('the original shape of x_test: ', x_test_original.shape, sep='')

  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])


the original shape of x_train: (25000,)
the original shape of x_test: (25000,)


  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


## Prepare the input sentences

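Zero-pad (or truncate) all these lists so that they share one fixed length, `maxLen`, set below. Note that this is a chosen maximum length, not the length of the longest sentence: longer reviews are cut down to `maxLen` tokens.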

Padding handles sequences of varying length
* The common solution to handling sequences of **different length** is to use padding.  Specifically:
    * Set a maximum sequence length
    * Pad shorter sequences (and truncate longer ones) so that all sequences have the same length.


In [17]:
# Fixed sequence length, passed as `maxlen` to `sequence.pad_sequences` for
# both the train and the test reviews.
maxLen = 100

In [18]:
# Bring every review to exactly `maxLen` tokens, applying the same call to the
# train and test splits so that both end up as 2-D arrays of equal width.
x_train, x_test = (
    sequence.pad_sequences(reviews, maxlen=maxLen)
    for reviews in (x_train_original, x_test_original)
)

print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

x_train shape: (25000, 100)
x_test shape: (25000, 100)


## Model

In [20]:
# Three-layer stack, declared in one go as a layer list:
#   1. Embedding - maps each of the `max_features` word indices to a 128-d vector.
#   2. LSTM      - 128 units, with dropout 0.2 on the inputs and on the recurrent state.
#   3. Dense     - a single sigmoid unit producing the model output.
model = Sequential([
    Embedding(max_features, 128),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])



After creating your model in Keras, you need to compile it. 

In [21]:
# Configure training: Adam optimizer, binary cross-entropy loss (matching the
# single sigmoid output), and accuracy reported as an additional metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

## Train the model

In [22]:
# Train for 15 epochs with mini-batches of 32. The test split is passed as
# `validation_data`, so its loss/accuracy are reported after every epoch; it is
# only monitored here, since no callback acts on it.
#
# The returned History object is kept instead of being echoed and discarded:
# `history.history` holds the per-epoch training and validation metrics, which
# can then be inspected or plotted to spot overfitting.
history = model.fit(
    x_train, y_train_original,
    batch_size=32,
    epochs=15,
    validation_data=(x_test, y_test_original),
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x7f5be59c3358>

In [23]:
# Evaluate on the padded test split. With the loss and the single 'accuracy'
# metric configured at compile time, `evaluate` yields two values:
# the loss (`score`) followed by the accuracy (`acc`).
score, acc = model.evaluate(x=x_test, y=y_test_original, batch_size=32)

print('Test score:', score)
print('Test accuracy:', acc)

Test score: 0.9738485217094421
Test accuracy: 0.8250399827957153
