# IMDB dataset

In [None]:
import numpy as np
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.layers import LSTM
from keras.datasets import imdb

In [None]:
# Vocabulary cap, handed to the loader as `num_words` and reused later as the
# size of the embedding table.
vocabulary_size = 15000
train_split, test_split = imdb.load_data(num_words=vocabulary_size)
x_train, y_train = train_split
x_test, y_test = test_split

In [None]:
# Show the first training sample exactly as loaded (no padding applied yet).
print(x_train[0])

In [None]:
# Word index shipped with the IMDB dataset; it is inverted in the next cell,
# i.e. it is used as a word -> integer mapping.
word2num = imdb.get_word_index()

In [None]:
# Reverse lookup table: integer id -> word.
num2word = {index: word for word, index in word2num.items()}

In [None]:
# imdb.load_data() shifts every word id by `index_from` (default 3) and
# reserves 0, 1 and 2 for padding, start-of-sequence and out-of-vocabulary.
# The ids therefore have to be shifted back before they are looked up in the
# word index; a direct lookup silently prints the wrong words.
index_offset = 3
special_tokens = {0: "<pad>", 1: "<start>", 2: "<oov>"}
print(" - ".join(
    special_tokens[x] if x < index_offset
    else num2word.get(x - index_offset, "<unk>")
    for x in x_train[0]
))

In [None]:
# Label paired with the first training sample.
y_train[0]

In [None]:
# Inspect the shape and dtype of the training inputs before they are padded.
x_train.shape, x_train.dtype

### padding the data

In [None]:
# Bring every review to a common length of `maxlen` tokens.
maxlen = 200
x_train, x_test = (
    sequence.pad_sequences(split, maxlen=maxlen)
    for split in (x_train, x_test)
)

In [None]:
# Look at the third training sample after padding.
x_train[2]

In [None]:
# Run pad_sequences again with a shorter target length on the arrays produced
# by the previous padding step; `maxlen` stays 60 for the rest of the notebook.
maxlen = 60
x_train, x_test = [
    sequence.pad_sequences(padded, maxlen=maxlen)
    for padded in (x_train, x_test)
]

### LSTM using Keras

In [None]:
# Embedding over `vocabulary_size` token ids (128 units), a 64-unit LSTM with
# input and recurrent dropout, and a single sigmoid output unit.
model = Sequential([
    Embedding(vocabulary_size, 128),
    LSTM(64, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])
# Binary target -> binary cross-entropy; accuracy is tracked as a metric.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
from IPython.display import SVG, display
from keras.utils.vis_utils import model_to_dot

# Convert the model to a dot graph (including tensor shapes), render it with
# the `dot` program as SVG and show the result inline.
model_graph = model_to_dot(model, show_shapes=True)
SVG(model_graph.create(prog='dot', format='svg'))

In [None]:
# Train for 3 epochs in mini-batches of 32. Note that the test split doubles
# as the validation data reported after each epoch.
held_out = (x_test, y_test)
model.fit(x_train, y_train, validation_data=held_out, epochs=3, batch_size=32)

In [None]:
# Final evaluation on the test split; the model was compiled with one loss and
# one metric, so evaluate() yields the pair (loss, accuracy).
test_metrics = model.evaluate(x_test, y_test, batch_size=32)
score, acc = test_metrics
print(score, acc)

In [None]:
from PIL import Image
import numpy as np
import tensorflow as tf

## MNIST Dataset

In [None]:
import lzma
import pickle
# Read the xz-compressed pickle, which holds three splits.
# NOTE: pickle.load executes arbitrary code from the file; only use a trusted
# copy of mnist.pkl.xz.
with lzma.open("mnist.pkl.xz", 'rb') as archive:
    splits = pickle.load(archive, encoding='latin1')
train_set, validation_set, test_set = splits

In [None]:
# Each split is an (inputs, integer labels) pair.
train_X, train_y = train_set
validation_X, validation_y = validation_set
test_X, test_y = test_set

# Row k of the 10x10 identity matrix is the one-hot vector for class k, so
# indexing it with a label array yields the one-hot encoded labels.
one_hot_rows = np.eye(10)
train_Y = one_hot_rows[train_y]
test_Y = one_hot_rows[test_y]
validation_Y = one_hot_rows[validation_y]

In [None]:
from IPython.display import display
def showX(X):
    """Display the images in X side by side as a single strip.

    X is multiplied by 255, clipped to [0, 255] and cast to uint8, then cut
    into 28x28 tiles that are laid out left to right.
    Assumes X holds flattened 28x28 images with values in [0, 1] -- TODO confirm.
    """
    pixels = (X * 255).clip(0, 255).astype('uint8')
    tiles = pixels.reshape(-1, 28, 28)             # N*784 -> N*28*28
    strip = tiles.swapaxes(0, 1).reshape(28, -1)   # -> 28*N*28 -> 28*(28N)
    display(Image.fromarray(strip))
# Training data: show the first 20 images of X together with their labels.
# Only the matching 20 labels are printed so they line up with the images
# (printing the whole array gives an abbreviated dump of every label instead).
showX(train_X[:20])
print(train_y[:20])

### Using the tf.data Dataset API

In [None]:
# Training pipeline: slice the arrays into (image, one-hot label) pairs,
# shuffle through a 10000-element buffer, then group into batches of 40.
training_data = tf.data.Dataset.from_tensor_slices((train_X, train_Y))
training_data = training_data.shuffle(buffer_size=10000)
training_data = training_data.batch(40)

# Validation pipeline: same batch size, no shuffling.
validation_data = tf.data.Dataset.from_tensor_slices((validation_X, validation_Y))
validation_data = validation_data.batch(40)

# A single iterator described only by element types and shapes; the two
# initializer ops below point it at either the training or validation data.
iterator = tf.data.Iterator.from_structure(
    training_data.output_types,
    training_data.output_shapes,
)
training_init_op = iterator.make_initializer(training_data)
validation_init_op = iterator.make_initializer(validation_data)

# Graph tensors for the next batch of inputs and one-hot labels.
X, Y_ = iterator.get_next()

In [None]:
# Build the CNN layers.

# Reshape each batch from the iterator into 28x28 single-channel images.
X_image = tf.reshape(X, [-1, 28, 28, 1])

# first layer: 3x3 convolution with 32 filters, then 2x2 max-pooling
h_conv1 = tf.layers.conv2d(X_image, 32, 3, padding='same', activation=tf.nn.relu)
h_pool1 = tf.layers.max_pooling2d(h_conv1, 2, 2)

# second layer: 3x3 convolution with 64 filters, then 2x2 max-pooling
h_conv2 = tf.layers.conv2d(h_pool1, 64, 3, padding='same', activation=tf.nn.relu)
h_pool2 = tf.layers.max_pooling2d(h_conv2, 2, 2)
h_pool2_flat = tf.layers.flatten(h_pool2)

# fully-connected layer
h_fc = tf.layers.dense(h_pool2_flat, 1024, activation=tf.nn.relu, name='fc')

# Dropout; keep_prob is fed at run time (0.5 for training, 1.0 for evaluation).
with tf.name_scope('dropout'):
    keep_prob = tf.placeholder("float", name="keep_prob")
    h_fc_drop = tf.nn.dropout(h_fc, keep_prob)

# Readout: emit raw (linear) logits. softmax_cross_entropy applies the softmax
# itself, and a ReLU here would clamp every negative logit to zero, which
# distorts the class scores and blocks gradients for those units.
Y = tf.layers.dense(h_fc_drop, 10, name='readout')

cross_entropy =  tf.losses.softmax_cross_entropy(onehot_labels=Y_, logits=Y)
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# Predicted class = index of the largest logit; compare with the one-hot label.
prediction = tf.argmax(Y, 1, name="prediction")
correct_prediction = tf.equal(prediction, tf.argmax(Y_, 1), name="correction")
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy")

In [None]:
import time

# The context manager closes the session even if training raises.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    t0 = time.time()
    for epoch in range(3):
        # Point the shared iterator at the training data and consume it until
        # it is exhausted, which marks the end of the epoch.
        sess.run(training_init_op)
        while True:
            try:
                sess.run(train_step, {keep_prob: 0.5 })
            except tf.errors.OutOfRangeError:
                print("End of epoch", epoch, "time:", time.time()-t0)
                break

        # Evaluate on the WHOLE validation set (dropout disabled) instead of a
        # fixed number of batches. Per-sample hits are collected so that a
        # smaller final batch is weighted correctly.
        sess.run(validation_init_op)
        batch_hits = []
        while True:
            try:
                batch_hits.append(sess.run(correct_prediction, {keep_prob: 1.0 }))
            except tf.errors.OutOfRangeError:
                break
        if batch_hits:
            validation_accuracy = np.concatenate(batch_hits).mean()
        else:
            # Empty validation set: there is nothing to average.
            validation_accuracy = float("nan")
        print("Epoch %d, validation accuracy %g"%(epoch, validation_accuracy))