# In [None]:
import tensorflow as tf
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop
from timeit import default_timer as timer
import platform

from keras_tqdm import TQDMNotebookCallback

# Report the interpreter and framework versions used for this run.
print(
    "Python: {}, TensorFlow:{}, Keras:{}".format(
        platform.python_version(), tf.__version__, keras.__version__
    )
)

# Training configuration
batch_size = 128    # samples per slice produced by the data generator
epochs = 20         # number of epochs passed to the training call
num_classes = 10    # width of the one-hot labels and of the softmax layer

# Fetch the MNIST train/test splits.
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

train_size, test_size = len(X_train), len(X_test)

# Flatten every sample to a 784-element row, cast to float32 and divide by
# 255 (presumably mapping 8-bit pixel intensities into [0, 1]).
X_train = X_train.reshape(train_size, 784).astype('float32') / 255
X_test = X_test.reshape(test_size, 784).astype('float32') / 255

# Convert the integer class labels into num_classes-wide categorical vectors.
Y_train = keras.utils.to_categorical(Y_train, num_classes)
Y_test = keras.utils.to_categorical(Y_test, num_classes)

print(X_train.shape, 'TRAIN samples')
print(X_test.shape, 'TEST samples')

# Fully connected classifier: 784 inputs -> 800 relu -> 800 relu -> softmax
# over num_classes outputs.
model = Sequential([
    Dense(800, activation='relu', input_shape=(784,)),
    Dense(800, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

model.summary()

model.compile(
    optimizer=RMSprop(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# ----------
# Launch nvidia-smi in the background so GPU and memory utilisation are
# written to GPU-info.csv while the model trains. The command is wrapped in
# `timeout` so the sampler cannot outlive nvidia_smi_max_timeout seconds.
import shlex
import subprocess

nvidia_smi_max_timeout = 300
cmd = (
    'timeout {} nvidia-smi'
    ' --query-gpu=utilization.gpu,utilization.memory'
    ' --format=csv,nounits -lms 100 -f GPU-info.csv'
).format(nvidia_smi_max_timeout)
print("Running process: [{}]".format(cmd))

# shlex.split turns the command line into an argv list, so no shell is used.
nvidia_smi = subprocess.Popen(shlex.split(cmd))
# ----------

# ===== EVERYTHING ABOVE HERE IS IDENTICAL BETWEEN "Plain fit" and "with Generator".

# Select how the generator workers run: True => processes, False => threads.
# If a variable named `use_multiprocessing` already exists in this namespace
# its value is used; otherwise the default is False (threads).
multiprocessing = vars().get('use_multiprocessing', False)

print(
    "NOTE! Using {} for Generator!".format(
        'PROCESSES' if multiprocessing else 'THREADS'
    )
)

# Stolen from https://github.com/fchollet/keras/issues/1638
import threading
class threadsafe_iter:
    """Iterator wrapper that serializes ``next`` calls with a lock.

    Several threads may pull from one instance; each call to ``__next__``
    acquires ``self.lock`` before advancing the wrapped iterator, so only one
    thread advances it at a time.

    Args:
        it: An iterator or generator. Any other iterable (list, range, ...)
            is also accepted and is converted with ``iter()``.

    Raises:
        StopIteration: Propagated from the wrapped iterator when exhausted.
    """

    def __init__(self, it):
        # Objects that already implement __next__ are stored untouched;
        # plain iterables are turned into an iterator first.
        self.it = it if hasattr(it, "__next__") else iter(it)
        self.lock = threading.Lock()

    def __iter__(self):
        return self

    def __next__(self):
        # Hold the lock only for the duration of a single advance.
        with self.lock:
            return next(self.it)


def threadsafe_generator(f):
    """Decorator that makes a generator function return a thread-safe iterator.

    Calling the decorated function invokes ``f`` with the same arguments and
    wraps the resulting generator in ``threadsafe_iter``.

    Args:
        f: The generator function to wrap.

    Returns:
        A function with the same call signature as ``f`` whose return value
        is a ``threadsafe_iter``. The wrapper keeps ``f``'s ``__name__`` and
        ``__doc__``.
    """
    # Imported here so the decorator does not rely on a file-level import.
    from functools import wraps

    @wraps(f)
    def g(*a, **kw):
        return threadsafe_iter(f(*a, **kw))
    return g


@threadsafe_generator
def data_generator_mnist_train(X, Y):
    """Yield ``(X_batch, Y_batch)`` slices endlessly, cycling over the data.

    Batches are consecutive slices of length ``batch_size`` (the module-level
    hyperparameter). After each batch the cursor advances by ``batch_size``;
    if another full batch would run past the end of ``X`` the cursor resets
    to 0, so a trailing partial batch is never yielded. When ``X`` holds
    fewer than ``batch_size`` samples, every batch is the whole dataset.

    Args:
        X: Sliceable collection of inputs that supports ``len()``.
        Y: Sliceable collection of targets, indexed in step with ``X``.

    Yields:
        A 2-tuple ``(X[i:i + batch_size], Y[i:i + batch_size])``.
    """
    dataset_size = len(X)

    i = 0
    while True:
        yield X[i:i + batch_size], Y[i:i + batch_size]
        i += batch_size
        # Restart from the beginning when a further full batch would not fit.
        if i + batch_size > dataset_size:
            i = 0

# Time only the training call; the evaluation below is not included.
time_start = timer()
history = model.fit_generator(
    data_generator_mnist_train(X_train, Y_train),
    # Whole batches per epoch; a remainder smaller than one batch is dropped.
    steps_per_epoch=train_size // batch_size,
    epochs=epochs,
    validation_data=[X_test, Y_test],
    # Built-in logging is off; progress is shown through the tqdm callback.
    verbose=0,
    callbacks=[TQDMNotebookCallback()],
    # NOTE(review): `pickle_safe` and `max_q_size` look like the early
    # Keras 2 keyword names - confirm against the installed Keras version.
    workers=2,
    pickle_safe=multiprocessing,
    max_q_size=50,
)
training_time = timer() - time_start

# Loss and metric values on the held-out test split.
scores = model.evaluate(X_test, Y_test, verbose=0)

# ----------
# Stop the background nvidia-smi sampler and plot what it recorded.
import subprocess
import time

time.sleep(0.5)  # presumably lets the sampler log a short tail after evaluation
nvidia_smi.terminate()
# Reap the child instead of sleeping for a fixed interval: this avoids leaving
# an un-waited process behind and makes sure it has exited before the CSV is
# read. Fall back to kill() if it does not react to terminate() in time.
try:
    nvidia_smi.wait(timeout=5)
except subprocess.TimeoutExpired:
    nvidia_smi.kill()
    nvidia_smi.wait()

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
sb.set_style("darkgrid")
gpu = pd.read_csv("./GPU-info.csv")
gpu.plot()
plt.show()
# ----------

# scores[0] is reported as the loss and scores[1] as the accuracy.
test_loss, test_accuracy = scores[0], scores[1]

print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)
# Error rate in percent, i.e. 100 minus the accuracy percentage.
print("Baseline Error: %.2f%%" % (100 - test_accuracy * 100))
print("TRAINING took {} seconds.".format(training_time))