In [1]:
import time

import tensorflow as tf
import numpy as np
import ray

from sklearn.model_selection import train_test_split

from model.ram import RecurrentAttentionModel

from data.augmented_mnist import minibatcher
from data.augmented_mnist import get_mnist

In [2]:
# Load the MNIST train and test partitions, each as an (inputs, labels) pair.
# NOTE(review): the meaning of the three positional flags is not visible in
# this notebook — confirm against data.augmented_mnist.get_mnist.
train_partition, test_partition = get_mnist(True, True, False)
X_train, y_train = train_partition
X_test, y_test = test_partition

# Sanity check: shapes of inputs/labels and the value range of the inputs.
for images, labels in ((X_train, y_train), (X_test, y_test)):
    print(images.shape, labels.shape, np.max(images), np.min(images))

(60000, 28, 28, 1) (60000, 10) 1.0 0.0
(10000, 28, 28, 1) (10000, 10) 1.0 0.0


In [3]:
# Hold out 25% of the loaded training data as a validation set; the fixed
# random_state keeps the split reproducible.
# NOTE: this rebinds X_train / y_train to the reduced training split, so
# re-running this cell without re-running the loading cell splits the data
# a second time.
split_parts = train_test_split(X_train, y_train, test_size=0.25, random_state=42)
X_train, X_val, y_train, y_val = split_parts

# Sanity check: shapes of inputs/labels and the value range of the inputs.
for images, labels in ((X_train, y_train), (X_val, y_val)):
    print(images.shape, labels.shape, np.max(images), np.min(images))

(45000, 28, 28, 1) (45000, 10) 1.0 0.0
(15000, 28, 28, 1) (15000, 10) 1.0 0.0


In [4]:
@ray.remote
def train(hyperparameter, X_train, y_train, X_val, y_val, num_epochs=10):
    """Train a RecurrentAttentionModel and return its validation accuracy.

    This is a Ray remote function: call it as ``train.remote(...)`` and
    resolve the returned object ID with ``ray.get``.

    Args:
        hyperparameter: Mapping with the keys ``"std"``, ``"learning_rate"``
            and ``"batch_size"``.
        X_train: Training inputs, passed to ``minibatcher``.
        y_train: Training labels, passed to ``minibatcher``.
        X_val: Validation inputs, passed to ``minibatcher``.
        y_val: Validation labels, passed to ``minibatcher``.
        num_epochs: Number of passes over the training data (default 10).

    Returns:
        The mean of the per-batch accuracies measured on the validation data
        after the final training epoch, or NaN when the validation batcher
        yields no batches.
    """
    ram = RecurrentAttentionModel(time_steps=7,
                                  n_glimpses=1,
                                  glimpse_size=8,
                                  num_classes=10,
                                  max_gradient_norm=5.0,
                                  std=hyperparameter["std"])
    optimizer = tf.keras.optimizers.Adam(hyperparameter["learning_rate"])
    batch_size = hyperparameter["batch_size"]

    # Training step.
    for _ in range(num_epochs):
        # NOTE(review): the last positional argument is presumably a shuffle
        # flag — confirm against data.augmented_mnist.minibatcher.
        batcher = minibatcher(X_train, y_train, batch_size, True)
        for X, y in batcher:
            # Only the forward pass and the loss need to be recorded on the tape.
            with tf.GradientTape() as tape:
                logits = ram(X)
                hybrid_loss, _, _, _ = ram.hybrid_loss(logits, y)

            # Differentiate and update outside the tape context so these ops
            # are not executed while the tape is active.
            gradients = tape.gradient(hybrid_loss, ram.trainable_variables)
            optimizer.apply_gradients(zip(gradients, ram.trainable_variables))

    # Validation step. Only the accuracy after the last epoch is returned, so
    # it is measured once here instead of after every epoch.
    batcher = minibatcher(X_val, y_val, batch_size, True)
    accuracies = []
    for X, y in batcher:
        logits = ram(X)
        accuracy, _, _ = ram.predict(logits, y)
        accuracies.append(accuracy.numpy())

    if not accuracies:
        # np.mean([]) would also give NaN, but with a RuntimeWarning.
        return float("nan")
    return np.mean(accuracies)

In [None]:
def generate_hyperparameters():
    """Sample one random hyperparameter configuration.

    Returns:
        A dict with the keys ``"learning_rate"`` (``10 ** -u`` with ``u``
        uniform in [0, 8)), ``"batch_size"`` (integer in [20, 200)) and
        ``"std"`` (``10 ** -u`` with ``u`` uniform in [0, 2)).
    """
    # Draw in a fixed order (learning rate, batch size, std) from NumPy's
    # global RNG so a seeded run reproduces the same configuration.
    learning_rate_exponent = np.random.uniform(0, 8)
    sampled_batch_size = np.random.randint(20, 200)
    std_exponent = np.random.uniform(0, 2)

    configuration = {}
    configuration["learning_rate"] = 10 ** -learning_rate_exponent
    configuration["batch_size"] = sampled_batch_size
    configuration["std"] = 10 ** -std_exponent
    return configuration

In [None]:
# Allow this cell to be re-run in the same kernel without failing because Ray
# has already been initialized.
ray.init(ignore_reinit_error=True)

# Number of experiment results to collect in total.
num_results = 10

# Put the datasets into the object store once and pass the object IDs to every
# task, instead of re-serializing the full arrays on each train.remote call.
dataset_ids = [ray.put(array) for array in (X_train, y_train, X_val, y_val)]

hyperparameter_configurations = [generate_hyperparameters()]

# Launch some experiments, remembering which hyperparameters belong to which
# task so results can be attributed once they finish.
remaining_ids = []
hyperparameters_by_id = {}
for hyperparameters in hyperparameter_configurations:
    experiment_id = train.remote(hyperparameters, *dataset_ids)
    remaining_ids.append(experiment_id)
    hyperparameters_by_id[experiment_id] = hyperparameters
num_launched = len(remaining_ids)

best_accuracy = None
best_hyperparameters = None

# Whenever a new experiment finishes, print the value and start a new
# experiment.
for i in range(num_results):
    ready_ids, remaining_ids = ray.wait(remaining_ids, num_returns=1)
    finished_id = ready_ids[0]
    accuracy = ray.get(finished_id)
    finished_hyperparameters = hyperparameters_by_id.pop(finished_id)
    print("Accuracy is {}".format(accuracy))
    print("  with hyperparameters {}".format(finished_hyperparameters))

    if best_accuracy is None or accuracy > best_accuracy:
        best_accuracy = accuracy
        best_hyperparameters = finished_hyperparameters

    # Start a new experiment only while more results are still needed, so no
    # task is launched whose result is never collected.
    if num_launched < num_results:
        new_hyperparameters = generate_hyperparameters()
        experiment_id = train.remote(new_hyperparameters, *dataset_ids)
        remaining_ids.append(experiment_id)
        hyperparameters_by_id[experiment_id] = new_hyperparameters
        num_launched += 1

print("Best accuracy is {} with hyperparameters {}".format(best_accuracy, best_hyperparameters))

2019-04-16 09:49:28,807	INFO node.py:423 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-04-16_09-49-28_19313/logs.
2019-04-16 09:49:28,948	INFO services.py:363 -- Waiting for redis server at 127.0.0.1:44450 to respond...
2019-04-16 09:49:29,081	INFO services.py:363 -- Waiting for redis server at 127.0.0.1:13578 to respond...
2019-04-16 09:49:29,084	INFO services.py:760 -- Starting Redis shard with 10.0 GB max memory.
2019-04-16 09:49:29,135	INFO services.py:1384 -- Starting the Plasma object store with 20.0 GB memory using /tmp.


[2m[36m(pid=19369)[0m 2019-04-16 09:49:33.891263: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcuda.so.1
[2m[36m(pid=19369)[0m 2019-04-16 09:49:33.898338: E tensorflow/stream_executor/cuda/cuda_driver.cc:320] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
[2m[36m(pid=19369)[0m 2019-04-16 09:49:33.898406: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:166] retrieving CUDA diagnostic information for host: 7576cf7f41cf
[2m[36m(pid=19369)[0m 2019-04-16 09:49:33.898424: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:173] hostname: 7576cf7f41cf
[2m[36m(pid=19369)[0m 2019-04-16 09:49:33.898559: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:197] libcuda reported version is: 410.104.0
[2m[36m(pid=19369)[0m 2019-04-16 09:49:33.898592: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:201] kernel reported version is: 410.104.0
[2m[36m(pid=19369)[0m 2019-04-16 0