<a href="https://colab.research.google.com/github/MyDearGreatTeacher/AI4high/blob/master/Keras_Fashion_MNIST_TPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

資料來源:

https://medium.com/swlh/how-to-perform-keras-hyperparameter-optimization-x3-faster-on-tpu-for-free-602b97812602

https://github.com/Tony607/Keras_auto

https://colab.research.google.com/github/Tony607/Keras_auto/blob/master/Keras_Fashion_MNIST_GPU.ipynb#scrollTo=1qzMA0Bd6Xhr

## Keras hyperparameter optimization - TPUs
[How to perform Keras hyperparameter optimization x3 faster on TPU for free](https://www.dlology.com/blog/how-to-perform-keras-hyperparameter-optimization-on-tpu-for-free/)

In [1]:
import numpy as np
import keras
from keras import backend as K

batch_size = 128
num_classes = 10
epochs = 12
# input image dimensions
img_rows, img_cols = 28, 28

(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

Using TensorFlow backend.


Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [0]:
x = np.concatenate((x_train, x_test), axis=0)

In [3]:
x.shape

(70000, 28, 28, 1)

In [4]:
y = np.concatenate((y_train, y_test), axis=0)
y.shape

(70000, 10)

In [5]:
!pip install -q talos

[K     |████████████████████████████████| 51kB 16.8MB/s 
[K     |████████████████████████████████| 788kB 9.9MB/s 
[K     |████████████████████████████████| 12.6MB 38.5MB/s 
[?25h  Building wheel for talos (setup.py) ... [?25l[?25hdone
  Building wheel for astetik (setup.py) ... [?25l[?25hdone
  Building wheel for chances (setup.py) ... [?25l[?25hdone
  Building wheel for kerasplotlib (setup.py) ... [?25l[?25hdone
  Building wheel for wrangle (setup.py) ... [?25l[?25hdone
[31mERROR: albumentations 0.1.12 has requirement imgaug<0.2.7,>=0.2.5, but you'll have imgaug 0.2.8 which is incompatible.[0m


In [0]:
para = {
    'dense1_neuron': [256, 512],
    'activation': ['relu', 'elu'],
    'conv_dropout': [0.25, 0.4]
}

In [0]:
import tensorflow as tf
import os
def fashion_mnist_fn_tpu(x_train, y_train, x_val, y_val, params):
    tf.keras.backend.clear_session()
    conv_dropout = float(params['conv_dropout'])
    dense1_neuron = int(params['dense1_neuron'])
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.BatchNormalization(input_shape=x_train.shape[1:]))
    model.add(tf.keras.layers.Conv2D(64, (5, 5), padding='same', activation=params['activation']))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2,2)))
    model.add(tf.keras.layers.Dropout(conv_dropout))

    model.add(tf.keras.layers.BatchNormalization(input_shape=x_train.shape[1:]))
    model.add(tf.keras.layers.Conv2D(128, (5, 5), padding='same', activation=params['activation']))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(tf.keras.layers.Dropout(conv_dropout))

    model.add(tf.keras.layers.BatchNormalization(input_shape=x_train.shape[1:]))
    model.add(tf.keras.layers.Conv2D(256, (5, 5), padding='same', activation=params['activation']))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2,2)))
    model.add(tf.keras.layers.Dropout(conv_dropout))

    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(params['dense1_neuron']))
    model.add(tf.keras.layers.Activation(params['activation']))
    model.add(tf.keras.layers.Dropout(0.5))
    model.add(tf.keras.layers.Dense(10))
    model.add(tf.keras.layers.Activation('softmax'))
    
    tpu_model = tf.contrib.tpu.keras_to_tpu_model(
        model,
        strategy=tf.contrib.tpu.TPUDistributionStrategy(
            tf.contrib.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
        )
    )
    tpu_model.compile(
        optimizer=tf.train.AdamOptimizer(learning_rate=1e-3, ),
        loss=tf.keras.losses.categorical_crossentropy,
        metrics=['categorical_accuracy']
    )


    out = tpu_model.fit(
        x, y, epochs=10, batch_size = 1024,
        verbose=0,
        validation_data=[x_val, y_val]
    )
    
    return out, tpu_model.sync_to_cpu()


In [0]:
import talos as ta

In [9]:
scan_results = ta.Scan(x, y, para, fashion_mnist_fn_tpu)

  0%|          | 0/8 [00:00<?, ?it/s]

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

INFO:tensorflow:Querying Tensorflow master (grpc://10.4.146.194:8470) for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 462960227721379780)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 6335134745944299261)
INFO:tensorflow:*** Available Device: _DeviceAttri

 12%|█▎        | 1/8 [01:21<09:28, 81.17s/it]

INFO:tensorflow:Querying Tensorflow master (grpc://10.4.146.194:8470) for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 462960227721379780)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 6335134745944299261)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 6221578934700509721)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 11021500064047853842)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 795616089580472036)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/jo

 25%|██▌       | 2/8 [02:43<08:09, 81.50s/it]

INFO:tensorflow:Querying Tensorflow master (grpc://10.4.146.194:8470) for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 462960227721379780)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 6335134745944299261)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 6221578934700509721)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 11021500064047853842)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 795616089580472036)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/jo

 38%|███▊      | 3/8 [04:03<06:45, 81.12s/it]

INFO:tensorflow:Querying Tensorflow master (grpc://10.4.146.194:8470) for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 462960227721379780)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 6335134745944299261)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 6221578934700509721)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 11021500064047853842)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 795616089580472036)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/jo

 50%|█████     | 4/8 [05:24<05:24, 81.09s/it]

INFO:tensorflow:Querying Tensorflow master (grpc://10.4.146.194:8470) for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 462960227721379780)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 6335134745944299261)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 6221578934700509721)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 11021500064047853842)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 795616089580472036)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/jo

 62%|██████▎   | 5/8 [06:46<04:03, 81.19s/it]

INFO:tensorflow:Querying Tensorflow master (grpc://10.4.146.194:8470) for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 462960227721379780)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 6335134745944299261)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 6221578934700509721)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 11021500064047853842)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 795616089580472036)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/jo

 75%|███████▌  | 6/8 [08:08<02:43, 81.63s/it]

INFO:tensorflow:Querying Tensorflow master (grpc://10.4.146.194:8470) for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 462960227721379780)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 6335134745944299261)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 6221578934700509721)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 11021500064047853842)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 795616089580472036)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/jo

 88%|████████▊ | 7/8 [09:32<01:22, 82.19s/it]

INFO:tensorflow:Querying Tensorflow master (grpc://10.4.146.194:8470) for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 462960227721379780)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 6335134745944299261)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 6221578934700509721)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 11021500064047853842)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 795616089580472036)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/jo

100%|██████████| 8/8 [10:51<00:00, 81.41s/it]


In [10]:
scan_results.data

Unnamed: 0,round_epochs,val_loss,val_categorical_accuracy,loss,categorical_accuracy,dense1_neuron,activation,conv_dropout
0,10,0.188446,0.931286,0.259249,0.904343,256,elu,0.4
1,10,0.181522,0.930333,0.227953,0.916243,512,relu,0.25
2,10,0.187045,0.931619,0.264395,0.9024,512,elu,0.4
3,10,0.158611,0.941857,0.218269,0.919257,256,elu,0.25
4,10,0.237423,0.908952,0.277482,0.900629,256,relu,0.4
5,10,0.170075,0.934857,0.236671,0.914971,256,relu,0.25
6,10,0.1665,0.938571,0.215054,0.921429,512,elu,0.25
7,10,0.229709,0.914286,0.276842,0.899057,512,relu,0.4


In [11]:
model_id = scan_results.data['val_categorical_accuracy'].astype('float').argmax() - 1
model_id + 1

3

In [0]:
tf.keras.backend.clear_session()
from tensorflow.keras.models import model_from_json
model = model_from_json(scan_results.saved_models[model_id])
model.set_weights(scan_results.saved_weights[model_id])

In [13]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_v1 (Batc (None, 28, 28, 1)         4         
_________________________________________________________________
conv2d (Conv2D)              (None, 28, 28, 64)        1664      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 64)        0         
_________________________________________________________________
dropout (Dropout)            (None, 14, 14, 64)        0         
_________________________________________________________________
batch_normalization_v1_1 (Ba (None, 14, 14, 64)        256       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 128)       204928    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 128)         0         
__________

In [0]:
model.save('./best_model.h5')

In [0]:
from google.colab import files

files.download('./best_model.h5')

In [16]:
# access the summary details
scan_results.details

random_method            uniform_mersenne
reduction_method                     None
reduction_interval                     50
reduction_window                       20
grid_downsample                         1
reduction_threshold                   0.2
reduction_metric                  val_acc
reduce_loss                         False
experiment_name             050419083207_
complete_time              05/04/19/08:42
x_shape                (70000, 28, 28, 1)
y_shape                       (70000, 10)
dtype: object

In [17]:
# use Scan object as input
report = ta.Reporting(scan_results)
# access the dataframe with the results
report.data.head(-3)

Unnamed: 0,round_epochs,val_loss,val_categorical_accuracy,loss,categorical_accuracy,dense1_neuron,activation,conv_dropout
0,10,0.188446,0.931286,0.259249,0.904343,256,elu,0.4
1,10,0.181522,0.930333,0.227953,0.916243,512,relu,0.25
2,10,0.187045,0.931619,0.264395,0.9024,512,elu,0.4
3,10,0.158611,0.941857,0.218269,0.919257,256,elu,0.25
4,10,0.237423,0.908952,0.277482,0.900629,256,relu,0.4


In [18]:
# get the number of rounds in the Scan
report.rounds()

8

In [19]:
# get the highest result for any metric
report.high('categorical_accuracy')

0.92142856