<a href="https://colab.research.google.com/github/mgmk2/TensorFlow_v2_save_weights_on_colabTPU/blob/master/save_weights_on_TPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# tensorflow 2.0.0をインストール

In [1]:
!pip install tensorflow==2.0.0

Collecting tensorflow==2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/46/0f/7bd55361168bb32796b360ad15a25de6966c9c1beb58a8e30c01c8279862/tensorflow-2.0.0-cp36-cp36m-manylinux2010_x86_64.whl (86.3MB)
[K     |████████████████████████████████| 86.3MB 26kB/s 
[?25hCollecting tensorboard<2.1.0,>=2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/9b/a6/e8ffa4e2ddb216449d34cfcb825ebb38206bee5c4553d69e7bc8bc2c5d64/tensorboard-2.0.0-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 36.7MB/s 
Collecting tensorflow-estimator<2.1.0,>=2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/fc/08/8b927337b7019c374719145d1dceba21a8bb909b93b1ad6f8fb7d22c1ca1/tensorflow_estimator-2.0.1-py2.py3-none-any.whl (449kB)
[K     |████████████████████████████████| 450kB 59.7MB/s 
Installing collected packages: tensorboard, tensorflow-estimator, tensorflow
  Found existing installation: tensorboard 1.15.0
    Uninstalling tensorboard-1.15.0:


# TPU上ではCheckpointで保存できない

tf.keras.Modelのsave_weightsメソッドも同様のErrorが出るはず

In [0]:
import os
import time
import numpy as np
import tensorflow as tf

# Show which TensorFlow version this run uses.
print(tf.__version__)

# Batch size handed to Dataset.batch below; also the divisor used when the
# per-replica loss/accuracy sums are turned into means.
batch_size = 256
# Number of passes over the training set.
epochs = 10
# gRPC address of the TPU, built from the TPU_NAME environment variable
# (raises KeyError when the variable is not set).
tpu_address = "grpc://" + os.environ["TPU_NAME"]

# Resolve the TPU, connect to it, initialise the TPU system and finally create
# the distribution strategy. The calls are kept in exactly this order.
cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=tpu_address)
tf.config.experimental_connect_to_cluster(cluster_resolver)
tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
tpu_strategy = tf.distribute.experimental.TPUStrategy(cluster_resolver)

with tpu_strategy.scope():
    # ---------------------------------------------------------------------
    # Dataset
    # ---------------------------------------------------------------------

    # Load MNIST; give the training images a trailing channel axis and cast
    # images/labels to the dtypes used by the network and the loss.
    (X_train, Y_train), (X_test, Y_test) = tf.keras.datasets.mnist.load_data()
    X_train = np.expand_dims(X_train, axis=-1).astype(np.float32)
    Y_train = Y_train.astype(np.int32)
    N = len(X_train)

    # tf.data pipeline: shuffle over the whole training set, cut fixed-size
    # batches (incomplete last batch dropped), then hand it to the strategy.
    dataset = (
        tf.data.Dataset.from_tensor_slices((X_train, Y_train))
        .shuffle(buffer_size=N)
        .batch(batch_size, drop_remainder=True)
    )
    dataset = tpu_strategy.experimental_distribute_dataset(dataset)

    # ---------------------------------------------------------------------
    # Network definition
    # ---------------------------------------------------------------------

    input_shape = (28, 28, 1)  # Per-sample input shape; the batch axis is omitted.

    # Input layer.
    x = tf.keras.layers.Input(input_shape)
    h = x
    # Three identical convolution + ReLU stages.
    for conv_index in range(3):
        h = tf.keras.layers.Conv2D(64, (3, 3), padding='SAME')(h)
        h = tf.keras.layers.ReLU()(h)
    # Flatten, then a linear layer producing 10 logits.
    h = tf.keras.layers.Flatten()(h)
    y = tf.keras.layers.Dense(10)(h)

    # Build the model from the input and output tensors.
    model = tf.keras.Model(x, y)

    # ---------------------------------------------------------------------
    # Training step
    # ---------------------------------------------------------------------

    optimizer = tf.optimizers.Adam(1.0e-4)

    @tf.function
    def train_step(dist_inputs):
        """Run one distributed optimisation step.

        Args:
            dist_inputs: one element of the distributed dataset, i.e. an
                (images, labels) pair as produced by `dataset`.

        Returns:
            (loss, accuracy): the per-replica values summed over replicas.
            Each replica divides its own sum by `batch_size`, so the totals
            are means over the whole batch.
        """
        def _train_step(inputs):
            images, labels = inputs

            # Forward pass up to the loss, recorded by tf.GradientTape.
            with tf.GradientTape() as tape:
                logits = model(images)
                per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels, logits)
                loss = tf.reduce_sum(per_example_loss) / batch_size

            # Compute the gradients and let the optimizer update the weights.
            variables = model.trainable_variables
            gradients = tape.gradient(loss, sources=variables)
            optimizer.apply_gradients(zip(gradients, variables))

            correct = tf.metrics.sparse_categorical_accuracy(labels, logits)
            acc = tf.reduce_sum(correct) / batch_size

            return loss, acc

        per_replica_loss, per_replica_acc = tpu_strategy.experimental_run_v2(
            _train_step, args=(dist_inputs,))
        total_loss = tpu_strategy.reduce(
            tf.distribute.ReduceOp.SUM, per_replica_loss, axis=None)
        total_acc = tpu_strategy.reduce(
            tf.distribute.ReduceOp.SUM, per_replica_acc, axis=None)
        return total_loss, total_acc

# Checkpoint object that tracks both the model and the optimizer.
ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)

with tpu_strategy.scope():
    # ---------------------------------------------------------------------
    # Train with the Dataset API
    # ---------------------------------------------------------------------

    print('train with Dataset API.')

    # Number of full batches per epoch; it does not change between epochs.
    steps_per_epoch = N // batch_size

    for epoch in range(epochs):
        time_start = time.time()
        train_loss = 0
        train_acc = 0
        dataset_iter = iter(dataset)

        for _step in range(steps_per_epoch):
            # Run one training step and accumulate its loss / accuracy.
            loss_tmp, acc_tmp = train_step(next(dataset_iter))
            train_loss += loss_tmp
            train_acc += acc_tmp

        # Mean loss and mean accuracy (in percent) over the epoch.
        epoch_loss = train_loss / steps_per_epoch
        epoch_acc = 100 * train_acc / steps_per_epoch

        # Report the epoch.
        time_epoch = time.time() - time_start
        print('epoch: {:} loss: {:.4f} acc: {:.2f}% time: {:.2f}s'.format(
            epoch + 1, epoch_loss, epoch_acc, time_epoch))

# Saving to the local file system is the failure this cell demonstrates: on the
# Colab TPU it raises UnimplementedError. Catch and display that one error so
# the cell still shows the problem without aborting "Run all"; anything else
# still propagates.
try:
    ckpt.save('/content/test_save')
except tf.errors.UnimplementedError as err:
    print('UnimplementedError: {}'.format(err))

2.0.0
INFO:tensorflow:Initializing the TPU system: grpc://10.36.208.210:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.36.208.210:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


train with Dataset API.
epoch: 1 loss: 0.4460 acc: 90.44% time: 12.61s
epoch: 2 loss: 0.0746 acc: 97.76% time: 7.25s
epoch: 3 loss: 0.0411 acc: 98.74% time: 7.16s
epoch: 4 loss: 0.0248 acc: 99.23% time: 7.07s
epoch: 5 loss: 0.0150 acc: 99.57% time: 7.16s
epoch: 6 loss: 0.0100 acc: 99.72% time: 7.19s
epoch: 7 loss: 0.0072 acc: 99.80% time: 7.28s
epoch: 8 loss: 0.0047 acc: 99.90% time: 6.66s
epoch: 9 loss: 0.0037 acc: 99.92% time: 7.38s
epoch: 10 loss: 0.0018 acc: 99.98% time: 6.86s


UnimplementedError: ignored

# モデルの重み作成について

モデルの重みをassignメソッドで復元するためには、復元（assign）を行う時点で重みが既に作成されている必要がある。

## Functional API

モデル作成の時点で重みは作成済み

In [0]:
import tensorflow as tf

input_shape = (28, 28, 1)  # Per-sample input shape; the batch axis is omitted.

# Network definition (Functional API).
# Input layer.
x = tf.keras.layers.Input(input_shape)
h = x
# Three identical convolution + ReLU stages.
for conv_index in range(3):
    h = tf.keras.layers.Conv2D(64, (3, 3), padding='SAME')(h)
    h = tf.keras.layers.ReLU()(h)
# Flatten, then a linear layer producing 10 outputs.
h = tf.keras.layers.Flatten()(h)
y = tf.keras.layers.Dense(10)(h)

# Build the model from the input and output tensors.
model = tf.keras.Model(x, y)

# Names of the weights the model holds right after construction.
weight_names = [weight.name for weight in model.weights]
print(weight_names)

['conv2d/kernel:0', 'conv2d/bias:0', 'conv2d_1/kernel:0', 'conv2d_1/bias:0', 'conv2d_2/kernel:0', 'conv2d_2/bias:0', 'dense/kernel:0', 'dense/bias:0']


## Custom Model

モデル作成の時点で重みは作成されず、初めてcallした時点で作成される

In [1]:
import tensorflow as tf

input_shape = (28, 28, 1) # Input shape, excluding the leading (batch size) dimension.

class CustomModel(tf.keras.Model):
    """Subclassed Keras model: three Conv2D+ReLU stages, Flatten, Dense(10).

    Only the layer objects are created in the constructor; nothing in this
    class calls them until `call` is invoked.
    """

    def __init__(self, *args, **kwargs):
        """Create the layers; all arguments are forwarded to `tf.keras.Model`."""
        super().__init__(*args, **kwargs)

        # Convolution stage 1
        self.conv0 = tf.keras.layers.Conv2D(64, (3, 3), padding='SAME')
        self.act0 = tf.keras.layers.ReLU()
        # Convolution stage 2
        self.conv1 = tf.keras.layers.Conv2D(64, (3, 3), padding='SAME')
        self.act1 = tf.keras.layers.ReLU()
        # Convolution stage 3
        self.conv2 = tf.keras.layers.Conv2D(64, (3, 3), padding='SAME')
        self.act2 = tf.keras.layers.ReLU()
        # Linear head
        self.flatten = tf.keras.layers.Flatten()
        self.dense = tf.keras.layers.Dense(10)

    def call(self, inputs):
        """Apply the layers in order and return the output of the dense layer."""
        pipeline = (
            self.conv0, self.act0,
            self.conv1, self.act1,
            self.conv2, self.act2,
            self.flatten, self.dense,
        )
        outputs = inputs
        for layer in pipeline:
            outputs = layer(outputs)
        return outputs

# Create the model.
model = CustomModel()

# Weights held right after construction.
names_before_call = [weight.name for weight in model.weights]
print(names_before_call)

# Call the model once on a single all-zero sample.
x = tf.zeros((1,) + tuple(input_shape), tf.float32)
y = model(x)

# Weights held after the first call.
names_after_call = [weight.name for weight in model.weights]
print(names_after_call)

[]
['custom_model/conv2d/kernel:0', 'custom_model/conv2d/bias:0', 'custom_model/conv2d_1/kernel:0', 'custom_model/conv2d_1/bias:0', 'custom_model/conv2d_2/kernel:0', 'custom_model/conv2d_2/bias:0', 'custom_model/dense/kernel:0', 'custom_model/dense/bias:0']


Inputレイヤーを入力としてcallして重みを作成

In [0]:
import tensorflow as tf

input_shape = (28, 28, 1) # Input shape, excluding the leading (batch size) dimension.

class CustomModel(tf.keras.Model):
    """Subclassed Keras model that runs `call` once inside its constructor.

    Same architecture as before (three Conv2D+ReLU stages, Flatten,
    Dense(10)); the constructor additionally passes a Keras `Input` of the
    given shape through `call` so the layers are invoked before first use.
    """

    def __init__(self, input_shape, *args, **kwargs):
        """Create the layers and invoke them once on a symbolic input.

        Args:
            input_shape: per-sample input shape handed to
                `tf.keras.layers.Input` (batch axis omitted).
            *args, **kwargs: forwarded to `tf.keras.Model`.
        """
        super().__init__(*args, **kwargs)

        # Convolution stage 1
        self.conv0 = tf.keras.layers.Conv2D(64, (3, 3), padding='SAME')
        self.act0 = tf.keras.layers.ReLU()
        # Convolution stage 2
        self.conv1 = tf.keras.layers.Conv2D(64, (3, 3), padding='SAME')
        self.act1 = tf.keras.layers.ReLU()
        # Convolution stage 3
        self.conv2 = tf.keras.layers.Conv2D(64, (3, 3), padding='SAME')
        self.act2 = tf.keras.layers.ReLU()
        # Linear head
        self.flatten = tf.keras.layers.Flatten()
        self.dense = tf.keras.layers.Dense(10)

        # Run `call` once so the weights get created. The placeholder is a
        # Keras Input whose first (batch) dimension is None and whose
        # remaining dimensions are `input_shape`; the result is discarded.
        self.call(tf.keras.layers.Input(input_shape))

    def call(self, inputs):
        """Apply the layers in order and return the output of the dense layer."""
        pipeline = (
            self.conv0, self.act0,
            self.conv1, self.act1,
            self.conv2, self.act2,
            self.flatten, self.dense,
        )
        outputs = inputs
        for layer in pipeline:
            outputs = layer(outputs)
        return outputs

# Create the model; the constructor already runs `call` once.
model = CustomModel(input_shape)

# Show the names of the weights the model holds.
weight_names = [weight.name for weight in model.weights]
print(weight_names)

['conv2d/kernel:0', 'conv2d/bias:0', 'conv2d_1/kernel:0', 'conv2d_1/bias:0', 'conv2d_2/kernel:0', 'conv2d_2/bias:0', 'dense/kernel:0', 'dense/bias:0']


# オプティマイザーの重み作成について

オプティマイザーのslot（Adamの場合はmとv）は、通常は最初のapply_gradients呼び出し時に自動的に作成される。ここでは自動的に作成される前に、add_slotメソッドで明示的にslotを作成する。

In [0]:
import tensorflow as tf

input_shape = (28, 28, 1)  # Per-sample input shape; the batch axis is omitted.

# Network definition (Functional API).
# Input layer.
x = tf.keras.layers.Input(input_shape)
h = x
# Three identical convolution + ReLU stages.
for conv_index in range(3):
    h = tf.keras.layers.Conv2D(64, (3, 3), padding='SAME')(h)
    h = tf.keras.layers.ReLU()(h)
# Flatten, then a linear layer producing 10 outputs.
h = tf.keras.layers.Flatten()(h)
y = tf.keras.layers.Dense(10)(h)

# Build the model from the input and output tensors.
model = tf.keras.Model(x, y)

# Slots to create for every model weight.
slot_names = ['m', 'v']
optimizer = tf.optimizers.Adam(1.0e-4)

# Create the slots under the optimizer's own name scope, one model weight at
# a time, each initialised to zeros.
with tf.name_scope(optimizer._name):
    for weight in model.weights:
        for slot_name in slot_names:
            optimizer.add_slot(weight, slot_name, initializer='zeros')

# Show the names of the weights the optimizer now holds.
optimizer_weight_names = [weight.name for weight in optimizer.weights]
print(optimizer_weight_names)


['Adam/conv2d/kernel/m:0', 'Adam/conv2d/kernel/v:0', 'Adam/conv2d/bias/m:0', 'Adam/conv2d/bias/v:0', 'Adam/conv2d_1/kernel/m:0', 'Adam/conv2d_1/kernel/v:0', 'Adam/conv2d_1/bias/m:0', 'Adam/conv2d_1/bias/v:0', 'Adam/conv2d_2/kernel/m:0', 'Adam/conv2d_2/kernel/v:0', 'Adam/conv2d_2/bias/m:0', 'Adam/conv2d_2/bias/v:0', 'Adam/dense/kernel/m:0', 'Adam/dense/kernel/v:0', 'Adam/dense/bias/m:0', 'Adam/dense/bias/v:0']


# CNNでテストしてみる

テストしやすくするため、諸々の関数等をまとめてTrainerクラスを作成

In [1]:
import os
import time
import pickle
import numpy as np
import tensorflow as tf

# Show which TensorFlow version this run uses.
print(tf.__version__)

class Trainer(object):
    """MNIST CNN trainer for a Colab TPU with pickle-based weight save/restore.

    Bundles TPU initialisation, the distributed train/test datasets, the
    model and optimizer, the train/eval loops, and helpers that copy the model
    weights and the optimizer state to and from NumPy values so they can be
    pickled on the local file system.

    Typical use: ``Trainer()`` -> ``build_model()`` -> ``train()`` /
    ``eval()`` -> ``save_weights_as_pickle()`` or
    ``load_weights_from_pickle()``.
    """

    def __init__(self):
        """Connect to the TPU and prepare the distributed MNIST datasets.

        The TPU address is read from the ``TPU_NAME`` environment variable.
        The model and optimizer are not created here; call ``build_model``.

        Raises:
            KeyError: if ``TPU_NAME`` is not set.
        """
        self.batch_size = 256
        self.learning_rate = 1.0e-4
        self.input_shape = (28, 28, 1)  # Per-sample input shape; the batch axis is omitted.

        # Resolve, connect to and initialise the TPU, then build the strategy.
        tpu_address = "grpc://" + os.environ["TPU_NAME"]
        cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=tpu_address)
        tf.config.experimental_connect_to_cluster(cluster_resolver)
        tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
        self.tpu_strategy = tf.distribute.experimental.TPUStrategy(cluster_resolver)

        print('=' * 50)

        #=========================================================================
        # Dataset
        #=========================================================================

        # Load MNIST; add a trailing channel axis to the images and cast
        # images/labels to float32/int32.
        (X_train, Y_train), (X_test, Y_test) = tf.keras.datasets.mnist.load_data()
        X_train = X_train[..., np.newaxis].astype(np.float32)
        Y_train = Y_train.astype(np.int32)
        X_test = X_test[..., np.newaxis].astype(np.float32)
        Y_test = Y_test.astype(np.int32)
        self.N_train = X_train.shape[0]
        self.N_test = X_test.shape[0]

        with self.tpu_strategy.scope():
            # Training pipeline: shuffle over the whole set, fixed-size
            # batches (incomplete last batch dropped), distributed by the strategy.
            dataset_train = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
            dataset_train = dataset_train.shuffle(buffer_size=self.N_train)
            dataset_train = dataset_train.batch(self.batch_size, drop_remainder=True)
            self.dataset_train = self.tpu_strategy.experimental_distribute_dataset(dataset_train)

            # Test pipeline: same batching, no shuffling.
            dataset_test = tf.data.Dataset.from_tensor_slices((X_test, Y_test))
            dataset_test = dataset_test.batch(self.batch_size, drop_remainder=True)
            self.dataset_test = self.tpu_strategy.experimental_distribute_dataset(dataset_test)

    def tpu_decorator(func):
        """Decorator: run ``func`` inside ``self.tpu_strategy.scope()``.

        If the caller is already in a cross-replica context the method is
        called directly; otherwise the strategy scope is entered first.
        """
        def wrapper(self, *args, **kwargs):
            if tf.distribute.in_cross_replica_context():
                outputs = func(self, *args, **kwargs)
            else:
                with self.tpu_strategy.scope():
                    outputs = func(self, *args, **kwargs)
            return outputs
        return wrapper

    @tpu_decorator
    def build_model(self):
        """Create ``self.model`` (the CNN) and ``self.optimizer`` (Adam)."""
        #=========================================================================
        # Network definition
        #=========================================================================

        # Input layer
        x = tf.keras.layers.Input(self.input_shape)
        # Convolution stage 1
        h = tf.keras.layers.Conv2D(64, (3, 3), padding='SAME')(x)
        h = tf.keras.layers.ReLU()(h)
        # Convolution stage 2
        h = tf.keras.layers.Conv2D(64, (3, 3), padding='SAME')(h)
        h = tf.keras.layers.ReLU()(h)
        # Convolution stage 3
        h = tf.keras.layers.Conv2D(64, (3, 3), padding='SAME')(h)
        h = tf.keras.layers.ReLU()(h)
        # Linear head producing 10 logits
        h = tf.keras.layers.Flatten()(h)
        y = tf.keras.layers.Dense(10)(h)

        # Build the model and its optimizer.
        self.model = tf.keras.Model(x, y)
        self.optimizer = tf.optimizers.Adam(self.learning_rate)

    @tpu_decorator
    @tf.function
    def train_step(self, dist_inputs):
        """Run one distributed optimisation step.

        Args:
            dist_inputs: one element of ``self.dataset_train``
                (an (images, labels) pair).

        Returns:
            (loss, accuracy) summed over replicas. Each replica divides its
            own sum by ``self.batch_size``, so the totals are batch means.
        """
        def _train_step(inputs):
            images, labels = inputs

            # Forward pass up to the loss, recorded by tf.GradientTape.
            with tf.GradientTape() as tape:
                logits = self.model(images)
                loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels, logits)
                loss = tf.reduce_sum(loss) / self.batch_size

            # Compute the gradients.
            grad = tape.gradient(loss, sources=self.model.trainable_variables)

            # Let the optimizer update the weights.
            self.optimizer.apply_gradients(zip(grad, self.model.trainable_variables))

            acc = tf.metrics.sparse_categorical_accuracy(labels, logits)
            acc = tf.reduce_sum(acc) / self.batch_size

            return loss, acc

        losses, accs = self.tpu_strategy.experimental_run_v2(_train_step, args=(dist_inputs,))
        losses = self.tpu_strategy.reduce(tf.distribute.ReduceOp.SUM, losses, axis=None)
        accs = self.tpu_strategy.reduce(tf.distribute.ReduceOp.SUM, accs, axis=None)
        return losses, accs

    @tpu_decorator
    @tf.function
    def eval_step(self, dist_inputs):
        """Evaluate one distributed batch without updating any weights.

        Args:
            dist_inputs: one element of ``self.dataset_test``
                (an (images, labels) pair).

        Returns:
            (loss, accuracy) summed over replicas, scaled like ``train_step``.
        """
        def _eval_step(inputs):
            images, labels = inputs

            logits = self.model(images)
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels, logits)
            loss = tf.reduce_sum(loss) / self.batch_size
            acc = tf.metrics.sparse_categorical_accuracy(labels, logits)
            acc = tf.reduce_sum(acc) / self.batch_size
            return loss, acc

        losses, accs = self.tpu_strategy.experimental_run_v2(_eval_step, args=(dist_inputs,))
        losses = self.tpu_strategy.reduce(tf.distribute.ReduceOp.SUM, losses, axis=None)
        accs = self.tpu_strategy.reduce(tf.distribute.ReduceOp.SUM, accs, axis=None)
        return losses, accs

    @tpu_decorator
    def train(self, epochs):
        """Train for ``epochs`` epochs and print loss/accuracy/time per epoch.

        Args:
            epochs: number of passes over the training set.
        """
        iterations = self.N_train // self.batch_size

        for epoch in range(epochs):
            time_start = time.time()
            train_loss = 0
            train_acc = 0
            dataset_iter = iter(self.dataset_train)

            for _ in range(iterations):
                # Run one training step and accumulate its loss / accuracy.
                loss_tmp, acc_tmp = self.train_step(next(dataset_iter))
                train_loss += loss_tmp
                train_acc += acc_tmp

            # Mean loss and mean accuracy (in percent) over the epoch.
            epoch_loss = train_loss / iterations
            epoch_acc = 100 * train_acc / iterations

            # Report the epoch.
            time_epoch = time.time() - time_start
            print('epoch: {:} loss: {:.4f} acc: {:.2f}% time: {:.2f}s'.format(
                epoch + 1, epoch_loss, epoch_acc, time_epoch))

    @tpu_decorator
    def eval(self):
        """Evaluate on the test set and print mean loss and accuracy.

        Only full batches are evaluated: the test dataset is batched with
        ``drop_remainder=True`` and the loop runs ``N_test // batch_size``
        steps, so a trailing partial batch is not included.
        """
        iterations = self.N_test // self.batch_size
        test_loss = 0
        test_acc = 0
        dataset_iter = iter(self.dataset_test)

        for _ in range(iterations):
            loss_tmp, acc_tmp = self.eval_step(next(dataset_iter))
            test_loss += loss_tmp
            test_acc += acc_tmp
        test_loss /= iterations
        test_acc *= 100 / iterations
        print('test loss: {:.4f} acc: {:.2f}% '.format(test_loss, test_acc))

    @tpu_decorator
    def get_model_weights_as_numpy(self):
        """Return ``{variable name: numpy array}`` for every model weight."""
        # Each variable in model.weights is converted with its numpy() method.
        return {v.name: v.numpy() for v in self.model.weights}

    @tpu_decorator
    def get_optimizer_weights_as_numpy(self):
        """Return the optimizer state as plain Python / NumPy values.

        Returns:
            dict with keys
              * ``'optimizer_name'``: the optimizer's name (used as the name
                scope when the slots are re-created),
              * ``'iterations'``: the optimizer's step counter as an int
                (Adam's update depends on it, so it has to be restored along
                with the slots),
              * ``'weights'``: ``{variable name: {slot name: numpy array}}``.
        """
        weights = {}
        slot_names = self.optimizer.get_slot_names()
        # Only trainable variables are handed to apply_gradients, so only they
        # own slots; asking for a slot of any other variable would fail.
        for v in self.model.trainable_weights:
            weights[v.name] = {
                slot: self.optimizer.get_slot(v, slot).numpy()
                for slot in slot_names
            }
        return {
            'optimizer_name': self.optimizer._name,
            'iterations': int(self.optimizer.iterations.numpy()),
            'weights': weights,
        }

    @tpu_decorator
    def save_weights_as_pickle(self, file_prefix):
        """Pickle the model weights and optimizer state to ``file_prefix + '.pkl'``.

        Args:
            file_prefix: output path without the ``.pkl`` extension.
        """
        model_weights = self.get_model_weights_as_numpy()
        optimizer_weights = self.get_optimizer_weights_as_numpy()
        all_weights = {'model': model_weights, 'optimizer': optimizer_weights}

        with open(file_prefix + '.pkl', 'wb') as f:
            pickle.dump(all_weights, f)

    @tpu_decorator
    def set_model_weights_from_numpy(self, weights):
        """Assign saved values to the model weights, matched by variable name.

        Args:
            weights: ``{variable name: numpy array}`` as returned by
                ``get_model_weights_as_numpy``. Variables without an entry
                keep their current value and are reported on stdout.
        """
        for v in self.model.weights:
            if v.name in weights:
                v.assign(weights[v.name])
            else:
                print('Not loaded weights: ' + v.name)

    @tpu_decorator
    def set_optimizer_weights_from_numpy(self, weights):
        """Restore the optimizer state saved by ``get_optimizer_weights_as_numpy``.

        Args:
            weights: dict with ``'optimizer_name'`` and ``'weights'``; the
                ``'iterations'`` entry is optional so that files written
                without it can still be loaded.
        """
        # Always work under the optimizer's name scope so the variables that
        # get created here receive the optimizer-prefixed names.
        with tf.name_scope(weights['optimizer_name']):
            # Restore the step counter when it was saved.
            iterations = weights.get('iterations')
            if iterations is not None:
                self.optimizer.iterations.assign(iterations)

            optimizer_weights = weights['weights']
            for v in self.model.trainable_weights:
                if v.name in optimizer_weights:
                    for slot, value in optimizer_weights[v.name].items():
                        # Create the slot with the saved value as its initial value.
                        initializer = tf.initializers.Constant(value)
                        slot_variable = self.optimizer.add_slot(v, slot, initializer=initializer)
                        # add_slot hands back an already existing slot
                        # unchanged, so assign explicitly to cover the case
                        # where the optimizer created its slots before loading.
                        slot_variable.assign(value)
                else:
                    print('Not loaded optimizer weights: ' + v.name)

    @tpu_decorator
    def load_weights_from_pickle(self, file_prefix):
        """Load ``file_prefix + '.pkl'`` and restore model and optimizer state.

        ``build_model`` must have been called first so the variables exist.

        Args:
            file_prefix: input path without the ``.pkl`` extension.
        """
        # SECURITY: pickle.load can execute arbitrary code. Only load files
        # written by save_weights_as_pickle from a trusted location.
        with open(file_prefix + '.pkl', 'rb') as f:
            weights = pickle.load(f)

        self.set_model_weights_from_numpy(weights['model'])
        self.set_optimizer_weights_from_numpy(weights['optimizer'])


2.0.0


## 学習して重み保存

In [2]:
# Connect to the TPU and prepare the datasets, then create the model/optimizer.
trainer = Trainer()
trainer.build_model()
# Evaluate the untrained model first as a baseline.
trainer.eval()
# Train for 10 epochs and evaluate again.
trainer.train(epochs=10)
trainer.eval()
# Write the model weights and optimizer state to /content/save_test.pkl.
trainer.save_weights_as_pickle('/content/save_test')

INFO:tensorflow:Initializing the TPU system: grpc://10.9.31.26:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.9.31.26:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


test loss: 6.4830 acc: 7.69% 
epoch: 1 loss: 0.2818 acc: 92.73% time: 12.25s
epoch: 2 loss: 0.0562 acc: 98.33% time: 7.11s
epoch: 3 loss: 0.0296 acc: 99.13% time: 7.21s
epoch: 4 loss: 0.0166 acc: 99.52% time: 6.97s
epoch: 5 loss: 0.0096 acc: 99.75% time: 6.83s
epoch: 6 loss: 0.0066 acc: 99.82% time: 7.28s
epoch: 7 loss: 0.0046 acc: 99.87% time: 7.01s
epoch: 8 loss: 0.0029 acc: 99.93% time: 7.30s
epoch: 9 loss: 0.0020 acc: 99.96% time: 7.08s
epoch: 10 loss: 0.0033 acc: 99.89% time: 6.98s
test loss: 0.0645 acc: 98.52% 


## 学習済み重みを復元して評価

以下のセルを実行する前に必ずすること
1. ランタイムを再起動
2. Trainerクラスの定義のコードセルを実行

In [2]:
# Fresh trainer and a freshly built (untrained) model.
trainer = Trainer()
trainer.build_model()
# Restore the state pickled by the training cell (/content/save_test.pkl) ...
trainer.load_weights_from_pickle('/content/save_test')
# ... and evaluate the restored model on the test set.
trainer.eval()

INFO:tensorflow:Initializing the TPU system: grpc://10.9.31.26:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.9.31.26:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


test loss: 0.0645 acc: 98.52% 
