In [1]:
!pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/
!python -m pip install -U pip
!pip -V
!pip install sklearn pandas

Writing to /root/.config/pip/pip.conf
Looking in indexes: https://mirrors.aliyun.com/pypi/simple/
Requirement already up-to-date: pip in /usr/local/lib/python3.6/dist-packages (20.2.4)
pip 20.2.4 from /usr/local/lib/python3.6/dist-packages/pip (python 3.6)
Looking in indexes: https://mirrors.aliyun.com/pypi/simple/


In [2]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf

from tensorflow import keras

In [3]:
print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__)

2.3.1
sys.version_info(major=3, minor=6, micro=9, releaselevel='final', serial=0)
matplotlib 3.3.2
numpy 1.18.5
pandas 1.1.4
sklearn 0.23.2
tensorflow 2.3.1
tensorflow.keras 2.4.0


In [6]:
# tf.debugging.set_log_device_placement(True)
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
print(len(gpus))

logical_gpus = tf.config.experimental.list_logical_devices('GPU')
print(len(logical_gpus))

1
1


In [5]:
fashion_mnist = keras.datasets.fashion_mnist
(x_train_all, y_train_all), (x_test, y_test) = fashion_mnist.load_data()
x_train_all = x_train_all / 255.0
x_test = x_test / 255.0
x_valid, x_train = x_train_all[:5000], x_train_all[5000:]
y_valid, y_train = y_train_all[:5000], y_train_all[5000:]

print(x_valid.shape, y_valid.shape)
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

(5000, 28, 28) (5000,)
(55000, 28, 28) (55000,)
(10000, 28, 28) (10000,)


In [9]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(
    x_train.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28, 1)
x_valid_scaled = scaler.transform(
    x_valid.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28,1 )
x_test_scaled = scaler.transform(
    x_test.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28, 1)

In [10]:
def make_dataset(images, labels, epochs, batch_size, shuffle=True):
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    if shuffle:
        dataset = dataset.shuffle(10000)
    dataset = dataset.repeat(epochs).batch(batch_size).prefetch(50)
    return dataset

strategy = tf.distribute.MirroredStrategy()

with strategy.scope():
    batch_size_per_replica = 256
    batch_size = batch_size_per_replica * len(logical_gpus)
    train_dataset = make_dataset(x_train_scaled, y_train, 1, batch_size)
    valid_dataset = make_dataset(x_valid_scaled, y_valid, 1, batch_size)
    train_dataset_distribute = strategy.experimental_distribute_dataset(train_dataset)
    valid_dataset_distribute = strategy.experimental_distribute_dataset(valid_dataset)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


In [11]:
with strategy.scope():
    model = keras.models.Sequential()
    model.add(keras.layers.Conv2D(filters=128, kernel_size=3, padding='same', activation='relu', input_shape=(28,28,1)))
    model.add(keras.layers.Conv2D(filters=128, kernel_size=3, padding='same', activation='relu'))
    model.add(keras.layers.MaxPool2D(pool_size=2))
    model.add(keras.layers.Conv2D(filters=256, kernel_size=3, padding='same', activation='relu'))
    model.add(keras.layers.Conv2D(filters=256, kernel_size=3, padding='same', activation='relu'))
    model.add(keras.layers.MaxPool2D(pool_size=2))
    model.add(keras.layers.Conv2D(filters=512, kernel_size=3, padding='same', activation='relu'))
    model.add(keras.layers.Conv2D(filters=512, kernel_size=3, padding='same', activation='relu'))
    model.add(keras.layers.MaxPool2D(pool_size=2))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(512, activation='relu'))
    model.add(keras.layers.Dense(10, activation='softmax'))

In [12]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 128)       1280      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 128)       147584    
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 128)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 256)       295168    
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 14, 14, 256)       590080    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 256)         0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 7, 7, 512)         1

In [13]:
# customized training loop.
# 1. define losses functions
# 2. define function train_step
# 3. define function test_step
# 4. for-loop trianing loop

with strategy.scope():
    # batch_size, batch_size / #{gpu}
    # eg: 64, 4 gpu: 16
    loss_func = keras.losses.SparseCategoricalCrossentropy(
        reduction=keras.losses.Reduction.NONE)
    def compute_loss(labels, predictions):
        per_replica_loss = loss_func(labels, predictions)
        return tf.nn.compute_average_loss(per_replica_loss,
                                         global_batch_size = batch_size)

    test_loss = keras.metrics.Mean(name='test_loss')
    train_accuray = keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')
    test_accuray = keras.metrics.SparseCategoricalAccuracy(
        name='test_accuracy')

    optimizer = keras.optimizers.SGD(lr=0.01)

    def train_step(inputs):
        images, labels = inputs
        with tf.GradientTape() as tape:
            predictions = model(images, training=True)
            loss = compute_loss(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        train_accuray.update_state(labels, predictions)
        return loss
    
    @tf.function
    def distributed_train_step(inputs):
        per_replica_average_loss = strategy.experimental_run_v2(train_step, args = (inputs,))
        return strategy.reduce(tf.distribute.ReduceOp.SUM,
                              per_replica_average_loss,
                              axis=None)

    def test_step(inputs):
        images, labels = inputs
        predictions = model(images)
        t_loss = loss_func(labels, predictions)
        test_loss.update_state(t_loss)
        test_accuray.update_state(labels, predictions)
        
    @tf.function
    def distribute_test_step(inputs):
        strategy.experimental_run_v2(test_step, args = (inputs,))
        
    epochs = 10
    for epoch in range(epochs):
        total_loss = 0.
        num_batches = 0
        for x in train_dataset:
            start_time = time.time()
            total_loss += distributed_train_step(x)
            run_time = time.time() - start_time
            num_batches += 1
            print('\ntotal: %3.3f, num_batches: %d, '
                  'average: %3.3f, time: %3.3f' 
                  % (total_loss, num_batches, 
                     total_loss / num_batches, run_time), 
                  end='')
        train_loss = total_loss / num_batches
        for x in valid_dataset:
            distribute_test_step(x)

        print('\rEpoch: %d, Loss: %3.3f, Acc: %3.3f,'
              'Val_Loss: %3.3f, Val_Acc: %3.3f' 
              % (epoch + 1, train_loss, train_accuray.result(),
                test_loss.result(), test_accuray.result()))
        test_loss.reset_states()
        train_accuray.reset_states()
        test_accuray.reset_states()

Instructions for updating:
renamed to `run`

total: 2.302, num_batches: 1, average: 2.302, time: 4.771
total: 4.603, num_batches: 2, average: 2.302, time: 0.048
total: 6.909, num_batches: 3, average: 2.303, time: 0.049
total: 9.210, num_batches: 4, average: 2.302, time: 0.049
total: 11.510, num_batches: 5, average: 2.302, time: 0.049
total: 13.812, num_batches: 6, average: 2.302, time: 0.049
total: 16.113, num_batches: 7, average: 2.302, time: 0.049
total: 18.411, num_batches: 8, average: 2.301, time: 0.049
total: 20.710, num_batches: 9, average: 2.301, time: 0.048
total: 23.003, num_batches: 10, average: 2.300, time: 0.049
total: 25.296, num_batches: 11, average: 2.300, time: 0.048
total: 27.591, num_batches: 12, average: 2.299, time: 0.048
total: 29.888, num_batches: 13, average: 2.299, time: 0.050
total: 32.181, num_batches: 14, average: 2.299, time: 0.049
total: 34.477, num_batches: 15, average: 2.298, time: 0.048
total: 36.769, num_batches: 16, average: 2.298, time: 0.049
total: 3

total: 283.713, num_batches: 132, average: 2.149, time: 0.069
total: 285.178, num_batches: 133, average: 2.144, time: 0.072
total: 286.670, num_batches: 134, average: 2.139, time: 0.055
total: 288.240, num_batches: 135, average: 2.135, time: 0.055
total: 289.599, num_batches: 136, average: 2.129, time: 0.057
total: 290.876, num_batches: 137, average: 2.123, time: 0.071
total: 292.226, num_batches: 138, average: 2.118, time: 0.069
total: 293.629, num_batches: 139, average: 2.112, time: 0.077
total: 294.930, num_batches: 140, average: 2.107, time: 0.075
total: 296.193, num_batches: 141, average: 2.101, time: 0.055
total: 297.453, num_batches: 142, average: 2.095, time: 0.058
total: 298.785, num_batches: 143, average: 2.089, time: 0.057
total: 300.303, num_batches: 144, average: 2.085, time: 0.060
total: 301.845, num_batches: 145, average: 2.082, time: 0.059
total: 303.702, num_batches: 146, average: 2.080, time: 0.072
total: 305.024, num_batches: 147, average: 2.075, time: 0.068
total: 3

total: 24.891, num_batches: 26, average: 0.957, time: 0.054
total: 25.730, num_batches: 27, average: 0.953, time: 0.080
total: 26.575, num_batches: 28, average: 0.949, time: 0.075
total: 27.438, num_batches: 29, average: 0.946, time: 0.066
total: 28.321, num_batches: 30, average: 0.944, time: 0.059
total: 29.205, num_batches: 31, average: 0.942, time: 0.054
total: 30.081, num_batches: 32, average: 0.940, time: 0.056
total: 31.060, num_batches: 33, average: 0.941, time: 0.057
total: 31.862, num_batches: 34, average: 0.937, time: 0.062
total: 32.717, num_batches: 35, average: 0.935, time: 0.056
total: 33.805, num_batches: 36, average: 0.939, time: 0.060
total: 34.842, num_batches: 37, average: 0.942, time: 0.054
total: 35.682, num_batches: 38, average: 0.939, time: 0.059
total: 36.570, num_batches: 39, average: 0.938, time: 0.058
total: 37.386, num_batches: 40, average: 0.935, time: 0.056
total: 38.184, num_batches: 41, average: 0.931, time: 0.067
total: 39.084, num_batches: 42, average:

total: 134.414, num_batches: 161, average: 0.835, time: 0.058
total: 135.214, num_batches: 162, average: 0.835, time: 0.067
total: 135.861, num_batches: 163, average: 0.834, time: 0.053
total: 136.648, num_batches: 164, average: 0.833, time: 0.055
total: 137.389, num_batches: 165, average: 0.833, time: 0.060
total: 138.071, num_batches: 166, average: 0.832, time: 0.055
total: 138.874, num_batches: 167, average: 0.832, time: 0.058
total: 139.751, num_batches: 168, average: 0.832, time: 0.067
total: 140.441, num_batches: 169, average: 0.831, time: 0.057
total: 141.172, num_batches: 170, average: 0.830, time: 0.059
total: 141.874, num_batches: 171, average: 0.830, time: 0.063
total: 142.567, num_batches: 172, average: 0.829, time: 0.055
total: 143.281, num_batches: 173, average: 0.828, time: 0.071
total: 144.125, num_batches: 174, average: 0.828, time: 0.062
total: 144.715, num_batches: 175, average: 0.827, time: 0.060
total: 145.353, num_batches: 176, average: 0.826, time: 0.066
total: 1

total: 58.266, num_batches: 84, average: 0.694, time: 0.061
total: 58.893, num_batches: 85, average: 0.693, time: 0.067
total: 59.575, num_batches: 86, average: 0.693, time: 0.063
total: 60.332, num_batches: 87, average: 0.693, time: 0.057
total: 60.956, num_batches: 88, average: 0.693, time: 0.063
total: 61.623, num_batches: 89, average: 0.692, time: 0.060
total: 62.237, num_batches: 90, average: 0.692, time: 0.054
total: 63.022, num_batches: 91, average: 0.693, time: 0.053
total: 63.546, num_batches: 92, average: 0.691, time: 0.066
total: 64.167, num_batches: 93, average: 0.690, time: 0.061
total: 64.801, num_batches: 94, average: 0.689, time: 0.059
total: 65.384, num_batches: 95, average: 0.688, time: 0.056
total: 65.970, num_batches: 96, average: 0.687, time: 0.058
total: 66.631, num_batches: 97, average: 0.687, time: 0.063
total: 67.290, num_batches: 98, average: 0.687, time: 0.059
total: 67.898, num_batches: 99, average: 0.686, time: 0.065
total: 68.609, num_batches: 100, average

total: 2.829, num_batches: 5, average: 0.566, time: 0.056
total: 3.398, num_batches: 6, average: 0.566, time: 0.069
total: 4.173, num_batches: 7, average: 0.596, time: 0.055
total: 5.018, num_batches: 8, average: 0.627, time: 0.060
total: 5.774, num_batches: 9, average: 0.642, time: 0.055
total: 6.431, num_batches: 10, average: 0.643, time: 0.059
total: 7.154, num_batches: 11, average: 0.650, time: 0.057
total: 7.738, num_batches: 12, average: 0.645, time: 0.056
total: 8.312, num_batches: 13, average: 0.639, time: 0.059
total: 8.920, num_batches: 14, average: 0.637, time: 0.064
total: 9.526, num_batches: 15, average: 0.635, time: 0.067
total: 10.119, num_batches: 16, average: 0.632, time: 0.084
total: 10.760, num_batches: 17, average: 0.633, time: 0.072
total: 11.380, num_batches: 18, average: 0.632, time: 0.061
total: 11.954, num_batches: 19, average: 0.629, time: 0.054
total: 12.620, num_batches: 20, average: 0.631, time: 0.059
total: 13.192, num_batches: 21, average: 0.628, time: 0.

total: 84.712, num_batches: 144, average: 0.588, time: 0.059
total: 85.189, num_batches: 145, average: 0.588, time: 0.057
total: 85.771, num_batches: 146, average: 0.587, time: 0.057
total: 86.335, num_batches: 147, average: 0.587, time: 0.060
total: 86.835, num_batches: 148, average: 0.587, time: 0.054
total: 87.379, num_batches: 149, average: 0.586, time: 0.065
total: 87.900, num_batches: 150, average: 0.586, time: 0.065
total: 88.522, num_batches: 151, average: 0.586, time: 0.061
total: 89.011, num_batches: 152, average: 0.586, time: 0.061
total: 89.543, num_batches: 153, average: 0.585, time: 0.058
total: 90.230, num_batches: 154, average: 0.586, time: 0.060
total: 90.781, num_batches: 155, average: 0.586, time: 0.056
total: 91.431, num_batches: 156, average: 0.586, time: 0.056
total: 92.058, num_batches: 157, average: 0.586, time: 0.060
total: 92.619, num_batches: 158, average: 0.586, time: 0.056
total: 93.219, num_batches: 159, average: 0.586, time: 0.059
total: 93.810, num_batch

total: 33.444, num_batches: 63, average: 0.531, time: 0.054
total: 34.018, num_batches: 64, average: 0.532, time: 0.066
total: 34.493, num_batches: 65, average: 0.531, time: 0.062
total: 35.144, num_batches: 66, average: 0.532, time: 0.064
total: 35.664, num_batches: 67, average: 0.532, time: 0.068
total: 36.150, num_batches: 68, average: 0.532, time: 0.067
total: 36.546, num_batches: 69, average: 0.530, time: 0.054
total: 37.050, num_batches: 70, average: 0.529, time: 0.067
total: 37.657, num_batches: 71, average: 0.530, time: 0.056
total: 38.222, num_batches: 72, average: 0.531, time: 0.059
total: 38.782, num_batches: 73, average: 0.531, time: 0.062
total: 39.253, num_batches: 74, average: 0.530, time: 0.055
total: 39.800, num_batches: 75, average: 0.531, time: 0.064
total: 40.268, num_batches: 76, average: 0.530, time: 0.058
total: 40.728, num_batches: 77, average: 0.529, time: 0.062
total: 41.164, num_batches: 78, average: 0.528, time: 0.060
total: 41.682, num_batches: 79, average:

total: 100.486, num_batches: 197, average: 0.510, time: 0.060
total: 101.034, num_batches: 198, average: 0.510, time: 0.065
total: 101.507, num_batches: 199, average: 0.510, time: 0.055
total: 102.019, num_batches: 200, average: 0.510, time: 0.069
total: 102.471, num_batches: 201, average: 0.510, time: 0.065
total: 102.931, num_batches: 202, average: 0.510, time: 0.066
total: 103.393, num_batches: 203, average: 0.509, time: 0.060
total: 104.039, num_batches: 204, average: 0.510, time: 0.068
total: 104.542, num_batches: 205, average: 0.510, time: 0.062
total: 104.963, num_batches: 206, average: 0.510, time: 0.056
total: 105.440, num_batches: 207, average: 0.509, time: 0.056
total: 105.937, num_batches: 208, average: 0.509, time: 0.068
total: 106.443, num_batches: 209, average: 0.509, time: 0.065
total: 106.988, num_batches: 210, average: 0.509, time: 0.065
total: 107.445, num_batches: 211, average: 0.509, time: 0.062
total: 107.933, num_batches: 212, average: 0.509, time: 0.066
total: 1

total: 56.559, num_batches: 121, average: 0.467, time: 0.048
total: 57.055, num_batches: 122, average: 0.468, time: 0.049
total: 57.525, num_batches: 123, average: 0.468, time: 0.048
total: 57.955, num_batches: 124, average: 0.467, time: 0.049
total: 58.378, num_batches: 125, average: 0.467, time: 0.048
total: 58.874, num_batches: 126, average: 0.467, time: 0.048
total: 59.461, num_batches: 127, average: 0.468, time: 0.065
total: 59.998, num_batches: 128, average: 0.469, time: 0.059
total: 60.424, num_batches: 129, average: 0.468, time: 0.053
total: 60.800, num_batches: 130, average: 0.468, time: 0.049
total: 61.196, num_batches: 131, average: 0.467, time: 0.048
total: 61.633, num_batches: 132, average: 0.467, time: 0.050
total: 62.014, num_batches: 133, average: 0.466, time: 0.051
total: 62.480, num_batches: 134, average: 0.466, time: 0.053
total: 62.894, num_batches: 135, average: 0.466, time: 0.062
total: 63.380, num_batches: 136, average: 0.466, time: 0.068
total: 63.862, num_batch

total: 18.094, num_batches: 41, average: 0.441, time: 0.049
total: 18.502, num_batches: 42, average: 0.441, time: 0.049
total: 18.971, num_batches: 43, average: 0.441, time: 0.049
total: 19.364, num_batches: 44, average: 0.440, time: 0.049
total: 19.767, num_batches: 45, average: 0.439, time: 0.049
total: 20.230, num_batches: 46, average: 0.440, time: 0.049
total: 20.659, num_batches: 47, average: 0.440, time: 0.048
total: 20.973, num_batches: 48, average: 0.437, time: 0.049
total: 21.430, num_batches: 49, average: 0.437, time: 0.048
total: 21.844, num_batches: 50, average: 0.437, time: 0.049
total: 22.300, num_batches: 51, average: 0.437, time: 0.049
total: 22.745, num_batches: 52, average: 0.437, time: 0.049
total: 23.226, num_batches: 53, average: 0.438, time: 0.048
total: 23.645, num_batches: 54, average: 0.438, time: 0.049
total: 24.160, num_batches: 55, average: 0.439, time: 0.049
total: 24.570, num_batches: 56, average: 0.439, time: 0.048
total: 25.028, num_batches: 57, average:

total: 76.099, num_batches: 177, average: 0.430, time: 0.048
total: 76.503, num_batches: 178, average: 0.430, time: 0.048
total: 76.945, num_batches: 179, average: 0.430, time: 0.048
total: 77.356, num_batches: 180, average: 0.430, time: 0.048
total: 77.823, num_batches: 181, average: 0.430, time: 0.058
total: 78.254, num_batches: 182, average: 0.430, time: 0.065
total: 78.721, num_batches: 183, average: 0.430, time: 0.057
total: 79.263, num_batches: 184, average: 0.431, time: 0.058
total: 79.876, num_batches: 185, average: 0.432, time: 0.052
total: 80.509, num_batches: 186, average: 0.433, time: 0.049
total: 80.931, num_batches: 187, average: 0.433, time: 0.049
total: 81.274, num_batches: 188, average: 0.432, time: 0.054
total: 81.657, num_batches: 189, average: 0.432, time: 0.051
total: 82.074, num_batches: 190, average: 0.432, time: 0.050
total: 82.451, num_batches: 191, average: 0.432, time: 0.049
total: 82.870, num_batches: 192, average: 0.432, time: 0.049
total: 83.267, num_batch

total: 40.380, num_batches: 98, average: 0.412, time: 0.049
total: 40.760, num_batches: 99, average: 0.412, time: 0.049
total: 41.163, num_batches: 100, average: 0.412, time: 0.049
total: 41.546, num_batches: 101, average: 0.411, time: 0.048
total: 42.032, num_batches: 102, average: 0.412, time: 0.049
total: 42.532, num_batches: 103, average: 0.413, time: 0.048
total: 42.897, num_batches: 104, average: 0.412, time: 0.048
total: 43.268, num_batches: 105, average: 0.412, time: 0.048
total: 43.703, num_batches: 106, average: 0.412, time: 0.048
total: 44.057, num_batches: 107, average: 0.412, time: 0.049
total: 44.563, num_batches: 108, average: 0.413, time: 0.049
total: 45.004, num_batches: 109, average: 0.413, time: 0.049
total: 45.476, num_batches: 110, average: 0.413, time: 0.049
total: 45.791, num_batches: 111, average: 0.413, time: 0.048
total: 46.154, num_batches: 112, average: 0.412, time: 0.048
total: 46.621, num_batches: 113, average: 0.413, time: 0.049
total: 47.085, num_batches

total: 6.663, num_batches: 17, average: 0.392, time: 0.051
total: 7.089, num_batches: 18, average: 0.394, time: 0.050
total: 7.498, num_batches: 19, average: 0.395, time: 0.060
total: 7.843, num_batches: 20, average: 0.392, time: 0.064
total: 8.174, num_batches: 21, average: 0.389, time: 0.066
total: 8.566, num_batches: 22, average: 0.389, time: 0.068
total: 8.897, num_batches: 23, average: 0.387, time: 0.061
total: 9.310, num_batches: 24, average: 0.388, time: 0.063
total: 9.610, num_batches: 25, average: 0.384, time: 0.057
total: 10.005, num_batches: 26, average: 0.385, time: 0.049
total: 10.360, num_batches: 27, average: 0.384, time: 0.053
total: 10.723, num_batches: 28, average: 0.383, time: 0.049
total: 11.165, num_batches: 29, average: 0.385, time: 0.049
total: 11.622, num_batches: 30, average: 0.387, time: 0.049
total: 12.045, num_batches: 31, average: 0.389, time: 0.049
total: 12.372, num_batches: 32, average: 0.387, time: 0.049
total: 12.675, num_batches: 33, average: 0.384, t

total: 59.268, num_batches: 154, average: 0.385, time: 0.048
total: 59.735, num_batches: 155, average: 0.385, time: 0.049
total: 60.201, num_batches: 156, average: 0.386, time: 0.050
total: 60.615, num_batches: 157, average: 0.386, time: 0.053
total: 61.116, num_batches: 158, average: 0.387, time: 0.048
total: 61.595, num_batches: 159, average: 0.387, time: 0.051
total: 61.953, num_batches: 160, average: 0.387, time: 0.049
total: 62.294, num_batches: 161, average: 0.387, time: 0.049
total: 62.732, num_batches: 162, average: 0.387, time: 0.049
total: 63.110, num_batches: 163, average: 0.387, time: 0.048
total: 63.588, num_batches: 164, average: 0.388, time: 0.048
total: 64.006, num_batches: 165, average: 0.388, time: 0.048
total: 64.453, num_batches: 166, average: 0.388, time: 0.056
total: 64.935, num_batches: 167, average: 0.389, time: 0.059
total: 65.271, num_batches: 168, average: 0.389, time: 0.057
total: 65.635, num_batches: 169, average: 0.388, time: 0.061
total: 65.967, num_batch

total: 27.887, num_batches: 76, average: 0.367, time: 0.049
total: 28.249, num_batches: 77, average: 0.367, time: 0.049
total: 28.651, num_batches: 78, average: 0.367, time: 0.049
total: 29.019, num_batches: 79, average: 0.367, time: 0.049
total: 29.490, num_batches: 80, average: 0.369, time: 0.049
total: 29.758, num_batches: 81, average: 0.367, time: 0.049
total: 30.109, num_batches: 82, average: 0.367, time: 0.049
total: 30.390, num_batches: 83, average: 0.366, time: 0.048
total: 30.675, num_batches: 84, average: 0.365, time: 0.049
total: 31.102, num_batches: 85, average: 0.366, time: 0.049
total: 31.470, num_batches: 86, average: 0.366, time: 0.049
total: 31.829, num_batches: 87, average: 0.366, time: 0.049
total: 32.202, num_batches: 88, average: 0.366, time: 0.049
total: 32.534, num_batches: 89, average: 0.366, time: 0.049
total: 32.931, num_batches: 90, average: 0.366, time: 0.049
total: 33.312, num_batches: 91, average: 0.366, time: 0.049
total: 33.661, num_batches: 92, average:

total: 77.765, num_batches: 212, average: 0.367, time: 0.061
total: 78.138, num_batches: 213, average: 0.367, time: 0.052
total: 78.475, num_batches: 214, average: 0.367, time: 0.050
Epoch: 10, Loss: 0.366, Acc: 0.866,Val_Loss: 0.357, Val_Acc: 0.875
