In [None]:
import os
import re
import glob
import numpy as np
import PIL.Image as img
import random
from sklearn import metrics
from sklearn.utils.linear_assignment_ import linear_assignment
import tensorflow as tf



In [None]:
# Option
# Notebook-wide settings read by the cells below.
mode = 'Training'  # selects the branch of the final cell: 'Training' or 'Testing'
num_cluster = 10  # passed to ConvNetwork as the width of its final dense layer
eps = 1e-10  # lower clip bound applied to values before tf.log in the loss
height = 28  # image height used for the input placeholder and the batch buffer
width = 28  # image width used for the input placeholder and the batch buffer
channel = 1  # number of image channels used for the placeholder and the batch buffer

In [None]:
# Get Datas and Labels as batch_size
def get_batch(batch_size, img_data, imgt_labels):
    """Draw a random mini-batch of images together with their labels.

    `batch_size` distinct positions are sampled without replacement from
    `range(len(imgt_labels))`; the images and labels found at those positions
    are copied into freshly allocated arrays.

    Args:
        batch_size: number of samples to draw.
        img_data: indexable image collection; each entry must be assignable
            into a (height, width, channel) float32 slot.
        imgt_labels: indexable label collection aligned with `img_data`.

    Returns:
        Tuple `(images, labels)`: a float32 array of shape
        [batch_size, height, width, channel] and an int32 array of shape
        [batch_size].
    """
    picked = random.sample(range(len(imgt_labels)), batch_size)

    images = np.empty([batch_size, height, width, channel], dtype=np.float32)
    labels = np.empty([batch_size], dtype=np.int32)

    # Fill the output buffers slot by slot in the sampled order.
    for slot, source in zip(range(batch_size), picked):
        images[slot, ...] = img_data[source, ...]
        labels[slot] = imgt_labels[source]

    return images, labels

In [None]:
# Get Datas and Labels as batch_size for Testing
def get_batch_test(batch_size, img_data, i):
    """Return a copy of the i-th consecutive batch of `img_data`.

    Args:
        batch_size: number of samples per batch.
        img_data: array sliced along its first axis.
        i: zero-based batch index.

    Returns:
        A new array holding rows `batch_size * i` up to (but excluding)
        `batch_size * (i + 1)`; fewer rows come back if the slice runs past
        the end of `img_data`.
    """
    window = slice(batch_size * i, batch_size * (i + 1))
    # np.copy detaches the batch from the source array.
    return np.copy(img_data[window, ...])


In [None]:
def clustering_acc(y_true, y_pred):
    """Clustering accuracy under the best one-to-one cluster/label matching.

    A contingency table between predicted cluster ids and true labels is
    built, then the assignment of clusters to labels that maximises the
    number of agreeing samples is found with the Hungarian algorithm.

    Uses `scipy.optimize.linear_sum_assignment` rather than the
    `sklearn.utils.linear_assignment_` helper, which has been removed from
    scikit-learn.

    Args:
        y_true: 1-D array-like of non-negative integer ground-truth labels.
        y_pred: 1-D array-like of non-negative integer cluster ids, same
            length as `y_true`.

    Returns:
        Fraction of samples (float in [0, 1]) that agree with their label
        under the optimal matching.

    Raises:
        AssertionError: if the two inputs differ in size.
        ValueError: if the inputs are empty (no maximum exists).
    """
    # Imported locally so the function does not rely on a file-level import.
    from scipy.optimize import linear_sum_assignment

    y_true = np.asarray(y_true).astype(np.int64)
    # Predictions are used as indices below, so they must be integers too.
    y_pred = np.asarray(y_pred).astype(np.int64)
    assert y_pred.size == y_true.size

    D = max(y_pred.max(), y_true.max()) + 1
    # w[p, t] counts the samples predicted as cluster p whose label is t.
    w = np.zeros((D, D), dtype=np.int64)
    np.add.at(w, (y_pred, y_true), 1)

    # The solver minimises cost, so turn counts into costs.
    row_ind, col_ind = linear_sum_assignment(w.max() - w)

    return w[row_ind, col_ind].sum() * 1.0 / y_pred.size

In [None]:

def NMI(y_true, y_pred):
    """Return sklearn's normalized mutual information score for the two labelings."""
    score = metrics.normalized_mutual_info_score(y_true, y_pred)
    return score

def ARI(y_true, y_pred):
    """Return sklearn's adjusted Rand index for the two labelings."""
    score = metrics.adjusted_rand_score(y_true, y_pred)
    return score

In [None]:
from tensorflow.python.keras import backend as k

In [None]:
# Evaluate the default graph object so the notebook displays its repr;
# the result is not stored anywhere.
tf.compat.v1.get_default_graph()

<tensorflow.python.framework.ops.Graph at 0x7ffab6e2d1d0>

In [None]:
def ConvNetwork(in_img, num_cluster, name='ConvNetwork', reuse=False):
    """Build the convolutional network that maps images to soft cluster assignments.

    Layer sequence (all convolutions use 'valid' padding and stride 1):
      * three 3x3 conv layers with 64 filters, then 2x2 max-pool + batch norm
      * three 3x3 conv layers with 128 filters, then 2x2 max-pool + batch norm
      * one 1x1 conv layer with 10 filters, then 2x2 average-pool + batch norm
      * flatten, dense(10), dense(num_cluster), softmax

    Every conv/dense layer is followed by batch normalisation and ReLU. Batch
    normalisation is always created with `training=True` and
    `trainable=False`.

    Args:
        in_img: input image tensor (NHWC layout assumed, since batch norm is
            applied on the last axis).
        num_cluster: number of units in the final dense layer.
        name: variable scope under which all layers are created.
        reuse: forwarded to `tf.variable_scope`.

    Returns:
        Tensor holding the softmax over the final dense layer's activations.
    """
    def batch_norm(tensor):
        # Same batch-norm configuration is used after every layer and pool.
        return tf.layers.batch_normalization(tensor, axis=-1, epsilon=1e-5, training=True, trainable=False)

    def conv_bn_relu(tensor, filters, kernel):
        tensor = tf.layers.conv2d(tensor, filters, kernel, [1,1], padding='valid', activation=None, kernel_initializer=tf.keras.initializers.he_normal())
        return tf.nn.relu(batch_norm(tensor))

    def dense_bn_relu(tensor, units):
        tensor = tf.layers.dense(tensor, units, kernel_initializer=tf.initializers.identity())
        return tf.nn.relu(batch_norm(tensor))

    with tf.variable_scope(name, reuse=reuse):
        net = in_img

        # conv1 - conv3
        for _ in range(3):
            net = conv_bn_relu(net, 64, [3,3])
        net = batch_norm(tf.layers.max_pooling2d(net, [2,2], [2,2]))

        # conv4 - conv6
        for _ in range(3):
            net = conv_bn_relu(net, 128, [3,3])
        net = batch_norm(tf.layers.max_pooling2d(net, [2,2], [2,2]))

        # conv7
        net = conv_bn_relu(net, 10, [1,1])
        net = batch_norm(tf.layers.average_pooling2d(net, [2,2], [2,2]))
        net = tf.layers.flatten(net)

        # dense8, dense9
        net = dense_bn_relu(net, 10)
        net = dense_bn_relu(net, num_cluster)

        out = tf.nn.softmax(net)

    return out


In [None]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior() 

Instructions for updating:
non-resource variables are not supported in the long term


In [None]:
# Placeholders: a batch of input images plus scalar hyper-parameters that are
# fed on every training step.
image_pool_input = tf.placeholder(shape=[None, height, width, channel], dtype=tf.float32, name='image_pool_input')
u_thres = tf.placeholder(shape=[], dtype=tf.float32, name='u_thres')  # upper similarity threshold
l_thres = tf.placeholder(shape=[], dtype=tf.float32, name='l_thres')  # lower similarity threshold
lr = tf.placeholder(shape=[], dtype=tf.float32, name='learning_rate')

# Per-sample softmax output of the network, one row per image.
label_feat = ConvNetwork(image_pool_input, num_cluster, name='ConvNetwork', reuse=False)
# `axis` replaces the deprecated `dim` keyword of l2_normalize.
label_feat_norm = tf.nn.l2_normalize(label_feat, axis=1)
# Pairwise dot products of the unit-length rows, i.e. cosine similarities
# between every pair of samples in the batch.
sim_mat = tf.matmul(label_feat_norm, label_feat_norm, transpose_b=True)

# Pairs above the upper threshold are treated as positives, pairs below the
# lower threshold as negatives; everything in between is masked out of the loss.
pos_loc = tf.greater(sim_mat, u_thres, name='greater')
neg_loc = tf.less(sim_mat, l_thres, name='less')
pos_loc_mask = tf.cast(pos_loc, dtype=tf.float32)
neg_loc_mask = tf.cast(neg_loc, dtype=tf.float32)

# Predicted cluster id = index of the largest network output.
pred_label = tf.argmax(label_feat, axis=1)

# Deep Adaptive Image Clustering Cost Function Optimize
# -log(sim) on positive pairs and -log(1 - sim) on negative pairs; values are
# clipped to [eps, 1] so the logarithm stays finite.
pos_entropy = tf.multiply(-tf.log(tf.clip_by_value(sim_mat, eps, 1.0)), pos_loc_mask)
neg_entropy = tf.multiply(-tf.log(tf.clip_by_value(1-sim_mat, eps, 1.0)), neg_loc_mask)

# Means are taken over all entries of the pairwise matrix, masked or not.
loss_sum = tf.reduce_mean(pos_entropy) + tf.reduce_mean(neg_entropy)
train_op = tf.train.RMSPropOptimizer(lr).minimize(loss_sum)




Instructions for updating:
Use `tf.keras.layers.Conv2D` instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use keras.layers.BatchNormalization instead.  In particular, `tf.control_dependencies(tf.GraphKeys.UPDATE_OPS)` should not be used (consult the `tf.keras.layers.BatchNormalization` documentation).
Instructions for updating:
Use keras.layers.MaxPooling2D instead.
Instructions for updating:
Use keras.layers.AveragePooling2D instead.
Instructions for updating:
Use keras.layers.Flatten instead.
Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
dim is deprecated, use axis instead
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [None]:
# NOTE: this rebinds `tf` to the top-level tensorflow package (an earlier cell
# bound it to tensorflow.compat.v1); a later cell re-imports
# tensorflow.compat.v1 as tf before the saver/session code runs.
import tensorflow as tf
# Load MNIST through the Keras dataset helper as (images, labels) pairs for
# the train and test splits.
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Add an explicit single-channel axis to the images and make the labels int32.
mnist_train = train_images.reshape(-1, 28, 28, 1)
mnist_train_labels = np.asarray(train_labels, dtype=np.int32)
mnist_test = test_images.reshape(-1, 28, 28, 1)
mnist_test_labels = np.asarray(test_labels, dtype=np.int32)

# Train and test splits are stacked along the first axis into one pool.
image_data = np.concatenate((mnist_train, mnist_test))
image_label = np.concatenate((mnist_train_labels, mnist_test_labels))
print(image_data.shape[0])

70000


In [None]:
# Compress Dimension
# Relabel the classes in place as 1, 2, 3, ... in order of first appearance;
# `mapping` records original label -> new label.
mapping = {}
mapped_label = 0
for index, data in enumerate(image_label):
    if data not in mapping:
        # First time this label is seen: hand out the next id.
        mapped_label += 1
        mapping[data] = mapped_label
    image_label[index] = mapping[data]

In [None]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

In [None]:
# Checkpoint saver used below by saver.save (Training) and saver.restore (Testing).
saver = tf.train.Saver()

In [None]:
# Driver cell: trains the network with the adaptive threshold schedule, or
# restores a checkpoint and evaluates it on the whole image pool, depending
# on `mode`.
if mode == 'Training':
    batch_size = 128
    test_batch_size = 512
    base_lr = 0.001
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        lamda = 0
        epoch = 1
        u = 0.95
        l = 0.455

        # The upper threshold u shrinks and the lower threshold l grows as
        # lamda increases; training stops once they cross.
        while u > l:
            u = 0.95 - lamda
            l = 0.455 + 0.1 * lamda
            print(u, l)
            # 1000 mini-batch updates per threshold setting (counted as one "epoch" here)
            for i in range(1, 1001):
                data_samples, _ = get_batch(batch_size, image_data, image_label)
                feed_dict = {image_pool_input: data_samples, u_thres: u, l_thres: l, lr: base_lr}
                train_loss, _ = sess.run([loss_sum, train_op], feed_dict=feed_dict)
                if i % 20 == 0:
                    print('training loss at iter %d is %f' % (i, train_loss))

            lamda += 1.1 * 0.009
            print(lamda)
            # run testing every epoch on a random batch; labels are only used for scoring
            data_samples, data_labels = get_batch(test_batch_size, image_data, image_label)
            feed_dict = {image_pool_input: data_samples}
            pred_cluster = sess.run(pred_label, feed_dict=feed_dict)

            acc = clustering_acc(data_labels, pred_cluster)
            nmi = NMI(data_labels, pred_cluster)
            ari = ARI(data_labels, pred_cluster)
            print('testing NMI, ARI, ACC at epoch %d is %f, %f, %f.' % (epoch, nmi, ari, acc))

            if epoch % 5 == 0:  # save model at every 5 epochs
                model_name = 'DAC_ep_' + str(epoch) + '.ckpt'
                # Make sure the checkpoint directory exists before saving.
                os.makedirs('DAC_models', exist_ok=True)
                save_path = saver.save(sess, 'DAC_models/' + model_name)
                print("Model saved in file: %s" % save_path)

            epoch += 1

elif mode == 'Testing':
    test_batch_size = 1000
    with tf.Session() as sess:
        saver.restore(sess, "DAC_models/DAC_ep_45.ckpt")
        print('model restored!')
        all_predictions = np.zeros([len(image_label)], dtype=np.int64)
        # Ceiling division so a trailing partial batch is also predicted
        # instead of being left at its zero initial value.
        num_test_batches = (len(image_data) + test_batch_size - 1) // test_batch_size
        for i in range(num_test_batches):
            data_samples = get_batch_test(test_batch_size, image_data, i)
            feed_dict = {image_pool_input: data_samples}
            pred_cluster = sess.run(pred_label, feed_dict=feed_dict)
            all_predictions[i * test_batch_size:(i + 1) * test_batch_size] = pred_cluster

        acc = clustering_acc(image_label.astype(int), all_predictions)
        nmi = NMI(image_label.astype(int), all_predictions)
        ari = ARI(image_label.astype(int), all_predictions)
        print('testing NMI, ARI, ACC are %f, %f, %f.' % (nmi, ari, acc))

0.95 0.455
training loss at iter 20 is 0.149347
training loss at iter 40 is 0.170385
training loss at iter 60 is 0.146097
training loss at iter 80 is 0.178811
training loss at iter 100 is 0.183752
training loss at iter 120 is 0.175200
training loss at iter 140 is 0.185121
training loss at iter 160 is 0.177561
training loss at iter 180 is 0.180105
training loss at iter 200 is 0.179779
training loss at iter 220 is 0.171554
training loss at iter 240 is 0.162225
training loss at iter 260 is 0.164176
training loss at iter 280 is 0.174341
training loss at iter 300 is 0.157272
training loss at iter 320 is 0.151768
training loss at iter 340 is 0.156998
training loss at iter 360 is 0.142101
training loss at iter 380 is 0.146148
training loss at iter 400 is 0.153183
training loss at iter 420 is 0.140798
training loss at iter 440 is 0.157799
training loss at iter 460 is 0.151539
training loss at iter 480 is 0.143286
training loss at iter 500 is 0.162643
training loss at iter 520 is 0.149354
train



testing NMI, ARI, ACC at epoch 1 is 0.934073, 0.922542, 0.962891.
0.9400999999999999 0.45599
training loss at iter 20 is 0.138686
training loss at iter 40 is 0.130629
training loss at iter 60 is 0.137978
training loss at iter 80 is 0.133647
training loss at iter 100 is 0.140571
training loss at iter 120 is 0.132992
training loss at iter 140 is 0.138531
training loss at iter 160 is 0.132507
training loss at iter 180 is 0.137796
training loss at iter 200 is 0.128005
training loss at iter 220 is 0.136457
training loss at iter 240 is 0.148888
training loss at iter 260 is 0.143744
training loss at iter 280 is 0.143197
training loss at iter 300 is 0.132649
training loss at iter 320 is 0.129489
training loss at iter 340 is 0.135899
training loss at iter 360 is 0.129418
training loss at iter 380 is 0.137737
training loss at iter 400 is 0.138591
training loss at iter 420 is 0.129365
training loss at iter 440 is 0.134367
training loss at iter 460 is 0.133880
training loss at iter 480 is 0.139443



training loss at iter 20 is 0.128675
training loss at iter 40 is 0.138146
training loss at iter 60 is 0.135212
training loss at iter 80 is 0.133597
training loss at iter 100 is 0.139735
training loss at iter 120 is 0.136165
training loss at iter 140 is 0.129509
training loss at iter 160 is 0.133774
training loss at iter 180 is 0.154692
training loss at iter 200 is 0.124796
training loss at iter 220 is 0.126286
training loss at iter 240 is 0.135680
training loss at iter 260 is 0.134470
training loss at iter 280 is 0.136192
training loss at iter 300 is 0.132227
training loss at iter 320 is 0.125263
training loss at iter 340 is 0.131121
training loss at iter 360 is 0.161975
training loss at iter 380 is 0.129970
training loss at iter 400 is 0.137771
training loss at iter 420 is 0.127105
training loss at iter 440 is 0.145586
training loss at iter 460 is 0.135014
training loss at iter 480 is 0.131194
training loss at iter 500 is 0.127329
training loss at iter 520 is 0.131351
training loss at



training loss at iter 20 is 0.132103
training loss at iter 40 is 0.137584
training loss at iter 60 is 0.127436
training loss at iter 80 is 0.130922
training loss at iter 100 is 0.130243
training loss at iter 120 is 0.139164
training loss at iter 140 is 0.126362
training loss at iter 160 is 0.126222
training loss at iter 180 is 0.125626
training loss at iter 200 is 0.126723
training loss at iter 220 is 0.129216
training loss at iter 240 is 0.132542
training loss at iter 260 is 0.122165
training loss at iter 280 is 0.136705
training loss at iter 300 is 0.128406
training loss at iter 320 is 0.127426
training loss at iter 340 is 0.140676
training loss at iter 360 is 0.138218
training loss at iter 380 is 0.136834
training loss at iter 400 is 0.132061
training loss at iter 420 is 0.145589
training loss at iter 440 is 0.136262
training loss at iter 460 is 0.124524
training loss at iter 480 is 0.143575
training loss at iter 500 is 0.134073
training loss at iter 520 is 0.132753
training loss at



training loss at iter 20 is 0.126479
training loss at iter 40 is 0.118806
training loss at iter 60 is 0.122909
training loss at iter 80 is 0.130580
training loss at iter 100 is 0.131065
training loss at iter 120 is 0.137201
training loss at iter 140 is 0.126051
training loss at iter 160 is 0.134796
training loss at iter 180 is 0.133112
training loss at iter 200 is 0.133104
training loss at iter 220 is 0.131593
training loss at iter 240 is 0.135058
training loss at iter 260 is 0.126842
training loss at iter 280 is 0.128256
training loss at iter 300 is 0.123765
training loss at iter 320 is 0.145869
training loss at iter 340 is 0.122068
training loss at iter 360 is 0.129137
training loss at iter 380 is 0.144484
training loss at iter 400 is 0.135602
training loss at iter 420 is 0.130801
training loss at iter 440 is 0.144104
training loss at iter 460 is 0.138842
training loss at iter 480 is 0.131049
training loss at iter 500 is 0.136554
training loss at iter 520 is 0.123968
training loss at



Model saved in file: DAC_models/DAC_ep_5.ckpt
0.9005 0.45995
training loss at iter 20 is 0.130365
training loss at iter 40 is 0.121540
training loss at iter 60 is 0.126319
training loss at iter 80 is 0.131890
training loss at iter 100 is 0.132039
training loss at iter 120 is 0.133863
training loss at iter 140 is 0.129829
training loss at iter 160 is 0.132558
training loss at iter 180 is 0.133879
training loss at iter 200 is 0.131846
training loss at iter 220 is 0.127363
training loss at iter 240 is 0.127618
training loss at iter 260 is 0.120233
training loss at iter 280 is 0.124086
training loss at iter 300 is 0.127008
training loss at iter 320 is 0.133264
training loss at iter 340 is 0.125215
training loss at iter 360 is 0.125382
training loss at iter 380 is 0.127317
training loss at iter 400 is 0.133364
training loss at iter 420 is 0.139372
training loss at iter 440 is 0.138188
training loss at iter 460 is 0.131074
training loss at iter 480 is 0.137875
training loss at iter 500 is 0.



training loss at iter 20 is 0.138000
training loss at iter 40 is 0.130275
training loss at iter 60 is 0.128093
training loss at iter 80 is 0.126625
training loss at iter 100 is 0.138588
training loss at iter 120 is 0.124782
training loss at iter 140 is 0.136962
training loss at iter 160 is 0.124754
training loss at iter 180 is 0.132286
training loss at iter 200 is 0.127871
training loss at iter 220 is 0.141382
training loss at iter 240 is 0.120555
training loss at iter 260 is 0.124219
training loss at iter 280 is 0.124882
training loss at iter 300 is 0.122884
training loss at iter 320 is 0.123245
training loss at iter 340 is 0.144479
training loss at iter 360 is 0.123476
training loss at iter 380 is 0.131736
training loss at iter 400 is 0.142274
training loss at iter 420 is 0.138042
training loss at iter 440 is 0.134362
training loss at iter 460 is 0.125929
training loss at iter 480 is 0.130217
training loss at iter 500 is 0.125720
training loss at iter 520 is 0.132305
training loss at



training loss at iter 20 is 0.138268
training loss at iter 40 is 0.145340
training loss at iter 60 is 0.133365
training loss at iter 80 is 0.136161
training loss at iter 100 is 0.131206
training loss at iter 120 is 0.128225
training loss at iter 140 is 0.130612
training loss at iter 160 is 0.122592
training loss at iter 180 is 0.127103
training loss at iter 200 is 0.123031
training loss at iter 220 is 0.128396
training loss at iter 240 is 0.137397
training loss at iter 260 is 0.123944
training loss at iter 280 is 0.126734
training loss at iter 300 is 0.129135
training loss at iter 320 is 0.142371
training loss at iter 340 is 0.130875
training loss at iter 360 is 0.123900
training loss at iter 380 is 0.131883
training loss at iter 400 is 0.122268
training loss at iter 420 is 0.122948
training loss at iter 440 is 0.134940
training loss at iter 460 is 0.129136
training loss at iter 480 is 0.130272
training loss at iter 500 is 0.122429
training loss at iter 520 is 0.128360
training loss at



training loss at iter 20 is 0.130816
training loss at iter 40 is 0.116777
training loss at iter 60 is 0.122427
training loss at iter 80 is 0.123479
training loss at iter 100 is 0.116481
training loss at iter 120 is 0.136878
training loss at iter 140 is 0.129658
training loss at iter 160 is 0.125684
training loss at iter 180 is 0.140665
training loss at iter 200 is 0.139662
training loss at iter 220 is 0.134563
training loss at iter 240 is 0.124682
training loss at iter 260 is 0.118727
training loss at iter 280 is 0.127520
training loss at iter 300 is 0.138053
training loss at iter 320 is 0.133603
training loss at iter 340 is 0.130452
training loss at iter 360 is 0.128048
training loss at iter 380 is 0.128541
training loss at iter 400 is 0.128726
training loss at iter 420 is 0.147068
training loss at iter 440 is 0.121642
training loss at iter 460 is 0.124984
training loss at iter 480 is 0.124937
training loss at iter 500 is 0.132651
training loss at iter 520 is 0.133847
training loss at



training loss at iter 20 is 0.123930
training loss at iter 40 is 0.125232
training loss at iter 60 is 0.119550
training loss at iter 80 is 0.135342
training loss at iter 100 is 0.126110
training loss at iter 120 is 0.119701
training loss at iter 140 is 0.129489
training loss at iter 160 is 0.123114
training loss at iter 180 is 0.130830
training loss at iter 200 is 0.119349
training loss at iter 220 is 0.138540
training loss at iter 240 is 0.119491
training loss at iter 260 is 0.129762
training loss at iter 280 is 0.121986
training loss at iter 300 is 0.124642
training loss at iter 320 is 0.130051
training loss at iter 340 is 0.134164
training loss at iter 360 is 0.130299
training loss at iter 380 is 0.129963
training loss at iter 400 is 0.142359
training loss at iter 420 is 0.123675
training loss at iter 440 is 0.124421
training loss at iter 460 is 0.126349
training loss at iter 480 is 0.136449
training loss at iter 500 is 0.124040
training loss at iter 520 is 0.129778
training loss at



training loss at iter 20 is 0.130654
training loss at iter 40 is 0.129475
training loss at iter 60 is 0.127941
training loss at iter 80 is 0.128858
training loss at iter 100 is 0.124480
training loss at iter 120 is 0.136388
training loss at iter 140 is 0.124441
training loss at iter 160 is 0.136556
training loss at iter 180 is 0.144488
training loss at iter 200 is 0.129116
training loss at iter 220 is 0.134017
training loss at iter 240 is 0.118179
training loss at iter 260 is 0.124313
training loss at iter 280 is 0.130069
training loss at iter 300 is 0.130926
training loss at iter 320 is 0.145950
training loss at iter 340 is 0.120217
training loss at iter 360 is 0.122476
training loss at iter 380 is 0.124266
training loss at iter 400 is 0.121877
training loss at iter 420 is 0.131788
training loss at iter 440 is 0.121714
training loss at iter 460 is 0.134229
training loss at iter 480 is 0.121288
training loss at iter 500 is 0.127531
training loss at iter 520 is 0.122999
training loss at



training loss at iter 20 is 0.129712
training loss at iter 40 is 0.136329
training loss at iter 60 is 0.134464
training loss at iter 80 is 0.133661
training loss at iter 100 is 0.129044
training loss at iter 120 is 0.122358
training loss at iter 140 is 0.122310
training loss at iter 160 is 0.134485
training loss at iter 180 is 0.129605
training loss at iter 200 is 0.131685
training loss at iter 220 is 0.128758
training loss at iter 240 is 0.135065
training loss at iter 260 is 0.140519
training loss at iter 280 is 0.130680
training loss at iter 300 is 0.123329
training loss at iter 320 is 0.135941
training loss at iter 340 is 0.129153
training loss at iter 360 is 0.135891
training loss at iter 380 is 0.125979
training loss at iter 400 is 0.131065
training loss at iter 420 is 0.128979
training loss at iter 440 is 0.134200
training loss at iter 460 is 0.123696
training loss at iter 480 is 0.128418
training loss at iter 500 is 0.132863
training loss at iter 520 is 0.129039
training loss at



training loss at iter 20 is 0.119364
training loss at iter 40 is 0.127907
training loss at iter 60 is 0.127231
training loss at iter 80 is 0.144315
training loss at iter 100 is 0.126136
training loss at iter 120 is 0.127911
training loss at iter 140 is 0.118507
training loss at iter 160 is 0.137702
training loss at iter 180 is 0.119912
training loss at iter 200 is 0.129957
training loss at iter 220 is 0.126248
training loss at iter 240 is 0.123113
training loss at iter 260 is 0.121928
training loss at iter 280 is 0.120557
training loss at iter 300 is 0.126822
training loss at iter 320 is 0.140061
training loss at iter 340 is 0.124237
training loss at iter 360 is 0.120730
training loss at iter 380 is 0.126650
training loss at iter 400 is 0.129497
training loss at iter 420 is 0.125933
training loss at iter 440 is 0.120162
training loss at iter 460 is 0.118871
training loss at iter 480 is 0.128834
training loss at iter 500 is 0.130732
training loss at iter 520 is 0.124360
training loss at



training loss at iter 20 is 0.128555
training loss at iter 40 is 0.128135
training loss at iter 60 is 0.127762
training loss at iter 80 is 0.140331
training loss at iter 100 is 0.135449
training loss at iter 120 is 0.122696
training loss at iter 140 is 0.130809
training loss at iter 160 is 0.132521
training loss at iter 180 is 0.126706
training loss at iter 200 is 0.125890
training loss at iter 220 is 0.125999
training loss at iter 240 is 0.123056
training loss at iter 260 is 0.141497
training loss at iter 280 is 0.122384
training loss at iter 300 is 0.119925
training loss at iter 320 is 0.127952
training loss at iter 340 is 0.135182
training loss at iter 360 is 0.129022
training loss at iter 380 is 0.133653
training loss at iter 400 is 0.132134
training loss at iter 420 is 0.125309
training loss at iter 440 is 0.133237
training loss at iter 460 is 0.128126
training loss at iter 480 is 0.121168
training loss at iter 500 is 0.133788
training loss at iter 520 is 0.130272
training loss at



training loss at iter 20 is 0.123091
training loss at iter 40 is 0.137464
training loss at iter 60 is 0.122923
training loss at iter 80 is 0.125199
training loss at iter 100 is 0.129394
training loss at iter 120 is 0.120881
training loss at iter 140 is 0.125305
training loss at iter 160 is 0.125167
training loss at iter 180 is 0.124994
training loss at iter 200 is 0.122946
training loss at iter 220 is 0.139144
training loss at iter 240 is 0.143768
training loss at iter 260 is 0.127144
training loss at iter 280 is 0.135055
training loss at iter 300 is 0.130720
training loss at iter 320 is 0.138442
training loss at iter 340 is 0.124689
training loss at iter 360 is 0.120328
training loss at iter 380 is 0.132575
training loss at iter 400 is 0.138379
training loss at iter 420 is 0.126614
training loss at iter 440 is 0.114886
training loss at iter 460 is 0.121491
training loss at iter 480 is 0.142049
training loss at iter 500 is 0.143365
training loss at iter 520 is 0.128767
training loss at



training loss at iter 20 is 0.117489
training loss at iter 40 is 0.129038
training loss at iter 60 is 0.131607
training loss at iter 80 is 0.126673
training loss at iter 100 is 0.125091
training loss at iter 120 is 0.119097
training loss at iter 140 is 0.133425
training loss at iter 160 is 0.141340
training loss at iter 180 is 0.137259
training loss at iter 200 is 0.135277
training loss at iter 220 is 0.121438
training loss at iter 240 is 0.123989
training loss at iter 260 is 0.133556
training loss at iter 280 is 0.133680
training loss at iter 300 is 0.123720
training loss at iter 320 is 0.122777
training loss at iter 340 is 0.139086
training loss at iter 360 is 0.120826
training loss at iter 380 is 0.115098
training loss at iter 400 is 0.145535
training loss at iter 420 is 0.135764
training loss at iter 440 is 0.124589
training loss at iter 460 is 0.127561
training loss at iter 480 is 0.125415
training loss at iter 500 is 0.129210
training loss at iter 520 is 0.127381
training loss at



training loss at iter 20 is 0.119157
training loss at iter 40 is 0.117377
training loss at iter 60 is 0.122768
training loss at iter 80 is 0.119748
training loss at iter 100 is 0.124867
training loss at iter 120 is 0.134591
training loss at iter 140 is 0.125435
training loss at iter 160 is 0.134130
training loss at iter 180 is 0.121530
training loss at iter 200 is 0.126162
training loss at iter 220 is 0.118490
training loss at iter 240 is 0.135178
training loss at iter 260 is 0.136771
training loss at iter 280 is 0.124673
training loss at iter 300 is 0.118391
training loss at iter 320 is 0.120795
training loss at iter 340 is 0.128638
training loss at iter 360 is 0.130788
training loss at iter 380 is 0.125903
training loss at iter 400 is 0.117994
training loss at iter 420 is 0.132846
training loss at iter 440 is 0.120370
training loss at iter 460 is 0.130907
training loss at iter 480 is 0.143615
training loss at iter 500 is 0.124118
training loss at iter 520 is 0.130774
training loss at



training loss at iter 20 is 0.144066
training loss at iter 40 is 0.127121
training loss at iter 60 is 0.130903
training loss at iter 80 is 0.126033
training loss at iter 100 is 0.143729
training loss at iter 120 is 0.126891
training loss at iter 140 is 0.122964
training loss at iter 160 is 0.127864
training loss at iter 180 is 0.132006
training loss at iter 200 is 0.122412
training loss at iter 220 is 0.116431
training loss at iter 240 is 0.131416
training loss at iter 260 is 0.131125
training loss at iter 280 is 0.123982
training loss at iter 300 is 0.137662
training loss at iter 320 is 0.123038
training loss at iter 340 is 0.118928
training loss at iter 360 is 0.132774
training loss at iter 380 is 0.121902
training loss at iter 400 is 0.131524
training loss at iter 420 is 0.134241
training loss at iter 440 is 0.125134
training loss at iter 460 is 0.135687
training loss at iter 480 is 0.143240
training loss at iter 500 is 0.138903
training loss at iter 520 is 0.125554
training loss at



training loss at iter 20 is 0.135843
training loss at iter 40 is 0.128329
training loss at iter 60 is 0.125841
training loss at iter 80 is 0.132530
training loss at iter 100 is 0.125329
training loss at iter 120 is 0.128017
training loss at iter 140 is 0.127690
training loss at iter 160 is 0.123403
training loss at iter 180 is 0.120809
training loss at iter 200 is 0.122654
training loss at iter 220 is 0.134953
training loss at iter 240 is 0.125559
training loss at iter 260 is 0.128503
training loss at iter 280 is 0.144881
training loss at iter 300 is 0.126231
training loss at iter 320 is 0.135894
training loss at iter 340 is 0.120066
training loss at iter 360 is 0.126562
training loss at iter 380 is 0.123040
training loss at iter 400 is 0.123162
training loss at iter 420 is 0.122046
training loss at iter 440 is 0.135697
training loss at iter 460 is 0.121122
training loss at iter 480 is 0.126803
training loss at iter 500 is 0.125254
training loss at iter 520 is 0.135113
training loss at



training loss at iter 20 is 0.128394
training loss at iter 40 is 0.120336
training loss at iter 60 is 0.120825
training loss at iter 80 is 0.119447
training loss at iter 100 is 0.126115
training loss at iter 120 is 0.132227
training loss at iter 140 is 0.128541
training loss at iter 160 is 0.129762
training loss at iter 180 is 0.123307
training loss at iter 200 is 0.127358
training loss at iter 220 is 0.126204
training loss at iter 240 is 0.125295
training loss at iter 260 is 0.138737
training loss at iter 280 is 0.128967
training loss at iter 300 is 0.131093
training loss at iter 320 is 0.129292
training loss at iter 340 is 0.132893
training loss at iter 360 is 0.131302
training loss at iter 380 is 0.127146
training loss at iter 400 is 0.123191
training loss at iter 420 is 0.127738
training loss at iter 440 is 0.131728
training loss at iter 460 is 0.134424
training loss at iter 480 is 0.133524
training loss at iter 500 is 0.126097
training loss at iter 520 is 0.129878
training loss at



training loss at iter 20 is 0.132276
training loss at iter 40 is 0.123058
training loss at iter 60 is 0.141250
training loss at iter 80 is 0.123798
training loss at iter 100 is 0.132633
training loss at iter 120 is 0.129694
training loss at iter 140 is 0.137739
training loss at iter 160 is 0.119653
training loss at iter 180 is 0.127491
training loss at iter 200 is 0.123885
training loss at iter 220 is 0.127338
training loss at iter 240 is 0.126021
training loss at iter 260 is 0.120879
training loss at iter 280 is 0.135253
training loss at iter 300 is 0.125662
training loss at iter 320 is 0.122279
training loss at iter 340 is 0.120612
training loss at iter 360 is 0.141311
training loss at iter 380 is 0.122450
training loss at iter 400 is 0.121293
training loss at iter 420 is 0.124925
training loss at iter 440 is 0.120903
training loss at iter 460 is 0.132811
training loss at iter 480 is 0.129554
training loss at iter 500 is 0.125568
training loss at iter 520 is 0.126106
training loss at



training loss at iter 20 is 0.132955
training loss at iter 40 is 0.137680
training loss at iter 60 is 0.123415
training loss at iter 80 is 0.119794
training loss at iter 100 is 0.127207
training loss at iter 120 is 0.121203
training loss at iter 140 is 0.120458
training loss at iter 160 is 0.144892
training loss at iter 180 is 0.136248
training loss at iter 200 is 0.128230
training loss at iter 220 is 0.123909
training loss at iter 240 is 0.127207
training loss at iter 260 is 0.133378
training loss at iter 280 is 0.121601
training loss at iter 300 is 0.134115
training loss at iter 320 is 0.127422
training loss at iter 340 is 0.121326
training loss at iter 360 is 0.125406
training loss at iter 380 is 0.135910
training loss at iter 400 is 0.127971
training loss at iter 420 is 0.124123
training loss at iter 440 is 0.129342
training loss at iter 460 is 0.124583
training loss at iter 480 is 0.139271
training loss at iter 500 is 0.126790
training loss at iter 520 is 0.118942
training loss at



training loss at iter 20 is 0.124910
training loss at iter 40 is 0.130417
training loss at iter 60 is 0.122876
training loss at iter 80 is 0.134187
training loss at iter 100 is 0.125321
training loss at iter 120 is 0.119619
training loss at iter 140 is 0.118639
training loss at iter 160 is 0.127396
training loss at iter 180 is 0.122152
training loss at iter 200 is 0.124273
training loss at iter 220 is 0.122779
training loss at iter 240 is 0.127435
training loss at iter 260 is 0.140878
training loss at iter 280 is 0.131060
training loss at iter 300 is 0.123768
training loss at iter 320 is 0.123880
training loss at iter 340 is 0.128628
training loss at iter 360 is 0.129832
training loss at iter 380 is 0.124977
training loss at iter 400 is 0.131012
training loss at iter 420 is 0.125493
training loss at iter 440 is 0.123906
training loss at iter 460 is 0.125902
training loss at iter 480 is 0.118587
training loss at iter 500 is 0.123321
training loss at iter 520 is 0.140725
training loss at



training loss at iter 20 is 0.120427
training loss at iter 40 is 0.132948
training loss at iter 60 is 0.121656
training loss at iter 80 is 0.118800
training loss at iter 100 is 0.130091
training loss at iter 120 is 0.119275
training loss at iter 140 is 0.137727
training loss at iter 160 is 0.126944
training loss at iter 180 is 0.119391
training loss at iter 200 is 0.147194
training loss at iter 220 is 0.132421
training loss at iter 240 is 0.120704
training loss at iter 260 is 0.127053
training loss at iter 280 is 0.136078
training loss at iter 300 is 0.126754
training loss at iter 320 is 0.118960
training loss at iter 340 is 0.128788
training loss at iter 360 is 0.150039
training loss at iter 380 is 0.129194
training loss at iter 400 is 0.127077
training loss at iter 420 is 0.128226
training loss at iter 440 is 0.130736
training loss at iter 460 is 0.124312
training loss at iter 480 is 0.122756
training loss at iter 500 is 0.144297
training loss at iter 520 is 0.131240
training loss at



training loss at iter 20 is 0.126521
training loss at iter 40 is 0.125566
training loss at iter 60 is 0.134515
training loss at iter 80 is 0.119777
training loss at iter 100 is 0.125110
training loss at iter 120 is 0.124346
training loss at iter 140 is 0.124749
training loss at iter 160 is 0.122309
training loss at iter 180 is 0.124558
training loss at iter 200 is 0.127559
training loss at iter 220 is 0.122430
training loss at iter 240 is 0.131075
training loss at iter 260 is 0.122575
training loss at iter 280 is 0.117428
training loss at iter 300 is 0.133235
training loss at iter 320 is 0.121442
training loss at iter 340 is 0.122783
training loss at iter 360 is 0.138668
training loss at iter 380 is 0.128063
training loss at iter 400 is 0.130141
training loss at iter 420 is 0.131019
training loss at iter 440 is 0.122452
training loss at iter 460 is 0.125106
training loss at iter 480 is 0.126417
training loss at iter 500 is 0.128245
training loss at iter 520 is 0.126847
training loss at



training loss at iter 20 is 0.118946
training loss at iter 40 is 0.129749
training loss at iter 60 is 0.132529
training loss at iter 80 is 0.122783
training loss at iter 100 is 0.123214
training loss at iter 120 is 0.133746
training loss at iter 140 is 0.132571
training loss at iter 160 is 0.142254
training loss at iter 180 is 0.125838
training loss at iter 200 is 0.120161
training loss at iter 220 is 0.124510
training loss at iter 240 is 0.123064
training loss at iter 260 is 0.124338
training loss at iter 280 is 0.122564
training loss at iter 300 is 0.128349
training loss at iter 320 is 0.117434
training loss at iter 340 is 0.139359
training loss at iter 360 is 0.120271
training loss at iter 380 is 0.133812
training loss at iter 400 is 0.128055
training loss at iter 420 is 0.134725
training loss at iter 440 is 0.131953
training loss at iter 460 is 0.141567
training loss at iter 480 is 0.125198
training loss at iter 500 is 0.140234
training loss at iter 520 is 0.131546
training loss at



training loss at iter 20 is 0.142819
training loss at iter 40 is 0.121972
training loss at iter 60 is 0.141877
training loss at iter 80 is 0.120998
training loss at iter 100 is 0.127880
training loss at iter 120 is 0.134384
training loss at iter 140 is 0.125038
training loss at iter 160 is 0.139308
training loss at iter 180 is 0.132185
training loss at iter 200 is 0.124874
training loss at iter 220 is 0.132099
training loss at iter 240 is 0.122313
training loss at iter 260 is 0.137684
training loss at iter 280 is 0.116240
training loss at iter 300 is 0.149437
training loss at iter 320 is 0.135646
training loss at iter 340 is 0.128493
training loss at iter 360 is 0.119363
training loss at iter 380 is 0.131710
training loss at iter 400 is 0.117335
training loss at iter 420 is 0.128759
training loss at iter 440 is 0.128731
training loss at iter 460 is 0.144043
training loss at iter 480 is 0.125175
training loss at iter 500 is 0.143539
training loss at iter 520 is 0.121681
training loss at



training loss at iter 20 is 0.125557
training loss at iter 40 is 0.128279
training loss at iter 60 is 0.119397
training loss at iter 80 is 0.125933
training loss at iter 100 is 0.130824
training loss at iter 120 is 0.120010
training loss at iter 140 is 0.130821
training loss at iter 160 is 0.125431
training loss at iter 180 is 0.134920
training loss at iter 200 is 0.131599
training loss at iter 220 is 0.139199
training loss at iter 240 is 0.126031
training loss at iter 260 is 0.127847
training loss at iter 280 is 0.119027
training loss at iter 300 is 0.126661
training loss at iter 320 is 0.124042
training loss at iter 340 is 0.123529
training loss at iter 360 is 0.118646
training loss at iter 380 is 0.123886
training loss at iter 400 is 0.142426
training loss at iter 420 is 0.121007
training loss at iter 440 is 0.121799
training loss at iter 460 is 0.123580
training loss at iter 480 is 0.125443
training loss at iter 500 is 0.122595
training loss at iter 520 is 0.120500
training loss at



training loss at iter 20 is 0.116164
training loss at iter 40 is 0.126537
training loss at iter 60 is 0.126108
training loss at iter 80 is 0.149546
training loss at iter 100 is 0.142712
training loss at iter 120 is 0.123409
training loss at iter 140 is 0.128199
training loss at iter 160 is 0.131208
training loss at iter 180 is 0.122330
training loss at iter 200 is 0.127175
training loss at iter 220 is 0.121243
training loss at iter 240 is 0.127597
training loss at iter 260 is 0.124400
training loss at iter 280 is 0.139678
training loss at iter 300 is 0.135658
training loss at iter 320 is 0.133156
training loss at iter 340 is 0.116387
training loss at iter 360 is 0.120204
training loss at iter 380 is 0.125224
training loss at iter 400 is 0.129556
training loss at iter 420 is 0.130053
training loss at iter 440 is 0.135616
training loss at iter 460 is 0.129771
training loss at iter 480 is 0.122134
training loss at iter 500 is 0.125337
training loss at iter 520 is 0.122965
training loss at



training loss at iter 20 is 0.136733
training loss at iter 40 is 0.125913
training loss at iter 60 is 0.130705
training loss at iter 80 is 0.132840
training loss at iter 100 is 0.123787
training loss at iter 120 is 0.139781
training loss at iter 140 is 0.125017
training loss at iter 160 is 0.128248
training loss at iter 180 is 0.129193
training loss at iter 200 is 0.118412
training loss at iter 220 is 0.128402
training loss at iter 240 is 0.118905
training loss at iter 260 is 0.126768
training loss at iter 280 is 0.120412
training loss at iter 300 is 0.129370
training loss at iter 320 is 0.128144
training loss at iter 340 is 0.138774
training loss at iter 360 is 0.126285
training loss at iter 380 is 0.126099
training loss at iter 400 is 0.145040
training loss at iter 420 is 0.138489
training loss at iter 440 is 0.125013
training loss at iter 460 is 0.132676
training loss at iter 480 is 0.123562
training loss at iter 500 is 0.127852
training loss at iter 520 is 0.135669
training loss at



training loss at iter 20 is 0.127339
training loss at iter 40 is 0.122559
training loss at iter 60 is 0.129475
training loss at iter 80 is 0.117295
training loss at iter 100 is 0.128350
training loss at iter 120 is 0.124361
training loss at iter 140 is 0.125163
training loss at iter 160 is 0.120244
training loss at iter 180 is 0.126237
training loss at iter 200 is 0.136472
training loss at iter 220 is 0.138269
training loss at iter 240 is 0.129522
training loss at iter 260 is 0.131960
training loss at iter 280 is 0.127925
training loss at iter 300 is 0.120832
training loss at iter 320 is 0.139045
training loss at iter 340 is 0.121047
training loss at iter 360 is 0.134363
training loss at iter 380 is 0.136512
training loss at iter 400 is 0.133752
training loss at iter 420 is 0.124649
training loss at iter 440 is 0.133659
training loss at iter 460 is 0.127433
training loss at iter 480 is 0.140401
training loss at iter 500 is 0.131892
training loss at iter 520 is 0.136358
training loss at



training loss at iter 20 is 0.122353
training loss at iter 40 is 0.121949
training loss at iter 60 is 0.125997
training loss at iter 80 is 0.116829
training loss at iter 100 is 0.123035
training loss at iter 120 is 0.120565
training loss at iter 140 is 0.120669
training loss at iter 160 is 0.129316
training loss at iter 180 is 0.134205
training loss at iter 200 is 0.125987
training loss at iter 220 is 0.129841
training loss at iter 240 is 0.114534
training loss at iter 260 is 0.127509
training loss at iter 280 is 0.123134
training loss at iter 300 is 0.123745
training loss at iter 320 is 0.128173
training loss at iter 340 is 0.129438
training loss at iter 360 is 0.123866
training loss at iter 380 is 0.122210
training loss at iter 400 is 0.125347
training loss at iter 420 is 0.126297
training loss at iter 440 is 0.122649
training loss at iter 460 is 0.138758
training loss at iter 480 is 0.125150
training loss at iter 500 is 0.134652
training loss at iter 520 is 0.129420
training loss at



training loss at iter 20 is 0.116394
training loss at iter 40 is 0.117764
training loss at iter 60 is 0.134536
training loss at iter 80 is 0.126088
training loss at iter 100 is 0.144857
training loss at iter 120 is 0.124253
training loss at iter 140 is 0.117943
training loss at iter 160 is 0.128637
training loss at iter 180 is 0.127034
training loss at iter 200 is 0.131855
training loss at iter 220 is 0.126779
training loss at iter 240 is 0.127566
training loss at iter 260 is 0.127415
training loss at iter 280 is 0.128494
training loss at iter 300 is 0.124818
training loss at iter 320 is 0.140390
training loss at iter 340 is 0.122642
training loss at iter 360 is 0.138648
training loss at iter 380 is 0.125075
training loss at iter 400 is 0.120623
training loss at iter 420 is 0.132861
training loss at iter 440 is 0.136219
training loss at iter 460 is 0.138048
training loss at iter 480 is 0.125042
training loss at iter 500 is 0.123114
training loss at iter 520 is 0.129982
training loss at



training loss at iter 20 is 0.119156
training loss at iter 40 is 0.120070
training loss at iter 60 is 0.133969
training loss at iter 80 is 0.132461
training loss at iter 100 is 0.124625
training loss at iter 120 is 0.127459
training loss at iter 140 is 0.124264
training loss at iter 160 is 0.126365
training loss at iter 180 is 0.132183
training loss at iter 200 is 0.133942
training loss at iter 220 is 0.127537
training loss at iter 240 is 0.130214
training loss at iter 260 is 0.127390
training loss at iter 280 is 0.128690
training loss at iter 300 is 0.121393
training loss at iter 320 is 0.124487
training loss at iter 340 is 0.125958
training loss at iter 360 is 0.133992
training loss at iter 380 is 0.127750
training loss at iter 400 is 0.127154
training loss at iter 420 is 0.147970
training loss at iter 440 is 0.126218
training loss at iter 460 is 0.127810
training loss at iter 480 is 0.127744
training loss at iter 500 is 0.121297
training loss at iter 520 is 0.115924
training loss at



training loss at iter 20 is 0.122163
training loss at iter 40 is 0.115862
training loss at iter 60 is 0.132428
training loss at iter 80 is 0.123874
training loss at iter 100 is 0.129812
training loss at iter 120 is 0.127136
training loss at iter 140 is 0.121279
training loss at iter 160 is 0.131525
training loss at iter 180 is 0.117128
training loss at iter 200 is 0.122649
training loss at iter 220 is 0.121759
training loss at iter 240 is 0.130105
training loss at iter 260 is 0.122944
training loss at iter 280 is 0.128238
training loss at iter 300 is 0.125292
training loss at iter 320 is 0.123593
training loss at iter 340 is 0.126822
training loss at iter 360 is 0.125702
training loss at iter 380 is 0.124187
training loss at iter 400 is 0.135010
training loss at iter 420 is 0.130103
training loss at iter 440 is 0.119543
training loss at iter 460 is 0.128383
training loss at iter 480 is 0.124317
training loss at iter 500 is 0.127376
training loss at iter 520 is 0.128888
training loss at



training loss at iter 20 is 0.120617
training loss at iter 40 is 0.127891
training loss at iter 60 is 0.130405
training loss at iter 80 is 0.147121
training loss at iter 100 is 0.119048
training loss at iter 120 is 0.121016
training loss at iter 140 is 0.126163
training loss at iter 160 is 0.122742
training loss at iter 180 is 0.128868
training loss at iter 200 is 0.120607
training loss at iter 220 is 0.120949
training loss at iter 240 is 0.126630
training loss at iter 260 is 0.138684
training loss at iter 280 is 0.120011
training loss at iter 300 is 0.122082
training loss at iter 320 is 0.132053
training loss at iter 340 is 0.139174
training loss at iter 360 is 0.126652
training loss at iter 380 is 0.125399
training loss at iter 400 is 0.130912
training loss at iter 420 is 0.123734
training loss at iter 440 is 0.127584
training loss at iter 460 is 0.135794
training loss at iter 480 is 0.121089
training loss at iter 500 is 0.128353
training loss at iter 520 is 0.129162
training loss at



training loss at iter 20 is 0.156672
training loss at iter 40 is 0.122234
training loss at iter 60 is 0.125575
training loss at iter 80 is 0.124918
training loss at iter 100 is 0.125340
training loss at iter 120 is 0.125372
training loss at iter 140 is 0.128994
training loss at iter 160 is 0.120671
training loss at iter 180 is 0.122437
training loss at iter 200 is 0.134701
training loss at iter 220 is 0.121940
training loss at iter 240 is 0.122409
training loss at iter 260 is 0.135492
training loss at iter 280 is 0.132878
training loss at iter 300 is 0.135077
training loss at iter 320 is 0.127150
training loss at iter 340 is 0.118382
training loss at iter 360 is 0.124281
training loss at iter 380 is 0.126815
training loss at iter 400 is 0.120459
training loss at iter 420 is 0.124274
training loss at iter 440 is 0.127068
training loss at iter 460 is 0.118419
training loss at iter 480 is 0.142171
training loss at iter 500 is 0.120519
training loss at iter 520 is 0.128876
training loss at



training loss at iter 20 is 0.119952
training loss at iter 40 is 0.126303
training loss at iter 60 is 0.126644
training loss at iter 80 is 0.140025
training loss at iter 100 is 0.132144
training loss at iter 120 is 0.125094
training loss at iter 140 is 0.125660
training loss at iter 160 is 0.131821
training loss at iter 180 is 0.122889
training loss at iter 200 is 0.122897
training loss at iter 220 is 0.137489
training loss at iter 240 is 0.122612
training loss at iter 260 is 0.127779
training loss at iter 280 is 0.126767
training loss at iter 300 is 0.124220
training loss at iter 320 is 0.134352
training loss at iter 340 is 0.115383
training loss at iter 360 is 0.127776
training loss at iter 380 is 0.119993
training loss at iter 400 is 0.117376
training loss at iter 420 is 0.137362
training loss at iter 440 is 0.121539
training loss at iter 460 is 0.137577
training loss at iter 480 is 0.128417
training loss at iter 500 is 0.121121
training loss at iter 520 is 0.129167
training loss at



training loss at iter 20 is 0.122432
training loss at iter 40 is 0.127353
training loss at iter 60 is 0.125770
training loss at iter 80 is 0.126500
training loss at iter 100 is 0.117587
training loss at iter 120 is 0.137821
training loss at iter 140 is 0.122996
training loss at iter 160 is 0.118553
training loss at iter 180 is 0.124776
training loss at iter 200 is 0.138188
training loss at iter 220 is 0.125490
training loss at iter 240 is 0.133963
training loss at iter 260 is 0.124372
training loss at iter 280 is 0.122285
training loss at iter 300 is 0.132872
training loss at iter 320 is 0.115959
training loss at iter 340 is 0.123482
training loss at iter 360 is 0.136896
training loss at iter 380 is 0.120854
training loss at iter 400 is 0.122350
training loss at iter 420 is 0.120002
training loss at iter 440 is 0.120473
training loss at iter 460 is 0.131189
training loss at iter 480 is 0.124928
training loss at iter 500 is 0.125148
training loss at iter 520 is 0.126810
training loss at



training loss at iter 20 is 0.121542
training loss at iter 40 is 0.129343
training loss at iter 60 is 0.128764
training loss at iter 80 is 0.130060
training loss at iter 100 is 0.121559
training loss at iter 120 is 0.121541
training loss at iter 140 is 0.123593
training loss at iter 160 is 0.127657
training loss at iter 180 is 0.130426
training loss at iter 200 is 0.120532
training loss at iter 220 is 0.131919
training loss at iter 240 is 0.123756
training loss at iter 260 is 0.131524
training loss at iter 280 is 0.127657
training loss at iter 300 is 0.129132
training loss at iter 320 is 0.126231
training loss at iter 340 is 0.128473
training loss at iter 360 is 0.124657
training loss at iter 380 is 0.118546
training loss at iter 400 is 0.129608
training loss at iter 420 is 0.136236
training loss at iter 440 is 0.120811
training loss at iter 460 is 0.122109
training loss at iter 480 is 0.135232
training loss at iter 500 is 0.146557
training loss at iter 520 is 0.121919
training loss at



training loss at iter 20 is 0.144861
training loss at iter 40 is 0.125961
training loss at iter 60 is 0.128124
training loss at iter 80 is 0.129425
training loss at iter 100 is 0.121138
training loss at iter 120 is 0.125897
training loss at iter 140 is 0.125020
training loss at iter 160 is 0.130568
training loss at iter 180 is 0.126731
training loss at iter 200 is 0.124210
training loss at iter 220 is 0.130433
training loss at iter 240 is 0.120405
training loss at iter 260 is 0.122790
training loss at iter 280 is 0.126982
training loss at iter 300 is 0.126156
training loss at iter 320 is 0.122690
training loss at iter 340 is 0.121245
training loss at iter 360 is 0.122510
training loss at iter 380 is 0.130138
training loss at iter 400 is 0.134210
training loss at iter 420 is 0.128147
training loss at iter 440 is 0.122218
training loss at iter 460 is 0.119414
training loss at iter 480 is 0.119480
training loss at iter 500 is 0.129402
training loss at iter 520 is 0.131395
training loss at



training loss at iter 20 is 0.122288
training loss at iter 40 is 0.124884
training loss at iter 60 is 0.125614
training loss at iter 80 is 0.125506
training loss at iter 100 is 0.144443
training loss at iter 120 is 0.127280
training loss at iter 140 is 0.126564
training loss at iter 160 is 0.130970
training loss at iter 180 is 0.137139
training loss at iter 200 is 0.128093
training loss at iter 220 is 0.120977
training loss at iter 240 is 0.132090
training loss at iter 260 is 0.123626
training loss at iter 280 is 0.124049
training loss at iter 300 is 0.135184
training loss at iter 320 is 0.124703
training loss at iter 340 is 0.127752
training loss at iter 360 is 0.131831
training loss at iter 380 is 0.129341
training loss at iter 400 is 0.134482
training loss at iter 420 is 0.128009
training loss at iter 440 is 0.127267
training loss at iter 460 is 0.118438
training loss at iter 480 is 0.119309
training loss at iter 500 is 0.132619
training loss at iter 520 is 0.131645
training loss at



training loss at iter 20 is 0.121480
training loss at iter 40 is 0.125213
training loss at iter 60 is 0.123759
training loss at iter 80 is 0.123785
training loss at iter 100 is 0.127526
training loss at iter 120 is 0.136617
training loss at iter 140 is 0.120562
training loss at iter 160 is 0.123030
training loss at iter 180 is 0.121579
training loss at iter 200 is 0.130361
training loss at iter 220 is 0.124349
training loss at iter 240 is 0.137417
training loss at iter 260 is 0.133757
training loss at iter 280 is 0.117602
training loss at iter 300 is 0.125154
training loss at iter 320 is 0.134039
training loss at iter 340 is 0.127082
training loss at iter 360 is 0.149891
training loss at iter 380 is 0.120612
training loss at iter 400 is 0.116348
training loss at iter 420 is 0.127711
training loss at iter 440 is 0.133592
training loss at iter 460 is 0.136229
training loss at iter 480 is 0.122566
training loss at iter 500 is 0.118632
training loss at iter 520 is 0.130181
training loss at



training loss at iter 20 is 0.125621
training loss at iter 40 is 0.124291
training loss at iter 60 is 0.123428
training loss at iter 80 is 0.132045
training loss at iter 100 is 0.126693
training loss at iter 120 is 0.128286
training loss at iter 140 is 0.123060
training loss at iter 160 is 0.124380
training loss at iter 180 is 0.130914
training loss at iter 200 is 0.116088
training loss at iter 220 is 0.122320
training loss at iter 240 is 0.119317
training loss at iter 260 is 0.115050
training loss at iter 280 is 0.127510
training loss at iter 300 is 0.120978
training loss at iter 320 is 0.119892
training loss at iter 340 is 0.128919
training loss at iter 360 is 0.125339
training loss at iter 380 is 0.126920
training loss at iter 400 is 0.125620
training loss at iter 420 is 0.123937
training loss at iter 440 is 0.120792
training loss at iter 460 is 0.127562
training loss at iter 480 is 0.134844
training loss at iter 500 is 0.124959
training loss at iter 520 is 0.120238
training loss at



training loss at iter 20 is 0.133822
training loss at iter 40 is 0.119971
training loss at iter 60 is 0.124720
training loss at iter 80 is 0.133050
training loss at iter 100 is 0.128434
training loss at iter 120 is 0.132091
training loss at iter 140 is 0.124966
training loss at iter 160 is 0.126877
training loss at iter 180 is 0.125675
training loss at iter 200 is 0.132072
training loss at iter 220 is 0.126639
training loss at iter 240 is 0.124895
training loss at iter 260 is 0.128481
training loss at iter 280 is 0.137192
training loss at iter 300 is 0.121146
training loss at iter 320 is 0.122788
training loss at iter 340 is 0.126582
training loss at iter 360 is 0.136681
training loss at iter 380 is 0.130438
training loss at iter 400 is 0.123522
training loss at iter 420 is 0.134455
training loss at iter 440 is 0.132926
training loss at iter 460 is 0.121524
training loss at iter 480 is 0.135561
training loss at iter 500 is 0.127164
training loss at iter 520 is 0.127813
training loss at



training loss at iter 20 is 0.122273
training loss at iter 40 is 0.116654
training loss at iter 60 is 0.131121
training loss at iter 80 is 0.122498
training loss at iter 100 is 0.124764
training loss at iter 120 is 0.131157
training loss at iter 140 is 0.128475
training loss at iter 160 is 0.125961
training loss at iter 180 is 0.126790
training loss at iter 200 is 0.122757
training loss at iter 220 is 0.138188
training loss at iter 240 is 0.135761
training loss at iter 260 is 0.119640
training loss at iter 280 is 0.135086
training loss at iter 300 is 0.128450
training loss at iter 320 is 0.129770
training loss at iter 340 is 0.116304
training loss at iter 360 is 0.133214
training loss at iter 380 is 0.134833
training loss at iter 400 is 0.117580
training loss at iter 420 is 0.126938
training loss at iter 440 is 0.125587
training loss at iter 460 is 0.128665
training loss at iter 480 is 0.126112
training loss at iter 500 is 0.125144
training loss at iter 520 is 0.118971
training loss at



training loss at iter 20 is 0.140386
training loss at iter 40 is 0.123780
training loss at iter 60 is 0.122276
training loss at iter 80 is 0.120907
training loss at iter 100 is 0.133124
training loss at iter 120 is 0.121180
training loss at iter 140 is 0.128807
training loss at iter 160 is 0.124775
training loss at iter 180 is 0.123807
training loss at iter 200 is 0.120280
training loss at iter 220 is 0.125723
training loss at iter 240 is 0.117817
training loss at iter 260 is 0.116450
training loss at iter 280 is 0.118429
training loss at iter 300 is 0.127243
training loss at iter 320 is 0.130316
training loss at iter 340 is 0.141857
training loss at iter 360 is 0.126606
training loss at iter 380 is 0.124468
training loss at iter 400 is 0.126355
training loss at iter 420 is 0.125211
training loss at iter 440 is 0.120545
training loss at iter 460 is 0.119184
training loss at iter 480 is 0.132607
training loss at iter 500 is 0.127184
training loss at iter 520 is 0.128455
training loss at

