In [1]:
import pandas as pd
import re
import random
import sys
import re

import nltk
from nltk.tag import StanfordNERTagger
from nltk.tokenize import word_tokenize, sent_tokenize
import itertools
from collections import defaultdict

import numpy as np

import pickle

from __future__ import division

In [2]:
# reading data
sample = pd.read_csv("data/tweets.csv")
sample.head()

Unnamed: 0,PaxText,Reply
0,"@AirCanada thanks, what's te difference from t...","@Agent_613 Hi Bryan, Cargo may or may not be a..."
1,@SouthwestAir Tried cancel reservation w/i 24 ...,@mjij3 Hi there! Please follow/DM us your trav...
2,@SouthwestAir Can't DM without a follow :),@BobMcKelvey Whoops!
3,@SouthwestAir if my flight (4898) is delayed y...,@sozymicmike Fair! We're working hard to get y...
4,@AirCanada Just got a call similar to those We...,"@kerryinyyz Hi Kerry, these are certainly not ..."


In [3]:
# groupby multiple replies
sample = sample.groupby('PaxText')['Reply'].apply(lambda x: "%s" % ' '.join(x)).reset_index()

In [4]:
contractions = { 
    "i ain't": "am not",
    "you ain't": "you are not",
    "they ain't": "they are not",
    "she ain't": "she is not",
    "he ain't": "he is not",
    "aren't": "are not",
    "can't": "cannot",
    "can't've": "cannot have",
    "'cause": "because",
    "could've": "could have",
    "couldn't": "could not",
    "couldn't've": "could not have",
    "didn't": "did not",
    "doesn't": "does not",
    "don't": "do not",
    "hadn't": "had not",
    "hadn't've": "had not have",
    "hasn't": "has not",
    "haven't": "have not",
    "he'd": "he would",
    "he'd've": "he would have",
    "he'll": "he will",
    "he'll've": "he will have",
    "he's": "he is",
    "how'd": "how did",
    "how'd'y": "how do you",
    "how'll": "how will",
    "how's": "how is",
    "i'd": "i would",
    "i'd've": "i would have",
    "i'll": "i will",
    "i'll've": "i will have",
    "i'm": "i am",
    "i've": "i have",
    "isn't": "is not",
    "it'd": "it would",
    "it'd've": "it would have",
    "it'll": "it will",
    "it'll've": "it will have",
    "it's": "it is",
    "let's": "let us",
    "ma'am": "madam",
    "mayn't": "may not",
    "might've": "might have",
    "mightn't": "might not",
    "mightn't've": "might not have",
    "must've": "must have",
    "mustn't": "must not",
    "mustn't've": "must not have",
    "needn't": "need not",
    "needn't've": "need not have",
    "o'clock": "of the clock",
    "oughtn't": "ought not",
    "oughtn't've": "ought not have",
    "shan't": "shall not",
    "sha'n't": "shall not",
    "shan't've": "shall not have",
    "she'd": "she would",
    "she'd've": "she would have",
    "she'll": "she will",
    "she'll've": "she will have",
    "she's": "she is",
    "should've": "should have",
    "shouldn't": "should not",
    "shouldn't've": "should not have",
    "so've": "so have",
    "so's": "so is",
    "that'd": "that had",
    "that'd've": "that would have",
    "that's": "that is",
    "there'd": "there would",
    "there'd've": "there would have",
    "there's": "there is",
    "they'd": "they would",
    "they'd've": "they would have",
    "they'll": "they will",
    "they'll've": "they will have",
    "they're": "they are",
    "they've": "they have",
    "to've": "to have",
    "wasn't": "was not",
    "we'd": "we would",
    "we'd've": "we would have",
    "we'll": "we will",
    "we'll've": "we will have",
    "we're": "we are",
    "we've": "we have",
    "weren't": "were not",
    "what'll": "what will",
    "what'll've": "what will have",
    "what're": "what are",
    "what's": "what is",
    "what've": "what have",
    "when's": "when is",
    "when've": "when have",
    "where'd": "where did",
    "where's": "where is",
    "where've": "where have",
    "who'll": "who will",
    "who'll've": "who will have",
    "who's": "who is",
    "who've": "who have",
    "why's": "why is",
    "why've": "why have",
    "will've": "will have",
    "won't": "will not",
    "won't've": "will not have",
    "would've": "would have",
    "wouldn't": "would not",
    "wouldn't've": "would not have",
    "y'all": "you all",
    "y'all'd": "you all would",
    "y'all'd've": "you all would have",
    "y'all're": "you all are",
    "y'all've": "you all have",
    "you'd": "you would",
    "you'd've": "you would have",
    "you'll": "you will",
    "you'll've": "you will have",
    "you're": "you are",
    "you've": "you have"   
}

contractions_re = re.compile('(%s)' % '|'.join(contractions.keys()))
def expand_contractions(s, contractions_dict=contractions, contractions_re=contractions_re):
    def replace(match):
        return contractions_dict[match.group(0)]
    return contractions_re.sub(replace, s)

In [5]:
name_list = pd.read_csv("data/firstnames.csv")["firstname"].str.lower().tolist()
print name_list[:15]

def clean_text(text, name_list=name_list):
    # to lower case
    text = text.lower()
    # remove @ mentions
    text = re.sub(r'@\w+\b', '', text)
    # remove url links
    text = re.sub(r'\bhttp.+\b', '', text)
    # remove line break
    text = re.sub(r'\n', ' ', text)
    
    # expand contraction
    text = expand_contractions(text)
    
    # replace some common shorthands
    text = re.sub(r'&amp;', ' and ', text)
    text = re.sub(r'\bb/c\b', 'because', text)
    text = re.sub(r'\b&lt;\b', '<', text)
    text = re.sub(r'\b&gt;\b', '>', text)
    
    # remove punctuation
    text = re.sub(r'[^\w\d_\s]+', '', text)
    
    # get rid of common english names
    text = [w for w in text.split() if w not in name_list]
    
    # get rid of initials at the end
    if len(text) > 0:
        if re.match(r'\b(?:\/|\^)[A-z]{2}\b', text[-1]) is not None:
            text.pop()
        
    if len(text) <= 3:
        return "TO_DELETE"
    else:
        return " ".join(text)

['aaron', 'aaron', 'abbey', 'abbie', 'abby', 'abdul', 'abe', 'abel', 'abigail', 'abraham', 'abram', 'ada', 'adah', 'adalberto', 'adaline']


In [6]:
sample["PaxText"] = sample["PaxText"].apply(lambda x: clean_text(x))
sample["Reply"] = sample["Reply"].apply(lambda x: clean_text(x))
sample = sample[(sample["PaxText"] != "TO_DELETE") & (sample["Reply"] != "TO_DELETE")]
sample.head()

Unnamed: 0,PaxText,Reply
0,boeing747 sunset beautiful ht,hi glad you are having such a great day
2,happy 30th birthday emirates30 halamadrid,thank you emirates30 halamadrid
4,traveling firstclass is always style thank you...,we you too it is our pleasure having you on board
6,and having bag ripped but the baggage claim pp...,we are glad to hear our airport team took good...
8,off to la la land la wheelsup,sweet pic margeaux we luv that view vp


In [7]:
print len(sample)

845997


In [8]:
limit = {'maxq' : 500,
         'minq' : 3,
         'maxa' : 500,
         'mina' : 3
        }

UNK = 'unk'
VOCAB_SIZE = 6000

In [9]:
qlines = sample["PaxText"].tolist()
alines = sample["Reply"].tolist()

qtokenized = [sent.split(' ') for sent in qlines]
atokenized = [sent.split(' ') for sent in alines]

print "Q: " + " ".join(qtokenized[5])
print "A: " + " ".join(atokenized[5])

Q: leavin on a plane do not know when i be back again
A: have the best trip ever


In [10]:
def index_(tokenized_sentences, vocab_size):
    # get frequency distribution
    freq_dist = nltk.FreqDist(itertools.chain(*tokenized_sentences))
    # get vocabulary of 'vocab_size' most used words
    vocab = freq_dist.most_common(vocab_size)
    # index2word
    index2word = ['_'] + [UNK] + [ x[0] for x in vocab ]
    # word2index
    word2index = dict([(w,i) for i,w in enumerate(index2word)] )
    return index2word, word2index, freq_dist

idx2w, w2idx, freq_dist = index_(qtokenized + atokenized, vocab_size=VOCAB_SIZE)

In [11]:
def pad_seq(seq, lookup, maxlen):
    indices = []
    for word in seq:
        if word in lookup:
            indices.append(lookup[word])
        else:
            indices.append(lookup[UNK])
    return indices + [0]*(maxlen - len(seq))

def zero_pad(qtokenized, atokenized, w2idx):
    # num of rows
    data_len = len(qtokenized)

    # numpy arrays to store indices
    idx_q = np.zeros([data_len, limit['maxq']], dtype=np.int32)
    idx_a = np.zeros([data_len, limit['maxa']], dtype=np.int32)

    for i in range(data_len):
        q_indices = pad_seq(qtokenized[i], w2idx, limit['maxq'])
        a_indices = pad_seq(atokenized[i], w2idx, limit['maxa'])

        #print(len(idx_q[i]), len(q_indices))
        #print(len(idx_a[i]), len(a_indices))
        idx_q[i] = np.array(q_indices)
        idx_a[i] = np.array(a_indices)

    return idx_q, idx_a

idx_q, idx_a = zero_pad(qtokenized, atokenized, w2idx)

In [12]:
# count of unknowns
unk_count = (idx_q == 1).sum() + (idx_a == 1).sum()
# count of words

word_count = (idx_q > 1).sum() + (idx_a > 1).sum()
# % unknown

print unk_count
print word_count
print '% unknown : {}'.format(100 * (unk_count/word_count))

1031451
27147884
% unknown : 3.79937898659


In [13]:
print len(idx_q)
print len(idx_a)

845997
845997


## Training a Model

In [14]:
idx_q_filtered = []
idx_a_filtered = []

for i in range(len(idx_q)):
    w_count_q = (idx_q[i] > 1).sum()
    u_count_q = (idx_q[i] == 1).sum()
    
    w_count_a = (idx_a[i] > 1).sum()
    u_count_a = (idx_a[i] == 1).sum()
    
    if w_count_q/(w_count_q + u_count_q) >= 0.8 and w_count_a/(w_count_a + u_count_a) >= 0.8:
        idx_q_filtered.append(np.array(idx_q[i]))
        idx_a_filtered.append(np.array(idx_a[i]))
        
idx_q_filtered = np.array(idx_q_filtered)
idx_a_filtered = np.array(idx_a_filtered)

print len(idx_q_filtered)
print len(idx_a_filtered)

796449
796449


In [15]:
def split_dataset(x, y, ratio = [0.7, 0.15, 0.15] ):
    # number of examples
    data_len = len(x)
    lens = [ int(data_len*item) for item in ratio ]

    trainX, trainY = x[:lens[0]], y[:lens[0]]
    testX, testY = x[lens[0]:lens[0]+lens[1]], y[lens[0]:lens[0]+lens[1]]
    validX, validY = x[-lens[-1]:], y[-lens[-1]:]

    return (trainX,trainY), (testX,testY), (validX,validY)

(trainX, trainY), (testX, testY), (validX, validY) = split_dataset(idx_q_filtered, idx_a_filtered)

trainX = trainX.tolist()
trainY = trainY.tolist()
testX = testX.tolist()
testY = testY.tolist()
validX = validX.tolist()
validY = validY.tolist()

print trainX[0]
print trainY[0]

[449, 2105, 11, 145, 2068, 59, 3, 6, 1, 5, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [16]:
import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers import *

In [17]:
trainX = tl.prepro.remove_pad_sequences(trainX)
trainY = tl.prepro.remove_pad_sequences(trainY)
testX = tl.prepro.remove_pad_sequences(testX)
testY = tl.prepro.remove_pad_sequences(testY)
validX = tl.prepro.remove_pad_sequences(validX)
validY = tl.prepro.remove_pad_sequences(validY)

print trainX[10]
print trainY[10]

[1, 2, 682, 765, 8, 27, 226, 78, 4685, 8, 38]
[134, 54, 964, 59, 3, 6, 81, 18, 25, 539]


In [18]:
###============= parameters
xseq_len = len(trainX)#.shape[-1]
yseq_len = len(trainY)#.shape[-1]
assert xseq_len == yseq_len

batch_size = 10
n_step = int(xseq_len/batch_size)

xvocab_size = len(idx2w)
emb_dim = 1024

In [19]:
print xseq_len
print n_step
print xvocab_size

557514
55751
6002


In [20]:
unk_id = w2idx['unk']   # 1
pad_id = w2idx['_']     # 0

start_id = xvocab_size   # 6002
end_id = xvocab_size + 1  # 6003

In [21]:
w2idx.update({'start_id': start_id})
w2idx.update({'end_id': end_id})
idx2w = idx2w + ['start_id', 'end_id']

xvocab_size = yvocab_size = xvocab_size + 2

# let us now save the necessary dictionaries
metadata = {
            'w2idx' : w2idx,
            'idx2w' : idx2w,
            'xvocab_size' : xvocab_size,
            'emb_dim': emb_dim
            }

# write to disk : data control dictionaries
with open('metadata_1M.pkl', 'wb') as f:
        pickle.dump(metadata, f)

In [22]:
print "encode_seqs:", [idx2w[id] for id in trainX[10]]

target_seqs = tl.prepro.sequences_add_end_id([trainY[10]], end_id=end_id)[0]
print "target_seqs:", [idx2w[id] for id in target_seqs]

decode_seqs = tl.prepro.sequences_add_start_id([trainY[10]], start_id=start_id, remove_last=False)[0]
print "decode_seqs:", [idx2w[id] for id in decode_seqs]

target_mask = tl.prepro.sequences_get_mask([target_seqs])[0]
print "target_mask:", target_mask

print(len(target_seqs), len(decode_seqs), len(target_mask))

encode_seqs: ['unk', 'to', 'nyc', 'miami', 'i', 'be', 'right', 'back', 'skypriority', 'i', 'am']
target_seqs: ['hello', 'great', 'pic', 'thank', 'you', 'for', 'flying', 'with', 'us', 'rd', 'end_id']
decode_seqs: ['start_id', 'hello', 'great', 'pic', 'thank', 'you', 'for', 'flying', 'with', 'us', 'rd']
target_mask: [1 1 1 1 1 1 1 1 1 1 1]
(11, 11, 11)


#### Model

In [23]:
def model(encode_seqs, decode_seqs, is_train=True, reuse=False):
    with tf.variable_scope("model", reuse=reuse):
        # for chatbot, you can use the same embedding layer,
        # for translation, you may want to use 2 separated embedding layers
        with tf.variable_scope("embedding") as vs:
            net_encode = EmbeddingInputlayer(
                inputs = encode_seqs,
                vocabulary_size = xvocab_size,
                embedding_size = emb_dim,
                name = 'seq_embedding')
            
            vs.reuse_variables()
            tl.layers.set_name_reuse(True)
            
            net_decode = EmbeddingInputlayer(
                inputs = decode_seqs,
                vocabulary_size = xvocab_size,
                embedding_size = emb_dim,
                name = 'seq_embedding')
            
        net_rnn = Seq2Seq(net_encode, net_decode,
                cell_fn = tf.contrib.rnn.BasicLSTMCell,
                n_hidden = emb_dim,
                initializer = tf.random_uniform_initializer(-0.1, 0.1),
                encode_sequence_length = retrieve_seq_length_op2(encode_seqs),
                decode_sequence_length = retrieve_seq_length_op2(decode_seqs),
                initial_state_encode = None,
                dropout = (0.5 if is_train else None),
                n_layer = 3,
                return_seq_2d = True,
                name = 'seq2seq')
        
        net_out = DenseLayer(net_rnn, n_units=xvocab_size, act=tf.identity, name='output')
    return net_out, net_rnn

In [24]:
# model for training
encode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="encode_seqs")
decode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="decode_seqs")
target_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_seqs")
target_mask = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_mask") # tl.prepro.sequences_get_mask()
net_out, _ = model(encode_seqs, decode_seqs, is_train=True, reuse=False)

  [TL] EmbeddingInputlayer model/embedding/seq_embedding: (6004, 1024)
  [TL] EmbeddingInputlayer model/embedding/seq_embedding: (6004, 1024)
  [**] Seq2Seq model/seq2seq: n_hidden:1024 cell_fn:BasicLSTMCell dropout:0.5 n_layer:3
  [TL] DynamicRNNLayer model/seq2seq/seq2seq_encode: n_hidden:1024, in_dim:3 in_shape:(10, ?, 1024) cell_fn:BasicLSTMCell dropout:0.5 n_layer:3
       batch_size (concurrent processes): 10
  [TL] DynamicRNNLayer model/seq2seq/seq2seq_decode: n_hidden:1024, in_dim:3 in_shape:(10, ?, 1024) cell_fn:BasicLSTMCell dropout:0.5 n_layer:3
       batch_size (concurrent processes): 10
  [TL] DenseLayer  model/output: 6004 identity


In [25]:
# loss for training
loss = tl.cost.cross_entropy_seq_with_mask(logits=net_out.outputs, 
                                           target_seqs=target_seqs, 
                                           input_mask=target_mask, 
                                           return_details=False, 
                                           name='cost')

In [26]:
net_out.print_params(False)

lr = 0.0001
train_op = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)

  param   0: model/embedding/seq_embedding/embeddings:0 (6004, 1024)       float32_ref
  param   1: model/seq2seq/seq2seq_decode/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel:0 (2048, 4096)       float32_ref
  param   2: model/seq2seq/seq2seq_decode/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias:0 (4096,)            float32_ref
  param   3: model/seq2seq/seq2seq_decode/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel:0 (2048, 4096)       float32_ref
  param   4: model/seq2seq/seq2seq_decode/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias:0 (4096,)            float32_ref
  param   5: model/seq2seq/seq2seq_decode/rnn/multi_rnn_cell/cell_2/basic_lstm_cell/kernel:0 (2048, 4096)       float32_ref
  param   6: model/seq2seq/seq2seq_decode/rnn/multi_rnn_cell/cell_2/basic_lstm_cell/bias:0 (4096,)            float32_ref
  param   7: model/seq2seq/seq2seq_encode/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel:0 (2048, 4096)       float32_ref
  param   8: model/seq2seq/seq2seq_encode/rnn/multi

In [27]:
# model for inferencing
encode_seqs2 = tf.placeholder(dtype=tf.int64, shape=[1, None], name="encode_seqs")
decode_seqs2 = tf.placeholder(dtype=tf.int64, shape=[1, None], name="decode_seqs")
net, net_rnn = model(encode_seqs2, decode_seqs2, is_train=False, reuse=True)
y = tf.nn.softmax(net.outputs)

  [TL] EmbeddingInputlayer model/embedding/seq_embedding: (6004, 1024)
  [TL] EmbeddingInputlayer model/embedding/seq_embedding: (6004, 1024)
  [**] Seq2Seq model/seq2seq: n_hidden:1024 cell_fn:BasicLSTMCell dropout:None n_layer:3
  [TL] DynamicRNNLayer model/seq2seq/seq2seq_encode: n_hidden:1024, in_dim:3 in_shape:(1, ?, 1024) cell_fn:BasicLSTMCell dropout:None n_layer:3
       batch_size (concurrent processes): 1
  [TL] DynamicRNNLayer model/seq2seq/seq2seq_decode: n_hidden:1024, in_dim:3 in_shape:(1, ?, 1024) cell_fn:BasicLSTMCell dropout:None n_layer:3
       batch_size (concurrent processes): 1
  [TL] DenseLayer  model/output: 6004 identity


#### Train

In [28]:
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False))
tl.layers.initialize_global_variables(sess)

n_epoch = 50

for epoch in range(n_epoch):
    epoch_time = time.time()
    ## shuffle training data
    from sklearn.utils import shuffle
    trainX, trainY = shuffle(trainX, trainY, random_state=0)
    ## train an epoch
    total_err, n_iter = 0, 0
    for X, Y in tl.iterate.minibatches(inputs=trainX, targets=trainY, batch_size=batch_size, shuffle=False):
        step_time = time.time()

        X = tl.prepro.pad_sequences(X)
        _target_seqs = tl.prepro.sequences_add_end_id(Y, end_id=end_id)
        _target_seqs = tl.prepro.pad_sequences(_target_seqs)

        _decode_seqs = tl.prepro.sequences_add_start_id(Y, start_id=start_id, remove_last=False)
        _decode_seqs = tl.prepro.pad_sequences(_decode_seqs)
        _target_mask = tl.prepro.sequences_get_mask(_target_seqs)

        ## you can view the data here
        # for i in range(len(X)):
        #     print(i, [idx2w[id] for id in X[i]])
        #     print(i, [idx2w[id] for id in Y[i]])
        #     print(i, [idx2w[id] for id in _target_seqs[i]])
        #     print(i, [idx2w[id] for id in _decode_seqs[i]])
        #     print(i, _target_mask[i])
        #     print(len(_target_seqs[i]), len(_decode_seqs[i]), len(_target_mask[i]))
        # exit()

        _, err = sess.run([train_op, loss],
                        {encode_seqs: X,
                        decode_seqs: _decode_seqs,
                        target_seqs: _target_seqs,
                        target_mask: _target_mask})

        if n_iter % 200 == 0:
            print("Epoch[%d/%d] step:[%d/%d] loss:%f took:%.5fs" % (epoch, n_epoch, n_iter, n_step, err, time.time() - step_time))

        total_err += err; n_iter += 1

Epoch[0/50] step:[0/55751] loss:9.329982 took:0.66202s
Epoch[0/50] step:[200/55751] loss:6.587735 took:0.20728s
Epoch[0/50] step:[400/55751] loss:5.720950 took:0.17277s
Epoch[0/50] step:[600/55751] loss:6.133894 took:0.17288s
Epoch[0/50] step:[800/55751] loss:5.805511 took:0.22027s
Epoch[0/50] step:[1000/55751] loss:5.839880 took:0.24309s
Epoch[0/50] step:[1200/55751] loss:5.374759 took:0.17250s
Epoch[0/50] step:[1400/55751] loss:5.749952 took:0.20990s
Epoch[0/50] step:[1600/55751] loss:5.726033 took:0.21670s
Epoch[0/50] step:[1800/55751] loss:5.973401 took:0.44089s
Epoch[0/50] step:[2000/55751] loss:5.573874 took:0.23528s
Epoch[0/50] step:[2200/55751] loss:5.358806 took:0.22894s
Epoch[0/50] step:[2400/55751] loss:5.166379 took:0.24321s
Epoch[0/50] step:[2600/55751] loss:5.782174 took:0.26178s
Epoch[0/50] step:[2800/55751] loss:5.653172 took:0.19375s
Epoch[0/50] step:[3000/55751] loss:5.658379 took:0.27822s
Epoch[0/50] step:[3200/55751] loss:5.659799 took:0.17825s
Epoch[0/50] step:[340

Epoch[0/50] step:[28000/55751] loss:3.369488 took:0.23412s
Epoch[0/50] step:[28200/55751] loss:3.383142 took:0.20501s
Epoch[0/50] step:[28400/55751] loss:3.885865 took:0.18308s
Epoch[0/50] step:[28600/55751] loss:2.856195 took:0.32355s
Epoch[0/50] step:[28800/55751] loss:3.557212 took:0.18434s
Epoch[0/50] step:[29000/55751] loss:3.687995 took:0.24290s
Epoch[0/50] step:[29200/55751] loss:3.534324 took:0.30945s
Epoch[0/50] step:[29400/55751] loss:3.769305 took:0.27691s
Epoch[0/50] step:[29600/55751] loss:2.767051 took:0.21974s
Epoch[0/50] step:[29800/55751] loss:3.687118 took:0.27164s
Epoch[0/50] step:[30000/55751] loss:3.619653 took:0.23112s
Epoch[0/50] step:[30200/55751] loss:3.228127 took:0.21382s
Epoch[0/50] step:[30400/55751] loss:2.504023 took:0.20525s
Epoch[0/50] step:[30600/55751] loss:3.566951 took:0.24458s
Epoch[0/50] step:[30800/55751] loss:3.480915 took:0.35202s
Epoch[0/50] step:[31000/55751] loss:2.840909 took:0.18793s
Epoch[0/50] step:[31200/55751] loss:3.201037 took:0.2902

Epoch[1/50] step:[0/55751] loss:3.269706 took:0.22681s
Epoch[1/50] step:[200/55751] loss:3.157961 took:0.20133s
Epoch[1/50] step:[400/55751] loss:2.649162 took:0.44950s
Epoch[1/50] step:[600/55751] loss:2.935627 took:0.20092s
Epoch[1/50] step:[800/55751] loss:3.113456 took:0.35136s
Epoch[1/50] step:[1000/55751] loss:2.762359 took:0.32038s
Epoch[1/50] step:[1200/55751] loss:3.273232 took:0.20051s
Epoch[1/50] step:[1400/55751] loss:3.328963 took:0.24525s
Epoch[1/50] step:[1600/55751] loss:3.385464 took:0.25768s
Epoch[1/50] step:[1800/55751] loss:2.329816 took:0.24037s
Epoch[1/50] step:[2000/55751] loss:3.249865 took:0.29163s
Epoch[1/50] step:[2200/55751] loss:3.196080 took:0.24538s
Epoch[1/50] step:[2400/55751] loss:3.279532 took:0.26467s
Epoch[1/50] step:[2600/55751] loss:2.880117 took:0.19761s
Epoch[1/50] step:[2800/55751] loss:3.310514 took:0.21110s
Epoch[1/50] step:[3000/55751] loss:3.944660 took:0.24146s
Epoch[1/50] step:[3200/55751] loss:2.663777 took:0.26153s
Epoch[1/50] step:[340

Epoch[1/50] step:[28000/55751] loss:2.964138 took:0.21915s
Epoch[1/50] step:[28200/55751] loss:3.803422 took:0.29948s
Epoch[1/50] step:[28400/55751] loss:3.052292 took:0.21141s
Epoch[1/50] step:[28600/55751] loss:3.136235 took:0.19883s
Epoch[1/50] step:[28800/55751] loss:2.399521 took:0.18977s
Epoch[1/50] step:[29000/55751] loss:3.219660 took:0.36529s
Epoch[1/50] step:[29200/55751] loss:3.232952 took:0.17605s
Epoch[1/50] step:[29400/55751] loss:3.471229 took:0.21588s
Epoch[1/50] step:[29600/55751] loss:3.132441 took:0.25513s
Epoch[1/50] step:[29800/55751] loss:2.367842 took:0.23628s
Epoch[1/50] step:[30000/55751] loss:3.328022 took:0.29696s
Epoch[1/50] step:[30200/55751] loss:3.211452 took:0.25688s
Epoch[1/50] step:[30400/55751] loss:2.549711 took:0.20888s
Epoch[1/50] step:[30600/55751] loss:2.774108 took:0.30713s
Epoch[1/50] step:[30800/55751] loss:3.132149 took:0.20643s
Epoch[1/50] step:[31000/55751] loss:3.436945 took:0.27989s
Epoch[1/50] step:[31200/55751] loss:3.603184 took:0.2821

Epoch[2/50] step:[0/55751] loss:2.501693 took:0.28517s
Epoch[2/50] step:[200/55751] loss:3.264061 took:0.17999s
Epoch[2/50] step:[400/55751] loss:2.261023 took:0.18095s
Epoch[2/50] step:[600/55751] loss:2.295483 took:0.18325s
Epoch[2/50] step:[800/55751] loss:2.729541 took:0.30452s
Epoch[2/50] step:[1000/55751] loss:3.141888 took:0.20183s
Epoch[2/50] step:[1200/55751] loss:3.639694 took:0.24217s
Epoch[2/50] step:[1400/55751] loss:3.342757 took:0.23480s
Epoch[2/50] step:[1600/55751] loss:2.295555 took:0.25035s
Epoch[2/50] step:[1800/55751] loss:2.438114 took:0.40312s
Epoch[2/50] step:[2000/55751] loss:3.242840 took:0.31890s
Epoch[2/50] step:[2200/55751] loss:2.458366 took:0.26551s
Epoch[2/50] step:[2400/55751] loss:2.971375 took:0.25968s
Epoch[2/50] step:[2600/55751] loss:3.186669 took:0.20600s
Epoch[2/50] step:[2800/55751] loss:3.266727 took:0.30737s
Epoch[2/50] step:[3000/55751] loss:3.019815 took:0.29779s
Epoch[2/50] step:[3200/55751] loss:3.009851 took:0.22807s
Epoch[2/50] step:[340

Epoch[2/50] step:[28000/55751] loss:3.146364 took:0.20804s
Epoch[2/50] step:[28200/55751] loss:2.846606 took:0.19452s
Epoch[2/50] step:[28400/55751] loss:2.553436 took:0.19576s
Epoch[2/50] step:[28600/55751] loss:3.216370 took:0.24839s
Epoch[2/50] step:[28800/55751] loss:2.519154 took:0.21123s
Epoch[2/50] step:[29000/55751] loss:2.582134 took:0.20521s
Epoch[2/50] step:[29200/55751] loss:3.059658 took:0.27102s
Epoch[2/50] step:[29400/55751] loss:2.463834 took:0.27974s
Epoch[2/50] step:[29600/55751] loss:2.121257 took:0.16793s
Epoch[2/50] step:[29800/55751] loss:2.405751 took:0.23019s
Epoch[2/50] step:[30000/55751] loss:3.935938 took:0.46289s
Epoch[2/50] step:[30200/55751] loss:3.040976 took:0.21666s
Epoch[2/50] step:[30400/55751] loss:2.918218 took:0.19624s
Epoch[2/50] step:[30600/55751] loss:2.836962 took:0.19991s
Epoch[2/50] step:[30800/55751] loss:2.845182 took:0.43061s
Epoch[2/50] step:[31000/55751] loss:2.934272 took:0.22546s
Epoch[2/50] step:[31200/55751] loss:2.360733 took:0.2879

Epoch[3/50] step:[0/55751] loss:2.757391 took:0.21721s
Epoch[3/50] step:[200/55751] loss:2.859266 took:0.18344s
Epoch[3/50] step:[400/55751] loss:2.895404 took:0.20661s
Epoch[3/50] step:[600/55751] loss:2.834432 took:0.41549s
Epoch[3/50] step:[800/55751] loss:2.763395 took:0.23931s
Epoch[3/50] step:[1000/55751] loss:2.675444 took:0.17742s
Epoch[3/50] step:[1200/55751] loss:2.903245 took:0.19965s
Epoch[3/50] step:[1400/55751] loss:3.249366 took:0.18284s
Epoch[3/50] step:[1600/55751] loss:2.911292 took:0.18944s
Epoch[3/50] step:[1800/55751] loss:2.605377 took:0.37950s
Epoch[3/50] step:[2000/55751] loss:2.554570 took:0.24646s
Epoch[3/50] step:[2200/55751] loss:2.978697 took:0.19166s
Epoch[3/50] step:[2400/55751] loss:2.583720 took:0.19996s
Epoch[3/50] step:[2600/55751] loss:2.419369 took:0.22507s
Epoch[3/50] step:[2800/55751] loss:3.853896 took:0.33325s
Epoch[3/50] step:[3000/55751] loss:3.358689 took:0.31932s
Epoch[3/50] step:[3200/55751] loss:2.927094 took:0.19012s
Epoch[3/50] step:[340

Epoch[3/50] step:[28000/55751] loss:2.622857 took:0.23333s
Epoch[3/50] step:[28200/55751] loss:2.983330 took:0.18224s
Epoch[3/50] step:[28400/55751] loss:2.507429 took:0.23317s
Epoch[3/50] step:[28600/55751] loss:2.714149 took:0.20885s
Epoch[3/50] step:[28800/55751] loss:3.178757 took:0.22985s
Epoch[3/50] step:[29000/55751] loss:2.842385 took:0.42365s
Epoch[3/50] step:[29200/55751] loss:2.830250 took:0.27448s
Epoch[3/50] step:[29400/55751] loss:1.747025 took:0.20392s
Epoch[3/50] step:[29600/55751] loss:2.176516 took:0.36565s
Epoch[3/50] step:[29800/55751] loss:2.529950 took:0.19528s
Epoch[3/50] step:[30000/55751] loss:2.631794 took:0.50048s
Epoch[3/50] step:[30200/55751] loss:2.736872 took:0.27525s
Epoch[3/50] step:[30400/55751] loss:2.190447 took:0.24277s
Epoch[3/50] step:[30600/55751] loss:2.968510 took:0.22539s
Epoch[3/50] step:[30800/55751] loss:2.911305 took:0.22457s
Epoch[3/50] step:[31000/55751] loss:3.210593 took:0.18987s
Epoch[3/50] step:[31200/55751] loss:2.485804 took:0.1945

Epoch[4/50] step:[0/55751] loss:2.340856 took:0.20281s
Epoch[4/50] step:[200/55751] loss:2.757301 took:0.17881s
Epoch[4/50] step:[400/55751] loss:2.186712 took:0.19888s
Epoch[4/50] step:[600/55751] loss:2.487377 took:0.27428s
Epoch[4/50] step:[800/55751] loss:2.625352 took:0.20085s
Epoch[4/50] step:[1000/55751] loss:2.389151 took:0.31086s
Epoch[4/50] step:[1200/55751] loss:1.977777 took:0.22174s
Epoch[4/50] step:[1400/55751] loss:2.711086 took:0.24811s
Epoch[4/50] step:[1600/55751] loss:2.428580 took:0.23032s
Epoch[4/50] step:[1800/55751] loss:2.718792 took:0.21322s
Epoch[4/50] step:[2000/55751] loss:3.073198 took:0.17212s
Epoch[4/50] step:[2200/55751] loss:2.317029 took:0.30286s
Epoch[4/50] step:[2400/55751] loss:2.452794 took:0.23693s
Epoch[4/50] step:[2600/55751] loss:3.439208 took:0.22635s
Epoch[4/50] step:[2800/55751] loss:3.299816 took:0.28112s
Epoch[4/50] step:[3000/55751] loss:3.064444 took:0.21732s
Epoch[4/50] step:[3200/55751] loss:2.919681 took:0.30895s
Epoch[4/50] step:[340

Epoch[4/50] step:[28000/55751] loss:2.139047 took:0.25222s
Epoch[4/50] step:[28200/55751] loss:2.811086 took:0.21130s
Epoch[4/50] step:[28400/55751] loss:2.705307 took:0.25523s
Epoch[4/50] step:[28600/55751] loss:2.900304 took:0.58212s
Epoch[4/50] step:[28800/55751] loss:2.910511 took:0.18266s
Epoch[4/50] step:[29000/55751] loss:3.355458 took:0.18051s
Epoch[4/50] step:[29200/55751] loss:2.135944 took:0.17248s
Epoch[4/50] step:[29400/55751] loss:2.492169 took:0.22031s
Epoch[4/50] step:[29600/55751] loss:1.864785 took:0.19517s
Epoch[4/50] step:[29800/55751] loss:3.474520 took:0.20492s
Epoch[4/50] step:[30000/55751] loss:2.966640 took:0.27822s
Epoch[4/50] step:[30200/55751] loss:2.059696 took:0.17487s
Epoch[4/50] step:[30400/55751] loss:2.860778 took:0.25779s
Epoch[4/50] step:[30600/55751] loss:2.570573 took:0.17917s
Epoch[4/50] step:[30800/55751] loss:2.563593 took:0.19297s
Epoch[4/50] step:[31000/55751] loss:3.085794 took:0.28317s
Epoch[4/50] step:[31200/55751] loss:2.399266 took:0.1850

Epoch[5/50] step:[0/55751] loss:2.208181 took:0.21230s
Epoch[5/50] step:[200/55751] loss:2.335962 took:0.19479s
Epoch[5/50] step:[400/55751] loss:2.338373 took:0.21815s
Epoch[5/50] step:[600/55751] loss:2.554247 took:0.22678s
Epoch[5/50] step:[800/55751] loss:2.267000 took:0.24096s
Epoch[5/50] step:[1000/55751] loss:2.562115 took:0.20037s
Epoch[5/50] step:[1200/55751] loss:2.732574 took:0.16143s
Epoch[5/50] step:[1400/55751] loss:2.440482 took:0.30469s
Epoch[5/50] step:[1600/55751] loss:2.828381 took:0.24274s
Epoch[5/50] step:[1800/55751] loss:2.969668 took:0.22213s
Epoch[5/50] step:[2000/55751] loss:2.084967 took:0.19536s
Epoch[5/50] step:[2200/55751] loss:2.389556 took:0.26533s
Epoch[5/50] step:[2400/55751] loss:2.873374 took:0.21228s
Epoch[5/50] step:[2600/55751] loss:2.910940 took:0.22786s
Epoch[5/50] step:[2800/55751] loss:2.853893 took:0.32583s
Epoch[5/50] step:[3000/55751] loss:2.756639 took:0.29150s
Epoch[5/50] step:[3200/55751] loss:2.734850 took:0.23029s
Epoch[5/50] step:[340

Epoch[5/50] step:[28000/55751] loss:1.967420 took:0.45727s
Epoch[5/50] step:[28200/55751] loss:2.621556 took:0.21169s
Epoch[5/50] step:[28400/55751] loss:2.156180 took:0.20034s
Epoch[5/50] step:[28600/55751] loss:2.572922 took:0.22501s
Epoch[5/50] step:[28800/55751] loss:2.588402 took:0.27834s
Epoch[5/50] step:[29000/55751] loss:2.349917 took:0.31791s
Epoch[5/50] step:[29200/55751] loss:2.544077 took:0.17799s
Epoch[5/50] step:[29400/55751] loss:2.576973 took:0.23276s
Epoch[5/50] step:[29600/55751] loss:2.213033 took:0.19363s
Epoch[5/50] step:[29800/55751] loss:3.333107 took:0.21405s
Epoch[5/50] step:[30000/55751] loss:2.570036 took:0.20176s
Epoch[5/50] step:[30200/55751] loss:2.559742 took:0.30202s
Epoch[5/50] step:[30400/55751] loss:2.089204 took:0.21546s
Epoch[5/50] step:[30600/55751] loss:3.264108 took:0.19093s
Epoch[5/50] step:[30800/55751] loss:2.756359 took:0.18654s
Epoch[5/50] step:[31000/55751] loss:2.454274 took:0.19333s
Epoch[5/50] step:[31200/55751] loss:2.845390 took:0.2308

Epoch[6/50] step:[0/55751] loss:2.953561 took:0.26067s
Epoch[6/50] step:[200/55751] loss:2.293021 took:0.23819s
Epoch[6/50] step:[400/55751] loss:2.352217 took:0.23273s
Epoch[6/50] step:[600/55751] loss:2.926005 took:0.29265s
Epoch[6/50] step:[800/55751] loss:2.080417 took:0.18979s
Epoch[6/50] step:[1000/55751] loss:2.364206 took:0.25970s
Epoch[6/50] step:[1200/55751] loss:2.546064 took:0.20063s
Epoch[6/50] step:[1400/55751] loss:2.445338 took:0.20936s
Epoch[6/50] step:[1600/55751] loss:2.479281 took:0.18036s
Epoch[6/50] step:[1800/55751] loss:2.708373 took:0.34589s
Epoch[6/50] step:[2000/55751] loss:2.139989 took:0.16465s
Epoch[6/50] step:[2200/55751] loss:2.075121 took:0.18967s
Epoch[6/50] step:[2400/55751] loss:3.632374 took:0.23164s
Epoch[6/50] step:[2600/55751] loss:3.037117 took:0.43221s
Epoch[6/50] step:[2800/55751] loss:2.236327 took:0.23386s
Epoch[6/50] step:[3000/55751] loss:2.394299 took:0.26557s
Epoch[6/50] step:[3200/55751] loss:2.584160 took:0.29624s
Epoch[6/50] step:[340

Epoch[6/50] step:[28000/55751] loss:2.701477 took:0.21150s
Epoch[6/50] step:[28200/55751] loss:2.283904 took:0.21232s
Epoch[6/50] step:[28400/55751] loss:2.409762 took:0.22712s
Epoch[6/50] step:[28600/55751] loss:2.145202 took:0.26396s
Epoch[6/50] step:[28800/55751] loss:2.416001 took:0.32666s
Epoch[6/50] step:[29000/55751] loss:1.991840 took:0.26932s
Epoch[6/50] step:[29200/55751] loss:2.878760 took:0.25149s
Epoch[6/50] step:[29400/55751] loss:3.061058 took:0.25207s
Epoch[6/50] step:[29600/55751] loss:3.065972 took:0.28891s
Epoch[6/50] step:[29800/55751] loss:2.023954 took:0.18490s
Epoch[6/50] step:[30000/55751] loss:2.824114 took:0.25828s
Epoch[6/50] step:[30200/55751] loss:2.826731 took:0.23590s
Epoch[6/50] step:[30400/55751] loss:2.634142 took:0.19662s
Epoch[6/50] step:[30600/55751] loss:2.166260 took:0.29735s
Epoch[6/50] step:[30800/55751] loss:2.368112 took:0.24517s
Epoch[6/50] step:[31000/55751] loss:2.654596 took:0.22299s
Epoch[6/50] step:[31200/55751] loss:1.854310 took:0.2140

Epoch[7/50] step:[0/55751] loss:2.988204 took:0.18554s
Epoch[7/50] step:[200/55751] loss:2.517268 took:0.25785s
Epoch[7/50] step:[400/55751] loss:2.114923 took:0.24978s
Epoch[7/50] step:[600/55751] loss:2.280854 took:0.22566s
Epoch[7/50] step:[800/55751] loss:2.082056 took:0.17242s
Epoch[7/50] step:[1000/55751] loss:2.167912 took:0.16998s
Epoch[7/50] step:[1200/55751] loss:2.229914 took:0.21091s
Epoch[7/50] step:[1400/55751] loss:3.121120 took:0.29974s
Epoch[7/50] step:[1600/55751] loss:3.107193 took:0.26036s
Epoch[7/50] step:[1800/55751] loss:2.728512 took:0.30553s
Epoch[7/50] step:[2000/55751] loss:2.708241 took:0.22107s
Epoch[7/50] step:[2200/55751] loss:2.417268 took:0.17305s
Epoch[7/50] step:[2400/55751] loss:2.175534 took:0.17134s
Epoch[7/50] step:[2600/55751] loss:2.540598 took:0.24553s
Epoch[7/50] step:[2800/55751] loss:2.601180 took:0.27775s
Epoch[7/50] step:[3000/55751] loss:2.789011 took:0.23871s
Epoch[7/50] step:[3200/55751] loss:2.347465 took:0.21695s
Epoch[7/50] step:[340

Epoch[7/50] step:[28000/55751] loss:2.376882 took:0.43327s
Epoch[7/50] step:[28200/55751] loss:2.762639 took:0.19403s
Epoch[7/50] step:[28400/55751] loss:3.507679 took:0.27510s
Epoch[7/50] step:[28600/55751] loss:2.478986 took:0.21693s
Epoch[7/50] step:[28800/55751] loss:2.600552 took:0.40548s
Epoch[7/50] step:[29000/55751] loss:2.590881 took:0.22515s
Epoch[7/50] step:[29200/55751] loss:1.946926 took:0.18769s
Epoch[7/50] step:[29400/55751] loss:2.746811 took:0.18837s
Epoch[7/50] step:[29600/55751] loss:2.665011 took:0.19318s
Epoch[7/50] step:[29800/55751] loss:3.083929 took:0.23511s
Epoch[7/50] step:[30000/55751] loss:1.990945 took:0.21274s
Epoch[7/50] step:[30200/55751] loss:2.550523 took:0.31320s
Epoch[7/50] step:[30400/55751] loss:2.168164 took:0.25450s
Epoch[7/50] step:[30600/55751] loss:2.946119 took:0.25478s
Epoch[7/50] step:[30800/55751] loss:2.550682 took:0.18869s
Epoch[7/50] step:[31000/55751] loss:3.123786 took:0.26066s
Epoch[7/50] step:[31200/55751] loss:2.341809 took:0.2649

Epoch[8/50] step:[0/55751] loss:2.773488 took:0.19730s
Epoch[8/50] step:[200/55751] loss:3.225747 took:0.18110s
Epoch[8/50] step:[400/55751] loss:2.763969 took:0.18257s
Epoch[8/50] step:[600/55751] loss:1.956706 took:0.27781s
Epoch[8/50] step:[800/55751] loss:2.162959 took:0.30157s
Epoch[8/50] step:[1000/55751] loss:2.726709 took:0.34581s
Epoch[8/50] step:[1200/55751] loss:2.831597 took:0.21233s
Epoch[8/50] step:[1400/55751] loss:2.344404 took:0.21155s
Epoch[8/50] step:[1600/55751] loss:2.887229 took:0.33820s
Epoch[8/50] step:[1800/55751] loss:2.397575 took:0.24738s
Epoch[8/50] step:[2000/55751] loss:1.861284 took:0.18629s
Epoch[8/50] step:[2200/55751] loss:2.762949 took:0.22316s
Epoch[8/50] step:[2400/55751] loss:2.169971 took:0.25118s
Epoch[8/50] step:[2600/55751] loss:2.164183 took:0.24375s
Epoch[8/50] step:[2800/55751] loss:2.814989 took:0.18642s
Epoch[8/50] step:[3000/55751] loss:2.771981 took:0.27548s
Epoch[8/50] step:[3200/55751] loss:2.876618 took:0.31319s
Epoch[8/50] step:[340

Epoch[8/50] step:[28000/55751] loss:2.838585 took:0.24633s
Epoch[8/50] step:[28200/55751] loss:3.225751 took:0.29761s
Epoch[8/50] step:[28400/55751] loss:3.368948 took:0.26308s
Epoch[8/50] step:[28600/55751] loss:2.403670 took:0.23436s
Epoch[8/50] step:[28800/55751] loss:1.823674 took:0.23838s
Epoch[8/50] step:[29000/55751] loss:2.572625 took:0.23534s
Epoch[8/50] step:[29200/55751] loss:2.285294 took:0.24800s
Epoch[8/50] step:[29400/55751] loss:2.998060 took:0.27502s
Epoch[8/50] step:[29600/55751] loss:2.913362 took:0.32837s
Epoch[8/50] step:[29800/55751] loss:3.112243 took:0.32820s
Epoch[8/50] step:[30000/55751] loss:2.214474 took:0.23662s
Epoch[8/50] step:[30200/55751] loss:2.265702 took:0.21644s
Epoch[8/50] step:[30400/55751] loss:2.350537 took:0.22126s
Epoch[8/50] step:[30600/55751] loss:2.624499 took:0.19068s
Epoch[8/50] step:[30800/55751] loss:2.644031 took:0.25587s
Epoch[8/50] step:[31000/55751] loss:2.857882 took:0.23033s
Epoch[8/50] step:[31200/55751] loss:2.052327 took:0.2577

Epoch[9/50] step:[0/55751] loss:2.768947 took:0.18781s
Epoch[9/50] step:[200/55751] loss:2.606971 took:0.40686s
Epoch[9/50] step:[400/55751] loss:2.907110 took:0.20759s
Epoch[9/50] step:[600/55751] loss:2.611964 took:0.27247s
Epoch[9/50] step:[800/55751] loss:2.008555 took:0.20990s
Epoch[9/50] step:[1000/55751] loss:1.805624 took:0.21474s
Epoch[9/50] step:[1200/55751] loss:2.468758 took:0.29989s
Epoch[9/50] step:[1400/55751] loss:2.656060 took:0.34878s
Epoch[9/50] step:[1600/55751] loss:2.698774 took:0.18202s
Epoch[9/50] step:[1800/55751] loss:1.983472 took:0.24464s
Epoch[9/50] step:[2000/55751] loss:3.109217 took:0.23066s
Epoch[9/50] step:[2200/55751] loss:2.681388 took:0.36305s
Epoch[9/50] step:[2400/55751] loss:2.210290 took:0.22669s
Epoch[9/50] step:[2600/55751] loss:2.024865 took:0.23161s
Epoch[9/50] step:[2800/55751] loss:2.026462 took:0.19034s
Epoch[9/50] step:[3000/55751] loss:2.784500 took:0.19735s
Epoch[9/50] step:[3200/55751] loss:3.098523 took:0.29380s
Epoch[9/50] step:[340

Epoch[9/50] step:[28000/55751] loss:2.484870 took:0.22203s
Epoch[9/50] step:[28200/55751] loss:3.145016 took:0.31459s
Epoch[9/50] step:[28400/55751] loss:2.568285 took:0.24362s
Epoch[9/50] step:[28600/55751] loss:2.762566 took:0.25864s
Epoch[9/50] step:[28800/55751] loss:3.204106 took:0.23617s
Epoch[9/50] step:[29000/55751] loss:2.262322 took:0.23696s
Epoch[9/50] step:[29200/55751] loss:2.804399 took:0.19746s
Epoch[9/50] step:[29400/55751] loss:2.910990 took:0.29069s
Epoch[9/50] step:[29600/55751] loss:2.903672 took:0.28170s
Epoch[9/50] step:[29800/55751] loss:2.684704 took:0.23694s
Epoch[9/50] step:[30000/55751] loss:1.816152 took:0.23310s
Epoch[9/50] step:[30200/55751] loss:2.313236 took:0.20160s
Epoch[9/50] step:[30400/55751] loss:2.275740 took:0.29215s
Epoch[9/50] step:[30600/55751] loss:2.051530 took:0.16386s
Epoch[9/50] step:[30800/55751] loss:2.769652 took:0.34926s
Epoch[9/50] step:[31000/55751] loss:2.501127 took:0.22168s
Epoch[9/50] step:[31200/55751] loss:2.348888 took:0.2171

Epoch[10/50] step:[0/55751] loss:2.435438 took:0.17783s
Epoch[10/50] step:[200/55751] loss:2.914900 took:0.26957s
Epoch[10/50] step:[400/55751] loss:1.953010 took:0.39239s
Epoch[10/50] step:[600/55751] loss:2.258018 took:0.24929s
Epoch[10/50] step:[800/55751] loss:2.534492 took:0.27633s
Epoch[10/50] step:[1000/55751] loss:2.601918 took:0.35359s
Epoch[10/50] step:[1200/55751] loss:2.135803 took:0.16927s
Epoch[10/50] step:[1400/55751] loss:2.363893 took:0.20717s
Epoch[10/50] step:[1600/55751] loss:2.256545 took:0.19109s
Epoch[10/50] step:[1800/55751] loss:2.179236 took:0.21763s
Epoch[10/50] step:[2000/55751] loss:2.502544 took:0.18020s
Epoch[10/50] step:[2200/55751] loss:2.338300 took:0.21283s
Epoch[10/50] step:[2400/55751] loss:2.137384 took:0.23223s
Epoch[10/50] step:[2600/55751] loss:2.551644 took:0.26218s
Epoch[10/50] step:[2800/55751] loss:2.993524 took:0.18957s
Epoch[10/50] step:[3000/55751] loss:2.564357 took:0.23824s
Epoch[10/50] step:[3200/55751] loss:2.906269 took:0.19534s
Epoc

Epoch[10/50] step:[27600/55751] loss:3.106369 took:0.32019s
Epoch[10/50] step:[27800/55751] loss:2.627766 took:0.19413s
Epoch[10/50] step:[28000/55751] loss:1.758232 took:0.27526s
Epoch[10/50] step:[28200/55751] loss:2.523583 took:0.40800s
Epoch[10/50] step:[28400/55751] loss:2.359634 took:0.21703s
Epoch[10/50] step:[28600/55751] loss:3.005014 took:0.18619s
Epoch[10/50] step:[28800/55751] loss:2.463194 took:0.24187s
Epoch[10/50] step:[29000/55751] loss:2.784505 took:0.23913s
Epoch[10/50] step:[29200/55751] loss:2.158007 took:0.25561s
Epoch[10/50] step:[29400/55751] loss:2.213960 took:0.17385s
Epoch[10/50] step:[29600/55751] loss:2.049103 took:0.18986s
Epoch[10/50] step:[29800/55751] loss:2.187077 took:0.25619s
Epoch[10/50] step:[30000/55751] loss:2.619201 took:0.25444s
Epoch[10/50] step:[30200/55751] loss:2.722682 took:0.36362s
Epoch[10/50] step:[30400/55751] loss:2.126348 took:0.26943s
Epoch[10/50] step:[30600/55751] loss:2.889319 took:0.19414s
Epoch[10/50] step:[30800/55751] loss:2.2

Epoch[10/50] step:[55000/55751] loss:1.933077 took:0.19554s
Epoch[10/50] step:[55200/55751] loss:2.282076 took:0.21052s
Epoch[10/50] step:[55400/55751] loss:1.710720 took:0.18913s
Epoch[10/50] step:[55600/55751] loss:2.307888 took:0.17233s
Epoch[11/50] step:[0/55751] loss:2.914372 took:0.29708s
Epoch[11/50] step:[200/55751] loss:2.726615 took:0.27640s
Epoch[11/50] step:[400/55751] loss:2.596944 took:0.19608s
Epoch[11/50] step:[600/55751] loss:2.934755 took:0.47192s
Epoch[11/50] step:[800/55751] loss:1.955085 took:0.21398s
Epoch[11/50] step:[1000/55751] loss:2.721867 took:0.29509s
Epoch[11/50] step:[1200/55751] loss:2.949477 took:0.25942s
Epoch[11/50] step:[1400/55751] loss:2.732538 took:0.23469s
Epoch[11/50] step:[1600/55751] loss:2.402731 took:0.19307s
Epoch[11/50] step:[1800/55751] loss:2.168642 took:0.28147s
Epoch[11/50] step:[2000/55751] loss:2.374731 took:0.18864s
Epoch[11/50] step:[2200/55751] loss:2.891066 took:0.20715s
Epoch[11/50] step:[2400/55751] loss:2.922961 took:0.28195s


Epoch[11/50] step:[26800/55751] loss:3.045445 took:0.25912s
Epoch[11/50] step:[27000/55751] loss:2.667336 took:0.36244s
Epoch[11/50] step:[27200/55751] loss:2.959774 took:0.27466s
Epoch[11/50] step:[27400/55751] loss:2.415482 took:0.20851s
Epoch[11/50] step:[27600/55751] loss:2.131178 took:0.20427s
Epoch[11/50] step:[27800/55751] loss:2.336641 took:0.18360s
Epoch[11/50] step:[28000/55751] loss:2.601204 took:0.25037s
Epoch[11/50] step:[28200/55751] loss:2.429236 took:0.16452s
Epoch[11/50] step:[28400/55751] loss:2.028226 took:0.24991s
Epoch[11/50] step:[28600/55751] loss:2.753020 took:0.34690s
Epoch[11/50] step:[28800/55751] loss:2.403037 took:0.26117s
Epoch[11/50] step:[29000/55751] loss:2.494111 took:0.38997s
Epoch[11/50] step:[29200/55751] loss:2.435939 took:0.23815s
Epoch[11/50] step:[29400/55751] loss:2.432107 took:0.33901s
Epoch[11/50] step:[29600/55751] loss:2.566416 took:0.23216s
Epoch[11/50] step:[29800/55751] loss:2.693968 took:0.17981s
Epoch[11/50] step:[30000/55751] loss:2.1

Epoch[11/50] step:[54200/55751] loss:2.312982 took:0.20562s
Epoch[11/50] step:[54400/55751] loss:3.015017 took:0.29127s
Epoch[11/50] step:[54600/55751] loss:2.415601 took:0.23879s
Epoch[11/50] step:[54800/55751] loss:2.084219 took:0.27254s
Epoch[11/50] step:[55000/55751] loss:2.720547 took:0.25666s
Epoch[11/50] step:[55200/55751] loss:2.600679 took:0.22273s
Epoch[11/50] step:[55400/55751] loss:2.538370 took:0.27208s
Epoch[11/50] step:[55600/55751] loss:2.946696 took:0.27390s
Epoch[12/50] step:[0/55751] loss:2.694948 took:0.23693s
Epoch[12/50] step:[200/55751] loss:2.599445 took:0.31457s
Epoch[12/50] step:[400/55751] loss:2.472495 took:0.21162s
Epoch[12/50] step:[600/55751] loss:2.473077 took:0.23226s
Epoch[12/50] step:[800/55751] loss:3.215676 took:0.24752s
Epoch[12/50] step:[1000/55751] loss:2.533998 took:0.25963s
Epoch[12/50] step:[1200/55751] loss:2.719080 took:0.22035s
Epoch[12/50] step:[1400/55751] loss:3.226361 took:0.22186s
Epoch[12/50] step:[1600/55751] loss:2.113799 took:0.228

Epoch[12/50] step:[26000/55751] loss:2.535989 took:0.20845s
Epoch[12/50] step:[26200/55751] loss:2.479047 took:0.23962s
Epoch[12/50] step:[26400/55751] loss:1.919555 took:0.25410s
Epoch[12/50] step:[26600/55751] loss:2.859074 took:0.30416s
Epoch[12/50] step:[26800/55751] loss:2.623600 took:0.19456s
Epoch[12/50] step:[27000/55751] loss:2.399472 took:0.50763s
Epoch[12/50] step:[27200/55751] loss:2.302964 took:0.18085s
Epoch[12/50] step:[27400/55751] loss:2.712817 took:0.18896s
Epoch[12/50] step:[27600/55751] loss:2.439376 took:0.24729s
Epoch[12/50] step:[27800/55751] loss:2.266640 took:0.24211s
Epoch[12/50] step:[28000/55751] loss:2.596398 took:0.23972s
Epoch[12/50] step:[28200/55751] loss:2.490053 took:0.18518s
Epoch[12/50] step:[28400/55751] loss:2.661331 took:0.19303s
Epoch[12/50] step:[28600/55751] loss:2.182795 took:0.27997s
Epoch[12/50] step:[28800/55751] loss:2.559201 took:0.20568s
Epoch[12/50] step:[29000/55751] loss:2.184958 took:0.31138s
Epoch[12/50] step:[29200/55751] loss:2.7

Epoch[12/50] step:[53400/55751] loss:3.127558 took:0.37883s
Epoch[12/50] step:[53600/55751] loss:2.680958 took:0.20502s
Epoch[12/50] step:[53800/55751] loss:2.980281 took:0.32892s
Epoch[12/50] step:[54000/55751] loss:2.478983 took:0.19977s
Epoch[12/50] step:[54200/55751] loss:2.436926 took:0.17656s
Epoch[12/50] step:[54400/55751] loss:2.502968 took:0.24107s
Epoch[12/50] step:[54600/55751] loss:3.007122 took:0.26055s
Epoch[12/50] step:[54800/55751] loss:2.725855 took:0.22227s
Epoch[12/50] step:[55000/55751] loss:2.762520 took:0.22045s
Epoch[12/50] step:[55200/55751] loss:2.300598 took:0.21587s
Epoch[12/50] step:[55400/55751] loss:2.607362 took:0.20076s
Epoch[12/50] step:[55600/55751] loss:2.555941 took:0.37694s
Epoch[13/50] step:[0/55751] loss:3.128757 took:0.25839s
Epoch[13/50] step:[200/55751] loss:2.943148 took:0.25166s
Epoch[13/50] step:[400/55751] loss:2.442415 took:0.18840s
Epoch[13/50] step:[600/55751] loss:2.293628 took:0.18076s
Epoch[13/50] step:[800/55751] loss:2.050345 took:0

Epoch[13/50] step:[25200/55751] loss:3.289389 took:0.45636s
Epoch[13/50] step:[25400/55751] loss:2.028996 took:0.18119s
Epoch[13/50] step:[25600/55751] loss:2.593750 took:0.25163s
Epoch[13/50] step:[25800/55751] loss:2.795666 took:0.31305s
Epoch[13/50] step:[26000/55751] loss:2.349818 took:0.16981s
Epoch[13/50] step:[26200/55751] loss:2.512723 took:0.30658s
Epoch[13/50] step:[26400/55751] loss:2.352350 took:0.24030s
Epoch[13/50] step:[26600/55751] loss:2.342780 took:0.18308s
Epoch[13/50] step:[26800/55751] loss:2.657401 took:0.22940s
Epoch[13/50] step:[27000/55751] loss:2.031395 took:0.17677s
Epoch[13/50] step:[27200/55751] loss:1.657501 took:0.18127s
Epoch[13/50] step:[27400/55751] loss:2.577446 took:0.26912s
Epoch[13/50] step:[27600/55751] loss:2.332324 took:0.24805s
Epoch[13/50] step:[27800/55751] loss:2.689367 took:0.34463s
Epoch[13/50] step:[28000/55751] loss:2.277620 took:0.31647s
Epoch[13/50] step:[28200/55751] loss:2.434471 took:0.22790s
Epoch[13/50] step:[28400/55751] loss:2.1

Epoch[13/50] step:[52600/55751] loss:2.441788 took:0.27626s
Epoch[13/50] step:[52800/55751] loss:2.718000 took:0.22645s
Epoch[13/50] step:[53000/55751] loss:2.743533 took:0.32371s
Epoch[13/50] step:[53200/55751] loss:3.478815 took:0.19597s
Epoch[13/50] step:[53400/55751] loss:2.194440 took:0.23706s
Epoch[13/50] step:[53600/55751] loss:3.048893 took:0.33233s
Epoch[13/50] step:[53800/55751] loss:2.627845 took:0.24073s
Epoch[13/50] step:[54000/55751] loss:2.474006 took:0.23237s
Epoch[13/50] step:[54200/55751] loss:2.658233 took:0.27746s
Epoch[13/50] step:[54400/55751] loss:2.090791 took:0.17680s
Epoch[13/50] step:[54600/55751] loss:2.375173 took:0.21908s
Epoch[13/50] step:[54800/55751] loss:2.769477 took:0.27612s
Epoch[13/50] step:[55000/55751] loss:2.337409 took:0.17888s
Epoch[13/50] step:[55200/55751] loss:2.967439 took:0.26609s
Epoch[13/50] step:[55400/55751] loss:2.035810 took:0.25560s
Epoch[13/50] step:[55600/55751] loss:2.709673 took:0.21323s
Epoch[14/50] step:[0/55751] loss:2.38777

Epoch[14/50] step:[24400/55751] loss:3.036633 took:0.22563s
Epoch[14/50] step:[24600/55751] loss:2.108697 took:0.27746s
Epoch[14/50] step:[24800/55751] loss:2.695050 took:0.23753s
Epoch[14/50] step:[25000/55751] loss:2.489789 took:0.24284s
Epoch[14/50] step:[25200/55751] loss:2.474396 took:0.18743s
Epoch[14/50] step:[25400/55751] loss:2.411004 took:0.19735s
Epoch[14/50] step:[25600/55751] loss:1.936063 took:0.18800s
Epoch[14/50] step:[25800/55751] loss:2.407572 took:0.20507s
Epoch[14/50] step:[26000/55751] loss:2.543873 took:0.30912s
Epoch[14/50] step:[26200/55751] loss:2.438063 took:0.20665s
Epoch[14/50] step:[26400/55751] loss:2.347705 took:0.27965s
Epoch[14/50] step:[26600/55751] loss:2.458334 took:0.17901s
Epoch[14/50] step:[26800/55751] loss:2.138012 took:0.34037s
Epoch[14/50] step:[27000/55751] loss:2.638311 took:0.23010s
Epoch[14/50] step:[27200/55751] loss:1.961500 took:0.25762s
Epoch[14/50] step:[27400/55751] loss:2.441526 took:0.20407s
Epoch[14/50] step:[27600/55751] loss:2.1

Epoch[14/50] step:[51800/55751] loss:2.779521 took:0.23157s
Epoch[14/50] step:[52000/55751] loss:2.521294 took:0.22331s
Epoch[14/50] step:[52200/55751] loss:2.396750 took:0.20244s
Epoch[14/50] step:[52400/55751] loss:1.910584 took:0.18638s
Epoch[14/50] step:[52600/55751] loss:2.407799 took:0.24849s
Epoch[14/50] step:[52800/55751] loss:3.018686 took:0.18399s
Epoch[14/50] step:[53000/55751] loss:3.247487 took:0.20619s
Epoch[14/50] step:[53200/55751] loss:2.089673 took:0.21910s
Epoch[14/50] step:[53400/55751] loss:2.255807 took:0.16665s
Epoch[14/50] step:[53600/55751] loss:2.004811 took:0.25208s
Epoch[14/50] step:[53800/55751] loss:2.227084 took:0.19006s
Epoch[14/50] step:[54000/55751] loss:2.819490 took:0.18933s
Epoch[14/50] step:[54200/55751] loss:2.314976 took:0.20901s
Epoch[14/50] step:[54400/55751] loss:3.047347 took:0.34042s
Epoch[14/50] step:[54600/55751] loss:2.139165 took:0.20058s
Epoch[14/50] step:[54800/55751] loss:2.788322 took:0.26220s
Epoch[14/50] step:[55000/55751] loss:3.1

Epoch[15/50] step:[23600/55751] loss:2.693413 took:0.18797s
Epoch[15/50] step:[23800/55751] loss:2.921573 took:0.20706s
Epoch[15/50] step:[24000/55751] loss:2.875696 took:0.23337s
Epoch[15/50] step:[24200/55751] loss:2.198243 took:0.20483s
Epoch[15/50] step:[24400/55751] loss:2.114692 took:0.17113s
Epoch[15/50] step:[24600/55751] loss:2.318985 took:0.25079s
Epoch[15/50] step:[24800/55751] loss:2.186861 took:0.30275s
Epoch[15/50] step:[25000/55751] loss:2.421210 took:0.20108s
Epoch[15/50] step:[25200/55751] loss:2.218076 took:0.19195s
Epoch[15/50] step:[25400/55751] loss:2.715265 took:0.26873s
Epoch[15/50] step:[25600/55751] loss:2.331409 took:0.34759s
Epoch[15/50] step:[25800/55751] loss:2.383277 took:0.22680s
Epoch[15/50] step:[26000/55751] loss:2.132585 took:0.21301s
Epoch[15/50] step:[26200/55751] loss:2.817822 took:0.26992s
Epoch[15/50] step:[26400/55751] loss:2.994681 took:0.29565s
Epoch[15/50] step:[26600/55751] loss:2.130360 took:0.23171s
Epoch[15/50] step:[26800/55751] loss:2.0

Epoch[15/50] step:[51000/55751] loss:2.053098 took:0.18981s
Epoch[15/50] step:[51200/55751] loss:2.930759 took:0.25876s
Epoch[15/50] step:[51400/55751] loss:2.059194 took:0.17388s
Epoch[15/50] step:[51600/55751] loss:2.467768 took:0.27758s
Epoch[15/50] step:[51800/55751] loss:1.622183 took:0.18318s
Epoch[15/50] step:[52000/55751] loss:2.427252 took:0.19242s
Epoch[15/50] step:[52200/55751] loss:3.074453 took:0.20090s
Epoch[15/50] step:[52400/55751] loss:2.498569 took:0.29952s
Epoch[15/50] step:[52600/55751] loss:2.122914 took:0.24159s
Epoch[15/50] step:[52800/55751] loss:2.175507 took:0.31091s
Epoch[15/50] step:[53000/55751] loss:1.655804 took:0.45561s
Epoch[15/50] step:[53200/55751] loss:1.751144 took:0.19966s
Epoch[15/50] step:[53400/55751] loss:2.783687 took:0.25439s
Epoch[15/50] step:[53600/55751] loss:2.789830 took:0.27507s
Epoch[15/50] step:[53800/55751] loss:2.541017 took:0.25203s
Epoch[15/50] step:[54000/55751] loss:2.621267 took:0.26864s
Epoch[15/50] step:[54200/55751] loss:2.2

Epoch[16/50] step:[22800/55751] loss:2.250032 took:0.20749s
Epoch[16/50] step:[23000/55751] loss:2.544790 took:0.31251s
Epoch[16/50] step:[23200/55751] loss:2.147830 took:0.15493s
Epoch[16/50] step:[23400/55751] loss:2.513803 took:0.29245s
Epoch[16/50] step:[23600/55751] loss:2.202465 took:0.24795s
Epoch[16/50] step:[23800/55751] loss:1.628360 took:0.23515s
Epoch[16/50] step:[24000/55751] loss:3.030252 took:0.27017s
Epoch[16/50] step:[24200/55751] loss:2.185317 took:0.18653s
Epoch[16/50] step:[24400/55751] loss:2.864803 took:0.22621s
Epoch[16/50] step:[24600/55751] loss:2.771143 took:0.33839s
Epoch[16/50] step:[24800/55751] loss:2.036365 took:0.21847s
Epoch[16/50] step:[25000/55751] loss:2.181532 took:0.18565s
Epoch[16/50] step:[25200/55751] loss:2.682302 took:0.28734s
Epoch[16/50] step:[25400/55751] loss:2.479000 took:0.24110s
Epoch[16/50] step:[25600/55751] loss:2.286115 took:0.21396s
Epoch[16/50] step:[25800/55751] loss:1.977972 took:0.25306s
Epoch[16/50] step:[26000/55751] loss:2.1

Epoch[16/50] step:[50200/55751] loss:1.994567 took:0.18529s
Epoch[16/50] step:[50400/55751] loss:3.017266 took:0.23042s
Epoch[16/50] step:[50600/55751] loss:2.571438 took:0.19140s
Epoch[16/50] step:[50800/55751] loss:2.732807 took:0.24702s
Epoch[16/50] step:[51000/55751] loss:2.146828 took:0.25089s
Epoch[16/50] step:[51200/55751] loss:2.699632 took:0.16539s
Epoch[16/50] step:[51400/55751] loss:2.947192 took:0.27285s
Epoch[16/50] step:[51600/55751] loss:1.861385 took:0.17049s
Epoch[16/50] step:[51800/55751] loss:2.849647 took:0.19773s
Epoch[16/50] step:[52000/55751] loss:1.901320 took:0.19047s
Epoch[16/50] step:[52200/55751] loss:2.705723 took:0.18830s
Epoch[16/50] step:[52400/55751] loss:2.584616 took:0.21871s
Epoch[16/50] step:[52600/55751] loss:2.184847 took:0.28650s
Epoch[16/50] step:[52800/55751] loss:2.401851 took:0.25628s
Epoch[16/50] step:[53000/55751] loss:2.058932 took:0.21866s
Epoch[16/50] step:[53200/55751] loss:2.646819 took:0.21225s
Epoch[16/50] step:[53400/55751] loss:2.3

Epoch[17/50] step:[22000/55751] loss:2.023611 took:0.28689s
Epoch[17/50] step:[22200/55751] loss:2.159633 took:0.21037s
Epoch[17/50] step:[22400/55751] loss:2.978907 took:0.22413s
Epoch[17/50] step:[22600/55751] loss:3.076259 took:0.20579s
Epoch[17/50] step:[22800/55751] loss:1.952771 took:0.19366s
Epoch[17/50] step:[23000/55751] loss:2.342258 took:0.22427s
Epoch[17/50] step:[23200/55751] loss:2.766603 took:0.25198s
Epoch[17/50] step:[23400/55751] loss:2.967085 took:0.28727s
Epoch[17/50] step:[23600/55751] loss:2.277642 took:0.28569s
Epoch[17/50] step:[23800/55751] loss:1.667943 took:0.25310s
Epoch[17/50] step:[24000/55751] loss:1.935354 took:0.22729s
Epoch[17/50] step:[24200/55751] loss:2.489389 took:0.24493s
Epoch[17/50] step:[24400/55751] loss:2.177545 took:0.24062s
Epoch[17/50] step:[24600/55751] loss:2.648776 took:0.20352s
Epoch[17/50] step:[24800/55751] loss:2.586986 took:0.19903s
Epoch[17/50] step:[25000/55751] loss:2.411959 took:0.20961s
Epoch[17/50] step:[25200/55751] loss:2.2

Epoch[17/50] step:[49400/55751] loss:2.225221 took:0.35078s
Epoch[17/50] step:[49600/55751] loss:2.419329 took:0.23161s
Epoch[17/50] step:[49800/55751] loss:2.477315 took:0.18689s
Epoch[17/50] step:[50000/55751] loss:2.303893 took:0.25618s
Epoch[17/50] step:[50200/55751] loss:2.423845 took:0.23810s
Epoch[17/50] step:[50400/55751] loss:2.138276 took:0.17155s
Epoch[17/50] step:[50600/55751] loss:2.132734 took:0.17913s
Epoch[17/50] step:[50800/55751] loss:2.843984 took:0.17692s
Epoch[17/50] step:[51000/55751] loss:2.949976 took:0.26022s
Epoch[17/50] step:[51200/55751] loss:2.033995 took:0.18642s
Epoch[17/50] step:[51400/55751] loss:1.940418 took:0.25310s
Epoch[17/50] step:[51600/55751] loss:1.967052 took:0.18810s
Epoch[17/50] step:[51800/55751] loss:2.852893 took:0.21083s
Epoch[17/50] step:[52000/55751] loss:1.966823 took:0.23342s
Epoch[17/50] step:[52200/55751] loss:2.283545 took:0.18906s
Epoch[17/50] step:[52400/55751] loss:2.764456 took:0.18116s
Epoch[17/50] step:[52600/55751] loss:2.7

Epoch[18/50] step:[21200/55751] loss:2.522814 took:0.17929s
Epoch[18/50] step:[21400/55751] loss:2.823671 took:0.24054s
Epoch[18/50] step:[21600/55751] loss:2.225631 took:0.17462s
Epoch[18/50] step:[21800/55751] loss:2.765960 took:0.25757s
Epoch[18/50] step:[22000/55751] loss:2.268718 took:0.19640s
Epoch[18/50] step:[22200/55751] loss:2.000443 took:0.21254s
Epoch[18/50] step:[22400/55751] loss:2.309845 took:0.26566s
Epoch[18/50] step:[22600/55751] loss:2.428180 took:0.22889s
Epoch[18/50] step:[22800/55751] loss:2.072367 took:0.24136s
Epoch[18/50] step:[23000/55751] loss:2.899028 took:0.25620s
Epoch[18/50] step:[23200/55751] loss:2.812558 took:0.29223s
Epoch[18/50] step:[23400/55751] loss:2.571864 took:0.19922s
Epoch[18/50] step:[23600/55751] loss:2.756302 took:0.22372s
Epoch[18/50] step:[23800/55751] loss:2.433436 took:0.22782s
Epoch[18/50] step:[24000/55751] loss:2.091705 took:0.18913s
Epoch[18/50] step:[24200/55751] loss:2.637362 took:0.35493s
Epoch[18/50] step:[24400/55751] loss:1.6

Epoch[18/50] step:[48600/55751] loss:2.270180 took:0.22316s
Epoch[18/50] step:[48800/55751] loss:2.035597 took:0.17826s
Epoch[18/50] step:[49000/55751] loss:2.145612 took:0.23890s
Epoch[18/50] step:[49200/55751] loss:1.587280 took:0.20042s
Epoch[18/50] step:[49400/55751] loss:2.373027 took:0.18434s
Epoch[18/50] step:[49600/55751] loss:2.165766 took:0.33078s
Epoch[18/50] step:[49800/55751] loss:2.295143 took:0.21393s
Epoch[18/50] step:[50000/55751] loss:2.062859 took:0.19810s
Epoch[18/50] step:[50200/55751] loss:2.645627 took:0.18042s
Epoch[18/50] step:[50400/55751] loss:3.091254 took:0.17801s
Epoch[18/50] step:[50600/55751] loss:1.867959 took:0.22637s
Epoch[18/50] step:[50800/55751] loss:2.338914 took:0.25295s
Epoch[18/50] step:[51000/55751] loss:2.383610 took:0.31440s
Epoch[18/50] step:[51200/55751] loss:2.169042 took:0.21748s
Epoch[18/50] step:[51400/55751] loss:2.305022 took:0.23424s
Epoch[18/50] step:[51600/55751] loss:2.327052 took:0.17425s
Epoch[18/50] step:[51800/55751] loss:2.2

Epoch[19/50] step:[20400/55751] loss:2.665439 took:0.19140s
Epoch[19/50] step:[20600/55751] loss:2.385688 took:0.23938s
Epoch[19/50] step:[20800/55751] loss:3.196021 took:0.40592s
Epoch[19/50] step:[21000/55751] loss:2.931950 took:0.21048s
Epoch[19/50] step:[21200/55751] loss:2.156080 took:0.21378s
Epoch[19/50] step:[21400/55751] loss:2.467317 took:0.19301s
Epoch[19/50] step:[21600/55751] loss:2.555077 took:0.25786s
Epoch[19/50] step:[21800/55751] loss:2.212930 took:0.26792s
Epoch[19/50] step:[22000/55751] loss:2.295932 took:0.20651s
Epoch[19/50] step:[22200/55751] loss:2.149434 took:0.28349s
Epoch[19/50] step:[22400/55751] loss:2.488884 took:0.19242s
Epoch[19/50] step:[22600/55751] loss:2.487574 took:0.23331s
Epoch[19/50] step:[22800/55751] loss:2.601836 took:0.30651s
Epoch[19/50] step:[23000/55751] loss:2.111119 took:0.23169s
Epoch[19/50] step:[23200/55751] loss:2.305011 took:0.19773s
Epoch[19/50] step:[23400/55751] loss:2.400578 took:0.19606s
Epoch[19/50] step:[23600/55751] loss:2.0

Epoch[19/50] step:[47800/55751] loss:2.499229 took:0.24824s
Epoch[19/50] step:[48000/55751] loss:2.075290 took:0.16382s
Epoch[19/50] step:[48200/55751] loss:2.740176 took:0.27492s
Epoch[19/50] step:[48400/55751] loss:2.085910 took:0.19432s
Epoch[19/50] step:[48600/55751] loss:2.319422 took:0.22870s
Epoch[19/50] step:[48800/55751] loss:2.804422 took:0.28995s
Epoch[19/50] step:[49000/55751] loss:2.622257 took:0.19983s
Epoch[19/50] step:[49200/55751] loss:1.896932 took:0.19850s
Epoch[19/50] step:[49400/55751] loss:2.124476 took:0.34588s
Epoch[19/50] step:[49600/55751] loss:2.612957 took:0.21057s
Epoch[19/50] step:[49800/55751] loss:2.422180 took:0.19879s
Epoch[19/50] step:[50000/55751] loss:2.250853 took:0.19508s
Epoch[19/50] step:[50200/55751] loss:1.992893 took:0.18066s
Epoch[19/50] step:[50400/55751] loss:2.340332 took:0.16985s
Epoch[19/50] step:[50600/55751] loss:2.345015 took:0.23788s
Epoch[19/50] step:[50800/55751] loss:2.354009 took:0.26752s
Epoch[19/50] step:[51000/55751] loss:3.0

Epoch[20/50] step:[19600/55751] loss:2.311474 took:0.22122s
Epoch[20/50] step:[19800/55751] loss:3.090244 took:0.18621s
Epoch[20/50] step:[20000/55751] loss:2.855750 took:0.28560s
Epoch[20/50] step:[20200/55751] loss:1.761151 took:0.19663s
Epoch[20/50] step:[20400/55751] loss:2.973824 took:0.28321s
Epoch[20/50] step:[20600/55751] loss:2.112076 took:0.17856s
Epoch[20/50] step:[20800/55751] loss:2.414004 took:0.17251s
Epoch[20/50] step:[21000/55751] loss:2.552281 took:0.17874s
Epoch[20/50] step:[21200/55751] loss:1.995768 took:0.19879s
Epoch[20/50] step:[21400/55751] loss:2.913311 took:0.23961s
Epoch[20/50] step:[21600/55751] loss:2.423124 took:0.24184s
Epoch[20/50] step:[21800/55751] loss:2.800204 took:0.22393s
Epoch[20/50] step:[22000/55751] loss:2.587296 took:0.18271s
Epoch[20/50] step:[22200/55751] loss:2.273524 took:0.22081s
Epoch[20/50] step:[22400/55751] loss:2.859375 took:0.33068s
Epoch[20/50] step:[22600/55751] loss:2.384389 took:0.21112s
Epoch[20/50] step:[22800/55751] loss:2.5

Epoch[20/50] step:[47000/55751] loss:2.402595 took:0.20215s
Epoch[20/50] step:[47200/55751] loss:2.077814 took:0.25715s
Epoch[20/50] step:[47400/55751] loss:2.577721 took:0.24331s
Epoch[20/50] step:[47600/55751] loss:2.533108 took:0.19311s
Epoch[20/50] step:[47800/55751] loss:2.031348 took:0.24043s
Epoch[20/50] step:[48000/55751] loss:2.547025 took:0.25208s
Epoch[20/50] step:[48200/55751] loss:1.827979 took:0.25993s
Epoch[20/50] step:[48400/55751] loss:2.612454 took:0.22131s
Epoch[20/50] step:[48600/55751] loss:1.985983 took:0.20875s
Epoch[20/50] step:[48800/55751] loss:2.701381 took:0.26880s
Epoch[20/50] step:[49000/55751] loss:2.922343 took:0.17543s
Epoch[20/50] step:[49200/55751] loss:2.833940 took:0.24719s
Epoch[20/50] step:[49400/55751] loss:2.122978 took:0.20011s
Epoch[20/50] step:[49600/55751] loss:2.247209 took:0.20204s
Epoch[20/50] step:[49800/55751] loss:1.946473 took:0.18898s
Epoch[20/50] step:[50000/55751] loss:2.239431 took:0.23701s
Epoch[20/50] step:[50200/55751] loss:2.1

Epoch[21/50] step:[18800/55751] loss:2.728998 took:0.22392s
Epoch[21/50] step:[19000/55751] loss:2.544889 took:0.29110s
Epoch[21/50] step:[19200/55751] loss:1.779912 took:0.23318s
Epoch[21/50] step:[19400/55751] loss:2.055627 took:0.20758s
Epoch[21/50] step:[19600/55751] loss:2.201023 took:0.21930s
Epoch[21/50] step:[19800/55751] loss:2.222427 took:0.27261s
Epoch[21/50] step:[20000/55751] loss:2.385949 took:0.27671s
Epoch[21/50] step:[20200/55751] loss:2.722083 took:0.30031s
Epoch[21/50] step:[20400/55751] loss:2.253724 took:0.22827s
Epoch[21/50] step:[20600/55751] loss:2.197624 took:0.28608s
Epoch[21/50] step:[20800/55751] loss:1.848021 took:0.19184s
Epoch[21/50] step:[21000/55751] loss:1.933483 took:0.23272s
Epoch[21/50] step:[21200/55751] loss:1.859872 took:0.20680s
Epoch[21/50] step:[21400/55751] loss:1.967007 took:0.26145s
Epoch[21/50] step:[21600/55751] loss:2.025493 took:0.26866s
Epoch[21/50] step:[21800/55751] loss:2.948632 took:0.25746s
Epoch[21/50] step:[22000/55751] loss:2.4

Epoch[21/50] step:[46200/55751] loss:2.222697 took:0.18987s
Epoch[21/50] step:[46400/55751] loss:2.242158 took:0.30127s
Epoch[21/50] step:[46600/55751] loss:2.215921 took:0.17889s
Epoch[21/50] step:[46800/55751] loss:2.581346 took:0.20517s
Epoch[21/50] step:[47000/55751] loss:2.343116 took:0.18925s
Epoch[21/50] step:[47200/55751] loss:2.358134 took:0.23547s
Epoch[21/50] step:[47400/55751] loss:1.894410 took:0.22234s
Epoch[21/50] step:[47600/55751] loss:2.368645 took:0.30337s
Epoch[21/50] step:[47800/55751] loss:2.222873 took:0.24941s
Epoch[21/50] step:[48000/55751] loss:2.285800 took:0.17632s
Epoch[21/50] step:[48200/55751] loss:2.716175 took:0.31002s
Epoch[21/50] step:[48400/55751] loss:2.186161 took:0.23656s
Epoch[21/50] step:[48600/55751] loss:2.642804 took:0.42683s
Epoch[21/50] step:[48800/55751] loss:1.691771 took:0.28284s
Epoch[21/50] step:[49000/55751] loss:2.030275 took:0.36034s
Epoch[21/50] step:[49200/55751] loss:2.688534 took:0.24342s
Epoch[21/50] step:[49400/55751] loss:2.2

Epoch[22/50] step:[18000/55751] loss:2.238489 took:0.19839s
Epoch[22/50] step:[18200/55751] loss:2.107967 took:0.22120s
Epoch[22/50] step:[18400/55751] loss:2.087883 took:0.17255s
Epoch[22/50] step:[18600/55751] loss:2.319152 took:0.24094s
Epoch[22/50] step:[18800/55751] loss:2.006840 took:0.27198s
Epoch[22/50] step:[19000/55751] loss:2.407248 took:0.28346s
Epoch[22/50] step:[19200/55751] loss:2.911346 took:0.22514s
Epoch[22/50] step:[19400/55751] loss:2.243412 took:0.20842s
Epoch[22/50] step:[19600/55751] loss:2.176113 took:0.19045s
Epoch[22/50] step:[19800/55751] loss:2.713980 took:0.22566s
Epoch[22/50] step:[20000/55751] loss:1.933595 took:0.20389s
Epoch[22/50] step:[20200/55751] loss:2.884780 took:0.24331s
Epoch[22/50] step:[20400/55751] loss:1.991837 took:0.21011s
Epoch[22/50] step:[20600/55751] loss:2.327119 took:0.34323s
Epoch[22/50] step:[20800/55751] loss:3.029221 took:0.22441s
Epoch[22/50] step:[21000/55751] loss:1.952452 took:0.18715s
Epoch[22/50] step:[21200/55751] loss:1.6

Epoch[22/50] step:[45400/55751] loss:3.739205 took:0.27928s
Epoch[22/50] step:[45600/55751] loss:2.248301 took:0.27075s
Epoch[22/50] step:[45800/55751] loss:2.377948 took:0.27863s
Epoch[22/50] step:[46000/55751] loss:1.795737 took:0.17278s
Epoch[22/50] step:[46200/55751] loss:2.569687 took:0.22483s
Epoch[22/50] step:[46400/55751] loss:1.857271 took:0.19558s
Epoch[22/50] step:[46600/55751] loss:1.940984 took:0.18061s
Epoch[22/50] step:[46800/55751] loss:2.823496 took:0.21659s
Epoch[22/50] step:[47000/55751] loss:2.364567 took:0.23084s
Epoch[22/50] step:[47200/55751] loss:1.859168 took:0.22389s
Epoch[22/50] step:[47400/55751] loss:1.978804 took:0.24897s
Epoch[22/50] step:[47600/55751] loss:2.277526 took:0.20771s
Epoch[22/50] step:[47800/55751] loss:2.406193 took:0.28927s
Epoch[22/50] step:[48000/55751] loss:2.332015 took:0.24601s
Epoch[22/50] step:[48200/55751] loss:1.825627 took:0.20388s
Epoch[22/50] step:[48400/55751] loss:2.294664 took:0.24304s
Epoch[22/50] step:[48600/55751] loss:2.1

Epoch[23/50] step:[17200/55751] loss:2.557230 took:0.18967s
Epoch[23/50] step:[17400/55751] loss:2.548620 took:0.19426s
Epoch[23/50] step:[17600/55751] loss:2.357803 took:0.18582s
Epoch[23/50] step:[17800/55751] loss:2.521492 took:0.36812s
Epoch[23/50] step:[18000/55751] loss:1.757905 took:0.22167s
Epoch[23/50] step:[18200/55751] loss:2.459251 took:0.23786s
Epoch[23/50] step:[18400/55751] loss:2.573324 took:0.21364s
Epoch[23/50] step:[18600/55751] loss:2.058072 took:0.18120s
Epoch[23/50] step:[18800/55751] loss:2.593804 took:0.24614s
Epoch[23/50] step:[19000/55751] loss:2.130478 took:0.26036s
Epoch[23/50] step:[19200/55751] loss:2.055340 took:0.20348s
Epoch[23/50] step:[19400/55751] loss:2.400491 took:0.19617s
Epoch[23/50] step:[19600/55751] loss:2.297716 took:0.19862s
Epoch[23/50] step:[19800/55751] loss:1.522232 took:0.27610s
Epoch[23/50] step:[20000/55751] loss:1.972970 took:0.23453s
Epoch[23/50] step:[20200/55751] loss:2.488960 took:0.26297s
Epoch[23/50] step:[20400/55751] loss:2.6

Epoch[23/50] step:[44600/55751] loss:2.462203 took:0.24081s
Epoch[23/50] step:[44800/55751] loss:2.087890 took:0.20583s
Epoch[23/50] step:[45000/55751] loss:2.189836 took:0.33963s
Epoch[23/50] step:[45200/55751] loss:2.242231 took:0.20518s
Epoch[23/50] step:[45400/55751] loss:1.207266 took:0.29508s
Epoch[23/50] step:[45600/55751] loss:2.387017 took:0.25990s
Epoch[23/50] step:[45800/55751] loss:2.208263 took:0.22959s
Epoch[23/50] step:[46000/55751] loss:2.439917 took:0.28442s
Epoch[23/50] step:[46200/55751] loss:2.876441 took:0.25068s
Epoch[23/50] step:[46400/55751] loss:2.936131 took:0.29465s
Epoch[23/50] step:[46600/55751] loss:2.600209 took:0.22135s
Epoch[23/50] step:[46800/55751] loss:2.522848 took:0.20074s
Epoch[23/50] step:[47000/55751] loss:1.661330 took:0.16261s
Epoch[23/50] step:[47200/55751] loss:2.213002 took:0.18245s
Epoch[23/50] step:[47400/55751] loss:1.944457 took:0.24031s
Epoch[23/50] step:[47600/55751] loss:2.171340 took:0.21331s
Epoch[23/50] step:[47800/55751] loss:2.7

Epoch[24/50] step:[16400/55751] loss:2.679166 took:0.33143s
Epoch[24/50] step:[16600/55751] loss:2.317610 took:0.20039s
Epoch[24/50] step:[16800/55751] loss:2.596346 took:0.19442s
Epoch[24/50] step:[17000/55751] loss:2.449961 took:0.25422s
Epoch[24/50] step:[17200/55751] loss:2.049673 took:0.35175s
Epoch[24/50] step:[17400/55751] loss:2.423100 took:0.23184s
Epoch[24/50] step:[17600/55751] loss:2.220566 took:0.19370s
Epoch[24/50] step:[17800/55751] loss:2.103848 took:0.26997s
Epoch[24/50] step:[18000/55751] loss:1.741501 took:0.24570s
Epoch[24/50] step:[18200/55751] loss:2.962532 took:0.21360s
Epoch[24/50] step:[18400/55751] loss:2.186229 took:0.17717s
Epoch[24/50] step:[18600/55751] loss:2.505883 took:0.30190s
Epoch[24/50] step:[18800/55751] loss:2.184811 took:0.26941s
Epoch[24/50] step:[19000/55751] loss:2.472791 took:0.16581s
Epoch[24/50] step:[19200/55751] loss:2.290875 took:0.22482s
Epoch[24/50] step:[19400/55751] loss:2.180820 took:0.18506s
Epoch[24/50] step:[19600/55751] loss:2.7

Epoch[24/50] step:[43800/55751] loss:2.442490 took:0.27212s
Epoch[24/50] step:[44000/55751] loss:2.004912 took:0.23604s
Epoch[24/50] step:[44200/55751] loss:2.717801 took:0.24009s
Epoch[24/50] step:[44400/55751] loss:2.775660 took:0.27378s
Epoch[24/50] step:[44600/55751] loss:1.906819 took:0.19354s
Epoch[24/50] step:[44800/55751] loss:2.901532 took:0.19537s
Epoch[24/50] step:[45000/55751] loss:2.797313 took:0.16521s
Epoch[24/50] step:[45200/55751] loss:2.712829 took:0.24532s
Epoch[24/50] step:[45400/55751] loss:3.059097 took:0.25592s
Epoch[24/50] step:[45600/55751] loss:1.690290 took:0.24080s
Epoch[24/50] step:[45800/55751] loss:2.394802 took:0.31416s
Epoch[24/50] step:[46000/55751] loss:1.847042 took:0.18930s
Epoch[24/50] step:[46200/55751] loss:2.755643 took:0.23524s
Epoch[24/50] step:[46400/55751] loss:2.221445 took:0.19174s
Epoch[24/50] step:[46600/55751] loss:2.574882 took:0.23643s
Epoch[24/50] step:[46800/55751] loss:2.661906 took:0.26741s
Epoch[24/50] step:[47000/55751] loss:2.6

Epoch[25/50] step:[15600/55751] loss:2.733821 took:0.17284s
Epoch[25/50] step:[15800/55751] loss:2.693140 took:0.35357s
Epoch[25/50] step:[16000/55751] loss:1.810260 took:0.19454s
Epoch[25/50] step:[16200/55751] loss:2.524426 took:0.34796s
Epoch[25/50] step:[16400/55751] loss:2.299531 took:0.22342s
Epoch[25/50] step:[16600/55751] loss:1.588748 took:0.19708s
Epoch[25/50] step:[16800/55751] loss:2.316654 took:0.18323s
Epoch[25/50] step:[17000/55751] loss:2.840007 took:0.23822s
Epoch[25/50] step:[17200/55751] loss:2.145932 took:0.18633s
Epoch[25/50] step:[17400/55751] loss:2.109750 took:0.18477s
Epoch[25/50] step:[17600/55751] loss:2.322206 took:0.20116s
Epoch[25/50] step:[17800/55751] loss:2.251680 took:0.23563s
Epoch[25/50] step:[18000/55751] loss:2.316530 took:0.22015s
Epoch[25/50] step:[18200/55751] loss:2.579837 took:0.22712s
Epoch[25/50] step:[18400/55751] loss:2.266884 took:0.21096s
Epoch[25/50] step:[18600/55751] loss:2.639071 took:0.25229s
Epoch[25/50] step:[18800/55751] loss:1.7

Epoch[25/50] step:[43000/55751] loss:2.553573 took:0.18032s
Epoch[25/50] step:[43200/55751] loss:2.956164 took:0.23228s
Epoch[25/50] step:[43400/55751] loss:2.739230 took:0.25138s
Epoch[25/50] step:[43600/55751] loss:2.505233 took:0.22100s
Epoch[25/50] step:[43800/55751] loss:2.196294 took:0.21101s
Epoch[25/50] step:[44000/55751] loss:2.328063 took:0.20696s
Epoch[25/50] step:[44200/55751] loss:2.230658 took:0.24846s
Epoch[25/50] step:[44400/55751] loss:2.404512 took:0.20381s
Epoch[25/50] step:[44600/55751] loss:2.210465 took:0.22433s
Epoch[25/50] step:[44800/55751] loss:2.767770 took:0.26931s
Epoch[25/50] step:[45000/55751] loss:2.435528 took:0.38937s
Epoch[25/50] step:[45200/55751] loss:2.045431 took:0.38903s
Epoch[25/50] step:[45400/55751] loss:2.376391 took:0.22236s
Epoch[25/50] step:[45600/55751] loss:2.012723 took:0.19213s
Epoch[25/50] step:[45800/55751] loss:2.397322 took:0.27184s
Epoch[25/50] step:[46000/55751] loss:2.251858 took:0.24329s
Epoch[25/50] step:[46200/55751] loss:2.3

Epoch[26/50] step:[14800/55751] loss:2.424027 took:0.21120s
Epoch[26/50] step:[15000/55751] loss:2.594491 took:0.28700s
Epoch[26/50] step:[15200/55751] loss:2.578348 took:0.20603s
Epoch[26/50] step:[15400/55751] loss:2.543063 took:0.19666s
Epoch[26/50] step:[15600/55751] loss:2.721067 took:0.18040s
Epoch[26/50] step:[15800/55751] loss:1.433217 took:0.21924s
Epoch[26/50] step:[16000/55751] loss:1.858446 took:0.36603s
Epoch[26/50] step:[16200/55751] loss:2.266362 took:0.20693s
Epoch[26/50] step:[16400/55751] loss:1.961933 took:0.21665s
Epoch[26/50] step:[16600/55751] loss:2.808707 took:0.39918s
Epoch[26/50] step:[16800/55751] loss:1.955509 took:0.19230s
Epoch[26/50] step:[17000/55751] loss:1.730796 took:0.26253s
Epoch[26/50] step:[17200/55751] loss:1.949312 took:0.19780s
Epoch[26/50] step:[17400/55751] loss:2.961906 took:0.26688s
Epoch[26/50] step:[17600/55751] loss:2.563390 took:0.27390s
Epoch[26/50] step:[17800/55751] loss:2.111284 took:0.18720s
Epoch[26/50] step:[18000/55751] loss:2.2

Epoch[26/50] step:[42200/55751] loss:2.397462 took:0.21110s
Epoch[26/50] step:[42400/55751] loss:2.392451 took:0.18618s
Epoch[26/50] step:[42600/55751] loss:2.127996 took:0.26124s
Epoch[26/50] step:[42800/55751] loss:3.227638 took:0.41029s
Epoch[26/50] step:[43000/55751] loss:2.836262 took:0.25325s
Epoch[26/50] step:[43200/55751] loss:1.860387 took:0.20256s
Epoch[26/50] step:[43400/55751] loss:2.508834 took:0.22922s
Epoch[26/50] step:[43600/55751] loss:2.074289 took:0.47306s
Epoch[26/50] step:[43800/55751] loss:2.229421 took:0.23490s
Epoch[26/50] step:[44000/55751] loss:2.332476 took:0.17714s
Epoch[26/50] step:[44200/55751] loss:2.318701 took:0.23554s
Epoch[26/50] step:[44400/55751] loss:2.022738 took:0.19301s
Epoch[26/50] step:[44600/55751] loss:2.315966 took:0.16938s
Epoch[26/50] step:[44800/55751] loss:2.009161 took:0.20383s
Epoch[26/50] step:[45000/55751] loss:2.633798 took:0.23543s
Epoch[26/50] step:[45200/55751] loss:2.270288 took:0.19188s
Epoch[26/50] step:[45400/55751] loss:2.8

Epoch[27/50] step:[14000/55751] loss:2.045621 took:0.30574s
Epoch[27/50] step:[14200/55751] loss:2.038582 took:0.22151s
Epoch[27/50] step:[14400/55751] loss:2.114406 took:0.28285s
Epoch[27/50] step:[14600/55751] loss:2.498259 took:0.35468s
Epoch[27/50] step:[14800/55751] loss:2.309220 took:0.20113s
Epoch[27/50] step:[15000/55751] loss:1.814671 took:0.21554s
Epoch[27/50] step:[15200/55751] loss:2.439147 took:0.24911s
Epoch[27/50] step:[15400/55751] loss:1.855577 took:0.23437s
Epoch[27/50] step:[15600/55751] loss:1.361170 took:0.22569s
Epoch[27/50] step:[15800/55751] loss:2.634792 took:0.20385s
Epoch[27/50] step:[16000/55751] loss:2.538157 took:0.21547s
Epoch[27/50] step:[16200/55751] loss:2.437623 took:0.20891s
Epoch[27/50] step:[16400/55751] loss:2.703038 took:0.26182s
Epoch[27/50] step:[16600/55751] loss:2.180449 took:0.29610s
Epoch[27/50] step:[16800/55751] loss:2.334731 took:0.25615s
Epoch[27/50] step:[17000/55751] loss:2.234807 took:0.26177s
Epoch[27/50] step:[17200/55751] loss:2.6

Epoch[27/50] step:[41400/55751] loss:2.476732 took:0.24412s
Epoch[27/50] step:[41600/55751] loss:2.395463 took:0.25514s
Epoch[27/50] step:[41800/55751] loss:2.257198 took:0.27746s
Epoch[27/50] step:[42000/55751] loss:1.662277 took:0.26723s
Epoch[27/50] step:[42200/55751] loss:1.700158 took:0.19903s
Epoch[27/50] step:[42400/55751] loss:2.047911 took:0.26617s
Epoch[27/50] step:[42600/55751] loss:3.024685 took:0.46807s
Epoch[27/50] step:[42800/55751] loss:1.757693 took:0.22740s
Epoch[27/50] step:[43000/55751] loss:2.055217 took:0.25920s
Epoch[27/50] step:[43200/55751] loss:1.802921 took:0.16872s
Epoch[27/50] step:[43400/55751] loss:2.367850 took:0.34333s
Epoch[27/50] step:[43600/55751] loss:2.394595 took:0.22305s
Epoch[27/50] step:[43800/55751] loss:2.848040 took:0.18055s
Epoch[27/50] step:[44000/55751] loss:2.786552 took:0.25882s
Epoch[27/50] step:[44200/55751] loss:2.054959 took:0.22739s
Epoch[27/50] step:[44400/55751] loss:2.758892 took:0.23032s
Epoch[27/50] step:[44600/55751] loss:2.3

Epoch[28/50] step:[13200/55751] loss:2.310396 took:0.30974s
Epoch[28/50] step:[13400/55751] loss:2.838292 took:0.27261s
Epoch[28/50] step:[13600/55751] loss:2.098567 took:0.25295s
Epoch[28/50] step:[13800/55751] loss:2.228338 took:0.21053s
Epoch[28/50] step:[14000/55751] loss:2.027421 took:0.19072s
Epoch[28/50] step:[14200/55751] loss:2.136074 took:0.20484s
Epoch[28/50] step:[14400/55751] loss:2.426738 took:0.27551s
Epoch[28/50] step:[14600/55751] loss:2.804798 took:0.20641s
Epoch[28/50] step:[14800/55751] loss:1.988213 took:0.18976s
Epoch[28/50] step:[15000/55751] loss:2.924883 took:0.21628s
Epoch[28/50] step:[15200/55751] loss:2.426621 took:0.38516s
Epoch[28/50] step:[15400/55751] loss:2.643595 took:0.28607s
Epoch[28/50] step:[15600/55751] loss:1.625290 took:0.18966s
Epoch[28/50] step:[15800/55751] loss:2.454096 took:0.27612s
Epoch[28/50] step:[16000/55751] loss:1.588091 took:0.25003s
Epoch[28/50] step:[16200/55751] loss:2.649577 took:0.16945s
Epoch[28/50] step:[16400/55751] loss:2.3

Epoch[28/50] step:[40600/55751] loss:2.831693 took:0.18625s
Epoch[28/50] step:[40800/55751] loss:2.699344 took:0.22905s
Epoch[28/50] step:[41000/55751] loss:2.210579 took:0.21304s
Epoch[28/50] step:[41200/55751] loss:1.936529 took:0.21267s
Epoch[28/50] step:[41400/55751] loss:1.956490 took:0.19095s
Epoch[28/50] step:[41600/55751] loss:2.421966 took:0.19690s
Epoch[28/50] step:[41800/55751] loss:1.875696 took:0.23092s
Epoch[28/50] step:[42000/55751] loss:2.456120 took:0.22296s
Epoch[28/50] step:[42200/55751] loss:2.449681 took:0.33849s
Epoch[28/50] step:[42400/55751] loss:2.748331 took:0.22608s
Epoch[28/50] step:[42600/55751] loss:2.405628 took:0.23842s
Epoch[28/50] step:[42800/55751] loss:2.231595 took:0.60237s
Epoch[28/50] step:[43000/55751] loss:2.216931 took:0.21673s
Epoch[28/50] step:[43200/55751] loss:2.120767 took:0.24786s
Epoch[28/50] step:[43400/55751] loss:2.496040 took:0.28208s
Epoch[28/50] step:[43600/55751] loss:2.456295 took:0.25158s
Epoch[28/50] step:[43800/55751] loss:2.6

Epoch[29/50] step:[12400/55751] loss:2.406629 took:0.19367s
Epoch[29/50] step:[12600/55751] loss:2.137942 took:0.15785s
Epoch[29/50] step:[12800/55751] loss:2.108721 took:0.19806s
Epoch[29/50] step:[13000/55751] loss:2.391195 took:0.18951s
Epoch[29/50] step:[13200/55751] loss:2.146778 took:0.20077s
Epoch[29/50] step:[13400/55751] loss:2.452872 took:0.30611s
Epoch[29/50] step:[13600/55751] loss:2.687748 took:0.21212s
Epoch[29/50] step:[13800/55751] loss:1.926291 took:0.24850s
Epoch[29/50] step:[14000/55751] loss:2.051930 took:0.21174s
Epoch[29/50] step:[14200/55751] loss:2.116924 took:0.17175s
Epoch[29/50] step:[14400/55751] loss:1.824069 took:0.20293s
Epoch[29/50] step:[14600/55751] loss:2.784421 took:0.19431s
Epoch[29/50] step:[14800/55751] loss:2.412514 took:0.23116s
Epoch[29/50] step:[15000/55751] loss:2.218020 took:0.27950s
Epoch[29/50] step:[15200/55751] loss:1.727005 took:0.19548s
Epoch[29/50] step:[15400/55751] loss:2.209663 took:0.18295s
Epoch[29/50] step:[15600/55751] loss:2.1

Epoch[29/50] step:[39800/55751] loss:2.830774 took:0.26563s
Epoch[29/50] step:[40000/55751] loss:2.239850 took:0.19836s
Epoch[29/50] step:[40200/55751] loss:2.067001 took:0.27288s
Epoch[29/50] step:[40400/55751] loss:1.944124 took:0.22189s
Epoch[29/50] step:[40600/55751] loss:2.163426 took:0.26007s
Epoch[29/50] step:[40800/55751] loss:2.511413 took:0.17729s
Epoch[29/50] step:[41000/55751] loss:2.379705 took:0.24083s
Epoch[29/50] step:[41200/55751] loss:2.578979 took:0.17886s
Epoch[29/50] step:[41400/55751] loss:1.711315 took:0.26148s
Epoch[29/50] step:[41600/55751] loss:1.894215 took:0.24549s
Epoch[29/50] step:[41800/55751] loss:2.645439 took:0.44532s
Epoch[29/50] step:[42000/55751] loss:2.410926 took:0.19801s
Epoch[29/50] step:[42200/55751] loss:2.233664 took:0.22930s
Epoch[29/50] step:[42400/55751] loss:2.153398 took:0.20780s
Epoch[29/50] step:[42600/55751] loss:2.492308 took:0.18403s
Epoch[29/50] step:[42800/55751] loss:2.211783 took:0.18217s
Epoch[29/50] step:[43000/55751] loss:2.2

Epoch[30/50] step:[11600/55751] loss:2.293380 took:0.17247s
Epoch[30/50] step:[11800/55751] loss:2.632462 took:0.37017s
Epoch[30/50] step:[12000/55751] loss:2.730888 took:0.22089s
Epoch[30/50] step:[12200/55751] loss:1.747153 took:0.21059s
Epoch[30/50] step:[12400/55751] loss:2.541107 took:0.25934s
Epoch[30/50] step:[12600/55751] loss:2.293101 took:0.27450s
Epoch[30/50] step:[12800/55751] loss:2.327516 took:0.19841s
Epoch[30/50] step:[13000/55751] loss:2.249799 took:0.26785s
Epoch[30/50] step:[13200/55751] loss:2.108570 took:0.19529s
Epoch[30/50] step:[13400/55751] loss:2.579607 took:0.22466s
Epoch[30/50] step:[13600/55751] loss:1.961006 took:0.30922s
Epoch[30/50] step:[13800/55751] loss:2.318883 took:0.25858s
Epoch[30/50] step:[14000/55751] loss:1.953893 took:0.26330s
Epoch[30/50] step:[14200/55751] loss:2.112451 took:0.24902s
Epoch[30/50] step:[14400/55751] loss:2.579870 took:0.19010s
Epoch[30/50] step:[14600/55751] loss:2.397781 took:0.20125s
Epoch[30/50] step:[14800/55751] loss:2.1

Epoch[30/50] step:[39000/55751] loss:2.596907 took:0.22591s
Epoch[30/50] step:[39200/55751] loss:2.410456 took:0.27842s
Epoch[30/50] step:[39400/55751] loss:2.429081 took:0.21598s
Epoch[30/50] step:[39600/55751] loss:2.022207 took:0.15895s
Epoch[30/50] step:[39800/55751] loss:1.936904 took:0.17548s
Epoch[30/50] step:[40000/55751] loss:2.604623 took:0.22529s
Epoch[30/50] step:[40200/55751] loss:2.123423 took:0.24106s
Epoch[30/50] step:[40400/55751] loss:2.824529 took:0.21374s
Epoch[30/50] step:[40600/55751] loss:2.358912 took:0.18197s
Epoch[30/50] step:[40800/55751] loss:2.362660 took:0.26264s
Epoch[30/50] step:[41000/55751] loss:2.267183 took:0.25599s
Epoch[30/50] step:[41200/55751] loss:1.245098 took:0.20874s
Epoch[30/50] step:[41400/55751] loss:1.736333 took:0.22632s
Epoch[30/50] step:[41600/55751] loss:1.922867 took:0.19178s
Epoch[30/50] step:[41800/55751] loss:2.276717 took:0.16748s
Epoch[30/50] step:[42000/55751] loss:2.433859 took:0.22180s
Epoch[30/50] step:[42200/55751] loss:2.6

Epoch[31/50] step:[10800/55751] loss:1.963024 took:0.16409s
Epoch[31/50] step:[11000/55751] loss:1.873474 took:0.24420s
Epoch[31/50] step:[11200/55751] loss:1.914750 took:0.24217s
Epoch[31/50] step:[11400/55751] loss:2.541138 took:0.19949s
Epoch[31/50] step:[11600/55751] loss:2.551105 took:0.25850s
Epoch[31/50] step:[11800/55751] loss:2.471918 took:0.20813s
Epoch[31/50] step:[12000/55751] loss:2.315136 took:0.21359s
Epoch[31/50] step:[12200/55751] loss:2.906954 took:0.22010s
Epoch[31/50] step:[12400/55751] loss:2.364903 took:0.17790s
Epoch[31/50] step:[12600/55751] loss:2.513328 took:0.22899s
Epoch[31/50] step:[12800/55751] loss:2.528206 took:0.28272s
Epoch[31/50] step:[13000/55751] loss:2.791683 took:0.23108s
Epoch[31/50] step:[13200/55751] loss:1.857109 took:0.17488s
Epoch[31/50] step:[13400/55751] loss:1.678267 took:0.23701s
Epoch[31/50] step:[13600/55751] loss:2.933019 took:0.16269s
Epoch[31/50] step:[13800/55751] loss:1.837609 took:0.18870s
Epoch[31/50] step:[14000/55751] loss:2.6

Epoch[31/50] step:[38200/55751] loss:2.379151 took:0.33085s
Epoch[31/50] step:[38400/55751] loss:2.472190 took:0.22117s
Epoch[31/50] step:[38600/55751] loss:1.834125 took:0.23140s
Epoch[31/50] step:[38800/55751] loss:2.180204 took:0.28239s
Epoch[31/50] step:[39000/55751] loss:2.631962 took:0.30827s
Epoch[31/50] step:[39200/55751] loss:2.176157 took:0.24070s
Epoch[31/50] step:[39400/55751] loss:1.953119 took:0.22362s
Epoch[31/50] step:[39600/55751] loss:2.035858 took:0.32993s
Epoch[31/50] step:[39800/55751] loss:2.170770 took:0.34507s
Epoch[31/50] step:[40000/55751] loss:2.220108 took:0.21545s
Epoch[31/50] step:[40200/55751] loss:2.035520 took:0.30264s
Epoch[31/50] step:[40400/55751] loss:2.002989 took:0.18272s
Epoch[31/50] step:[40600/55751] loss:2.427827 took:0.22225s
Epoch[31/50] step:[40800/55751] loss:2.305652 took:0.24321s
Epoch[31/50] step:[41000/55751] loss:2.591556 took:0.18862s
Epoch[31/50] step:[41200/55751] loss:2.550525 took:0.27121s
Epoch[31/50] step:[41400/55751] loss:2.1

Epoch[32/50] step:[10000/55751] loss:2.110597 took:0.16851s
Epoch[32/50] step:[10200/55751] loss:2.263332 took:0.37024s
Epoch[32/50] step:[10400/55751] loss:2.106327 took:0.20084s
Epoch[32/50] step:[10600/55751] loss:2.516711 took:0.24591s
Epoch[32/50] step:[10800/55751] loss:2.832401 took:0.62053s
Epoch[32/50] step:[11000/55751] loss:2.530060 took:0.19945s
Epoch[32/50] step:[11200/55751] loss:2.055310 took:0.17964s
Epoch[32/50] step:[11400/55751] loss:1.969117 took:0.21287s
Epoch[32/50] step:[11600/55751] loss:2.574505 took:0.20123s
Epoch[32/50] step:[11800/55751] loss:2.058249 took:0.35189s
Epoch[32/50] step:[12000/55751] loss:2.568239 took:0.23107s
Epoch[32/50] step:[12200/55751] loss:2.042060 took:0.27306s
Epoch[32/50] step:[12400/55751] loss:2.197667 took:0.28442s
Epoch[32/50] step:[12600/55751] loss:2.647652 took:0.23387s
Epoch[32/50] step:[12800/55751] loss:2.002645 took:0.21668s
Epoch[32/50] step:[13000/55751] loss:2.291996 took:0.26552s
Epoch[32/50] step:[13200/55751] loss:2.4

Epoch[32/50] step:[37400/55751] loss:3.057149 took:0.19185s
Epoch[32/50] step:[37600/55751] loss:1.928623 took:0.20349s
Epoch[32/50] step:[37800/55751] loss:2.211089 took:0.38821s
Epoch[32/50] step:[38000/55751] loss:2.594400 took:0.20707s
Epoch[32/50] step:[38200/55751] loss:2.305234 took:0.17383s
Epoch[32/50] step:[38400/55751] loss:2.391309 took:0.17949s
Epoch[32/50] step:[38600/55751] loss:2.675743 took:0.18152s
Epoch[32/50] step:[38800/55751] loss:2.195970 took:0.22650s
Epoch[32/50] step:[39000/55751] loss:1.838905 took:0.25575s
Epoch[32/50] step:[39200/55751] loss:2.266752 took:0.25548s
Epoch[32/50] step:[39400/55751] loss:1.845604 took:0.21887s
Epoch[32/50] step:[39600/55751] loss:2.023212 took:0.24661s
Epoch[32/50] step:[39800/55751] loss:2.396932 took:0.21704s
Epoch[32/50] step:[40000/55751] loss:2.833974 took:0.31919s
Epoch[32/50] step:[40200/55751] loss:2.322539 took:0.18055s
Epoch[32/50] step:[40400/55751] loss:1.733793 took:0.30781s
Epoch[32/50] step:[40600/55751] loss:2.6

Epoch[33/50] step:[9200/55751] loss:2.484960 took:0.19929s
Epoch[33/50] step:[9400/55751] loss:2.185074 took:0.17245s
Epoch[33/50] step:[9600/55751] loss:2.269286 took:0.25509s
Epoch[33/50] step:[9800/55751] loss:2.078112 took:0.30596s
Epoch[33/50] step:[10000/55751] loss:1.846659 took:0.31910s
Epoch[33/50] step:[10200/55751] loss:2.907968 took:0.33791s
Epoch[33/50] step:[10400/55751] loss:2.906756 took:0.20353s
Epoch[33/50] step:[10600/55751] loss:2.407176 took:0.19727s
Epoch[33/50] step:[10800/55751] loss:2.485320 took:0.23749s
Epoch[33/50] step:[11000/55751] loss:1.934663 took:0.23425s
Epoch[33/50] step:[11200/55751] loss:1.780241 took:0.20898s
Epoch[33/50] step:[11400/55751] loss:2.068726 took:0.20106s
Epoch[33/50] step:[11600/55751] loss:1.819129 took:0.24063s
Epoch[33/50] step:[11800/55751] loss:2.254490 took:0.22337s
Epoch[33/50] step:[12000/55751] loss:2.161508 took:0.20970s
Epoch[33/50] step:[12200/55751] loss:2.000386 took:0.27251s
Epoch[33/50] step:[12400/55751] loss:2.77422

Epoch[33/50] step:[36600/55751] loss:2.567453 took:0.25973s
Epoch[33/50] step:[36800/55751] loss:2.924640 took:0.23819s
Epoch[33/50] step:[37000/55751] loss:3.016361 took:0.31280s
Epoch[33/50] step:[37200/55751] loss:2.550932 took:0.28918s
Epoch[33/50] step:[37400/55751] loss:2.649500 took:0.21906s
Epoch[33/50] step:[37600/55751] loss:2.223962 took:0.27070s
Epoch[33/50] step:[37800/55751] loss:2.201464 took:0.19943s
Epoch[33/50] step:[38000/55751] loss:2.406322 took:0.21269s
Epoch[33/50] step:[38200/55751] loss:2.809617 took:0.18308s
Epoch[33/50] step:[38400/55751] loss:2.073431 took:0.16997s
Epoch[33/50] step:[38600/55751] loss:2.501194 took:0.22038s
Epoch[33/50] step:[38800/55751] loss:2.348313 took:0.23059s
Epoch[33/50] step:[39000/55751] loss:2.195199 took:0.30555s
Epoch[33/50] step:[39200/55751] loss:2.014921 took:0.19295s
Epoch[33/50] step:[39400/55751] loss:2.667208 took:0.19124s
Epoch[33/50] step:[39600/55751] loss:2.376323 took:0.20136s
Epoch[33/50] step:[39800/55751] loss:2.4

Epoch[34/50] step:[8400/55751] loss:2.674588 took:0.28703s
Epoch[34/50] step:[8600/55751] loss:2.249234 took:0.29208s
Epoch[34/50] step:[8800/55751] loss:1.478961 took:0.21057s
Epoch[34/50] step:[9000/55751] loss:2.343694 took:0.28460s
Epoch[34/50] step:[9200/55751] loss:2.175517 took:0.23174s
Epoch[34/50] step:[9400/55751] loss:1.993302 took:0.37459s
Epoch[34/50] step:[9600/55751] loss:2.327348 took:0.28696s
Epoch[34/50] step:[9800/55751] loss:2.454435 took:0.17535s
Epoch[34/50] step:[10000/55751] loss:1.678730 took:0.20116s
Epoch[34/50] step:[10200/55751] loss:2.383903 took:0.38978s
Epoch[34/50] step:[10400/55751] loss:1.864944 took:0.19449s
Epoch[34/50] step:[10600/55751] loss:2.409579 took:0.25229s
Epoch[34/50] step:[10800/55751] loss:1.613630 took:0.20754s
Epoch[34/50] step:[11000/55751] loss:1.999418 took:0.19265s
Epoch[34/50] step:[11200/55751] loss:1.959553 took:0.29849s
Epoch[34/50] step:[11400/55751] loss:1.911104 took:0.21021s
Epoch[34/50] step:[11600/55751] loss:2.757709 to

Epoch[34/50] step:[35800/55751] loss:2.210734 took:0.21166s
Epoch[34/50] step:[36000/55751] loss:2.350640 took:0.30256s
Epoch[34/50] step:[36200/55751] loss:1.817644 took:0.33072s
Epoch[34/50] step:[36400/55751] loss:2.273716 took:0.32326s
Epoch[34/50] step:[36600/55751] loss:1.746186 took:0.19832s
Epoch[34/50] step:[36800/55751] loss:2.550582 took:0.19965s
Epoch[34/50] step:[37000/55751] loss:2.256923 took:0.42949s
Epoch[34/50] step:[37200/55751] loss:2.345714 took:0.19233s
Epoch[34/50] step:[37400/55751] loss:2.462405 took:0.17077s
Epoch[34/50] step:[37600/55751] loss:2.197016 took:0.24769s
Epoch[34/50] step:[37800/55751] loss:1.872700 took:0.25506s
Epoch[34/50] step:[38000/55751] loss:2.164821 took:0.28889s
Epoch[34/50] step:[38200/55751] loss:2.661520 took:0.18613s
Epoch[34/50] step:[38400/55751] loss:2.754015 took:0.19139s
Epoch[34/50] step:[38600/55751] loss:2.809300 took:0.18744s
Epoch[34/50] step:[38800/55751] loss:1.731344 took:0.25144s
Epoch[34/50] step:[39000/55751] loss:1.7

Epoch[35/50] step:[7600/55751] loss:3.167423 took:0.26063s
Epoch[35/50] step:[7800/55751] loss:1.871367 took:0.23000s
Epoch[35/50] step:[8000/55751] loss:1.997691 took:0.34615s
Epoch[35/50] step:[8200/55751] loss:1.929772 took:0.18630s
Epoch[35/50] step:[8400/55751] loss:2.407207 took:0.31416s
Epoch[35/50] step:[8600/55751] loss:2.201694 took:0.17138s
Epoch[35/50] step:[8800/55751] loss:2.154932 took:0.19259s
Epoch[35/50] step:[9000/55751] loss:2.406838 took:0.30330s
Epoch[35/50] step:[9200/55751] loss:2.138800 took:0.23280s
Epoch[35/50] step:[9400/55751] loss:2.450523 took:0.26517s
Epoch[35/50] step:[9600/55751] loss:2.562587 took:0.30981s
Epoch[35/50] step:[9800/55751] loss:2.540492 took:0.19857s
Epoch[35/50] step:[10000/55751] loss:2.448982 took:0.22066s
Epoch[35/50] step:[10200/55751] loss:2.167420 took:0.26658s
Epoch[35/50] step:[10400/55751] loss:2.030071 took:0.25417s
Epoch[35/50] step:[10600/55751] loss:2.305704 took:0.23507s
Epoch[35/50] step:[10800/55751] loss:2.806315 took:0

Epoch[35/50] step:[35000/55751] loss:2.344439 took:0.22746s
Epoch[35/50] step:[35200/55751] loss:2.048163 took:0.23962s
Epoch[35/50] step:[35400/55751] loss:2.784285 took:0.26759s
Epoch[35/50] step:[35600/55751] loss:2.140752 took:0.27742s
Epoch[35/50] step:[35800/55751] loss:1.933222 took:0.30338s
Epoch[35/50] step:[36000/55751] loss:2.600483 took:0.19443s
Epoch[35/50] step:[36200/55751] loss:2.641625 took:0.18957s
Epoch[35/50] step:[36400/55751] loss:1.759217 took:0.28505s
Epoch[35/50] step:[36600/55751] loss:1.983851 took:0.19352s
Epoch[35/50] step:[36800/55751] loss:2.072008 took:0.25305s
Epoch[35/50] step:[37000/55751] loss:2.672120 took:0.21424s
Epoch[35/50] step:[37200/55751] loss:1.767018 took:0.18764s
Epoch[35/50] step:[37400/55751] loss:2.506832 took:0.33295s
Epoch[35/50] step:[37600/55751] loss:1.549268 took:0.20301s
Epoch[35/50] step:[37800/55751] loss:2.107620 took:0.28978s
Epoch[35/50] step:[38000/55751] loss:2.170863 took:0.39697s
Epoch[35/50] step:[38200/55751] loss:2.4

Epoch[36/50] step:[6800/55751] loss:2.043704 took:0.29603s
Epoch[36/50] step:[7000/55751] loss:2.582916 took:0.27883s
Epoch[36/50] step:[7200/55751] loss:2.397465 took:0.19941s
Epoch[36/50] step:[7400/55751] loss:3.059170 took:0.19560s
Epoch[36/50] step:[7600/55751] loss:2.129105 took:0.31931s
Epoch[36/50] step:[7800/55751] loss:1.847878 took:0.34220s
Epoch[36/50] step:[8000/55751] loss:2.222705 took:0.24313s
Epoch[36/50] step:[8200/55751] loss:2.542538 took:0.20594s
Epoch[36/50] step:[8400/55751] loss:1.816083 took:0.18708s
Epoch[36/50] step:[8600/55751] loss:1.727816 took:0.25609s
Epoch[36/50] step:[8800/55751] loss:2.489308 took:0.21423s
Epoch[36/50] step:[9000/55751] loss:2.218007 took:0.27756s
Epoch[36/50] step:[9200/55751] loss:2.745339 took:0.28584s
Epoch[36/50] step:[9400/55751] loss:2.601737 took:0.23186s
Epoch[36/50] step:[9600/55751] loss:1.815493 took:0.18390s
Epoch[36/50] step:[9800/55751] loss:2.041611 took:0.27445s
Epoch[36/50] step:[10000/55751] loss:1.752544 took:0.204

Epoch[36/50] step:[34200/55751] loss:2.147737 took:0.31103s
Epoch[36/50] step:[34400/55751] loss:2.462542 took:0.27180s
Epoch[36/50] step:[34600/55751] loss:2.441963 took:0.28113s
Epoch[36/50] step:[34800/55751] loss:2.306465 took:0.18491s
Epoch[36/50] step:[35000/55751] loss:1.714612 took:0.27696s
Epoch[36/50] step:[35200/55751] loss:2.681612 took:0.22488s
Epoch[36/50] step:[35400/55751] loss:2.053450 took:0.18591s
Epoch[36/50] step:[35600/55751] loss:2.359510 took:0.17649s
Epoch[36/50] step:[35800/55751] loss:2.012107 took:0.18366s
Epoch[36/50] step:[36000/55751] loss:2.657809 took:0.24778s
Epoch[36/50] step:[36200/55751] loss:2.214647 took:0.24609s
Epoch[36/50] step:[36400/55751] loss:2.521051 took:0.18318s
Epoch[36/50] step:[36600/55751] loss:1.628509 took:0.22562s
Epoch[36/50] step:[36800/55751] loss:2.814775 took:0.19764s
Epoch[36/50] step:[37000/55751] loss:2.450688 took:0.29592s
Epoch[36/50] step:[37200/55751] loss:2.077984 took:0.23386s
Epoch[36/50] step:[37400/55751] loss:1.8

Epoch[37/50] step:[6000/55751] loss:2.873708 took:0.22024s
Epoch[37/50] step:[6200/55751] loss:2.460284 took:0.19985s
Epoch[37/50] step:[6400/55751] loss:3.175017 took:0.22677s
Epoch[37/50] step:[6600/55751] loss:1.900533 took:0.21832s
Epoch[37/50] step:[6800/55751] loss:2.314150 took:0.24126s
Epoch[37/50] step:[7000/55751] loss:2.641017 took:0.22015s
Epoch[37/50] step:[7200/55751] loss:2.026836 took:0.18294s
Epoch[37/50] step:[7400/55751] loss:2.543250 took:0.20470s
Epoch[37/50] step:[7600/55751] loss:2.553318 took:0.23762s
Epoch[37/50] step:[7800/55751] loss:2.358916 took:0.19410s
Epoch[37/50] step:[8000/55751] loss:2.120012 took:0.22576s
Epoch[37/50] step:[8200/55751] loss:2.528124 took:0.21306s
Epoch[37/50] step:[8400/55751] loss:2.083712 took:0.24194s
Epoch[37/50] step:[8600/55751] loss:2.387407 took:0.19059s
Epoch[37/50] step:[8800/55751] loss:2.333450 took:0.30823s
Epoch[37/50] step:[9000/55751] loss:1.942869 took:0.29380s
Epoch[37/50] step:[9200/55751] loss:2.061386 took:0.2889

Epoch[37/50] step:[33400/55751] loss:1.672262 took:0.20489s
Epoch[37/50] step:[33600/55751] loss:2.618965 took:0.20888s
Epoch[37/50] step:[33800/55751] loss:1.852583 took:0.18175s
Epoch[37/50] step:[34000/55751] loss:2.636312 took:0.25623s
Epoch[37/50] step:[34200/55751] loss:2.016754 took:0.20891s
Epoch[37/50] step:[34400/55751] loss:1.935429 took:0.19739s
Epoch[37/50] step:[34600/55751] loss:2.207486 took:0.21816s
Epoch[37/50] step:[34800/55751] loss:2.074927 took:0.25700s
Epoch[37/50] step:[35000/55751] loss:2.277313 took:0.20087s
Epoch[37/50] step:[35200/55751] loss:1.974350 took:0.22279s
Epoch[37/50] step:[35400/55751] loss:2.046264 took:0.18102s
Epoch[37/50] step:[35600/55751] loss:2.550820 took:0.29425s
Epoch[37/50] step:[35800/55751] loss:3.012388 took:0.29823s
Epoch[37/50] step:[36000/55751] loss:2.272300 took:0.18810s
Epoch[37/50] step:[36200/55751] loss:2.193577 took:0.21003s
Epoch[37/50] step:[36400/55751] loss:2.079789 took:0.20239s
Epoch[37/50] step:[36600/55751] loss:2.3

Epoch[38/50] step:[5200/55751] loss:2.312299 took:0.19029s
Epoch[38/50] step:[5400/55751] loss:2.039777 took:0.17244s
Epoch[38/50] step:[5600/55751] loss:2.321349 took:0.35980s
Epoch[38/50] step:[5800/55751] loss:2.320132 took:0.20106s
Epoch[38/50] step:[6000/55751] loss:2.456564 took:0.26215s
Epoch[38/50] step:[6200/55751] loss:2.333294 took:0.17009s
Epoch[38/50] step:[6400/55751] loss:1.934663 took:0.25564s
Epoch[38/50] step:[6600/55751] loss:2.050488 took:0.17688s
Epoch[38/50] step:[6800/55751] loss:2.602843 took:0.25751s
Epoch[38/50] step:[7000/55751] loss:2.089934 took:0.22564s
Epoch[38/50] step:[7200/55751] loss:2.341334 took:0.27773s
Epoch[38/50] step:[7400/55751] loss:2.990742 took:0.24262s
Epoch[38/50] step:[7600/55751] loss:2.181511 took:0.15872s
Epoch[38/50] step:[7800/55751] loss:1.720874 took:0.22494s
Epoch[38/50] step:[8000/55751] loss:2.861175 took:0.27688s
Epoch[38/50] step:[8200/55751] loss:2.169064 took:0.21326s
Epoch[38/50] step:[8400/55751] loss:2.523543 took:0.2249

Epoch[38/50] step:[32600/55751] loss:2.438545 took:0.23945s
Epoch[38/50] step:[32800/55751] loss:2.274989 took:0.26713s
Epoch[38/50] step:[33000/55751] loss:2.419614 took:0.26101s
Epoch[38/50] step:[33200/55751] loss:2.479040 took:0.22547s
Epoch[38/50] step:[33400/55751] loss:2.581400 took:0.19123s
Epoch[38/50] step:[33600/55751] loss:2.177065 took:0.19253s
Epoch[38/50] step:[33800/55751] loss:2.207329 took:0.32544s
Epoch[38/50] step:[34000/55751] loss:2.582482 took:0.24866s
Epoch[38/50] step:[34200/55751] loss:2.447111 took:0.38854s
Epoch[38/50] step:[34400/55751] loss:2.143396 took:0.41595s
Epoch[38/50] step:[34600/55751] loss:2.033914 took:0.20067s
Epoch[38/50] step:[34800/55751] loss:2.053836 took:0.26062s
Epoch[38/50] step:[35000/55751] loss:1.665812 took:0.18907s
Epoch[38/50] step:[35200/55751] loss:2.009606 took:0.23120s
Epoch[38/50] step:[35400/55751] loss:2.696718 took:0.22262s
Epoch[38/50] step:[35600/55751] loss:1.888353 took:0.18066s
Epoch[38/50] step:[35800/55751] loss:2.1

Epoch[39/50] step:[4400/55751] loss:2.656513 took:0.24157s
Epoch[39/50] step:[4600/55751] loss:1.211491 took:0.20457s
Epoch[39/50] step:[4800/55751] loss:2.188862 took:0.24324s
Epoch[39/50] step:[5000/55751] loss:2.762195 took:0.29734s
Epoch[39/50] step:[5200/55751] loss:2.021601 took:0.17546s
Epoch[39/50] step:[5400/55751] loss:1.822340 took:0.20607s
Epoch[39/50] step:[5600/55751] loss:2.306521 took:0.66285s
Epoch[39/50] step:[5800/55751] loss:1.953755 took:0.20939s
Epoch[39/50] step:[6000/55751] loss:1.700773 took:0.18962s
Epoch[39/50] step:[6200/55751] loss:2.461432 took:0.38470s
Epoch[39/50] step:[6400/55751] loss:2.446224 took:0.22165s
Epoch[39/50] step:[6600/55751] loss:2.846586 took:0.25232s
Epoch[39/50] step:[6800/55751] loss:2.207908 took:0.21185s
Epoch[39/50] step:[7000/55751] loss:2.008856 took:0.44135s
Epoch[39/50] step:[7200/55751] loss:2.831077 took:0.21988s
Epoch[39/50] step:[7400/55751] loss:2.516859 took:0.31392s
Epoch[39/50] step:[7600/55751] loss:2.140797 took:0.2214

Epoch[39/50] step:[32000/55751] loss:1.911100 took:0.18788s
Epoch[39/50] step:[32200/55751] loss:2.177177 took:0.26269s
Epoch[39/50] step:[32400/55751] loss:2.197391 took:0.35562s
Epoch[39/50] step:[32600/55751] loss:2.477907 took:0.21439s
Epoch[39/50] step:[32800/55751] loss:2.163136 took:0.20431s
Epoch[39/50] step:[33000/55751] loss:2.259949 took:0.25655s
Epoch[39/50] step:[33200/55751] loss:2.566651 took:0.25382s
Epoch[39/50] step:[33400/55751] loss:1.809590 took:0.31570s
Epoch[39/50] step:[33600/55751] loss:1.966422 took:0.19654s
Epoch[39/50] step:[33800/55751] loss:2.171214 took:0.19505s
Epoch[39/50] step:[34000/55751] loss:2.561526 took:0.17260s
Epoch[39/50] step:[34200/55751] loss:1.965879 took:0.30088s
Epoch[39/50] step:[34400/55751] loss:2.215524 took:0.26270s
Epoch[39/50] step:[34600/55751] loss:2.391858 took:0.27255s
Epoch[39/50] step:[34800/55751] loss:2.112506 took:0.20284s
Epoch[39/50] step:[35000/55751] loss:1.905969 took:0.32313s
Epoch[39/50] step:[35200/55751] loss:2.1

Epoch[40/50] step:[3600/55751] loss:2.175590 took:0.20991s
Epoch[40/50] step:[3800/55751] loss:2.190397 took:0.22526s
Epoch[40/50] step:[4000/55751] loss:3.003733 took:0.27131s
Epoch[40/50] step:[4200/55751] loss:1.986342 took:0.25381s
Epoch[40/50] step:[4400/55751] loss:2.156099 took:0.24096s
Epoch[40/50] step:[4600/55751] loss:2.331756 took:0.25207s
Epoch[40/50] step:[4800/55751] loss:2.529643 took:0.23294s
Epoch[40/50] step:[5000/55751] loss:2.885831 took:0.22843s
Epoch[40/50] step:[5200/55751] loss:2.296631 took:0.20840s
Epoch[40/50] step:[5400/55751] loss:2.177197 took:0.18995s
Epoch[40/50] step:[5600/55751] loss:2.236391 took:0.16915s
Epoch[40/50] step:[5800/55751] loss:2.282883 took:0.18269s
Epoch[40/50] step:[6000/55751] loss:1.705985 took:0.22494s
Epoch[40/50] step:[6200/55751] loss:1.907212 took:0.21326s
Epoch[40/50] step:[6400/55751] loss:2.291645 took:0.20410s
Epoch[40/50] step:[6600/55751] loss:2.287600 took:0.18354s
Epoch[40/50] step:[6800/55751] loss:2.090748 took:0.2625

Epoch[40/50] step:[31200/55751] loss:2.593079 took:0.28608s
Epoch[40/50] step:[31400/55751] loss:2.303792 took:0.31691s
Epoch[40/50] step:[31600/55751] loss:2.257458 took:0.27640s
Epoch[40/50] step:[31800/55751] loss:2.277780 took:0.25366s
Epoch[40/50] step:[32000/55751] loss:1.694325 took:0.18236s
Epoch[40/50] step:[32200/55751] loss:2.774316 took:0.18435s
Epoch[40/50] step:[32400/55751] loss:3.039624 took:0.18965s
Epoch[40/50] step:[32600/55751] loss:1.709792 took:0.19872s
Epoch[40/50] step:[32800/55751] loss:2.293479 took:0.25279s
Epoch[40/50] step:[33000/55751] loss:1.902569 took:0.18402s
Epoch[40/50] step:[33200/55751] loss:2.547017 took:0.15535s
Epoch[40/50] step:[33400/55751] loss:2.017070 took:0.18679s
Epoch[40/50] step:[33600/55751] loss:2.142171 took:0.18493s
Epoch[40/50] step:[33800/55751] loss:2.402508 took:0.24018s
Epoch[40/50] step:[34000/55751] loss:2.038411 took:0.23725s
Epoch[40/50] step:[34200/55751] loss:2.033474 took:0.22643s
Epoch[40/50] step:[34400/55751] loss:2.4

Epoch[41/50] step:[2800/55751] loss:2.543057 took:0.19368s
Epoch[41/50] step:[3000/55751] loss:2.426466 took:0.55423s
Epoch[41/50] step:[3200/55751] loss:1.608399 took:0.19074s
Epoch[41/50] step:[3400/55751] loss:2.781811 took:0.30369s
Epoch[41/50] step:[3600/55751] loss:2.515184 took:0.24049s
Epoch[41/50] step:[3800/55751] loss:1.932951 took:0.28513s
Epoch[41/50] step:[4000/55751] loss:2.104798 took:0.24789s
Epoch[41/50] step:[4200/55751] loss:2.192330 took:0.23949s
Epoch[41/50] step:[4400/55751] loss:1.965718 took:0.25327s
Epoch[41/50] step:[4600/55751] loss:2.591097 took:0.23467s
Epoch[41/50] step:[4800/55751] loss:2.859954 took:0.20994s
Epoch[41/50] step:[5000/55751] loss:2.630193 took:0.25027s
Epoch[41/50] step:[5200/55751] loss:2.425174 took:0.28092s
Epoch[41/50] step:[5400/55751] loss:2.514638 took:0.22411s
Epoch[41/50] step:[5600/55751] loss:2.321727 took:0.24603s
Epoch[41/50] step:[5800/55751] loss:2.748484 took:0.34737s
Epoch[41/50] step:[6000/55751] loss:2.288811 took:0.1691

Epoch[41/50] step:[30400/55751] loss:1.935686 took:0.21256s
Epoch[41/50] step:[30600/55751] loss:2.978198 took:0.20813s
Epoch[41/50] step:[30800/55751] loss:2.186508 took:0.27545s
Epoch[41/50] step:[31000/55751] loss:2.239680 took:0.23851s
Epoch[41/50] step:[31200/55751] loss:2.368272 took:0.31930s
Epoch[41/50] step:[31400/55751] loss:2.862912 took:0.23717s
Epoch[41/50] step:[31600/55751] loss:2.271863 took:0.19425s
Epoch[41/50] step:[31800/55751] loss:1.980908 took:0.20913s
Epoch[41/50] step:[32000/55751] loss:1.909103 took:0.23722s
Epoch[41/50] step:[32200/55751] loss:2.491132 took:0.16495s
Epoch[41/50] step:[32400/55751] loss:2.093793 took:0.18273s
Epoch[41/50] step:[32600/55751] loss:2.311458 took:0.20038s
Epoch[41/50] step:[32800/55751] loss:2.521453 took:0.25874s
Epoch[41/50] step:[33000/55751] loss:2.173675 took:0.19355s
Epoch[41/50] step:[33200/55751] loss:1.868097 took:0.23839s
Epoch[41/50] step:[33400/55751] loss:2.408650 took:0.23837s
Epoch[41/50] step:[33600/55751] loss:1.7

Epoch[42/50] step:[2000/55751] loss:2.274584 took:0.24156s
Epoch[42/50] step:[2200/55751] loss:2.223716 took:0.22194s
Epoch[42/50] step:[2400/55751] loss:1.610388 took:0.19369s
Epoch[42/50] step:[2600/55751] loss:1.963005 took:0.25925s
Epoch[42/50] step:[2800/55751] loss:2.645848 took:0.23873s
Epoch[42/50] step:[3000/55751] loss:1.532295 took:0.20808s
Epoch[42/50] step:[3200/55751] loss:1.988271 took:0.19206s
Epoch[42/50] step:[3400/55751] loss:2.971415 took:0.32332s
Epoch[42/50] step:[3600/55751] loss:2.597555 took:0.18727s
Epoch[42/50] step:[3800/55751] loss:1.967206 took:0.25487s
Epoch[42/50] step:[4000/55751] loss:2.309685 took:0.26926s
Epoch[42/50] step:[4200/55751] loss:1.645186 took:0.44112s
Epoch[42/50] step:[4400/55751] loss:2.474474 took:0.24893s
Epoch[42/50] step:[4600/55751] loss:2.144528 took:0.20845s
Epoch[42/50] step:[4800/55751] loss:2.283195 took:0.28956s
Epoch[42/50] step:[5000/55751] loss:2.281024 took:0.17507s
Epoch[42/50] step:[5200/55751] loss:1.913255 took:0.2553

Epoch[42/50] step:[29600/55751] loss:2.320860 took:0.22491s
Epoch[42/50] step:[29800/55751] loss:2.409341 took:0.17977s
Epoch[42/50] step:[30000/55751] loss:2.287416 took:0.21919s
Epoch[42/50] step:[30200/55751] loss:2.434669 took:0.26508s
Epoch[42/50] step:[30400/55751] loss:2.277326 took:0.22234s
Epoch[42/50] step:[30600/55751] loss:2.676822 took:0.27476s
Epoch[42/50] step:[30800/55751] loss:1.690400 took:0.22410s
Epoch[42/50] step:[31000/55751] loss:2.667983 took:0.27220s
Epoch[42/50] step:[31200/55751] loss:2.105670 took:0.18489s
Epoch[42/50] step:[31400/55751] loss:2.438746 took:0.29922s
Epoch[42/50] step:[31600/55751] loss:1.980518 took:0.25694s
Epoch[42/50] step:[31800/55751] loss:2.072547 took:0.23233s
Epoch[42/50] step:[32000/55751] loss:1.762211 took:0.22524s
Epoch[42/50] step:[32200/55751] loss:1.930982 took:0.19992s
Epoch[42/50] step:[32400/55751] loss:2.305040 took:0.21719s
Epoch[42/50] step:[32600/55751] loss:2.704295 took:0.21775s
Epoch[42/50] step:[32800/55751] loss:2.2

Epoch[43/50] step:[1200/55751] loss:2.265041 took:0.24926s
Epoch[43/50] step:[1400/55751] loss:2.544487 took:0.28380s
Epoch[43/50] step:[1600/55751] loss:2.972882 took:0.27919s
Epoch[43/50] step:[1800/55751] loss:1.766817 took:0.30460s
Epoch[43/50] step:[2000/55751] loss:2.512203 took:0.20365s
Epoch[43/50] step:[2200/55751] loss:2.299537 took:0.20104s
Epoch[43/50] step:[2400/55751] loss:2.033998 took:0.18655s
Epoch[43/50] step:[2600/55751] loss:2.132589 took:0.32740s
Epoch[43/50] step:[2800/55751] loss:2.658781 took:0.22752s
Epoch[43/50] step:[3000/55751] loss:2.180866 took:0.17964s
Epoch[43/50] step:[3200/55751] loss:2.130319 took:0.24885s
Epoch[43/50] step:[3400/55751] loss:2.163819 took:0.33107s
Epoch[43/50] step:[3600/55751] loss:2.374669 took:0.23488s
Epoch[43/50] step:[3800/55751] loss:2.840731 took:0.31282s
Epoch[43/50] step:[4000/55751] loss:2.494404 took:0.17121s
Epoch[43/50] step:[4200/55751] loss:2.086071 took:0.20244s
Epoch[43/50] step:[4400/55751] loss:2.378788 took:0.2334

Epoch[43/50] step:[28800/55751] loss:2.194536 took:0.17476s
Epoch[43/50] step:[29000/55751] loss:2.256992 took:0.24857s
Epoch[43/50] step:[29200/55751] loss:2.601149 took:0.26751s
Epoch[43/50] step:[29400/55751] loss:2.530152 took:0.17870s
Epoch[43/50] step:[29600/55751] loss:1.785859 took:0.28205s
Epoch[43/50] step:[29800/55751] loss:1.710262 took:0.27335s
Epoch[43/50] step:[30000/55751] loss:2.030746 took:0.30562s
Epoch[43/50] step:[30200/55751] loss:2.650089 took:0.25872s
Epoch[43/50] step:[30400/55751] loss:2.510522 took:0.23424s
Epoch[43/50] step:[30600/55751] loss:2.537228 took:0.20321s
Epoch[43/50] step:[30800/55751] loss:2.351872 took:0.26235s
Epoch[43/50] step:[31000/55751] loss:2.518430 took:0.22970s
Epoch[43/50] step:[31200/55751] loss:1.831901 took:0.32213s
Epoch[43/50] step:[31400/55751] loss:2.826542 took:0.52702s
Epoch[43/50] step:[31600/55751] loss:2.268192 took:0.19044s
Epoch[43/50] step:[31800/55751] loss:2.001922 took:0.20473s
Epoch[43/50] step:[32000/55751] loss:2.1

Epoch[44/50] step:[400/55751] loss:2.114024 took:0.20604s
Epoch[44/50] step:[600/55751] loss:2.178312 took:0.33398s
Epoch[44/50] step:[800/55751] loss:2.051505 took:0.17891s
Epoch[44/50] step:[1000/55751] loss:3.098047 took:0.27997s
Epoch[44/50] step:[1200/55751] loss:2.151304 took:0.26500s
Epoch[44/50] step:[1400/55751] loss:2.743674 took:0.20497s
Epoch[44/50] step:[1600/55751] loss:1.771313 took:0.20243s
Epoch[44/50] step:[1800/55751] loss:2.061584 took:0.25990s
Epoch[44/50] step:[2000/55751] loss:2.206826 took:0.22794s
Epoch[44/50] step:[2200/55751] loss:2.582218 took:0.22743s
Epoch[44/50] step:[2400/55751] loss:2.144224 took:0.29969s
Epoch[44/50] step:[2600/55751] loss:2.062460 took:0.23632s
Epoch[44/50] step:[2800/55751] loss:2.779951 took:0.23799s
Epoch[44/50] step:[3000/55751] loss:1.508814 took:0.18962s
Epoch[44/50] step:[3200/55751] loss:2.269618 took:0.22590s
Epoch[44/50] step:[3400/55751] loss:1.734347 took:0.22173s
Epoch[44/50] step:[3600/55751] loss:2.054340 took:0.24420s


Epoch[44/50] step:[28000/55751] loss:2.113197 took:0.28322s
Epoch[44/50] step:[28200/55751] loss:2.092333 took:0.29690s
Epoch[44/50] step:[28400/55751] loss:2.368290 took:0.29345s
Epoch[44/50] step:[28600/55751] loss:2.235302 took:0.26676s
Epoch[44/50] step:[28800/55751] loss:2.641880 took:0.23421s
Epoch[44/50] step:[29000/55751] loss:2.248405 took:0.22374s
Epoch[44/50] step:[29200/55751] loss:2.376436 took:0.30358s
Epoch[44/50] step:[29400/55751] loss:2.748812 took:0.17849s
Epoch[44/50] step:[29600/55751] loss:2.899393 took:0.20940s
Epoch[44/50] step:[29800/55751] loss:2.099567 took:0.27959s
Epoch[44/50] step:[30000/55751] loss:1.926311 took:0.17192s
Epoch[44/50] step:[30200/55751] loss:1.813287 took:0.18266s
Epoch[44/50] step:[30400/55751] loss:3.076495 took:0.35625s
Epoch[44/50] step:[30600/55751] loss:2.730826 took:0.19860s
Epoch[44/50] step:[30800/55751] loss:1.994083 took:0.25550s
Epoch[44/50] step:[31000/55751] loss:2.552493 took:0.18256s
Epoch[44/50] step:[31200/55751] loss:2.5

Epoch[44/50] step:[55400/55751] loss:2.198607 took:0.23736s
Epoch[44/50] step:[55600/55751] loss:2.230861 took:0.26477s
Epoch[45/50] step:[0/55751] loss:2.617032 took:0.19107s
Epoch[45/50] step:[200/55751] loss:2.014826 took:0.22866s
Epoch[45/50] step:[400/55751] loss:2.390681 took:0.24092s
Epoch[45/50] step:[600/55751] loss:2.041064 took:0.21091s
Epoch[45/50] step:[800/55751] loss:1.758974 took:0.23911s
Epoch[45/50] step:[1000/55751] loss:2.675649 took:0.23674s
Epoch[45/50] step:[1200/55751] loss:1.982502 took:0.27096s
Epoch[45/50] step:[1400/55751] loss:1.772110 took:0.28898s
Epoch[45/50] step:[1600/55751] loss:1.644214 took:0.22186s
Epoch[45/50] step:[1800/55751] loss:1.948589 took:0.26225s
Epoch[45/50] step:[2000/55751] loss:2.784228 took:0.22691s
Epoch[45/50] step:[2200/55751] loss:1.966428 took:0.20690s
Epoch[45/50] step:[2400/55751] loss:2.556671 took:0.26799s
Epoch[45/50] step:[2600/55751] loss:2.892498 took:0.21098s
Epoch[45/50] step:[2800/55751] loss:2.229599 took:0.18701s
Ep

Epoch[45/50] step:[27200/55751] loss:2.131039 took:0.24721s
Epoch[45/50] step:[27400/55751] loss:2.574883 took:0.24164s
Epoch[45/50] step:[27600/55751] loss:2.476509 took:0.17610s
Epoch[45/50] step:[27800/55751] loss:2.427271 took:0.17673s
Epoch[45/50] step:[28000/55751] loss:2.525440 took:0.24417s
Epoch[45/50] step:[28200/55751] loss:2.420470 took:0.23967s
Epoch[45/50] step:[28400/55751] loss:2.732904 took:0.19683s
Epoch[45/50] step:[28600/55751] loss:1.997287 took:0.17875s
Epoch[45/50] step:[28800/55751] loss:2.376720 took:0.19775s
Epoch[45/50] step:[29000/55751] loss:1.964165 took:0.26304s
Epoch[45/50] step:[29200/55751] loss:2.770571 took:0.16535s
Epoch[45/50] step:[29400/55751] loss:2.366434 took:0.25101s
Epoch[45/50] step:[29600/55751] loss:2.051817 took:0.19148s
Epoch[45/50] step:[29800/55751] loss:2.503131 took:0.21276s
Epoch[45/50] step:[30000/55751] loss:1.858993 took:0.26085s
Epoch[45/50] step:[30200/55751] loss:2.625190 took:0.29715s
Epoch[45/50] step:[30400/55751] loss:2.5

Epoch[45/50] step:[54600/55751] loss:2.075067 took:0.30344s
Epoch[45/50] step:[54800/55751] loss:2.360637 took:0.26417s
Epoch[45/50] step:[55000/55751] loss:2.834892 took:0.32338s
Epoch[45/50] step:[55200/55751] loss:2.199504 took:0.29226s
Epoch[45/50] step:[55400/55751] loss:2.267970 took:0.20929s
Epoch[45/50] step:[55600/55751] loss:2.584368 took:0.23216s
Epoch[46/50] step:[0/55751] loss:2.377715 took:0.34478s
Epoch[46/50] step:[200/55751] loss:1.894819 took:0.29660s
Epoch[46/50] step:[400/55751] loss:1.964525 took:0.17072s
Epoch[46/50] step:[600/55751] loss:2.118491 took:0.30961s
Epoch[46/50] step:[800/55751] loss:2.265806 took:0.21215s
Epoch[46/50] step:[1000/55751] loss:2.007946 took:0.25212s
Epoch[46/50] step:[1200/55751] loss:2.361927 took:0.18780s
Epoch[46/50] step:[1400/55751] loss:2.087618 took:0.22373s
Epoch[46/50] step:[1600/55751] loss:2.442199 took:0.19410s
Epoch[46/50] step:[1800/55751] loss:2.129166 took:0.22657s
Epoch[46/50] step:[2000/55751] loss:3.145575 took:0.51292

Epoch[46/50] step:[26400/55751] loss:1.950510 took:0.24106s
Epoch[46/50] step:[26600/55751] loss:2.561279 took:0.34134s
Epoch[46/50] step:[26800/55751] loss:2.807077 took:0.30342s
Epoch[46/50] step:[27000/55751] loss:2.936206 took:0.23827s
Epoch[46/50] step:[27200/55751] loss:2.629016 took:0.29165s
Epoch[46/50] step:[27400/55751] loss:1.914286 took:0.27075s
Epoch[46/50] step:[27600/55751] loss:2.294692 took:0.16593s
Epoch[46/50] step:[27800/55751] loss:2.459576 took:0.21132s
Epoch[46/50] step:[28000/55751] loss:2.034016 took:0.22098s
Epoch[46/50] step:[28200/55751] loss:2.322544 took:0.31643s
Epoch[46/50] step:[28400/55751] loss:2.798674 took:0.23609s
Epoch[46/50] step:[28600/55751] loss:2.656285 took:0.22251s
Epoch[46/50] step:[28800/55751] loss:2.822549 took:0.27979s
Epoch[46/50] step:[29000/55751] loss:2.315841 took:0.19533s
Epoch[46/50] step:[29200/55751] loss:2.132144 took:0.18649s
Epoch[46/50] step:[29400/55751] loss:3.046693 took:0.48059s
Epoch[46/50] step:[29600/55751] loss:1.3

Epoch[46/50] step:[53800/55751] loss:2.593574 took:0.58254s
Epoch[46/50] step:[54000/55751] loss:2.113031 took:0.25169s
Epoch[46/50] step:[54200/55751] loss:2.489883 took:0.38205s
Epoch[46/50] step:[54400/55751] loss:2.365896 took:0.28263s
Epoch[46/50] step:[54600/55751] loss:2.433249 took:0.21033s
Epoch[46/50] step:[54800/55751] loss:2.295783 took:0.30470s
Epoch[46/50] step:[55000/55751] loss:2.021626 took:0.18602s
Epoch[46/50] step:[55200/55751] loss:2.337139 took:0.25072s
Epoch[46/50] step:[55400/55751] loss:2.527434 took:0.20196s
Epoch[46/50] step:[55600/55751] loss:2.647503 took:0.25029s
Epoch[47/50] step:[0/55751] loss:1.814948 took:0.20454s
Epoch[47/50] step:[200/55751] loss:1.800951 took:0.25025s
Epoch[47/50] step:[400/55751] loss:3.080502 took:0.33575s
Epoch[47/50] step:[600/55751] loss:2.026397 took:0.18568s
Epoch[47/50] step:[800/55751] loss:2.582563 took:0.23808s
Epoch[47/50] step:[1000/55751] loss:1.751091 took:0.26740s
Epoch[47/50] step:[1200/55751] loss:1.352289 took:0.1

Epoch[47/50] step:[25600/55751] loss:2.763408 took:0.22703s
Epoch[47/50] step:[25800/55751] loss:1.997402 took:0.20635s
Epoch[47/50] step:[26000/55751] loss:2.154436 took:0.19323s
Epoch[47/50] step:[26200/55751] loss:2.288305 took:0.21917s
Epoch[47/50] step:[26400/55751] loss:1.881807 took:0.19660s
Epoch[47/50] step:[26600/55751] loss:2.437283 took:0.31037s
Epoch[47/50] step:[26800/55751] loss:2.359913 took:0.23326s
Epoch[47/50] step:[27000/55751] loss:2.686620 took:0.17735s
Epoch[47/50] step:[27200/55751] loss:2.037548 took:0.20609s
Epoch[47/50] step:[27400/55751] loss:2.570117 took:0.21746s
Epoch[47/50] step:[27600/55751] loss:1.922024 took:0.24242s
Epoch[47/50] step:[27800/55751] loss:2.141445 took:0.24537s
Epoch[47/50] step:[28000/55751] loss:2.415810 took:0.22824s
Epoch[47/50] step:[28200/55751] loss:1.980331 took:0.20296s
Epoch[47/50] step:[28400/55751] loss:2.197510 took:0.21580s
Epoch[47/50] step:[28600/55751] loss:2.334085 took:0.23914s
Epoch[47/50] step:[28800/55751] loss:1.9

Epoch[47/50] step:[53000/55751] loss:2.065729 took:0.19456s
Epoch[47/50] step:[53200/55751] loss:2.444023 took:0.30468s
Epoch[47/50] step:[53400/55751] loss:2.664458 took:0.25849s
Epoch[47/50] step:[53600/55751] loss:1.839886 took:0.29984s
Epoch[47/50] step:[53800/55751] loss:2.462192 took:0.22303s
Epoch[47/50] step:[54000/55751] loss:2.492886 took:0.18579s
Epoch[47/50] step:[54200/55751] loss:1.817048 took:0.38204s
Epoch[47/50] step:[54400/55751] loss:1.929944 took:0.18832s
Epoch[47/50] step:[54600/55751] loss:2.134052 took:0.22709s
Epoch[47/50] step:[54800/55751] loss:1.936413 took:0.23039s
Epoch[47/50] step:[55000/55751] loss:2.523053 took:0.19439s
Epoch[47/50] step:[55200/55751] loss:2.363389 took:0.23483s
Epoch[47/50] step:[55400/55751] loss:1.978359 took:0.20474s
Epoch[47/50] step:[55600/55751] loss:2.887799 took:0.44971s
Epoch[48/50] step:[0/55751] loss:1.923836 took:0.18861s
Epoch[48/50] step:[200/55751] loss:2.631896 took:0.22370s
Epoch[48/50] step:[400/55751] loss:1.772829 to

Epoch[48/50] step:[24800/55751] loss:2.720794 took:0.22603s
Epoch[48/50] step:[25000/55751] loss:1.725913 took:0.32832s
Epoch[48/50] step:[25200/55751] loss:1.935414 took:0.27912s
Epoch[48/50] step:[25400/55751] loss:1.891289 took:0.23663s
Epoch[48/50] step:[25600/55751] loss:2.355294 took:0.32782s
Epoch[48/50] step:[25800/55751] loss:2.363008 took:0.35423s
Epoch[48/50] step:[26000/55751] loss:1.961689 took:0.31092s
Epoch[48/50] step:[26200/55751] loss:1.687769 took:0.23782s
Epoch[48/50] step:[26400/55751] loss:2.420962 took:0.19594s
Epoch[48/50] step:[26600/55751] loss:1.642516 took:0.21945s
Epoch[48/50] step:[26800/55751] loss:2.550986 took:0.19625s
Epoch[48/50] step:[27000/55751] loss:2.051977 took:0.36339s
Epoch[48/50] step:[27200/55751] loss:2.535269 took:0.23821s
Epoch[48/50] step:[27400/55751] loss:2.626216 took:0.29358s
Epoch[48/50] step:[27600/55751] loss:2.200382 took:0.30248s
Epoch[48/50] step:[27800/55751] loss:2.616497 took:0.24235s
Epoch[48/50] step:[28000/55751] loss:2.4

Epoch[48/50] step:[52200/55751] loss:1.906689 took:0.19815s
Epoch[48/50] step:[52400/55751] loss:2.609971 took:0.20407s
Epoch[48/50] step:[52600/55751] loss:2.213368 took:0.20067s
Epoch[48/50] step:[52800/55751] loss:2.350420 took:0.18873s
Epoch[48/50] step:[53000/55751] loss:2.264428 took:0.22892s
Epoch[48/50] step:[53200/55751] loss:2.142832 took:0.35526s
Epoch[48/50] step:[53400/55751] loss:2.367175 took:0.24999s
Epoch[48/50] step:[53600/55751] loss:2.294425 took:0.34706s
Epoch[48/50] step:[53800/55751] loss:1.750660 took:0.17772s
Epoch[48/50] step:[54000/55751] loss:2.323182 took:0.24161s
Epoch[48/50] step:[54200/55751] loss:2.002518 took:0.21033s
Epoch[48/50] step:[54400/55751] loss:2.652279 took:0.33452s
Epoch[48/50] step:[54600/55751] loss:2.700330 took:0.25083s
Epoch[48/50] step:[54800/55751] loss:2.456713 took:0.26509s
Epoch[48/50] step:[55000/55751] loss:2.265565 took:0.30755s
Epoch[48/50] step:[55200/55751] loss:2.230322 took:0.18373s
Epoch[48/50] step:[55400/55751] loss:2.7

Epoch[49/50] step:[24000/55751] loss:2.025722 took:0.20553s
Epoch[49/50] step:[24200/55751] loss:2.719872 took:0.26143s
Epoch[49/50] step:[24400/55751] loss:1.764893 took:0.24655s
Epoch[49/50] step:[24600/55751] loss:2.043299 took:0.22124s
Epoch[49/50] step:[24800/55751] loss:2.095097 took:0.21556s
Epoch[49/50] step:[25000/55751] loss:1.852816 took:0.19264s
Epoch[49/50] step:[25200/55751] loss:2.254224 took:0.21574s
Epoch[49/50] step:[25400/55751] loss:2.887486 took:0.42318s
Epoch[49/50] step:[25600/55751] loss:2.139684 took:0.25997s
Epoch[49/50] step:[25800/55751] loss:2.754926 took:0.29533s
Epoch[49/50] step:[26000/55751] loss:1.624055 took:0.20740s
Epoch[49/50] step:[26200/55751] loss:2.828397 took:0.22959s
Epoch[49/50] step:[26400/55751] loss:3.272379 took:0.21587s
Epoch[49/50] step:[26600/55751] loss:2.536821 took:0.18318s
Epoch[49/50] step:[26800/55751] loss:2.569901 took:0.26595s
Epoch[49/50] step:[27000/55751] loss:2.451966 took:0.21422s
Epoch[49/50] step:[27200/55751] loss:2.3

Epoch[49/50] step:[51400/55751] loss:1.994718 took:0.20464s
Epoch[49/50] step:[51600/55751] loss:2.005978 took:0.28743s
Epoch[49/50] step:[51800/55751] loss:2.805684 took:0.26464s
Epoch[49/50] step:[52000/55751] loss:1.844638 took:0.25742s
Epoch[49/50] step:[52200/55751] loss:2.435005 took:0.20836s
Epoch[49/50] step:[52400/55751] loss:2.836753 took:0.32289s
Epoch[49/50] step:[52600/55751] loss:1.969807 took:0.22133s
Epoch[49/50] step:[52800/55751] loss:1.914384 took:0.19575s
Epoch[49/50] step:[53000/55751] loss:2.016619 took:0.37282s
Epoch[49/50] step:[53200/55751] loss:2.404088 took:0.36795s
Epoch[49/50] step:[53400/55751] loss:2.072329 took:0.21565s
Epoch[49/50] step:[53600/55751] loss:2.648263 took:0.28318s
Epoch[49/50] step:[53800/55751] loss:2.273766 took:0.19336s
Epoch[49/50] step:[54000/55751] loss:2.042050 took:0.17852s
Epoch[49/50] step:[54200/55751] loss:3.117425 took:0.22914s
Epoch[49/50] step:[54400/55751] loss:2.622095 took:0.18073s
Epoch[49/50] step:[54600/55751] loss:2.4

In [29]:
tl.files.save_npz(net.all_params, name='n_1M.npz', sess=sess)

[*] n_1M.npz saved
