In [1]:
from sklearn.datasets import load_files       
from keras.utils import np_utils
import numpy as np
from glob import glob

# define function to load train, test, and validation datasets
def load_dataset(path):
    data = load_files(path)
    medical_files = np.array(data['filenames'])
    medical_targets = np_utils.to_categorical(np.array(data['target']), 3)
    return medical_files, medical_targets

# load train, test, and validation datasets
train_files, train_targets = load_dataset('D:\data\medical AI\data/train')
valid_files, valid_targets = load_dataset('D:\data\medical AI\data/valid')
test_files, test_targets = load_dataset('D:\data\medical AI\data/test')

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
#数据处理，使用Keras的办法将图片统一转换为768*512*3的尺寸
from keras.preprocessing import image                  
from tqdm import tqdm

def path_to_tensor(img_path):
    # loads RGB image as PIL.Image.Image type
    img = image.load_img(img_path, target_size=(224, 224))
    # convert PIL.Image.Image type to 3D tensor with shape (224, 224, 3)
    x = image.img_to_array(img)
    # convert 3D tensor to 4D tensor with shape (1, 224, 224, 3) and return 4D tensor
    return np.expand_dims(x, axis=0)

def paths_to_tensor(img_paths):
    list_of_tensors = [path_to_tensor(img_path) for img_path in tqdm(img_paths)]
    return np.vstack(list_of_tensors)

In [3]:
dog_names = [item[30:-1] for item in sorted(glob("D:\data\medical AI\data/train/*/"))]

In [4]:
dog_names

['melanoma', 'nevus', 'seborrheic_keratosis']

In [3]:
#数据处理，将统一后的图片转换为批向量,并进行归一化
from PIL import ImageFile                            
ImageFile.LOAD_TRUNCATED_IMAGES = True                 

# pre-process the data for Keras
#train_tensors = paths_to_tensor(train_files).astype('float32')/255
valid_tensors = paths_to_tensor(valid_files).astype('float32')/255
#test_tensors = paths_to_tensor(test_files).astype('float32')/255

100%|████████████████████████████████████████| 150/150 [00:54<00:00,  2.73it/s]


In [4]:
import tensorflow as tf
#搭建CNN网络
def model_variables(weight=224, height=224,channels=3):
    input_ = tf.placeholder(tf.float32, (None, weight, height, channels), name='input')
    labels = tf.placeholder(tf.int32, (None, channels), name='labels')
    lr = tf.placeholder(tf.float32)
    return input_, labels, lr

def build_model(input_, alpha=0.2):       
    
        
    x1 = tf.layers.conv2d(input_, filters=16, kernel_size=5, strides=2, padding='same', 
                                           kernel_initializer=tf.contrib.layers.xavier_initializer())
    x1 = tf.layers.batch_normalization(x1)
    x1 = tf.maximum(0.2*x1, x1)
    x1 = tf.nn.dropout(x1, keep_prob = 0.5)
                #256*256*16

    x2 = tf.layers.conv2d(x1, filters=32, kernel_size=5, strides=2, padding='same',
                                          kernel_initializer = tf.contrib.layers.xavier_initializer())
    x2 = tf.layers.batch_normalization(x2)
    x2 = tf.maximum(0.2* x2, x2)
    x2 = tf.nn.dropout(x2, keep_prob = 0.5)
                #128*128*32

    x3 = tf.layers.conv2d(x2, filters=64, kernel_size=5, strides=2, padding='same',
                                          kernel_initializer = tf.contrib.layers.xavier_initializer())
    x3 = tf.layers.batch_normalization(x3)
    x3 = tf.maximum(0.2*x3, x3)
    x3 = tf.nn.dropout(x3, keep_prob = 0.5)
                #64*64*64

    x4 = tf.reshape(x3,(-1,28*28*64))
    logits = tf.layers.dense(x4, 3)
    output = tf.nn.softmax(logits)
    return logits, output

def loss(logits, labels):
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels=labels))
    return loss

def opt(loss,learning_rate, beta1=0.8):
    opt = tf.train.AdamOptimizer(learning_rate=0.01, beta1 =0.8).minimize(loss)
    return opt

In [6]:
def get_batches(filepath, targets, batch_size):
    whole_size = filepath.shape[0]
    num_batches = whole_size//batch_size
    for i in range(num_batches):
        yield paths_to_tensor(filepath[i*batch_size:(i+1)*batch_size]).astype('float32')/255, targets[i*batch_size:(i+1)*batch_size]

In [56]:
epoches = 10
batch_size = 100
learning_rate = 0.005





In [57]:
input_, labels, lr = model_variables()
logits, output = build_model(input_)
loss_ =loss(logits, labels)
opt_  = opt(loss_,learning_rate)
steps = 0
saver = tf.train.Saver()
train_loss = []
valid_loss = 0

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(epoches):
        for train_tensors, train_targets_batch in get_batches(train_files,train_targets,batch_size):
            #for batch_img, batch_label in get_batch(train_data_path):
            #_ = sess.run(loss_, )
            _,__ = sess.run([loss_,opt_],feed_dict={input_:train_tensors, labels:train_targets_batch})
            steps += 1
            train_loss.append(_)
            print('{}:train loss is {}'.format(steps,_))
            
                
            if steps%10==0:
                _ = sess.run(loss_, feed_dict={input_:valid_tensors, labels:valid_targets})
                if valid_loss ==0 or valid_loss>= _:
                    print('valid_loss improves')
                    valid_loss = _
                    saver.save(sess, './checkpoints/medical.ckpt')
                print('{}: valid_loss is{}'.format(steps, _))


  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:12,  8.12it/s]
  3%|█▎                                        | 3/100 [00:00<00:09, 10.02it/s]
  4%|█▋                                        | 4/100 [00:00<00:10,  9.43it/s]
  5%|██                                        | 5/100 [00:00<00:10,  8.74it/s]
  7%|██▉                                       | 7/100 [00:00<00:12,  7.26it/s]
  8%|███▎                                      | 8/100 [00:01<00:14,  6.26it/s]
  9%|███▊                                      | 9/100 [00:01<00:15,  5.94it/s]
 11%|████▌                                    | 11/100 [00:01<00:13,  6.45it/s]
 12%|████▉                                    | 12/100 [00:01<00:14,  6.29it/s]
 13%|█████▎                                   | 13/100 [00:02<00:14,  5.99it/s]
 15%|██████▏                                  | 15/100 [00:02<00:13,  6.25it/s]
 16%|██████▌                           

1:train loss is 1.1488020420074463



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:03,  1.55it/s]
  2%|▊                                         | 2/100 [00:00<00:39,  2.51it/s]
  4%|█▋                                        | 4/100 [00:01<00:24,  3.98it/s]
  7%|██▉                                       | 7/100 [00:01<00:15,  6.09it/s]
  8%|███▎                                      | 8/100 [00:01<00:15,  5.82it/s]
  9%|███▊                                      | 9/100 [00:01<00:15,  5.90it/s]
 13%|█████▎                                   | 13/100 [00:01<00:11,  7.86it/s]
 15%|██████▏                                  | 15/100 [00:01<00:10,  8.03it/s]
 17%|██████▉                                  | 17/100 [00:02<00:10,  7.63it/s]
 20%|████████▏                                | 20/100 [00:02<00:09,  8.55it/s]
 24%|█████████▊                               | 24/100 [00:02<00:07,  9.70it/s]
 26%|██████████▋                       

2:train loss is 27.879741668701172



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:05, 19.24it/s]
  5%|██                                        | 5/100 [00:00<00:08, 11.51it/s]
  6%|██▌                                       | 6/100 [00:00<00:10,  9.15it/s]
  7%|██▉                                       | 7/100 [00:00<00:12,  7.29it/s]
  8%|███▎                                      | 8/100 [00:01<00:13,  6.66it/s]
  9%|███▊                                      | 9/100 [00:01<00:13,  6.92it/s]
 10%|████                                     | 10/100 [00:01<00:12,  6.97it/s]
 11%|████▌                                    | 11/100 [00:01<00:13,  6.52it/s]
 12%|████▉                                    | 12/100 [00:01<00:13,  6.36it/s]
 13%|█████▎                                   | 13/100 [00:02<00:14,  6.15it/s]
 15%|██████▏                                  | 15/100 [00:02<00:13,  6.49it/s]
 17%|██████▉                           

3:train loss is 184.07533264160156



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:13,  7.19it/s]
  3%|█▎                                        | 3/100 [00:00<00:09, 10.48it/s]
  6%|██▌                                       | 6/100 [00:00<00:09, 10.35it/s]
  8%|███▎                                      | 8/100 [00:00<00:08, 10.47it/s]
  9%|███▊                                      | 9/100 [00:01<00:14,  6.32it/s]
 10%|████                                     | 10/100 [00:02<00:19,  4.67it/s]
 13%|█████▎                                   | 13/100 [00:02<00:15,  5.69it/s]
 15%|██████▏                                  | 15/100 [00:02<00:14,  5.87it/s]
 18%|███████▍                                 | 18/100 [00:02<00:12,  6.73it/s]
 20%|████████▏                                | 20/100 [00:03<00:14,  5.48it/s]
 22%|█████████                                | 22/100 [00:04<00:14,  5.46it/s]
 25%|██████████▎                       

4:train loss is 238.03683471679688



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:07, 13.35it/s]
  4%|█▋                                        | 4/100 [00:00<00:12,  7.48it/s]
  8%|███▎                                      | 8/100 [00:00<00:09,  9.61it/s]
  9%|███▊                                      | 9/100 [00:01<00:11,  8.22it/s]
 10%|████                                     | 10/100 [00:01<00:11,  7.61it/s]
 12%|████▉                                    | 12/100 [00:01<00:11,  7.75it/s]
 13%|█████▎                                   | 13/100 [00:01<00:11,  7.89it/s]
 14%|█████▋                                   | 14/100 [00:01<00:11,  7.21it/s]
 16%|██████▌                                  | 16/100 [00:02<00:13,  6.12it/s]
 17%|██████▉                                  | 17/100 [00:02<00:13,  6.16it/s]
 18%|███████▍                                 | 18/100 [00:02<00:13,  6.18it/s]
 19%|███████▊                          

5:train loss is 552.7451782226562



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:09,  9.84it/s]
  6%|██▌                                       | 6/100 [00:00<00:06, 13.53it/s]
  9%|███▊                                      | 9/100 [00:00<00:05, 15.86it/s]
 11%|████▌                                    | 11/100 [00:00<00:06, 13.24it/s]
 15%|██████▏                                  | 15/100 [00:01<00:05, 14.20it/s]
 18%|███████▍                                 | 18/100 [00:01<00:06, 12.63it/s]
 20%|████████▏                                | 20/100 [00:01<00:06, 11.54it/s]
 22%|█████████                                | 22/100 [00:02<00:09,  8.55it/s]
 23%|█████████▍                               | 23/100 [00:02<00:09,  8.18it/s]
 25%|██████████▎                              | 25/100 [00:03<00:09,  8.31it/s]
 26%|██████████▋                              | 26/100 [00:03<00:09,  8.11it/s]
 27%|███████████                       

6:train loss is 86.71621704101562



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:06,  1.48it/s]
  2%|▊                                         | 2/100 [00:01<01:08,  1.44it/s]
  3%|█▎                                        | 3/100 [00:01<00:53,  1.80it/s]
  5%|██                                        | 5/100 [00:01<00:34,  2.75it/s]
  6%|██▌                                       | 6/100 [00:02<00:38,  2.41it/s]
  8%|███▎                                      | 8/100 [00:02<00:30,  2.98it/s]
 11%|████▌                                    | 11/100 [00:03<00:27,  3.20it/s]
 12%|████▉                                    | 12/100 [00:03<00:27,  3.23it/s]
 13%|█████▎                                   | 13/100 [00:03<00:25,  3.35it/s]
 14%|█████▋                                   | 14/100 [00:03<00:24,  3.50it/s]
 15%|██████▏                                  | 15/100 [00:04<00:24,  3.52it/s]
 16%|██████▌                           

7:train loss is 12.498259544372559



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:16,  5.91it/s]
  3%|█▎                                        | 3/100 [00:00<00:18,  5.14it/s]
  6%|██▌                                       | 6/100 [00:00<00:10,  8.68it/s]
 10%|████                                     | 10/100 [00:00<00:07, 12.28it/s]
 12%|████▉                                    | 12/100 [00:01<00:11,  7.60it/s]
 14%|█████▋                                   | 14/100 [00:02<00:13,  6.18it/s]
 17%|██████▉                                  | 17/100 [00:02<00:12,  6.65it/s]
 18%|███████▍                                 | 18/100 [00:02<00:12,  6.33it/s]
 19%|███████▊                                 | 19/100 [00:03<00:12,  6.28it/s]
 21%|████████▌                                | 21/100 [00:03<00:12,  6.47it/s]
 23%|█████████▍                               | 23/100 [00:03<00:11,  6.75it/s]
 24%|█████████▊                        

8:train loss is 293.48046875



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:11,  8.18it/s]
  3%|█▎                                        | 3/100 [00:00<00:14,  6.84it/s]
  6%|██▌                                       | 6/100 [00:00<00:11,  8.25it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  8.01it/s]
  8%|███▎                                      | 8/100 [00:01<00:12,  7.10it/s]
 10%|████                                     | 10/100 [00:01<00:12,  7.07it/s]
 11%|████▌                                    | 11/100 [00:01<00:12,  7.03it/s]
 12%|████▉                                    | 12/100 [00:01<00:12,  6.99it/s]
 13%|█████▎                                   | 13/100 [00:02<00:15,  5.52it/s]
 14%|█████▋                                   | 14/100 [00:02<00:15,  5.60it/s]
 16%|██████▌                                  | 16/100 [00:02<00:14,  5.67it/s]
 17%|██████▉                           

9:train loss is 409.0333557128906



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:02,  1.58it/s]
  2%|▊                                         | 2/100 [00:00<00:41,  2.35it/s]
  5%|██                                        | 5/100 [00:01<00:20,  4.67it/s]
  6%|██▌                                       | 6/100 [00:01<00:18,  5.03it/s]
  7%|██▉                                       | 7/100 [00:01<00:17,  5.22it/s]
  9%|███▊                                      | 9/100 [00:01<00:16,  5.45it/s]
 10%|████                                     | 10/100 [00:01<00:17,  5.18it/s]
 11%|████▌                                    | 11/100 [00:02<00:17,  5.12it/s]
 12%|████▉                                    | 12/100 [00:02<00:17,  4.93it/s]
 13%|█████▎                                   | 13/100 [00:02<00:17,  5.06it/s]
 16%|██████▌                                  | 16/100 [00:02<00:14,  5.97it/s]
 18%|███████▍                          

10:train loss is 19.15381622314453
valid_loss improves
10: valid_loss is70.80048370361328



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:21,  4.50it/s]
  4%|█▋                                        | 4/100 [00:00<00:17,  5.48it/s]
  6%|██▌                                       | 6/100 [00:00<00:12,  7.23it/s]
  7%|██▉                                       | 7/100 [00:00<00:12,  7.48it/s]
  9%|███▊                                      | 9/100 [00:01<00:11,  8.00it/s]
 12%|████▉                                    | 12/100 [00:01<00:10,  8.66it/s]
 15%|██████▏                                  | 15/100 [00:01<00:09,  9.23it/s]
 17%|██████▉                                  | 17/100 [00:02<00:09,  8.48it/s]
 18%|███████▍                                 | 18/100 [00:02<00:09,  8.44it/s]
 19%|███████▊                                 | 19/100 [00:02<00:12,  6.61it/s]
 21%|████████▌                                | 21/100 [00:03<00:12,  6.51it/s]
 22%|█████████                         

11:train loss is 82.54080200195312



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:08, 12.04it/s]
  3%|█▎                                        | 3/100 [00:00<00:13,  7.06it/s]
  4%|█▋                                        | 4/100 [00:00<00:14,  6.52it/s]
  6%|██▌                                       | 6/100 [00:00<00:11,  8.34it/s]
  8%|███▎                                      | 8/100 [00:01<00:11,  7.81it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.08it/s]
 10%|████                                     | 10/100 [00:01<00:12,  6.99it/s]
 13%|█████▎                                   | 13/100 [00:01<00:10,  8.35it/s]
 15%|██████▏                                  | 15/100 [00:01<00:10,  8.25it/s]
 17%|██████▉                                  | 17/100 [00:02<00:09,  8.48it/s]
 19%|███████▊                                 | 19/100 [00:02<00:09,  8.71it/s]
 21%|████████▌                         

12:train loss is 157.6427764892578



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:11,  1.38it/s]
  4%|█▋                                        | 4/100 [00:01<00:24,  3.91it/s]
  6%|██▌                                       | 6/100 [00:01<00:27,  3.43it/s]
  7%|██▉                                       | 7/100 [00:02<00:27,  3.44it/s]
  8%|███▎                                      | 8/100 [00:02<00:26,  3.51it/s]
 10%|████                                     | 10/100 [00:02<00:21,  4.17it/s]
 11%|████▌                                    | 11/100 [00:02<00:22,  4.01it/s]
 13%|█████▎                                   | 13/100 [00:02<00:19,  4.49it/s]
 15%|██████▏                                  | 15/100 [00:02<00:16,  5.01it/s]
 17%|██████▉                                  | 17/100 [00:03<00:16,  5.14it/s]
 19%|███████▊                                 | 19/100 [00:03<00:15,  5.31it/s]
 20%|████████▏                         

13:train loss is 273.2828063964844



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:48,  2.01it/s]
  3%|█▎                                        | 3/100 [00:01<00:35,  2.71it/s]
  4%|█▋                                        | 4/100 [00:01<00:30,  3.12it/s]
  6%|██▌                                       | 6/100 [00:01<00:23,  4.01it/s]
  7%|██▉                                       | 7/100 [00:01<00:22,  4.22it/s]
  9%|███▊                                      | 9/100 [00:01<00:18,  5.00it/s]
 10%|████                                     | 10/100 [00:02<00:19,  4.64it/s]
 12%|████▉                                    | 12/100 [00:02<00:16,  5.27it/s]
 14%|█████▋                                   | 14/100 [00:02<00:14,  5.88it/s]
 16%|██████▌                                  | 16/100 [00:03<00:16,  5.18it/s]
 17%|██████▉                                  | 17/100 [00:03<00:16,  5.12it/s]
 20%|████████▏                         

14:train loss is 110.73599243164062



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:24,  4.00it/s]
  3%|█▎                                        | 3/100 [00:00<00:15,  6.24it/s]
  4%|█▋                                        | 4/100 [00:00<00:14,  6.53it/s]
  5%|██                                        | 5/100 [00:00<00:15,  6.27it/s]
  6%|██▌                                       | 6/100 [00:00<00:14,  6.37it/s]
  8%|███▎                                      | 8/100 [00:01<00:12,  7.67it/s]
 11%|████▌                                    | 11/100 [00:01<00:09,  9.33it/s]
 13%|█████▎                                   | 13/100 [00:01<00:09,  8.90it/s]
 15%|██████▏                                  | 15/100 [00:01<00:10,  8.33it/s]
 16%|██████▌                                  | 16/100 [00:02<00:13,  6.31it/s]
 18%|███████▍                                 | 18/100 [00:02<00:12,  6.83it/s]
 20%|████████▏                         

15:train loss is 1181.90576171875



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:09,  1.43it/s]
  3%|█▎                                        | 3/100 [00:00<00:27,  3.48it/s]
  4%|█▋                                        | 4/100 [00:01<00:26,  3.62it/s]
  6%|██▌                                       | 6/100 [00:01<00:27,  3.42it/s]
  7%|██▉                                       | 7/100 [00:01<00:26,  3.50it/s]
  8%|███▎                                      | 8/100 [00:02<00:31,  2.95it/s]
 11%|████▌                                    | 11/100 [00:02<00:22,  3.89it/s]
 13%|█████▎                                   | 13/100 [00:03<00:21,  4.12it/s]
 17%|██████▉                                  | 17/100 [00:03<00:16,  5.18it/s]
 19%|███████▊                                 | 19/100 [00:03<00:15,  5.36it/s]
 21%|████████▌                                | 21/100 [00:04<00:15,  5.01it/s]
 23%|█████████▍                        

16:train loss is 199.11643981933594



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:23,  4.22it/s]
  2%|▊                                         | 2/100 [00:00<00:19,  4.95it/s]
  4%|█▋                                        | 4/100 [00:00<00:14,  6.66it/s]
  6%|██▌                                       | 6/100 [00:00<00:12,  7.62it/s]
  9%|███▊                                      | 9/100 [00:00<00:09,  9.40it/s]
 11%|████▌                                    | 11/100 [00:01<00:09,  9.13it/s]
 12%|████▉                                    | 12/100 [00:01<00:09,  8.82it/s]
 16%|██████▌                                  | 16/100 [00:01<00:07, 10.88it/s]
 18%|███████▍                                 | 18/100 [00:01<00:07, 10.34it/s]
 20%|████████▏                                | 20/100 [00:01<00:07, 10.82it/s]
 22%|█████████                                | 22/100 [00:02<00:09,  8.25it/s]
 24%|█████████▊                        

17:train loss is 119.27257537841797



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:12,  7.60it/s]
  5%|██                                        | 5/100 [00:00<00:11,  8.31it/s]
  7%|██▉                                       | 7/100 [00:00<00:10,  9.26it/s]
  8%|███▎                                      | 8/100 [00:01<00:18,  4.93it/s]
  9%|███▊                                      | 9/100 [00:01<00:18,  4.84it/s]
 10%|████                                     | 10/100 [00:01<00:17,  5.08it/s]
 12%|████▉                                    | 12/100 [00:02<00:15,  5.57it/s]
 14%|█████▋                                   | 14/100 [00:02<00:14,  5.80it/s]
 16%|██████▌                                  | 16/100 [00:02<00:13,  6.10it/s]
 17%|██████▉                                  | 17/100 [00:02<00:13,  6.19it/s]
 19%|███████▊                                 | 19/100 [00:02<00:12,  6.46it/s]
 20%|████████▏                         

18:train loss is 631.861572265625



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:13,  7.33it/s]
  3%|█▎                                        | 3/100 [00:00<00:14,  6.51it/s]
  5%|██                                        | 5/100 [00:00<00:11,  7.92it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  8.18it/s]
 10%|████                                     | 10/100 [00:01<00:09,  9.41it/s]
 11%|████▌                                    | 11/100 [00:01<00:10,  8.46it/s]
 12%|████▉                                    | 12/100 [00:01<00:11,  7.87it/s]
 13%|█████▎                                   | 13/100 [00:01<00:11,  7.36it/s]
 15%|██████▏                                  | 15/100 [00:01<00:11,  7.72it/s]
 16%|██████▌                                  | 16/100 [00:02<00:10,  7.68it/s]
 17%|██████▉                                  | 17/100 [00:02<00:10,  7.77it/s]
 18%|███████▍                          

19:train loss is 198.53639221191406



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:07, 12.42it/s]
  4%|█▋                                        | 4/100 [00:00<00:12,  7.44it/s]
  6%|██▌                                       | 6/100 [00:00<00:10,  8.60it/s]
  7%|██▉                                       | 7/100 [00:00<00:10,  8.50it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.47it/s]
 10%|████                                     | 10/100 [00:01<00:12,  7.17it/s]
 12%|████▉                                    | 12/100 [00:01<00:11,  7.44it/s]
 14%|█████▋                                   | 14/100 [00:01<00:11,  7.51it/s]
 15%|██████▏                                  | 15/100 [00:02<00:12,  6.76it/s]
 17%|██████▉                                  | 17/100 [00:02<00:12,  6.46it/s]
 18%|███████▍                                 | 18/100 [00:02<00:13,  6.23it/s]
 20%|████████▏                         

20:train loss is 719.3966674804688
20: valid_loss is932.311279296875



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:04, 23.12it/s]
  4%|█▋                                        | 4/100 [00:00<00:06, 15.50it/s]
  5%|██                                        | 5/100 [00:00<00:07, 13.01it/s]
  7%|██▉                                       | 7/100 [00:00<00:09, 10.33it/s]
  8%|███▎                                      | 8/100 [00:00<00:10,  8.66it/s]
  9%|███▊                                      | 9/100 [00:01<00:11,  7.75it/s]
 11%|████▌                                    | 11/100 [00:01<00:10,  8.34it/s]
 12%|████▉                                    | 12/100 [00:01<00:11,  7.71it/s]
 13%|█████▎                                   | 13/100 [00:01<00:12,  7.23it/s]
 15%|██████▏                                  | 15/100 [00:02<00:11,  7.43it/s]
 16%|██████▌                                  | 16/100 [00:02<00:11,  7.45it/s]
 18%|███████▍                          

21:train loss is 651.1580200195312



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:03,  1.55it/s]
  2%|▊                                         | 2/100 [00:00<00:38,  2.55it/s]
  4%|█▋                                        | 4/100 [00:01<00:24,  3.97it/s]
  7%|██▉                                       | 7/100 [00:01<00:15,  6.12it/s]
  8%|███▎                                      | 8/100 [00:01<00:15,  5.84it/s]
  9%|███▊                                      | 9/100 [00:01<00:15,  5.99it/s]
 13%|█████▎                                   | 13/100 [00:01<00:10,  7.97it/s]
 15%|██████▏                                  | 15/100 [00:01<00:10,  8.03it/s]
 17%|██████▉                                  | 17/100 [00:02<00:10,  7.69it/s]
 20%|████████▏                                | 20/100 [00:02<00:09,  8.59it/s]
 24%|█████████▊                               | 24/100 [00:02<00:07,  9.88it/s]
 27%|███████████                       

22:train loss is 265.94403076171875



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:04, 20.68it/s]
  5%|██                                        | 5/100 [00:00<00:07, 12.17it/s]
  6%|██▌                                       | 6/100 [00:00<00:09,  9.45it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  7.78it/s]
  8%|███▎                                      | 8/100 [00:01<00:12,  7.10it/s]
 10%|████                                     | 10/100 [00:01<00:12,  7.45it/s]
 11%|████▌                                    | 11/100 [00:01<00:13,  6.79it/s]
 12%|████▉                                    | 12/100 [00:01<00:13,  6.76it/s]
 13%|█████▎                                   | 13/100 [00:02<00:13,  6.49it/s]
 15%|██████▏                                  | 15/100 [00:02<00:12,  6.89it/s]
 17%|██████▉                                  | 17/100 [00:02<00:11,  7.24it/s]
 18%|███████▍                          

23:train loss is 5615.923828125



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:13,  7.19it/s]
  3%|█▎                                        | 3/100 [00:00<00:10,  9.28it/s]
  6%|██▌                                       | 6/100 [00:00<00:09,  9.97it/s]
  8%|███▎                                      | 8/100 [00:00<00:09, 10.19it/s]
  9%|███▊                                      | 9/100 [00:01<00:14,  6.22it/s]
 10%|████                                     | 10/100 [00:02<00:19,  4.67it/s]
 13%|█████▎                                   | 13/100 [00:02<00:15,  5.68it/s]
 15%|██████▏                                  | 15/100 [00:02<00:14,  5.86it/s]
 18%|███████▍                                 | 18/100 [00:02<00:12,  6.76it/s]
 20%|████████▏                                | 20/100 [00:03<00:14,  5.55it/s]
 22%|█████████                                | 22/100 [00:03<00:13,  5.57it/s]
 25%|██████████▎                       

24:train loss is 204.1901397705078



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:07, 12.65it/s]
  4%|█▋                                        | 4/100 [00:00<00:15,  6.33it/s]
  8%|███▎                                      | 8/100 [00:00<00:10,  8.61it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.17it/s]
 10%|████                                     | 10/100 [00:01<00:13,  6.56it/s]
 12%|████▉                                    | 12/100 [00:01<00:12,  6.93it/s]
 14%|█████▋                                   | 14/100 [00:02<00:13,  6.41it/s]
 16%|██████▌                                  | 16/100 [00:02<00:15,  5.57it/s]
 17%|██████▉                                  | 17/100 [00:03<00:14,  5.66it/s]
 18%|███████▍                                 | 18/100 [00:03<00:14,  5.74it/s]
 19%|███████▊                                 | 19/100 [00:03<00:14,  5.62it/s]
 21%|████████▌                         

25:train loss is 255.9169464111328



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:11,  8.62it/s]
  7%|██▉                                       | 7/100 [00:00<00:06, 14.99it/s]
 10%|████                                     | 10/100 [00:00<00:06, 13.54it/s]
 14%|█████▋                                   | 14/100 [00:00<00:05, 16.35it/s]
 16%|██████▌                                  | 16/100 [00:00<00:05, 16.04it/s]
 18%|███████▍                                 | 18/100 [00:01<00:05, 13.85it/s]
 20%|████████▏                                | 20/100 [00:01<00:06, 12.81it/s]
 22%|█████████                                | 22/100 [00:02<00:08,  9.13it/s]
 23%|█████████▍                               | 23/100 [00:02<00:08,  8.70it/s]
 25%|██████████▎                              | 25/100 [00:02<00:08,  8.80it/s]
 26%|██████████▋                              | 26/100 [00:03<00:08,  8.51it/s]
 27%|███████████                       

26:train loss is 111.60271453857422



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:27,  1.13it/s]
  2%|▊                                         | 2/100 [00:01<01:29,  1.10it/s]
  3%|█▎                                        | 3/100 [00:02<01:10,  1.37it/s]
  5%|██                                        | 5/100 [00:02<00:45,  2.09it/s]
  6%|██▌                                       | 6/100 [00:03<00:48,  1.93it/s]
  8%|███▎                                      | 8/100 [00:03<00:37,  2.44it/s]
 11%|████▌                                    | 11/100 [00:04<00:32,  2.74it/s]
 12%|████▉                                    | 12/100 [00:04<00:31,  2.79it/s]
 13%|█████▎                                   | 13/100 [00:04<00:29,  2.93it/s]
 15%|██████▏                                  | 15/100 [00:04<00:27,  3.12it/s]
 16%|██████▌                                  | 16/100 [00:04<00:26,  3.21it/s]
 17%|██████▉                           

27:train loss is 637.2632446289062



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:18,  5.26it/s]
  3%|█▎                                        | 3/100 [00:00<00:22,  4.29it/s]
  6%|██▌                                       | 6/100 [00:00<00:12,  7.31it/s]
 10%|████                                     | 10/100 [00:00<00:08, 10.69it/s]
 12%|████▉                                    | 12/100 [00:01<00:14,  6.22it/s]
 14%|█████▋                                   | 14/100 [00:02<00:16,  5.26it/s]
 17%|██████▉                                  | 17/100 [00:02<00:14,  5.76it/s]
 18%|███████▍                                 | 18/100 [00:03<00:14,  5.60it/s]
 19%|███████▊                                 | 19/100 [00:03<00:14,  5.58it/s]
 21%|████████▌                                | 21/100 [00:03<00:13,  5.86it/s]
 23%|█████████▍                               | 23/100 [00:03<00:12,  6.13it/s]
 24%|█████████▊                        

28:train loss is 530.353271484375



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:11,  8.52it/s]
  3%|█▎                                        | 3/100 [00:00<00:15,  6.41it/s]
  6%|██▌                                       | 6/100 [00:00<00:11,  8.08it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  8.09it/s]
  8%|███▎                                      | 8/100 [00:01<00:12,  7.19it/s]
 10%|████                                     | 10/100 [00:01<00:12,  7.17it/s]
 11%|████▌                                    | 11/100 [00:01<00:12,  7.13it/s]
 12%|████▉                                    | 12/100 [00:01<00:12,  7.13it/s]
 13%|█████▎                                   | 13/100 [00:02<00:15,  5.60it/s]
 14%|█████▋                                   | 14/100 [00:02<00:15,  5.70it/s]
 16%|██████▌                                  | 16/100 [00:02<00:14,  5.81it/s]
 17%|██████▉                           

29:train loss is 389.3466491699219



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:21,  1.21it/s]
  2%|▊                                         | 2/100 [00:01<00:54,  1.80it/s]
  5%|██                                        | 5/100 [00:01<00:25,  3.75it/s]
  6%|██▌                                       | 6/100 [00:01<00:23,  4.02it/s]
  7%|██▉                                       | 7/100 [00:01<00:22,  4.14it/s]
  8%|███▎                                      | 8/100 [00:01<00:21,  4.38it/s]
  9%|███▊                                      | 9/100 [00:02<00:21,  4.32it/s]
 10%|████                                     | 10/100 [00:02<00:21,  4.18it/s]
 11%|████▌                                    | 11/100 [00:02<00:21,  4.17it/s]
 12%|████▉                                    | 12/100 [00:02<00:21,  4.07it/s]
 13%|█████▎                                   | 13/100 [00:03<00:20,  4.23it/s]
 17%|██████▉                           

30:train loss is 1046.735595703125
30: valid_loss is324.17205810546875



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:15,  6.43it/s]
  4%|█▋                                        | 4/100 [00:00<00:14,  6.54it/s]
  7%|██▉                                       | 7/100 [00:00<00:10,  8.71it/s]
  9%|███▊                                      | 9/100 [00:00<00:10,  9.06it/s]
 12%|████▉                                    | 12/100 [00:01<00:09,  9.67it/s]
 15%|██████▏                                  | 15/100 [00:01<00:08, 10.19it/s]
 17%|██████▉                                  | 17/100 [00:01<00:08,  9.66it/s]
 18%|███████▍                                 | 18/100 [00:01<00:08,  9.55it/s]
 19%|███████▊                                 | 19/100 [00:02<00:10,  7.50it/s]
 21%|████████▌                                | 21/100 [00:02<00:10,  7.37it/s]
 22%|█████████                                | 22/100 [00:03<00:12,  6.24it/s]
 23%|█████████▍                        

31:train loss is 168.97091674804688



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:08, 11.47it/s]
  3%|█▎                                        | 3/100 [00:00<00:15,  6.44it/s]
  4%|█▋                                        | 4/100 [00:00<00:15,  6.17it/s]
  7%|██▉                                       | 7/100 [00:00<00:09,  9.35it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.39it/s]
 10%|████                                     | 10/100 [00:01<00:12,  7.33it/s]
 13%|█████▎                                   | 13/100 [00:01<00:09,  8.79it/s]
 15%|██████▏                                  | 15/100 [00:01<00:09,  8.77it/s]
 17%|██████▉                                  | 17/100 [00:01<00:09,  8.96it/s]
 19%|███████▊                                 | 19/100 [00:02<00:08,  9.26it/s]
 21%|████████▌                                | 21/100 [00:02<00:09,  8.62it/s]
 24%|█████████▊                        

32:train loss is 195.1835174560547



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:31,  1.09it/s]
  4%|█▋                                        | 4/100 [00:01<00:31,  3.05it/s]
  6%|██▌                                       | 6/100 [00:02<00:34,  2.69it/s]
  7%|██▉                                       | 7/100 [00:02<00:34,  2.73it/s]
  8%|███▎                                      | 8/100 [00:02<00:32,  2.82it/s]
 10%|████                                     | 10/100 [00:02<00:26,  3.41it/s]
 11%|████▌                                    | 11/100 [00:03<00:26,  3.39it/s]
 13%|█████▎                                   | 13/100 [00:03<00:22,  3.88it/s]
 16%|██████▌                                  | 16/100 [00:03<00:19,  4.27it/s]
 18%|███████▍                                 | 18/100 [00:03<00:18,  4.54it/s]
 20%|████████▏                                | 20/100 [00:04<00:16,  4.82it/s]
 22%|█████████                         

33:train loss is 655.6743774414062



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:35,  2.79it/s]
  4%|█▋                                        | 4/100 [00:00<00:22,  4.22it/s]
  6%|██▌                                       | 6/100 [00:01<00:17,  5.23it/s]
  7%|██▉                                       | 7/100 [00:01<00:16,  5.49it/s]
  9%|███▊                                      | 9/100 [00:01<00:14,  6.46it/s]
 10%|████                                     | 10/100 [00:01<00:15,  5.88it/s]
 12%|████▉                                    | 12/100 [00:01<00:13,  6.61it/s]
 15%|██████▏                                  | 15/100 [00:01<00:10,  7.83it/s]
 17%|██████▉                                  | 17/100 [00:02<00:13,  6.06it/s]
 21%|████████▌                                | 21/100 [00:03<00:11,  6.97it/s]
 23%|█████████▍                               | 23/100 [00:03<00:10,  7.01it/s]
 25%|██████████▎                       

34:train loss is 425.6845703125



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:24,  4.02it/s]
  3%|█▎                                        | 3/100 [00:00<00:16,  5.91it/s]
  4%|█▋                                        | 4/100 [00:00<00:15,  6.20it/s]
  5%|██                                        | 5/100 [00:00<00:15,  6.04it/s]
  6%|██▌                                       | 6/100 [00:00<00:15,  6.16it/s]
 10%|████                                     | 10/100 [00:01<00:09,  9.16it/s]
 12%|████▉                                    | 12/100 [00:01<00:10,  8.61it/s]
 15%|██████▏                                  | 15/100 [00:01<00:09,  8.82it/s]
 17%|██████▉                                  | 17/100 [00:02<00:11,  6.94it/s]
 20%|████████▏                                | 20/100 [00:02<00:10,  7.30it/s]
 22%|█████████                                | 22/100 [00:03<00:10,  7.28it/s]
 24%|█████████▊                        

35:train loss is 331.4917907714844



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:32,  1.07it/s]
  3%|█▎                                        | 3/100 [00:01<00:35,  2.73it/s]
  4%|█▋                                        | 4/100 [00:01<00:35,  2.68it/s]
  6%|██▌                                       | 6/100 [00:02<00:38,  2.46it/s]
  7%|██▉                                       | 7/100 [00:02<00:36,  2.53it/s]
  8%|███▎                                      | 8/100 [00:03<00:39,  2.31it/s]
 11%|████▌                                    | 11/100 [00:03<00:29,  3.06it/s]
 13%|█████▎                                   | 13/100 [00:03<00:26,  3.33it/s]
 15%|██████▏                                  | 15/100 [00:04<00:22,  3.73it/s]
 18%|███████▍                                 | 18/100 [00:04<00:19,  4.23it/s]
 20%|████████▏                                | 20/100 [00:04<00:17,  4.54it/s]
 22%|█████████                         

36:train loss is 145.9261474609375



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:26,  3.71it/s]
  2%|▊                                         | 2/100 [00:00<00:20,  4.68it/s]
  4%|█▋                                        | 4/100 [00:00<00:15,  6.28it/s]
  6%|██▌                                       | 6/100 [00:00<00:12,  7.62it/s]
  9%|███▊                                      | 9/100 [00:00<00:10,  9.09it/s]
 11%|████▌                                    | 11/100 [00:01<00:11,  8.02it/s]
 12%|████▉                                    | 12/100 [00:01<00:10,  8.11it/s]
 15%|██████▏                                  | 15/100 [00:01<00:09,  9.44it/s]
 17%|██████▉                                  | 17/100 [00:01<00:09,  9.05it/s]
 21%|████████▌                                | 21/100 [00:02<00:09,  7.98it/s]
 22%|█████████                                | 22/100 [00:02<00:09,  7.91it/s]
 23%|█████████▍                        

37:train loss is 258.437744140625



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:12,  7.91it/s]
  5%|██                                        | 5/100 [00:00<00:11,  8.54it/s]
  7%|██▉                                       | 7/100 [00:00<00:09,  9.74it/s]
  8%|███▎                                      | 8/100 [00:01<00:17,  5.12it/s]
  9%|███▊                                      | 9/100 [00:01<00:18,  5.00it/s]
 10%|████                                     | 10/100 [00:01<00:17,  5.24it/s]
 12%|████▉                                    | 12/100 [00:02<00:15,  5.73it/s]
 14%|█████▋                                   | 14/100 [00:02<00:14,  6.00it/s]
 16%|██████▌                                  | 16/100 [00:02<00:13,  6.29it/s]
 17%|██████▉                                  | 17/100 [00:02<00:13,  6.37it/s]
 19%|███████▊                                 | 19/100 [00:02<00:12,  6.64it/s]
 20%|████████▏                         

38:train loss is 54.67096710205078



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:17,  5.64it/s]
  3%|█▎                                        | 3/100 [00:00<00:16,  5.84it/s]
  5%|██                                        | 5/100 [00:00<00:13,  6.97it/s]
  7%|██▉                                       | 7/100 [00:00<00:13,  7.13it/s]
 10%|████                                     | 10/100 [00:01<00:11,  7.79it/s]
 11%|████▌                                    | 11/100 [00:01<00:12,  6.99it/s]
 12%|████▉                                    | 12/100 [00:01<00:13,  6.40it/s]
 13%|█████▎                                   | 13/100 [00:02<00:14,  5.95it/s]
 15%|██████▏                                  | 15/100 [00:02<00:13,  6.23it/s]
 16%|██████▌                                  | 16/100 [00:02<00:13,  6.17it/s]
 17%|██████▉                                  | 17/100 [00:02<00:13,  6.26it/s]
 18%|███████▍                          

39:train loss is 34.192726135253906



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:08, 11.21it/s]
  4%|█▋                                        | 4/100 [00:00<00:13,  7.10it/s]
  6%|██▌                                       | 6/100 [00:00<00:11,  8.30it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  8.24it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.40it/s]
 10%|████                                     | 10/100 [00:01<00:12,  7.44it/s]
 12%|████▉                                    | 12/100 [00:01<00:10,  8.19it/s]
 14%|█████▋                                   | 14/100 [00:01<00:09,  8.81it/s]
 16%|██████▌                                  | 16/100 [00:01<00:09,  9.00it/s]
 18%|███████▍                                 | 18/100 [00:02<00:10,  7.49it/s]
 20%|████████▏                                | 20/100 [00:02<00:10,  7.58it/s]
 21%|████████▌                         

40:train loss is 70.58525085449219
valid_loss improves
40: valid_loss is46.7156982421875



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:06, 14.36it/s]
  4%|█▋                                        | 4/100 [00:00<00:07, 13.27it/s]
  5%|██                                        | 5/100 [00:00<00:08, 11.67it/s]
  7%|██▉                                       | 7/100 [00:00<00:09,  9.53it/s]
  8%|███▎                                      | 8/100 [00:00<00:11,  8.16it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.40it/s]
 11%|████▌                                    | 11/100 [00:01<00:11,  7.94it/s]
 12%|████▉                                    | 12/100 [00:01<00:11,  7.41it/s]
 13%|█████▎                                   | 13/100 [00:01<00:12,  6.96it/s]
 15%|██████▏                                  | 15/100 [00:02<00:11,  7.25it/s]
 16%|██████▌                                  | 16/100 [00:02<00:11,  7.26it/s]
 18%|███████▍                          

41:train loss is 33.394737243652344



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:04,  1.54it/s]
  2%|▊                                         | 2/100 [00:00<00:38,  2.58it/s]
  4%|█▋                                        | 4/100 [00:00<00:23,  4.10it/s]
  7%|██▉                                       | 7/100 [00:01<00:15,  6.16it/s]
  8%|███▎                                      | 8/100 [00:01<00:16,  5.66it/s]
  9%|███▊                                      | 9/100 [00:01<00:15,  5.82it/s]
 13%|█████▎                                   | 13/100 [00:01<00:11,  7.79it/s]
 15%|██████▏                                  | 15/100 [00:01<00:10,  7.85it/s]
 17%|██████▉                                  | 17/100 [00:02<00:11,  7.51it/s]
 20%|████████▏                                | 20/100 [00:02<00:09,  8.44it/s]
 25%|██████████▎                              | 25/100 [00:02<00:07,  9.59it/s]
 28%|███████████▍                      

42:train loss is 12.257667541503906



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:05, 17.94it/s]
  5%|██                                        | 5/100 [00:00<00:08, 11.36it/s]
  6%|██▌                                       | 6/100 [00:00<00:11,  8.23it/s]
  7%|██▉                                       | 7/100 [00:01<00:14,  6.58it/s]
  8%|███▎                                      | 8/100 [00:01<00:15,  5.84it/s]
  9%|███▊                                      | 9/100 [00:01<00:14,  6.08it/s]
 10%|████                                     | 10/100 [00:01<00:14,  6.10it/s]
 11%|████▌                                    | 11/100 [00:01<00:15,  5.67it/s]
 12%|████▉                                    | 12/100 [00:02<00:15,  5.66it/s]
 13%|█████▎                                   | 13/100 [00:02<00:15,  5.53it/s]
 15%|██████▏                                  | 15/100 [00:02<00:14,  5.94it/s]
 17%|██████▉                           

43:train loss is 75.81669616699219



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:13,  7.24it/s]
  3%|█▎                                        | 3/100 [00:00<00:09, 10.52it/s]
  6%|██▌                                       | 6/100 [00:00<00:08, 10.82it/s]
  8%|███▎                                      | 8/100 [00:00<00:08, 10.90it/s]
  9%|███▊                                      | 9/100 [00:01<00:14,  6.47it/s]
 10%|████                                     | 10/100 [00:02<00:18,  4.78it/s]
 13%|█████▎                                   | 13/100 [00:02<00:14,  5.85it/s]
 15%|██████▏                                  | 15/100 [00:02<00:14,  6.02it/s]
 19%|███████▊                                 | 19/100 [00:02<00:11,  6.75it/s]
 21%|████████▌                                | 21/100 [00:03<00:13,  5.81it/s]
 22%|█████████                                | 22/100 [00:03<00:13,  5.70it/s]
 25%|██████████▎                       

44:train loss is 15.046385765075684



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:08, 12.05it/s]
  4%|█▋                                        | 4/100 [00:00<00:15,  6.07it/s]
  8%|███▎                                      | 8/100 [00:00<00:10,  8.60it/s]
  9%|███▊                                      | 9/100 [00:01<00:14,  6.36it/s]
 10%|████                                     | 10/100 [00:02<00:21,  4.11it/s]
 12%|████▉                                    | 12/100 [00:02<00:21,  4.15it/s]
 14%|█████▋                                   | 14/100 [00:03<00:21,  4.03it/s]
 15%|██████▏                                  | 15/100 [00:03<00:20,  4.17it/s]
 16%|██████▌                                  | 16/100 [00:04<00:23,  3.53it/s]
 17%|██████▉                                  | 17/100 [00:04<00:22,  3.65it/s]
 18%|███████▍                                 | 18/100 [00:04<00:21,  3.76it/s]
 19%|███████▊                          

45:train loss is 39.002349853515625



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:11,  8.09it/s]
  6%|██▌                                       | 6/100 [00:00<00:07, 11.89it/s]
 10%|████                                     | 10/100 [00:00<00:07, 11.50it/s]
 14%|█████▋                                   | 14/100 [00:00<00:06, 14.04it/s]
 16%|██████▌                                  | 16/100 [00:01<00:06, 13.44it/s]
 18%|███████▍                                 | 18/100 [00:01<00:07, 11.41it/s]
 20%|████████▏                                | 20/100 [00:01<00:07, 10.62it/s]
 21%|████████▌                                | 21/100 [00:02<00:09,  8.40it/s]
 22%|█████████                                | 22/100 [00:02<00:09,  8.03it/s]
 23%|█████████▍                               | 23/100 [00:02<00:09,  7.74it/s]
 25%|██████████▎                              | 25/100 [00:03<00:09,  7.95it/s]
 26%|██████████▋                       

46:train loss is 53.81418991088867



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:25,  1.15it/s]
  2%|▊                                         | 2/100 [00:01<01:28,  1.11it/s]
  3%|█▎                                        | 3/100 [00:02<01:09,  1.40it/s]
  5%|██                                        | 5/100 [00:02<00:43,  2.17it/s]
  6%|██▌                                       | 6/100 [00:02<00:46,  2.02it/s]
  8%|███▎                                      | 8/100 [00:03<00:36,  2.54it/s]
 11%|████▌                                    | 11/100 [00:03<00:31,  2.84it/s]
 12%|████▉                                    | 12/100 [00:04<00:30,  2.90it/s]
 13%|█████▎                                   | 13/100 [00:04<00:28,  3.04it/s]
 15%|██████▏                                  | 15/100 [00:04<00:26,  3.24it/s]
 16%|██████▌                                  | 16/100 [00:04<00:25,  3.33it/s]
 17%|██████▉                           

47:train loss is 63.596588134765625



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:18,  5.36it/s]
  3%|█▎                                        | 3/100 [00:00<00:22,  4.33it/s]
  6%|██▌                                       | 6/100 [00:00<00:12,  7.47it/s]
 10%|████                                     | 10/100 [00:00<00:08, 10.70it/s]
 12%|████▉                                    | 12/100 [00:01<00:14,  6.27it/s]
 14%|█████▋                                   | 14/100 [00:02<00:17,  4.95it/s]
 17%|██████▉                                  | 17/100 [00:03<00:15,  5.47it/s]
 18%|███████▍                                 | 18/100 [00:03<00:15,  5.34it/s]
 19%|███████▊                                 | 19/100 [00:03<00:15,  5.32it/s]
 20%|████████▏                                | 20/100 [00:03<00:14,  5.43it/s]
 21%|████████▌                                | 21/100 [00:04<00:17,  4.64it/s]
 23%|█████████▍                        

48:train loss is 32.62992858886719



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:10,  9.32it/s]
  3%|█▎                                        | 3/100 [00:00<00:13,  7.29it/s]
  6%|██▌                                       | 6/100 [00:00<00:10,  8.62it/s]
  7%|██▉                                       | 7/100 [00:00<00:10,  8.46it/s]
  8%|███▎                                      | 8/100 [00:01<00:12,  7.44it/s]
 10%|████                                     | 10/100 [00:01<00:12,  7.40it/s]
 11%|████▌                                    | 11/100 [00:01<00:12,  7.31it/s]
 12%|████▉                                    | 12/100 [00:01<00:12,  7.30it/s]
 13%|█████▎                                   | 13/100 [00:02<00:15,  5.69it/s]
 14%|█████▋                                   | 14/100 [00:02<00:14,  5.77it/s]
 16%|██████▌                                  | 16/100 [00:02<00:14,  5.84it/s]
 17%|██████▉                           

49:train loss is 34.811317443847656



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:07,  1.46it/s]
  2%|▊                                         | 2/100 [00:00<00:43,  2.23it/s]
  5%|██                                        | 5/100 [00:01<00:20,  4.62it/s]
  6%|██▌                                       | 6/100 [00:01<00:18,  4.96it/s]
  7%|██▉                                       | 7/100 [00:01<00:18,  5.12it/s]
  9%|███▊                                      | 9/100 [00:01<00:16,  5.47it/s]
 10%|████                                     | 10/100 [00:01<00:17,  5.09it/s]
 11%|████▌                                    | 11/100 [00:02<00:17,  5.05it/s]
 12%|████▉                                    | 12/100 [00:02<00:18,  4.87it/s]
 13%|█████▎                                   | 13/100 [00:02<00:17,  5.01it/s]
 17%|██████▉                                  | 17/100 [00:02<00:13,  6.22it/s]
 19%|███████▊                          

50:train loss is 17.452835083007812
50: valid_loss is68.92926025390625



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:16,  5.95it/s]
  4%|█▋                                        | 4/100 [00:00<00:15,  6.02it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  8.07it/s]
  8%|███▎                                      | 8/100 [00:00<00:11,  8.07it/s]
  9%|███▊                                      | 9/100 [00:01<00:11,  8.17it/s]
 10%|████                                     | 10/100 [00:01<00:11,  8.06it/s]
 12%|████▉                                    | 12/100 [00:01<00:11,  7.44it/s]
 15%|██████▏                                  | 15/100 [00:01<00:10,  7.77it/s]
 16%|██████▌                                  | 16/100 [00:02<00:11,  7.08it/s]
 18%|███████▍                                 | 18/100 [00:02<00:11,  7.41it/s]
 19%|███████▊                                 | 19/100 [00:04<00:21,  3.81it/s]
 21%|████████▌                         

51:train loss is 83.30806732177734



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:09,  9.95it/s]
  3%|█▎                                        | 3/100 [00:00<00:17,  5.52it/s]
  4%|█▋                                        | 4/100 [00:00<00:18,  5.09it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  7.87it/s]
  9%|███▊                                      | 9/100 [00:01<00:15,  6.01it/s]
 10%|████                                     | 10/100 [00:01<00:14,  6.08it/s]
 13%|█████▎                                   | 13/100 [00:01<00:11,  7.37it/s]
 15%|██████▏                                  | 15/100 [00:02<00:11,  7.40it/s]
 17%|██████▉                                  | 17/100 [00:02<00:10,  7.68it/s]
 19%|███████▊                                 | 19/100 [00:02<00:10,  8.02it/s]
 21%|████████▌                                | 21/100 [00:02<00:10,  7.67it/s]
 24%|█████████▊                        

52:train loss is 21.413612365722656



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:09,  1.42it/s]
  4%|█▋                                        | 4/100 [00:00<00:23,  4.02it/s]
  6%|██▌                                       | 6/100 [00:01<00:26,  3.49it/s]
  7%|██▉                                       | 7/100 [00:01<00:26,  3.53it/s]
  8%|███▎                                      | 8/100 [00:02<00:25,  3.58it/s]
 10%|████                                     | 10/100 [00:02<00:21,  4.27it/s]
 11%|████▌                                    | 11/100 [00:02<00:21,  4.15it/s]
 13%|█████▎                                   | 13/100 [00:02<00:18,  4.71it/s]
 16%|██████▌                                  | 16/100 [00:03<00:16,  5.13it/s]
 18%|███████▍                                 | 18/100 [00:03<00:15,  5.39it/s]
 20%|████████▏                                | 20/100 [00:03<00:14,  5.71it/s]
 22%|█████████                         

53:train loss is 25.195802688598633



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:35,  2.77it/s]
  3%|█▎                                        | 3/100 [00:00<00:26,  3.60it/s]
  4%|█▋                                        | 4/100 [00:00<00:23,  4.10it/s]
  6%|██▌                                       | 6/100 [00:01<00:17,  5.26it/s]
  7%|██▉                                       | 7/100 [00:01<00:16,  5.51it/s]
  9%|███▊                                      | 9/100 [00:01<00:14,  6.49it/s]
 11%|████▌                                    | 11/100 [00:01<00:13,  6.42it/s]
 13%|█████▎                                   | 13/100 [00:01<00:12,  7.16it/s]
 16%|██████▌                                  | 16/100 [00:02<00:13,  6.18it/s]
 17%|██████▉                                  | 17/100 [00:02<00:13,  6.02it/s]
 21%|████████▌                                | 21/100 [00:03<00:11,  6.94it/s]
 23%|█████████▍                        

54:train loss is 8.911178588867188



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:33,  2.95it/s]
  3%|█▎                                        | 3/100 [00:00<00:20,  4.67it/s]
  4%|█▋                                        | 4/100 [00:00<00:19,  4.89it/s]
  5%|██                                        | 5/100 [00:01<00:19,  4.75it/s]
  6%|██▌                                       | 6/100 [00:01<00:19,  4.81it/s]
  9%|███▊                                      | 9/100 [00:01<00:13,  6.66it/s]
 12%|████▉                                    | 12/100 [00:01<00:12,  6.92it/s]
 15%|██████▏                                  | 15/100 [00:02<00:12,  7.07it/s]
 16%|██████▌                                  | 16/100 [00:03<00:15,  5.26it/s]
 20%|████████▏                                | 20/100 [00:03<00:13,  5.96it/s]
 22%|█████████                                | 22/100 [00:03<00:12,  6.06it/s]
 24%|█████████▊                        

55:train loss is 19.45490074157715



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:26,  1.14it/s]
  3%|█▎                                        | 3/100 [00:01<00:34,  2.85it/s]
  4%|█▋                                        | 4/100 [00:01<00:33,  2.91it/s]
  6%|██▌                                       | 6/100 [00:02<00:35,  2.66it/s]
  7%|██▉                                       | 7/100 [00:02<00:34,  2.69it/s]
  8%|███▎                                      | 8/100 [00:03<00:38,  2.41it/s]
 12%|████▉                                    | 12/100 [00:03<00:25,  3.50it/s]
 14%|█████▋                                   | 14/100 [00:03<00:22,  3.74it/s]
 18%|███████▍                                 | 18/100 [00:03<00:17,  4.59it/s]
 20%|████████▏                                | 20/100 [00:04<00:16,  4.90it/s]
 22%|█████████                                | 22/100 [00:04<00:17,  4.49it/s]
 24%|█████████▊                        

56:train loss is 9.20687484741211



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:23,  4.18it/s]
  2%|▊                                         | 2/100 [00:00<00:18,  5.18it/s]
  4%|█▋                                        | 4/100 [00:00<00:14,  6.80it/s]
  6%|██▌                                       | 6/100 [00:00<00:11,  7.89it/s]
  9%|███▊                                      | 9/100 [00:00<00:09,  9.29it/s]
 11%|████▌                                    | 11/100 [00:01<00:10,  8.77it/s]
 12%|████▉                                    | 12/100 [00:01<00:10,  8.78it/s]
 16%|██████▌                                  | 16/100 [00:01<00:07, 10.86it/s]
 18%|███████▍                                 | 18/100 [00:01<00:07, 10.32it/s]
 21%|████████▌                                | 21/100 [00:02<00:09,  8.36it/s]
 23%|█████████▍                               | 23/100 [00:02<00:09,  8.18it/s]
 24%|█████████▊                        

57:train loss is 13.108027458190918



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:05, 18.35it/s]
  3%|█▎                                        | 3/100 [00:00<00:14,  6.83it/s]
  5%|██                                        | 5/100 [00:00<00:12,  7.69it/s]
  7%|██▉                                       | 7/100 [00:00<00:10,  8.94it/s]
  8%|███▎                                      | 8/100 [00:01<00:18,  4.92it/s]
  9%|███▊                                      | 9/100 [00:01<00:18,  4.83it/s]
 10%|████                                     | 10/100 [00:01<00:17,  5.07it/s]
 12%|████▉                                    | 12/100 [00:02<00:15,  5.60it/s]
 14%|█████▋                                   | 14/100 [00:02<00:14,  5.87it/s]
 16%|██████▌                                  | 16/100 [00:02<00:13,  6.17it/s]
 17%|██████▉                                  | 17/100 [00:02<00:13,  6.25it/s]
 19%|███████▊                          

58:train loss is 23.317153930664062



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:16,  5.91it/s]
  3%|█▎                                        | 3/100 [00:00<00:16,  5.92it/s]
  5%|██                                        | 5/100 [00:00<00:13,  7.03it/s]
  7%|██▉                                       | 7/100 [00:00<00:12,  7.18it/s]
 10%|████                                     | 10/100 [00:01<00:11,  8.16it/s]
 11%|████▌                                    | 11/100 [00:01<00:12,  7.23it/s]
 12%|████▉                                    | 12/100 [00:01<00:13,  6.58it/s]
 13%|█████▎                                   | 13/100 [00:02<00:14,  6.09it/s]
 15%|██████▏                                  | 15/100 [00:02<00:13,  6.40it/s]
 16%|██████▌                                  | 16/100 [00:02<00:13,  6.35it/s]
 17%|██████▉                                  | 17/100 [00:02<00:12,  6.43it/s]
 18%|███████▍                          

59:train loss is 13.522041320800781



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:06, 14.38it/s]
  4%|█▋                                        | 4/100 [00:00<00:10,  8.92it/s]
  6%|██▌                                       | 6/100 [00:00<00:09, 10.43it/s]
  7%|██▉                                       | 7/100 [00:00<00:09, 10.31it/s]
  9%|███▊                                      | 9/100 [00:00<00:09,  9.27it/s]
 11%|████▌                                    | 11/100 [00:01<00:08, 10.03it/s]
 13%|█████▎                                   | 13/100 [00:01<00:08, 10.60it/s]
 15%|██████▏                                  | 15/100 [00:01<00:08, 10.25it/s]
 17%|██████▉                                  | 17/100 [00:01<00:08,  9.76it/s]
 18%|███████▍                                 | 18/100 [00:01<00:08,  9.22it/s]
 20%|████████▏                                | 20/100 [00:02<00:08,  9.34it/s]
 21%|████████▌                         

60:train loss is 13.377720832824707
valid_loss improves
60: valid_loss is39.36814498901367



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:04, 23.86it/s]
  4%|█▋                                        | 4/100 [00:00<00:06, 15.74it/s]
  5%|██                                        | 5/100 [00:00<00:07, 13.15it/s]
  7%|██▉                                       | 7/100 [00:00<00:08, 10.54it/s]
  8%|███▎                                      | 8/100 [00:00<00:10,  8.79it/s]
  9%|███▊                                      | 9/100 [00:01<00:11,  7.86it/s]
 11%|████▌                                    | 11/100 [00:01<00:10,  8.43it/s]
 12%|████▉                                    | 12/100 [00:01<00:11,  7.80it/s]
 13%|█████▎                                   | 13/100 [00:01<00:12,  7.21it/s]
 15%|██████▏                                  | 15/100 [00:02<00:11,  7.42it/s]
 16%|██████▌                                  | 16/100 [00:02<00:11,  7.42it/s]
 18%|███████▍                          

61:train loss is 28.521867752075195



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:24,  1.18it/s]
  2%|▊                                         | 2/100 [00:01<00:49,  1.98it/s]
  4%|█▋                                        | 4/100 [00:01<00:29,  3.21it/s]
  7%|██▉                                       | 7/100 [00:01<00:18,  5.03it/s]
  8%|███▎                                      | 8/100 [00:01<00:19,  4.74it/s]
  9%|███▊                                      | 9/100 [00:01<00:18,  4.82it/s]
 13%|█████▎                                   | 13/100 [00:01<00:13,  6.53it/s]
 15%|██████▏                                  | 15/100 [00:02<00:12,  6.65it/s]
 17%|██████▉                                  | 17/100 [00:02<00:13,  6.37it/s]
 21%|████████▌                                | 21/100 [00:02<00:10,  7.52it/s]
 25%|██████████▎                              | 25/100 [00:03<00:09,  8.29it/s]
 28%|███████████▍                      

62:train loss is 11.826555252075195



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:06, 15.84it/s]
  5%|██                                        | 5/100 [00:00<00:08, 11.74it/s]
  6%|██▌                                       | 6/100 [00:00<00:10,  9.22it/s]
  7%|██▉                                       | 7/100 [00:00<00:12,  7.64it/s]
  8%|███▎                                      | 8/100 [00:01<00:13,  7.00it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.21it/s]
 10%|████                                     | 10/100 [00:01<00:12,  7.24it/s]
 11%|████▌                                    | 11/100 [00:01<00:13,  6.78it/s]
 12%|████▉                                    | 12/100 [00:01<00:13,  6.74it/s]
 13%|█████▎                                   | 13/100 [00:02<00:13,  6.48it/s]
 15%|██████▏                                  | 15/100 [00:02<00:12,  6.81it/s]
 17%|██████▉                           

63:train loss is 9.526759147644043



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:17,  5.65it/s]
  3%|█▎                                        | 3/100 [00:00<00:11,  8.43it/s]
  6%|██▌                                       | 6/100 [00:00<00:10,  8.68it/s]
  8%|███▎                                      | 8/100 [00:00<00:10,  8.90it/s]
  9%|███▊                                      | 9/100 [00:01<00:17,  5.09it/s]
 10%|████                                     | 10/100 [00:02<00:22,  4.02it/s]
 13%|█████▎                                   | 13/100 [00:02<00:17,  4.96it/s]
 15%|██████▏                                  | 15/100 [00:02<00:16,  5.19it/s]
 19%|███████▊                                 | 19/100 [00:03<00:13,  5.86it/s]
 20%|████████▏                                | 20/100 [00:03<00:15,  5.08it/s]
 21%|████████▌                                | 21/100 [00:04<00:15,  5.20it/s]
 22%|█████████                         

64:train loss is 15.8070068359375



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:06, 14.65it/s]
  4%|█▋                                        | 4/100 [00:00<00:12,  7.54it/s]
  8%|███▎                                      | 8/100 [00:00<00:08, 10.34it/s]
  9%|███▊                                      | 9/100 [00:01<00:10,  8.70it/s]
 10%|████                                     | 10/100 [00:01<00:11,  7.98it/s]
 12%|████▉                                    | 12/100 [00:01<00:10,  8.32it/s]
 14%|█████▋                                   | 14/100 [00:01<00:11,  7.81it/s]
 16%|██████▌                                  | 16/100 [00:02<00:12,  6.46it/s]
 17%|██████▉                                  | 17/100 [00:02<00:12,  6.54it/s]
 18%|███████▍                                 | 18/100 [00:02<00:12,  6.59it/s]
 19%|███████▊                                 | 19/100 [00:02<00:12,  6.43it/s]
 21%|████████▌                         

65:train loss is 30.927841186523438



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:12,  7.96it/s]
  6%|██▌                                       | 6/100 [00:00<00:07, 11.81it/s]
 10%|████                                     | 10/100 [00:00<00:07, 11.42it/s]
 14%|█████▋                                   | 14/100 [00:01<00:06, 13.95it/s]
 16%|██████▌                                  | 16/100 [00:01<00:06, 13.37it/s]
 18%|███████▍                                 | 18/100 [00:01<00:07, 11.51it/s]
 20%|████████▏                                | 20/100 [00:01<00:07, 10.61it/s]
 21%|████████▌                                | 21/100 [00:02<00:09,  8.22it/s]
 22%|█████████                                | 22/100 [00:02<00:09,  7.90it/s]
 23%|█████████▍                               | 23/100 [00:03<00:10,  7.56it/s]
 25%|██████████▎                              | 25/100 [00:03<00:09,  7.71it/s]
 26%|██████████▋                       

66:train loss is 19.354198455810547



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:04,  1.53it/s]
  2%|▊                                         | 2/100 [00:01<01:07,  1.46it/s]
  3%|█▎                                        | 3/100 [00:01<00:53,  1.82it/s]
  5%|██                                        | 5/100 [00:01<00:34,  2.77it/s]
  6%|██▌                                       | 6/100 [00:02<00:38,  2.42it/s]
  8%|███▎                                      | 8/100 [00:02<00:30,  3.01it/s]
 11%|████▌                                    | 11/100 [00:03<00:27,  3.24it/s]
 12%|████▉                                    | 12/100 [00:03<00:26,  3.28it/s]
 13%|█████▎                                   | 13/100 [00:03<00:25,  3.41it/s]
 15%|██████▏                                  | 15/100 [00:04<00:23,  3.59it/s]
 16%|██████▌                                  | 16/100 [00:04<00:22,  3.67it/s]
 17%|██████▉                           

67:train loss is 21.229307174682617



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:19,  5.02it/s]
  3%|█▎                                        | 3/100 [00:00<00:23,  4.14it/s]
  5%|██                                        | 5/100 [00:00<00:15,  5.97it/s]
  8%|███▎                                      | 8/100 [00:00<00:11,  8.36it/s]
 12%|████▉                                    | 12/100 [00:01<00:14,  6.07it/s]
 14%|█████▋                                   | 14/100 [00:02<00:17,  5.01it/s]
 17%|██████▉                                  | 17/100 [00:03<00:15,  5.53it/s]
 18%|███████▍                                 | 18/100 [00:03<00:15,  5.40it/s]
 19%|███████▊                                 | 19/100 [00:03<00:14,  5.41it/s]
 21%|████████▌                                | 21/100 [00:03<00:13,  5.69it/s]
 23%|█████████▍                               | 23/100 [00:03<00:12,  5.97it/s]
 24%|█████████▊                        

68:train loss is 18.795351028442383



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:12,  7.92it/s]
  3%|█▎                                        | 3/100 [00:00<00:16,  5.95it/s]
  6%|██▌                                       | 6/100 [00:00<00:12,  7.34it/s]
  7%|██▉                                       | 7/100 [00:00<00:13,  7.13it/s]
  8%|███▎                                      | 8/100 [00:01<00:15,  6.05it/s]
 10%|████                                     | 10/100 [00:01<00:15,  5.94it/s]
 11%|████▌                                    | 11/100 [00:01<00:15,  5.91it/s]
 12%|████▉                                    | 12/100 [00:02<00:14,  5.93it/s]
 13%|█████▎                                   | 13/100 [00:02<00:17,  4.85it/s]
 14%|█████▋                                   | 14/100 [00:02<00:17,  4.98it/s]
 16%|██████▌                                  | 16/100 [00:03<00:16,  5.14it/s]
 17%|██████▉                           

69:train loss is 11.85551643371582



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:22,  1.20it/s]
  2%|▊                                         | 2/100 [00:01<00:54,  1.81it/s]
  5%|██                                        | 5/100 [00:01<00:25,  3.77it/s]
  6%|██▌                                       | 6/100 [00:01<00:23,  4.04it/s]
  7%|██▉                                       | 7/100 [00:01<00:22,  4.18it/s]
  8%|███▎                                      | 8/100 [00:01<00:20,  4.48it/s]
  9%|███▊                                      | 9/100 [00:02<00:20,  4.40it/s]
 10%|████                                     | 10/100 [00:02<00:21,  4.24it/s]
 11%|████▌                                    | 11/100 [00:02<00:21,  4.23it/s]
 12%|████▉                                    | 12/100 [00:02<00:21,  4.17it/s]
 13%|█████▎                                   | 13/100 [00:03<00:20,  4.33it/s]
 17%|██████▉                           

70:train loss is 6.2664666175842285
valid_loss improves
70: valid_loss is4.75568962097168



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:14,  6.55it/s]
  4%|█▋                                        | 4/100 [00:00<00:13,  7.15it/s]
  7%|██▉                                       | 7/100 [00:00<00:09,  9.43it/s]
  9%|███▊                                      | 9/100 [00:00<00:09,  9.75it/s]
 12%|████▉                                    | 12/100 [00:01<00:08, 10.45it/s]
 15%|██████▏                                  | 15/100 [00:01<00:07, 10.85it/s]
 17%|██████▉                                  | 17/100 [00:01<00:08, 10.19it/s]
 18%|███████▍                                 | 18/100 [00:01<00:08,  9.86it/s]
 19%|███████▊                                 | 19/100 [00:02<00:10,  7.68it/s]
 21%|████████▌                                | 21/100 [00:02<00:10,  7.56it/s]
 22%|█████████                                | 22/100 [00:03<00:12,  6.35it/s]
 23%|█████████▍                        

71:train loss is 2.779198408126831



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:08, 11.34it/s]
  3%|█▎                                        | 3/100 [00:00<00:15,  6.32it/s]
  4%|█▋                                        | 4/100 [00:00<00:15,  6.04it/s]
  7%|██▉                                       | 7/100 [00:00<00:10,  8.93it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.18it/s]
 10%|████                                     | 10/100 [00:01<00:12,  7.14it/s]
 13%|█████▎                                   | 13/100 [00:01<00:10,  8.58it/s]
 15%|██████▏                                  | 15/100 [00:01<00:09,  8.59it/s]
 17%|██████▉                                  | 17/100 [00:01<00:09,  8.67it/s]
 19%|███████▊                                 | 19/100 [00:02<00:08,  9.00it/s]
 21%|████████▌                                | 21/100 [00:02<00:09,  8.41it/s]
 24%|█████████▊                        

72:train loss is 11.597541809082031



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:09,  1.42it/s]
  4%|█▋                                        | 4/100 [00:00<00:23,  4.01it/s]
  6%|██▌                                       | 6/100 [00:01<00:27,  3.48it/s]
  7%|██▉                                       | 7/100 [00:02<00:26,  3.50it/s]
  8%|███▎                                      | 8/100 [00:02<00:25,  3.56it/s]
  9%|███▊                                      | 9/100 [00:02<00:23,  3.84it/s]
 11%|████▌                                    | 11/100 [00:02<00:21,  4.12it/s]
 13%|█████▎                                   | 13/100 [00:02<00:18,  4.68it/s]
 16%|██████▌                                  | 16/100 [00:03<00:16,  5.06it/s]
 18%|███████▍                                 | 18/100 [00:03<00:15,  5.32it/s]
 20%|████████▏                                | 20/100 [00:03<00:14,  5.64it/s]
 22%|█████████                         

73:train loss is 26.791284561157227



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:44,  2.20it/s]
  3%|█▎                                        | 3/100 [00:01<00:32,  2.94it/s]
  4%|█▋                                        | 4/100 [00:01<00:28,  3.35it/s]
  6%|██▌                                       | 6/100 [00:01<00:21,  4.29it/s]
  7%|██▉                                       | 7/100 [00:01<00:20,  4.48it/s]
  9%|███▊                                      | 9/100 [00:01<00:17,  5.29it/s]
 10%|████                                     | 10/100 [00:02<00:18,  4.91it/s]
 12%|████▉                                    | 12/100 [00:02<00:15,  5.60it/s]
 15%|██████▏                                  | 15/100 [00:02<00:12,  6.61it/s]
 17%|██████▉                                  | 17/100 [00:03<00:15,  5.37it/s]
 21%|████████▌                                | 21/100 [00:03<00:12,  6.22it/s]
 23%|█████████▍                        

74:train loss is 5.3962602615356445



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:23,  4.17it/s]
  3%|█▎                                        | 3/100 [00:00<00:15,  6.08it/s]
  4%|█▋                                        | 4/100 [00:00<00:15,  6.30it/s]
  5%|██                                        | 5/100 [00:00<00:15,  6.09it/s]
  6%|██▌                                       | 6/100 [00:00<00:15,  6.21it/s]
 10%|████                                     | 10/100 [00:01<00:09,  9.24it/s]
 12%|████▉                                    | 12/100 [00:01<00:09,  8.87it/s]
 15%|██████▏                                  | 15/100 [00:01<00:09,  9.05it/s]
 17%|██████▉                                  | 17/100 [00:02<00:11,  6.99it/s]
 20%|████████▏                                | 20/100 [00:02<00:10,  7.31it/s]
 22%|█████████                                | 22/100 [00:03<00:10,  7.31it/s]
 24%|█████████▊                        

75:train loss is 7.247952461242676



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:05,  1.51it/s]
  3%|█▎                                        | 3/100 [00:00<00:25,  3.77it/s]
  4%|█▋                                        | 4/100 [00:01<00:25,  3.84it/s]
  6%|██▌                                       | 6/100 [00:01<00:26,  3.54it/s]
  7%|██▉                                       | 7/100 [00:01<00:25,  3.58it/s]
  8%|███▎                                      | 8/100 [00:02<00:30,  3.04it/s]
 11%|████▌                                    | 11/100 [00:02<00:22,  4.02it/s]
 13%|█████▎                                   | 13/100 [00:03<00:20,  4.26it/s]
 17%|██████▉                                  | 17/100 [00:03<00:15,  5.37it/s]
 19%|███████▊                                 | 19/100 [00:03<00:14,  5.58it/s]
 21%|████████▌                                | 21/100 [00:04<00:15,  5.14it/s]
 23%|█████████▍                        

76:train loss is 4.163921356201172



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:30,  3.27it/s]
  2%|▊                                         | 2/100 [00:00<00:24,  4.08it/s]
  4%|█▋                                        | 4/100 [00:00<00:17,  5.51it/s]
  6%|██▌                                       | 6/100 [00:00<00:14,  6.49it/s]
  9%|███▊                                      | 9/100 [00:01<00:11,  7.60it/s]
 11%|████▌                                    | 11/100 [00:01<00:12,  7.34it/s]
 12%|████▉                                    | 12/100 [00:01<00:12,  7.27it/s]
 15%|██████▏                                  | 15/100 [00:01<00:09,  8.56it/s]
 17%|██████▉                                  | 17/100 [00:02<00:10,  8.12it/s]
 20%|████████▏                                | 20/100 [00:02<00:08,  9.07it/s]
 22%|█████████                                | 22/100 [00:03<00:10,  7.13it/s]
 24%|█████████▊                        

77:train loss is 1.9907925128936768



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:13,  7.45it/s]
  5%|██                                        | 5/100 [00:00<00:12,  7.91it/s]
  7%|██▉                                       | 7/100 [00:00<00:10,  9.18it/s]
  8%|███▎                                      | 8/100 [00:01<00:18,  4.98it/s]
  9%|███▊                                      | 9/100 [00:01<00:18,  4.88it/s]
 11%|████▌                                    | 11/100 [00:01<00:15,  5.61it/s]
 12%|████▉                                    | 12/100 [00:02<00:15,  5.71it/s]
 14%|█████▋                                   | 14/100 [00:02<00:14,  5.97it/s]
 16%|██████▌                                  | 16/100 [00:02<00:13,  6.27it/s]
 17%|██████▉                                  | 17/100 [00:02<00:13,  6.35it/s]
 19%|███████▊                                 | 19/100 [00:02<00:12,  6.62it/s]
 20%|████████▏                         

78:train loss is 5.190039157867432



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:13,  7.09it/s]
  3%|█▎                                        | 3/100 [00:00<00:14,  6.90it/s]
  5%|██                                        | 5/100 [00:00<00:11,  8.03it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  8.35it/s]
 10%|████                                     | 10/100 [00:01<00:09,  9.79it/s]
 11%|████▌                                    | 11/100 [00:01<00:10,  8.74it/s]
 12%|████▉                                    | 12/100 [00:01<00:10,  8.09it/s]
 13%|█████▎                                   | 13/100 [00:01<00:11,  7.53it/s]
 15%|██████▏                                  | 15/100 [00:01<00:10,  7.89it/s]
 16%|██████▌                                  | 16/100 [00:02<00:10,  7.83it/s]
 17%|██████▉                                  | 17/100 [00:02<00:10,  7.92it/s]
 18%|███████▍                          

79:train loss is 8.791054725646973



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:08, 11.61it/s]
  4%|█▋                                        | 4/100 [00:00<00:13,  7.17it/s]
  6%|██▌                                       | 6/100 [00:00<00:11,  8.39it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  8.32it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.52it/s]
 10%|████                                     | 10/100 [00:01<00:11,  7.55it/s]
 12%|████▉                                    | 12/100 [00:01<00:10,  8.29it/s]
 14%|█████▋                                   | 14/100 [00:01<00:09,  8.92it/s]
 16%|██████▌                                  | 16/100 [00:01<00:09,  9.09it/s]
 18%|███████▍                                 | 18/100 [00:02<00:10,  7.55it/s]
 20%|████████▏                                | 20/100 [00:02<00:10,  7.67it/s]
 21%|████████▌                         

80:train loss is 14.065776824951172
80: valid_loss is23.516765594482422



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:05, 19.36it/s]
  4%|█▋                                        | 4/100 [00:00<00:07, 12.87it/s]
  5%|██                                        | 5/100 [00:00<00:09, 10.44it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  8.24it/s]
  8%|███▎                                      | 8/100 [00:01<00:13,  6.88it/s]
  9%|███▊                                      | 9/100 [00:01<00:14,  6.08it/s]
 11%|████▌                                    | 11/100 [00:01<00:13,  6.54it/s]
 12%|████▉                                    | 12/100 [00:01<00:14,  6.15it/s]
 13%|█████▎                                   | 13/100 [00:02<00:15,  5.74it/s]
 15%|██████▏                                  | 15/100 [00:02<00:14,  5.98it/s]
 16%|██████▌                                  | 16/100 [00:02<00:13,  6.06it/s]
 18%|███████▍                          

81:train loss is 15.053691864013672



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:03,  1.56it/s]
  2%|▊                                         | 2/100 [00:00<00:37,  2.59it/s]
  4%|█▋                                        | 4/100 [00:00<00:23,  4.07it/s]
  7%|██▉                                       | 7/100 [00:01<00:14,  6.27it/s]
  8%|███▎                                      | 8/100 [00:01<00:15,  5.89it/s]
  9%|███▊                                      | 9/100 [00:01<00:15,  6.04it/s]
 13%|█████▎                                   | 13/100 [00:01<00:10,  8.14it/s]
 15%|██████▏                                  | 15/100 [00:01<00:10,  8.33it/s]
 17%|██████▉                                  | 17/100 [00:02<00:10,  7.91it/s]
 20%|████████▏                                | 20/100 [00:02<00:09,  8.84it/s]
 24%|█████████▊                               | 24/100 [00:02<00:07, 10.16it/s]
 27%|███████████                       

82:train loss is 15.265239715576172



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:04, 21.12it/s]
  5%|██                                        | 5/100 [00:00<00:08, 11.28it/s]
  6%|██▌                                       | 6/100 [00:00<00:10,  8.94it/s]
  7%|██▉                                       | 7/100 [00:00<00:12,  7.43it/s]
  8%|███▎                                      | 8/100 [00:01<00:13,  6.84it/s]
 10%|████                                     | 10/100 [00:01<00:12,  7.21it/s]
 11%|████▌                                    | 11/100 [00:01<00:13,  6.54it/s]
 12%|████▉                                    | 12/100 [00:01<00:13,  6.39it/s]
 13%|█████▎                                   | 13/100 [00:02<00:14,  5.98it/s]
 15%|██████▏                                  | 15/100 [00:02<00:13,  6.41it/s]
 17%|██████▉                                  | 17/100 [00:02<00:12,  6.82it/s]
 18%|███████▍                          

83:train loss is 5.1748480796813965



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:17,  5.64it/s]
  3%|█▎                                        | 3/100 [00:00<00:11,  8.44it/s]
  6%|██▌                                       | 6/100 [00:00<00:10,  8.68it/s]
  8%|███▎                                      | 8/100 [00:00<00:10,  8.91it/s]
  9%|███▊                                      | 9/100 [00:01<00:17,  5.08it/s]
 10%|████                                     | 10/100 [00:02<00:23,  3.75it/s]
 13%|█████▎                                   | 13/100 [00:02<00:18,  4.60it/s]
 15%|██████▏                                  | 15/100 [00:03<00:17,  4.79it/s]
 19%|███████▊                                 | 19/100 [00:03<00:14,  5.49it/s]
 20%|████████▏                                | 20/100 [00:04<00:16,  4.81it/s]
 21%|████████▌                                | 21/100 [00:04<00:16,  4.93it/s]
 22%|█████████                         

84:train loss is 4.621857166290283



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:07, 12.66it/s]
  4%|█▋                                        | 4/100 [00:00<00:15,  6.35it/s]
  8%|███▎                                      | 8/100 [00:00<00:10,  8.60it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.17it/s]
 10%|████                                     | 10/100 [00:01<00:13,  6.50it/s]
 12%|████▉                                    | 12/100 [00:01<00:12,  6.78it/s]
 13%|█████▎                                   | 13/100 [00:01<00:12,  6.95it/s]
 14%|█████▋                                   | 14/100 [00:02<00:13,  6.34it/s]
 16%|██████▌                                  | 16/100 [00:02<00:15,  5.52it/s]
 17%|██████▉                                  | 17/100 [00:03<00:14,  5.62it/s]
 18%|███████▍                                 | 18/100 [00:03<00:14,  5.70it/s]
 19%|███████▊                          

85:train loss is 14.192578315734863



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:09,  9.74it/s]
  7%|██▉                                       | 7/100 [00:00<00:05, 16.98it/s]
 10%|████                                     | 10/100 [00:00<00:06, 14.38it/s]
 14%|█████▋                                   | 14/100 [00:00<00:04, 17.20it/s]
 16%|██████▌                                  | 16/100 [00:00<00:05, 16.76it/s]
 18%|███████▍                                 | 18/100 [00:01<00:05, 14.31it/s]
 20%|████████▏                                | 20/100 [00:01<00:06, 13.16it/s]
 22%|█████████                                | 22/100 [00:02<00:08,  9.30it/s]
 23%|█████████▍                               | 23/100 [00:02<00:08,  8.87it/s]
 25%|██████████▎                              | 25/100 [00:02<00:08,  9.00it/s]
 26%|██████████▋                              | 26/100 [00:02<00:08,  8.73it/s]
 27%|███████████                       

86:train loss is 7.07075309753418



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:25,  1.16it/s]
  2%|▊                                         | 2/100 [00:01<01:28,  1.11it/s]
  3%|█▎                                        | 3/100 [00:02<01:09,  1.40it/s]
  5%|██                                        | 5/100 [00:02<00:44,  2.13it/s]
  6%|██▌                                       | 6/100 [00:03<00:50,  1.87it/s]
  8%|███▎                                      | 8/100 [00:03<00:39,  2.35it/s]
 11%|████▌                                    | 11/100 [00:04<00:33,  2.66it/s]
 12%|████▉                                    | 12/100 [00:04<00:32,  2.73it/s]
 13%|█████▎                                   | 13/100 [00:04<00:30,  2.86it/s]
 15%|██████▏                                  | 15/100 [00:04<00:27,  3.06it/s]
 16%|██████▌                                  | 16/100 [00:05<00:26,  3.15it/s]
 17%|██████▉                           

87:train loss is 11.256479263305664



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:18,  5.40it/s]
  3%|█▎                                        | 3/100 [00:00<00:22,  4.35it/s]
  5%|██                                        | 5/100 [00:00<00:15,  6.31it/s]
  9%|███▊                                      | 9/100 [00:00<00:09,  9.90it/s]
 12%|████▉                                    | 12/100 [00:01<00:14,  6.28it/s]
 14%|█████▋                                   | 14/100 [00:02<00:17,  5.04it/s]
 17%|██████▉                                  | 17/100 [00:03<00:14,  5.56it/s]
 18%|███████▍                                 | 18/100 [00:03<00:15,  5.42it/s]
 19%|███████▊                                 | 19/100 [00:03<00:14,  5.43it/s]
 21%|████████▌                                | 21/100 [00:03<00:13,  5.71it/s]
 23%|█████████▍                               | 23/100 [00:03<00:12,  5.94it/s]
 24%|█████████▊                        

88:train loss is 12.559662818908691



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:11,  8.51it/s]
  3%|█▎                                        | 3/100 [00:00<00:14,  6.88it/s]
  6%|██▌                                       | 6/100 [00:00<00:10,  8.63it/s]
  7%|██▉                                       | 7/100 [00:00<00:10,  8.56it/s]
  8%|███▎                                      | 8/100 [00:01<00:12,  7.51it/s]
 10%|████                                     | 10/100 [00:01<00:12,  7.44it/s]
 11%|████▌                                    | 11/100 [00:01<00:12,  7.35it/s]
 12%|████▉                                    | 12/100 [00:01<00:11,  7.34it/s]
 13%|█████▎                                   | 13/100 [00:02<00:15,  5.72it/s]
 14%|█████▋                                   | 14/100 [00:02<00:14,  5.80it/s]
 16%|██████▌                                  | 16/100 [00:02<00:14,  5.80it/s]
 17%|██████▉                           

89:train loss is 16.838834762573242



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:21,  1.21it/s]
  2%|▊                                         | 2/100 [00:01<00:54,  1.79it/s]
  5%|██                                        | 5/100 [00:01<00:25,  3.71it/s]
  6%|██▌                                       | 6/100 [00:01<00:23,  3.98it/s]
  7%|██▉                                       | 7/100 [00:01<00:22,  4.13it/s]
  8%|███▎                                      | 8/100 [00:01<00:20,  4.43it/s]
  9%|███▊                                      | 9/100 [00:02<00:20,  4.36it/s]
 10%|████                                     | 10/100 [00:02<00:20,  4.29it/s]
 11%|████▌                                    | 11/100 [00:02<00:20,  4.36it/s]
 12%|████▉                                    | 12/100 [00:02<00:20,  4.28it/s]
 13%|█████▎                                   | 13/100 [00:02<00:19,  4.43it/s]
 17%|██████▉                           

90:train loss is 7.610695838928223
90: valid_loss is11.725397109985352



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:17,  5.60it/s]
  4%|█▋                                        | 4/100 [00:00<00:16,  5.75it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  7.80it/s]
  8%|███▎                                      | 8/100 [00:01<00:11,  7.84it/s]
  9%|███▊                                      | 9/100 [00:01<00:11,  7.95it/s]
 11%|████▌                                    | 11/100 [00:01<00:10,  8.87it/s]
 13%|█████▎                                   | 13/100 [00:01<00:09,  8.97it/s]
 15%|██████▏                                  | 15/100 [00:01<00:09,  8.57it/s]
 16%|██████▌                                  | 16/100 [00:02<00:10,  7.69it/s]
 18%|███████▍                                 | 18/100 [00:02<00:10,  7.88it/s]
 19%|███████▊                                 | 19/100 [00:02<00:12,  6.46it/s]
 21%|████████▌                         

91:train loss is 7.127120494842529



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:08, 12.13it/s]
  3%|█▎                                        | 3/100 [00:00<00:13,  7.12it/s]
  4%|█▋                                        | 4/100 [00:00<00:14,  6.57it/s]
  7%|██▉                                       | 7/100 [00:00<00:09,  9.56it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.44it/s]
 10%|████                                     | 10/100 [00:01<00:11,  7.54it/s]
 13%|█████▎                                   | 13/100 [00:01<00:09,  9.10it/s]
 15%|██████▏                                  | 15/100 [00:01<00:09,  9.04it/s]
 17%|██████▉                                  | 17/100 [00:01<00:09,  9.09it/s]
 19%|███████▊                                 | 19/100 [00:02<00:08,  9.29it/s]
 21%|████████▌                                | 21/100 [00:02<00:09,  8.62it/s]
 24%|█████████▊                        

92:train loss is 2.435800075531006



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:03<05:34,  3.38s/it]
  4%|█▋                                        | 4/100 [00:03<01:28,  1.09it/s]
  6%|██▌                                       | 6/100 [00:04<01:09,  1.36it/s]
  7%|██▉                                       | 7/100 [00:04<01:02,  1.49it/s]
  8%|███▎                                      | 8/100 [00:05<01:04,  1.42it/s]
 10%|████                                     | 10/100 [00:05<00:51,  1.73it/s]
 11%|████▌                                    | 11/100 [00:06<00:49,  1.81it/s]
 13%|█████▎                                   | 13/100 [00:06<00:41,  2.10it/s]
 16%|██████▌                                  | 16/100 [00:06<00:34,  2.44it/s]
 18%|███████▍                                 | 18/100 [00:06<00:30,  2.66it/s]
 20%|████████▏                                | 20/100 [00:06<00:27,  2.87it/s]
 22%|█████████                         

93:train loss is 8.324274063110352



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:43,  2.25it/s]
  3%|█▎                                        | 3/100 [00:01<00:33,  2.93it/s]
  4%|█▋                                        | 4/100 [00:01<00:28,  3.37it/s]
  6%|██▌                                       | 6/100 [00:01<00:22,  4.24it/s]
  7%|██▉                                       | 7/100 [00:01<00:20,  4.43it/s]
  9%|███▊                                      | 9/100 [00:01<00:17,  5.18it/s]
 10%|████                                     | 10/100 [00:02<00:18,  4.79it/s]
 12%|████▉                                    | 12/100 [00:02<00:16,  5.40it/s]
 15%|██████▏                                  | 15/100 [00:02<00:13,  6.41it/s]
 17%|██████▉                                  | 17/100 [00:03<00:16,  5.14it/s]
 21%|████████▌                                | 21/100 [00:03<00:13,  5.97it/s]
 23%|█████████▍                        

94:train loss is 11.055377006530762



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:24,  4.03it/s]
  3%|█▎                                        | 3/100 [00:00<00:16,  5.97it/s]
  4%|█▋                                        | 4/100 [00:00<00:15,  6.23it/s]
  5%|██                                        | 5/100 [00:00<00:15,  6.03it/s]
  6%|██▌                                       | 6/100 [00:00<00:15,  6.15it/s]
 10%|████                                     | 10/100 [00:01<00:09,  9.16it/s]
 12%|████▉                                    | 12/100 [00:01<00:09,  8.80it/s]
 15%|██████▏                                  | 15/100 [00:01<00:09,  9.00it/s]
 17%|██████▉                                  | 17/100 [00:02<00:11,  7.00it/s]
 20%|████████▏                                | 20/100 [00:02<00:10,  7.36it/s]
 21%|████████▌                                | 21/100 [00:02<00:10,  7.45it/s]
 22%|█████████                         

95:train loss is 2.0885939598083496



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:26,  1.15it/s]
  3%|█▎                                        | 3/100 [00:01<00:34,  2.85it/s]
  4%|█▋                                        | 4/100 [00:01<00:32,  2.92it/s]
  6%|██▌                                       | 6/100 [00:02<00:34,  2.69it/s]
  7%|██▉                                       | 7/100 [00:02<00:33,  2.74it/s]
  8%|███▎                                      | 8/100 [00:03<00:37,  2.46it/s]
 12%|████▉                                    | 12/100 [00:03<00:24,  3.56it/s]
 14%|█████▋                                   | 14/100 [00:03<00:22,  3.84it/s]
 18%|███████▍                                 | 18/100 [00:03<00:17,  4.70it/s]
 20%|████████▏                                | 20/100 [00:03<00:15,  5.03it/s]
 22%|█████████                                | 22/100 [00:04<00:17,  4.56it/s]
 24%|█████████▊                        

96:train loss is 4.504782199859619



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:24,  4.06it/s]
  2%|▊                                         | 2/100 [00:00<00:19,  5.06it/s]
  4%|█▋                                        | 4/100 [00:00<00:14,  6.72it/s]
  6%|██▌                                       | 6/100 [00:00<00:11,  8.00it/s]
  9%|███▊                                      | 9/100 [00:00<00:09,  9.54it/s]
 11%|████▌                                    | 11/100 [00:01<00:09,  9.21it/s]
 12%|████▉                                    | 12/100 [00:01<00:09,  8.97it/s]
 16%|██████▌                                  | 16/100 [00:01<00:07, 11.05it/s]
 18%|███████▍                                 | 18/100 [00:01<00:07, 10.48it/s]
 21%|████████▌                                | 21/100 [00:02<00:09,  8.50it/s]
 23%|█████████▍                               | 23/100 [00:02<00:09,  8.28it/s]
 24%|█████████▊                        

97:train loss is 6.24751091003418



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:12,  7.66it/s]
  5%|██                                        | 5/100 [00:00<00:11,  8.35it/s]
  7%|██▉                                       | 7/100 [00:00<00:09,  9.57it/s]
  8%|███▎                                      | 8/100 [00:01<00:18,  5.08it/s]
  9%|███▊                                      | 9/100 [00:01<00:18,  4.97it/s]
 10%|████                                     | 10/100 [00:01<00:17,  5.20it/s]
 12%|████▉                                    | 12/100 [00:02<00:15,  5.63it/s]
 14%|█████▋                                   | 14/100 [00:02<00:14,  5.91it/s]
 16%|██████▌                                  | 16/100 [00:02<00:13,  6.18it/s]
 17%|██████▉                                  | 17/100 [00:02<00:13,  6.27it/s]
 19%|███████▊                                 | 19/100 [00:02<00:12,  6.53it/s]
 20%|████████▏                         

98:train loss is 5.910383701324463



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:17,  5.69it/s]
  3%|█▎                                        | 3/100 [00:00<00:16,  5.86it/s]
  5%|██                                        | 5/100 [00:00<00:13,  6.97it/s]
  7%|██▉                                       | 7/100 [00:00<00:13,  7.15it/s]
 10%|████                                     | 10/100 [00:01<00:10,  8.26it/s]
 11%|████▌                                    | 11/100 [00:01<00:12,  7.31it/s]
 12%|████▉                                    | 12/100 [00:01<00:13,  6.64it/s]
 13%|█████▎                                   | 13/100 [00:02<00:14,  6.14it/s]
 15%|██████▏                                  | 15/100 [00:02<00:13,  6.45it/s]
 16%|██████▌                                  | 16/100 [00:02<00:13,  6.39it/s]
 17%|██████▉                                  | 17/100 [00:02<00:12,  6.48it/s]
 18%|███████▍                          

99:train loss is 3.536323308944702



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:07, 12.17it/s]
  4%|█▋                                        | 4/100 [00:00<00:12,  7.40it/s]
  6%|██▌                                       | 6/100 [00:00<00:12,  7.78it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  7.80it/s]
  9%|███▊                                      | 9/100 [00:01<00:17,  5.13it/s]
 10%|████                                     | 10/100 [00:01<00:17,  5.27it/s]
 12%|████▉                                    | 12/100 [00:02<00:15,  5.75it/s]
 14%|█████▋                                   | 14/100 [00:02<00:13,  6.26it/s]
 15%|██████▏                                  | 15/100 [00:02<00:13,  6.22it/s]
 17%|██████▉                                  | 17/100 [00:03<00:15,  5.23it/s]
 18%|███████▍                                 | 18/100 [00:04<00:20,  4.06it/s]
 19%|███████▊                          

100:train loss is 2.7705907821655273
valid_loss improves
100: valid_loss is2.3173255920410156



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:04, 20.38it/s]
  4%|█▋                                        | 4/100 [00:00<00:06, 14.54it/s]
  5%|██                                        | 5/100 [00:00<00:08, 11.81it/s]
  7%|██▉                                       | 7/100 [00:00<00:09,  9.74it/s]
  8%|███▎                                      | 8/100 [00:00<00:11,  8.28it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.43it/s]
 11%|████▌                                    | 11/100 [00:01<00:11,  8.02it/s]
 12%|████▉                                    | 12/100 [00:01<00:11,  7.63it/s]
 13%|█████▎                                   | 13/100 [00:01<00:12,  7.18it/s]
 15%|██████▏                                  | 15/100 [00:02<00:11,  7.36it/s]
 16%|██████▌                                  | 16/100 [00:02<00:11,  7.39it/s]
 18%|███████▍                          

101:train loss is 2.5198099613189697



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:04,  1.53it/s]
  2%|▊                                         | 2/100 [00:00<00:38,  2.57it/s]
  4%|█▋                                        | 4/100 [00:00<00:23,  4.04it/s]
  7%|██▉                                       | 7/100 [00:01<00:14,  6.24it/s]
  9%|███▊                                      | 9/100 [00:01<00:15,  5.92it/s]
 13%|█████▎                                   | 13/100 [00:01<00:10,  8.00it/s]
 15%|██████▏                                  | 15/100 [00:01<00:10,  8.20it/s]
 17%|██████▉                                  | 17/100 [00:02<00:10,  7.73it/s]
 21%|████████▌                                | 21/100 [00:02<00:08,  9.06it/s]
 25%|██████████▎                              | 25/100 [00:02<00:07,  9.87it/s]
 28%|███████████▍                             | 28/100 [00:02<00:07, 10.00it/s]
 30%|████████████▎                     

102:train loss is 2.218234062194824



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:05, 18.05it/s]
  5%|██                                        | 5/100 [00:00<00:08, 11.09it/s]
  6%|██▌                                       | 6/100 [00:00<00:11,  8.10it/s]
  7%|██▉                                       | 7/100 [00:01<00:14,  6.52it/s]
  8%|███▎                                      | 8/100 [00:01<00:15,  5.87it/s]
  9%|███▊                                      | 9/100 [00:01<00:15,  6.05it/s]
 10%|████                                     | 10/100 [00:01<00:14,  6.06it/s]
 11%|████▌                                    | 11/100 [00:01<00:15,  5.65it/s]
 12%|████▉                                    | 12/100 [00:02<00:15,  5.68it/s]
 13%|█████▎                                   | 13/100 [00:02<00:15,  5.55it/s]
 15%|██████▏                                  | 15/100 [00:02<00:14,  5.97it/s]
 17%|██████▉                           

103:train loss is 5.612934112548828



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:18,  5.30it/s]
  3%|█▎                                        | 3/100 [00:00<00:12,  7.86it/s]
  4%|█▋                                        | 4/100 [00:00<00:12,  7.64it/s]
  6%|██▌                                       | 6/100 [00:00<00:15,  6.09it/s]
  8%|███▎                                      | 8/100 [00:01<00:14,  6.41it/s]
  9%|███▊                                      | 9/100 [00:02<00:24,  3.79it/s]
 10%|████                                     | 10/100 [00:03<00:30,  2.95it/s]
 13%|█████▎                                   | 13/100 [00:03<00:23,  3.67it/s]
 15%|██████▏                                  | 15/100 [00:03<00:22,  3.85it/s]
 18%|███████▍                                 | 18/100 [00:04<00:18,  4.48it/s]
 20%|████████▏                                | 20/100 [00:05<00:22,  3.51it/s]
 21%|████████▌                         

104:train loss is 1.9271762371063232



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:08, 11.99it/s]
  4%|█▋                                        | 4/100 [00:00<00:15,  6.09it/s]
  8%|███▎                                      | 8/100 [00:00<00:10,  8.60it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.11it/s]
 10%|████                                     | 10/100 [00:01<00:14,  6.41it/s]
 12%|████▉                                    | 12/100 [00:01<00:13,  6.70it/s]
 13%|█████▎                                   | 13/100 [00:01<00:12,  6.86it/s]
 14%|█████▋                                   | 14/100 [00:02<00:13,  6.21it/s]
 16%|██████▌                                  | 16/100 [00:03<00:19,  4.34it/s]
 17%|██████▉                                  | 17/100 [00:03<00:18,  4.45it/s]
 18%|███████▍                                 | 18/100 [00:03<00:18,  4.52it/s]
 19%|███████▊                          

105:train loss is 4.188925266265869



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:10,  9.06it/s]
  6%|██▌                                       | 6/100 [00:00<00:06, 13.74it/s]
  9%|███▊                                      | 9/100 [00:00<00:05, 16.40it/s]
 11%|████▌                                    | 11/100 [00:00<00:06, 14.04it/s]
 15%|██████▏                                  | 15/100 [00:01<00:05, 14.98it/s]
 18%|███████▍                                 | 18/100 [00:01<00:06, 13.64it/s]
 20%|████████▏                                | 20/100 [00:01<00:06, 12.31it/s]
 22%|█████████                                | 22/100 [00:02<00:08,  8.94it/s]
 23%|█████████▍                               | 23/100 [00:02<00:09,  8.52it/s]
 25%|██████████▎                              | 25/100 [00:02<00:08,  8.67it/s]
 26%|██████████▋                              | 26/100 [00:03<00:08,  8.43it/s]
 27%|███████████                       

106:train loss is 4.38970947265625



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:25,  1.15it/s]
  2%|▊                                         | 2/100 [00:01<01:28,  1.11it/s]
  3%|█▎                                        | 3/100 [00:02<01:10,  1.38it/s]
  5%|██                                        | 5/100 [00:02<00:44,  2.12it/s]
  6%|██▌                                       | 6/100 [00:03<00:53,  1.75it/s]
  8%|███▎                                      | 8/100 [00:03<00:41,  2.20it/s]
 11%|████▌                                    | 11/100 [00:04<00:35,  2.52it/s]
 12%|████▉                                    | 12/100 [00:04<00:34,  2.57it/s]
 13%|█████▎                                   | 13/100 [00:04<00:32,  2.71it/s]
 14%|█████▋                                   | 14/100 [00:04<00:30,  2.86it/s]
 15%|██████▏                                  | 15/100 [00:05<00:29,  2.91it/s]
 16%|██████▌                           

107:train loss is 4.016781806945801



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:19,  4.94it/s]
  3%|█▎                                        | 3/100 [00:00<00:23,  4.14it/s]
  6%|██▌                                       | 6/100 [00:00<00:13,  7.17it/s]
 10%|████                                     | 10/100 [00:00<00:08, 10.51it/s]
 12%|████▉                                    | 12/100 [00:01<00:14,  6.24it/s]
 14%|█████▋                                   | 14/100 [00:02<00:16,  5.07it/s]
 17%|██████▉                                  | 17/100 [00:03<00:14,  5.59it/s]
 18%|███████▍                                 | 18/100 [00:03<00:15,  5.45it/s]
 19%|███████▊                                 | 19/100 [00:03<00:14,  5.46it/s]
 21%|████████▌                                | 21/100 [00:03<00:13,  5.68it/s]
 23%|█████████▍                               | 23/100 [00:03<00:12,  5.95it/s]
 24%|█████████▊                        

108:train loss is 1.6667921543121338



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:13,  7.53it/s]
  3%|█▎                                        | 3/100 [00:00<00:21,  4.42it/s]
  6%|██▌                                       | 6/100 [00:01<00:16,  5.73it/s]
  7%|██▉                                       | 7/100 [00:01<00:16,  5.77it/s]
  8%|███▎                                      | 8/100 [00:01<00:17,  5.23it/s]
 10%|████                                     | 10/100 [00:01<00:16,  5.32it/s]
 11%|████▌                                    | 11/100 [00:02<00:16,  5.29it/s]
 12%|████▉                                    | 12/100 [00:02<00:16,  5.35it/s]
 13%|█████▎                                   | 13/100 [00:02<00:19,  4.40it/s]
 14%|█████▋                                   | 14/100 [00:03<00:19,  4.50it/s]
 16%|██████▌                                  | 16/100 [00:03<00:17,  4.69it/s]
 17%|██████▉                           

109:train loss is 2.8609063625335693



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:22,  1.20it/s]
  2%|▊                                         | 2/100 [00:01<00:54,  1.79it/s]
  5%|██                                        | 5/100 [00:01<00:25,  3.73it/s]
  6%|██▌                                       | 6/100 [00:01<00:23,  3.97it/s]
  7%|██▉                                       | 7/100 [00:01<00:22,  4.06it/s]
  8%|███▎                                      | 8/100 [00:01<00:21,  4.36it/s]
  9%|███▊                                      | 9/100 [00:02<00:21,  4.30it/s]
 10%|████                                     | 10/100 [00:02<00:21,  4.16it/s]
 11%|████▌                                    | 11/100 [00:02<00:21,  4.21it/s]
 12%|████▉                                    | 12/100 [00:02<00:21,  4.15it/s]
 13%|█████▎                                   | 13/100 [00:03<00:20,  4.31it/s]
 17%|██████▉                           

110:train loss is 1.0577445030212402
110: valid_loss is2.423548698425293



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:16,  6.01it/s]
  4%|█▋                                        | 4/100 [00:00<00:15,  6.06it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  8.34it/s]
  8%|███▎                                      | 8/100 [00:00<00:11,  8.30it/s]
  9%|███▊                                      | 9/100 [00:01<00:10,  8.38it/s]
 12%|████▉                                    | 12/100 [00:01<00:09,  9.04it/s]
 15%|██████▏                                  | 15/100 [00:01<00:09,  9.26it/s]
 16%|██████▌                                  | 16/100 [00:01<00:10,  8.10it/s]
 18%|███████▍                                 | 18/100 [00:02<00:09,  8.25it/s]
 19%|███████▊                                 | 19/100 [00:02<00:12,  6.62it/s]
 21%|████████▌                                | 21/100 [00:03<00:11,  6.62it/s]
 22%|█████████                         

111:train loss is 2.500394344329834



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:09, 10.55it/s]
  3%|█▎                                        | 3/100 [00:00<00:14,  6.73it/s]
  4%|█▋                                        | 4/100 [00:00<00:15,  6.31it/s]
  8%|███▎                                      | 8/100 [00:00<00:11,  8.34it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.42it/s]
 10%|████                                     | 10/100 [00:01<00:12,  7.48it/s]
 13%|█████▎                                   | 13/100 [00:01<00:09,  9.03it/s]
 15%|██████▏                                  | 15/100 [00:01<00:09,  8.97it/s]
 17%|██████▉                                  | 17/100 [00:01<00:09,  9.14it/s]
 19%|███████▊                                 | 19/100 [00:02<00:08,  9.43it/s]
 21%|████████▌                                | 21/100 [00:02<00:08,  8.85it/s]
 24%|█████████▊                        

112:train loss is 2.2126238346099854



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:30,  1.10it/s]
  4%|█▋                                        | 4/100 [00:01<00:30,  3.13it/s]
  6%|██▌                                       | 6/100 [00:02<00:34,  2.73it/s]
  7%|██▉                                       | 7/100 [00:02<00:33,  2.76it/s]
  8%|███▎                                      | 8/100 [00:02<00:32,  2.82it/s]
 10%|████                                     | 10/100 [00:02<00:26,  3.37it/s]
 11%|████▌                                    | 11/100 [00:03<00:27,  3.28it/s]
 13%|█████▎                                   | 13/100 [00:03<00:23,  3.72it/s]
 16%|██████▌                                  | 16/100 [00:03<00:20,  4.08it/s]
 18%|███████▍                                 | 18/100 [00:04<00:18,  4.34it/s]
 20%|████████▏                                | 20/100 [00:04<00:17,  4.64it/s]
 22%|█████████                         

113:train loss is 3.2820370197296143



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:45,  2.17it/s]
  3%|█▎                                        | 3/100 [00:01<00:33,  2.90it/s]
  4%|█▋                                        | 4/100 [00:01<00:29,  3.31it/s]
  6%|██▌                                       | 6/100 [00:01<00:22,  4.21it/s]
  7%|██▉                                       | 7/100 [00:01<00:21,  4.36it/s]
  9%|███▊                                      | 9/100 [00:01<00:17,  5.15it/s]
 10%|████                                     | 10/100 [00:02<00:19,  4.71it/s]
 12%|████▉                                    | 12/100 [00:02<00:16,  5.33it/s]
 16%|██████▌                                  | 16/100 [00:03<00:16,  5.21it/s]
 17%|██████▉                                  | 17/100 [00:03<00:16,  5.15it/s]
 21%|████████▌                                | 21/100 [00:03<00:13,  6.00it/s]
 23%|█████████▍                        

114:train loss is 3.8972339630126953



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:25,  3.94it/s]
  3%|█▎                                        | 3/100 [00:00<00:15,  6.19it/s]
  4%|█▋                                        | 4/100 [00:00<00:14,  6.49it/s]
  5%|██                                        | 5/100 [00:00<00:15,  6.24it/s]
  6%|██▌                                       | 6/100 [00:00<00:14,  6.33it/s]
 10%|████                                     | 10/100 [00:01<00:09,  9.40it/s]
 12%|████▉                                    | 12/100 [00:01<00:09,  9.00it/s]
 15%|██████▏                                  | 15/100 [00:01<00:09,  9.16it/s]
 17%|██████▉                                  | 17/100 [00:02<00:11,  7.11it/s]
 20%|████████▏                                | 20/100 [00:02<00:10,  7.47it/s]
 22%|█████████                                | 22/100 [00:02<00:10,  7.43it/s]
 24%|█████████▊                        

115:train loss is 1.9557595252990723



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:27,  1.14it/s]
  3%|█▎                                        | 3/100 [00:01<00:33,  2.90it/s]
  4%|█▋                                        | 4/100 [00:01<00:32,  2.95it/s]
  6%|██▌                                       | 6/100 [00:02<00:34,  2.71it/s]
  7%|██▉                                       | 7/100 [00:02<00:33,  2.75it/s]
  8%|███▎                                      | 8/100 [00:03<00:38,  2.42it/s]
 12%|████▉                                    | 12/100 [00:03<00:25,  3.48it/s]
 14%|█████▋                                   | 14/100 [00:03<00:23,  3.73it/s]
 18%|███████▍                                 | 18/100 [00:03<00:17,  4.57it/s]
 20%|████████▏                                | 20/100 [00:04<00:16,  4.90it/s]
 22%|█████████                                | 22/100 [00:04<00:17,  4.49it/s]
 24%|█████████▊                        

116:train loss is 1.9570927619934082



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:29,  3.32it/s]
  2%|▊                                         | 2/100 [00:00<00:23,  4.13it/s]
  4%|█▋                                        | 4/100 [00:00<00:17,  5.51it/s]
  6%|██▌                                       | 6/100 [00:00<00:14,  6.43it/s]
  9%|███▊                                      | 9/100 [00:01<00:11,  7.63it/s]
 11%|████▌                                    | 11/100 [00:01<00:12,  7.36it/s]
 12%|████▉                                    | 12/100 [00:01<00:11,  7.46it/s]
 15%|██████▏                                  | 15/100 [00:01<00:09,  8.75it/s]
 17%|██████▉                                  | 17/100 [00:02<00:10,  8.27it/s]
 20%|████████▏                                | 20/100 [00:02<00:08,  9.23it/s]
 22%|█████████                                | 22/100 [00:03<00:11,  7.07it/s]
 24%|█████████▊                        

117:train loss is 1.2100720405578613



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:13,  7.15it/s]
  5%|██                                        | 5/100 [00:00<00:11,  7.98it/s]
  7%|██▉                                       | 7/100 [00:00<00:10,  8.96it/s]
  8%|███▎                                      | 8/100 [00:01<00:18,  4.89it/s]
  9%|███▊                                      | 9/100 [00:01<00:18,  4.80it/s]
 10%|████                                     | 10/100 [00:01<00:17,  5.04it/s]
 12%|████▉                                    | 12/100 [00:02<00:16,  5.48it/s]
 14%|█████▋                                   | 14/100 [00:02<00:15,  5.64it/s]
 16%|██████▌                                  | 16/100 [00:02<00:14,  5.79it/s]
 17%|██████▉                                  | 17/100 [00:02<00:14,  5.81it/s]
 19%|███████▊                                 | 19/100 [00:03<00:13,  6.13it/s]
 20%|████████▏                         

118:train loss is 3.0615694522857666



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:13,  7.10it/s]
  3%|█▎                                        | 3/100 [00:00<00:14,  6.77it/s]
  5%|██                                        | 5/100 [00:00<00:11,  8.14it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  8.44it/s]
 10%|████                                     | 10/100 [00:01<00:09,  9.90it/s]
 11%|████▌                                    | 11/100 [00:01<00:10,  8.80it/s]
 12%|████▉                                    | 12/100 [00:01<00:10,  8.14it/s]
 13%|█████▎                                   | 13/100 [00:01<00:11,  7.42it/s]
 15%|██████▏                                  | 15/100 [00:01<00:11,  7.73it/s]
 16%|██████▌                                  | 16/100 [00:02<00:10,  7.67it/s]
 17%|██████▉                                  | 17/100 [00:02<00:10,  7.73it/s]
 18%|███████▍                          

119:train loss is 0.8295599222183228



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:08, 11.39it/s]
  4%|█▋                                        | 4/100 [00:00<00:13,  6.97it/s]
  6%|██▌                                       | 6/100 [00:00<00:11,  8.18it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  8.02it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.15it/s]
 10%|████                                     | 10/100 [00:01<00:12,  7.12it/s]
 12%|████▉                                    | 12/100 [00:01<00:11,  7.85it/s]
 14%|█████▋                                   | 14/100 [00:01<00:10,  8.41it/s]
 16%|██████▌                                  | 16/100 [00:01<00:10,  8.36it/s]
 18%|███████▍                                 | 18/100 [00:02<00:11,  7.07it/s]
 20%|████████▏                                | 20/100 [00:02<00:11,  7.19it/s]
 21%|████████▌                         

120:train loss is 2.071467638015747
120: valid_loss is5.054170608520508



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:06, 16.31it/s]
  4%|█▋                                        | 4/100 [00:00<00:07, 12.54it/s]
  5%|██                                        | 5/100 [00:00<00:09, 10.42it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  7.92it/s]
  8%|███▎                                      | 8/100 [00:01<00:14,  6.51it/s]
  9%|███▊                                      | 9/100 [00:01<00:15,  5.80it/s]
 11%|████▌                                    | 11/100 [00:01<00:14,  6.30it/s]
 12%|████▉                                    | 12/100 [00:02<00:14,  6.00it/s]
 13%|█████▎                                   | 13/100 [00:02<00:15,  5.63it/s]
 15%|██████▏                                  | 15/100 [00:02<00:14,  5.86it/s]
 16%|██████▌                                  | 16/100 [00:02<00:14,  5.86it/s]
 17%|██████▉                           

121:train loss is 3.1072866916656494



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:25,  1.16it/s]
  2%|▊                                         | 2/100 [00:01<00:50,  1.94it/s]
  4%|█▋                                        | 4/100 [00:01<00:29,  3.21it/s]
  7%|██▉                                       | 7/100 [00:01<00:18,  5.16it/s]
  9%|███▊                                      | 9/100 [00:01<00:18,  4.89it/s]
 13%|█████▎                                   | 13/100 [00:01<00:13,  6.62it/s]
 15%|██████▏                                  | 15/100 [00:02<00:12,  6.65it/s]
 17%|██████▉                                  | 17/100 [00:02<00:12,  6.42it/s]
 20%|████████▏                                | 20/100 [00:02<00:11,  7.23it/s]
 23%|█████████▍                               | 23/100 [00:02<00:09,  8.00it/s]
 25%|██████████▎                              | 25/100 [00:03<00:09,  8.18it/s]
 28%|███████████▍                      

122:train loss is 2.713813543319702



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:05, 18.57it/s]
  5%|██                                        | 5/100 [00:00<00:08, 11.30it/s]
  6%|██▌                                       | 6/100 [00:00<00:11,  8.16it/s]
  7%|██▉                                       | 7/100 [00:01<00:14,  6.54it/s]
  8%|███▎                                      | 8/100 [00:01<00:15,  5.89it/s]
  9%|███▊                                      | 9/100 [00:01<00:14,  6.15it/s]
 10%|████                                     | 10/100 [00:01<00:14,  6.16it/s]
 11%|████▌                                    | 11/100 [00:01<00:15,  5.68it/s]
 12%|████▉                                    | 12/100 [00:02<00:15,  5.69it/s]
 13%|█████▎                                   | 13/100 [00:02<00:15,  5.56it/s]
 15%|██████▏                                  | 15/100 [00:02<00:14,  5.98it/s]
 17%|██████▉                           

123:train loss is 1.7254283428192139



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:15,  6.32it/s]
  3%|█▎                                        | 3/100 [00:00<00:10,  9.49it/s]
  6%|██▌                                       | 6/100 [00:00<00:09, 10.05it/s]
  8%|███▎                                      | 8/100 [00:00<00:08, 10.36it/s]
  9%|███▊                                      | 9/100 [00:01<00:14,  6.27it/s]
 10%|████                                     | 10/100 [00:02<00:19,  4.69it/s]
 13%|█████▎                                   | 13/100 [00:02<00:15,  5.68it/s]
 15%|██████▏                                  | 15/100 [00:02<00:14,  5.90it/s]
 18%|███████▍                                 | 18/100 [00:02<00:12,  6.78it/s]
 20%|████████▏                                | 20/100 [00:03<00:14,  5.57it/s]
 22%|█████████                                | 22/100 [00:03<00:13,  5.59it/s]
 25%|██████████▎                       

124:train loss is 1.0063254833221436



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:08, 11.79it/s]
  4%|█▋                                        | 4/100 [00:00<00:16,  5.91it/s]
  8%|███▎                                      | 8/100 [00:00<00:10,  8.47it/s]
  9%|███▊                                      | 9/100 [00:01<00:13,  6.94it/s]
 10%|████                                     | 10/100 [00:01<00:14,  6.36it/s]
 12%|████▉                                    | 12/100 [00:01<00:13,  6.69it/s]
 13%|█████▎                                   | 13/100 [00:01<00:12,  6.85it/s]
 14%|█████▋                                   | 14/100 [00:02<00:13,  6.21it/s]
 16%|██████▌                                  | 16/100 [00:03<00:16,  5.22it/s]
 17%|██████▉                                  | 17/100 [00:03<00:15,  5.32it/s]
 18%|███████▍                                 | 18/100 [00:03<00:15,  5.40it/s]
 19%|███████▊                          

125:train loss is 0.9561123847961426



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:12,  7.92it/s]
  6%|██▌                                       | 6/100 [00:00<00:08, 10.81it/s]
  9%|███▊                                      | 9/100 [00:00<00:06, 13.02it/s]
 11%|████▌                                    | 11/100 [00:01<00:08, 10.86it/s]
 15%|██████▏                                  | 15/100 [00:01<00:07, 11.79it/s]
 18%|███████▍                                 | 18/100 [00:01<00:07, 10.82it/s]
 20%|████████▏                                | 20/100 [00:01<00:07, 10.30it/s]
 21%|████████▌                                | 21/100 [00:02<00:09,  8.16it/s]
 22%|█████████                                | 22/100 [00:02<00:09,  7.80it/s]
 23%|█████████▍                               | 23/100 [00:03<00:10,  7.44it/s]
 25%|██████████▎                              | 25/100 [00:03<00:09,  7.62it/s]
 26%|██████████▋                       

126:train loss is 1.6830525398254395



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:27,  1.13it/s]
  2%|▊                                         | 2/100 [00:01<01:30,  1.09it/s]
  3%|█▎                                        | 3/100 [00:02<01:10,  1.37it/s]
  5%|██                                        | 5/100 [00:02<00:46,  2.06it/s]
  6%|██▌                                       | 6/100 [00:03<00:49,  1.91it/s]
  8%|███▎                                      | 8/100 [00:03<00:38,  2.40it/s]
 11%|████▌                                    | 11/100 [00:04<00:33,  2.70it/s]
 12%|████▉                                    | 12/100 [00:04<00:32,  2.74it/s]
 13%|█████▎                                   | 13/100 [00:04<00:30,  2.88it/s]
 15%|██████▏                                  | 15/100 [00:04<00:27,  3.09it/s]
 16%|██████▌                                  | 16/100 [00:05<00:26,  3.17it/s]
 17%|██████▉                           

127:train loss is 1.2206883430480957



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:18,  5.30it/s]
  3%|█▎                                        | 3/100 [00:00<00:22,  4.31it/s]
  5%|██                                        | 5/100 [00:00<00:15,  6.27it/s]
  9%|███▊                                      | 9/100 [00:00<00:09,  9.94it/s]
 12%|████▉                                    | 12/100 [00:01<00:13,  6.29it/s]
 14%|█████▋                                   | 14/100 [00:02<00:16,  5.07it/s]
 17%|██████▉                                  | 17/100 [00:03<00:14,  5.58it/s]
 18%|███████▍                                 | 18/100 [00:03<00:15,  5.44it/s]
 19%|███████▊                                 | 19/100 [00:03<00:14,  5.43it/s]
 21%|████████▌                                | 21/100 [00:03<00:13,  5.70it/s]
 23%|█████████▍                               | 23/100 [00:03<00:12,  5.96it/s]
 24%|█████████▊                        

128:train loss is 1.1897257566452026



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:13,  7.21it/s]
  3%|█▎                                        | 3/100 [00:00<00:17,  5.70it/s]
  6%|██▌                                       | 6/100 [00:00<00:13,  7.14it/s]
  7%|██▉                                       | 7/100 [00:01<00:13,  6.84it/s]
  8%|███▎                                      | 8/100 [00:01<00:15,  5.96it/s]
 10%|████                                     | 10/100 [00:01<00:15,  5.79it/s]
 11%|████▌                                    | 11/100 [00:01<00:15,  5.77it/s]
 12%|████▉                                    | 12/100 [00:02<00:15,  5.80it/s]
 13%|█████▎                                   | 13/100 [00:02<00:19,  4.49it/s]
 14%|█████▋                                   | 14/100 [00:03<00:18,  4.61it/s]
 16%|██████▌                                  | 16/100 [00:03<00:17,  4.80it/s]
 17%|██████▉                           

129:train loss is 1.7549450397491455



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:22,  1.20it/s]
  2%|▊                                         | 2/100 [00:01<00:54,  1.79it/s]
  5%|██                                        | 5/100 [00:01<00:25,  3.73it/s]
  6%|██▌                                       | 6/100 [00:01<00:23,  4.00it/s]
  7%|██▉                                       | 7/100 [00:01<00:22,  4.15it/s]
  8%|███▎                                      | 8/100 [00:01<00:20,  4.45it/s]
  9%|███▊                                      | 9/100 [00:02<00:20,  4.38it/s]
 10%|████                                     | 10/100 [00:02<00:21,  4.20it/s]
 11%|████▌                                    | 11/100 [00:02<00:21,  4.19it/s]
 12%|████▉                                    | 12/100 [00:02<00:21,  4.06it/s]
 13%|█████▎                                   | 13/100 [00:03<00:20,  4.22it/s]
 16%|██████▌                           

130:train loss is 0.9837429523468018
valid_loss improves
130: valid_loss is1.1941649913787842



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:13,  7.15it/s]
  4%|█▋                                        | 4/100 [00:00<00:12,  7.47it/s]
  6%|██▌                                       | 6/100 [00:00<00:10,  9.38it/s]
  7%|██▉                                       | 7/100 [00:00<00:09,  9.45it/s]
  9%|███▊                                      | 9/100 [00:00<00:09,  9.67it/s]
 12%|████▉                                    | 12/100 [00:01<00:08, 10.27it/s]
 15%|██████▏                                  | 15/100 [00:01<00:07, 10.71it/s]
 17%|██████▉                                  | 17/100 [00:01<00:08,  9.94it/s]
 18%|███████▍                                 | 18/100 [00:01<00:08,  9.73it/s]
 19%|███████▊                                 | 19/100 [00:02<00:10,  7.43it/s]
 21%|████████▌                                | 21/100 [00:02<00:10,  7.33it/s]
 22%|█████████                         

131:train loss is 1.0991896390914917



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:10,  9.10it/s]
  3%|█▎                                        | 3/100 [00:00<00:17,  5.40it/s]
  4%|█▋                                        | 4/100 [00:00<00:19,  5.05it/s]
  7%|██▉                                       | 7/100 [00:00<00:12,  7.59it/s]
  8%|███▎                                      | 8/100 [00:01<00:14,  6.56it/s]
  9%|███▊                                      | 9/100 [00:01<00:15,  5.85it/s]
 10%|████                                     | 10/100 [00:01<00:15,  5.93it/s]
 13%|█████▎                                   | 13/100 [00:01<00:12,  7.20it/s]
 15%|██████▏                                  | 15/100 [00:02<00:11,  7.18it/s]
 17%|██████▉                                  | 17/100 [00:02<00:11,  7.35it/s]
 19%|███████▊                                 | 19/100 [00:02<00:10,  7.59it/s]
 21%|████████▌                         

132:train loss is 1.092822551727295



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:35,  1.04it/s]
  4%|█▋                                        | 4/100 [00:01<00:32,  3.00it/s]
  6%|██▌                                       | 6/100 [00:02<00:34,  2.73it/s]
  7%|██▉                                       | 7/100 [00:02<00:32,  2.84it/s]
  8%|███▎                                      | 8/100 [00:02<00:31,  2.96it/s]
 10%|████                                     | 10/100 [00:02<00:25,  3.55it/s]
 11%|████▌                                    | 11/100 [00:03<00:25,  3.52it/s]
 13%|█████▎                                   | 13/100 [00:03<00:21,  4.02it/s]
 16%|██████▌                                  | 16/100 [00:03<00:18,  4.42it/s]
 18%|███████▍                                 | 18/100 [00:03<00:17,  4.67it/s]
 20%|████████▏                                | 20/100 [00:04<00:16,  4.97it/s]
 22%|█████████                         

133:train loss is 1.3213796615600586



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:36,  2.72it/s]
  3%|█▎                                        | 3/100 [00:00<00:28,  3.46it/s]
  4%|█▋                                        | 4/100 [00:01<00:24,  3.97it/s]
  6%|██▌                                       | 6/100 [00:01<00:18,  5.10it/s]
  7%|██▉                                       | 7/100 [00:01<00:17,  5.36it/s]
  9%|███▊                                      | 9/100 [00:01<00:14,  6.33it/s]
 10%|████                                     | 10/100 [00:01<00:15,  5.79it/s]
 12%|████▉                                    | 12/100 [00:01<00:13,  6.44it/s]
 15%|██████▏                                  | 15/100 [00:02<00:11,  7.49it/s]
 17%|██████▉                                  | 17/100 [00:02<00:14,  5.79it/s]
 21%|████████▌                                | 21/100 [00:03<00:11,  6.69it/s]
 23%|█████████▍                        

134:train loss is 0.8427174091339111



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:37,  2.64it/s]
  2%|▊                                         | 2/100 [00:00<00:24,  3.97it/s]
  3%|█▎                                        | 3/100 [00:00<00:27,  3.50it/s]
  4%|█▋                                        | 4/100 [00:00<00:23,  4.01it/s]
  5%|██                                        | 5/100 [00:01<00:22,  4.23it/s]
  6%|██▌                                       | 6/100 [00:01<00:20,  4.51it/s]
  9%|███▊                                      | 9/100 [00:01<00:14,  6.25it/s]
 12%|████▉                                    | 12/100 [00:01<00:12,  6.90it/s]
 15%|██████▏                                  | 15/100 [00:02<00:11,  7.35it/s]
 17%|██████▉                                  | 17/100 [00:02<00:13,  6.06it/s]
 20%|████████▏                                | 20/100 [00:03<00:12,  6.46it/s]
 22%|█████████                         

135:train loss is 0.9411787986755371



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:26,  1.14it/s]
  3%|█▎                                        | 3/100 [00:01<00:34,  2.82it/s]
  4%|█▋                                        | 4/100 [00:01<00:33,  2.89it/s]
  6%|██▌                                       | 6/100 [00:02<00:35,  2.68it/s]
  7%|██▉                                       | 7/100 [00:02<00:33,  2.80it/s]
  8%|███▎                                      | 8/100 [00:03<00:37,  2.45it/s]
 12%|████▉                                    | 12/100 [00:03<00:24,  3.55it/s]
 14%|█████▋                                   | 14/100 [00:03<00:22,  3.78it/s]
 16%|██████▌                                  | 16/100 [00:03<00:20,  4.16it/s]
 18%|███████▍                                 | 18/100 [00:03<00:18,  4.52it/s]
 20%|████████▏                                | 20/100 [00:04<00:16,  4.83it/s]
 22%|█████████                         

136:train loss is 0.9514201283454895



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:30,  3.25it/s]
  2%|▊                                         | 2/100 [00:00<00:23,  4.09it/s]
  4%|█▋                                        | 4/100 [00:00<00:17,  5.43it/s]
  6%|██▌                                       | 6/100 [00:00<00:14,  6.41it/s]
  9%|███▊                                      | 9/100 [00:01<00:11,  7.69it/s]
 11%|████▌                                    | 11/100 [00:01<00:12,  7.38it/s]
 12%|████▉                                    | 12/100 [00:01<00:11,  7.37it/s]
 15%|██████▏                                  | 15/100 [00:01<00:09,  8.58it/s]
 17%|██████▉                                  | 17/100 [00:02<00:10,  8.07it/s]
 20%|████████▏                                | 20/100 [00:02<00:08,  9.01it/s]
 22%|█████████                                | 22/100 [00:03<00:10,  7.13it/s]
 24%|█████████▊                        

137:train loss is 0.905184805393219



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:12,  7.70it/s]
  5%|██                                        | 5/100 [00:00<00:11,  8.39it/s]
  7%|██▉                                       | 7/100 [00:00<00:09,  9.62it/s]
  8%|███▎                                      | 8/100 [00:01<00:18,  5.01it/s]
  9%|███▊                                      | 9/100 [00:01<00:18,  4.88it/s]
 10%|████                                     | 10/100 [00:01<00:17,  5.12it/s]
 12%|████▉                                    | 12/100 [00:02<00:15,  5.56it/s]
 14%|█████▋                                   | 14/100 [00:02<00:15,  5.70it/s]
 16%|██████▌                                  | 16/100 [00:02<00:14,  5.82it/s]
 17%|██████▉                                  | 17/100 [00:02<00:14,  5.88it/s]
 19%|███████▊                                 | 19/100 [00:03<00:13,  6.12it/s]
 20%|████████▏                         

138:train loss is 0.9593811631202698



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:17,  5.58it/s]
  3%|█▎                                        | 3/100 [00:00<00:18,  5.30it/s]
  5%|██                                        | 5/100 [00:00<00:14,  6.44it/s]
  7%|██▉                                       | 7/100 [00:01<00:13,  6.74it/s]
 10%|████                                     | 10/100 [00:01<00:11,  7.71it/s]
 11%|████▌                                    | 11/100 [00:01<00:12,  6.88it/s]
 12%|████▉                                    | 12/100 [00:01<00:13,  6.31it/s]
 13%|█████▎                                   | 13/100 [00:02<00:14,  6.07it/s]
 15%|██████▏                                  | 15/100 [00:02<00:13,  6.48it/s]
 16%|██████▌                                  | 16/100 [00:02<00:12,  6.51it/s]
 17%|██████▉                                  | 17/100 [00:02<00:12,  6.64it/s]
 18%|███████▍                          

139:train loss is 0.8842446804046631



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:07, 12.55it/s]
  4%|█▋                                        | 4/100 [00:00<00:12,  7.45it/s]
  6%|██▌                                       | 6/100 [00:00<00:10,  8.62it/s]
  7%|██▉                                       | 7/100 [00:00<00:10,  8.51it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.49it/s]
 10%|████                                     | 10/100 [00:01<00:11,  7.53it/s]
 12%|████▉                                    | 12/100 [00:01<00:10,  8.38it/s]
 14%|█████▋                                   | 14/100 [00:01<00:09,  9.03it/s]
 16%|██████▌                                  | 16/100 [00:01<00:09,  9.20it/s]
 18%|███████▍                                 | 18/100 [00:02<00:10,  7.90it/s]
 20%|████████▏                                | 20/100 [00:02<00:09,  8.11it/s]
 21%|████████▌                         

140:train loss is 0.9135202169418335
140: valid_loss is1.2249946594238281



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:06, 15.50it/s]
  4%|█▋                                        | 4/100 [00:00<00:08, 11.64it/s]
  5%|██                                        | 5/100 [00:00<00:10,  9.30it/s]
  7%|██▉                                       | 7/100 [00:00<00:12,  7.59it/s]
  8%|███▎                                      | 8/100 [00:01<00:14,  6.38it/s]
  9%|███▊                                      | 9/100 [00:01<00:15,  5.76it/s]
 11%|████▌                                    | 11/100 [00:01<00:14,  6.24it/s]
 12%|████▉                                    | 12/100 [00:02<00:14,  5.88it/s]
 13%|█████▎                                   | 13/100 [00:02<00:15,  5.53it/s]
 15%|██████▏                                  | 15/100 [00:02<00:14,  5.74it/s]
 16%|██████▌                                  | 16/100 [00:02<00:14,  5.80it/s]
 18%|███████▍                          

141:train loss is 1.0756900310516357



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:06,  1.49it/s]
  2%|▊                                         | 2/100 [00:00<00:39,  2.49it/s]
  4%|█▋                                        | 4/100 [00:00<00:23,  4.02it/s]
  7%|██▉                                       | 7/100 [00:01<00:14,  6.23it/s]
  9%|███▊                                      | 9/100 [00:01<00:15,  6.01it/s]
 12%|████▉                                    | 12/100 [00:01<00:11,  7.50it/s]
 15%|██████▏                                  | 15/100 [00:01<00:10,  8.14it/s]
 17%|██████▉                                  | 17/100 [00:02<00:10,  7.78it/s]
 21%|████████▌                                | 21/100 [00:02<00:08,  9.10it/s]
 25%|██████████▎                              | 25/100 [00:02<00:07,  9.88it/s]
 28%|███████████▍                             | 28/100 [00:02<00:07,  9.96it/s]
 30%|████████████▎                     

142:train loss is 1.1147979497909546



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:05, 18.34it/s]
  5%|██                                        | 5/100 [00:00<00:08, 10.72it/s]
  6%|██▌                                       | 6/100 [00:00<00:11,  7.92it/s]
  7%|██▉                                       | 7/100 [00:01<00:14,  6.40it/s]
  8%|███▎                                      | 8/100 [00:01<00:15,  5.79it/s]
  9%|███▊                                      | 9/100 [00:01<00:15,  6.05it/s]
 10%|████                                     | 10/100 [00:01<00:14,  6.07it/s]
 11%|████▌                                    | 11/100 [00:01<00:15,  5.58it/s]
 12%|████▉                                    | 12/100 [00:02<00:15,  5.55it/s]
 13%|█████▎                                   | 13/100 [00:02<00:16,  5.29it/s]
 15%|██████▏                                  | 15/100 [00:02<00:15,  5.66it/s]
 17%|██████▉                           

143:train loss is 0.8433361053466797



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:20,  4.84it/s]
  3%|█▎                                        | 3/100 [00:00<00:12,  7.62it/s]
  6%|██▌                                       | 6/100 [00:00<00:11,  8.23it/s]
  8%|███▎                                      | 8/100 [00:00<00:10,  8.55it/s]
  9%|███▊                                      | 9/100 [00:01<00:18,  4.97it/s]
 10%|████                                     | 10/100 [00:02<00:23,  3.89it/s]
 13%|█████▎                                   | 13/100 [00:02<00:18,  4.79it/s]
 15%|██████▏                                  | 15/100 [00:02<00:16,  5.02it/s]
 18%|███████▍                                 | 18/100 [00:03<00:14,  5.82it/s]
 20%|████████▏                                | 20/100 [00:04<00:16,  4.89it/s]
 22%|█████████                                | 22/100 [00:04<00:15,  4.96it/s]
 25%|██████████▎                       

144:train loss is 0.8394931554794312



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:07, 12.33it/s]
  4%|█▋                                        | 4/100 [00:00<00:15,  6.29it/s]
  8%|███▎                                      | 8/100 [00:00<00:10,  8.75it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.13it/s]
 10%|████                                     | 10/100 [00:01<00:13,  6.48it/s]
 12%|████▉                                    | 12/100 [00:01<00:12,  6.79it/s]
 13%|█████▎                                   | 13/100 [00:01<00:12,  6.96it/s]
 14%|█████▋                                   | 14/100 [00:02<00:13,  6.39it/s]
 16%|██████▌                                  | 16/100 [00:02<00:15,  5.59it/s]
 17%|██████▉                                  | 17/100 [00:02<00:14,  5.69it/s]
 18%|███████▍                                 | 18/100 [00:03<00:14,  5.78it/s]
 19%|███████▊                          

145:train loss is 0.8609973192214966



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:11,  8.78it/s]
  6%|██▌                                       | 6/100 [00:00<00:07, 13.17it/s]
  9%|███▊                                      | 9/100 [00:00<00:05, 15.77it/s]
 11%|████▌                                    | 11/100 [00:00<00:06, 13.53it/s]
 15%|██████▏                                  | 15/100 [00:01<00:05, 14.55it/s]
 18%|███████▍                                 | 18/100 [00:01<00:06, 13.13it/s]
 20%|████████▏                                | 20/100 [00:01<00:06, 11.95it/s]
 22%|█████████                                | 22/100 [00:02<00:08,  8.75it/s]
 23%|█████████▍                               | 23/100 [00:02<00:09,  8.36it/s]
 25%|██████████▎                              | 25/100 [00:02<00:08,  8.51it/s]
 26%|██████████▋                              | 26/100 [00:03<00:08,  8.29it/s]
 27%|███████████                       

146:train loss is 1.0293248891830444



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:26,  1.15it/s]
  2%|▊                                         | 2/100 [00:01<01:28,  1.11it/s]
  3%|█▎                                        | 3/100 [00:02<01:10,  1.38it/s]
  5%|██                                        | 5/100 [00:02<00:45,  2.10it/s]
  6%|██▌                                       | 6/100 [00:03<00:49,  1.90it/s]
  8%|███▎                                      | 8/100 [00:03<00:38,  2.39it/s]
 11%|████▌                                    | 11/100 [00:04<00:33,  2.67it/s]
 12%|████▉                                    | 12/100 [00:04<00:32,  2.73it/s]
 13%|█████▎                                   | 13/100 [00:04<00:30,  2.86it/s]
 15%|██████▏                                  | 15/100 [00:04<00:27,  3.06it/s]
 16%|██████▌                                  | 16/100 [00:05<00:26,  3.14it/s]
 17%|██████▉                           

147:train loss is 0.8710152506828308



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:15,  6.30it/s]
  3%|█▎                                        | 3/100 [00:00<00:18,  5.32it/s]
  6%|██▌                                       | 6/100 [00:00<00:10,  8.97it/s]
 10%|████                                     | 10/100 [00:00<00:07, 12.83it/s]
 12%|████▉                                    | 12/100 [00:01<00:11,  7.85it/s]
 14%|█████▋                                   | 14/100 [00:02<00:13,  6.33it/s]
 17%|██████▉                                  | 17/100 [00:02<00:12,  6.80it/s]
 19%|███████▊                                 | 19/100 [00:02<00:12,  6.48it/s]
 21%|████████▌                                | 21/100 [00:03<00:11,  6.69it/s]
 23%|█████████▍                               | 23/100 [00:03<00:11,  6.90it/s]
 25%|██████████▎                              | 25/100 [00:03<00:10,  7.23it/s]
 27%|███████████                       

148:train loss is 1.0152405500411987



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:12,  7.94it/s]
  3%|█▎                                        | 3/100 [00:00<00:16,  5.99it/s]
  6%|██▌                                       | 6/100 [00:00<00:12,  7.34it/s]
  7%|██▉                                       | 7/100 [00:00<00:13,  7.13it/s]
  8%|███▎                                      | 8/100 [00:01<00:15,  6.12it/s]
 10%|████                                     | 10/100 [00:01<00:15,  5.95it/s]
 11%|████▌                                    | 11/100 [00:01<00:15,  5.91it/s]
 12%|████▉                                    | 12/100 [00:02<00:14,  5.94it/s]
 13%|█████▎                                   | 13/100 [00:03<00:21,  4.06it/s]
 14%|█████▋                                   | 14/100 [00:03<00:20,  4.20it/s]
 16%|██████▌                                  | 16/100 [00:03<00:19,  4.39it/s]
 17%|██████▉                           

149:train loss is 1.2424941062927246



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:24,  1.18it/s]
  2%|▊                                         | 2/100 [00:01<00:55,  1.76it/s]
  5%|██                                        | 5/100 [00:01<00:25,  3.66it/s]
  6%|██▌                                       | 6/100 [00:01<00:23,  3.93it/s]
  7%|██▉                                       | 7/100 [00:01<00:22,  4.09it/s]
  8%|███▎                                      | 8/100 [00:01<00:20,  4.38it/s]
  9%|███▊                                      | 9/100 [00:02<00:21,  4.32it/s]
 10%|████                                     | 10/100 [00:02<00:21,  4.14it/s]
 11%|████▌                                    | 11/100 [00:02<00:21,  4.14it/s]
 12%|████▉                                    | 12/100 [00:03<00:22,  3.98it/s]
 13%|█████▎                                   | 13/100 [00:03<00:21,  4.14it/s]
 16%|██████▌                           

150:train loss is 0.7621475458145142
150: valid_loss is1.2041399478912354



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:16,  5.84it/s]
  4%|█▋                                        | 4/100 [00:00<00:17,  5.65it/s]
  7%|██▉                                       | 7/100 [00:00<00:12,  7.71it/s]
  8%|███▎                                      | 8/100 [00:01<00:11,  7.71it/s]
  9%|███▊                                      | 9/100 [00:01<00:11,  7.79it/s]
 12%|████▉                                    | 12/100 [00:01<00:10,  8.14it/s]
 15%|██████▏                                  | 15/100 [00:01<00:10,  8.44it/s]
 16%|██████▌                                  | 16/100 [00:02<00:11,  7.61it/s]
 18%|███████▍                                 | 18/100 [00:02<00:10,  7.74it/s]
 19%|███████▊                                 | 19/100 [00:02<00:12,  6.35it/s]
 21%|████████▌                                | 21/100 [00:03<00:12,  6.32it/s]
 22%|█████████                         

151:train loss is 0.9135035872459412



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:10,  9.73it/s]
  3%|█▎                                        | 3/100 [00:00<00:18,  5.28it/s]
  4%|█▋                                        | 4/100 [00:00<00:19,  4.98it/s]
  7%|██▉                                       | 7/100 [00:00<00:12,  7.72it/s]
  9%|███▊                                      | 9/100 [00:01<00:15,  5.90it/s]
 10%|████                                     | 10/100 [00:01<00:15,  5.90it/s]
 13%|█████▎                                   | 13/100 [00:01<00:12,  7.22it/s]
 15%|██████▏                                  | 15/100 [00:02<00:11,  7.18it/s]
 17%|██████▉                                  | 17/100 [00:02<00:11,  7.32it/s]
 19%|███████▊                                 | 19/100 [00:02<00:10,  7.57it/s]
 21%|████████▌                                | 21/100 [00:02<00:11,  7.08it/s]
 24%|█████████▊                        

152:train loss is 1.019976258277893



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:31,  1.08it/s]
  4%|█▋                                        | 4/100 [00:01<00:31,  3.08it/s]
  6%|██▌                                       | 6/100 [00:02<00:34,  2.70it/s]
  7%|██▉                                       | 7/100 [00:02<00:34,  2.71it/s]
  8%|███▎                                      | 8/100 [00:02<00:32,  2.81it/s]
 10%|████                                     | 10/100 [00:02<00:26,  3.38it/s]
 11%|████▌                                    | 11/100 [00:03<00:26,  3.36it/s]
 13%|█████▎                                   | 13/100 [00:03<00:22,  3.84it/s]
 16%|██████▌                                  | 16/100 [00:03<00:19,  4.26it/s]
 18%|███████▍                                 | 18/100 [00:03<00:18,  4.52it/s]
 20%|████████▏                                | 20/100 [00:04<00:16,  4.81it/s]
 22%|█████████                         

153:train loss is 0.999450147151947



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:44,  2.20it/s]
  3%|█▎                                        | 3/100 [00:01<00:34,  2.85it/s]
  4%|█▋                                        | 4/100 [00:01<00:29,  3.28it/s]
  6%|██▌                                       | 6/100 [00:01<00:22,  4.11it/s]
  7%|██▉                                       | 7/100 [00:01<00:21,  4.32it/s]
  9%|███▊                                      | 9/100 [00:01<00:18,  5.04it/s]
 10%|████                                     | 10/100 [00:02<00:19,  4.61it/s]
 12%|████▉                                    | 12/100 [00:02<00:17,  4.98it/s]
 15%|██████▏                                  | 15/100 [00:02<00:14,  5.96it/s]
 17%|██████▉                                  | 17/100 [00:03<00:17,  4.84it/s]
 21%|████████▌                                | 21/100 [00:03<00:13,  5.65it/s]
 23%|█████████▍                        

154:train loss is 0.9455177187919617



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:31,  3.16it/s]
  3%|█▎                                        | 3/100 [00:00<00:19,  4.91it/s]
  4%|█▋                                        | 4/100 [00:00<00:19,  4.99it/s]
  5%|██                                        | 5/100 [00:01<00:19,  4.77it/s]
  6%|██▌                                       | 6/100 [00:01<00:19,  4.89it/s]
  9%|███▊                                      | 9/100 [00:01<00:13,  6.74it/s]
 12%|████▉                                    | 12/100 [00:01<00:12,  6.91it/s]
 15%|██████▏                                  | 15/100 [00:02<00:12,  7.05it/s]
 16%|██████▌                                  | 16/100 [00:03<00:16,  5.23it/s]
 20%|████████▏                                | 20/100 [00:03<00:13,  5.92it/s]
 22%|█████████                                | 22/100 [00:03<00:13,  5.96it/s]
 24%|█████████▊                        

155:train loss is 0.8919950723648071



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:29,  1.11it/s]
  3%|█▎                                        | 3/100 [00:01<00:34,  2.80it/s]
  4%|█▋                                        | 4/100 [00:01<00:33,  2.87it/s]
  6%|██▌                                       | 6/100 [00:02<00:35,  2.65it/s]
  7%|██▉                                       | 7/100 [00:02<00:34,  2.69it/s]
  8%|███▎                                      | 8/100 [00:03<00:39,  2.30it/s]
 12%|████▉                                    | 12/100 [00:03<00:26,  3.34it/s]
 14%|█████▋                                   | 14/100 [00:03<00:23,  3.62it/s]
 18%|███████▍                                 | 18/100 [00:04<00:18,  4.44it/s]
 20%|████████▏                                | 20/100 [00:04<00:16,  4.77it/s]
 22%|█████████                                | 22/100 [00:05<00:17,  4.37it/s]
 24%|█████████▊                        

156:train loss is 0.836323618888855



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:30,  3.29it/s]
  2%|▊                                         | 2/100 [00:00<00:24,  3.99it/s]
  4%|█▋                                        | 4/100 [00:00<00:17,  5.54it/s]
  6%|██▌                                       | 6/100 [00:00<00:14,  6.41it/s]
  9%|███▊                                      | 9/100 [00:01<00:11,  7.63it/s]
 11%|████▌                                    | 11/100 [00:01<00:12,  7.35it/s]
 12%|████▉                                    | 12/100 [00:01<00:11,  7.45it/s]
 15%|██████▏                                  | 15/100 [00:01<00:09,  8.74it/s]
 17%|██████▉                                  | 17/100 [00:02<00:10,  8.10it/s]
 20%|████████▏                                | 20/100 [00:02<00:08,  8.94it/s]
 22%|█████████                                | 22/100 [00:03<00:11,  6.84it/s]
 24%|█████████▊                        

157:train loss is 0.9815984964370728



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:12,  7.59it/s]
  5%|██                                        | 5/100 [00:00<00:11,  8.04it/s]
  7%|██▉                                       | 7/100 [00:00<00:10,  9.15it/s]
  8%|███▎                                      | 8/100 [00:01<00:18,  4.87it/s]
  9%|███▊                                      | 9/100 [00:01<00:19,  4.78it/s]
 10%|████                                     | 10/100 [00:01<00:17,  5.01it/s]
 12%|████▉                                    | 12/100 [00:02<00:16,  5.43it/s]
 14%|█████▋                                   | 14/100 [00:02<00:15,  5.58it/s]
 16%|██████▌                                  | 16/100 [00:02<00:14,  5.68it/s]
 17%|██████▉                                  | 17/100 [00:02<00:14,  5.78it/s]
 19%|███████▊                                 | 19/100 [00:03<00:13,  6.04it/s]
 20%|████████▏                         

158:train loss is 0.9538983702659607



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:17,  5.55it/s]
  3%|█▎                                        | 3/100 [00:00<00:16,  5.77it/s]
  5%|██                                        | 5/100 [00:00<00:14,  6.73it/s]
  7%|██▉                                       | 7/100 [00:01<00:13,  6.94it/s]
 10%|████                                     | 10/100 [00:01<00:11,  8.03it/s]
 11%|████▌                                    | 11/100 [00:01<00:12,  7.02it/s]
 12%|████▉                                    | 12/100 [00:01<00:13,  6.38it/s]
 13%|█████▎                                   | 13/100 [00:02<00:14,  5.93it/s]
 15%|██████▏                                  | 15/100 [00:02<00:13,  6.25it/s]
 16%|██████▌                                  | 16/100 [00:02<00:13,  6.12it/s]
 17%|██████▉                                  | 17/100 [00:02<00:13,  6.19it/s]
 18%|███████▍                          

159:train loss is 0.9377533197402954



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:07, 12.77it/s]
  4%|█▋                                        | 4/100 [00:00<00:12,  7.52it/s]
  6%|██▌                                       | 6/100 [00:00<00:11,  8.36it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  8.21it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.31it/s]
 10%|████                                     | 10/100 [00:01<00:12,  7.38it/s]
 12%|████▉                                    | 12/100 [00:01<00:10,  8.14it/s]
 14%|█████▋                                   | 14/100 [00:01<00:09,  8.75it/s]
 16%|██████▌                                  | 16/100 [00:01<00:09,  8.89it/s]
 18%|███████▍                                 | 18/100 [00:02<00:11,  7.42it/s]
 20%|████████▏                                | 20/100 [00:02<00:10,  7.56it/s]
 21%|████████▌                         

160:train loss is 0.8898784518241882
160: valid_loss is1.6129146814346313



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:05, 18.02it/s]
  4%|█▋                                        | 4/100 [00:00<00:07, 12.66it/s]
  5%|██                                        | 5/100 [00:00<00:09, 10.00it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  7.91it/s]
  8%|███▎                                      | 8/100 [00:01<00:14,  6.50it/s]
  9%|███▊                                      | 9/100 [00:01<00:15,  5.83it/s]
 11%|████▌                                    | 11/100 [00:01<00:14,  6.31it/s]
 12%|████▉                                    | 12/100 [00:02<00:14,  5.95it/s]
 13%|█████▎                                   | 13/100 [00:02<00:15,  5.53it/s]
 15%|██████▏                                  | 15/100 [00:02<00:14,  5.76it/s]
 16%|██████▌                                  | 16/100 [00:02<00:14,  5.77it/s]
 17%|██████▉                           

161:train loss is 1.0612339973449707



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:24,  1.18it/s]
  2%|▊                                         | 2/100 [00:01<00:49,  1.98it/s]
  4%|█▋                                        | 4/100 [00:01<00:30,  3.20it/s]
  7%|██▉                                       | 7/100 [00:01<00:18,  5.05it/s]
  8%|███▎                                      | 8/100 [00:01<00:19,  4.76it/s]
  9%|███▊                                      | 9/100 [00:01<00:18,  4.83it/s]
 13%|█████▎                                   | 13/100 [00:01<00:13,  6.54it/s]
 15%|██████▏                                  | 15/100 [00:02<00:12,  6.66it/s]
 17%|██████▉                                  | 17/100 [00:02<00:13,  6.32it/s]
 21%|████████▌                                | 21/100 [00:02<00:10,  7.46it/s]
 24%|█████████▊                               | 24/100 [00:02<00:09,  8.22it/s]
 27%|███████████                       

162:train loss is 1.0048093795776367



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:05, 17.96it/s]
  5%|██                                        | 5/100 [00:00<00:08, 11.15it/s]
  6%|██▌                                       | 6/100 [00:00<00:11,  8.07it/s]
  7%|██▉                                       | 7/100 [00:01<00:14,  6.50it/s]
  8%|███▎                                      | 8/100 [00:01<00:15,  5.86it/s]
  9%|███▊                                      | 9/100 [00:01<00:15,  6.05it/s]
 10%|████                                     | 10/100 [00:01<00:14,  6.06it/s]
 11%|████▌                                    | 11/100 [00:01<00:15,  5.63it/s]
 12%|████▉                                    | 12/100 [00:02<00:15,  5.52it/s]
 13%|█████▎                                   | 13/100 [00:02<00:16,  5.26it/s]
 15%|██████▏                                  | 15/100 [00:02<00:15,  5.61it/s]
 17%|██████▉                           

163:train loss is 0.8116755485534668



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:25,  3.89it/s]
  3%|█▎                                        | 3/100 [00:00<00:17,  5.70it/s]
  6%|██▌                                       | 6/100 [00:01<00:15,  5.99it/s]
  8%|███▎                                      | 8/100 [00:01<00:14,  6.23it/s]
  9%|███▊                                      | 9/100 [00:02<00:25,  3.61it/s]
 10%|████                                     | 10/100 [00:03<00:33,  2.70it/s]
 13%|█████▎                                   | 13/100 [00:03<00:25,  3.37it/s]
 15%|██████▏                                  | 15/100 [00:04<00:24,  3.49it/s]
 18%|███████▍                                 | 18/100 [00:04<00:20,  4.06it/s]
 20%|████████▏                                | 20/100 [00:05<00:23,  3.34it/s]
 21%|████████▌                                | 21/100 [00:06<00:23,  3.42it/s]
 22%|█████████                         

164:train loss is 0.816723644733429



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:09, 10.34it/s]
  4%|█▋                                        | 4/100 [00:00<00:19,  4.81it/s]
  8%|███▎                                      | 8/100 [00:01<00:13,  6.61it/s]
  9%|███▊                                      | 9/100 [00:01<00:16,  5.42it/s]
 10%|████                                     | 10/100 [00:01<00:17,  5.12it/s]
 12%|████▉                                    | 12/100 [00:02<00:16,  5.47it/s]
 13%|█████▎                                   | 13/100 [00:02<00:15,  5.62it/s]
 14%|█████▋                                   | 14/100 [00:02<00:16,  5.22it/s]
 16%|██████▌                                  | 16/100 [00:03<00:18,  4.51it/s]
 17%|██████▉                                  | 17/100 [00:03<00:18,  4.59it/s]
 18%|███████▍                                 | 18/100 [00:03<00:17,  4.64it/s]
 19%|███████▊                          

165:train loss is 0.7877914905548096



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:12,  7.50it/s]
  6%|██▌                                       | 6/100 [00:00<00:08, 10.87it/s]
 10%|████                                     | 10/100 [00:00<00:08, 10.57it/s]
 14%|█████▋                                   | 14/100 [00:01<00:06, 13.07it/s]
 16%|██████▌                                  | 16/100 [00:01<00:06, 12.66it/s]
 18%|███████▍                                 | 18/100 [00:01<00:07, 11.03it/s]
 20%|████████▏                                | 20/100 [00:01<00:07, 10.24it/s]
 21%|████████▌                                | 21/100 [00:02<00:10,  7.79it/s]
 22%|█████████                                | 22/100 [00:02<00:10,  7.51it/s]
 23%|█████████▍                               | 23/100 [00:03<00:10,  7.25it/s]
 25%|██████████▎                              | 25/100 [00:03<00:10,  7.46it/s]
 26%|██████████▋                       

166:train loss is 1.123060703277588



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:07,  1.46it/s]
  2%|▊                                         | 2/100 [00:01<01:09,  1.41it/s]
  3%|█▎                                        | 3/100 [00:01<00:57,  1.69it/s]
  5%|██                                        | 5/100 [00:02<00:38,  2.48it/s]
  6%|██▌                                       | 6/100 [00:02<00:46,  2.03it/s]
  8%|███▎                                      | 8/100 [00:03<00:36,  2.52it/s]
 11%|████▌                                    | 11/100 [00:04<00:33,  2.62it/s]
 12%|████▉                                    | 12/100 [00:04<00:33,  2.64it/s]
 13%|█████▎                                   | 13/100 [00:04<00:31,  2.74it/s]
 14%|█████▋                                   | 14/100 [00:04<00:29,  2.87it/s]
 15%|██████▏                                  | 15/100 [00:05<00:29,  2.88it/s]
 16%|██████▌                           

167:train loss is 0.9611704349517822



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:18,  5.44it/s]
  3%|█▎                                        | 3/100 [00:00<00:22,  4.36it/s]
  6%|██▌                                       | 6/100 [00:00<00:12,  7.54it/s]
 10%|████                                     | 10/100 [00:00<00:08, 10.86it/s]
 12%|████▉                                    | 12/100 [00:01<00:13,  6.31it/s]
 14%|█████▋                                   | 14/100 [00:02<00:17,  5.00it/s]
 17%|██████▉                                  | 17/100 [00:03<00:15,  5.40it/s]
 18%|███████▍                                 | 18/100 [00:03<00:15,  5.27it/s]
 19%|███████▊                                 | 19/100 [00:03<00:15,  5.28it/s]
 21%|████████▌                                | 21/100 [00:03<00:14,  5.51it/s]
 23%|█████████▍                               | 23/100 [00:04<00:13,  5.74it/s]
 24%|█████████▊                        

168:train loss is 0.9733935594558716



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:13,  7.45it/s]
  3%|█▎                                        | 3/100 [00:00<00:16,  5.77it/s]
  6%|██▌                                       | 6/100 [00:00<00:13,  6.94it/s]
  7%|██▉                                       | 7/100 [00:01<00:13,  6.76it/s]
  8%|███▎                                      | 8/100 [00:01<00:15,  5.89it/s]
 10%|████                                     | 10/100 [00:01<00:15,  5.78it/s]
 11%|████▌                                    | 11/100 [00:01<00:15,  5.76it/s]
 12%|████▉                                    | 12/100 [00:02<00:15,  5.79it/s]
 13%|█████▎                                   | 13/100 [00:02<00:19,  4.48it/s]
 14%|█████▋                                   | 14/100 [00:03<00:18,  4.61it/s]
 16%|██████▌                                  | 16/100 [00:03<00:17,  4.74it/s]
 17%|██████▉                           

169:train loss is 0.9705224633216858



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:22,  1.20it/s]
  2%|▊                                         | 2/100 [00:01<00:54,  1.79it/s]
  5%|██                                        | 5/100 [00:01<00:26,  3.64it/s]
  6%|██▌                                       | 6/100 [00:01<00:24,  3.84it/s]
  7%|██▉                                       | 7/100 [00:01<00:23,  3.99it/s]
  8%|███▎                                      | 8/100 [00:01<00:21,  4.25it/s]
  9%|███▊                                      | 9/100 [00:02<00:21,  4.22it/s]
 10%|████                                     | 10/100 [00:02<00:22,  4.09it/s]
 11%|████▌                                    | 11/100 [00:02<00:21,  4.10it/s]
 12%|████▉                                    | 12/100 [00:03<00:22,  3.96it/s]
 13%|█████▎                                   | 13/100 [00:03<00:21,  4.08it/s]
 17%|██████▉                           

170:train loss is 0.8478624224662781
170: valid_loss is1.365192174911499



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:17,  5.45it/s]
  4%|█▋                                        | 4/100 [00:00<00:17,  5.62it/s]
  7%|██▉                                       | 7/100 [00:00<00:12,  7.58it/s]
  8%|███▎                                      | 8/100 [00:01<00:12,  7.63it/s]
  9%|███▊                                      | 9/100 [00:01<00:11,  7.77it/s]
 12%|████▉                                    | 12/100 [00:01<00:10,  8.37it/s]
 15%|██████▏                                  | 15/100 [00:01<00:09,  8.68it/s]
 16%|██████▌                                  | 16/100 [00:02<00:10,  7.79it/s]
 18%|███████▍                                 | 18/100 [00:02<00:10,  7.91it/s]
 19%|███████▊                                 | 19/100 [00:02<00:12,  6.41it/s]
 21%|████████▌                                | 21/100 [00:03<00:12,  6.39it/s]
 22%|█████████                         

171:train loss is 0.9548260569572449



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:09,  9.91it/s]
  3%|█▎                                        | 3/100 [00:00<00:18,  5.28it/s]
  4%|█▋                                        | 4/100 [00:00<00:19,  4.91it/s]
  6%|██▌                                       | 6/100 [00:00<00:14,  6.40it/s]
  7%|██▉                                       | 7/100 [00:01<00:13,  6.71it/s]
  8%|███▎                                      | 8/100 [00:01<00:15,  5.89it/s]
  9%|███▊                                      | 9/100 [00:01<00:17,  5.33it/s]
 10%|████                                     | 10/100 [00:01<00:16,  5.42it/s]
 13%|█████▎                                   | 13/100 [00:01<00:13,  6.58it/s]
 15%|██████▏                                  | 15/100 [00:02<00:13,  6.50it/s]
 17%|██████▉                                  | 17/100 [00:02<00:12,  6.66it/s]
 18%|███████▍                          

172:train loss is 1.0070854425430298



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:01<01:43,  1.04s/it]
  4%|█▋                                        | 4/100 [00:01<00:34,  2.82it/s]
  6%|██▌                                       | 6/100 [00:02<00:36,  2.56it/s]
  7%|██▉                                       | 7/100 [00:02<00:35,  2.62it/s]
  8%|███▎                                      | 8/100 [00:02<00:34,  2.69it/s]
 10%|████                                     | 10/100 [00:03<00:27,  3.22it/s]
 11%|████▌                                    | 11/100 [00:03<00:28,  3.17it/s]
 13%|█████▎                                   | 13/100 [00:03<00:24,  3.62it/s]
 16%|██████▌                                  | 16/100 [00:03<00:20,  4.01it/s]
 18%|███████▍                                 | 18/100 [00:04<00:19,  4.28it/s]
 20%|████████▏                                | 20/100 [00:04<00:17,  4.55it/s]
 22%|█████████                         

173:train loss is 1.0174099206924438



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:44,  2.19it/s]
  3%|█▎                                        | 3/100 [00:01<00:33,  2.86it/s]
  4%|█▋                                        | 4/100 [00:01<00:29,  3.29it/s]
  6%|██▌                                       | 6/100 [00:01<00:23,  3.99it/s]
  7%|██▉                                       | 7/100 [00:01<00:22,  4.21it/s]
  9%|███▊                                      | 9/100 [00:01<00:18,  4.94it/s]
 10%|████                                     | 10/100 [00:02<00:19,  4.57it/s]
 12%|████▉                                    | 12/100 [00:02<00:16,  5.18it/s]
 15%|██████▏                                  | 15/100 [00:02<00:13,  6.16it/s]
 17%|██████▉                                  | 17/100 [00:03<00:16,  5.01it/s]
 21%|████████▌                                | 21/100 [00:03<00:13,  5.81it/s]
 23%|█████████▍                        

174:train loss is 1.078290581703186



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:33,  2.98it/s]
  3%|█▎                                        | 3/100 [00:00<00:19,  4.89it/s]
  4%|█▋                                        | 4/100 [00:00<00:19,  4.99it/s]
  5%|██                                        | 5/100 [00:01<00:19,  4.82it/s]
  6%|██▌                                       | 6/100 [00:01<00:19,  4.93it/s]
  9%|███▊                                      | 9/100 [00:01<00:13,  6.73it/s]
 12%|████▉                                    | 12/100 [00:01<00:12,  6.93it/s]
 15%|██████▏                                  | 15/100 [00:02<00:12,  7.08it/s]
 16%|██████▌                                  | 16/100 [00:03<00:16,  5.17it/s]
 20%|████████▏                                | 20/100 [00:03<00:13,  5.83it/s]
 22%|█████████                                | 22/100 [00:03<00:13,  5.91it/s]
 24%|█████████▊                        

175:train loss is 0.8795684576034546



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:30,  1.09it/s]
  3%|█▎                                        | 3/100 [00:01<00:34,  2.78it/s]
  4%|█▋                                        | 4/100 [00:01<00:35,  2.68it/s]
  6%|██▌                                       | 6/100 [00:02<00:37,  2.53it/s]
  7%|██▉                                       | 7/100 [00:02<00:36,  2.58it/s]
  8%|███▎                                      | 8/100 [00:03<00:39,  2.32it/s]
 12%|████▉                                    | 12/100 [00:03<00:26,  3.37it/s]
 14%|█████▋                                   | 14/100 [00:03<00:23,  3.65it/s]
 17%|██████▉                                  | 17/100 [00:03<00:19,  4.31it/s]
 19%|███████▊                                 | 19/100 [00:04<00:17,  4.53it/s]
 21%|████████▌                                | 21/100 [00:04<00:18,  4.32it/s]
 23%|█████████▍                        

176:train loss is 0.7555158138275146



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:30,  3.25it/s]
  2%|▊                                         | 2/100 [00:00<00:23,  4.12it/s]
  4%|█▋                                        | 4/100 [00:00<00:18,  5.32it/s]
  6%|██▌                                       | 6/100 [00:00<00:15,  6.19it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.35it/s]
 11%|████▌                                    | 11/100 [00:01<00:12,  7.10it/s]
 12%|████▉                                    | 12/100 [00:01<00:12,  7.09it/s]
 15%|██████▏                                  | 15/100 [00:01<00:10,  8.36it/s]
 17%|██████▉                                  | 17/100 [00:02<00:10,  7.86it/s]
 20%|████████▏                                | 20/100 [00:02<00:09,  8.72it/s]
 22%|█████████                                | 22/100 [00:03<00:11,  6.96it/s]
 24%|█████████▊                        

177:train loss is 1.0708975791931152



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:13,  7.07it/s]
  5%|██                                        | 5/100 [00:00<00:12,  7.76it/s]
  7%|██▉                                       | 7/100 [00:00<00:10,  9.01it/s]
  8%|███▎                                      | 8/100 [00:01<00:19,  4.83it/s]
  9%|███▊                                      | 9/100 [00:01<00:19,  4.72it/s]
 10%|████                                     | 10/100 [00:02<00:18,  4.97it/s]
 12%|████▉                                    | 12/100 [00:02<00:16,  5.41it/s]
 14%|█████▋                                   | 14/100 [00:02<00:15,  5.54it/s]
 16%|██████▌                                  | 16/100 [00:02<00:14,  5.70it/s]
 17%|██████▉                                  | 17/100 [00:02<00:14,  5.75it/s]
 19%|███████▊                                 | 19/100 [00:03<00:13,  6.01it/s]
 20%|████████▏                         

178:train loss is 0.8689819574356079



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:19,  4.97it/s]
  3%|█▎                                        | 3/100 [00:00<00:18,  5.16it/s]
  5%|██                                        | 5/100 [00:00<00:15,  6.31it/s]
  7%|██▉                                       | 7/100 [00:01<00:13,  6.66it/s]
 10%|████                                     | 10/100 [00:01<00:11,  7.81it/s]
 11%|████▌                                    | 11/100 [00:01<00:12,  6.98it/s]
 12%|████▉                                    | 12/100 [00:01<00:13,  6.38it/s]
 13%|█████▎                                   | 13/100 [00:02<00:14,  5.91it/s]
 15%|██████▏                                  | 15/100 [00:02<00:13,  6.23it/s]
 16%|██████▌                                  | 16/100 [00:02<00:13,  6.20it/s]
 18%|███████▍                                 | 18/100 [00:02<00:13,  6.19it/s]
 19%|███████▊                          

179:train loss is 0.8783406615257263



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:06, 14.34it/s]
  4%|█▋                                        | 4/100 [00:00<00:10,  8.91it/s]
  6%|██▌                                       | 6/100 [00:00<00:09,  9.98it/s]
  7%|██▉                                       | 7/100 [00:00<00:09,  9.96it/s]
  9%|███▊                                      | 9/100 [00:00<00:10,  9.06it/s]
 11%|████▌                                    | 11/100 [00:01<00:09,  9.82it/s]
 13%|█████▎                                   | 13/100 [00:01<00:08, 10.49it/s]
 15%|██████▏                                  | 15/100 [00:01<00:08, 10.15it/s]
 17%|██████▉                                  | 17/100 [00:01<00:08,  9.48it/s]
 18%|███████▍                                 | 18/100 [00:02<00:09,  8.95it/s]
 20%|████████▏                                | 20/100 [00:02<00:08,  9.04it/s]
 21%|████████▌                         

180:train loss is 0.9104447960853577
180: valid_loss is1.5625091791152954



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:05, 16.80it/s]
  4%|█▋                                        | 4/100 [00:00<00:08, 11.52it/s]
  5%|██                                        | 5/100 [00:00<00:09,  9.78it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  7.82it/s]
  8%|███▎                                      | 8/100 [00:01<00:14,  6.51it/s]
  9%|███▊                                      | 9/100 [00:01<00:15,  5.81it/s]
 11%|████▌                                    | 11/100 [00:01<00:14,  6.30it/s]
 12%|████▉                                    | 12/100 [00:02<00:15,  5.82it/s]
 13%|█████▎                                   | 13/100 [00:02<00:16,  5.43it/s]
 15%|██████▏                                  | 15/100 [00:02<00:14,  5.69it/s]
 16%|██████▌                                  | 16/100 [00:02<00:14,  5.71it/s]
 17%|██████▉                           

181:train loss is 1.1568454504013062



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:26,  1.15it/s]
  2%|▊                                         | 2/100 [00:01<00:50,  1.92it/s]
  4%|█▋                                        | 4/100 [00:01<00:31,  3.09it/s]
  7%|██▉                                       | 7/100 [00:01<00:18,  4.94it/s]
  8%|███▎                                      | 8/100 [00:01<00:19,  4.61it/s]
  9%|███▊                                      | 9/100 [00:01<00:19,  4.70it/s]
 12%|████▉                                    | 12/100 [00:02<00:14,  5.92it/s]
 15%|██████▏                                  | 15/100 [00:02<00:13,  6.48it/s]
 17%|██████▉                                  | 17/100 [00:02<00:13,  6.20it/s]
 21%|████████▌                                | 21/100 [00:02<00:10,  7.32it/s]
 24%|█████████▊                               | 24/100 [00:02<00:09,  8.07it/s]
 27%|███████████                       

182:train loss is 0.9813060164451599



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:05, 18.48it/s]
  5%|██                                        | 5/100 [00:00<00:08, 11.57it/s]
  6%|██▌                                       | 6/100 [00:00<00:11,  8.22it/s]
  7%|██▉                                       | 7/100 [00:01<00:14,  6.56it/s]
  8%|███▎                                      | 8/100 [00:01<00:15,  5.86it/s]
  9%|███▊                                      | 9/100 [00:01<00:14,  6.09it/s]
 10%|████                                     | 10/100 [00:01<00:14,  6.12it/s]
 11%|████▌                                    | 11/100 [00:01<00:15,  5.68it/s]
 12%|████▉                                    | 12/100 [00:02<00:15,  5.67it/s]
 13%|█████▎                                   | 13/100 [00:02<00:16,  5.39it/s]
 15%|██████▏                                  | 15/100 [00:02<00:15,  5.66it/s]
 17%|██████▉                           

183:train loss is 0.7866431474685669



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:14,  7.03it/s]
  3%|█▎                                        | 3/100 [00:00<00:09, 10.35it/s]
  6%|██▌                                       | 6/100 [00:00<00:08, 10.49it/s]
  8%|███▎                                      | 8/100 [00:00<00:08, 10.68it/s]
  9%|███▊                                      | 9/100 [00:01<00:14,  6.39it/s]
 10%|████                                     | 10/100 [00:02<00:18,  4.74it/s]
 13%|█████▎                                   | 13/100 [00:02<00:14,  5.84it/s]
 15%|██████▏                                  | 15/100 [00:02<00:14,  6.04it/s]
 18%|███████▍                                 | 18/100 [00:02<00:11,  6.95it/s]
 20%|████████▏                                | 20/100 [00:03<00:14,  5.63it/s]
 22%|█████████                                | 22/100 [00:03<00:13,  5.59it/s]
 25%|██████████▎                       

184:train loss is 0.8227376341819763



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:08, 11.31it/s]
  4%|█▋                                        | 4/100 [00:00<00:17,  5.55it/s]
  8%|███▎                                      | 8/100 [00:01<00:11,  7.80it/s]
  9%|███▊                                      | 9/100 [00:01<00:13,  6.52it/s]
 10%|████                                     | 10/100 [00:01<00:15,  5.96it/s]
 12%|████▉                                    | 12/100 [00:01<00:14,  6.26it/s]
 14%|█████▋                                   | 14/100 [00:02<00:14,  5.88it/s]
 16%|██████▌                                  | 16/100 [00:03<00:17,  4.84it/s]
 17%|██████▉                                  | 17/100 [00:03<00:16,  4.90it/s]
 18%|███████▍                                 | 18/100 [00:03<00:16,  4.95it/s]
 19%|███████▊                                 | 19/100 [00:03<00:16,  4.84it/s]
 21%|████████▌                         

185:train loss is 0.9124976992607117



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:12,  7.94it/s]
  6%|██▌                                       | 6/100 [00:00<00:07, 12.39it/s]
 10%|████                                     | 10/100 [00:00<00:07, 11.73it/s]
 14%|█████▋                                   | 14/100 [00:00<00:06, 14.28it/s]
 16%|██████▌                                  | 16/100 [00:01<00:06, 13.64it/s]
 18%|███████▍                                 | 18/100 [00:01<00:07, 11.69it/s]
 20%|████████▏                                | 20/100 [00:01<00:07, 10.60it/s]
 22%|█████████                                | 22/100 [00:02<00:09,  7.81it/s]
 23%|█████████▍                               | 23/100 [00:03<00:10,  7.53it/s]
 25%|██████████▎                              | 25/100 [00:03<00:09,  7.66it/s]
 26%|██████████▋                              | 26/100 [00:03<00:09,  7.51it/s]
 27%|███████████                       

186:train loss is 1.0944128036499023



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:25,  1.15it/s]
  2%|▊                                         | 2/100 [00:01<01:30,  1.08it/s]
  3%|█▎                                        | 3/100 [00:02<01:10,  1.37it/s]
  5%|██                                        | 5/100 [00:02<00:45,  2.08it/s]
  6%|██▌                                       | 6/100 [00:03<00:50,  1.87it/s]
  8%|███▎                                      | 8/100 [00:03<00:39,  2.34it/s]
 11%|████▌                                    | 11/100 [00:04<00:33,  2.65it/s]
 12%|████▉                                    | 12/100 [00:04<00:32,  2.70it/s]
 13%|█████▎                                   | 13/100 [00:04<00:30,  2.81it/s]
 14%|█████▋                                   | 14/100 [00:04<00:29,  2.96it/s]
 15%|██████▏                                  | 15/100 [00:05<00:28,  3.00it/s]
 16%|██████▌                           

187:train loss is 1.0003387928009033



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:18,  5.28it/s]
  3%|█▎                                        | 3/100 [00:00<00:22,  4.24it/s]
  6%|██▌                                       | 6/100 [00:00<00:12,  7.25it/s]
  9%|███▊                                      | 9/100 [00:00<00:09,  9.66it/s]
 12%|████▉                                    | 12/100 [00:01<00:14,  6.16it/s]
 14%|█████▋                                   | 14/100 [00:02<00:17,  4.87it/s]
 17%|██████▉                                  | 17/100 [00:03<00:15,  5.23it/s]
 18%|███████▍                                 | 18/100 [00:03<00:16,  5.02it/s]
 19%|███████▊                                 | 19/100 [00:03<00:16,  5.06it/s]
 21%|████████▌                                | 21/100 [00:03<00:14,  5.30it/s]
 23%|█████████▍                               | 23/100 [00:04<00:13,  5.58it/s]
 24%|█████████▊                        

188:train loss is 1.0439860820770264



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:11,  8.50it/s]
  3%|█▎                                        | 3/100 [00:00<00:13,  6.94it/s]
  5%|██                                        | 5/100 [00:00<00:11,  8.43it/s]
  6%|██▌                                       | 6/100 [00:00<00:12,  7.28it/s]
  7%|██▉                                       | 7/100 [00:00<00:12,  7.38it/s]
  8%|███▎                                      | 8/100 [00:01<00:13,  6.69it/s]
 10%|████                                     | 10/100 [00:01<00:13,  6.72it/s]
 11%|████▌                                    | 11/100 [00:01<00:13,  6.72it/s]
 12%|████▉                                    | 12/100 [00:01<00:13,  6.75it/s]
 13%|█████▎                                   | 13/100 [00:02<00:16,  5.30it/s]
 14%|█████▋                                   | 14/100 [00:02<00:15,  5.39it/s]
 16%|██████▌                           

189:train loss is 1.1103569269180298



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:21,  1.21it/s]
  2%|▊                                         | 2/100 [00:01<00:54,  1.80it/s]
  5%|██                                        | 5/100 [00:01<00:25,  3.75it/s]
  6%|██▌                                       | 6/100 [00:01<00:23,  4.02it/s]
  7%|██▉                                       | 7/100 [00:01<00:22,  4.16it/s]
  8%|███▎                                      | 8/100 [00:01<00:20,  4.40it/s]
  9%|███▊                                      | 9/100 [00:02<00:20,  4.34it/s]
 10%|████                                     | 10/100 [00:02<00:21,  4.19it/s]
 11%|████▌                                    | 11/100 [00:02<00:21,  4.19it/s]
 12%|████▉                                    | 12/100 [00:02<00:21,  4.03it/s]
 13%|█████▎                                   | 13/100 [00:03<00:21,  4.12it/s]
 17%|██████▉                           

190:train loss is 0.7820780277252197
190: valid_loss is1.3003051280975342



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:16,  5.91it/s]
  4%|█▋                                        | 4/100 [00:00<00:15,  6.01it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  8.06it/s]
  8%|███▎                                      | 8/100 [00:00<00:11,  8.07it/s]
  9%|███▊                                      | 9/100 [00:01<00:11,  8.17it/s]
 12%|████▉                                    | 12/100 [00:01<00:09,  8.80it/s]
 15%|██████▏                                  | 15/100 [00:01<00:09,  8.87it/s]
 16%|██████▌                                  | 16/100 [00:02<00:10,  7.85it/s]
 18%|███████▍                                 | 18/100 [00:02<00:10,  8.02it/s]
 19%|███████▊                                 | 19/100 [00:02<00:12,  6.44it/s]
 21%|████████▌                                | 21/100 [00:03<00:12,  6.43it/s]
 22%|█████████                         

191:train loss is 0.9527934193611145



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:09, 10.03it/s]
  3%|█▎                                        | 3/100 [00:00<00:17,  5.56it/s]
  4%|█▋                                        | 4/100 [00:00<00:18,  5.17it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  7.98it/s]
  9%|███▊                                      | 9/100 [00:01<00:15,  5.99it/s]
 10%|████                                     | 10/100 [00:01<00:14,  6.04it/s]
 13%|█████▎                                   | 13/100 [00:01<00:11,  7.38it/s]
 15%|██████▏                                  | 15/100 [00:02<00:11,  7.33it/s]
 17%|██████▉                                  | 17/100 [00:02<00:11,  7.49it/s]
 19%|███████▊                                 | 19/100 [00:02<00:10,  7.69it/s]
 21%|████████▌                                | 21/100 [00:02<00:10,  7.26it/s]
 24%|█████████▊                        

192:train loss is 1.1260056495666504



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:01<01:40,  1.01s/it]
  4%|█▋                                        | 4/100 [00:01<00:34,  2.74it/s]
  6%|██▌                                       | 6/100 [00:02<00:38,  2.46it/s]
  7%|██▉                                       | 7/100 [00:02<00:36,  2.52it/s]
  8%|███▎                                      | 8/100 [00:03<00:35,  2.59it/s]
  9%|███▊                                      | 9/100 [00:03<00:32,  2.81it/s]
 11%|████▌                                    | 11/100 [00:03<00:29,  3.00it/s]
 13%|█████▎                                   | 13/100 [00:03<00:25,  3.42it/s]
 16%|██████▌                                  | 16/100 [00:04<00:22,  3.74it/s]
 18%|███████▍                                 | 18/100 [00:04<00:20,  3.93it/s]
 19%|███████▊                                 | 19/100 [00:04<00:20,  4.05it/s]
 20%|████████▏                         

193:train loss is 0.9439315795898438



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  2%|▊                                         | 2/100 [00:00<00:44,  2.19it/s]
  3%|█▎                                        | 3/100 [00:01<00:33,  2.92it/s]
  4%|█▋                                        | 4/100 [00:01<00:28,  3.33it/s]
  6%|██▌                                       | 6/100 [00:01<00:22,  4.20it/s]
  7%|██▉                                       | 7/100 [00:01<00:21,  4.40it/s]
  9%|███▊                                      | 9/100 [00:01<00:17,  5.20it/s]
 10%|████                                     | 10/100 [00:02<00:19,  4.71it/s]
 12%|████▉                                    | 12/100 [00:02<00:16,  5.33it/s]
 15%|██████▏                                  | 15/100 [00:02<00:13,  6.33it/s]
 17%|██████▉                                  | 17/100 [00:03<00:16,  5.02it/s]
 21%|████████▌                                | 21/100 [00:03<00:13,  5.84it/s]
 23%|█████████▍                        

194:train loss is 0.7938066720962524



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:31,  3.13it/s]
  3%|█▎                                        | 3/100 [00:00<00:19,  4.89it/s]
  4%|█▋                                        | 4/100 [00:00<00:19,  5.04it/s]
  5%|██                                        | 5/100 [00:01<00:19,  4.86it/s]
  6%|██▌                                       | 6/100 [00:01<00:18,  4.97it/s]
 10%|████                                     | 10/100 [00:01<00:12,  7.36it/s]
 12%|████▉                                    | 12/100 [00:01<00:12,  6.92it/s]
 15%|██████▏                                  | 15/100 [00:02<00:12,  7.07it/s]
 16%|██████▌                                  | 16/100 [00:02<00:15,  5.41it/s]
 19%|███████▊                                 | 19/100 [00:03<00:13,  6.19it/s]
 21%|████████▌                                | 21/100 [00:03<00:12,  6.26it/s]
 23%|█████████▍                        

195:train loss is 0.8594003319740295



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<01:26,  1.14it/s]
  3%|█▎                                        | 3/100 [00:01<00:33,  2.89it/s]
  4%|█▋                                        | 4/100 [00:01<00:33,  2.89it/s]
  6%|██▌                                       | 6/100 [00:02<00:34,  2.69it/s]
  7%|██▉                                       | 7/100 [00:02<00:33,  2.82it/s]
  8%|███▎                                      | 8/100 [00:03<00:36,  2.53it/s]
 12%|████▉                                    | 12/100 [00:03<00:24,  3.65it/s]
 14%|█████▋                                   | 14/100 [00:03<00:21,  3.91it/s]
 17%|██████▉                                  | 17/100 [00:03<00:18,  4.61it/s]
 19%|███████▊                                 | 19/100 [00:03<00:16,  4.84it/s]
 21%|████████▌                                | 21/100 [00:04<00:17,  4.55it/s]
 23%|█████████▍                        

196:train loss is 0.863044261932373



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:30,  3.22it/s]
  2%|▊                                         | 2/100 [00:00<00:24,  4.07it/s]
  4%|█▋                                        | 4/100 [00:00<00:17,  5.41it/s]
  6%|██▌                                       | 6/100 [00:00<00:14,  6.27it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.52it/s]
 11%|████▌                                    | 11/100 [00:01<00:12,  7.23it/s]
 12%|████▉                                    | 12/100 [00:01<00:12,  7.33it/s]
 15%|██████▏                                  | 15/100 [00:01<00:09,  8.55it/s]
 17%|██████▉                                  | 17/100 [00:02<00:10,  7.96it/s]
 20%|████████▏                                | 20/100 [00:02<00:08,  8.94it/s]
 22%|█████████                                | 22/100 [00:03<00:11,  6.67it/s]
 24%|█████████▊                        

197:train loss is 0.9658603072166443



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:14,  6.71it/s]
  5%|██                                        | 5/100 [00:00<00:12,  7.57it/s]
  7%|██▉                                       | 7/100 [00:00<00:10,  8.76it/s]
  8%|███▎                                      | 8/100 [00:01<00:19,  4.76it/s]
  9%|███▊                                      | 9/100 [00:01<00:19,  4.65it/s]
 10%|████                                     | 10/100 [00:02<00:18,  4.88it/s]
 12%|████▉                                    | 12/100 [00:02<00:16,  5.31it/s]
 14%|█████▋                                   | 14/100 [00:02<00:15,  5.45it/s]
 16%|██████▌                                  | 16/100 [00:02<00:14,  5.62it/s]
 17%|██████▉                                  | 17/100 [00:02<00:14,  5.70it/s]
 19%|███████▊                                 | 19/100 [00:03<00:13,  5.87it/s]
 20%|████████▏                         

198:train loss is 0.9076341390609741



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  1%|▍                                         | 1/100 [00:00<00:17,  5.65it/s]
  3%|█▎                                        | 3/100 [00:00<00:18,  5.22it/s]
  5%|██                                        | 5/100 [00:00<00:15,  6.27it/s]
  7%|██▉                                       | 7/100 [00:01<00:14,  6.59it/s]
 10%|████                                     | 10/100 [00:01<00:11,  7.70it/s]
 11%|████▌                                    | 11/100 [00:01<00:12,  6.89it/s]
 12%|████▉                                    | 12/100 [00:01<00:13,  6.29it/s]
 13%|█████▎                                   | 13/100 [00:02<00:14,  5.85it/s]
 15%|██████▏                                  | 15/100 [00:02<00:13,  6.18it/s]
 16%|██████▌                                  | 16/100 [00:02<00:13,  6.14it/s]
 17%|██████▉                                  | 17/100 [00:02<00:13,  6.21it/s]
 18%|███████▍                          

199:train loss is 0.8018568158149719



  0%|                                                  | 0/100 [00:00<?, ?it/s]
  3%|█▎                                        | 3/100 [00:00<00:08, 11.53it/s]
  4%|█▋                                        | 4/100 [00:00<00:13,  7.14it/s]
  6%|██▌                                       | 6/100 [00:00<00:11,  8.31it/s]
  7%|██▉                                       | 7/100 [00:00<00:11,  8.04it/s]
  9%|███▊                                      | 9/100 [00:01<00:12,  7.30it/s]
 10%|████                                     | 10/100 [00:01<00:12,  7.37it/s]
 12%|████▉                                    | 12/100 [00:01<00:10,  8.09it/s]
 14%|█████▋                                   | 14/100 [00:01<00:09,  8.74it/s]
 16%|██████▌                                  | 16/100 [00:01<00:09,  8.87it/s]
 18%|███████▍                                 | 18/100 [00:02<00:11,  7.44it/s]
 20%|████████▏                                | 20/100 [00:02<00:10,  7.57it/s]
 21%|████████▌                         

200:train loss is 0.843914270401001
200: valid_loss is1.4102799892425537


In [26]:
result = 0

In [27]:
#测试模型准确率
test_tensors = paths_to_tensor(test_files).astype('float32')/255



  0%|                                                  | 0/600 [00:00<?, ?it/s]
  0%|                                          | 1/600 [00:00<04:29,  2.22it/s]
  0%|▏                                         | 2/600 [00:00<02:52,  3.46it/s]
  0%|▏                                         | 3/600 [00:01<04:26,  2.24it/s]
  1%|▎                                         | 4/600 [00:01<03:59,  2.49it/s]
  1%|▎                                         | 5/600 [00:02<04:32,  2.18it/s]
  1%|▍                                         | 6/600 [00:02<04:08,  2.39it/s]
  1%|▍                                         | 7/600 [00:02<03:43,  2.65it/s]
  1%|▌                                         | 8/600 [00:02<03:25,  2.88it/s]
  2%|▋                                         | 9/600 [00:02<03:11,  3.09it/s]
  2%|▋                                        | 10/600 [00:03<03:07,  3.15it/s]
  2%|▊                                        | 11/600 [00:03<03:28,  2.83it/s]
  2%|▊                                 

INFO:tensorflow:Restoring parameters from ./checkpoints/medical.ckpt
(600, 3)
0.0


In [58]:
with tf.Session() as sess:
    saver.restore(sess, './checkpoints/medical.ckpt')
    test_output = sess.run(output, feed_dict={input_:test_tensors})
    print(test_output.shape)
    result = test_output
    test_accuracy = np.sum(np.argmax(test_output,axis=1)==np.argmax(test_targets,axis=1))/600.0
    print(test_accuracy)

INFO:tensorflow:Restoring parameters from ./checkpoints/medical.ckpt
(600, 3)
0.4583333333333333


In [52]:
from pandas import DataFrame

In [53]:
DataFrame(result).to_csv('zuichu.csv')

In [54]:
DataFrame(test_files).to_csv('zuichu1.csv')