# Import requirements

In [None]:
# Install Hugging Face transformers; the import cell below takes the BERT model,
# tokenizer, optimizer and LR scheduler from it.
!pip install transformers



In [None]:
import os
import pdb
import argparse
import random
from dataclasses import dataclass, field
from typing import Optional
from collections import defaultdict

import torch
from torch.nn.utils.rnn import pad_sequence

import numpy as np
from tqdm import tqdm, trange

from transformers import (
    BertForSequenceClassification,
    BertTokenizer,
    AutoConfig,
    AdamW,
    get_cosine_schedule_with_warmup
)

# Wandb 사용

In [None]:
# Install the Weights & Biases client quietly (-qqq suppresses pip output).
!pip install wandb -qqq

In [None]:
# Import Weights & Biases and authenticate this session; later cells use
# wandb.init / wandb.config / wandb.log.
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mgoorm-team3[0m (use `wandb login --relogin` to force relogin)


True

# 1. Preprocess

In [None]:
def make_id_file(task, tokenizer):
    """Tokenize the four sentiment text files into strings of token ids.

    Reads ``sentiment.train.1``, ``sentiment.train.0``, ``sentiment.dev.1`` and
    ``sentiment.dev.0`` from the current working directory. Every line is
    lower-cased, passed to ``tokenizer.encode`` and the resulting ids are joined
    with single spaces.

    Args:
        task: Accepted for interface compatibility; not used by this function.
        tokenizer: Any object exposing ``encode(text)`` that returns an iterable
            of token ids.

    Returns:
        Tuple ``(train_pos, train_neg, dev_pos, dev_neg)``; each element is a
        list with one space-separated id string per input line.
    """
    def make_data_strings(file_name):
        # Encode while the file is open, then render each id list as text.
        with open(file_name, 'r', encoding='utf-8') as f:
            encoded_lines = [tokenizer.encode(line.lower()) for line in f]
        return [' '.join(map(str, token_ids)) for token_ids in encoded_lines]

    print('it will take some times...')
    file_names = (
        'sentiment.train.1',
        'sentiment.train.0',
        'sentiment.dev.1',
        'sentiment.dev.0',
    )
    # Files are processed in the order listed above.
    train_pos, train_neg, dev_pos, dev_neg = [make_data_strings(name) for name in file_names]

    print('make id file finished!')
    return train_pos, train_neg, dev_pos, dev_neg

In [None]:
# Load the WordPiece tokenizer matching the 'bert-base-uncased' checkpoint used for the model below.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [None]:
# List the working directory to confirm the sentiment.* data files are present.
!ls

pytorch_model.bin  sentiment.dev.1    submission_1e-4_3ep_0.WP.csv	 wandb
sample_data	   sentiment.train.0  submission_1e-5_5EP_0.1Warmup.csv
sentiment.dev.0    sentiment.train.1  test_no_label.csv


In [None]:
# Tokenize the train/dev files into id strings. The 'yelp' task argument is
# accepted by make_id_file but not used by it.
train_pos, train_neg, dev_pos, dev_neg = make_id_file('yelp', tokenizer)

it will take some times...
make id file finished!


Hyper parameter들을 wandb의 config에 저장

In [None]:
# Register the run and its hyperparameters with Weights & Biases. Later cells
# read batch size, learning rate, epochs and warmup ratio back from wandb.config.
wandb.init(project="Joo Hyung-joon", entity="goorm-team3", config={
    "learning_rate": 1e-4,
    "epochs": 3,
    "batch_size": 128,
    "warmup_ratio": 0.1
    })
# Encode the hyperparameters in the run name. Every field is comma-separated;
# the previous name was missing the separator before "Wr".
wandb.run.name = (
    f"Lr{wandb.config.learning_rate},"
    f"Ep{wandb.config.epochs},"
    f"Bs{wandb.config.batch_size},"
    f"Wr{wandb.config.warmup_ratio}"
)

In [None]:
# Peek at the first ten encoded positive training sentences (space-separated token ids).
train_pos[:10]

['101 6581 2833 1012 102',
 '101 21688 8013 2326 1012 102',
 '101 2027 2036 2031 3679 19247 1998 3256 6949 2029 2003 2428 2204 1012 102',
 '101 2009 1005 1055 1037 2204 15174 2098 7570 22974 2063 1012 102',
 '101 1996 3095 2003 5379 1012 102',
 '101 2204 3347 2833 1012 102',
 '101 2204 2326 1012 102',
 '101 11350 1997 2154 2003 25628 1998 7167 1997 19247 1012 102',
 '101 2307 2173 2005 6265 2030 3347 27962 1998 5404 1012 102',
 '101 1996 2047 2846 3504 6429 1012 102']

In [None]:
class SentimentDataset(object):
    """In-memory dataset of pre-tokenized sentences with binary sentiment labels.

    Each input sentence is a string of whitespace-separated token ids. Positive
    sentences are stored first with label ``[1]``, followed by negative
    sentences with label ``[0]``.
    """

    def __init__(self, tokenizer, pos, neg):
        """Parse the id strings in ``pos`` and ``neg`` into integer lists.

        Args:
            tokenizer: Stored on the instance as ``self.tokenizer``; not used
                for parsing.
            pos: Iterable of id strings for positive examples.
            neg: Iterable of id strings for negative examples.
        """
        self.tokenizer = tokenizer
        self.data = []
        self.label = []

        for sentences, sentiment in ((pos, 1), (neg, 0)):
            for sentence in sentences:
                self.data.append(self._cast_to_int(sentence.strip().split()))
                self.label.append([sentiment])

    def _cast_to_int(self, sample):
        """Convert a sequence of id strings into a list of ints."""
        return list(map(int, sample))

    def __len__(self):
        """Number of stored examples."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(token_ids, label)`` for ``index`` as a pair of numpy arrays."""
        token_ids = np.array(self.data[index])
        target = np.array(self.label[index])
        return token_ids, target

In [None]:
# Wrap the encoded id strings in datasets; positives get label 1, negatives label 0.
train_dataset = SentimentDataset(tokenizer, train_pos, train_neg)
dev_dataset = SentimentDataset(tokenizer, dev_pos, dev_neg)

In [None]:
# Sanity check: print the first 11 (token_ids, label) pairs (indices 0 through 10).
for i, item in enumerate(train_dataset):
    print(item)
    if i == 10:
        break

(array([ 101, 6581, 2833, 1012,  102]), array([1]))
(array([  101, 21688,  8013,  2326,  1012,   102]), array([1]))
(array([  101,  2027,  2036,  2031,  3679, 19247,  1998,  3256,  6949,
        2029,  2003,  2428,  2204,  1012,   102]), array([1]))
(array([  101,  2009,  1005,  1055,  1037,  2204, 15174,  2098,  7570,
       22974,  2063,  1012,   102]), array([1]))
(array([ 101, 1996, 3095, 2003, 5379, 1012,  102]), array([1]))
(array([ 101, 2204, 3347, 2833, 1012,  102]), array([1]))
(array([ 101, 2204, 2326, 1012,  102]), array([1]))
(array([  101, 11350,  1997,  2154,  2003, 25628,  1998,  7167,  1997,
       19247,  1012,   102]), array([1]))
(array([  101,  2307,  2173,  2005,  6265,  2030,  3347, 27962,  1998,
        5404,  1012,   102]), array([1]))
(array([ 101, 1996, 2047, 2846, 3504, 6429, 1012,  102]), array([1]))
(array([ 101, 2023, 2173, 2001, 2200, 2204, 1012,  102]), array([1]))


In [None]:
def collate_fn_style(samples):
    """Collate ``(token_ids, label)`` pairs into padded batch tensors.

    Sequences are ordered from longest to shortest and right-padded with zeros
    to the length of the longest one.

    Args:
        samples: Sequence of ``(token_ids, label)`` pairs.

    Returns:
        Tuple ``(input_ids, attention_mask, token_type_ids, position_ids, labels)``.
        The attention mask is 1 over real tokens and 0 over padding, token type
        ids are all zero, and position ids count ``0..width-1`` in every row.
        Labels are reordered to match the length-sorted rows.
    """
    # Keep the raw, unpadded sequences so the mask reflects true lengths.
    input_ids_ori, labels = zip(*samples)
    lengths = [len(sequence) for sequence in input_ids_ori]
    max_len = max(lengths)
    # Ascending argsort, reversed: longest sequence first.
    sorted_indices = np.argsort(lengths)[::-1]

    ordered_sequences = [input_ids_ori[index] for index in sorted_indices]
    input_ids = pad_sequence([torch.tensor(sequence) for sequence in ordered_sequences],
                             batch_first=True)

    mask_rows = []
    for sequence in ordered_sequences:
        real_tokens = len(sequence)
        mask_rows.append([1] * real_tokens + [0] * (max_len - real_tokens))
    attention_mask = torch.tensor(mask_rows)

    # Every padded row has the same width, so these rows are identical.
    row_widths = [len(input_ids[index]) for index in sorted_indices]
    token_type_ids = torch.tensor([[0] * width for width in row_widths])
    position_ids = torch.tensor([list(range(width)) for width in row_widths])

    stacked_labels = np.stack(labels, axis=0)
    labels = torch.tensor(stacked_labels[sorted_indices])

    return input_ids, attention_mask, token_type_ids, position_ids, labels

In [None]:
# Training and evaluation share the batch size stored in the wandb run config.
train_batch_size = wandb.config.batch_size
eval_batch_size = wandb.config.batch_size

# Training batches are reshuffled every epoch and use pinned host memory.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=train_batch_size,
    shuffle=True,
    collate_fn=collate_fn_style,
    pin_memory=True,
    num_workers=2,
)
# The dev set is iterated in a fixed order.
dev_loader = torch.utils.data.DataLoader(
    dev_dataset,
    batch_size=eval_batch_size,
    shuffle=False,
    collate_fn=collate_fn_style,
    num_workers=2,
)

In [None]:
# random seed
def set_seed(random_seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's global RNG and PyTorch (CPU and all CUDA
    devices), and sets the cuDNN ``deterministic`` / ``benchmark`` flags.

    Args:
        random_seed: Integer seed applied to all generators.
    """
    random.seed(random_seed)
    np.random.seed(random_seed)
    # torch.random.manual_seed is the same function as torch.manual_seed, so a
    # single call is sufficient.
    torch.manual_seed(random_seed)
    # Seed every visible GPU, not only the current device; safe when CUDA is absent.
    torch.cuda.manual_seed_all(random_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Fix all RNG seeds before the model is instantiated.
set_seed(42)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the pretrained 'bert-base-uncased' weights into a sequence-classification
# model and move it to the selected device.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [None]:
# Put the model in training mode and optimise all of its parameters with AdamW.
model.train()
optimizer = AdamW(model.parameters(), lr=wandb.config.learning_rate)

# Total optimiser steps = batches per epoch * epochs; the first warmup_ratio
# fraction of them is warmup before the cosine schedule.
t_total = wandb.config.epochs * len(train_loader)
warmup_step = int(wandb.config.warmup_ratio * t_total)
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_step,
    num_training_steps=t_total,
)



In [None]:
def compute_acc(predictions, target_labels):
    """Return the fraction of ``predictions`` equal to ``target_labels``.

    Both arguments are converted to numpy arrays and compared element-wise;
    the result is the mean of the boolean matches.
    """
    predicted = np.array(predictions)
    expected = np.array(target_labels)
    matches = predicted == expected
    return matches.mean()

In [None]:
def _evaluate(model, data_loader, device):
    """Score ``model`` on ``data_loader`` and return ``(mean_batch_loss, accuracy)``.

    The model is switched to eval mode for the pass and its previous train/eval
    mode is restored afterwards, so evaluating in the middle of training does
    not leave dropout disabled for the remaining training steps.
    """
    was_training = model.training
    model.eval()
    valid_losses = []
    predictions = []
    target_labels = []
    try:
        with torch.no_grad():
            for input_ids, attention_mask, token_type_ids, position_ids, labels in tqdm(
                    data_loader, desc='Eval', position=1, leave=None):
                labels = labels.to(device, dtype=torch.long)
                output = model(input_ids=input_ids.to(device),
                               attention_mask=attention_mask.to(device),
                               token_type_ids=token_type_ids.to(device),
                               position_ids=position_ids.to(device),
                               labels=labels)

                valid_losses.append(output.loss.item())

                logits = output.logits
                # Predict class 0 only when its logit is strictly larger,
                # otherwise class 1 (vectorised form of the per-example rule).
                predictions += (~(logits[:, 0] > logits[:, 1])).long().tolist()
                target_labels += labels.reshape(-1).tolist()
    finally:
        model.train(was_training)

    acc = compute_acc(predictions, target_labels)
    valid_loss = sum(valid_losses) / len(valid_losses)
    return valid_loss, acc


train_epoch = wandb.config.epochs
lowest_valid_loss = 9999.
# Evaluate roughly 20 times per epoch; max(1, ...) avoids a modulo-by-zero when
# the training loader has fewer than 20 batches.
eval_interval = max(1, len(train_loader) // 20)

model.train()
for epoch in range(train_epoch):
    with tqdm(train_loader, unit="batch") as tepoch:
        tepoch.set_description(f"Epoch {epoch}")
        for iteration, (input_ids, attention_mask, token_type_ids, position_ids, labels) in enumerate(tepoch):
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            token_type_ids = token_type_ids.to(device)
            position_ids = position_ids.to(device)
            labels = labels.to(device, dtype=torch.long)

            optimizer.zero_grad()

            output = model(input_ids=input_ids,
                           attention_mask=attention_mask,
                           token_type_ids=token_type_ids,
                           position_ids=position_ids,
                           labels=labels)

            loss = output.loss
            loss.backward()

            optimizer.step()
            scheduler.step()

            tepoch.set_postfix(loss=loss.item())
            if iteration != 0 and iteration % eval_interval == 0:
                valid_loss, acc = _evaluate(model, dev_loader, device)
                wandb.log({'accuracy': acc, 'loss': valid_loss})
                # Checkpoint only when the validation loss actually improves.
                if lowest_valid_loss > valid_loss:
                    lowest_valid_loss = valid_loss
                    print('Acc for model which have lower valid loss: ', acc)
                    torch.save(model.state_dict(), "./pytorch_model.bin")

Epoch 0:   5%|▍         | 173/3463 [00:26<07:59,  6.86batch/s, loss=0.125]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.60it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.92it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.96it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.13it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.74it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.82it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.60it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.95it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.29it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.09it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.73it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.73it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.55it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.77it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.9635


Epoch 0:  10%|▉         | 346/3463 [00:54<07:40,  6.76batch/s, loss=0.0951]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.67it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 12.08it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 16.29it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.29it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.87it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.88it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.65it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.84it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.24it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.95it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.68it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.58it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.40it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.76it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.9695


Epoch 0:  15%|█▍        | 519/3463 [01:23<07:27,  6.57batch/s, loss=0.071] 
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.52it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.81it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.69it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.98it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.61it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.71it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.57it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.93it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.34it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.09it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.64it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.63it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.49it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.85it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.96675


Epoch 0:  20%|█▉        | 692/3463 [01:52<06:44,  6.85batch/s, loss=0.104] 
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.42it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.69it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.69it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.85it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.58it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.65it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.56it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.97it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.33it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.03it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.60it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.59it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.46it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.82it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.97225


Epoch 0:  25%|██▍       | 865/3463 [02:21<06:22,  6.79batch/s, loss=0.0794]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.68it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:01, 14.02it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.93it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.09it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.57it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.68it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.42it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.80it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.26it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.00it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.60it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.60it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.46it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.81it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.96675


Epoch 0:  30%|██▉       | 1038/3463 [02:49<05:58,  6.76batch/s, loss=0.0484]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.65it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:01, 14.01it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.84it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.00it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.59it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.63it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.41it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.77it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.14it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.89it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.52it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.49it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.29it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.67it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.97375


Epoch 0:  35%|███▍      | 1211/3463 [03:18<05:25,  6.92batch/s, loss=0.0816]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.59it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.88it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.90it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.07it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.65it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.78it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.52it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.83it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.25it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.94it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.64it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.62it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.43it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.81it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.97175


Epoch 0:  40%|███▉      | 1384/3463 [03:47<05:13,  6.62batch/s, loss=0.0536]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.79it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 12.17it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 15.14it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.40it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.81it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.88it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.70it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 19.00it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.42it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.11it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.81it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.75it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.59it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.90it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.9735


Epoch 0:  45%|████▍     | 1557/3463 [04:15<04:42,  6.74batch/s, loss=0.0581]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.67it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 11.98it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 15.01it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.26it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.66it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.74it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.40it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.85it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.27it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.06it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.73it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.73it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.55it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.90it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.97225


Epoch 0:  50%|████▉     | 1730/3463 [04:44<04:20,  6.66batch/s, loss=0.0696]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.68it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.96it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 16.01it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.24it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.87it/s][A
Eval:  41%|████      | 13/32 [00:00<00:00, 19.00it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.87it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 19.19it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.55it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.25it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.82it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.74it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.56it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 18.01it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.9745


Epoch 0:  55%|█████▍    | 1903/3463 [05:13<03:40,  7.08batch/s, loss=0.171] 
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.72it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:01, 14.08it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.92it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.08it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.69it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.77it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.63it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 19.06it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.47it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.04it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.68it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.59it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.47it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.85it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.976


Epoch 0:  60%|█████▉    | 2076/3463 [05:42<03:22,  6.83batch/s, loss=0.0403]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.67it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.93it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.93it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.01it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.72it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.87it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.82it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 19.15it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.48it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.18it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.73it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.68it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.50it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.85it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.97575


Epoch 0:  65%|██████▍   | 2249/3463 [06:10<02:54,  6.95batch/s, loss=0.0823]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.59it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.82it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.82it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.94it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.61it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.67it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.48it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.93it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.31it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.03it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.71it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.71it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.52it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.89it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.97425


Epoch 0:  70%|██████▉   | 2422/3463 [06:40<02:34,  6.72batch/s, loss=0.0899]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.71it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:01, 14.14it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 16.12it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.30it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.89it/s][A
Eval:  41%|████      | 13/32 [00:00<00:00, 19.00it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.80it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 19.01it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.36it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.10it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.65it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.68it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.56it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.85it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.9765


Epoch 0:  75%|███████▍  | 2595/3463 [07:09<02:08,  6.75batch/s, loss=0.0709]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.54it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 11.92it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 15.00it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.35it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.68it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.69it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.50it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.83it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.21it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.98it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.70it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.73it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.49it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.79it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.977


Epoch 0:  80%|███████▉  | 2768/3463 [07:37<01:46,  6.51batch/s, loss=0.0384]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.71it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:01, 14.14it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.93it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.97it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.58it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.54it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.43it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.76it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.09it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.84it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.46it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.48it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.29it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.70it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.9795


Epoch 0:  85%|████████▍ | 2941/3463 [08:06<01:14,  7.01batch/s, loss=0.088]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.75it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 12.10it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 15.16it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.44it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.83it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.84it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.65it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 19.04it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.27it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.01it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.58it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.58it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.41it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.84it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.979


Epoch 0:  90%|████████▉ | 3114/3463 [08:35<00:51,  6.72batch/s, loss=0.0248]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.36it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.67it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.61it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.77it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.48it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.58it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.43it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.74it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.17it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.91it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.63it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.62it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.41it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.72it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.97725


Epoch 0:  95%|█████████▍| 3287/3463 [09:04<00:26,  6.68batch/s, loss=0.0774]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.35it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.47it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.48it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.73it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.49it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.58it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.52it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.82it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.21it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.01it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.71it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.62it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.41it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.72it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.977


Epoch 0: 100%|█████████▉| 3460/3463 [09:33<00:00,  6.43batch/s, loss=0.0167]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.25it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.41it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.42it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.65it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.35it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.51it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.33it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.80it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.18it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.83it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.46it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.37it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.14it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.63it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.97875


Epoch 0: 100%|██████████| 3463/3463 [09:36<00:00,  6.00batch/s, loss=0.00592]
Epoch 1:   5%|▍         | 173/3463 [00:26<08:38,  6.34batch/s, loss=0.00827]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.64it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.87it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.67it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.81it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.49it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.57it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.45it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.84it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.23it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.95it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.55it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.46it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.20it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.57it/s][A
   

Acc for model which have lower valid loss:  0.97825


Epoch 1:  10%|▉         | 346/3463 [00:54<08:00,  6.49batch/s, loss=0.0288]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.69it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 11.88it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 14.75it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.19it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.58it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.64it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.40it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.73it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.17it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.92it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.51it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.45it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.30it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.60it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.9765


Epoch 1:  15%|█▍        | 519/3463 [01:23<07:18,  6.72batch/s, loss=0.04]  
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.76it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 12.12it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 15.09it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.29it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.66it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.66it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.50it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.94it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.26it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.99it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.67it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.65it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.48it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.92it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.97675


Epoch 1:  20%|█▉        | 692/3463 [01:51<06:58,  6.62batch/s, loss=0.0687]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.36it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.64it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.72it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.92it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.63it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.71it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.67it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 19.03it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.42it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.16it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.71it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.69it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.51it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.84it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.97975


Epoch 1:  25%|██▍       | 865/3463 [02:20<06:46,  6.38batch/s, loss=0.0277]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.49it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.77it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.90it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.18it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.75it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.81it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.70it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 19.03it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.28it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.09it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.78it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.75it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.47it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.80it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.98075


Epoch 1:  30%|██▉       | 1038/3463 [02:49<05:52,  6.88batch/s, loss=0.0117]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.50it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 11.88it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 16.07it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.11it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.71it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.78it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.69it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 19.11it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.46it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.10it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.69it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.71it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.57it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.90it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.98175


Epoch 1:  35%|███▍      | 1211/3463 [03:18<05:53,  6.38batch/s, loss=0.0235]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.73it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 12.14it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 15.06it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.37it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.80it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.92it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.80it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 19.13it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.39it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.10it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.70it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.74it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.62it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.88it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.98125


Epoch 1:  40%|███▉      | 1384/3463 [03:47<05:15,  6.59batch/s, loss=0.0606]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.69it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 12.07it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 15.08it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.41it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.86it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.84it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.67it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 19.02it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.45it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.06it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.74it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.71it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.55it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.87it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.98125


Epoch 1:  45%|████▍     | 1557/3463 [04:16<05:01,  6.32batch/s, loss=0.0435]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.45it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 11.79it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 16.02it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.20it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.88it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.96it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.81it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 19.12it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.46it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.18it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.83it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.79it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.61it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.98it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.98125


Epoch 1:  50%|████▉     | 1730/3463 [04:45<04:35,  6.28batch/s, loss=0.0137] 
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:06,  5.12it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 11.32it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 14.50it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.08it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.60it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.70it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.59it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.98it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.38it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.08it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.77it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.72it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.43it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.80it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.982


Epoch 1:  55%|█████▍    | 1903/3463 [05:13<04:01,  6.45batch/s, loss=0.104]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.26it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 11.56it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 14.51it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.95it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.45it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.61it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.51it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.93it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.36it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.10it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.74it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.62it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.37it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.80it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.98275


Epoch 1:  60%|█████▉    | 2076/3463 [05:42<03:27,  6.67batch/s, loss=0.0622]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.51it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 11.88it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 14.97it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.38it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.73it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.76it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.65it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 19.01it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.36it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.11it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.68it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.57it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.36it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.69it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.981


Epoch 1:  65%|██████▍   | 2249/3463 [06:11<03:01,  6.69batch/s, loss=0.0346]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.62it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 11.91it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 14.85it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.24it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.70it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.75it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.65it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 19.04it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.29it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.04it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.73it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.67it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.45it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.77it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.97875


Epoch 1:  70%|██████▉   | 2422/3463 [06:40<02:30,  6.94batch/s, loss=0.0171]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.64it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.88it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.79it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.94it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.61it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.64it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.44it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.83it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.20it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.87it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.56it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.58it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.36it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.77it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.98125


Epoch 1:  75%|███████▍  | 2595/3463 [07:09<02:14,  6.46batch/s, loss=0.0347]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.75it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:01, 14.21it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.94it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.10it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.76it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.77it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.65it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 19.05it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.44it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.14it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.67it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.64it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.48it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.79it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.98075


Epoch 1:  80%|███████▉  | 2768/3463 [07:37<01:44,  6.65batch/s, loss=0.00715]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.64it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.86it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.82it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.99it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.58it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.66it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.58it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.95it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.36it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 18.01it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.64it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.59it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.33it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.69it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.98


Epoch 1:  85%|████████▍ | 2941/3463 [08:06<01:17,  6.71batch/s, loss=0.0399]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.53it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 11.88it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 14.88it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.19it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.63it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.65it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.46it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.86it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.24it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.99it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.62it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.60it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.40it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.70it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.981


Epoch 1:  90%|████████▉ | 3114/3463 [08:35<00:52,  6.63batch/s, loss=0.0216]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.38it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 11.73it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 14.81it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.18it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.51it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.52it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.27it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.71it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.06it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.80it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.41it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.39it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.13it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.51it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.979


Epoch 1:  95%|█████████▍| 3287/3463 [09:04<00:26,  6.56batch/s, loss=0.0215]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.58it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 11.75it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 14.87it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.29it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.67it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.76it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.58it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.98it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.28it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.98it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.59it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.55it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.35it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.71it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.981


Epoch 1: 100%|█████████▉| 3460/3463 [09:33<00:00,  6.47batch/s, loss=0.0975]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.56it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.69it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.55it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.82it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.43it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.40it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.34it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.79it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.19it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.94it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.55it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.51it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.32it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.69it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.98275


Epoch 1: 100%|██████████| 3463/3463 [09:36<00:00,  6.01batch/s, loss=0.00923]
Epoch 2:   5%|▍         | 173/3463 [00:26<07:48,  7.02batch/s, loss=0.00125]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.59it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.65it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.60it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.82it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.43it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.48it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.33it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.76it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.11it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.83it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.45it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.42it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.27it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.68it/s][A
   

Acc for model which have lower valid loss:  0.98075


Epoch 2:  10%|▉         | 346/3463 [00:55<08:03,  6.44batch/s, loss=0.00863]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.38it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 11.63it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 14.69it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.96it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.53it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.64it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.41it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.76it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.19it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.91it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.46it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.47it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.35it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.65it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.981


Epoch 2:  15%|█▍        | 519/3463 [01:24<07:13,  6.79batch/s, loss=0.0114] 
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.57it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.73it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.69it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.86it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.45it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.55it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.41it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.70it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.19it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.91it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.55it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.52it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.28it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.71it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.981


Epoch 2:  20%|█▉        | 692/3463 [01:52<06:49,  6.77batch/s, loss=0.0263] 
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.53it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.77it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.70it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.84it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.49it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.59it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.41it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.82it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.22it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.90it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.51it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.55it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.39it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.74it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.98175


Epoch 2:  25%|██▍       | 865/3463 [02:21<06:30,  6.65batch/s, loss=0.00694]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.65it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.86it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.57it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.78it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.45it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.55it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.46it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.84it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.19it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.81it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.56it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.56it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.37it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.72it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.9805


Epoch 2:  30%|██▉       | 1038/3463 [02:50<05:55,  6.82batch/s, loss=0.000673]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.64it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 12.01it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 14.95it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.29it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.61it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.58it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.31it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.79it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.18it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.88it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.45it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.40it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.21it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.59it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.9815


Epoch 2:  35%|███▍      | 1211/3463 [03:19<05:45,  6.51batch/s, loss=0.00238]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.45it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 11.82it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 14.92it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.09it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.46it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.52it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.38it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.72it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.09it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.81it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.48it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.48it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.21it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.57it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.98125


Epoch 2:  40%|███▉      | 1384/3463 [03:48<05:16,  6.57batch/s, loss=0.00194] 
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.73it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.92it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.80it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.87it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.41it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.35it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.24it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.63it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.03it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.80it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.46it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.47it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.33it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.72it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.9815


Epoch 2:  45%|████▍     | 1557/3463 [04:17<04:47,  6.63batch/s, loss=0.0544] 
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.43it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.70it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.51it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.69it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.28it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.27it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.22it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.58it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 17.95it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.66it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.36it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.41it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.28it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.60it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.98275


Epoch 2:  50%|████▉     | 1730/3463 [04:46<04:19,  6.69batch/s, loss=0.00115]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.57it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 11.95it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 14.87it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.14it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.55it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.57it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.34it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.70it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.13it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.87it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.55it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.52it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.31it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.70it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.9815


Epoch 2:  55%|█████▍    | 1903/3463 [05:15<03:41,  7.04batch/s, loss=0.0159] 
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.60it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.71it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.72it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.92it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.49it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.53it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.32it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.74it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.10it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.78it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.39it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.43it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.22it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.60it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.98275


Epoch 2:  60%|█████▉    | 2076/3463 [05:44<03:31,  6.56batch/s, loss=0.00467] 
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.62it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.63it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.49it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.71it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.36it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.44it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.23it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.58it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 17.91it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.61it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.33it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.30it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.17it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.64it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.9835


Epoch 2:  65%|██████▍   | 2249/3463 [06:13<03:08,  6.45batch/s, loss=0.0271] 
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.24it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 11.54it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 14.51it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.91it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.35it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.37it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.20it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.63it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 17.96it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.66it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.33it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.36it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.18it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.54it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.9825


Epoch 2:  70%|██████▉   | 2422/3463 [06:42<02:41,  6.46batch/s, loss=0.00267]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.68it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.92it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.77it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.88it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.51it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.63it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.45it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.79it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.14it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.88it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.51it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.47it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.25it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.57it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.98225


Epoch 2:  75%|███████▍  | 2595/3463 [07:11<02:06,  6.86batch/s, loss=0.00615]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.41it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.69it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.66it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.87it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.44it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.54it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.51it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.86it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.18it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.81it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.43it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.37it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.16it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.57it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.9825


Epoch 2:  80%|███████▉  | 2768/3463 [07:40<01:42,  6.76batch/s, loss=0.00746]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.51it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.70it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.59it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.83it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.51it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.59it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.42it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.69it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.09it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.76it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.45it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.43it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.23it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.59it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.9825


Epoch 2:  85%|████████▍ | 2941/3463 [08:09<01:22,  6.36batch/s, loss=0.0331]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.48it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.76it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.63it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.83it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.48it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.57it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.34it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.77it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.17it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.89it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.50it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.45it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.27it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.70it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.98275


Epoch 2:  90%|████████▉ | 3114/3463 [08:38<00:52,  6.62batch/s, loss=0.000756]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.69it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.89it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.73it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.92it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.42it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.38it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.39it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.78it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.10it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.83it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.51it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.51it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.27it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.47it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.98275


Epoch 2:  95%|█████████▍| 3287/3463 [09:07<00:27,  6.48batch/s, loss=0.00433]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.55it/s][A
Eval:   9%|▉         | 3/32 [00:00<00:02, 11.86it/s][A
Eval:  16%|█▌        | 5/32 [00:00<00:01, 14.90it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 17.26it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.54it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.59it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.41it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.74it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.04it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.75it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.38it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.42it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.23it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.57it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.98275


Epoch 2: 100%|█████████▉| 3460/3463 [09:37<00:00,  7.06batch/s, loss=0.00124]
Eval:   0%|          | 0/32 [00:00<?, ?it/s][A
Eval:   3%|▎         | 1/32 [00:00<00:05,  5.36it/s][A
Eval:  12%|█▎        | 4/32 [00:00<00:02, 13.45it/s][A
Eval:  19%|█▉        | 6/32 [00:00<00:01, 15.43it/s][A
Eval:  25%|██▌       | 8/32 [00:00<00:01, 16.66it/s][A
Eval:  34%|███▍      | 11/32 [00:00<00:01, 18.33it/s][A
Eval:  41%|████      | 13/32 [00:00<00:01, 18.45it/s][A
Eval:  50%|█████     | 16/32 [00:00<00:00, 19.37it/s][A
Eval:  56%|█████▋    | 18/32 [00:01<00:00, 18.66it/s][A
Eval:  62%|██████▎   | 20/32 [00:01<00:00, 18.10it/s][A
Eval:  69%|██████▉   | 22/32 [00:01<00:00, 17.87it/s][A
Eval:  75%|███████▌  | 24/32 [00:01<00:00, 17.47it/s][A
Eval:  81%|████████▏ | 26/32 [00:01<00:00, 17.44it/s][A
Eval:  88%|████████▊ | 28/32 [00:01<00:00, 17.14it/s][A
Eval:  94%|█████████▍| 30/32 [00:01<00:00, 17.46it/s][A
                                                     [A

Acc for model which have lower valid loss:  0.98275


Epoch 2: 100%|██████████| 3463/3463 [09:40<00:00,  5.97batch/s, loss=0.000417]


In [None]:
import pandas as pd
# Load the test set from the current working directory (unlabeled, judging by
# the file name).
test_df = pd.read_csv('test_no_label.csv')

In [None]:
# The 'Id' column is what gets tokenized downstream: make_id_file_test
# lower-cases and encodes each of its entries as a sentence.
# NOTE(review): `test_dataset` is rebound later to a SentimentTestDataset, so
# re-running the tokenization cell after that point would pass the wrong object.
test_dataset = test_df['Id']

In [None]:
def make_id_file_test(tokenizer, test_dataset):
    """Encode each test sentence as a space-separated string of token ids.

    Args:
        tokenizer: object exposing ``encode(text)`` that returns an iterable
            of token ids.
        test_dataset: iterable of sentences (strings); each one is lower-cased
            before it is encoded.

    Returns:
        list[str]: one string per sentence, with its ids joined by single
        spaces, in the same order as ``test_dataset``.
    """
    # Encode every sentence before serialising any of them.
    encoded = [tokenizer.encode(text.lower()) for text in test_dataset]
    return [' '.join(map(str, token_ids)) for token_ids in encoded]

In [None]:
# Tokenize every test sentence into a space-separated string of token ids.
test = make_id_file_test(tokenizer, test_dataset)

In [None]:
# Preview the first ten encoded sentences.
test[:10]

['101 2009 1005 1055 1037 2878 2047 3325 1998 2047 26389 2169 2051 2017 2175 1012 102',
 '101 2061 15640 2013 2019 2214 5440 1012 102',
 '101 2009 2003 1996 2087 14469 7273 1999 1996 3028 1012 102',
 '101 2079 2025 3696 1037 10084 2007 2122 2111 1012 102',
 '101 1045 2001 6091 1998 2016 2081 2033 2514 2061 6625 1998 6160 1012 102',
 '101 1996 2069 2518 2057 2363 2008 2001 2980 2001 1996 4157 1012 102',
 '101 2053 1010 2025 1996 3924 2012 2004 2226 1010 1996 3924 1999 3502 2152 1012 102',
 '101 2027 3288 2009 2041 2392 2005 2017 1998 2024 2200 14044 1012 102',
 '101 4606 1996 12043 2106 1050 1005 1056 2130 2113 2129 2000 2147 1996 3274 1012 102',
 '101 2027 2031 2019 6581 4989 1997 25025 2015 2000 5454 2013 1012 102']

In [None]:
class SentimentTestDataset(object):
    """In-memory dataset of pre-tokenized test sentences.

    Each input string is a whitespace-separated sequence of integer token ids
    (the format produced by ``make_id_file_test``). Every string is parsed
    once, at construction time, into a list of ints.
    """

    def __init__(self, tokenizer, test):
        """Parse every id string in ``test``.

        Args:
            tokenizer: stored on the instance; not used by this class itself.
            test: iterable of strings of whitespace-separated integer ids.
        """
        self.tokenizer = tokenizer
        self.data = [self._cast_to_int(sent.strip().split()) for sent in test]

    def _cast_to_int(self, sample):
        """Convert a sequence of id strings into a list of ints."""
        return [int(word_id) for word_id in sample]

    def __len__(self):
        """Return the number of parsed sentences."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the sample(s) at ``index`` as a NumPy array.

        An integer index yields a 1-D array of token ids. A slice whose
        samples all share one length yields whatever ``np.array`` builds from
        the list of lists. A slice whose samples differ in length yields a
        1-D ``dtype=object`` array whose elements are the per-sample id lists.
        """
        sample = self.data[index]
        # np.array() on ragged nested lists raises ValueError on NumPy >= 1.24
        # (and only warned before that), so build the object array explicitly.
        if isinstance(index, slice) and len({len(ids) for ids in sample}) > 1:
            ragged = np.empty(len(sample), dtype=object)
            for position, ids in enumerate(sample):
                ragged[position] = ids
            return ragged
        return np.array(sample)

In [None]:
# Rebinds `test_dataset` (previously the raw 'Id' column) to the parsed
# dataset that the DataLoader consumes.
test_dataset = SentimentTestDataset(tokenizer, test)

In [None]:
test_dataset[:10] # for debugging: inspect the first ten parsed samples



array([list([101, 2009, 1005, 1055, 1037, 2878, 2047, 3325, 1998, 2047, 26389, 2169, 2051, 2017, 2175, 1012, 102]),
       list([101, 2061, 15640, 2013, 2019, 2214, 5440, 1012, 102]),
       list([101, 2009, 2003, 1996, 2087, 14469, 7273, 1999, 1996, 3028, 1012, 102]),
       list([101, 2079, 2025, 3696, 1037, 10084, 2007, 2122, 2111, 1012, 102]),
       list([101, 1045, 2001, 6091, 1998, 2016, 2081, 2033, 2514, 2061, 6625, 1998, 6160, 1012, 102]),
       list([101, 1996, 2069, 2518, 2057, 2363, 2008, 2001, 2980, 2001, 1996, 4157, 1012, 102]),
       list([101, 2053, 1010, 2025, 1996, 3924, 2012, 2004, 2226, 1010, 1996, 3924, 1999, 3502, 2152, 1012, 102]),
       list([101, 2027, 3288, 2009, 2041, 2392, 2005, 2017, 1998, 2024, 2200, 14044, 1012, 102]),
       list([101, 4606, 1996, 12043, 2106, 1050, 1005, 1056, 2130, 2113, 2129, 2000, 2147, 1996, 3274, 1012, 102]),
       list([101, 2027, 2031, 2019, 6581, 4989, 1997, 25025, 2015, 2000, 5454, 2013, 1012, 102])],
      dtype=object)

In [None]:
def collate_fn_style_test_mine(samples):
    """Collate a batch of token-id sequences, longest sequence first.

    WARNING: the rows of every returned tensor are ordered by descending
    sequence length, not in the order of ``samples``, and the permutation is
    not returned. Predictions made from this batch therefore do not line up
    with the input order.

    Args:
        samples: non-empty list of token-id sequences (assumed 1-D).

    Returns:
        tuple: ``(input_ids, attention_mask, token_type_ids, position_ids)``.
        ``input_ids`` is zero-padded to the longest sequence,
        ``attention_mask`` is 1 on real tokens and 0 on padding,
        ``token_type_ids`` is all zeros, and each row of ``position_ids``
        counts up from 0.
    """
    lengths = [len(ids) for ids in samples]
    max_len = max(lengths)
    # Ascending argsort reversed -> indices from longest to shortest.
    order = np.argsort(lengths)[::-1]
    ordered = [samples[i] for i in order]

    input_ids = pad_sequence([torch.tensor(ids) for ids in ordered],
                             batch_first=True)
    attention_mask = torch.tensor(
        [[1] * len(ids) + [0] * (max_len - len(ids)) for ids in ordered])

    # Every padded row has the same length, so measure it once.
    padded_len = len(input_ids[0])
    token_type_ids = torch.tensor([[0] * padded_len for _ in order])
    position_ids = torch.tensor([list(range(padded_len)) for _ in order])

    return input_ids, attention_mask, token_type_ids, position_ids

In [None]:
def collate_fn_style_test(samples):
    """Collate a batch of token-id sequences, preserving the input order.

    Unlike the length-sorting variant, row ``i`` of every returned tensor
    corresponds to ``samples[i]``, so predictions stay aligned with the
    dataset order.

    Args:
        samples: non-empty list of token-id sequences (assumed 1-D).

    Returns:
        tuple: ``(input_ids, attention_mask, token_type_ids, position_ids)``.
        ``input_ids`` is zero-padded to the longest sequence,
        ``attention_mask`` is 1 on real tokens and 0 on padding,
        ``token_type_ids`` is all zeros, and each row of ``position_ids``
        counts up from 0.

    Raises:
        ValueError: if ``samples`` is empty (raised by ``max``).
    """
    lengths = [len(ids) for ids in samples]
    max_len = max(lengths)

    input_ids = pad_sequence([torch.tensor(ids) for ids in samples],
                             batch_first=True)
    attention_mask = torch.tensor(
        [[1] * length + [0] * (max_len - length) for length in lengths])

    # Build the constant tensors directly instead of looping over tensor rows.
    batch_size, padded_len = len(samples), input_ids.size(1)
    token_type_ids = torch.zeros((batch_size, padded_len), dtype=torch.long)
    position_ids = torch.arange(padded_len, dtype=torch.long).repeat(batch_size, 1)

    return input_ids, attention_mask, token_type_ids, position_ids

In [None]:
# shuffle=False together with the order-preserving collate_fn_style_test keeps
# batches in dataset order; the later `test_df['Category'] = predictions`
# assignment relies on that ordering.
test_batch_size = 32
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=test_batch_size,
                                          shuffle=False, collate_fn=collate_fn_style_test,
                                          num_workers=2)

In [None]:
# Run inference over the test set with gradients disabled and the model in
# evaluation mode. `predictions` collects one integer label (0 or 1) per test
# sample, in the order the loader yields them.
with torch.no_grad():
    model.eval()
    predictions = []
    for input_ids, attention_mask, token_type_ids, position_ids in tqdm(test_loader,
                                                                        desc='Test',
                                                                        position=1,
                                                                        leave=None):
        # Batches must stay in dataset order (the collate function must not
        # sort by length), otherwise predictions misalign with the test rows.
        output = model(input_ids=input_ids.to(device),
                       attention_mask=attention_mask.to(device),
                       token_type_ids=token_type_ids.to(device),
                       position_ids=position_ids.to(device))

        logits = output.logits
        # Label 0 only when logit 0 is strictly greater than logit 1; every
        # other case (including ties) maps to 1. Computed for the whole batch
        # at once to avoid one host-device sync per example.
        picks_zero = logits[:, 0] > logits[:, 1]
        predictions += (~picks_zero).long().tolist()


Test:   0%|          | 0/32 [00:00<?, ?it/s][A
Test:   3%|▎         | 1/32 [00:00<00:04,  6.85it/s][A
Test:  22%|██▏       | 7/32 [00:00<00:00, 31.55it/s][A
Test:  41%|████      | 13/32 [00:00<00:00, 42.02it/s][A
Test:  59%|█████▉    | 19/32 [00:00<00:00, 46.97it/s][A
Test:  78%|███████▊  | 25/32 [00:00<00:00, 49.23it/s][A
Test:  97%|█████████▋| 31/32 [00:00<00:00, 51.25it/s][A
                                                     [A

In [None]:
# predictions[i] is taken to belong to row i of test_df; this holds because the
# loader is unshuffled and its collate function does not reorder samples.
test_df['Category'] = predictions

In [None]:
# Write the submission file; index=False leaves the DataFrame index out of the CSV.
test_df.to_csv('submission.csv', index=False)