In [None]:
# Install into the environment of the running kernel (%pip), not whichever `pip` is first on PATH.
%pip install transformers datasets tokenizers

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.5.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m24.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.12.0-py3-none-any.

In [None]:
# Fetch the Cornell movie-dialogs corpus and keep only the two files used below.
# The http:// URL just 301-redirects to https://, so request https directly.
!wget https://www.cs.cornell.edu/~cristian/data/cornell_movie_dialogs_corpus.zip
# -o: overwrite without prompting, so re-running the cell cannot stall on "replace?" questions
!unzip -qq -o cornell_movie_dialogs_corpus.zip
!rm cornell_movie_dialogs_corpus.zip
# -p: do not fail when the directory already exists (cell is re-runnable)
!mkdir -p datasets
!mv cornell\ movie-dialogs\ corpus/movie_conversations.txt ./datasets
!mv cornell\ movie-dialogs\ corpus/movie_lines.txt ./datasets

--2025-04-27 14:04:37--  http://www.cs.cornell.edu/~cristian/data/cornell_movie_dialogs_corpus.zip
Resolving www.cs.cornell.edu (www.cs.cornell.edu)... 132.236.207.53
Connecting to www.cs.cornell.edu (www.cs.cornell.edu)|132.236.207.53|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://www.cs.cornell.edu/~cristian/data/cornell_movie_dialogs_corpus.zip [following]
--2025-04-27 14:04:38--  https://www.cs.cornell.edu/~cristian/data/cornell_movie_dialogs_corpus.zip
Connecting to www.cs.cornell.edu (www.cs.cornell.edu)|132.236.207.53|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 9916637 (9.5M) [application/zip]
Saving to: ‘cornell_movie_dialogs_corpus.zip’


2025-04-27 14:04:41 (3.87 MB/s) - ‘cornell_movie_dialogs_corpus.zip’ saved [9916637/9916637]



In [None]:
import ast
import itertools
import math
import os
import random
import re
from pathlib import Path

import numpy as np
import torch
import torch.nn.functional as F
import tqdm
import transformers, datasets
from tokenizers import BertWordPieceTokenizer
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer


# Maximum number of whitespace-separated words kept from each utterance.
MAX_LEN = 64

### Loading all data into memory
corpus_movie_conv = './datasets/movie_conversations.txt'
corpus_movie_lines = './datasets/movie_lines.txt'
with open(corpus_movie_conv, 'r', encoding='iso-8859-1') as c:
    conv = c.readlines()
with open(corpus_movie_lines, 'r', encoding='iso-8859-1') as l:
    lines = l.readlines()

### splitting text using special lines
# Each record is "<field> +++$+++ <field> ...": the first field is the line id,
# the last field is the utterance text.
lines_dic = {}
for line in lines:
    objects = line.split(" +++$+++ ")
    lines_dic[objects[0]] = objects[-1]

### generate question answer pairs
# The last field of a conversation record is a Python-style list literal of line ids.
# It comes from a downloaded file, so parse it with ast.literal_eval (literals only)
# instead of eval (arbitrary code execution).
pairs = []
for con in conv:
    if not con.strip():
        # tolerate blank lines instead of failing to parse them
        continue
    ids = ast.literal_eval(con.split(" +++$+++ ")[-1].strip())

    # every consecutive (utterance, reply) couple of the conversation becomes one pair
    for first_id, second_id in zip(ids, ids[1:]):
        first = lines_dic[first_id].strip()
        second = lines_dic[second_id].strip()

        pairs.append([
            ' '.join(first.split()[:MAX_LEN]),
            ' '.join(second.split()[:MAX_LEN]),
        ])

In [None]:
#### Sanity check: show one [utterance, reply] pair, then the total number of pairs
print(pairs[20], len(pairs), sep='\n')

["I really, really, really wanna go, but I can't. Not unless my sister goes.", "I'm workin' on it. But she doesn't seem to be goin' for him."]
221616


In [None]:
## WordPiece tokenizer

### save data as txt file
# The tokenizer trainer reads plain-text files, so dump the first sentence of
# every pair to ./data in chunks of 10k lines.
os.makedirs('./data', exist_ok=True)
text_data = []
file_count = 0

for sample in tqdm.tqdm([x[0] for x in pairs]):
    text_data.append(sample)

    # once we hit the 10k mark, save to file
    if len(text_data) == 10000:
        with open(f'./data/text_{file_count}.txt', 'w', encoding='utf-8') as fp:
            fp.write('\n'.join(text_data))
        text_data = []
        file_count += 1

# Flush the final partial chunk; without this the last (< 10k) samples are
# never written and are silently left out of tokenizer training.
if text_data:
    with open(f'./data/text_{file_count}.txt', 'w', encoding='utf-8') as fp:
        fp.write('\n'.join(text_data))
    text_data = []
    file_count += 1

paths = [str(x) for x in Path('./data').glob('**/*.txt')]

### training own tokenizer
tokenizer = BertWordPieceTokenizer(
    clean_text=True,
    handle_chinese_chars=False,
    strip_accents=False,
    lowercase=True
)

# '[PAD]' is listed first among the special tokens; the dataset and the losses
# below treat id 0 as padding -- presumably relying on this order (TODO confirm).
tokenizer.train(
    files=paths,
    vocab_size=30_000,
    min_frequency=5,
    limit_alphabet=1000,
    wordpieces_prefix='##',
    special_tokens=['[PAD]', '[CLS]', '[SEP]', '[MASK]', '[UNK]']
)


# exist_ok so that re-running the cell does not raise FileExistsError
os.makedirs('./bert-it-1', exist_ok=True)
tokenizer.save_model('./bert-it-1', 'bert-it')
# Reload the saved vocabulary as a `transformers` BertTokenizer; this is the
# object the dataset below calls and whose `.vocab` it reads.
tokenizer = BertTokenizer.from_pretrained('./bert-it-1/bert-it-vocab.txt', local_files_only=True)

100%|██████████| 221616/221616 [00:00<00:00, 1959757.78it/s]


In [None]:
class BERTDataset(Dataset):
    """Turns sentence pairs into BERT pre-training examples.

    Each item is a dict of tensors with the keys ``bert_input`` (token ids with
    random masking applied), ``bert_label`` (original ids at the selected
    positions, 0 everywhere else), ``segment_label`` (1 for the first sentence,
    2 for the second, padding id for padding) and ``is_next`` (1 if the second
    sentence is the real follow-up, 0 if it was drawn at random).
    """

    def __init__(self, data_pair, tokenizer, seq_len=64):
        """
        :param data_pair: indexable collection of ``[first, second]`` sentence pairs
        :param tokenizer: callable returning ``{'input_ids': [...]}`` and exposing ``.vocab``
        :param seq_len: fixed length every example is truncated / padded to
        """
        self.lines = data_pair
        self.corpus_lines = len(data_pair)
        self.seq_len = seq_len
        self.tokenizer = tokenizer

    def __len__(self):
        return self.corpus_lines

    def __getitem__(self, item):
        # 1) pick the pair (real follow-up or random negative)
        first, second, is_next_label = self.get_sent(item)

        # 2) tokenize and corrupt both sentences
        first_ids, first_targets = self.random_word(first)
        second_ids, second_targets = self.random_word(second)

        # 3) wrap with [CLS] / [SEP]; the label sequence gets [PAD] at those slots
        vocab = self.tokenizer.vocab
        cls_id, sep_id, pad_id = vocab['[CLS]'], vocab['[SEP]'], vocab['[PAD]']
        first_ids = [cls_id] + first_ids + [sep_id]
        second_ids = second_ids + [sep_id]
        first_targets = [pad_id] + first_targets + [pad_id]
        second_targets = second_targets + [pad_id]

        # 4) concatenate, truncate to seq_len, then right-pad to seq_len
        limit = self.seq_len
        bert_input = (first_ids + second_ids)[:limit]
        bert_label = (first_targets + second_targets)[:limit]
        segment_label = ([1] * len(first_ids) + [2] * len(second_ids))[:limit]

        fill = [pad_id] * (limit - len(bert_input))
        bert_input += fill
        bert_label += fill
        segment_label += fill

        return {
            "bert_input": torch.tensor(bert_input, dtype=torch.long),
            "bert_label": torch.tensor(bert_label, dtype=torch.long),
            "segment_label": torch.tensor(segment_label, dtype=torch.long),
            "is_next": torch.tensor(is_next_label),
        }

    def random_word(self, sentence):
        """Tokenize ``sentence`` word by word and corrupt ~15% of the words.

        A selected word has all of its word-pieces replaced by [MASK] (80%),
        by random vocabulary ids (10%), or left untouched (10%).

        :return: ``(ids, labels)`` of equal length; ``labels`` holds the original
                 ids for selected words and 0 for every other position
        """
        vocab = self.tokenizer.vocab
        corrupted, targets = [], []

        for word in sentence.split():
            draw = random.random()

            # drop the [CLS] / [SEP] ids the tokenizer wraps around the word
            piece_ids = self.tokenizer(word)['input_ids'][1:-1]
            n_pieces = len(piece_ids)

            # word not selected: keep it and mark every piece as "no label"
            if draw >= 0.15:
                corrupted.extend(piece_ids)
                targets.extend([0] * n_pieces)
                continue

            # rescale the draw to [0, 1) to choose the kind of corruption
            draw /= 0.15
            if draw < 0.8:
                corrupted.extend([vocab['[MASK]']] * n_pieces)
            elif draw < 0.9:
                corrupted.extend(random.randrange(len(vocab)) for _ in range(n_pieces))
            else:
                corrupted.extend(piece_ids)
            targets.extend(piece_ids)

        assert len(corrupted) == len(targets)
        return corrupted, targets

    def get_sent(self, index):
        '''Return (first, second, is_next): the true pair or a random negative, about 50/50.'''
        first, second = self.get_corpus_line(index)

        if random.random() > 0.5:
            return first, second, 1
        return first, self.get_random_line(), 0

    def get_corpus_line(self, item):
        '''Return the stored sentence pair at position ``item``.'''
        pair = self.lines[item]
        return pair[0], pair[1]

    def get_random_line(self):
        '''Return the second sentence of a uniformly chosen pair.'''
        pick = random.randrange(len(self.lines))
        return self.lines[pick][1]

In [None]:
dataset = BERTDataset(pairs, tokenizer, seq_len=64)

# Build the training example for pair 86 (masking and negative sampling are random)
output = dataset[86]

# Report the dtype of every tensor in the example
for field in ("bert_input", "bert_label", "segment_label", "is_next"):
    print(f"Dtype of {field}:", output[field].dtype)

# The raw sentences stored for that pair
t1, t2 = dataset.get_corpus_line(86)
print("Sentence 1 (t1):", t1)
print("Sentence 2 (t2):", t2)

Dtype of bert_input: torch.int64
Dtype of bert_label: torch.int64
Dtype of segment_label: torch.int64
Dtype of is_next: torch.int64
Sentence 1 (t1): Now I do. Back then, was a different story.
Sentence 2 (t2): As in...


In [None]:
class PositionalEmbedding(torch.nn.Module):
    """Fixed sinusoidal position encodings.

    For position ``pos`` and even dimension ``i``:
        pe[pos, i]     = sin(pos / 10000 ** (i / d_model))
        pe[pos, i + 1] = cos(pos / 10000 ** (i / d_model))
    i.e. each (sin, cos) pair shares one frequency.
    """

    def __init__(self, d_model, max_len=128):
        """
        :param d_model: size of each position vector (odd sizes are supported)
        :param max_len: number of positions to precompute
        """
        super().__init__()

        # (max_len, 1) column of positions
        position = torch.arange(max_len, dtype=torch.float).unsqueeze(1)
        # one frequency per even dimension index i = 0, 2, 4, ...;
        # the exponent is i / d_model because i already advances in steps of 2
        even_dims = torch.arange(0, d_model, 2, dtype=torch.float)
        inv_freq = torch.exp(even_dims * (-math.log(10000.0) / d_model))
        angles = position * inv_freq  # (max_len, ceil(d_model / 2))

        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # when d_model is odd there is one fewer odd column than even columns
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        # Registered as a buffer so it follows the module across .to(device);
        # non-persistent so state_dict keys are the same as without the buffer.
        # Buffers are not parameters, so no gradient is tracked for it.
        self.register_buffer('pe', pe.unsqueeze(0), persistent=False)

    def forward(self, x):
        """Return encodings of shape (1, L, d_model) with L = min(x.size(1), max_len).

        Only the second dimension of ``x`` is read (taken to be the sequence
        length); the values of ``x`` are not used.
        """
        return self.pe[:, :x.size(1)]


class BERTEmbedding(torch.nn.Module):
    """
    Input embedding for BERT: the element-wise sum of
        1. a learned token embedding,
        2. a positional embedding (PositionalEmbedding),
        3. a learned segment embedding (ids 1 and 2 for the two sentences, 0 for padding),
    followed by dropout.
    """

    def __init__(self, vocab_size, embed_size, seq_len=64, dropout=0.1):
        """
        :param vocab_size: number of rows in the token embedding table
        :param embed_size: width of every embedding vector
        :param seq_len: number of positions handed to PositionalEmbedding as max_len
        :param dropout: dropout probability applied to the summed embedding
        """
        super().__init__()
        self.embed_size = embed_size

        # index 0 is the padding index of both learned tables
        self.token = torch.nn.Embedding(vocab_size, embed_size, padding_idx=0)
        self.segment = torch.nn.Embedding(3, embed_size, padding_idx=0)
        self.position = PositionalEmbedding(d_model=embed_size, max_len=seq_len)
        self.dropout = torch.nn.Dropout(p=dropout)

    def forward(self, sequence, segment_label):
        """Embed token ids and segment ids: (m, seq_len) -> (m, seq_len, embed_size)."""
        token_part = self.token(sequence)
        position_part = self.position(sequence)
        segment_part = self.segment(segment_label)

        summed = token_part + position_part + segment_part
        return self.dropout(summed)

In [None]:
### attention layers
class MultiHeadedAttention(torch.nn.Module):
    """Scaled dot-product attention computed over ``heads`` parallel heads."""

    def __init__(self, heads, d_model, dropout=0.1):
        """
        :param heads: number of attention heads; must divide d_model
        :param d_model: width of the input and output vectors
        :param dropout: dropout probability applied to the attention weights
        """
        super().__init__()

        assert d_model % heads == 0
        self.d_k = d_model // heads
        self.heads = heads
        self.dropout = torch.nn.Dropout(dropout)

        self.query = torch.nn.Linear(d_model, d_model)
        self.key = torch.nn.Linear(d_model, d_model)
        self.value = torch.nn.Linear(d_model, d_model)
        self.output_linear = torch.nn.Linear(d_model, d_model)

    def _split_heads(self, projected):
        """(batch, len, d_model) -> (batch, heads, len, d_k)."""
        batch = projected.shape[0]
        return projected.view(batch, -1, self.heads, self.d_k).transpose(1, 2)

    def forward(self, query, key, value, mask):
        """
        :param query, key, value: tensors of shape (batch_size, max_len, d_model)
        :param mask: tensor broadcastable to (batch_size, heads, max_len, max_len);
                     positions where it equals 0 receive no attention
        :return: tensor of shape (batch_size, max_len, d_model)
        """
        q = self._split_heads(self.query(query))
        k = self._split_heads(self.key(key))
        v = self._split_heads(self.value(value))

        # similarity of every query with every key, scaled by sqrt(d_k)
        scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(q.size(-1))

        # a large negative score turns into a ~0 weight after the softmax
        scores = scores.masked_fill(mask == 0, -1e9)

        attention = self.dropout(F.softmax(scores, dim=-1))

        # weighted sum of the values: (batch, heads, max_len, d_k)
        context = torch.matmul(attention, v)

        # put the heads back side by side: (batch, max_len, d_model)
        batch = context.shape[0]
        merged = context.transpose(1, 2).contiguous().view(batch, -1, self.heads * self.d_k)

        return self.output_linear(merged)



class FeedForward(torch.nn.Module):
    """Position-wise feed-forward network: Linear -> GELU -> Dropout -> Linear."""

    def __init__(self, d_model, middle_dim=2048, dropout=0.1):
        """
        :param d_model: input and output width
        :param middle_dim: width of the hidden layer
        :param dropout: dropout probability applied to the hidden activations
        """
        super().__init__()

        self.fc1 = torch.nn.Linear(d_model, middle_dim)
        self.fc2 = torch.nn.Linear(middle_dim, d_model)
        self.dropout = torch.nn.Dropout(dropout)
        self.activation = torch.nn.GELU()

    def forward(self, x):
        hidden = self.fc1(x)
        hidden = self.activation(hidden)
        hidden = self.dropout(hidden)
        return self.fc2(hidden)


class EncoderLayer(torch.nn.Module):
    """One Transformer encoder block: self-attention and a feed-forward network,
    each followed by dropout, a residual connection and layer normalisation.
    """

    def __init__(
        self,
        d_model=768,
        heads=12,
        feed_forward_hidden=768 * 4,
        dropout=0.1
        ):
        """
        :param d_model: width of the token representations
        :param heads: number of attention heads
        :param feed_forward_hidden: hidden width of the feed-forward network
        :param dropout: dropout probability used by this block and by both sublayers
        """
        super(EncoderLayer, self).__init__()
        # NOTE(review): one LayerNorm instance is applied after both sublayers, so the
        # two normalisations share weights. Kept as is so parameter names and counts
        # do not change; confirm whether separate norms were intended.
        self.layernorm = torch.nn.LayerNorm(d_model)
        # forward `dropout` to the sublayers; previously they always used their own default
        self.self_multihead = MultiHeadedAttention(heads, d_model, dropout=dropout)
        self.feed_forward = FeedForward(d_model, middle_dim=feed_forward_hidden, dropout=dropout)
        self.dropout = torch.nn.Dropout(dropout)

    def forward(self, embeddings, mask):
        """
        :param embeddings: (batch_size, max_len, d_model)
        :param mask: attention mask handed unchanged to the attention sublayer
        :return: (batch_size, max_len, d_model)
        """
        # self-attention: query, key and value are all the layer input
        interacted = self.dropout(self.self_multihead(embeddings, embeddings, embeddings, mask))
        # residual connection + normalisation
        interacted = self.layernorm(interacted + embeddings)
        # position-wise feed-forward, again with residual + normalisation
        feed_forward_out = self.dropout(self.feed_forward(interacted))
        encoded = self.layernorm(feed_forward_out + interacted)
        return encoded  # (batch_size, max_len, d_model)


### **Final BERT Model**

In [None]:
class BERT(torch.nn.Module):
    """
    BERT model: Bidirectional Encoder Representations from Transformers.

    An embedding layer followed by a stack of ``n_layers`` encoder blocks.
    """

    def __init__(self, vocab_size, d_model=768, n_layers=12, heads=12, dropout=0.1, seq_len=64):
        """
        :param vocab_size: number of tokens in the vocabulary
        :param d_model: hidden size of the model
        :param n_layers: number of Transformer encoder blocks
        :param heads: number of attention heads per block
        :param dropout: dropout rate used by the embedding and by every encoder block
        :param seq_len: number of positions the embedding layer prepares encodings for
                        (default 64, the value the embedding used before this was configurable)
        """

        super().__init__()
        self.d_model = d_model
        self.n_layers = n_layers
        self.heads = heads
        self.seq_len = seq_len

        # paper noted they used 4 * hidden_size for ff_network_hidden_size
        self.feed_forward_hidden = d_model * 4

        # embedding for BERT, sum of positional, segment, token embeddings
        self.embedding = BERTEmbedding(
            vocab_size=vocab_size, embed_size=d_model, seq_len=seq_len, dropout=dropout
        )

        # multi-layers transformer blocks, deep network
        self.encoder_blocks = torch.nn.ModuleList(
            [EncoderLayer(d_model, heads, self.feed_forward_hidden, dropout) for _ in range(n_layers)]
        )

    def forward(self, x, segment_info):
        """
        :param x: token ids, shape (batch_size, seq_len); ids <= 0 are treated as padding
        :param segment_info: segment ids, same shape as ``x``
        :return: encoded sequence, shape (batch_size, seq_len, d_model)
        """
        # attention mask for padded tokens, shape (batch_size, 1, seq_len, seq_len):
        # entry [b, 0, i, j] is True when key position j of sample b is not padding
        mask = (x > 0).unsqueeze(1).repeat(1, x.size(1), 1).unsqueeze(1)

        # embedding the indexed sequence to sequence of vectors
        x = self.embedding(x, segment_info)

        # running over multiple transformer blocks; call the module itself
        # (not .forward) so that registered hooks are executed
        for encoder in self.encoder_blocks:
            x = encoder(x, mask)
        return x


class NextSentencePrediction(torch.nn.Module):
    """
    Binary classification head (is_next / is_not_next) that reads only the
    first position of the encoded sequence.
    """

    def __init__(self, hidden):
        """
        :param hidden: size of the vectors produced by the BERT model
        """
        super().__init__()
        self.linear = torch.nn.Linear(hidden, 2)
        self.softmax = torch.nn.LogSoftmax(dim=-1)

    def forward(self, x):
        # position 0 holds the [CLS] token
        cls_state = x[:, 0]
        logits = self.linear(cls_state)
        return self.softmax(logits)


class MaskedLanguageModel(torch.nn.Module):
    """
    Token prediction head: maps every position of the encoded sequence to
    log-probabilities over the vocabulary (an n-class problem, n = vocab_size).
    """

    def __init__(self, hidden, vocab_size):
        """
        :param hidden: size of the vectors produced by the BERT model
        :param vocab_size: number of classes, i.e. tokens in the vocabulary
        """
        super().__init__()
        self.linear = torch.nn.Linear(hidden, vocab_size)
        self.softmax = torch.nn.LogSoftmax(dim=-1)

    def forward(self, x):
        logits = self.linear(x)
        return self.softmax(logits)


class BERTLM(torch.nn.Module):
    """
    BERT pre-training model: a shared BERT encoder with two heads,
    next-sentence prediction and masked-token prediction.
    """

    def __init__(self, bert: BERT, vocab_size):
        """
        :param bert: the BERT encoder to train
        :param vocab_size: vocabulary size used by the masked-token head
        """

        super().__init__()
        self.bert = bert
        hidden = self.bert.d_model
        self.next_sentence = NextSentencePrediction(hidden)
        self.mask_lm = MaskedLanguageModel(hidden, vocab_size)

    def forward(self, x, segment_label):
        """Return (next-sentence log-probs, masked-token log-probs) for a batch."""
        encoded = self.bert(x, segment_label)
        next_sentence_out = self.next_sentence(encoded)
        mask_lm_out = self.mask_lm(encoded)
        return next_sentence_out, mask_lm_out

### **Optimizer**

In [None]:
class ScheduledOptim():
    '''Wraps an optimizer and sets its learning rate before every step.

    lr(step) = d_model ** -0.5 * min(step ** -0.5, step * n_warmup_steps ** -1.5)
    '''

    def __init__(self, optimizer, d_model, n_warmup_steps):
        self._optimizer = optimizer
        self.n_warmup_steps = n_warmup_steps
        self.n_current_steps = 0
        self.init_lr = np.power(d_model, -0.5)

    def step_and_update_lr(self):
        "Advance the schedule, then step the inner optimizer"
        self._update_learning_rate()
        self._optimizer.step()

    def zero_grad(self):
        "Clear the gradients held by the inner optimizer"
        self._optimizer.zero_grad()

    def _get_lr_scale(self):
        step = self.n_current_steps
        decay = np.power(step, -0.5)
        warmup = np.power(self.n_warmup_steps, -1.5) * step
        # the smaller of the two terms applies
        return np.minimum(decay, warmup)

    def _update_learning_rate(self):
        ''' Count one more step and write the new rate into every parameter group '''

        self.n_current_steps += 1
        new_lr = self.init_lr * self._get_lr_scale()

        for group in self._optimizer.param_groups:
            group['lr'] = new_lr

### **Trainer**

In [None]:
class BERTTrainer:
    """Runs BERT pre-training epochs: next-sentence prediction + masked-token prediction.

    The model is expected to return ``(next_sentence_log_probs, mask_lm_log_probs)``
    and to expose ``model.bert.d_model``; batches are dicts with the keys
    ``bert_input``, ``segment_label``, ``bert_label`` and ``is_next``.
    """

    def __init__(
        self,
        model,
        train_dataloader,
        test_dataloader=None,
        lr= 1e-4,
        weight_decay=0.01,
        betas=(0.9, 0.999),
        warmup_steps=10000,
        log_freq=10,
        device='cuda'
      ):
        """
        :param model: the language model to train
        :param train_dataloader: iterable of training batches
        :param test_dataloader: iterable of evaluation batches (needed only by ``test``)
        :param lr: learning rate the Adam optimizer is created with
        :param weight_decay: Adam weight decay
        :param betas: Adam betas
        :param warmup_steps: warm-up steps handed to the learning-rate schedule
        :param log_freq: a progress line is written every ``log_freq`` batches
        :param device: device the model and every batch are moved to
        """

        self.device = device
        # Move the model before building the optimizer so that batches (moved in
        # `iteration`) and parameters live on the same device.
        self.model = model.to(device)
        self.train_data = train_dataloader
        self.test_data = test_dataloader

        # Setting the Adam optimizer with hyper-param
        self.optim = Adam(self.model.parameters(), lr=lr, betas=betas, weight_decay=weight_decay)
        self.optim_schedule = ScheduledOptim(
            self.optim, self.model.bert.d_model, n_warmup_steps=warmup_steps
            )

        # Masked-token loss: label 0 marks "nothing to predict here" and is ignored.
        self.criterion = torch.nn.NLLLoss(ignore_index=0)
        # Next-sentence loss: label 0 is the real class "not next" and must be
        # counted, so this criterion has no ignore_index.
        self.next_criterion = torch.nn.NLLLoss()
        self.log_freq = log_freq
        print("Total Parameters:", sum(p.nelement() for p in self.model.parameters()))


    def train(self, epoch):
        """Run one training epoch over the training data."""
        self.iteration(epoch, self.train_data)

    def test(self, epoch):
        """Run one evaluation epoch (no parameter updates) over the test data."""
        self.iteration(epoch, self.test_data, train=False)


    def iteration(self, epoch, data_loader, train=True):
        """Loop once over ``data_loader``, logging running loss and NSP accuracy.

        :param epoch: epoch index, used only for logging
        :param data_loader: iterable of batches (must support ``len``)
        :param train: if True back-propagate and step the optimizer; if False the
                      model is put in eval mode and gradients are disabled
        """

        avg_loss = 0.0
        total_correct = 0
        total_element = 0

        mode = "train" if train else "test"

        # dropout on for training, off for evaluation
        self.model.train(train)

        # progress bar
        data_iter = tqdm.tqdm(
            enumerate(data_loader),
            desc="EP_%s:%d" % (mode, epoch),
            total=len(data_loader),
            bar_format="{l_bar}{r_bar}"
        )

        # no autograd graph is needed when only evaluating
        with torch.set_grad_enabled(train):
            for i, data in data_iter:

                # 0. batch_data will be sent into the device(GPU or cpu)
                data = {key: value.to(self.device) for key, value in data.items()}

                # 1. forward the next_sentence_prediction and masked_lm model
                next_sent_output, mask_lm_output = self.model(data["bert_input"], data["segment_label"])

                # 2-1. NLL (negative log likelihood) loss of is_next classification result
                next_loss = self.next_criterion(next_sent_output, data["is_next"])

                # 2-2. NLLLoss of predicting masked token word
                # transpose to (m, vocab_size, seq_len) vs (m, seq_len)
                mask_loss = self.criterion(mask_lm_output.transpose(1, 2), data["bert_label"])

                # 2-3. Adding next_loss and mask_loss: 3.4 Pre-training Procedure
                loss = next_loss + mask_loss

                # 3. backward optimization only in train
                if train:
                    self.optim_schedule.zero_grad()
                    loss.backward()
                    self.optim_schedule.step_and_update_lr()

                # next sentence prediction accuracy
                correct = next_sent_output.argmax(dim=-1).eq(data["is_next"]).sum().item()
                avg_loss += loss.item()
                total_correct += correct
                total_element += data["is_next"].nelement()

                post_fix = {
                    "epoch": epoch,
                    "iter": i,
                    "avg_loss": avg_loss / (i + 1),
                    "avg_acc": total_correct / total_element * 100,
                    "loss": loss.item()
                }

                if i % self.log_freq == 0:
                    data_iter.write(str(post_fix))

        # an empty loader yields NaN summaries instead of a ZeroDivisionError
        n_batches = len(data_loader)
        epoch_loss = avg_loss / n_batches if n_batches else float('nan')
        epoch_acc = total_correct * 100.0 / total_element if total_element else float('nan')

        print(
            f"EP{epoch}, {mode}: \
            avg_loss={epoch_loss}, \
            total_acc={epoch_acc}"
        )


### **BERT Training**

In [None]:
# Dataset and loader over all sentence pairs
train_data = BERTDataset(pairs, tokenizer=tokenizer, seq_len=MAX_LEN)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True, pin_memory=True)

# Two-layer encoder with the pre-training heads stacked on top
bert_model = BERT(vocab_size=len(tokenizer.vocab), d_model=768, n_layers=2, heads=12, dropout=0.1)
bert_lm = BERTLM(bert_model, len(tokenizer.vocab))

# Trainer runs on the CPU
bert_trainer = BERTTrainer(bert_lm, train_loader, device='cpu')

epochs = 5
for epoch in range(epochs):
    bert_trainer.train(epoch)

Total Parameters: 46699434


EP_train:0:   0%|| 1/6926 [00:01<2:51:24,  1.49s/it]

{'epoch': 0, 'iter': 0, 'avg_loss': 11.87763786315918, 'avg_acc': 43.75, 'loss': 11.87763786315918}


EP_train:0:   0%|| 11/6926 [00:12<2:05:13,  1.09s/it]

{'epoch': 0, 'iter': 10, 'avg_loss': 11.759687076915394, 'avg_acc': 48.57954545454545, 'loss': 11.579602241516113}


EP_train:0:   0%|| 21/6926 [00:22<2:11:16,  1.14s/it]

{'epoch': 0, 'iter': 20, 'avg_loss': 11.689689772469658, 'avg_acc': 50.595238095238095, 'loss': 11.560999870300293}


EP_train:0:   0%|| 31/6926 [00:33<2:06:53,  1.10s/it]

{'epoch': 0, 'iter': 30, 'avg_loss': 11.57596117450345, 'avg_acc': 50.60483870967742, 'loss': 11.079766273498535}


EP_train:0:   1%|| 41/6926 [00:43<1:54:31,  1.00it/s]

{'epoch': 0, 'iter': 40, 'avg_loss': 11.418616248340141, 'avg_acc': 50.91463414634146, 'loss': 10.759316444396973}


EP_train:0:   1%|| 51/6926 [00:53<1:56:54,  1.02s/it]

{'epoch': 0, 'iter': 50, 'avg_loss': 11.262793933644014, 'avg_acc': 51.53186274509803, 'loss': 10.443327903747559}


EP_train:0:   1%|| 61/6926 [01:04<1:56:05,  1.01s/it]

{'epoch': 0, 'iter': 60, 'avg_loss': 11.11732054538414, 'avg_acc': 51.79303278688525, 'loss': 10.282441139221191}


EP_train:0:   1%|| 71/6926 [01:14<1:56:50,  1.02s/it]

{'epoch': 0, 'iter': 70, 'avg_loss': 10.983156835529167, 'avg_acc': 51.40845070422535, 'loss': 10.018783569335938}


EP_train:0:   1%|| 81/6926 [01:25<1:57:51,  1.03s/it]

{'epoch': 0, 'iter': 80, 'avg_loss': 10.867900860162429, 'avg_acc': 52.00617283950617, 'loss': 10.071439743041992}


EP_train:0:   1%|| 91/6926 [01:35<1:59:45,  1.05s/it]

{'epoch': 0, 'iter': 90, 'avg_loss': 10.772506609067811, 'avg_acc': 51.82005494505495, 'loss': 9.950614929199219}


EP_train:0:   1%|| 101/6926 [01:46<2:07:26,  1.12s/it]

{'epoch': 0, 'iter': 100, 'avg_loss': 10.680628379972854, 'avg_acc': 51.70173267326733, 'loss': 9.77735710144043}


EP_train:0:   2%|| 111/6926 [01:56<2:06:22,  1.11s/it]

{'epoch': 0, 'iter': 110, 'avg_loss': 10.597808966765532, 'avg_acc': 51.604729729729726, 'loss': 9.645489692687988}


EP_train:0:   2%|| 121/6926 [02:06<1:49:42,  1.03it/s]

{'epoch': 0, 'iter': 120, 'avg_loss': 10.520673223763458, 'avg_acc': 51.60123966942148, 'loss': 9.648775100708008}


EP_train:0:   2%|| 131/6926 [02:16<1:52:40,  1.01it/s]

{'epoch': 0, 'iter': 130, 'avg_loss': 10.44367951109209, 'avg_acc': 50.978053435114504, 'loss': 9.430127143859863}


EP_train:0:   2%|| 141/6926 [02:27<1:55:27,  1.02s/it]

{'epoch': 0, 'iter': 140, 'avg_loss': 10.373484361256269, 'avg_acc': 50.90868794326241, 'loss': 9.515931129455566}


EP_train:0:   2%|| 151/6926 [02:37<1:54:27,  1.01s/it]

{'epoch': 0, 'iter': 150, 'avg_loss': 10.306285441316517, 'avg_acc': 50.682947019867555, 'loss': 9.312395095825195}


EP_train:0:   2%|| 161/6926 [02:48<1:54:51,  1.02s/it]

{'epoch': 0, 'iter': 160, 'avg_loss': 10.233829699688076, 'avg_acc': 50.34937888198758, 'loss': 9.187271118164062}


EP_train:0:   2%|| 171/6926 [02:58<1:56:18,  1.03s/it]

{'epoch': 0, 'iter': 170, 'avg_loss': 10.163831325999478, 'avg_acc': 50.29239766081871, 'loss': 8.98560905456543}


EP_train:0:   3%|| 181/6926 [03:08<2:04:51,  1.11s/it]

{'epoch': 0, 'iter': 180, 'avg_loss': 10.097484773035207, 'avg_acc': 50.12085635359116, 'loss': 8.953372955322266}


EP_train:0:   3%|| 191/6926 [03:19<2:02:40,  1.09s/it]

{'epoch': 0, 'iter': 190, 'avg_loss': 10.030666905547935, 'avg_acc': 49.950916230366495, 'loss': 9.005990982055664}


EP_train:0:   3%|| 201/6926 [03:29<1:52:48,  1.01s/it]

{'epoch': 0, 'iter': 200, 'avg_loss': 9.963059676820366, 'avg_acc': 50.01554726368159, 'loss': 8.652519226074219}


EP_train:0:   3%|| 211/6926 [03:39<1:50:46,  1.01it/s]

{'epoch': 0, 'iter': 210, 'avg_loss': 9.901499680433227, 'avg_acc': 50.01481042654028, 'loss': 8.450000762939453}


EP_train:0:   3%|| 221/6926 [03:50<1:52:36,  1.01s/it]

{'epoch': 0, 'iter': 220, 'avg_loss': 9.840166070342603, 'avg_acc': 49.98585972850679, 'loss': 8.316047668457031}


EP_train:0:   3%|| 231/6926 [04:00<1:54:23,  1.03s/it]

{'epoch': 0, 'iter': 230, 'avg_loss': 9.773181440510275, 'avg_acc': 50.21645021645021, 'loss': 8.225564002990723}


EP_train:0:   3%|| 241/6926 [04:11<1:54:22,  1.03s/it]

{'epoch': 0, 'iter': 240, 'avg_loss': 9.715233486223022, 'avg_acc': 50.37603734439834, 'loss': 8.330452919006348}


EP_train:0:   4%|| 251/6926 [04:21<1:58:25,  1.06s/it]

{'epoch': 0, 'iter': 250, 'avg_loss': 9.653793937181572, 'avg_acc': 50.5851593625498, 'loss': 7.842550754547119}


EP_train:0:   4%|| 261/6926 [04:32<2:01:48,  1.10s/it]

{'epoch': 0, 'iter': 260, 'avg_loss': 9.600362012212761, 'avg_acc': 50.57471264367817, 'loss': 8.396284103393555}


EP_train:0:   4%|| 271/6926 [04:42<2:04:46,  1.13s/it]

{'epoch': 0, 'iter': 270, 'avg_loss': 9.548741117174774, 'avg_acc': 50.3690036900369, 'loss': 8.327116012573242}


EP_train:0:   4%|| 281/6926 [04:52<1:50:23,  1.00it/s]

{'epoch': 0, 'iter': 280, 'avg_loss': 9.493504972220316, 'avg_acc': 50.30026690391459, 'loss': 7.6495161056518555}


EP_train:0:   4%|| 291/6926 [05:03<1:49:48,  1.01it/s]

{'epoch': 0, 'iter': 290, 'avg_loss': 9.442700987419311, 'avg_acc': 50.25773195876289, 'loss': 7.884024620056152}


EP_train:0:   4%|| 301/6926 [05:13<1:51:49,  1.01s/it]

{'epoch': 0, 'iter': 300, 'avg_loss': 9.390052675012734, 'avg_acc': 50.301079734219265, 'loss': 7.776120185852051}


EP_train:0:   4%|| 311/6926 [05:24<1:52:35,  1.02s/it]

{'epoch': 0, 'iter': 310, 'avg_loss': 9.337066326877311, 'avg_acc': 50.43207395498393, 'loss': 7.6350250244140625}


EP_train:0:   5%|| 321/6926 [05:34<1:54:32,  1.04s/it]

{'epoch': 0, 'iter': 320, 'avg_loss': 9.288605801412992, 'avg_acc': 50.31152647975078, 'loss': 7.820180892944336}


EP_train:0:   5%|| 331/6926 [05:45<1:55:58,  1.06s/it]

{'epoch': 0, 'iter': 330, 'avg_loss': 9.23666437586989, 'avg_acc': 50.35876132930513, 'loss': 7.916828632354736}


EP_train:0:   5%|| 341/6926 [05:55<2:00:02,  1.09s/it]

{'epoch': 0, 'iter': 340, 'avg_loss': 9.191940627839209, 'avg_acc': 50.348240469208214, 'loss': 7.870798110961914}


EP_train:0:   5%|| 351/6926 [06:06<2:07:04,  1.16s/it]

{'epoch': 0, 'iter': 350, 'avg_loss': 9.143623826170918, 'avg_acc': 50.373931623931625, 'loss': 7.576415538787842}


EP_train:0:   5%|| 361/6926 [06:17<2:01:54,  1.11s/it]

{'epoch': 0, 'iter': 360, 'avg_loss': 9.098667521225778, 'avg_acc': 50.3722299168975, 'loss': 7.7886881828308105}


EP_train:0:   5%|| 371/6926 [06:27<1:53:01,  1.03s/it]

{'epoch': 0, 'iter': 370, 'avg_loss': 9.05468959628411, 'avg_acc': 50.47169811320755, 'loss': 7.353297710418701}


EP_train:0:   6%|| 381/6926 [06:38<1:52:00,  1.03s/it]

{'epoch': 0, 'iter': 380, 'avg_loss': 9.010799914833129, 'avg_acc': 50.37729658792651, 'loss': 7.432699680328369}


EP_train:0:   6%|| 391/6926 [06:49<1:51:33,  1.02s/it]

{'epoch': 0, 'iter': 390, 'avg_loss': 8.968211016691555, 'avg_acc': 50.29571611253198, 'loss': 7.428986072540283}


EP_train:0:   6%|| 401/6926 [07:00<1:52:16,  1.03s/it]

{'epoch': 0, 'iter': 400, 'avg_loss': 8.926620213468176, 'avg_acc': 50.179239401496254, 'loss': 7.372670650482178}


EP_train:0:   6%|| 411/6926 [07:10<1:53:13,  1.04s/it]

{'epoch': 0, 'iter': 410, 'avg_loss': 8.886089668366742, 'avg_acc': 50.18248175182482, 'loss': 6.882781982421875}


EP_train:0:   6%|| 421/6926 [07:21<1:56:13,  1.07s/it]

{'epoch': 0, 'iter': 420, 'avg_loss': 8.851040920565644, 'avg_acc': 50.10391923990499, 'loss': 7.323608875274658}


EP_train:0:   6%|| 431/6926 [07:32<1:57:08,  1.08s/it]

{'epoch': 0, 'iter': 430, 'avg_loss': 8.810262936727076, 'avg_acc': 50.13776102088167, 'loss': 7.137568473815918}


EP_train:0:   6%|| 441/6926 [07:43<1:57:16,  1.09s/it]

{'epoch': 0, 'iter': 440, 'avg_loss': 8.775264602669782, 'avg_acc': 50.141723356009074, 'loss': 7.5036797523498535}


EP_train:0:   7%|| 451/6926 [07:53<2:02:41,  1.14s/it]

{'epoch': 0, 'iter': 450, 'avg_loss': 8.740132219775553, 'avg_acc': 50.062361419068736, 'loss': 7.005851745605469}


EP_train:0:   7%|| 461/6926 [08:04<1:59:39,  1.11s/it]

{'epoch': 0, 'iter': 460, 'avg_loss': 8.701881672451657, 'avg_acc': 50.0, 'loss': 6.986963272094727}


EP_train:0:   7%|| 471/6926 [08:14<1:52:01,  1.04s/it]

{'epoch': 0, 'iter': 470, 'avg_loss': 8.667222821788423, 'avg_acc': 50.06634819532909, 'loss': 7.114170074462891}


EP_train:0:   7%|| 481/6926 [08:25<1:52:09,  1.04s/it]

{'epoch': 0, 'iter': 480, 'avg_loss': 8.63317823211765, 'avg_acc': 50.2079002079002, 'loss': 6.741403102874756}


EP_train:0:   7%|| 491/6926 [08:36<1:50:55,  1.03s/it]

{'epoch': 0, 'iter': 490, 'avg_loss': 8.596445863455717, 'avg_acc': 50.24821792260692, 'loss': 7.209240913391113}


EP_train:0:   7%|| 501/6926 [08:47<1:50:58,  1.04s/it]

{'epoch': 0, 'iter': 500, 'avg_loss': 8.562700974965049, 'avg_acc': 50.33682634730538, 'loss': 6.662424564361572}


EP_train:0:   7%|| 511/6926 [08:58<1:51:50,  1.05s/it]

{'epoch': 0, 'iter': 510, 'avg_loss': 8.530677508001459, 'avg_acc': 50.19569471624267, 'loss': 6.372987270355225}


EP_train:0:   8%|| 521/6926 [09:08<1:50:59,  1.04s/it]

{'epoch': 0, 'iter': 520, 'avg_loss': 8.49864351451969, 'avg_acc': 50.22792706333973, 'loss': 6.696984767913818}


EP_train:0:   8%|| 531/6926 [09:19<1:55:21,  1.08s/it]

{'epoch': 0, 'iter': 530, 'avg_loss': 8.464438877536752, 'avg_acc': 50.21186440677966, 'loss': 6.4224066734313965}


EP_train:0:   8%|| 541/6926 [09:30<1:56:21,  1.09s/it]

{'epoch': 0, 'iter': 540, 'avg_loss': 8.430145454935579, 'avg_acc': 50.26571164510166, 'loss': 6.634417533874512}


EP_train:0:   8%|| 551/6926 [09:41<2:00:46,  1.14s/it]

{'epoch': 0, 'iter': 550, 'avg_loss': 8.400294645728303, 'avg_acc': 50.2211887477314, 'loss': 6.731833457946777}


EP_train:0:   8%|| 561/6926 [09:51<2:04:40,  1.18s/it]

{'epoch': 0, 'iter': 560, 'avg_loss': 8.36863799868633, 'avg_acc': 50.26180926916221, 'loss': 6.338919162750244}


EP_train:0:   8%|| 571/6926 [10:02<1:51:20,  1.05s/it]

{'epoch': 0, 'iter': 570, 'avg_loss': 8.338934519244994, 'avg_acc': 50.284588441331, 'loss': 6.778668403625488}


EP_train:0:   8%|| 581/6926 [10:13<1:48:47,  1.03s/it]

{'epoch': 0, 'iter': 580, 'avg_loss': 8.308370751070688, 'avg_acc': 50.31196213425129, 'loss': 6.784109592437744}


EP_train:0:   9%|| 591/6926 [10:23<1:50:21,  1.05s/it]

{'epoch': 0, 'iter': 590, 'avg_loss': 8.279044814521287, 'avg_acc': 50.317258883248726, 'loss': 6.957728385925293}


EP_train:0:   9%|| 601/6926 [10:34<1:49:17,  1.04s/it]

{'epoch': 0, 'iter': 600, 'avg_loss': 8.250645009134454, 'avg_acc': 50.249584026622294, 'loss': 6.455053329467773}


EP_train:0:   9%|| 611/6926 [10:45<1:49:58,  1.04s/it]

{'epoch': 0, 'iter': 610, 'avg_loss': 8.223123198445222, 'avg_acc': 50.301759410801964, 'loss': 6.291332721710205}


EP_train:0:   9%|| 621/6926 [10:56<1:49:29,  1.04s/it]

{'epoch': 0, 'iter': 620, 'avg_loss': 8.195540383626106, 'avg_acc': 50.30696457326892, 'loss': 6.941789150238037}


EP_train:0:   9%|| 631/6926 [11:07<1:52:11,  1.07s/it]

{'epoch': 0, 'iter': 630, 'avg_loss': 8.165442708555, 'avg_acc': 50.26743264659272, 'loss': 6.301031589508057}


EP_train:0:   9%|| 641/6926 [11:18<1:51:21,  1.06s/it]

{'epoch': 0, 'iter': 640, 'avg_loss': 8.140611905203595, 'avg_acc': 50.25351014040562, 'loss': 6.4371418952941895}


EP_train:0:   9%|| 651/6926 [11:28<1:53:14,  1.08s/it]

{'epoch': 0, 'iter': 650, 'avg_loss': 8.115549609042166, 'avg_acc': 50.28801843317973, 'loss': 6.682239055633545}


EP_train:0:  10%|| 661/6926 [11:39<1:58:09,  1.13s/it]

{'epoch': 0, 'iter': 660, 'avg_loss': 8.091160316149875, 'avg_acc': 50.30257186081695, 'loss': 6.865475654602051}


EP_train:0:  10%|| 671/6926 [11:50<2:01:00,  1.16s/it]

{'epoch': 0, 'iter': 670, 'avg_loss': 8.065716148666404, 'avg_acc': 50.2887481371088, 'loss': 6.717825412750244}


EP_train:0:  10%|| 681/6926 [12:00<1:45:46,  1.02s/it]

{'epoch': 0, 'iter': 680, 'avg_loss': 8.041214096388627, 'avg_acc': 50.261563876651984, 'loss': 6.537429332733154}


EP_train:0:  10%|| 691/6926 [12:11<1:45:50,  1.02s/it]

{'epoch': 0, 'iter': 690, 'avg_loss': 8.015924903659847, 'avg_acc': 50.30752532561505, 'loss': 6.319131374359131}


EP_train:0:  10%|| 701/6926 [12:22<1:45:30,  1.02s/it]

{'epoch': 0, 'iter': 700, 'avg_loss': 7.991354125372523, 'avg_acc': 50.307596291012835, 'loss': 6.367130279541016}


EP_train:0:  10%|| 711/6926 [12:32<1:45:59,  1.02s/it]

{'epoch': 0, 'iter': 710, 'avg_loss': 7.968812345620114, 'avg_acc': 50.35601265822785, 'loss': 6.4349284172058105}


EP_train:0:  10%|| 721/6926 [12:43<1:46:27,  1.03s/it]

{'epoch': 0, 'iter': 720, 'avg_loss': 7.946157347643425, 'avg_acc': 50.24705270457698, 'loss': 5.93623685836792}


EP_train:0:  11%|| 731/6926 [12:54<1:47:58,  1.05s/it]

{'epoch': 0, 'iter': 730, 'avg_loss': 7.9244500054543385, 'avg_acc': 50.2265731874145, 'loss': 6.725047588348389}


EP_train:0:  11%|| 741/6926 [13:04<1:47:28,  1.04s/it]

{'epoch': 0, 'iter': 740, 'avg_loss': 7.903156994808058, 'avg_acc': 50.22351551956815, 'loss': 6.3283772468566895}


EP_train:0:  11%|| 751/6926 [13:15<1:53:51,  1.11s/it]

{'epoch': 0, 'iter': 750, 'avg_loss': 7.881215763473003, 'avg_acc': 50.21221704394141, 'loss': 5.969786643981934}


EP_train:0:  11%|| 761/6926 [13:25<1:56:45,  1.14s/it]

{'epoch': 0, 'iter': 760, 'avg_loss': 7.859003779452671, 'avg_acc': 50.24227989487516, 'loss': 6.265711784362793}


EP_train:0:  11%|| 771/6926 [13:35<1:50:46,  1.08s/it]

{'epoch': 0, 'iter': 770, 'avg_loss': 7.8374307300948605, 'avg_acc': 50.22292477302205, 'loss': 6.2318878173828125}


EP_train:0:  11%|| 781/6926 [13:46<1:42:13,  1.00it/s]

{'epoch': 0, 'iter': 780, 'avg_loss': 7.817764371366415, 'avg_acc': 50.212067861715745, 'loss': 6.424302577972412}


EP_train:0:  11%|| 791/6926 [13:56<1:44:06,  1.02s/it]

{'epoch': 0, 'iter': 790, 'avg_loss': 7.798333525506947, 'avg_acc': 50.20938685208597, 'loss': 5.990481376647949}


EP_train:0:  12%|| 801/6926 [14:07<1:43:33,  1.01s/it]

{'epoch': 0, 'iter': 800, 'avg_loss': 7.778339830081859, 'avg_acc': 50.17166042446941, 'loss': 6.421486854553223}


EP_train:0:  12%|| 811/6926 [14:17<1:44:11,  1.02s/it]

{'epoch': 0, 'iter': 810, 'avg_loss': 7.760055747190915, 'avg_acc': 50.1502774352651, 'loss': 7.096622467041016}


EP_train:0:  12%|| 821/6926 [14:28<1:46:05,  1.04s/it]

{'epoch': 0, 'iter': 820, 'avg_loss': 7.740272907043345, 'avg_acc': 50.12941534713764, 'loss': 6.2564849853515625}


EP_train:0:  12%|| 831/6926 [14:38<1:46:44,  1.05s/it]

{'epoch': 0, 'iter': 830, 'avg_loss': 7.722518575464058, 'avg_acc': 50.11657641395909, 'loss': 6.618548393249512}


EP_train:0:  12%|| 841/6926 [14:49<1:52:54,  1.11s/it]

{'epoch': 0, 'iter': 840, 'avg_loss': 7.7041739374222, 'avg_acc': 50.089179548156956, 'loss': 6.199341773986816}


EP_train:0:  12%|| 851/6926 [15:00<1:56:16,  1.15s/it]

{'epoch': 0, 'iter': 850, 'avg_loss': 7.686608632218824, 'avg_acc': 50.09914806110458, 'loss': 5.915590286254883}


EP_train:0:  12%|| 861/6926 [15:10<1:42:03,  1.01s/it]

{'epoch': 0, 'iter': 860, 'avg_loss': 7.668502184018302, 'avg_acc': 50.09436701509872, 'loss': 6.505883693695068}


EP_train:0:  13%|| 871/6926 [15:20<1:41:37,  1.01s/it]

{'epoch': 0, 'iter': 870, 'avg_loss': 7.649688526902489, 'avg_acc': 50.086107921928814, 'loss': 6.082315444946289}


EP_train:0:  13%|| 881/6926 [15:31<1:40:48,  1.00s/it]

{'epoch': 0, 'iter': 880, 'avg_loss': 7.632892160491424, 'avg_acc': 50.08513053348468, 'loss': 6.309040069580078}


EP_train:0:  13%|| 891/6926 [15:41<1:42:00,  1.01s/it]

{'epoch': 0, 'iter': 890, 'avg_loss': 7.615348236328022, 'avg_acc': 50.08417508417509, 'loss': 5.919167995452881}


EP_train:0:  13%|| 901/6926 [15:52<1:42:54,  1.02s/it]

{'epoch': 0, 'iter': 900, 'avg_loss': 7.598450201333033, 'avg_acc': 50.08324084350721, 'loss': 6.177864074707031}


EP_train:0:  13%|| 911/6926 [16:03<1:44:33,  1.04s/it]

{'epoch': 0, 'iter': 910, 'avg_loss': 7.583244316402565, 'avg_acc': 50.08575740944018, 'loss': 6.34096622467041}


EP_train:0:  13%|| 921/6926 [16:13<1:45:32,  1.05s/it]

{'epoch': 0, 'iter': 920, 'avg_loss': 7.569093995710408, 'avg_acc': 50.09161237785016, 'loss': 6.523689270019531}


EP_train:0:  13%|| 931/6926 [16:24<1:50:27,  1.11s/it]

{'epoch': 0, 'iter': 930, 'avg_loss': 7.553910923311457, 'avg_acc': 50.10405477980666, 'loss': 6.069889068603516}


EP_train:0:  14%|| 941/6926 [16:34<1:53:10,  1.13s/it]

{'epoch': 0, 'iter': 940, 'avg_loss': 7.537782582415784, 'avg_acc': 50.16936769394261, 'loss': 5.478960990905762}


EP_train:0:  14%|| 951/6926 [16:45<1:46:30,  1.07s/it]

{'epoch': 0, 'iter': 950, 'avg_loss': 7.522457212554418, 'avg_acc': 50.10515247108307, 'loss': 6.131444931030273}


EP_train:0:  14%|| 961/6926 [16:55<1:38:25,  1.01it/s]

{'epoch': 0, 'iter': 960, 'avg_loss': 7.507478953648308, 'avg_acc': 50.12682101977107, 'loss': 6.689550399780273}


EP_train:0:  14%|| 971/6926 [17:05<1:40:24,  1.01s/it]

{'epoch': 0, 'iter': 970, 'avg_loss': 7.493496952783681, 'avg_acc': 50.11585993820803, 'loss': 6.243340015411377}


EP_train:0:  14%|| 981/6926 [17:16<1:39:54,  1.01s/it]

{'epoch': 0, 'iter': 980, 'avg_loss': 7.477111545177775, 'avg_acc': 50.07326707441386, 'loss': 6.021949768066406}


EP_train:0:  14%|| 991/6926 [17:26<1:41:48,  1.03s/it]

{'epoch': 0, 'iter': 990, 'avg_loss': 7.4630264639252974, 'avg_acc': 50.05991422805247, 'loss': 6.087897777557373}


EP_train:0:  14%|| 1001/6926 [17:37<1:43:10,  1.04s/it]

{'epoch': 0, 'iter': 1000, 'avg_loss': 7.449444024355619, 'avg_acc': 50.01560939060939, 'loss': 5.99716329574585}


EP_train:0:  15%|| 1011/6926 [17:48<1:45:27,  1.07s/it]

{'epoch': 0, 'iter': 1010, 'avg_loss': 7.436192977086491, 'avg_acc': 49.987636003956474, 'loss': 6.107766151428223}


EP_train:0:  15%|| 1021/6926 [17:58<1:48:23,  1.10s/it]

{'epoch': 0, 'iter': 1020, 'avg_loss': 7.423521836754391, 'avg_acc': 50.0, 'loss': 6.077112197875977}


EP_train:0:  15%|| 1031/6926 [18:09<1:49:58,  1.12s/it]

{'epoch': 0, 'iter': 1030, 'avg_loss': 7.410174076124722, 'avg_acc': 50.01818622696411, 'loss': 5.929940223693848}


EP_train:0:  15%|| 1041/6926 [18:19<1:47:47,  1.10s/it]

{'epoch': 0, 'iter': 1040, 'avg_loss': 7.396853393825857, 'avg_acc': 50.05703650336215, 'loss': 6.046861171722412}


EP_train:0:  15%|| 1051/6926 [18:29<1:38:45,  1.01s/it]

{'epoch': 0, 'iter': 1050, 'avg_loss': 7.382190727938027, 'avg_acc': 50.065413891531875, 'loss': 6.034249782562256}


EP_train:0:  15%|| 1061/6926 [18:40<1:36:36,  1.01it/s]

{'epoch': 0, 'iter': 1060, 'avg_loss': 7.370587133664662, 'avg_acc': 50.076578699340246, 'loss': 5.711930274963379}


EP_train:0:  15%|| 1071/6926 [18:50<1:38:55,  1.01s/it]

{'epoch': 0, 'iter': 1070, 'avg_loss': 7.357412084477956, 'avg_acc': 50.09920634920635, 'loss': 6.10444450378418}


EP_train:0:  16%|| 1081/6926 [19:01<1:40:03,  1.03s/it]

{'epoch': 0, 'iter': 1080, 'avg_loss': 7.344845674304803, 'avg_acc': 50.13297872340425, 'loss': 5.867177963256836}


EP_train:0:  16%|| 1091/6926 [19:11<1:40:15,  1.03s/it]

{'epoch': 0, 'iter': 1090, 'avg_loss': 7.332202398700522, 'avg_acc': 50.16040329972502, 'loss': 5.965864658355713}


EP_train:0:  16%|| 1101/6926 [19:22<1:42:59,  1.06s/it]

{'epoch': 0, 'iter': 1100, 'avg_loss': 7.3188321566603385, 'avg_acc': 50.096503178928245, 'loss': 5.873414993286133}


EP_train:0:  16%|| 1111/6926 [19:33<1:43:29,  1.07s/it]

{'epoch': 0, 'iter': 1110, 'avg_loss': 7.307029611671647, 'avg_acc': 50.06750675067507, 'loss': 6.771122455596924}


EP_train:0:  16%|| 1121/6926 [19:43<1:49:15,  1.13s/it]

{'epoch': 0, 'iter': 1120, 'avg_loss': 7.29468192977633, 'avg_acc': 50.03345227475469, 'loss': 6.093444347381592}


EP_train:0:  16%|| 1131/6926 [19:53<1:48:46,  1.13s/it]

{'epoch': 0, 'iter': 1130, 'avg_loss': 7.283542608603453, 'avg_acc': 49.99723695844385, 'loss': 6.1329345703125}


EP_train:0:  16%|| 1141/6926 [20:04<1:37:20,  1.01s/it]

{'epoch': 0, 'iter': 1140, 'avg_loss': 7.273235069462126, 'avg_acc': 49.997261174408415, 'loss': 6.3385515213012695}


EP_train:0:  17%|| 1151/6926 [20:14<1:36:03,  1.00it/s]

{'epoch': 0, 'iter': 1150, 'avg_loss': 7.261907840997214, 'avg_acc': 49.96198957428323, 'loss': 5.917153358459473}


EP_train:0:  17%|| 1161/6926 [20:25<1:37:02,  1.01s/it]

{'epoch': 0, 'iter': 1160, 'avg_loss': 7.250510206312891, 'avg_acc': 49.96770025839793, 'loss': 5.898433208465576}


EP_train:0:  17%|| 1171/6926 [20:35<1:36:08,  1.00s/it]

{'epoch': 0, 'iter': 1170, 'avg_loss': 7.239371586416034, 'avg_acc': 49.95463279248506, 'loss': 5.981712818145752}


EP_train:0:  17%|| 1181/6926 [20:46<1:39:34,  1.04s/it]

{'epoch': 0, 'iter': 1180, 'avg_loss': 7.228301483125226, 'avg_acc': 50.00264606265876, 'loss': 5.981601715087891}


EP_train:0:  17%|| 1191/6926 [20:57<1:40:03,  1.05s/it]

{'epoch': 0, 'iter': 1190, 'avg_loss': 7.219731058621987, 'avg_acc': 50.0, 'loss': 6.29663610458374}


EP_train:0:  17%|| 1201/6926 [21:07<1:42:25,  1.07s/it]

{'epoch': 0, 'iter': 1200, 'avg_loss': 7.210460602492715, 'avg_acc': 50.013009991673606, 'loss': 6.232715129852295}


EP_train:0:  17%|| 1211/6926 [21:18<1:43:39,  1.09s/it]

{'epoch': 0, 'iter': 1210, 'avg_loss': 7.200977458134847, 'avg_acc': 50.02322460776219, 'loss': 6.344027519226074}


EP_train:0:  18%|| 1221/6926 [21:28<1:45:21,  1.11s/it]

{'epoch': 0, 'iter': 1220, 'avg_loss': 7.191221878823445, 'avg_acc': 50.01023751023751, 'loss': 5.550210952758789}


EP_train:0:  18%|| 1231/6926 [21:38<1:35:34,  1.01s/it]

{'epoch': 0, 'iter': 1230, 'avg_loss': 7.181616345040688, 'avg_acc': 49.98984565393989, 'loss': 6.6194233894348145}


EP_train:0:  18%|| 1241/6926 [21:49<1:37:10,  1.03s/it]

{'epoch': 0, 'iter': 1240, 'avg_loss': 7.171198823776676, 'avg_acc': 49.957191780821915, 'loss': 6.067949295043945}


EP_train:0:  18%|| 1251/6926 [21:59<1:35:48,  1.01s/it]

{'epoch': 0, 'iter': 1250, 'avg_loss': 7.162014529001799, 'avg_acc': 49.9525379696243, 'loss': 6.275131702423096}


EP_train:0:  18%|| 1261/6926 [22:10<1:37:05,  1.03s/it]

{'epoch': 0, 'iter': 1260, 'avg_loss': 7.151944002398038, 'avg_acc': 49.9330888183981, 'loss': 5.839197158813477}


EP_train:0:  18%|| 1271/6926 [22:21<1:37:05,  1.03s/it]

{'epoch': 0, 'iter': 1270, 'avg_loss': 7.143063725682341, 'avg_acc': 49.90902832415421, 'loss': 6.1709794998168945}


EP_train:0:  18%|| 1281/6926 [22:31<1:36:40,  1.03s/it]

{'epoch': 0, 'iter': 1280, 'avg_loss': 7.134124468863709, 'avg_acc': 49.88778298204527, 'loss': 6.142892837524414}


EP_train:0:  19%|| 1291/6926 [22:42<1:40:17,  1.07s/it]

{'epoch': 0, 'iter': 1290, 'avg_loss': 7.124648749412814, 'avg_acc': 49.8789697908598, 'loss': 6.081577301025391}


EP_train:0:  19%|| 1301/6926 [22:52<1:44:48,  1.12s/it]

{'epoch': 0, 'iter': 1300, 'avg_loss': 7.115388378741464, 'avg_acc': 49.89911606456572, 'loss': 5.745368957519531}


EP_train:0:  19%|| 1311/6926 [23:03<1:47:13,  1.15s/it]

{'epoch': 0, 'iter': 1310, 'avg_loss': 7.106359928823626, 'avg_acc': 49.91657131960336, 'loss': 5.744056701660156}


EP_train:0:  19%|| 1321/6926 [23:13<1:34:15,  1.01s/it]

{'epoch': 0, 'iter': 1320, 'avg_loss': 7.09708967974112, 'avg_acc': 49.89591218773656, 'loss': 5.673115253448486}


EP_train:0:  19%|| 1331/6926 [23:23<1:34:10,  1.01s/it]

{'epoch': 0, 'iter': 1330, 'avg_loss': 7.08957684944663, 'avg_acc': 49.88260706235913, 'loss': 6.284033298492432}


EP_train:0:  19%|| 1341/6926 [23:34<1:33:40,  1.01s/it]

{'epoch': 0, 'iter': 1340, 'avg_loss': 7.08115938757001, 'avg_acc': 49.89280387770321, 'loss': 5.633574962615967}


EP_train:0:  20%|| 1351/6926 [23:45<1:33:50,  1.01s/it]

{'epoch': 0, 'iter': 1350, 'avg_loss': 7.072496324887901, 'avg_acc': 49.872779422649884, 'loss': 6.358549118041992}


EP_train:0:  20%|| 1361/6926 [23:55<1:35:13,  1.03s/it]

{'epoch': 0, 'iter': 1360, 'avg_loss': 7.063851908320105, 'avg_acc': 49.87830639235856, 'loss': 6.142042636871338}


EP_train:0:  20%|| 1371/6926 [24:06<1:36:08,  1.04s/it]

{'epoch': 0, 'iter': 1370, 'avg_loss': 7.055546369976931, 'avg_acc': 49.897428884026255, 'loss': 5.9958176612854}


EP_train:0:  20%|| 1381/6926 [24:17<1:38:20,  1.06s/it]

{'epoch': 0, 'iter': 1380, 'avg_loss': 7.047603386886563, 'avg_acc': 49.93890296886314, 'loss': 5.467602252960205}


EP_train:0:  20%|| 1391/6926 [24:27<1:40:58,  1.09s/it]

{'epoch': 0, 'iter': 1390, 'avg_loss': 7.039653471954258, 'avg_acc': 49.94383537023724, 'loss': 5.661850929260254}


EP_train:0:  20%|| 1401/6926 [24:38<1:45:26,  1.15s/it]

{'epoch': 0, 'iter': 1400, 'avg_loss': 7.031244082590412, 'avg_acc': 49.968772305496074, 'loss': 6.089345932006836}


EP_train:0:  20%|| 1411/6926 [24:48<1:44:29,  1.14s/it]

{'epoch': 0, 'iter': 1410, 'avg_loss': 7.023704869116899, 'avg_acc': 49.98006732813607, 'loss': 6.1271138191223145}


EP_train:0:  21%|| 1421/6926 [24:59<1:33:00,  1.01s/it]

{'epoch': 0, 'iter': 1420, 'avg_loss': 7.01597163947345, 'avg_acc': 49.962614356087265, 'loss': 5.9220757484436035}


EP_train:0:  21%|| 1431/6926 [25:09<1:32:50,  1.01s/it]

{'epoch': 0, 'iter': 1430, 'avg_loss': 7.008125577749363, 'avg_acc': 49.9475890985325, 'loss': 6.046283721923828}


EP_train:0:  21%|| 1441/6926 [25:20<1:33:27,  1.02s/it]

{'epoch': 0, 'iter': 1440, 'avg_loss': 7.00023248442174, 'avg_acc': 49.96747050659264, 'loss': 5.5911993980407715}


EP_train:0:  21%|| 1451/6926 [25:31<1:33:50,  1.03s/it]

{'epoch': 0, 'iter': 1450, 'avg_loss': 6.993088789926078, 'avg_acc': 49.99138525155066, 'loss': 5.807047367095947}


EP_train:0:  21%|| 1461/6926 [25:41<1:34:51,  1.04s/it]

{'epoch': 0, 'iter': 1460, 'avg_loss': 6.985230453042115, 'avg_acc': 50.00213894592744, 'loss': 5.987692832946777}


EP_train:0:  21%|| 1471/6926 [25:52<1:35:18,  1.05s/it]

{'epoch': 0, 'iter': 1470, 'avg_loss': 6.977788888216829, 'avg_acc': 49.993626784500336, 'loss': 5.769822597503662}


EP_train:0:  21%|| 1481/6926 [26:02<1:37:06,  1.07s/it]

{'epoch': 0, 'iter': 1480, 'avg_loss': 6.971613506623654, 'avg_acc': 50.008440243079, 'loss': 6.0525712966918945}


EP_train:0:  22%|| 1491/6926 [26:13<1:42:53,  1.14s/it]

{'epoch': 0, 'iter': 1490, 'avg_loss': 6.96456026695144, 'avg_acc': 50.02724681421864, 'loss': 5.746126651763916}


EP_train:0:  22%|| 1501/6926 [26:24<1:43:18,  1.14s/it]

{'epoch': 0, 'iter': 1500, 'avg_loss': 6.958538574190794, 'avg_acc': 50.004163890739505, 'loss': 6.253878116607666}


EP_train:0:  22%|| 1511/6926 [26:34<1:31:20,  1.01s/it]

{'epoch': 0, 'iter': 1510, 'avg_loss': 6.9517616638673365, 'avg_acc': 50.00413633355394, 'loss': 5.780992031097412}


EP_train:0:  22%|| 1521/6926 [26:45<1:31:12,  1.01s/it]

{'epoch': 0, 'iter': 1520, 'avg_loss': 6.944390322643232, 'avg_acc': 50.01643655489809, 'loss': 5.505647659301758}


EP_train:0:  22%|| 1531/6926 [26:55<1:31:12,  1.01s/it]

{'epoch': 0, 'iter': 1530, 'avg_loss': 6.937390367008361, 'avg_acc': 50.05306988896147, 'loss': 5.3545637130737305}


EP_train:0:  22%|| 1541/6926 [27:06<1:31:53,  1.02s/it]

{'epoch': 0, 'iter': 1540, 'avg_loss': 6.930687298177513, 'avg_acc': 50.05678131083712, 'loss': 5.907761096954346}


EP_train:0:  22%|| 1551/6926 [27:16<1:33:01,  1.04s/it]

{'epoch': 0, 'iter': 1550, 'avg_loss': 6.924059370423347, 'avg_acc': 50.030222437137326, 'loss': 6.6279168128967285}


EP_train:0:  23%|| 1561/6926 [27:27<1:33:41,  1.05s/it]

{'epoch': 0, 'iter': 1560, 'avg_loss': 6.917748729515198, 'avg_acc': 50.00200192184497, 'loss': 5.902115821838379}


EP_train:0:  23%|| 1571/6926 [27:38<1:32:47,  1.04s/it]

{'epoch': 0, 'iter': 1570, 'avg_loss': 6.911469081038506, 'avg_acc': 50.00795671546785, 'loss': 5.605770587921143}


EP_train:0:  23%|| 1581/6926 [27:48<1:34:22,  1.06s/it]

{'epoch': 0, 'iter': 1580, 'avg_loss': 6.90503506506643, 'avg_acc': 49.992093611638204, 'loss': 5.800256729125977}


EP_train:0:  23%|| 1591/6926 [27:59<1:39:39,  1.12s/it]

{'epoch': 0, 'iter': 1590, 'avg_loss': 6.898047330018336, 'avg_acc': 50.00589252042741, 'loss': 6.070418834686279}


EP_train:0:  23%|| 1601/6926 [28:09<1:42:18,  1.15s/it]

{'epoch': 0, 'iter': 1600, 'avg_loss': 6.892168728877871, 'avg_acc': 50.00390381011868, 'loss': 5.729106903076172}


EP_train:0:  23%|| 1611/6926 [28:20<1:29:12,  1.01s/it]

{'epoch': 0, 'iter': 1610, 'avg_loss': 6.886463413025414, 'avg_acc': 50.01357852265673, 'loss': 6.275238513946533}


EP_train:0:  23%|| 1621/6926 [28:30<1:30:11,  1.02s/it]

{'epoch': 0, 'iter': 1620, 'avg_loss': 6.879877212531474, 'avg_acc': 50.0308451573103, 'loss': 5.855497360229492}


EP_train:0:  24%|| 1631/6926 [28:41<1:30:31,  1.03s/it]

{'epoch': 0, 'iter': 1630, 'avg_loss': 6.874029622934553, 'avg_acc': 50.038320049049666, 'loss': 5.714468479156494}


EP_train:0:  24%|| 1641/6926 [28:51<1:30:44,  1.03s/it]

{'epoch': 0, 'iter': 1640, 'avg_loss': 6.86798406969404, 'avg_acc': 50.06093845216332, 'loss': 5.799610614776611}


EP_train:0:  24%|| 1651/6926 [29:02<1:31:29,  1.04s/it]

{'epoch': 0, 'iter': 1650, 'avg_loss': 6.862971934602161, 'avg_acc': 50.10221078134464, 'loss': 6.417799949645996}


EP_train:0:  24%|| 1661/6926 [29:13<1:33:26,  1.06s/it]

{'epoch': 0, 'iter': 1660, 'avg_loss': 6.856526578642531, 'avg_acc': 50.12605358217941, 'loss': 5.834659099578857}


EP_train:0:  24%|| 1671/6926 [29:23<1:34:13,  1.08s/it]

{'epoch': 0, 'iter': 1670, 'avg_loss': 6.849717595347525, 'avg_acc': 50.172052663076, 'loss': 5.60446310043335}


EP_train:0:  24%|| 1681/6926 [29:34<1:36:41,  1.11s/it]

{'epoch': 0, 'iter': 1680, 'avg_loss': 6.844004103429682, 'avg_acc': 50.15987507436051, 'loss': 5.972975254058838}


EP_train:0:  24%|| 1691/6926 [29:45<1:43:08,  1.18s/it]

{'epoch': 0, 'iter': 1690, 'avg_loss': 6.838143073194201, 'avg_acc': 50.16262566528681, 'loss': 5.643266677856445}


EP_train:0:  25%|| 1701/6926 [29:55<1:29:34,  1.03s/it]

{'epoch': 0, 'iter': 1700, 'avg_loss': 6.832474143136466, 'avg_acc': 50.15064667842446, 'loss': 5.720256328582764}


EP_train:0:  25%|| 1711/6926 [30:06<1:29:48,  1.03s/it]

{'epoch': 0, 'iter': 1710, 'avg_loss': 6.827041314680767, 'avg_acc': 50.162551139684396, 'loss': 5.886969566345215}


EP_train:0:  25%|| 1721/6926 [30:16<1:27:17,  1.01s/it]

{'epoch': 0, 'iter': 1720, 'avg_loss': 6.821667699741805, 'avg_acc': 50.152527600232425, 'loss': 5.947781085968018}


EP_train:0:  25%|| 1731/6926 [30:27<1:29:16,  1.03s/it]

{'epoch': 0, 'iter': 1730, 'avg_loss': 6.81731264113278, 'avg_acc': 50.17511554015021, 'loss': 5.636315822601318}


EP_train:0:  25%|| 1741/6926 [30:37<1:27:38,  1.01s/it]

{'epoch': 0, 'iter': 1740, 'avg_loss': 6.8125220221530975, 'avg_acc': 50.183084434233194, 'loss': 6.1584038734436035}


EP_train:0:  25%|| 1751/6926 [30:48<1:29:34,  1.04s/it]

{'epoch': 0, 'iter': 1750, 'avg_loss': 6.807303660124251, 'avg_acc': 50.19988577955454, 'loss': 5.772513389587402}


EP_train:0:  25%|| 1761/6926 [30:59<1:34:17,  1.10s/it]

{'epoch': 0, 'iter': 1760, 'avg_loss': 6.8022896202364675, 'avg_acc': 50.177455990914254, 'loss': 5.807365894317627}


EP_train:0:  26%|| 1771/6926 [31:09<1:35:59,  1.12s/it]

{'epoch': 0, 'iter': 1770, 'avg_loss': 6.797065773634908, 'avg_acc': 50.18704121964992, 'loss': 6.150449752807617}


EP_train:0:  26%|| 1781/6926 [31:20<1:37:57,  1.14s/it]

{'epoch': 0, 'iter': 1780, 'avg_loss': 6.791702678269178, 'avg_acc': 50.200028074115664, 'loss': 5.922349452972412}


EP_train:0:  26%|| 1791/6926 [31:30<1:27:38,  1.02s/it]

{'epoch': 0, 'iter': 1790, 'avg_loss': 6.785977195854762, 'avg_acc': 50.21112506979342, 'loss': 5.5498175621032715}


EP_train:0:  26%|| 1801/6926 [31:40<1:25:37,  1.00s/it]

{'epoch': 0, 'iter': 1800, 'avg_loss': 6.781259625703345, 'avg_acc': 50.21168795113826, 'loss': 6.221451759338379}


EP_train:0:  26%|| 1811/6926 [31:51<1:26:36,  1.02s/it]

{'epoch': 0, 'iter': 1810, 'avg_loss': 6.775866638817622, 'avg_acc': 50.20361678630591, 'loss': 6.104256629943848}


EP_train:0:  26%|| 1821/6926 [32:02<1:26:32,  1.02s/it]

{'epoch': 0, 'iter': 1820, 'avg_loss': 6.770620581214733, 'avg_acc': 50.20764689730917, 'loss': 5.090567588806152}


EP_train:0:  26%|| 1831/6926 [32:12<1:27:25,  1.03s/it]

{'epoch': 0, 'iter': 1830, 'avg_loss': 6.765511302854245, 'avg_acc': 50.19115237575096, 'loss': 5.593067646026611}


EP_train:0:  27%|| 1841/6926 [32:23<1:28:31,  1.04s/it]

{'epoch': 0, 'iter': 1840, 'avg_loss': 6.7607500132240075, 'avg_acc': 50.22236556219446, 'loss': 5.545187950134277}


EP_train:0:  27%|| 1851/6926 [32:34<1:29:58,  1.06s/it]

{'epoch': 0, 'iter': 1850, 'avg_loss': 6.756163328676723, 'avg_acc': 50.22454078876283, 'loss': 5.513035297393799}


EP_train:0:  27%|| 1861/6926 [32:44<1:33:50,  1.11s/it]

{'epoch': 0, 'iter': 1860, 'avg_loss': 6.7506129135693484, 'avg_acc': 50.226692638366465, 'loss': 5.737318515777588}


EP_train:0:  27%|| 1871/6926 [32:55<1:33:39,  1.11s/it]

{'epoch': 0, 'iter': 1870, 'avg_loss': 6.746635813180423, 'avg_acc': 50.210448957776585, 'loss': 6.347056865692139}


EP_train:0:  27%|| 1881/6926 [33:05<1:25:54,  1.02s/it]

{'epoch': 0, 'iter': 1880, 'avg_loss': 6.742399038500383, 'avg_acc': 50.210991493886226, 'loss': 5.865935802459717}


EP_train:0:  27%|| 1891/6926 [33:16<1:26:49,  1.03s/it]

{'epoch': 0, 'iter': 1890, 'avg_loss': 6.737557448009408, 'avg_acc': 50.20657059756742, 'loss': 5.4081244468688965}


EP_train:0:  27%|| 1901/6926 [33:26<1:23:38,  1.00it/s]

{'epoch': 0, 'iter': 1900, 'avg_loss': 6.733255281754383, 'avg_acc': 50.19233298264072, 'loss': 6.422271728515625}


EP_train:0:  28%|| 1911/6926 [33:37<1:27:02,  1.04s/it]

{'epoch': 0, 'iter': 1910, 'avg_loss': 6.728110748332935, 'avg_acc': 50.191326530612244, 'loss': 5.532033920288086}


EP_train:0:  28%|| 1921/6926 [33:48<1:25:14,  1.02s/it]

{'epoch': 0, 'iter': 1920, 'avg_loss': 6.723259248440617, 'avg_acc': 50.17080947423217, 'loss': 5.853057384490967}


EP_train:0:  28%|| 1931/6926 [33:58<1:26:51,  1.04s/it]

{'epoch': 0, 'iter': 1930, 'avg_loss': 6.719184641638054, 'avg_acc': 50.200673226307615, 'loss': 5.973474025726318}


EP_train:0:  28%|| 1941/6926 [34:09<1:28:22,  1.06s/it]

{'epoch': 0, 'iter': 1940, 'avg_loss': 6.714842352162066, 'avg_acc': 50.18997939206594, 'loss': 6.1005353927612305}


EP_train:0:  28%|| 1951/6926 [34:20<1:29:19,  1.08s/it]

{'epoch': 0, 'iter': 1950, 'avg_loss': 6.710343708202571, 'avg_acc': 50.17939518195797, 'loss': 5.970518589019775}


EP_train:0:  28%|| 1961/6926 [34:31<1:31:41,  1.11s/it]

{'epoch': 0, 'iter': 1960, 'avg_loss': 6.706847814319694, 'avg_acc': 50.170512493625694, 'loss': 6.204665184020996}


EP_train:0:  28%|| 1971/6926 [34:41<1:36:39,  1.17s/it]

{'epoch': 0, 'iter': 1970, 'avg_loss': 6.703173876655459, 'avg_acc': 50.1759893455099, 'loss': 5.713106155395508}


EP_train:0:  29%|| 1981/6926 [34:51<1:23:46,  1.02s/it]

{'epoch': 0, 'iter': 1980, 'avg_loss': 6.699462405603862, 'avg_acc': 50.17510095911156, 'loss': 5.826193809509277}


EP_train:0:  29%|| 1991/6926 [35:02<1:23:19,  1.01s/it]

{'epoch': 0, 'iter': 1990, 'avg_loss': 6.694920545124876, 'avg_acc': 50.178930185836265, 'loss': 5.616783142089844}


EP_train:0:  29%|| 2001/6926 [35:13<1:24:52,  1.03s/it]

{'epoch': 0, 'iter': 2000, 'avg_loss': 6.6905195523595165, 'avg_acc': 50.18896801599201, 'loss': 5.569193363189697}


EP_train:0:  29%|| 2011/6926 [35:23<1:23:35,  1.02s/it]

{'epoch': 0, 'iter': 2010, 'avg_loss': 6.686490006022404, 'avg_acc': 50.18025857782198, 'loss': 6.095041751861572}


EP_train:0:  29%|| 2021/6926 [35:34<1:24:08,  1.03s/it]

{'epoch': 0, 'iter': 2020, 'avg_loss': 6.682471108991046, 'avg_acc': 50.179366650173186, 'loss': 5.828011512756348}


EP_train:0:  29%|| 2031/6926 [35:45<1:25:34,  1.05s/it]

{'epoch': 0, 'iter': 2030, 'avg_loss': 6.678483116632609, 'avg_acc': 50.200024618414574, 'loss': 5.92686128616333}


EP_train:0:  29%|| 2041/6926 [35:55<1:26:06,  1.06s/it]

{'epoch': 0, 'iter': 2040, 'avg_loss': 6.674230076763222, 'avg_acc': 50.179140127388536, 'loss': 5.766122341156006}


EP_train:0:  30%|| 2051/6926 [36:06<1:27:41,  1.08s/it]

{'epoch': 0, 'iter': 2050, 'avg_loss': 6.669633532430648, 'avg_acc': 50.16455387615797, 'loss': 5.476725101470947}


EP_train:0:  30%|| 2061/6926 [36:17<1:29:04,  1.10s/it]

{'epoch': 0, 'iter': 2060, 'avg_loss': 6.6649786073683766, 'avg_acc': 50.1910480349345, 'loss': 5.625283241271973}


EP_train:0:  30%|| 2071/6926 [36:27<1:31:28,  1.13s/it]

{'epoch': 0, 'iter': 2070, 'avg_loss': 6.660440164508018, 'avg_acc': 50.1659826170932, 'loss': 5.775404453277588}


EP_train:0:  30%|| 2081/6926 [36:37<1:21:05,  1.00s/it]

{'epoch': 0, 'iter': 2080, 'avg_loss': 6.656626000191259, 'avg_acc': 50.19071359923114, 'loss': 5.768825531005859}


EP_train:0:  30%|| 2091/6926 [36:48<1:21:22,  1.01s/it]

{'epoch': 0, 'iter': 2090, 'avg_loss': 6.652812512852483, 'avg_acc': 50.20026303204208, 'loss': 5.5225934982299805}


EP_train:0:  30%|| 2101/6926 [36:58<1:21:46,  1.02s/it]

{'epoch': 0, 'iter': 2100, 'avg_loss': 6.648280479861463, 'avg_acc': 50.1844359828653, 'loss': 5.973301887512207}


EP_train:0:  30%|| 2111/6926 [37:09<1:22:17,  1.03s/it]

{'epoch': 0, 'iter': 2110, 'avg_loss': 6.642982643512345, 'avg_acc': 50.161357176693514, 'loss': 5.404656887054443}


EP_train:0:  31%|| 2121/6926 [37:20<1:22:54,  1.04s/it]

{'epoch': 0, 'iter': 2120, 'avg_loss': 6.638651742400116, 'avg_acc': 50.17238330975955, 'loss': 5.476313591003418}


EP_train:0:  31%|| 2131/6926 [37:30<1:25:40,  1.07s/it]

{'epoch': 0, 'iter': 2130, 'avg_loss': 6.634868952770403, 'avg_acc': 50.1657085875176, 'loss': 6.301799297332764}


EP_train:0:  31%|| 2141/6926 [37:41<1:26:56,  1.09s/it]

{'epoch': 0, 'iter': 2140, 'avg_loss': 6.630644671552819, 'avg_acc': 50.1605558150397, 'loss': 6.115932941436768}


EP_train:0:  31%|| 2151/6926 [37:52<1:30:24,  1.14s/it]

{'epoch': 0, 'iter': 2150, 'avg_loss': 6.627506532540492, 'avg_acc': 50.16997907949791, 'loss': 5.963101387023926}


EP_train:0:  31%|| 2161/6926 [38:02<1:32:05,  1.16s/it]

{'epoch': 0, 'iter': 2160, 'avg_loss': 6.6236171821707215, 'avg_acc': 50.14894724664507, 'loss': 5.62955904006958}


EP_train:0:  31%|| 2171/6926 [38:12<1:21:09,  1.02s/it]

{'epoch': 0, 'iter': 2170, 'avg_loss': 6.619760489364968, 'avg_acc': 50.151140027637034, 'loss': 5.519279956817627}


EP_train:0:  31%|| 2181/6926 [38:23<1:19:58,  1.01s/it]

{'epoch': 0, 'iter': 2180, 'avg_loss': 6.616379540647623, 'avg_acc': 50.14758138468592, 'loss': 5.860677719116211}


EP_train:0:  32%|| 2191/6926 [38:34<1:21:32,  1.03s/it]

{'epoch': 0, 'iter': 2190, 'avg_loss': 6.612961196507672, 'avg_acc': 50.146907804655406, 'loss': 6.12660551071167}


EP_train:0:  32%|| 2201/6926 [38:44<1:20:18,  1.02s/it]

{'epoch': 0, 'iter': 2200, 'avg_loss': 6.60921959809854, 'avg_acc': 50.16327805542935, 'loss': 5.606520652770996}


EP_train:0:  32%|| 2211/6926 [38:55<1:20:47,  1.03s/it]

{'epoch': 0, 'iter': 2210, 'avg_loss': 6.605899369754386, 'avg_acc': 50.1696065128901, 'loss': 5.821345806121826}


EP_train:0:  32%|| 2221/6926 [39:05<1:22:14,  1.05s/it]

{'epoch': 0, 'iter': 2220, 'avg_loss': 6.602803445409838, 'avg_acc': 50.17024988743809, 'loss': 6.050590991973877}


EP_train:0:  32%|| 2231/6926 [39:16<1:23:38,  1.07s/it]

{'epoch': 0, 'iter': 2230, 'avg_loss': 6.599050783886604, 'avg_acc': 50.187696100403414, 'loss': 5.679413795471191}


EP_train:0:  32%|| 2241/6926 [39:27<1:25:13,  1.09s/it]

{'epoch': 0, 'iter': 2240, 'avg_loss': 6.595538343823631, 'avg_acc': 50.19661981258366, 'loss': 5.536213397979736}


EP_train:0:  33%|| 2251/6926 [39:37<1:29:30,  1.15s/it]

{'epoch': 0, 'iter': 2250, 'avg_loss': 6.591809201590065, 'avg_acc': 50.18741670368725, 'loss': 6.001389980316162}


EP_train:0:  33%|| 2261/6926 [39:47<1:19:01,  1.02s/it]

{'epoch': 0, 'iter': 2260, 'avg_loss': 6.588373744978097, 'avg_acc': 50.17138434321097, 'loss': 5.835084438323975}


EP_train:0:  33%|| 2271/6926 [39:58<1:19:46,  1.03s/it]

{'epoch': 0, 'iter': 2270, 'avg_loss': 6.585039730693526, 'avg_acc': 50.15274108322325, 'loss': 5.925197601318359}


EP_train:0:  33%|| 2281/6926 [40:09<1:20:26,  1.04s/it]

{'epoch': 0, 'iter': 2280, 'avg_loss': 6.581994341424243, 'avg_acc': 50.15755151249452, 'loss': 5.881197452545166}


EP_train:0:  33%|| 2291/6926 [40:19<1:20:15,  1.04s/it]

{'epoch': 0, 'iter': 2290, 'avg_loss': 6.5782785463520455, 'avg_acc': 50.16231994762113, 'loss': 5.500590801239014}


EP_train:0:  33%|| 2301/6926 [40:30<1:21:27,  1.06s/it]

{'epoch': 0, 'iter': 2300, 'avg_loss': 6.575129363289195, 'avg_acc': 50.15210777922642, 'loss': 6.223466873168945}


EP_train:0:  33%|| 2311/6926 [40:41<1:21:32,  1.06s/it]

{'epoch': 0, 'iter': 2310, 'avg_loss': 6.571412661126144, 'avg_acc': 50.144688446559925, 'loss': 5.741537570953369}


EP_train:0:  34%|| 2321/6926 [40:52<1:21:04,  1.06s/it]

{'epoch': 0, 'iter': 2320, 'avg_loss': 6.568281281153635, 'avg_acc': 50.14137225333908, 'loss': 5.392557144165039}


EP_train:0:  34%|| 2331/6926 [41:03<1:21:13,  1.06s/it]

{'epoch': 0, 'iter': 2330, 'avg_loss': 6.564601187949424, 'avg_acc': 50.15149077649078, 'loss': 5.792973518371582}


EP_train:0:  34%|| 2341/6926 [41:13<1:22:51,  1.08s/it]

{'epoch': 0, 'iter': 2340, 'avg_loss': 6.56130920582885, 'avg_acc': 50.141499359248186, 'loss': 5.537982940673828}


EP_train:0:  34%|| 2351/6926 [41:24<1:26:09,  1.13s/it]

{'epoch': 0, 'iter': 2350, 'avg_loss': 6.558088328370841, 'avg_acc': 50.14222671203743, 'loss': 5.9580979347229}


EP_train:0:  34%|| 2361/6926 [41:34<1:27:28,  1.15s/it]

{'epoch': 0, 'iter': 2360, 'avg_loss': 6.554913546664388, 'avg_acc': 50.13500635324015, 'loss': 5.721664905548096}


EP_train:0:  34%|| 2371/6926 [41:44<1:16:24,  1.01s/it]

{'epoch': 0, 'iter': 2370, 'avg_loss': 6.551347711628271, 'avg_acc': 50.10939477013918, 'loss': 6.064119338989258}


EP_train:0:  34%|| 2381/6926 [41:55<1:17:08,  1.02s/it]

{'epoch': 0, 'iter': 2380, 'avg_loss': 6.547987180515179, 'avg_acc': 50.1154976900462, 'loss': 5.776705265045166}


EP_train:0:  35%|| 2391/6926 [42:06<1:16:14,  1.01s/it]

{'epoch': 0, 'iter': 2390, 'avg_loss': 6.545582041985717, 'avg_acc': 50.11240066917607, 'loss': 5.570878505706787}


EP_train:0:  35%|| 2401/6926 [42:16<1:18:14,  1.04s/it]

{'epoch': 0, 'iter': 2400, 'avg_loss': 6.5425588793677125, 'avg_acc': 50.114535610162434, 'loss': 5.40133810043335}


EP_train:0:  35%|| 2411/6926 [42:27<1:18:15,  1.04s/it]

{'epoch': 0, 'iter': 2410, 'avg_loss': 6.540026389825695, 'avg_acc': 50.11276441310659, 'loss': 6.0127129554748535}


EP_train:0:  35%|| 2421/6926 [42:38<1:18:38,  1.05s/it]

{'epoch': 0, 'iter': 2420, 'avg_loss': 6.536510946336042, 'avg_acc': 50.11229863692689, 'loss': 5.609666347503662}


EP_train:0:  35%|| 2431/6926 [42:48<1:21:16,  1.08s/it]

{'epoch': 0, 'iter': 2430, 'avg_loss': 6.533528167884667, 'avg_acc': 50.12083504730563, 'loss': 5.769003391265869}


EP_train:0:  35%|| 2441/6926 [42:59<1:22:38,  1.11s/it]

{'epoch': 0, 'iter': 2440, 'avg_loss': 6.530547814406317, 'avg_acc': 50.12930151577223, 'loss': 5.6305646896362305}


EP_train:0:  35%|| 2451/6926 [43:10<1:26:42,  1.16s/it]

{'epoch': 0, 'iter': 2450, 'avg_loss': 6.527225849823969, 'avg_acc': 50.146623827009385, 'loss': 5.920275688171387}


EP_train:0:  36%|| 2461/6926 [43:20<1:16:02,  1.02s/it]

{'epoch': 0, 'iter': 2460, 'avg_loss': 6.524224850958026, 'avg_acc': 50.16126574563186, 'loss': 5.73220682144165}


EP_train:0:  36%|| 2471/6926 [43:31<1:15:45,  1.02s/it]

{'epoch': 0, 'iter': 2470, 'avg_loss': 6.521326477640508, 'avg_acc': 50.147966410360176, 'loss': 5.663172245025635}


EP_train:0:  36%|| 2481/6926 [43:41<1:14:40,  1.01s/it]

{'epoch': 0, 'iter': 2480, 'avg_loss': 6.517880214697312, 'avg_acc': 50.1473700120919, 'loss': 5.578065395355225}


EP_train:0:  36%|| 2491/6926 [43:52<1:15:21,  1.02s/it]

{'epoch': 0, 'iter': 2490, 'avg_loss': 6.515109263190014, 'avg_acc': 50.1593235648334, 'loss': 5.818114280700684}


EP_train:0:  36%|| 2501/6926 [44:02<1:16:00,  1.03s/it]

{'epoch': 0, 'iter': 2500, 'avg_loss': 6.511557092098464, 'avg_acc': 50.14619152339065, 'loss': 5.816467761993408}


EP_train:0:  36%|| 2511/6926 [44:13<1:18:02,  1.06s/it]

{'epoch': 0, 'iter': 2510, 'avg_loss': 6.5081087271562374, 'avg_acc': 50.15183193946635, 'loss': 5.368273735046387}


EP_train:0:  36%|| 2521/6926 [44:24<1:20:38,  1.10s/it]

{'epoch': 0, 'iter': 2520, 'avg_loss': 6.505550715716194, 'avg_acc': 50.1326358587862, 'loss': 6.118330001831055}


EP_train:0:  37%|| 2531/6926 [44:34<1:22:05,  1.12s/it]

{'epoch': 0, 'iter': 2530, 'avg_loss': 6.502726748167691, 'avg_acc': 50.14569340181746, 'loss': 5.3303446769714355}


EP_train:0:  37%|| 2541/6926 [44:45<1:24:11,  1.15s/it]

{'epoch': 0, 'iter': 2540, 'avg_loss': 6.499706906908283, 'avg_acc': 50.13774104683195, 'loss': 5.427259922027588}


EP_train:0:  37%|| 2551/6926 [44:55<1:17:55,  1.07s/it]

{'epoch': 0, 'iter': 2550, 'avg_loss': 6.49681183255359, 'avg_acc': 50.140876127009015, 'loss': 5.143314361572266}


EP_train:0:  37%|| 2561/6926 [45:06<1:13:28,  1.01s/it]

{'epoch': 0, 'iter': 2560, 'avg_loss': 6.493576974805469, 'avg_acc': 50.13666536509176, 'loss': 5.705864906311035}


EP_train:0:  37%|| 2571/6926 [45:16<1:14:18,  1.02s/it]

{'epoch': 0, 'iter': 2570, 'avg_loss': 6.4908617587926, 'avg_acc': 50.142211201866985, 'loss': 6.148105621337891}


EP_train:0:  37%|| 2581/6926 [45:27<1:13:37,  1.02s/it]

{'epoch': 0, 'iter': 2580, 'avg_loss': 6.4887205609421725, 'avg_acc': 50.14650329329717, 'loss': 5.894054889678955}


EP_train:0:  37%|| 2591/6926 [45:38<1:13:59,  1.02s/it]

{'epoch': 0, 'iter': 2590, 'avg_loss': 6.485887799839033, 'avg_acc': 50.14473176379776, 'loss': 5.891757011413574}


EP_train:0:  38%|| 2601/6926 [45:48<1:16:15,  1.06s/it]

{'epoch': 0, 'iter': 2600, 'avg_loss': 6.4831008502310485, 'avg_acc': 50.133362168396765, 'loss': 6.085303783416748}


EP_train:0:  38%|| 2611/6926 [45:59<1:16:12,  1.06s/it]

{'epoch': 0, 'iter': 2610, 'avg_loss': 6.4804205801330825, 'avg_acc': 50.13045767905018, 'loss': 5.750835418701172}


EP_train:0:  38%|| 2621/6926 [46:10<1:17:05,  1.07s/it]

{'epoch': 0, 'iter': 2620, 'avg_loss': 6.477424587379289, 'avg_acc': 50.13592140404426, 'loss': 5.227821350097656}


EP_train:0:  38%|| 2631/6926 [46:20<1:20:23,  1.12s/it]

{'epoch': 0, 'iter': 2630, 'avg_loss': 6.474905843926853, 'avg_acc': 50.14371911820601, 'loss': 6.208372116088867}


EP_train:0:  38%|| 2641/6926 [46:31<1:23:09,  1.16s/it]

{'epoch': 0, 'iter': 2640, 'avg_loss': 6.472421085685548, 'avg_acc': 50.136075350246124, 'loss': 5.917738437652588}


EP_train:0:  38%|| 2651/6926 [46:41<1:12:30,  1.02s/it]

{'epoch': 0, 'iter': 2650, 'avg_loss': 6.469960436466638, 'avg_acc': 50.132025650697855, 'loss': 6.1434173583984375}


EP_train:0:  38%|| 2661/6926 [46:52<1:12:17,  1.02s/it]

{'epoch': 0, 'iter': 2660, 'avg_loss': 6.46737302975832, 'avg_acc': 50.125657647500944, 'loss': 5.767868518829346}


EP_train:0:  39%|| 2671/6926 [47:02<1:12:49,  1.03s/it]

{'epoch': 0, 'iter': 2670, 'avg_loss': 6.464865518125065, 'avg_acc': 50.127527143391994, 'loss': 6.10287618637085}


EP_train:0:  39%|| 2681/6926 [47:13<1:12:06,  1.02s/it]

{'epoch': 0, 'iter': 2680, 'avg_loss': 6.462005039508198, 'avg_acc': 50.145701230884, 'loss': 5.823858261108398}


EP_train:0:  39%|| 2691/6926 [47:23<1:11:34,  1.01s/it]

{'epoch': 0, 'iter': 2690, 'avg_loss': 6.459849459847656, 'avg_acc': 50.155611296915644, 'loss': 5.407562255859375}


EP_train:0:  39%|| 2701/6926 [47:34<1:13:35,  1.05s/it]

{'epoch': 0, 'iter': 2700, 'avg_loss': 6.457773432648654, 'avg_acc': 50.156192151055166, 'loss': 5.618403434753418}


EP_train:0:  39%|| 2711/6926 [47:45<1:14:49,  1.07s/it]

{'epoch': 0, 'iter': 2710, 'avg_loss': 6.455287011687231, 'avg_acc': 50.1579214312062, 'loss': 5.828303813934326}


EP_train:0:  39%|| 2721/6926 [47:55<1:16:27,  1.09s/it]

{'epoch': 0, 'iter': 2720, 'avg_loss': 6.452864876830898, 'avg_acc': 50.150450202131566, 'loss': 5.561725616455078}


EP_train:0:  39%|| 2731/6926 [48:06<1:19:09,  1.13s/it]

{'epoch': 0, 'iter': 2730, 'avg_loss': 6.44926049708447, 'avg_acc': 50.16248626876602, 'loss': 5.502737522125244}


EP_train:0:  40%|| 2741/6926 [48:16<1:13:15,  1.05s/it]

{'epoch': 0, 'iter': 2740, 'avg_loss': 6.4468203664304395, 'avg_acc': 50.16759394381612, 'loss': 5.836330890655518}


EP_train:0:  40%|| 2751/6926 [48:27<1:10:25,  1.01s/it]

{'epoch': 0, 'iter': 2750, 'avg_loss': 6.444630595303933, 'avg_acc': 50.153353326063254, 'loss': 5.762578010559082}


EP_train:0:  40%|| 2761/6926 [48:37<1:10:39,  1.02s/it]

{'epoch': 0, 'iter': 2760, 'avg_loss': 6.442438746828482, 'avg_acc': 50.15053422672945, 'loss': 5.868409633636475}


EP_train:0:  40%|| 2771/6926 [48:48<1:11:33,  1.03s/it]

{'epoch': 0, 'iter': 2770, 'avg_loss': 6.440172820949933, 'avg_acc': 50.15675748827139, 'loss': 5.827479839324951}


EP_train:0:  40%|| 2781/6926 [48:59<1:10:23,  1.02s/it]

{'epoch': 0, 'iter': 2780, 'avg_loss': 6.437768684621767, 'avg_acc': 50.157317511686436, 'loss': 5.531793117523193}


EP_train:0:  40%|| 2791/6926 [49:09<1:12:40,  1.05s/it]

{'epoch': 0, 'iter': 2790, 'avg_loss': 6.435573878573585, 'avg_acc': 50.15003582945181, 'loss': 5.464407444000244}


EP_train:0:  40%|| 2801/6926 [49:20<1:13:35,  1.07s/it]

{'epoch': 0, 'iter': 2800, 'avg_loss': 6.433339781184062, 'avg_acc': 50.12718671902891, 'loss': 5.859269142150879}


EP_train:0:  41%|| 2811/6926 [49:31<1:14:14,  1.08s/it]

{'epoch': 0, 'iter': 2810, 'avg_loss': 6.430765678008143, 'avg_acc': 50.130069370330844, 'loss': 5.680543899536133}


EP_train:0:  41%|| 2821/6926 [49:41<1:19:37,  1.16s/it]

{'epoch': 0, 'iter': 2820, 'avg_loss': 6.42884492265633, 'avg_acc': 50.11742289968096, 'loss': 5.3554863929748535}


EP_train:0:  41%|| 2831/6926 [49:52<1:16:00,  1.11s/it]

{'epoch': 0, 'iter': 2830, 'avg_loss': 6.426424810381922, 'avg_acc': 50.136877428470505, 'loss': 5.474422454833984}


EP_train:0:  41%|| 2841/6926 [50:02<1:09:03,  1.01s/it]

{'epoch': 0, 'iter': 2840, 'avg_loss': 6.423819566771666, 'avg_acc': 50.146295318549804, 'loss': 5.503238201141357}


EP_train:0:  41%|| 2851/6926 [50:13<1:09:14,  1.02s/it]

{'epoch': 0, 'iter': 2850, 'avg_loss': 6.421846850384164, 'avg_acc': 50.1304366888811, 'loss': 5.392838001251221}


EP_train:0:  41%|| 2861/6926 [50:24<1:08:49,  1.02s/it]

{'epoch': 0, 'iter': 2860, 'avg_loss': 6.419139340056359, 'avg_acc': 50.12779622509612, 'loss': 5.29177713394165}


EP_train:0:  41%|| 2871/6926 [50:34<1:07:58,  1.01s/it]

{'epoch': 0, 'iter': 2870, 'avg_loss': 6.4161860422156325, 'avg_acc': 50.11537791710205, 'loss': 5.3471455574035645}


EP_train:0:  42%|| 2881/6926 [50:45<1:11:07,  1.06s/it]

{'epoch': 0, 'iter': 2880, 'avg_loss': 6.413429663935207, 'avg_acc': 50.13016313779938, 'loss': 5.678802490234375}


EP_train:0:  42%|| 2891/6926 [50:55<1:09:58,  1.04s/it]

{'epoch': 0, 'iter': 2890, 'avg_loss': 6.411521860160287, 'avg_acc': 50.15025077827742, 'loss': 5.964892864227295}


EP_train:0:  42%|| 2901/6926 [51:06<1:12:32,  1.08s/it]

{'epoch': 0, 'iter': 2900, 'avg_loss': 6.409012636370759, 'avg_acc': 50.141115132712855, 'loss': 6.291670799255371}


EP_train:0:  42%|| 2911/6926 [51:17<1:15:21,  1.13s/it]

{'epoch': 0, 'iter': 2910, 'avg_loss': 6.406818781550993, 'avg_acc': 50.14814496736517, 'loss': 5.938815593719482}


EP_train:0:  42%|| 2921/6926 [51:27<1:19:17,  1.19s/it]

{'epoch': 0, 'iter': 2920, 'avg_loss': 6.404492542458821, 'avg_acc': 50.15512666894899, 'loss': 5.839562892913818}


EP_train:0:  42%|| 2931/6926 [51:38<1:09:56,  1.05s/it]

{'epoch': 0, 'iter': 2930, 'avg_loss': 6.40282616750326, 'avg_acc': 50.15246502900034, 'loss': 5.90681266784668}


EP_train:0:  42%|| 2941/6926 [51:48<1:08:08,  1.03s/it]

{'epoch': 0, 'iter': 2940, 'avg_loss': 6.400508930390483, 'avg_acc': 50.14982148928936, 'loss': 5.549468994140625}


EP_train:0:  43%|| 2951/6926 [51:59<1:06:55,  1.01s/it]

{'epoch': 0, 'iter': 2950, 'avg_loss': 6.399007873509302, 'avg_acc': 50.16096238563199, 'loss': 6.210590839385986}


EP_train:0:  43%|| 2961/6926 [52:09<1:08:19,  1.03s/it]

{'epoch': 0, 'iter': 2960, 'avg_loss': 6.396820126494537, 'avg_acc': 50.16991725768322, 'loss': 6.600347995758057}


EP_train:0:  43%|| 2971/6926 [52:20<1:08:09,  1.03s/it]

{'epoch': 0, 'iter': 2970, 'avg_loss': 6.39411891987331, 'avg_acc': 50.16303433187479, 'loss': 5.519732475280762}


EP_train:0:  43%|| 2981/6926 [52:31<1:08:43,  1.05s/it]

{'epoch': 0, 'iter': 2980, 'avg_loss': 6.392197080443906, 'avg_acc': 50.14781113720228, 'loss': 5.890153408050537}


EP_train:0:  43%|| 2991/6926 [52:41<1:11:03,  1.08s/it]

{'epoch': 0, 'iter': 2990, 'avg_loss': 6.390189569397381, 'avg_acc': 50.16507856904045, 'loss': 5.474836826324463}


EP_train:0:  43%|| 3001/6926 [52:52<1:13:36,  1.13s/it]

{'epoch': 0, 'iter': 3000, 'avg_loss': 6.388116893113672, 'avg_acc': 50.16036321226258, 'loss': 5.938516139984131}


EP_train:0:  43%|| 3011/6926 [53:02<1:12:59,  1.12s/it]

{'epoch': 0, 'iter': 3010, 'avg_loss': 6.38585108960043, 'avg_acc': 50.146338425772164, 'loss': 5.361569881439209}


EP_train:0:  44%|| 3021/6926 [53:13<1:06:19,  1.02s/it]

{'epoch': 0, 'iter': 3020, 'avg_loss': 6.383890932658302, 'avg_acc': 50.134475339291626, 'loss': 5.7767438888549805}


EP_train:0:  44%|| 3031/6926 [53:23<1:06:31,  1.02s/it]

{'epoch': 0, 'iter': 3030, 'avg_loss': 6.382248206092988, 'avg_acc': 50.1474348399868, 'loss': 5.66779088973999}


EP_train:0:  44%|| 3041/6926 [53:34<1:05:27,  1.01s/it]

{'epoch': 0, 'iter': 3040, 'avg_loss': 6.380212851709067, 'avg_acc': 50.14797763893456, 'loss': 6.113137722015381}


EP_train:0:  44%|| 3051/6926 [53:44<1:05:44,  1.02s/it]

{'epoch': 0, 'iter': 3050, 'avg_loss': 6.377583488250552, 'avg_acc': 50.13417731891183, 'loss': 5.769194602966309}


EP_train:0:  44%|| 3061/6926 [53:55<1:06:26,  1.03s/it]

{'epoch': 0, 'iter': 3060, 'avg_loss': 6.375416658470654, 'avg_acc': 50.139864423391046, 'loss': 5.404829502105713}


EP_train:0:  44%|| 3071/6926 [54:06<1:07:49,  1.06s/it]

{'epoch': 0, 'iter': 3070, 'avg_loss': 6.373226478487224, 'avg_acc': 50.147549658091826, 'loss': 5.474933624267578}


EP_train:0:  44%|| 3081/6926 [54:16<1:08:48,  1.07s/it]

{'epoch': 0, 'iter': 3080, 'avg_loss': 6.371788985076875, 'avg_acc': 50.143013631937684, 'loss': 5.44235372543335}


EP_train:0:  45%|| 3091/6926 [54:27<1:12:30,  1.13s/it]

{'epoch': 0, 'iter': 3090, 'avg_loss': 6.369530301077086, 'avg_acc': 50.13345195729537, 'loss': 6.127355575561523}


EP_train:0:  45%|| 3101/6926 [54:37<1:09:11,  1.09s/it]

{'epoch': 0, 'iter': 3100, 'avg_loss': 6.367139976511306, 'avg_acc': 50.11992099322799, 'loss': 5.544936180114746}


EP_train:0:  45%|| 3111/6926 [54:48<1:04:29,  1.01s/it]

{'epoch': 0, 'iter': 3110, 'avg_loss': 6.3655503011523855, 'avg_acc': 50.10848601735777, 'loss': 5.871435165405273}


EP_train:0:  45%|| 3121/6926 [54:58<1:05:04,  1.03s/it]

{'epoch': 0, 'iter': 3120, 'avg_loss': 6.363917445800351, 'avg_acc': 50.116148670297974, 'loss': 5.615546226501465}


EP_train:0:  45%|| 3131/6926 [55:09<1:04:19,  1.02s/it]

{'epoch': 0, 'iter': 3130, 'avg_loss': 6.36206200754098, 'avg_acc': 50.121766208878945, 'loss': 5.7621846199035645}


EP_train:0:  45%|| 3141/6926 [55:20<1:06:12,  1.05s/it]

{'epoch': 0, 'iter': 3140, 'avg_loss': 6.360197596168943, 'avg_acc': 50.10943966889526, 'loss': 5.810629844665527}


EP_train:0:  45%|| 3151/6926 [55:30<1:05:02,  1.03s/it]

{'epoch': 0, 'iter': 3150, 'avg_loss': 6.3586348076919945, 'avg_acc': 50.1001666137734, 'loss': 5.642720699310303}


EP_train:0:  46%|| 3161/6926 [55:41<1:06:04,  1.05s/it]

{'epoch': 0, 'iter': 3160, 'avg_loss': 6.356741119735944, 'avg_acc': 50.119621955077506, 'loss': 6.018698215484619}


EP_train:0:  46%|| 3171/6926 [55:51<1:06:06,  1.06s/it]

{'epoch': 0, 'iter': 3170, 'avg_loss': 6.355371722284833, 'avg_acc': 50.11234626300851, 'loss': 6.033522605895996}


EP_train:0:  46%|| 3181/6926 [56:02<1:10:47,  1.13s/it]

{'epoch': 0, 'iter': 3180, 'avg_loss': 6.353509297201522, 'avg_acc': 50.10118673373153, 'loss': 6.011741638183594}


EP_train:0:  46%|| 3191/6926 [56:12<1:12:33,  1.17s/it]

{'epoch': 0, 'iter': 3190, 'avg_loss': 6.352050459672575, 'avg_acc': 50.10380758382952, 'loss': 6.509175777435303}


EP_train:0:  46%|| 3201/6926 [56:22<1:02:28,  1.01s/it]

{'epoch': 0, 'iter': 3200, 'avg_loss': 6.349834076578413, 'avg_acc': 50.09860199937519, 'loss': 5.717796802520752}


EP_train:0:  46%|| 3211/6926 [56:33<1:02:47,  1.01s/it]

{'epoch': 0, 'iter': 3210, 'avg_loss': 6.3487103232636475, 'avg_acc': 50.1099735284958, 'loss': 5.876096725463867}


EP_train:0:  47%|| 3221/6926 [56:44<1:03:24,  1.03s/it]

{'epoch': 0, 'iter': 3220, 'avg_loss': 6.346623843517854, 'avg_acc': 50.10575131946601, 'loss': 5.324923515319824}


EP_train:0:  47%|| 3231/6926 [56:54<1:03:59,  1.04s/it]

{'epoch': 0, 'iter': 3230, 'avg_loss': 6.344683274511434, 'avg_acc': 50.13057103064067, 'loss': 5.667742729187012}


EP_train:0:  47%|| 3241/6926 [57:05<1:03:29,  1.03s/it]

{'epoch': 0, 'iter': 3240, 'avg_loss': 6.342965867114045, 'avg_acc': 50.13884603517433, 'loss': 5.160613059997559}


EP_train:0:  47%|| 3251/6926 [57:16<1:04:14,  1.05s/it]

{'epoch': 0, 'iter': 3250, 'avg_loss': 6.341320573348553, 'avg_acc': 50.12784527837588, 'loss': 5.781716823577881}


EP_train:0:  47%|| 3261/6926 [57:26<1:06:53,  1.10s/it]

{'epoch': 0, 'iter': 3260, 'avg_loss': 6.339417527329516, 'avg_acc': 50.12553664520086, 'loss': 5.510043144226074}


EP_train:0:  47%|| 3271/6926 [57:37<1:07:44,  1.11s/it]

{'epoch': 0, 'iter': 3270, 'avg_loss': 6.3375777447489705, 'avg_acc': 50.123242127789666, 'loss': 5.90158748626709}


EP_train:0:  47%|| 3281/6926 [57:47<1:02:47,  1.03s/it]

{'epoch': 0, 'iter': 3280, 'avg_loss': 6.335706403145736, 'avg_acc': 50.106674794270035, 'loss': 5.725871562957764}


EP_train:0:  48%|| 3291/6926 [57:58<1:01:19,  1.01s/it]

{'epoch': 0, 'iter': 3290, 'avg_loss': 6.333827843169099, 'avg_acc': 50.100653296870256, 'loss': 5.98870849609375}


EP_train:0:  48%|| 3301/6926 [58:08<1:01:35,  1.02s/it]

{'epoch': 0, 'iter': 3300, 'avg_loss': 6.3320491001195025, 'avg_acc': 50.09656164798546, 'loss': 5.572929382324219}


EP_train:0:  48%|| 3311/6926 [58:19<1:00:33,  1.01s/it]

{'epoch': 0, 'iter': 3310, 'avg_loss': 6.3304820341561285, 'avg_acc': 50.10476442162489, 'loss': 5.420722961425781}


EP_train:0:  48%|| 3321/6926 [58:29<1:01:00,  1.02s/it]

{'epoch': 0, 'iter': 3320, 'avg_loss': 6.3281727843670845, 'avg_acc': 50.09880307136405, 'loss': 5.452885150909424}


EP_train:0:  48%|| 3331/6926 [58:40<1:01:31,  1.03s/it]

{'epoch': 0, 'iter': 3330, 'avg_loss': 6.326336812271581, 'avg_acc': 50.08818673071149, 'loss': 5.489525318145752}


EP_train:0:  48%|| 3341/6926 [58:50<1:03:10,  1.06s/it]

{'epoch': 0, 'iter': 3340, 'avg_loss': 6.324856253877724, 'avg_acc': 50.08137533672553, 'loss': 5.95841646194458}


EP_train:0:  48%|| 3351/6926 [59:01<1:05:35,  1.10s/it]

{'epoch': 0, 'iter': 3350, 'avg_loss': 6.3230214017926665, 'avg_acc': 50.06807669352432, 'loss': 5.665799617767334}


EP_train:0:  49%|| 3361/6926 [59:12<1:07:33,  1.14s/it]

{'epoch': 0, 'iter': 3360, 'avg_loss': 6.320827312038187, 'avg_acc': 50.06787414459982, 'loss': 5.477669715881348}


EP_train:0:  49%|| 3371/6926 [59:22<1:10:56,  1.20s/it]

{'epoch': 0, 'iter': 3370, 'avg_loss': 6.319086477403576, 'avg_acc': 50.07045387125481, 'loss': 5.6269211769104}


EP_train:0:  49%|| 3381/6926 [59:32<59:24,  1.01s/it]

{'epoch': 0, 'iter': 3380, 'avg_loss': 6.316824480331498, 'avg_acc': 50.07116977225673, 'loss': 5.6560378074646}


EP_train:0:  49%|| 3391/6926 [59:43<1:00:12,  1.02s/it]

{'epoch': 0, 'iter': 3390, 'avg_loss': 6.315291665571343, 'avg_acc': 50.076489236213504, 'loss': 6.01848030090332}


EP_train:0:  49%|| 3401/6926 [59:54<1:00:15,  1.03s/it]

{'epoch': 0, 'iter': 3400, 'avg_loss': 6.313916657399304, 'avg_acc': 50.08177741840635, 'loss': 5.524486541748047}


EP_train:0:  49%|| 3411/6926 [1:00:04<59:40,  1.02s/it]  

{'epoch': 0, 'iter': 3410, 'avg_loss': 6.31241994545768, 'avg_acc': 50.09344766930519, 'loss': 5.159453392028809}


EP_train:0:  49%|| 3421/6926 [1:00:15<1:01:32,  1.05s/it]

{'epoch': 0, 'iter': 3420, 'avg_loss': 6.31132092011738, 'avg_acc': 50.09682841274481, 'loss': 6.2951555252075195}


EP_train:0:  50%|| 3431/6926 [1:00:26<1:00:14,  1.03s/it]

{'epoch': 0, 'iter': 3430, 'avg_loss': 6.3092862265132945, 'avg_acc': 50.08743806470417, 'loss': 5.751338005065918}


EP_train:0:  50%|| 3441/6926 [1:00:36<1:02:47,  1.08s/it]

{'epoch': 0, 'iter': 3440, 'avg_loss': 6.308003472713702, 'avg_acc': 50.092632955536175, 'loss': 5.72968053817749}


EP_train:0:  50%|| 3451/6926 [1:00:47<1:02:56,  1.09s/it]

{'epoch': 0, 'iter': 3450, 'avg_loss': 6.30638628495116, 'avg_acc': 50.08783685888149, 'loss': 5.745921611785889}


EP_train:0:  50%|| 3461/6926 [1:00:58<1:04:16,  1.11s/it]

{'epoch': 0, 'iter': 3460, 'avg_loss': 6.304871535170323, 'avg_acc': 50.09390349609939, 'loss': 5.405196666717529}


EP_train:0:  50%|| 3471/6926 [1:01:08<1:03:09,  1.10s/it]

{'epoch': 0, 'iter': 3470, 'avg_loss': 6.303482773049151, 'avg_acc': 50.0954335926246, 'loss': 5.833651542663574}


EP_train:0:  50%|| 3481/6926 [1:01:18<58:18,  1.02s/it]

{'epoch': 0, 'iter': 3480, 'avg_loss': 6.301873727216011, 'avg_acc': 50.10862539500144, 'loss': 5.702808380126953}


EP_train:0:  50%|| 3491/6926 [1:01:29<59:21,  1.04s/it]

{'epoch': 0, 'iter': 3490, 'avg_loss': 6.300321611761193, 'avg_acc': 50.09936264680608, 'loss': 6.333874702453613}


EP_train:0:  51%|| 3501/6926 [1:01:40<58:12,  1.02s/it]

{'epoch': 0, 'iter': 3500, 'avg_loss': 6.299066635786687, 'avg_acc': 50.103541845187095, 'loss': 5.836565971374512}


EP_train:0:  51%|| 3511/6926 [1:01:50<59:18,  1.04s/it]

{'epoch': 0, 'iter': 3510, 'avg_loss': 6.297705917065044, 'avg_acc': 50.11392765593848, 'loss': 5.785733699798584}


EP_train:0:  51%|| 3521/6926 [1:02:01<58:46,  1.04s/it]

{'epoch': 0, 'iter': 3520, 'avg_loss': 6.295939317923722, 'avg_acc': 50.110941493893776, 'loss': 6.0353593826293945}


EP_train:0:  51%|| 3531/6926 [1:02:11<59:41,  1.06s/it]  

{'epoch': 0, 'iter': 3530, 'avg_loss': 6.294516404604851, 'avg_acc': 50.10620220900594, 'loss': 6.038008689880371}


EP_train:0:  51%|| 3541/6926 [1:02:22<1:00:28,  1.07s/it]

{'epoch': 0, 'iter': 3540, 'avg_loss': 6.2926983576109095, 'avg_acc': 50.09884213499012, 'loss': 5.76857852935791}


EP_train:0:  51%|| 3551/6926 [1:02:32<1:04:10,  1.14s/it]

{'epoch': 0, 'iter': 3550, 'avg_loss': 6.291023441181489, 'avg_acc': 50.09680371726274, 'loss': 5.872481346130371}


EP_train:0:  51%|| 3561/6926 [1:02:43<1:01:23,  1.09s/it]

{'epoch': 0, 'iter': 3560, 'avg_loss': 6.289719519873611, 'avg_acc': 50.09214406065712, 'loss': 5.842050552368164}


EP_train:0:  52%|| 3571/6926 [1:02:53<56:30,  1.01s/it]

{'epoch': 0, 'iter': 3570, 'avg_loss': 6.288072199622512, 'avg_acc': 50.09013581629795, 'loss': 5.581125736236572}


EP_train:0:  52%|| 3581/6926 [1:03:04<56:43,  1.02s/it]

{'epoch': 0, 'iter': 3580, 'avg_loss': 6.286474508468214, 'avg_acc': 50.08813878804803, 'loss': 5.600448131561279}


EP_train:0:  52%|| 3591/6926 [1:03:14<57:10,  1.03s/it]

{'epoch': 0, 'iter': 3590, 'avg_loss': 6.285181623951235, 'avg_acc': 50.095725424672786, 'loss': 5.934525966644287}


EP_train:0:  52%|| 3601/6926 [1:03:25<57:20,  1.03s/it]

{'epoch': 0, 'iter': 3600, 'avg_loss': 6.283424503503326, 'avg_acc': 50.09025270758123, 'loss': 5.69801139831543}


EP_train:0:  52%|| 3611/6926 [1:03:36<57:27,  1.04s/it]

{'epoch': 0, 'iter': 3610, 'avg_loss': 6.281978106571346, 'avg_acc': 50.09173359180282, 'loss': 5.94726037979126}


EP_train:0:  52%|| 3621/6926 [1:03:46<58:32,  1.06s/it]

{'epoch': 0, 'iter': 3620, 'avg_loss': 6.280632979828361, 'avg_acc': 50.09752140292737, 'loss': 5.762078762054443}


EP_train:0:  52%|| 3631/6926 [1:03:57<1:00:35,  1.10s/it]

{'epoch': 0, 'iter': 3630, 'avg_loss': 6.279145553597272, 'avg_acc': 50.07315477829799, 'loss': 5.782121658325195}


EP_train:0:  53%|| 3641/6926 [1:04:08<1:02:45,  1.15s/it]

{'epoch': 0, 'iter': 3640, 'avg_loss': 6.277761382344201, 'avg_acc': 50.07123729744576, 'loss': 5.994831085205078}


EP_train:0:  53%|| 3651/6926 [1:04:18<1:01:13,  1.12s/it]

{'epoch': 0, 'iter': 3650, 'avg_loss': 6.276267285870709, 'avg_acc': 50.07532182963572, 'loss': 5.659008502960205}


EP_train:0:  53%|| 3661/6926 [1:04:28<54:48,  1.01s/it]

{'epoch': 0, 'iter': 3660, 'avg_loss': 6.27502025251537, 'avg_acc': 50.093041518710734, 'loss': 5.803684711456299}


EP_train:0:  53%|| 3671/6926 [1:04:39<55:28,  1.02s/it]

{'epoch': 0, 'iter': 3670, 'avg_loss': 6.273490443732361, 'avg_acc': 50.09363933533098, 'loss': 6.763915061950684}


EP_train:0:  53%|| 3681/6926 [1:04:50<54:57,  1.02s/it]

{'epoch': 0, 'iter': 3680, 'avg_loss': 6.271769386178545, 'avg_acc': 50.09083808747623, 'loss': 5.260537624359131}


EP_train:0:  53%|| 3691/6926 [1:05:00<55:09,  1.02s/it]

{'epoch': 0, 'iter': 3690, 'avg_loss': 6.2701594398713505, 'avg_acc': 50.10159848279599, 'loss': 5.666423797607422}


EP_train:0:  53%|| 3701/6926 [1:05:11<55:54,  1.04s/it]

{'epoch': 0, 'iter': 3700, 'avg_loss': 6.268701900439918, 'avg_acc': 50.08274790597136, 'loss': 5.50567626953125}


EP_train:0:  54%|| 3711/6926 [1:05:21<55:56,  1.04s/it]

{'epoch': 0, 'iter': 3710, 'avg_loss': 6.267184083527935, 'avg_acc': 50.077472379412555, 'loss': 6.152920246124268}


EP_train:0:  54%|| 3721/6926 [1:05:32<57:50,  1.08s/it]

{'epoch': 0, 'iter': 3720, 'avg_loss': 6.2657339611991505, 'avg_acc': 50.07810400429992, 'loss': 5.941766738891602}


EP_train:0:  54%|| 3731/6926 [1:05:42<59:05,  1.11s/it]

{'epoch': 0, 'iter': 3730, 'avg_loss': 6.264103317797519, 'avg_acc': 50.08459528276601, 'loss': 5.666489601135254}


EP_train:0:  54%|| 3741/6926 [1:05:53<1:00:13,  1.13s/it]

{'epoch': 0, 'iter': 3740, 'avg_loss': 6.262367107689109, 'avg_acc': 50.07434509489441, 'loss': 5.265793800354004}


EP_train:0:  54%|| 3751/6926 [1:06:03<53:16,  1.01s/it]

{'epoch': 0, 'iter': 3750, 'avg_loss': 6.260635758870063, 'avg_acc': 50.07581311650227, 'loss': 5.624989986419678}


EP_train:0:  54%|| 3761/6926 [1:06:14<53:11,  1.01s/it]

{'epoch': 0, 'iter': 3760, 'avg_loss': 6.259397184046873, 'avg_acc': 50.07145705929275, 'loss': 5.86294412612915}


EP_train:0:  54%|| 3771/6926 [1:06:24<54:24,  1.03s/it]

{'epoch': 0, 'iter': 3770, 'avg_loss': 6.25804510953666, 'avg_acc': 50.07209626093875, 'loss': 5.504334926605225}


EP_train:0:  55%|| 3781/6926 [1:06:35<54:01,  1.03s/it]

{'epoch': 0, 'iter': 3780, 'avg_loss': 6.256649264391375, 'avg_acc': 50.07769108701402, 'loss': 5.955719947814941}


EP_train:0:  55%|| 3791/6926 [1:06:46<53:51,  1.03s/it]

{'epoch': 0, 'iter': 3790, 'avg_loss': 6.254982125189964, 'avg_acc': 50.065945660775526, 'loss': 5.575616836547852}


EP_train:0:  55%|| 3801/6926 [1:06:56<54:49,  1.05s/it]

{'epoch': 0, 'iter': 3800, 'avg_loss': 6.25389392010258, 'avg_acc': 50.07317153380689, 'loss': 5.804103851318359}


EP_train:0:  55%|| 3811/6926 [1:07:07<56:35,  1.09s/it]

{'epoch': 0, 'iter': 3810, 'avg_loss': 6.252828284593681, 'avg_acc': 50.079539490947255, 'loss': 5.782948970794678}


EP_train:0:  55%|| 3821/6926 [1:07:18<57:57,  1.12s/it]  

{'epoch': 0, 'iter': 3820, 'avg_loss': 6.251452278136209, 'avg_acc': 50.07360638576289, 'loss': 5.31845760345459}


EP_train:0:  55%|| 3831/6926 [1:07:28<59:05,  1.15s/it]

{'epoch': 0, 'iter': 3830, 'avg_loss': 6.24992239478362, 'avg_acc': 50.060362829548424, 'loss': 5.78889799118042}


EP_train:0:  55%|| 3841/6926 [1:07:38<52:17,  1.02s/it]

{'epoch': 0, 'iter': 3840, 'avg_loss': 6.247985222723856, 'avg_acc': 50.04881541265296, 'loss': 5.511262893676758}


EP_train:0:  56%|| 3851/6926 [1:07:49<52:06,  1.02s/it]

{'epoch': 0, 'iter': 3850, 'avg_loss': 6.246754274464867, 'avg_acc': 50.05680342768112, 'loss': 5.520754337310791}


EP_train:0:  56%|| 3861/6926 [1:08:00<52:11,  1.02s/it]

{'epoch': 0, 'iter': 3860, 'avg_loss': 6.24543636260765, 'avg_acc': 50.05746568246568, 'loss': 5.427020072937012}


EP_train:0:  56%|| 3871/6926 [1:08:10<52:10,  1.02s/it]

{'epoch': 0, 'iter': 3870, 'avg_loss': 6.244418870506592, 'avg_acc': 50.067004649961255, 'loss': 5.673731803894043}


EP_train:0:  56%|| 3881/6926 [1:08:21<52:05,  1.03s/it]

{'epoch': 0, 'iter': 3880, 'avg_loss': 6.242896246007041, 'avg_acc': 50.074078845658335, 'loss': 5.447144031524658}


EP_train:0:  56%|| 3891/6926 [1:08:31<52:00,  1.03s/it]

{'epoch': 0, 'iter': 3890, 'avg_loss': 6.241544362482146, 'avg_acc': 50.07228218966847, 'loss': 5.62050724029541}


EP_train:0:  56%|| 3901/6926 [1:08:42<52:38,  1.04s/it]

{'epoch': 0, 'iter': 3900, 'avg_loss': 6.2402420076948655, 'avg_acc': 50.06008074852601, 'loss': 6.407965183258057}


EP_train:0:  56%|| 3911/6926 [1:08:53<54:20,  1.08s/it]

{'epoch': 0, 'iter': 3910, 'avg_loss': 6.239083781933669, 'avg_acc': 50.06472129890054, 'loss': 5.465052127838135}


EP_train:0:  57%|| 3921/6926 [1:09:03<54:46,  1.09s/it]

{'epoch': 0, 'iter': 3920, 'avg_loss': 6.2378260433233015, 'avg_acc': 50.0502104055088, 'loss': 6.163295745849609}


EP_train:0:  57%|| 3931/6926 [1:09:14<56:56,  1.14s/it]

{'epoch': 0, 'iter': 3930, 'avg_loss': 6.236739634618417, 'avg_acc': 50.04531289748155, 'loss': 5.569154739379883}


EP_train:0:  57%|| 3941/6926 [1:09:24<51:56,  1.04s/it]

{'epoch': 0, 'iter': 3940, 'avg_loss': 6.235490150301628, 'avg_acc': 50.04916264907384, 'loss': 6.046095371246338}


EP_train:0:  57%|| 3951/6926 [1:09:35<50:15,  1.01s/it]

{'epoch': 0, 'iter': 3950, 'avg_loss': 6.233802240729423, 'avg_acc': 50.057738547203236, 'loss': 5.611295223236084}


EP_train:0:  57%|| 3961/6926 [1:09:45<50:15,  1.02s/it]

{'epoch': 0, 'iter': 3960, 'avg_loss': 6.2322600932170635, 'avg_acc': 50.05522595304216, 'loss': 5.64838981628418}


EP_train:0:  57%|| 3971/6926 [1:09:56<51:01,  1.04s/it]

{'epoch': 0, 'iter': 3970, 'avg_loss': 6.230972267584127, 'avg_acc': 50.05508687987912, 'loss': 5.787883758544922}


EP_train:0:  57%|| 3981/6926 [1:10:07<51:16,  1.04s/it]

{'epoch': 0, 'iter': 3980, 'avg_loss': 6.22998354501444, 'avg_acc': 50.0612283345893, 'loss': 6.001025676727295}


EP_train:0:  58%|| 3991/6926 [1:10:17<50:58,  1.04s/it]

{'epoch': 0, 'iter': 3990, 'avg_loss': 6.228323577311188, 'avg_acc': 50.06498997744926, 'loss': 5.743706226348877}


EP_train:0:  58%|| 4001/6926 [1:10:28<51:41,  1.06s/it]

{'epoch': 0, 'iter': 4000, 'avg_loss': 6.227019420000232, 'avg_acc': 50.07576230942264, 'loss': 5.354485034942627}


EP_train:0:  58%|| 4011/6926 [1:10:39<53:14,  1.10s/it]

{'epoch': 0, 'iter': 4010, 'avg_loss': 6.225946407964836, 'avg_acc': 50.07557342308652, 'loss': 6.200821399688721}


EP_train:0:  58%|| 4021/6926 [1:10:49<54:19,  1.12s/it]

{'epoch': 0, 'iter': 4020, 'avg_loss': 6.225009144078852, 'avg_acc': 50.070722457100224, 'loss': 5.644330978393555}


EP_train:0:  58%|| 4031/6926 [1:11:00<53:25,  1.11s/it]

{'epoch': 0, 'iter': 4030, 'avg_loss': 6.22388627161846, 'avg_acc': 50.07209749441826, 'loss': 5.723794937133789}


EP_train:0:  58%|| 4041/6926 [1:11:10<48:09,  1.00s/it]

{'epoch': 0, 'iter': 4040, 'avg_loss': 6.222971454819545, 'avg_acc': 50.075785696609756, 'loss': 6.003933906555176}


EP_train:0:  58%|| 4051/6926 [1:11:20<48:07,  1.00s/it]

{'epoch': 0, 'iter': 4050, 'avg_loss': 6.221867092389644, 'avg_acc': 50.07637003209084, 'loss': 5.838645935058594}


EP_train:0:  59%|| 4061/6926 [1:11:31<49:24,  1.03s/it]

{'epoch': 0, 'iter': 4060, 'avg_loss': 6.220595604385305, 'avg_acc': 50.078490519576455, 'loss': 5.798256874084473}


EP_train:0:  59%|| 4071/6926 [1:11:42<49:42,  1.04s/it]

{'epoch': 0, 'iter': 4070, 'avg_loss': 6.218804149772927, 'avg_acc': 50.08367108818472, 'loss': 5.3899431228637695}


EP_train:0:  59%|| 4081/6926 [1:11:52<49:23,  1.04s/it]

{'epoch': 0, 'iter': 4080, 'avg_loss': 6.217611152440478, 'avg_acc': 50.094186473903456, 'loss': 5.484589099884033}


EP_train:0:  59%|| 4091/6926 [1:12:03<50:07,  1.06s/it]

{'epoch': 0, 'iter': 4090, 'avg_loss': 6.216512363366701, 'avg_acc': 50.09701173307259, 'loss': 5.874229907989502}


EP_train:0:  59%|| 4101/6926 [1:12:14<50:19,  1.07s/it]

{'epoch': 0, 'iter': 4100, 'avg_loss': 6.215262214895052, 'avg_acc': 50.09906120458425, 'loss': 5.312845230102539}


EP_train:0:  59%|| 4111/6926 [1:12:24<51:02,  1.09s/it]

{'epoch': 0, 'iter': 4110, 'avg_loss': 6.214257414281238, 'avg_acc': 50.111742884942835, 'loss': 5.195980072021484}


EP_train:0:  60%|| 4121/6926 [1:12:35<54:12,  1.16s/it]

{'epoch': 0, 'iter': 4120, 'avg_loss': 6.213294682066524, 'avg_acc': 50.111471730162584, 'loss': 5.577513694763184}


EP_train:0:  60%|| 4131/6926 [1:12:45<50:40,  1.09s/it]

{'epoch': 0, 'iter': 4130, 'avg_loss': 6.212378998583034, 'avg_acc': 50.11120188816267, 'loss': 5.295523643493652}


EP_train:0:  60%|| 4141/6926 [1:12:56<46:47,  1.01s/it]

{'epoch': 0, 'iter': 4140, 'avg_loss': 6.211511406164991, 'avg_acc': 50.11395194397489, 'loss': 5.905283451080322}


EP_train:0:  60%|| 4151/6926 [1:13:06<46:44,  1.01s/it]

{'epoch': 0, 'iter': 4150, 'avg_loss': 6.210416110789336, 'avg_acc': 50.11292459648278, 'loss': 5.6926960945129395}


EP_train:0:  60%|| 4161/6926 [1:13:17<47:54,  1.04s/it]

{'epoch': 0, 'iter': 4160, 'avg_loss': 6.2093840361615555, 'avg_acc': 50.10589401586157, 'loss': 5.775752067565918}


EP_train:0:  60%|| 4171/6926 [1:13:27<47:35,  1.04s/it]

{'epoch': 0, 'iter': 4170, 'avg_loss': 6.208556491675293, 'avg_acc': 50.100395588587865, 'loss': 5.8980255126953125}


EP_train:0:  60%|| 4181/6926 [1:13:38<47:43,  1.04s/it]

{'epoch': 0, 'iter': 4180, 'avg_loss': 6.2076586397376765, 'avg_acc': 50.09043889021765, 'loss': 5.943076133728027}


EP_train:0:  61%|| 4191/6926 [1:13:49<48:47,  1.07s/it]

{'epoch': 0, 'iter': 4190, 'avg_loss': 6.206550875872878, 'avg_acc': 50.08649486995944, 'loss': 6.111042022705078}


EP_train:0:  61%|| 4201/6926 [1:13:59<49:33,  1.09s/it]

{'epoch': 0, 'iter': 4200, 'avg_loss': 6.205464701004409, 'avg_acc': 50.083313496786474, 'loss': 6.032597064971924}


EP_train:0:  61%|| 4211/6926 [1:14:10<50:51,  1.12s/it]

{'epoch': 0, 'iter': 4210, 'avg_loss': 6.204507719618327, 'avg_acc': 50.08682616955592, 'loss': 5.983819007873535}


EP_train:0:  61%|| 4221/6926 [1:14:20<45:57,  1.02s/it]

{'epoch': 0, 'iter': 4220, 'avg_loss': 6.203790225679913, 'avg_acc': 50.088841506751955, 'loss': 6.055787563323975}


EP_train:0:  61%|| 4231/6926 [1:14:31<46:28,  1.03s/it]

{'epoch': 0, 'iter': 4230, 'avg_loss': 6.202404487783873, 'avg_acc': 50.09232450957221, 'loss': 5.713054180145264}


EP_train:0:  61%|| 4241/6926 [1:14:41<45:15,  1.01s/it]

{'epoch': 0, 'iter': 4240, 'avg_loss': 6.201364484943047, 'avg_acc': 50.09284366894601, 'loss': 5.7774152755737305}


EP_train:0:  61%|| 4251/6926 [1:14:52<45:26,  1.02s/it]

{'epoch': 0, 'iter': 4250, 'avg_loss': 6.2002482730566655, 'avg_acc': 50.09777111267937, 'loss': 5.2823262214660645}


EP_train:0:  62%|| 4261/6926 [1:15:02<46:10,  1.04s/it]

{'epoch': 0, 'iter': 4260, 'avg_loss': 6.199268663469488, 'avg_acc': 50.101208636470304, 'loss': 5.496036052703857}


EP_train:0:  62%|| 4271/6926 [1:15:13<47:40,  1.08s/it]

{'epoch': 0, 'iter': 4270, 'avg_loss': 6.198327631560736, 'avg_acc': 50.10682509950831, 'loss': 6.015525817871094}


EP_train:0:  62%|| 4281/6926 [1:15:24<47:38,  1.08s/it]

{'epoch': 0, 'iter': 4280, 'avg_loss': 6.197711471084456, 'avg_acc': 50.102925718290116, 'loss': 6.118717193603516}


EP_train:0:  62%|| 4291/6926 [1:15:34<49:46,  1.13s/it]

{'epoch': 0, 'iter': 4290, 'avg_loss': 6.196618041801052, 'avg_acc': 50.107783733395486, 'loss': 5.7115559577941895}


EP_train:0:  62%|| 4301/6926 [1:15:45<51:02,  1.17s/it]

{'epoch': 0, 'iter': 4300, 'avg_loss': 6.195470777230883, 'avg_acc': 50.11043943269007, 'loss': 5.843894004821777}


EP_train:0:  62%|| 4311/6926 [1:15:55<44:10,  1.01s/it]

{'epoch': 0, 'iter': 4310, 'avg_loss': 6.194281532824717, 'avg_acc': 50.11018325214568, 'loss': 5.8399481773376465}


EP_train:0:  62%|| 4321/6926 [1:16:06<44:06,  1.02s/it]

{'epoch': 0, 'iter': 4320, 'avg_loss': 6.193334515793846, 'avg_acc': 50.10775862068966, 'loss': 5.639355182647705}


EP_train:0:  63%|| 4331/6926 [1:16:16<43:55,  1.02s/it]

{'epoch': 0, 'iter': 4330, 'avg_loss': 6.192418366319669, 'avg_acc': 50.093800507965824, 'loss': 5.633268356323242}


EP_train:0:  63%|| 4341/6926 [1:16:27<44:15,  1.03s/it]

{'epoch': 0, 'iter': 4340, 'avg_loss': 6.191387385548713, 'avg_acc': 50.09862358903479, 'loss': 5.6889214515686035}


EP_train:0:  63%|| 4351/6926 [1:16:37<44:46,  1.04s/it]

{'epoch': 0, 'iter': 4350, 'avg_loss': 6.190476223233924, 'avg_acc': 50.095524017467255, 'loss': 5.747376918792725}


EP_train:0:  63%|| 4361/6926 [1:16:48<45:10,  1.06s/it]

{'epoch': 0, 'iter': 4360, 'avg_loss': 6.1895403887148, 'avg_acc': 50.09315523962394, 'loss': 6.050894737243652}


EP_train:0:  63%|| 4371/6926 [1:16:59<45:20,  1.06s/it]

{'epoch': 0, 'iter': 4370, 'avg_loss': 6.188306769089796, 'avg_acc': 50.0886524822695, 'loss': 5.057234764099121}


EP_train:0:  63%|| 4381/6926 [1:17:09<47:29,  1.12s/it]

{'epoch': 0, 'iter': 4380, 'avg_loss': 6.187383490647118, 'avg_acc': 50.09843643003881, 'loss': 5.468315124511719}


EP_train:0:  63%|| 4391/6926 [1:17:20<48:35,  1.15s/it]

{'epoch': 0, 'iter': 4390, 'avg_loss': 6.186493197162883, 'avg_acc': 50.09678888635845, 'loss': 5.216806411743164}


EP_train:0:  64%|| 4401/6926 [1:17:30<47:29,  1.13s/it]

{'epoch': 0, 'iter': 4400, 'avg_loss': 6.185366261847803, 'avg_acc': 50.094438763917296, 'loss': 5.611016273498535}


EP_train:0:  64%|| 4411/6926 [1:17:41<43:53,  1.05s/it]

{'epoch': 0, 'iter': 4410, 'avg_loss': 6.184313184845829, 'avg_acc': 50.09280775334391, 'loss': 5.929462432861328}


EP_train:0:  64%|| 4421/6926 [1:17:52<42:47,  1.02s/it]

{'epoch': 0, 'iter': 4420, 'avg_loss': 6.183264407386771, 'avg_acc': 50.07987446279122, 'loss': 5.95384407043457}


EP_train:0:  64%|| 4431/6926 [1:18:02<42:50,  1.03s/it]

{'epoch': 0, 'iter': 4430, 'avg_loss': 6.182091931515476, 'avg_acc': 50.07687316632814, 'loss': 5.5602707862854}


EP_train:0:  64%|| 4441/6926 [1:18:13<43:08,  1.04s/it]

{'epoch': 0, 'iter': 4440, 'avg_loss': 6.18094775666527, 'avg_acc': 50.080218419274935, 'loss': 5.1301727294921875}


EP_train:0:  64%|| 4451/6926 [1:18:24<42:43,  1.04s/it]

{'epoch': 0, 'iter': 4450, 'avg_loss': 6.179836902155605, 'avg_acc': 50.07512356773759, 'loss': 5.549008369445801}


EP_train:0:  64%|| 4461/6926 [1:18:34<43:20,  1.05s/it]

{'epoch': 0, 'iter': 4460, 'avg_loss': 6.178848901315538, 'avg_acc': 50.06584846446985, 'loss': 5.272642135620117}


EP_train:0:  65%|| 4471/6926 [1:18:45<44:07,  1.08s/it]

{'epoch': 0, 'iter': 4470, 'avg_loss': 6.177863111860718, 'avg_acc': 50.062905390293, 'loss': 5.552022457122803}


EP_train:0:  65%|| 4481/6926 [1:18:56<44:23,  1.09s/it]

{'epoch': 0, 'iter': 4480, 'avg_loss': 6.176719627005797, 'avg_acc': 50.07741017629993, 'loss': 5.979504585266113}


EP_train:0:  65%|| 4491/6926 [1:19:07<45:16,  1.12s/it]

{'epoch': 0, 'iter': 4490, 'avg_loss': 6.175344000094135, 'avg_acc': 50.08280449788466, 'loss': 5.41490364074707}


EP_train:0:  65%|| 4501/6926 [1:19:18<48:19,  1.20s/it]

{'epoch': 0, 'iter': 4500, 'avg_loss': 6.17430853848986, 'avg_acc': 50.09511775161075, 'loss': 6.051551342010498}


EP_train:0:  65%|| 4511/6926 [1:19:28<45:01,  1.12s/it]

{'epoch': 0, 'iter': 4510, 'avg_loss': 6.17355010071342, 'avg_acc': 50.09075038794059, 'loss': 5.481635093688965}


EP_train:0:  65%|| 4521/6926 [1:19:39<42:04,  1.05s/it]

{'epoch': 0, 'iter': 4520, 'avg_loss': 6.172481090614844, 'avg_acc': 50.08571112585712, 'loss': 5.453517913818359}


EP_train:0:  65%|| 4531/6926 [1:19:50<40:27,  1.01s/it]

{'epoch': 0, 'iter': 4530, 'avg_loss': 6.171706772040016, 'avg_acc': 50.091729198852356, 'loss': 5.956821918487549}


EP_train:0:  66%|| 4541/6926 [1:20:00<41:11,  1.04s/it]

{'epoch': 0, 'iter': 4540, 'avg_loss': 6.1703620570211575, 'avg_acc': 50.09840894076195, 'loss': 5.436481475830078}


EP_train:0:  66%|| 4551/6926 [1:20:11<41:20,  1.04s/it]

{'epoch': 0, 'iter': 4550, 'avg_loss': 6.169513178196511, 'avg_acc': 50.10231267853219, 'loss': 5.705122470855713}


EP_train:0:  66%|| 4561/6926 [1:20:22<41:19,  1.05s/it]

{'epoch': 0, 'iter': 4560, 'avg_loss': 6.168307288177597, 'avg_acc': 50.102773514580136, 'loss': 5.7965474128723145}


EP_train:0:  66%|| 4571/6926 [1:20:33<42:36,  1.09s/it]

{'epoch': 0, 'iter': 4570, 'avg_loss': 6.167542814243494, 'avg_acc': 50.09776307153796, 'loss': 5.374499320983887}


EP_train:0:  66%|| 4581/6926 [1:20:44<42:20,  1.08s/it]

{'epoch': 0, 'iter': 4580, 'avg_loss': 6.1665794581363516, 'avg_acc': 50.09891399257804, 'loss': 5.4291672706604}


EP_train:0:  66%|| 4591/6926 [1:20:55<42:58,  1.10s/it]

{'epoch': 0, 'iter': 4590, 'avg_loss': 6.16606085317583, 'avg_acc': 50.09937922021346, 'loss': 5.770471572875977}


EP_train:0:  66%|| 4601/6926 [1:21:06<42:01,  1.08s/it]

{'epoch': 0, 'iter': 4600, 'avg_loss': 6.165382593565727, 'avg_acc': 50.097125624864155, 'loss': 5.413626670837402}


EP_train:0:  67%|| 4611/6926 [1:21:17<42:43,  1.11s/it]

{'epoch': 0, 'iter': 4610, 'avg_loss': 6.164426763568052, 'avg_acc': 50.08403816959445, 'loss': 5.83560037612915}


EP_train:0:  67%|| 4621/6926 [1:21:29<43:48,  1.14s/it]

{'epoch': 0, 'iter': 4620, 'avg_loss': 6.163817685446525, 'avg_acc': 50.078446223761084, 'loss': 6.09126091003418}


EP_train:0:  67%|| 4631/6926 [1:21:40<44:23,  1.16s/it]

{'epoch': 0, 'iter': 4630, 'avg_loss': 6.163108280579746, 'avg_acc': 50.07962643057655, 'loss': 5.521060466766357}


EP_train:0:  67%|| 4641/6926 [1:21:51<44:33,  1.17s/it]

{'epoch': 0, 'iter': 4640, 'avg_loss': 6.162407795023081, 'avg_acc': 50.07810816634346, 'loss': 5.965053081512451}


EP_train:0:  67%|| 4651/6926 [1:22:02<46:30,  1.23s/it]

{'epoch': 0, 'iter': 4650, 'avg_loss': 6.161514175909613, 'avg_acc': 50.07659643087508, 'loss': 5.9458842277526855}


EP_train:0:  67%|| 4661/6926 [1:22:14<45:57,  1.22s/it]

{'epoch': 0, 'iter': 4660, 'avg_loss': 6.160565318684802, 'avg_acc': 50.07375026818279, 'loss': 5.525349140167236}


EP_train:0:  67%|| 4671/6926 [1:22:26<46:36,  1.24s/it]

{'epoch': 0, 'iter': 4670, 'avg_loss': 6.159784744955237, 'avg_acc': 50.07693748661956, 'loss': 5.316900253295898}


EP_train:0:  68%|| 4681/6926 [1:22:38<47:26,  1.27s/it]

{'epoch': 0, 'iter': 4680, 'avg_loss': 6.158694904855468, 'avg_acc': 50.081446272164065, 'loss': 5.990912914276123}


EP_train:0:  68%|| 4691/6926 [1:22:50<46:41,  1.25s/it]

{'epoch': 0, 'iter': 4690, 'avg_loss': 6.157676795478874, 'avg_acc': 50.078607972713705, 'loss': 5.9051055908203125}


EP_train:0:  68%|| 4701/6926 [1:23:02<46:55,  1.27s/it]

{'epoch': 0, 'iter': 4700, 'avg_loss': 6.15651309147867, 'avg_acc': 50.0777760051053, 'loss': 5.186574935913086}


EP_train:0:  68%|| 4711/6926 [1:23:14<45:54,  1.24s/it]

{'epoch': 0, 'iter': 4710, 'avg_loss': 6.1554478719416155, 'avg_acc': 50.07097749946933, 'loss': 5.61620569229126}


EP_train:0:  68%|| 4721/6926 [1:23:27<46:14,  1.26s/it]

{'epoch': 0, 'iter': 4720, 'avg_loss': 6.15448677113086, 'avg_acc': 50.07546070747723, 'loss': 5.724175930023193}


EP_train:0:  68%|| 4731/6926 [1:23:40<46:47,  1.28s/it]

{'epoch': 0, 'iter': 4730, 'avg_loss': 6.153305807418033, 'avg_acc': 50.08190657366307, 'loss': 5.84460973739624}


EP_train:0:  68%|| 4741/6926 [1:23:52<46:35,  1.28s/it]

{'epoch': 0, 'iter': 4740, 'avg_loss': 6.152178161482699, 'avg_acc': 50.079756380510446, 'loss': 5.315962314605713}


EP_train:0:  69%|| 4751/6926 [1:24:05<45:27,  1.25s/it]

{'epoch': 0, 'iter': 4750, 'avg_loss': 6.151391281175302, 'avg_acc': 50.06840665123132, 'loss': 6.412914276123047}


EP_train:0:  69%|| 4761/6926 [1:24:19<47:28,  1.32s/it]

{'epoch': 0, 'iter': 4760, 'avg_loss': 6.150623962924751, 'avg_acc': 50.07417034236505, 'loss': 5.979710578918457}


EP_train:0:  69%|| 4771/6926 [1:24:32<45:06,  1.26s/it]

{'epoch': 0, 'iter': 4770, 'avg_loss': 6.149889168812279, 'avg_acc': 50.079254873192205, 'loss': 5.800957679748535}


EP_train:0:  69%|| 4781/6926 [1:24:45<46:21,  1.30s/it]

{'epoch': 0, 'iter': 4780, 'avg_loss': 6.149053075427766, 'avg_acc': 50.07974273164611, 'loss': 5.6341233253479}


EP_train:0:  69%|| 4791/6926 [1:24:59<50:25,  1.42s/it]

{'epoch': 0, 'iter': 4790, 'avg_loss': 6.14830705343719, 'avg_acc': 50.08088081820079, 'loss': 6.149736404418945}


EP_train:0:  69%|| 4801/6926 [1:25:13<49:50,  1.41s/it]

{'epoch': 0, 'iter': 4800, 'avg_loss': 6.147369875805598, 'avg_acc': 50.083315975838374, 'loss': 5.7930755615234375}


EP_train:0:  69%|| 4811/6926 [1:25:26<47:00,  1.33s/it]

{'epoch': 0, 'iter': 4810, 'avg_loss': 6.146701182478606, 'avg_acc': 50.090937435044694, 'loss': 5.522029876708984}


EP_train:0:  70%|| 4821/6926 [1:25:40<48:47,  1.39s/it]

{'epoch': 0, 'iter': 4820, 'avg_loss': 6.145609205527267, 'avg_acc': 50.10047189379797, 'loss': 5.55286169052124}


EP_train:0:  70%|| 4831/6926 [1:25:55<49:19,  1.41s/it]

{'epoch': 0, 'iter': 4830, 'avg_loss': 6.144686894445433, 'avg_acc': 50.09832332850341, 'loss': 5.5607099533081055}


EP_train:0:  70%|| 4841/6926 [1:26:10<51:39,  1.49s/it]

{'epoch': 0, 'iter': 4840, 'avg_loss': 6.143681190497027, 'avg_acc': 50.09295600082627, 'loss': 5.735944747924805}


EP_train:0:  70%|| 4851/6926 [1:26:24<50:23,  1.46s/it]

{'epoch': 0, 'iter': 4850, 'avg_loss': 6.142834826760183, 'avg_acc': 50.10758091115234, 'loss': 5.4018473625183105}


EP_train:0:  70%|| 4861/6926 [1:26:39<50:18,  1.46s/it]

{'epoch': 0, 'iter': 4860, 'avg_loss': 6.141995904083189, 'avg_acc': 50.10735959679078, 'loss': 5.825940132141113}


EP_train:0:  70%|| 4871/6926 [1:26:54<53:23,  1.56s/it]

{'epoch': 0, 'iter': 4870, 'avg_loss': 6.141265738159693, 'avg_acc': 50.10778074317389, 'loss': 6.0623698234558105}


EP_train:0:  70%|| 4881/6926 [1:27:10<53:59,  1.58s/it]

{'epoch': 0, 'iter': 4880, 'avg_loss': 6.140510797940227, 'avg_acc': 50.098596599057565, 'loss': 6.026599407196045}


EP_train:0:  71%|| 4891/6926 [1:27:26<54:16,  1.60s/it]

{'epoch': 0, 'iter': 4890, 'avg_loss': 6.139755736974772, 'avg_acc': 50.10286751175629, 'loss': 5.894759178161621}


EP_train:0:  71%|| 4901/6926 [1:27:43<57:26,  1.70s/it]

{'epoch': 0, 'iter': 4900, 'avg_loss': 6.138983741679305, 'avg_acc': 50.10648337074066, 'loss': 5.648519992828369}


EP_train:0:  71%|| 4911/6926 [1:28:00<59:38,  1.78s/it]  

{'epoch': 0, 'iter': 4910, 'avg_loss': 6.13812093534724, 'avg_acc': 50.099266951740994, 'loss': 6.107151985168457}


EP_train:0:  71%|| 4921/6926 [1:28:18<1:02:53,  1.88s/it]

{'epoch': 0, 'iter': 4920, 'avg_loss': 6.1374428812456046, 'avg_acc': 50.10541556594188, 'loss': 5.603177070617676}


EP_train:0:  71%|| 4931/6926 [1:28:38<1:06:06,  1.99s/it]

{'epoch': 0, 'iter': 4930, 'avg_loss': 6.136421217344954, 'avg_acc': 50.11090549584263, 'loss': 5.772839546203613}


EP_train:0:  71%|| 4941/6926 [1:28:59<1:10:08,  2.12s/it]

{'epoch': 0, 'iter': 4940, 'avg_loss': 6.135687633507456, 'avg_acc': 50.09992916413681, 'loss': 5.999887943267822}


EP_train:0:  71%|| 4951/6926 [1:29:21<1:15:00,  2.28s/it]

{'epoch': 0, 'iter': 4950, 'avg_loss': 6.135152594582333, 'avg_acc': 50.095940214098164, 'loss': 5.729734420776367}


EP_train:0:  72%|| 4961/6926 [1:29:44<1:17:34,  2.37s/it]

{'epoch': 0, 'iter': 4960, 'avg_loss': 6.1343379139876175, 'avg_acc': 50.09952630518041, 'loss': 5.688472270965576}


EP_train:0:  72%|| 4971/6926 [1:30:10<1:22:51,  2.54s/it]

{'epoch': 0, 'iter': 4970, 'avg_loss': 6.133367282431554, 'avg_acc': 50.09744015288674, 'loss': 6.0825347900390625}


EP_train:0:  72%|| 4981/6926 [1:30:36<1:25:36,  2.64s/it]

{'epoch': 0, 'iter': 4980, 'avg_loss': 6.13255282947515, 'avg_acc': 50.095989761092156, 'loss': 5.387226581573486}


EP_train:0:  72%|| 4991/6926 [1:31:04<1:27:55,  2.73s/it]

{'epoch': 0, 'iter': 4990, 'avg_loss': 6.131890595974365, 'avg_acc': 50.1020587056702, 'loss': 6.031741142272949}


EP_train:0:  72%|| 5001/6926 [1:31:32<1:31:02,  2.84s/it]

{'epoch': 0, 'iter': 5000, 'avg_loss': 6.130986225697976, 'avg_acc': 50.09060687862428, 'loss': 5.483904838562012}


EP_train:0:  72%|| 5011/6926 [1:32:01<1:35:46,  3.00s/it]

{'epoch': 0, 'iter': 5010, 'avg_loss': 6.130024546564422, 'avg_acc': 50.09915685491918, 'loss': 5.244933128356934}


EP_train:0:  72%|| 5021/6926 [1:32:31<1:34:10,  2.97s/it]

{'epoch': 0, 'iter': 5020, 'avg_loss': 6.129232411364639, 'avg_acc': 50.089623580959966, 'loss': 5.992037773132324}


EP_train:0:  73%|| 5031/6926 [1:33:01<1:37:42,  3.09s/it]

{'epoch': 0, 'iter': 5030, 'avg_loss': 6.128521010501796, 'avg_acc': 50.085718545020875, 'loss': 5.884785175323486}


EP_train:0:  73%|| 5041/6926 [1:33:31<1:35:16,  3.03s/it]

{'epoch': 0, 'iter': 5040, 'avg_loss': 6.127722009357255, 'avg_acc': 50.08616841896449, 'loss': 5.629653453826904}


EP_train:0:  73%|| 5051/6926 [1:34:02<1:36:48,  3.10s/it]

{'epoch': 0, 'iter': 5050, 'avg_loss': 6.126940118985656, 'avg_acc': 50.0841417541081, 'loss': 5.905818462371826}


EP_train:0:  73%|| 5061/6926 [1:34:32<1:34:17,  3.03s/it]

{'epoch': 0, 'iter': 5060, 'avg_loss': 6.126158471871591, 'avg_acc': 50.08891523414345, 'loss': 5.447674751281738}


EP_train:0:  73%|| 5071/6926 [1:35:03<1:35:43,  3.10s/it]

{'epoch': 0, 'iter': 5070, 'avg_loss': 6.124916817445629, 'avg_acc': 50.09551863537764, 'loss': 5.988048076629639}


EP_train:0:  73%|| 5081/6926 [1:35:34<1:37:48,  3.18s/it]

{'epoch': 0, 'iter': 5080, 'avg_loss': 6.124124415674868, 'avg_acc': 50.09840582562488, 'loss': 5.786093711853027}


EP_train:0:  74%|| 5091/6926 [1:36:06<1:36:00,  3.14s/it]

{'epoch': 0, 'iter': 5090, 'avg_loss': 6.1237299040009665, 'avg_acc': 50.096371046945585, 'loss': 6.034430503845215}


EP_train:0:  74%|| 5101/6926 [1:36:38<1:37:55,  3.22s/it]

{'epoch': 0, 'iter': 5100, 'avg_loss': 6.122706937126215, 'avg_acc': 50.09373162125074, 'loss': 5.636478424072266}


EP_train:0:  74%|| 5111/6926 [1:37:09<1:38:09,  3.25s/it]

{'epoch': 0, 'iter': 5110, 'avg_loss': 6.1221737801923695, 'avg_acc': 50.09293680297397, 'loss': 6.166223526000977}


EP_train:0:  74%|| 5121/6926 [1:37:41<1:36:23,  3.20s/it]

{'epoch': 0, 'iter': 5120, 'avg_loss': 6.121593478880132, 'avg_acc': 50.101908806873666, 'loss': 5.948540687561035}


EP_train:0:  74%|| 5131/6926 [1:38:14<1:36:41,  3.23s/it]

{'epoch': 0, 'iter': 5130, 'avg_loss': 6.120835793364515, 'avg_acc': 50.09561976222958, 'loss': 5.966570854187012}


EP_train:0:  74%|| 5141/6926 [1:38:47<1:37:31,  3.28s/it]

{'epoch': 0, 'iter': 5140, 'avg_loss': 6.119971551822795, 'avg_acc': 50.08388445827659, 'loss': 6.011678218841553}


EP_train:0:  74%|| 5151/6926 [1:39:19<1:37:47,  3.31s/it]

{'epoch': 0, 'iter': 5150, 'avg_loss': 6.119323173649588, 'avg_acc': 50.074621432731504, 'loss': 5.752501487731934}


EP_train:0:  75%|| 5161/6926 [1:39:52<1:35:59,  3.26s/it]

{'epoch': 0, 'iter': 5160, 'avg_loss': 6.118513996114658, 'avg_acc': 50.07750435962023, 'loss': 6.028151512145996}


EP_train:0:  75%|| 5171/6926 [1:40:26<1:37:04,  3.32s/it]

{'epoch': 0, 'iter': 5170, 'avg_loss': 6.1175181748119645, 'avg_acc': 50.07131115838329, 'loss': 5.8278398513793945}


EP_train:0:  75%|| 5181/6926 [1:41:00<1:38:47,  3.40s/it]

{'epoch': 0, 'iter': 5180, 'avg_loss': 6.116729328057591, 'avg_acc': 50.07780833815866, 'loss': 5.77786922454834}


EP_train:0:  75%|| 5191/6926 [1:41:34<1:36:48,  3.35s/it]

{'epoch': 0, 'iter': 5190, 'avg_loss': 6.116005701036771, 'avg_acc': 50.082474475052976, 'loss': 5.42225980758667}


EP_train:0:  75%|| 5201/6926 [1:42:07<1:37:14,  3.38s/it]

{'epoch': 0, 'iter': 5200, 'avg_loss': 6.115130980641446, 'avg_acc': 50.08231590078831, 'loss': 5.67633056640625}


EP_train:0:  75%|| 5211/6926 [1:42:42<1:39:46,  3.49s/it]

{'epoch': 0, 'iter': 5210, 'avg_loss': 6.114120703653099, 'avg_acc': 50.08335732105162, 'loss': 5.587223052978516}


EP_train:0:  75%|| 5221/6926 [1:43:16<1:38:28,  3.47s/it]

{'epoch': 0, 'iter': 5220, 'avg_loss': 6.113349418168871, 'avg_acc': 50.09097873970504, 'loss': 5.714132308959961}


EP_train:0:  76%|| 5231/6926 [1:43:51<1:38:56,  3.50s/it]

{'epoch': 0, 'iter': 5230, 'avg_loss': 6.112828440111058, 'avg_acc': 50.08363601605812, 'loss': 5.345940113067627}


EP_train:0:  76%|| 5241/6926 [1:44:26<1:40:47,  3.59s/it]

{'epoch': 0, 'iter': 5240, 'avg_loss': 6.112026010435789, 'avg_acc': 50.08049513451631, 'loss': 5.726517677307129}


EP_train:0:  76%|| 5251/6926 [1:45:01<1:39:56,  3.58s/it]

{'epoch': 0, 'iter': 5250, 'avg_loss': 6.111288315317695, 'avg_acc': 50.086888211769185, 'loss': 5.521177291870117}


EP_train:0:  76%|| 5261/6926 [1:45:36<1:39:12,  3.57s/it]

{'epoch': 0, 'iter': 5260, 'avg_loss': 6.110715918476603, 'avg_acc': 50.08078312107964, 'loss': 6.013071060180664}


EP_train:0:  76%|| 5271/6926 [1:46:12<1:41:06,  3.67s/it]

{'epoch': 0, 'iter': 5270, 'avg_loss': 6.110024150420673, 'avg_acc': 50.077072661734015, 'loss': 5.859122276306152}


EP_train:0:  76%|| 5281/6926 [1:46:48<1:39:11,  3.62s/it]

{'epoch': 0, 'iter': 5280, 'avg_loss': 6.109278422100845, 'avg_acc': 50.08047718235182, 'loss': 5.182705879211426}


EP_train:0:  76%|| 5291/6926 [1:47:23<1:39:12,  3.64s/it]

{'epoch': 0, 'iter': 5290, 'avg_loss': 6.108660441358786, 'avg_acc': 50.08741258741259, 'loss': 5.620635509490967}


EP_train:0:  77%|| 5301/6926 [1:47:59<1:39:26,  3.67s/it]

{'epoch': 0, 'iter': 5300, 'avg_loss': 6.107983756573599, 'avg_acc': 50.08430013205055, 'loss': 5.537447452545166}


EP_train:0:  77%|| 5311/6926 [1:48:35<1:37:08,  3.61s/it]

{'epoch': 0, 'iter': 5310, 'avg_loss': 6.107160408244118, 'avg_acc': 50.072373376012045, 'loss': 5.695735454559326}


EP_train:0:  77%|| 5321/6926 [1:49:11<1:36:09,  3.59s/it]

{'epoch': 0, 'iter': 5320, 'avg_loss': 6.106357777259825, 'avg_acc': 50.07341195264048, 'loss': 5.957878589630127}


EP_train:0:  77%|| 5331/6926 [1:49:47<1:36:49,  3.64s/it]

{'epoch': 0, 'iter': 5330, 'avg_loss': 6.105617276322124, 'avg_acc': 50.06917088726318, 'loss': 5.4526214599609375}


EP_train:0:  77%|| 5341/6926 [1:50:25<1:38:03,  3.71s/it]

{'epoch': 0, 'iter': 5340, 'avg_loss': 6.104839673941333, 'avg_acc': 50.073137052986326, 'loss': 5.282537460327148}


EP_train:0:  77%|| 5351/6926 [1:51:01<1:35:58,  3.66s/it]

{'epoch': 0, 'iter': 5350, 'avg_loss': 6.104001664999511, 'avg_acc': 50.06540833489067, 'loss': 5.321033000946045}


EP_train:0:  77%|| 5361/6926 [1:51:39<1:38:13,  3.77s/it]

{'epoch': 0, 'iter': 5360, 'avg_loss': 6.103613777317189, 'avg_acc': 50.055376795373995, 'loss': 5.746007442474365}


EP_train:0:  78%|| 5371/6926 [1:52:16<1:35:16,  3.68s/it]

{'epoch': 0, 'iter': 5370, 'avg_loss': 6.102793508436267, 'avg_acc': 50.052946378700426, 'loss': 5.604008197784424}


EP_train:0:  78%|| 5381/6926 [1:52:54<1:37:23,  3.78s/it]

{'epoch': 0, 'iter': 5380, 'avg_loss': 6.102059332506073, 'avg_acc': 50.04994424828099, 'loss': 6.168947219848633}


EP_train:0:  78%|| 5391/6926 [1:53:32<1:36:49,  3.78s/it]

{'epoch': 0, 'iter': 5390, 'avg_loss': 6.101642723805253, 'avg_acc': 50.05274995362642, 'loss': 5.926752090454102}


EP_train:0:  78%|| 5401/6926 [1:54:10<1:34:50,  3.73s/it]

{'epoch': 0, 'iter': 5400, 'avg_loss': 6.1011098287123655, 'avg_acc': 50.052652286613586, 'loss': 5.961556434631348}


EP_train:0:  78%|| 5411/6926 [1:54:48<1:35:54,  3.80s/it]

{'epoch': 0, 'iter': 5410, 'avg_loss': 6.100440940758235, 'avg_acc': 50.058907780447235, 'loss': 5.22943115234375}


EP_train:0:  78%|| 5421/6926 [1:55:27<1:35:42,  3.82s/it]

{'epoch': 0, 'iter': 5420, 'avg_loss': 6.099658429413743, 'avg_acc': 50.05649326692492, 'loss': 5.741892337799072}


EP_train:0:  78%|| 5431/6926 [1:56:05<1:35:57,  3.85s/it]

{'epoch': 0, 'iter': 5430, 'avg_loss': 6.09902567802883, 'avg_acc': 50.05236144356472, 'loss': 5.654664516448975}


EP_train:0:  79%|| 5441/6926 [1:56:43<1:35:33,  3.86s/it]

{'epoch': 0, 'iter': 5440, 'avg_loss': 6.098422760166812, 'avg_acc': 50.05398823745635, 'loss': 5.768589019775391}


EP_train:0:  79%|| 5451/6926 [1:57:21<1:32:39,  3.77s/it]

{'epoch': 0, 'iter': 5450, 'avg_loss': 6.098027931049622, 'avg_acc': 50.050449458814896, 'loss': 5.961572170257568}


EP_train:0:  79%|| 5461/6926 [1:57:59<1:30:53,  3.72s/it]

{'epoch': 0, 'iter': 5460, 'avg_loss': 6.0974803897317695, 'avg_acc': 50.05607947262406, 'loss': 5.592130661010742}


EP_train:0:  79%|| 5471/6926 [1:58:37<1:36:45,  3.99s/it]

{'epoch': 0, 'iter': 5470, 'avg_loss': 6.0966919058093945, 'avg_acc': 50.05426338877719, 'loss': 5.998015403747559}


EP_train:0:  79%|| 5481/6926 [1:59:16<1:32:50,  3.85s/it]

{'epoch': 0, 'iter': 5480, 'avg_loss': 6.095886524609679, 'avg_acc': 50.05416438606094, 'loss': 5.742386817932129}


EP_train:0:  79%|| 5491/6926 [1:59:54<1:31:38,  3.83s/it]

{'epoch': 0, 'iter': 5490, 'avg_loss': 6.095252078644588, 'avg_acc': 50.05861864869787, 'loss': 5.818233013153076}


EP_train:0:  79%|| 5501/6926 [2:00:34<1:35:12,  4.01s/it]

{'epoch': 0, 'iter': 5500, 'avg_loss': 6.094484071951739, 'avg_acc': 50.05453553899291, 'loss': 5.647989273071289}


EP_train:0:  80%|| 5511/6926 [2:01:12<1:31:36,  3.88s/it]

{'epoch': 0, 'iter': 5510, 'avg_loss': 6.093890297155461, 'avg_acc': 50.04706496098712, 'loss': 5.566036701202393}


EP_train:0:  80%|| 5521/6926 [2:01:50<1:29:52,  3.84s/it]

{'epoch': 0, 'iter': 5520, 'avg_loss': 6.093128355057683, 'avg_acc': 50.05094185835899, 'loss': 5.729443550109863}


EP_train:0:  80%|| 5531/6926 [2:02:28<1:29:37,  3.86s/it]

{'epoch': 0, 'iter': 5530, 'avg_loss': 6.092430561409142, 'avg_acc': 50.06158470439341, 'loss': 5.502145290374756}


EP_train:0:  80%|| 5541/6926 [2:03:07<1:27:53,  3.81s/it]

{'epoch': 0, 'iter': 5540, 'avg_loss': 6.09169741112324, 'avg_acc': 50.06316549359321, 'loss': 5.675856590270996}


EP_train:0:  80%|| 5551/6926 [2:03:45<1:28:47,  3.87s/it]

{'epoch': 0, 'iter': 5550, 'avg_loss': 6.09105505536986, 'avg_acc': 50.06248874076743, 'loss': 5.850781440734863}


EP_train:0:  80%|| 5561/6926 [2:04:24<1:28:36,  3.90s/it]

{'epoch': 0, 'iter': 5560, 'avg_loss': 6.090439179929292, 'avg_acc': 50.06574806689444, 'loss': 5.907330513000488}


EP_train:0:  80%|| 5571/6926 [2:05:03<1:27:35,  3.88s/it]

{'epoch': 0, 'iter': 5570, 'avg_loss': 6.089828375660738, 'avg_acc': 50.06787381080596, 'loss': 5.930750846862793}


EP_train:0:  81%|| 5581/6926 [2:05:41<1:25:49,  3.83s/it]

{'epoch': 0, 'iter': 5580, 'avg_loss': 6.089091357525223, 'avg_acc': 50.07279161440602, 'loss': 6.002740859985352}


EP_train:0:  81%|| 5591/6926 [2:06:19<1:24:54,  3.82s/it]

{'epoch': 0, 'iter': 5590, 'avg_loss': 6.088467072425642, 'avg_acc': 50.08216329815775, 'loss': 5.454851150512695}


EP_train:0:  81%|| 5601/6926 [2:06:58<1:25:29,  3.87s/it]

{'epoch': 0, 'iter': 5600, 'avg_loss': 6.087916400853576, 'avg_acc': 50.092059453668995, 'loss': 5.607652187347412}


EP_train:0:  81%|| 5611/6926 [2:07:37<1:26:08,  3.93s/it]

{'epoch': 0, 'iter': 5610, 'avg_loss': 6.0874172026961055, 'avg_acc': 50.08966761718053, 'loss': 5.703097343444824}


EP_train:0:  81%|| 5621/6926 [2:08:17<1:26:24,  3.97s/it]

{'epoch': 0, 'iter': 5620, 'avg_loss': 6.08684024664881, 'avg_acc': 50.09284380003558, 'loss': 5.498371601104736}


EP_train:0:  81%|| 5631/6926 [2:08:56<1:24:26,  3.91s/it]

{'epoch': 0, 'iter': 5630, 'avg_loss': 6.08603160450332, 'avg_acc': 50.094898774640384, 'loss': 5.688277721405029}


EP_train:0:  81%|| 5641/6926 [2:09:36<1:24:45,  3.96s/it]

{'epoch': 0, 'iter': 5640, 'avg_loss': 6.0852175040398055, 'avg_acc': 50.08974472611239, 'loss': 5.412929058074951}


EP_train:0:  82%|| 5651/6926 [2:10:15<1:23:31,  3.93s/it]

{'epoch': 0, 'iter': 5650, 'avg_loss': 6.0846604006498435, 'avg_acc': 50.089585913997524, 'loss': 5.651952266693115}


EP_train:0:  82%|| 5661/6926 [2:10:55<1:23:03,  3.94s/it]

{'epoch': 0, 'iter': 5660, 'avg_loss': 6.084259627459644, 'avg_acc': 50.09163575340045, 'loss': 6.1368513107299805}


EP_train:0:  82%|| 5671/6926 [2:11:34<1:22:53,  3.96s/it]

{'epoch': 0, 'iter': 5670, 'avg_loss': 6.083494531816117, 'avg_acc': 50.088167871627576, 'loss': 5.727392673492432}


EP_train:0:  82%|| 5681/6926 [2:12:14<1:22:05,  3.96s/it]

{'epoch': 0, 'iter': 5680, 'avg_loss': 6.083021916085787, 'avg_acc': 50.09571378278472, 'loss': 5.926077842712402}


EP_train:0:  82%|| 5691/6926 [2:12:53<1:19:49,  3.88s/it]

{'epoch': 0, 'iter': 5690, 'avg_loss': 6.082327449579394, 'avg_acc': 50.105978738358814, 'loss': 5.639200210571289}


EP_train:0:  82%|| 5701/6926 [2:13:33<1:21:26,  3.99s/it]

{'epoch': 0, 'iter': 5700, 'avg_loss': 6.081785275463137, 'avg_acc': 50.10085949833363, 'loss': 5.651735305786133}


EP_train:0:  82%|| 5711/6926 [2:14:12<1:19:10,  3.91s/it]

{'epoch': 0, 'iter': 5710, 'avg_loss': 6.08097667598908, 'avg_acc': 50.09904132376116, 'loss': 5.83112907409668}


EP_train:0:  83%|| 5721/6926 [2:14:51<1:18:10,  3.89s/it]

{'epoch': 0, 'iter': 5720, 'avg_loss': 6.080637956411558, 'avg_acc': 50.09613703897921, 'loss': 5.8111162185668945}


EP_train:0:  83%|| 5731/6926 [2:15:31<1:19:39,  4.00s/it]

{'epoch': 0, 'iter': 5730, 'avg_loss': 6.080154193797633, 'avg_acc': 50.09542400977141, 'loss': 5.799943447113037}


EP_train:0:  83%|| 5741/6926 [2:16:11<1:17:48,  3.94s/it]

{'epoch': 0, 'iter': 5740, 'avg_loss': 6.079698501158998, 'avg_acc': 50.092536143528996, 'loss': 5.847317695617676}


EP_train:0:  83%|| 5751/6926 [2:16:50<1:16:30,  3.91s/it]

{'epoch': 0, 'iter': 5750, 'avg_loss': 6.078946716950015, 'avg_acc': 50.096178925404274, 'loss': 5.999346733093262}


EP_train:0:  83%|| 5761/6926 [2:17:30<1:17:24,  3.99s/it]

{'epoch': 0, 'iter': 5760, 'avg_loss': 6.078310174549688, 'avg_acc': 50.08787536885957, 'loss': 5.9321184158325195}


EP_train:0:  83%|| 5771/6926 [2:18:09<1:14:56,  3.89s/it]

{'epoch': 0, 'iter': 5770, 'avg_loss': 6.077783565755899, 'avg_acc': 50.08934760006931, 'loss': 5.256465435028076}


EP_train:0:  83%|| 5781/6926 [2:18:47<1:12:44,  3.81s/it]

{'epoch': 0, 'iter': 5780, 'avg_loss': 6.077105385893007, 'avg_acc': 50.08919304618578, 'loss': 5.466736793518066}


EP_train:0:  84%|| 5791/6926 [2:19:26<1:13:01,  3.86s/it]

{'epoch': 0, 'iter': 5790, 'avg_loss': 6.076326361264081, 'avg_acc': 50.09659385252979, 'loss': 5.861227035522461}


EP_train:0:  84%|| 5801/6926 [2:20:04<1:12:33,  3.87s/it]

{'epoch': 0, 'iter': 5800, 'avg_loss': 6.075400314584228, 'avg_acc': 50.098043440786064, 'loss': 5.82940149307251}


EP_train:0:  84%|| 5811/6926 [2:20:43<1:12:14,  3.89s/it]

{'epoch': 0, 'iter': 5810, 'avg_loss': 6.074962498378146, 'avg_acc': 50.10325245224574, 'loss': 5.374361515045166}


EP_train:0:  84%|| 5821/6926 [2:21:22<1:11:05,  3.86s/it]

{'epoch': 0, 'iter': 5820, 'avg_loss': 6.074374858015408, 'avg_acc': 50.0998539769799, 'loss': 5.483505725860596}


EP_train:0:  84%|| 5831/6926 [2:22:01<1:12:11,  3.96s/it]

{'epoch': 0, 'iter': 5830, 'avg_loss': 6.073767224627321, 'avg_acc': 50.09539530097753, 'loss': 5.7890825271606445}


EP_train:0:  84%|| 5841/6926 [2:22:40<1:11:35,  3.96s/it]

{'epoch': 0, 'iter': 5840, 'avg_loss': 6.073249056289874, 'avg_acc': 50.093626947440505, 'loss': 6.1001787185668945}


EP_train:0:  84%|| 5851/6926 [2:23:19<1:09:22,  3.87s/it]

{'epoch': 0, 'iter': 5850, 'avg_loss': 6.072721008153435, 'avg_acc': 50.09827379935054, 'loss': 5.820581912994385}


EP_train:0:  85%|| 5861/6926 [2:23:59<1:10:06,  3.95s/it]

{'epoch': 0, 'iter': 5860, 'avg_loss': 6.07208039235426, 'avg_acc': 50.09650656884491, 'loss': 5.748669147491455}


EP_train:0:  85%|| 5871/6926 [2:24:38<1:08:42,  3.91s/it]

{'epoch': 0, 'iter': 5870, 'avg_loss': 6.0714883896098835, 'avg_acc': 50.09314852665645, 'loss': 6.095505237579346}


EP_train:0:  85%|| 5881/6926 [2:25:16<1:06:55,  3.84s/it]

{'epoch': 0, 'iter': 5880, 'avg_loss': 6.070888432773719, 'avg_acc': 50.09245876551607, 'loss': 5.943742752075195}


EP_train:0:  85%|| 5891/6926 [2:25:55<1:06:53,  3.88s/it]

{'epoch': 0, 'iter': 5890, 'avg_loss': 6.07020937536431, 'avg_acc': 50.08699711424206, 'loss': 5.6374053955078125}


EP_train:0:  85%|| 5901/6926 [2:26:33<1:05:43,  3.85s/it]

{'epoch': 0, 'iter': 5900, 'avg_loss': 6.069725288574705, 'avg_acc': 50.08737925775293, 'loss': 5.855521202087402}


EP_train:0:  85%|| 5911/6926 [2:27:12<1:06:20,  3.92s/it]

{'epoch': 0, 'iter': 5910, 'avg_loss': 6.069455315872732, 'avg_acc': 50.09357553713416, 'loss': 5.770267009735107}


EP_train:0:  85%|| 5921/6926 [2:27:51<1:04:30,  3.85s/it]

{'epoch': 0, 'iter': 5920, 'avg_loss': 6.06888459570765, 'avg_acc': 50.09763975679784, 'loss': 6.133537292480469}


EP_train:0:  86%|| 5931/6926 [2:28:30<1:05:11,  3.93s/it]

{'epoch': 0, 'iter': 5930, 'avg_loss': 6.068407732446285, 'avg_acc': 50.09115241949081, 'loss': 5.3600945472717285}


EP_train:0:  86%|| 5941/6926 [2:29:09<1:04:49,  3.95s/it]

{'epoch': 0, 'iter': 5940, 'avg_loss': 6.067962301036281, 'avg_acc': 50.09625904729843, 'loss': 6.0487494468688965}


EP_train:0:  86%|| 5951/6926 [2:29:49<1:02:44,  3.86s/it]

{'epoch': 0, 'iter': 5950, 'avg_loss': 6.067276286389763, 'avg_acc': 50.09242144177449, 'loss': 5.809393405914307}


EP_train:0:  86%|| 5961/6926 [2:30:28<1:02:54,  3.91s/it]

{'epoch': 0, 'iter': 5960, 'avg_loss': 6.066843238098472, 'avg_acc': 50.09331488005368, 'loss': 5.550147533416748}


EP_train:0:  86%|| 5971/6926 [2:31:07<1:02:09,  3.91s/it]

{'epoch': 0, 'iter': 5970, 'avg_loss': 6.0663925668259315, 'avg_acc': 50.09001842237482, 'loss': 5.778110980987549}


EP_train:0:  86%|| 5981/6926 [2:31:46<1:01:03,  3.88s/it]

{'epoch': 0, 'iter': 5980, 'avg_loss': 6.0657949705941885, 'avg_acc': 50.093525330212344, 'loss': 5.601467609405518}


EP_train:0:  87%|| 5991/6926 [2:32:25<1:02:50,  4.03s/it]

{'epoch': 0, 'iter': 5990, 'avg_loss': 6.065322547578947, 'avg_acc': 50.10432315139376, 'loss': 5.959235191345215}


EP_train:0:  87%|| 6001/6926 [2:33:04<1:00:37,  3.93s/it]

{'epoch': 0, 'iter': 6000, 'avg_loss': 6.065038014046253, 'avg_acc': 50.11196050658223, 'loss': 6.278867721557617}


EP_train:0:  87%|| 6011/6926 [2:33:43<58:44,  3.85s/it]

{'epoch': 0, 'iter': 6010, 'avg_loss': 6.064412888701596, 'avg_acc': 50.11437364831143, 'loss': 6.15139102935791}


EP_train:0:  87%|| 6021/6926 [2:34:22<58:24,  3.87s/it]

{'epoch': 0, 'iter': 6020, 'avg_loss': 6.064144929012494, 'avg_acc': 50.11210762331838, 'loss': 5.455483436584473}


EP_train:0:  87%|| 6031/6926 [2:35:01<58:04,  3.89s/it]

{'epoch': 0, 'iter': 6030, 'avg_loss': 6.063701007164962, 'avg_acc': 50.11192173768861, 'loss': 5.673882007598877}


EP_train:0:  87%|| 6041/6926 [2:35:39<56:08,  3.81s/it]

{'epoch': 0, 'iter': 6040, 'avg_loss': 6.063422597099901, 'avg_acc': 50.11173646747227, 'loss': 5.901473045349121}


EP_train:0:  87%|| 6051/6926 [2:36:17<55:15,  3.79s/it]

{'epoch': 0, 'iter': 6050, 'avg_loss': 6.062850621333301, 'avg_acc': 50.12136423731614, 'loss': 5.995242118835449}


EP_train:0:  88%|| 6061/6926 [2:36:57<57:49,  4.01s/it]

{'epoch': 0, 'iter': 6060, 'avg_loss': 6.062393399241576, 'avg_acc': 50.11755485893416, 'loss': 5.574792385101318}


EP_train:0:  88%|| 6071/6926 [2:37:36<55:31,  3.90s/it]

{'epoch': 0, 'iter': 6070, 'avg_loss': 6.061947958215303, 'avg_acc': 50.12096442101795, 'loss': 5.589018821716309}


EP_train:0:  88%|| 6081/6926 [2:38:15<54:39,  3.88s/it]

{'epoch': 0, 'iter': 6080, 'avg_loss': 6.0614566737894675, 'avg_acc': 50.1120292715014, 'loss': 5.8121232986450195}


EP_train:0:  88%|| 6091/6926 [2:38:54<55:32,  3.99s/it]

{'epoch': 0, 'iter': 6090, 'avg_loss': 6.060821934025034, 'avg_acc': 50.109280085371864, 'loss': 6.030996322631836}


EP_train:0:  88%|| 6101/6926 [2:39:33<53:34,  3.90s/it]

{'epoch': 0, 'iter': 6100, 'avg_loss': 6.060438418736166, 'avg_acc': 50.11063760039338, 'loss': 6.144297122955322}


EP_train:0:  88%|| 6111/6926 [2:40:12<52:38,  3.88s/it]

{'epoch': 0, 'iter': 6110, 'avg_loss': 6.060034549796158, 'avg_acc': 50.10994518082147, 'loss': 6.084949970245361}


EP_train:0:  88%|| 6121/6926 [2:40:51<53:30,  3.99s/it]

{'epoch': 0, 'iter': 6120, 'avg_loss': 6.059844863198586, 'avg_acc': 50.110276098676685, 'loss': 6.5136518478393555}


EP_train:0:  89%|| 6131/6926 [2:41:30<51:47,  3.91s/it]

{'epoch': 0, 'iter': 6130, 'avg_loss': 6.05924867156512, 'avg_acc': 50.10550888925135, 'loss': 5.638473033905029}


EP_train:0:  89%|| 6141/6926 [2:42:10<51:02,  3.90s/it]

{'epoch': 0, 'iter': 6140, 'avg_loss': 6.058768427074975, 'avg_acc': 50.1048282038756, 'loss': 5.844416618347168}


EP_train:0:  89%|| 6151/6926 [2:42:49<51:12,  3.96s/it]

{'epoch': 0, 'iter': 6150, 'avg_loss': 6.058292157048037, 'avg_acc': 50.10262558933507, 'loss': 5.4745707511901855}


EP_train:0:  89%|| 6161/6926 [2:43:28<50:02,  3.92s/it]

{'epoch': 0, 'iter': 6160, 'avg_loss': 6.0574992264541905, 'avg_acc': 50.10550235351404, 'loss': 5.399084091186523}


EP_train:0:  89%|| 6171/6926 [2:44:07<48:39,  3.87s/it]

{'epoch': 0, 'iter': 6170, 'avg_loss': 6.0571574724352235, 'avg_acc': 50.10887619510614, 'loss': 5.796453952789307}


EP_train:0:  89%|| 6181/6926 [2:44:47<49:31,  3.99s/it]

{'epoch': 0, 'iter': 6180, 'avg_loss': 6.056907289141186, 'avg_acc': 50.10313865070377, 'loss': 5.596674919128418}


EP_train:0:  89%|| 6191/6926 [2:45:26<47:54,  3.91s/it]

{'epoch': 0, 'iter': 6190, 'avg_loss': 6.056633158967061, 'avg_acc': 50.10600064609918, 'loss': 6.613468647003174}


EP_train:0:  90%|| 6201/6926 [2:46:05<46:30,  3.85s/it]

{'epoch': 0, 'iter': 6200, 'avg_loss': 6.0559446614435695, 'avg_acc': 50.10683760683761, 'loss': 5.181078910827637}


EP_train:0:  90%|| 6211/6926 [2:46:44<47:15,  3.97s/it]

{'epoch': 0, 'iter': 6210, 'avg_loss': 6.055631684097685, 'avg_acc': 50.1096844308485, 'loss': 6.039973735809326}


EP_train:0:  90%|| 6221/6926 [2:47:24<46:08,  3.93s/it]

{'epoch': 0, 'iter': 6220, 'avg_loss': 6.055112335463847, 'avg_acc': 50.115033756630766, 'loss': 5.704607963562012}


EP_train:0:  90%|| 6231/6926 [2:48:03<44:46,  3.87s/it]

{'epoch': 0, 'iter': 6230, 'avg_loss': 6.054479230217405, 'avg_acc': 50.11334456748515, 'loss': 6.0806474685668945}


EP_train:0:  90%|| 6241/6926 [2:48:42<45:04,  3.95s/it]

{'epoch': 0, 'iter': 6240, 'avg_loss': 6.053801946085322, 'avg_acc': 50.11917160711425, 'loss': 5.792253017425537}


EP_train:0:  90%|| 6251/6926 [2:49:21<44:14,  3.93s/it]

{'epoch': 0, 'iter': 6250, 'avg_loss': 6.053543275040715, 'avg_acc': 50.11648136298192, 'loss': 5.834957122802734}


EP_train:0:  90%|| 6261/6926 [2:50:00<42:51,  3.87s/it]

{'epoch': 0, 'iter': 6260, 'avg_loss': 6.053130680751998, 'avg_acc': 50.11529707714423, 'loss': 5.824543476104736}


EP_train:0:  91%|| 6271/6926 [2:50:39<43:09,  3.95s/it]

{'epoch': 0, 'iter': 6270, 'avg_loss': 6.052609203078876, 'avg_acc': 50.12458140647424, 'loss': 5.828678131103516}


EP_train:0:  91%|| 6281/6926 [2:51:18<42:14,  3.93s/it]

{'epoch': 0, 'iter': 6280, 'avg_loss': 6.0522638336713666, 'avg_acc': 50.12637318898264, 'loss': 5.949728965759277}


EP_train:0:  91%|| 6291/6926 [2:51:57<40:52,  3.86s/it]

{'epoch': 0, 'iter': 6290, 'avg_loss': 6.051833656061209, 'avg_acc': 50.122695120012715, 'loss': 5.964383602142334}


EP_train:0:  91%|| 6301/6926 [2:52:36<41:30,  3.99s/it]

{'epoch': 0, 'iter': 6300, 'avg_loss': 6.051188824309601, 'avg_acc': 50.11754086652912, 'loss': 5.750181198120117}


EP_train:0:  91%|| 6311/6926 [2:53:16<40:15,  3.93s/it]

{'epoch': 0, 'iter': 6310, 'avg_loss': 6.050359213452316, 'avg_acc': 50.120325621929965, 'loss': 5.03533935546875}


EP_train:0:  91%|| 6321/6926 [2:53:55<39:52,  3.95s/it]

{'epoch': 0, 'iter': 6320, 'avg_loss': 6.050115905481697, 'avg_acc': 50.114202657807304, 'loss': 5.609652519226074}


EP_train:0:  91%|| 6331/6926 [2:54:35<38:44,  3.91s/it]

{'epoch': 0, 'iter': 6330, 'avg_loss': 6.04982316251088, 'avg_acc': 50.115503080082135, 'loss': 6.169557094573975}


EP_train:0:  92%|| 6341/6926 [2:55:14<38:14,  3.92s/it]

{'epoch': 0, 'iter': 6340, 'avg_loss': 6.049345323592401, 'avg_acc': 50.12369894338432, 'loss': 5.546729564666748}


EP_train:0:  92%|| 6351/6926 [2:55:53<37:02,  3.87s/it]

{'epoch': 0, 'iter': 6350, 'avg_loss': 6.048884183135564, 'avg_acc': 50.12842465753424, 'loss': 5.3033905029296875}


EP_train:0:  92%|| 6361/6926 [2:56:32<36:54,  3.92s/it]

{'epoch': 0, 'iter': 6360, 'avg_loss': 6.048308408329686, 'avg_acc': 50.12724021380286, 'loss': 5.872603893280029}


EP_train:0:  92%|| 6371/6926 [2:57:10<35:44,  3.86s/it]

{'epoch': 0, 'iter': 6370, 'avg_loss': 6.047887136976978, 'avg_acc': 50.129493015225236, 'loss': 5.5743021965026855}


EP_train:0:  92%|| 6381/6926 [2:57:49<35:03,  3.86s/it]

{'epoch': 0, 'iter': 6380, 'avg_loss': 6.047365622019024, 'avg_acc': 50.13369769628585, 'loss': 5.173526763916016}


EP_train:0:  92%|| 6391/6926 [2:58:29<35:21,  3.97s/it]

{'epoch': 0, 'iter': 6390, 'avg_loss': 6.046941647151326, 'avg_acc': 50.13544437490221, 'loss': 5.913001537322998}


EP_train:0:  92%|| 6401/6926 [2:59:08<34:20,  3.92s/it]

{'epoch': 0, 'iter': 6400, 'avg_loss': 6.046524825123694, 'avg_acc': 50.133768161224815, 'loss': 5.422516822814941}


EP_train:0:  93%|| 6411/6926 [2:59:47<33:16,  3.88s/it]

{'epoch': 0, 'iter': 6410, 'avg_loss': 6.0460878003872685, 'avg_acc': 50.1345343940103, 'loss': 5.553627014160156}


EP_train:0:  93%|| 6421/6926 [3:00:26<33:27,  3.98s/it]

{'epoch': 0, 'iter': 6420, 'avg_loss': 6.045764007295075, 'avg_acc': 50.13627160878368, 'loss': 5.495111465454102}


EP_train:0:  93%|| 6431/6926 [3:01:05<31:57,  3.87s/it]

{'epoch': 0, 'iter': 6430, 'avg_loss': 6.045267044450573, 'avg_acc': 50.13654563831441, 'loss': 5.437535285949707}


EP_train:0:  93%|| 6441/6926 [3:01:44<31:19,  3.87s/it]

{'epoch': 0, 'iter': 6440, 'avg_loss': 6.044740244239555, 'avg_acc': 50.1295412203074, 'loss': 5.802971363067627}


EP_train:0:  93%|| 6451/6926 [3:02:23<31:12,  3.94s/it]

{'epoch': 0, 'iter': 6450, 'avg_loss': 6.044476197017587, 'avg_acc': 50.12401178111921, 'loss': 5.7318339347839355}


EP_train:0:  93%|| 6461/6926 [3:03:02<30:01,  3.87s/it]

{'epoch': 0, 'iter': 6460, 'avg_loss': 6.0440804596342845, 'avg_acc': 50.119950472063145, 'loss': 5.854379653930664}


EP_train:0:  93%|| 6471/6926 [3:03:41<29:16,  3.86s/it]

{'epoch': 0, 'iter': 6470, 'avg_loss': 6.043862342834473, 'avg_acc': 50.112038324833875, 'loss': 5.925180912017822}


EP_train:0:  94%|| 6481/6926 [3:04:20<29:23,  3.96s/it]

{'epoch': 0, 'iter': 6480, 'avg_loss': 6.043752327283174, 'avg_acc': 50.118615954328035, 'loss': 5.984925746917725}


EP_train:0:  94%|| 6491/6926 [3:04:59<28:01,  3.87s/it]

{'epoch': 0, 'iter': 6490, 'avg_loss': 6.043211131374305, 'avg_acc': 50.11939608688954, 'loss': 5.732913970947266}


EP_train:0:  94%|| 6501/6926 [3:05:37<27:25,  3.87s/it]

{'epoch': 0, 'iter': 6500, 'avg_loss': 6.042654294561302, 'avg_acc': 50.1163282571912, 'loss': 5.57657527923584}


EP_train:0:  94%|| 6511/6926 [3:06:16<26:57,  3.90s/it]

{'epoch': 0, 'iter': 6510, 'avg_loss': 6.042256203541428, 'avg_acc': 50.120949162955, 'loss': 5.5736846923828125}


EP_train:0:  94%|| 6521/6926 [3:06:55<26:12,  3.88s/it]

{'epoch': 0, 'iter': 6520, 'avg_loss': 6.041792079543541, 'avg_acc': 50.12076368655114, 'loss': 5.49852991104126}


EP_train:0:  94%|| 6531/6926 [3:07:33<25:19,  3.85s/it]

{'epoch': 0, 'iter': 6530, 'avg_loss': 6.041706460775488, 'avg_acc': 50.12010029092022, 'loss': 5.91827917098999}


EP_train:0:  94%|| 6541/6926 [3:08:13<25:27,  3.97s/it]

{'epoch': 0, 'iter': 6540, 'avg_loss': 6.041488742886689, 'avg_acc': 50.12087219079652, 'loss': 6.135743618011475}


EP_train:0:  95%|| 6551/6926 [3:08:51<24:19,  3.89s/it]

{'epoch': 0, 'iter': 6550, 'avg_loss': 6.040917135744381, 'avg_acc': 50.121641734086396, 'loss': 6.034829616546631}


EP_train:0:  95%|| 6561/6926 [3:09:30<23:32,  3.87s/it]

{'epoch': 0, 'iter': 6560, 'avg_loss': 6.040432966622142, 'avg_acc': 50.12098003353147, 'loss': 5.7155961990356445}


EP_train:0:  95%|| 6571/6926 [3:10:10<23:24,  3.96s/it]

{'epoch': 0, 'iter': 6570, 'avg_loss': 6.039972665519987, 'avg_acc': 50.12222264495511, 'loss': 5.658991813659668}


EP_train:0:  95%|| 6581/6926 [3:10:48<22:15,  3.87s/it]

{'epoch': 0, 'iter': 6580, 'avg_loss': 6.0397726236333815, 'avg_acc': 50.114439294939984, 'loss': 6.215570449829102}


EP_train:0:  95%|| 6591/6926 [3:11:27<21:37,  3.87s/it]

{'epoch': 0, 'iter': 6590, 'avg_loss': 6.039470501941536, 'avg_acc': 50.117584585040206, 'loss': 6.36869478225708}


EP_train:0:  95%|| 6601/6926 [3:12:07<21:32,  3.98s/it]

{'epoch': 0, 'iter': 6600, 'avg_loss': 6.039122320561495, 'avg_acc': 50.1145659748523, 'loss': 5.966281890869141}


EP_train:0:  95%|| 6611/6926 [3:12:46<20:39,  3.94s/it]

{'epoch': 0, 'iter': 6610, 'avg_loss': 6.038619209500741, 'avg_acc': 50.111556496747845, 'loss': 5.106238842010498}


EP_train:0:  96%|| 6621/6926 [3:13:25<19:50,  3.90s/it]

{'epoch': 0, 'iter': 6620, 'avg_loss': 6.038258826223101, 'avg_acc': 50.10383627850777, 'loss': 6.014981746673584}


EP_train:0:  96%|| 6631/6926 [3:14:05<19:28,  3.96s/it]

{'epoch': 0, 'iter': 6630, 'avg_loss': 6.037868136105503, 'avg_acc': 50.09661061679987, 'loss': 6.192661762237549}


EP_train:0:  96%|| 6641/6926 [3:14:44<18:38,  3.92s/it]

{'epoch': 0, 'iter': 6640, 'avg_loss': 6.037368362485923, 'avg_acc': 50.09975907242885, 'loss': 5.359433650970459}


EP_train:0:  96%|| 6651/6926 [3:15:23<17:51,  3.90s/it]

{'epoch': 0, 'iter': 6650, 'avg_loss': 6.036951922667759, 'avg_acc': 50.10054878965568, 'loss': 5.295896053314209}


EP_train:0:  96%|| 6661/6926 [3:16:03<17:38,  3.99s/it]

{'epoch': 0, 'iter': 6660, 'avg_loss': 6.036572803267665, 'avg_acc': 50.099459540609516, 'loss': 5.440064907073975}


EP_train:0:  96%|| 6671/6926 [3:16:42<16:42,  3.93s/it]

{'epoch': 0, 'iter': 6670, 'avg_loss': 6.036108963279741, 'avg_acc': 50.104463348823266, 'loss': 5.104126453399658}


EP_train:0:  96%|| 6681/6926 [3:17:22<16:02,  3.93s/it]

{'epoch': 0, 'iter': 6680, 'avg_loss': 6.035591810161064, 'avg_acc': 50.10477473432121, 'loss': 5.98341703414917}


EP_train:0:  97%|| 6691/6926 [3:18:02<15:42,  4.01s/it]

{'epoch': 0, 'iter': 6690, 'avg_loss': 6.035092998131778, 'avg_acc': 50.10088178149753, 'loss': 5.970633029937744}


EP_train:0:  97%|| 6701/6926 [3:18:42<14:47,  3.94s/it]

{'epoch': 0, 'iter': 6700, 'avg_loss': 6.034642670407542, 'avg_acc': 50.09839949261304, 'loss': 5.689349174499512}


EP_train:0:  97%|| 6711/6926 [3:19:21<14:05,  3.93s/it]

{'epoch': 0, 'iter': 6710, 'avg_loss': 6.034354222547349, 'avg_acc': 50.09825286842498, 'loss': 5.624302864074707}


EP_train:0:  97%|| 6721/6926 [3:20:01<13:35,  3.98s/it]

{'epoch': 0, 'iter': 6720, 'avg_loss': 6.033980883652389, 'avg_acc': 50.09345707484005, 'loss': 5.338839054107666}


EP_train:0:  97%|| 6731/6926 [3:20:40<12:52,  3.96s/it]

{'epoch': 0, 'iter': 6730, 'avg_loss': 6.0336809969465515, 'avg_acc': 50.08913980092111, 'loss': 5.35446310043335}


EP_train:0:  97%|| 6741/6926 [3:21:21<12:29,  4.05s/it]

{'epoch': 0, 'iter': 6740, 'avg_loss': 6.03339308324332, 'avg_acc': 50.09039830885625, 'loss': 5.865133285522461}


EP_train:0:  97%|| 6751/6926 [3:22:01<11:41,  4.01s/it]

{'epoch': 0, 'iter': 6750, 'avg_loss': 6.032867554540441, 'avg_acc': 50.08980151088728, 'loss': 6.179904937744141}


EP_train:0:  98%|| 6761/6926 [3:22:41<10:56,  3.98s/it]

{'epoch': 0, 'iter': 6760, 'avg_loss': 6.03239508963005, 'avg_acc': 50.091517526993044, 'loss': 5.45190954208374}


EP_train:0:  98%|| 6771/6926 [3:23:22<10:37,  4.11s/it]

{'epoch': 0, 'iter': 6770, 'avg_loss': 6.031916047635555, 'avg_acc': 50.104766651897805, 'loss': 6.100796222686768}


EP_train:0:  98%|| 6781/6926 [3:24:02<09:46,  4.04s/it]

{'epoch': 0, 'iter': 6780, 'avg_loss': 6.031588107149714, 'avg_acc': 50.11106400235953, 'loss': 6.323483467102051}


EP_train:0:  98%|| 6791/6926 [3:24:43<09:03,  4.03s/it]

{'epoch': 0, 'iter': 6790, 'avg_loss': 6.030992637974685, 'avg_acc': 50.114121631571194, 'loss': 5.8622941970825195}


EP_train:0:  98%|| 6801/6926 [3:25:23<08:19,  4.00s/it]

{'epoch': 0, 'iter': 6800, 'avg_loss': 6.030564342591889, 'avg_acc': 50.105223496544625, 'loss': 5.893245697021484}


EP_train:0:  98%|| 6811/6926 [3:26:04<07:41,  4.01s/it]

{'epoch': 0, 'iter': 6810, 'avg_loss': 6.030200982685394, 'avg_acc': 50.105986639260024, 'loss': 5.678061008453369}


EP_train:0:  98%|| 6821/6926 [3:26:43<06:59,  4.00s/it]

{'epoch': 0, 'iter': 6820, 'avg_loss': 6.029832274136769, 'avg_acc': 50.10628940038118, 'loss': 5.836287498474121}


EP_train:0:  99%|| 6831/6926 [3:27:24<06:26,  4.07s/it]

{'epoch': 0, 'iter': 6830, 'avg_loss': 6.02968299896716, 'avg_acc': 50.11025106133802, 'loss': 5.803526878356934}


EP_train:0:  99%|| 6841/6926 [3:28:04<05:43,  4.04s/it]

{'epoch': 0, 'iter': 6840, 'avg_loss': 6.029165567805972, 'avg_acc': 50.10826268089461, 'loss': 5.774418830871582}


EP_train:0:  99%|| 6851/6926 [3:28:44<04:59,  3.99s/it]

{'epoch': 0, 'iter': 6850, 'avg_loss': 6.028763460845012, 'avg_acc': 50.10719238067436, 'loss': 5.938295364379883}


EP_train:0:  99%|| 6861/6926 [3:29:24<04:20,  4.00s/it]

{'epoch': 0, 'iter': 6860, 'avg_loss': 6.02853285948366, 'avg_acc': 50.110679930039346, 'loss': 5.666587829589844}


EP_train:0:  99%|| 6871/6926 [3:30:05<03:39,  4.00s/it]

{'epoch': 0, 'iter': 6870, 'avg_loss': 6.028282226875626, 'avg_acc': 50.11051884732935, 'loss': 5.80293083190918}


EP_train:0:  99%|| 6881/6926 [3:30:45<03:05,  4.12s/it]

{'epoch': 0, 'iter': 6880, 'avg_loss': 6.027943664227022, 'avg_acc': 50.10763333817759, 'loss': 5.579654216766357}


EP_train:0:  99%|| 6891/6926 [3:31:26<02:22,  4.08s/it]

{'epoch': 0, 'iter': 6890, 'avg_loss': 6.027491288578851, 'avg_acc': 50.109744594398485, 'loss': 5.64144229888916}


EP_train:0: 100%|| 6901/6926 [3:32:06<01:40,  4.03s/it]

{'epoch': 0, 'iter': 6900, 'avg_loss': 6.027088387429481, 'avg_acc': 50.10822706854079, 'loss': 6.090066432952881}


EP_train:0: 100%|| 6911/6926 [3:32:47<01:01,  4.09s/it]

{'epoch': 0, 'iter': 6910, 'avg_loss': 6.026868039215203, 'avg_acc': 50.109427000434096, 'loss': 5.715153694152832}


EP_train:0: 100%|| 6921/6926 [3:33:28<00:20,  4.09s/it]

{'epoch': 0, 'iter': 6920, 'avg_loss': 6.026447625884327, 'avg_acc': 50.107462794393875, 'loss': 6.010447978973389}


EP_train:0: 100%|| 6926/6926 [3:33:46<00:00,  1.85s/it]


EP0, train:             avg_loss=6.026261819775587,             total_acc=50.11325897047145


EP_train:1:   0%|| 1/6926 [00:04<7:54:08,  4.11s/it]

{'epoch': 1, 'iter': 0, 'avg_loss': 5.983631134033203, 'avg_acc': 68.75, 'loss': 5.983631134033203}


EP_train:1:   0%|| 5/6926 [00:24<9:20:20,  4.86s/it]


KeyboardInterrupt: 