# CSGY 6923 Machine Learning - Final Project

# Building a BERT Model from Scratch

This notebook implements a BERT (Bidirectional Encoder Representations from Transformers) model from scratch using PyTorch. We'll train it on the WikiText dataset for two tasks:
1. Masked Language Modeling (MLM)
2. Next Sentence Prediction (NSP)


Installing huggingface and pytorch specific dependencies

In [None]:
!pip install datasets transformers==4.18.0 sentencepiece
!pip install torchtext==0.18.0

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting transformers==4.18.0
  Downloading transformers-4.18.0-py3-none-any.whl.metadata (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.3/70.3 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
Collecting sacremoses (from transformers==4.18.0)
  Downloading sacremoses-0.1.1-py3-none-any.whl.metadata (8.3 kB)
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1 (from transformers==4.18.0)
  Downloading tokenizers-0.12.1-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (6.5 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.

## Setting Up the Environment

Import all necessary modules and mount Google Drive to store our vocabulary and model weights.

In [None]:
from datasets import *
import argparse
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import DataLoader
import torch
import math
import numpy as np
import tqdm
import random
import pickle
from collections import Counter
from typing import Dict, List, Tuple, Optional
import re
import nltk
from tqdm.notebook import tqdm as notebook_tqdm

# Connect with Google Drive
# The notebook keeps its vocabulary and model weights on Drive, so the drive
# is mounted at /content/drive before any file access happens.
from google.colab import drive
# NOTE(review): force_remount presumably re-mounts even when a previous mount
# is still present - confirm against the google.colab documentation.
drive.mount('/content/drive', force_remount = True)

# Fetch the NLTK 'punkt_tab' resource.
# NOTE(review): presumably required by the sentence tokenizer imported in a
# later cell - verify against the installed NLTK version.
nltk.download('punkt_tab')

Mounted at /content/drive


In [None]:
cd /content/drive/My\ Drive/Colab\ Notebooks/

/content/drive/My Drive/Colab Notebooks


In [None]:
pwd

'/content/drive/My Drive/Colab Notebooks'

In [None]:
nltk.download('punkt')
from nltk.tokenize import sent_tokenize

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


## Building the BERT Architecture

### Attention Mechanism
The core of BERT is the transformer architecture, which relies heavily on attention mechanisms:
1. Basic Attention computation
2. Multi-Head Attention
3. Layer Normalization

The following code implements the scaled dot-product attention and multi-head attention mechanisms.

In [None]:
class Attention(nn.Module):
    """Scaled dot-product attention.

    Computes ``softmax(Q K^T / sqrt(d)) V`` where ``d`` is the size of the
    last dimension of ``query``.
    """

    def forward(self, query, key, value, mask=None, dropout=None):
        """Return ``(context, attention_weights)``.

        :param query: query tensor; its last dimension sets the scaling factor
        :param key: key tensor, multiplied with ``query`` over the last dimension
        :param value: value tensor combined using the attention weights
        :param mask: optional tensor; positions where ``mask == 0`` receive a
            score of ``-1e9`` before the softmax
        :param dropout: optional callable applied to the attention weights
        :return: tuple of the weighted values and the attention weights
        """
        head_dim = query.size(-1)
        logits = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(head_dim)

        # Masked-out positions get a very negative score so the softmax sends
        # them to (almost) zero probability.
        if mask is not None:
            logits = logits.masked_fill(mask == 0, -1e9)

        weights = F.softmax(logits, dim=-1)
        if dropout is not None:
            weights = dropout(weights)

        context = torch.matmul(weights, value)
        return context, weights

In [None]:
class MultiHeadedAttention(nn.Module):
    """Multi-head attention: project, attend per head, merge, project again."""

    def __init__(self, h, d_model, dropout=0.1):
        """
        :param h: number of attention heads; must divide ``d_model``
        :param d_model: model (hidden) size
        :param dropout: dropout probability applied to the attention weights
        """
        super().__init__()
        assert d_model % h == 0

        # Every head works on an equal slice of the model dimension
        # (d_v is taken to be the same as d_k).
        self.d_k = d_model // h
        self.h = h

        # Three input projections (query, key, value) plus the output projection.
        self.linear_layers = nn.ModuleList(nn.Linear(d_model, d_model) for _ in range(3))
        self.output_linear = nn.Linear(d_model, d_model)
        self.attention = Attention()

        self.dropout = nn.Dropout(p=dropout)

    def forward(self, query, key, value, mask=None):
        """Apply multi-head attention and return the merged, projected result.

        The per-head attention weights produced by ``Attention`` are discarded.
        """
        batch_size = query.size(0)

        def project_and_split(layer, tensor):
            # d_model => (h, d_k), with the head axis moved ahead of the sequence axis
            return layer(tensor).view(batch_size, -1, self.h, self.d_k).transpose(1, 2)

        # 1) Linear projections, reshaped into heads.
        q_layer, k_layer, v_layer = self.linear_layers
        query = project_and_split(q_layer, query)
        key = project_and_split(k_layer, key)
        value = project_and_split(v_layer, value)

        # 2) Attention over all heads at once.
        attended, _ = self.attention(query, key, value, mask=mask, dropout=self.dropout)

        # 3) Merge the heads back into a single d_model-wide axis.
        merged = attended.transpose(1, 2).contiguous().view(batch_size, -1, self.h * self.d_k)

        return self.output_linear(merged)

### Layer Normalization and Feed Forward Components

Implementing the following:
1. Layer normalization
2. Sublayer connection
3. Position-wise feed-forward networks with GELU activation

In [None]:
class LayerNorm(nn.Module):
    """Layer normalization over the last dimension with a learnable gain and bias."""

    def __init__(self, features, eps=1e-6):
        """
        :param features: size of the last dimension being normalized
        :param eps: constant added to the standard deviation before dividing
        """
        super().__init__()
        self.a_2 = nn.Parameter(torch.ones(features))   # gain
        self.b_2 = nn.Parameter(torch.zeros(features))  # bias
        self.eps = eps

    def forward(self, x):
        """Normalize ``x`` along its last dimension, then scale and shift."""
        mu = x.mean(-1, keepdim=True)
        sigma = x.std(-1, keepdim=True)
        # eps is added to the standard deviation itself (not the variance).
        scaled = self.a_2 * (x - mu)
        return scaled / (sigma + self.eps) + self.b_2

In [None]:
class SublayerConnection(nn.Module):
    """Residual connection around a sublayer, with layer norm applied first."""

    def __init__(self, size, dropout):
        """
        :param size: feature size passed to ``LayerNorm``
        :param dropout: dropout probability applied to the sublayer output
        """
        super().__init__()
        self.norm = LayerNorm(size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        """Return ``x + dropout(sublayer(norm(x)))``.

        :param x: input tensor
        :param sublayer: callable applied to the normalized input
        """
        normalized = self.norm(x)
        branch = self.dropout(sublayer(normalized))
        return x + branch

In [None]:
class GELU(nn.Module):
    """GELU activation using the tanh approximation."""

    def forward(self, x):
        """Return ``0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))``."""
        inner = math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))
        return 0.5 * x * (1 + torch.tanh(inner))

In [None]:
class PositionwiseFeedForward(nn.Module):
    """Two-layer feed-forward network with GELU and dropout in between."""

    def __init__(self, d_model, d_ff, dropout=0.1):
        """
        :param d_model: input and output feature size
        :param d_ff: size of the intermediate layer
        :param dropout: dropout probability applied after the activation
        """
        super().__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)
        self.activation = GELU()

    def forward(self, x):
        """Expand to ``d_ff``, apply GELU and dropout, project back to ``d_model``."""
        expanded = self.w_1(x)
        activated = self.activation(expanded)
        return self.w_2(self.dropout(activated))

### BERT's Core Components

Now we'll implement the main BERT components:
1. Transformer blocks
2. Embedding layers (Token, Position, and Segment embeddings)
3. The main BERT model
4. Task-specific heads (MLM and NSP)

In [None]:
class TransformerBlock(nn.Module):
    """One encoder block: self-attention then feed-forward, each in a residual sublayer."""

    def __init__(self, hidden, attn_heads, feed_forward_hidden, dropout):
        """
        :param hidden: hidden size of the block
        :param attn_heads: number of attention heads
        :param feed_forward_hidden: intermediate size of the feed-forward network
        :param dropout: dropout probability used throughout the block
        """
        super().__init__()
        # Pass the configured dropout through to the attention layer as well;
        # otherwise it silently stays at MultiHeadedAttention's default of 0.1
        # no matter what the caller asked for.
        self.attention = MultiHeadedAttention(h=attn_heads, d_model=hidden, dropout=dropout)
        self.feed_forward = PositionwiseFeedForward(d_model=hidden, d_ff=feed_forward_hidden, dropout=dropout)
        self.input_sublayer = SublayerConnection(size=hidden, dropout=dropout)
        self.output_sublayer = SublayerConnection(size=hidden, dropout=dropout)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, mask):
        """Run self-attention and the feed-forward network over ``x``.

        :param x: input tensor; used as query, key and value of the attention
        :param mask: attention mask forwarded to the attention layer
        :return: tensor produced by the block after a final dropout
        """
        # Call the module (not .forward) so that any registered hooks run.
        x = self.input_sublayer(x, lambda _x: self.attention(_x, _x, _x, mask=mask))
        x = self.output_sublayer(x, self.feed_forward)
        return self.dropout(x)

In [None]:
class TokenEmbedding(nn.Embedding):
    """Token embedding table in which index 0 is the padding entry."""

    def __init__(self, vocab_size, embed_size=512):
        """
        :param vocab_size: number of rows in the embedding table
        :param embed_size: size of each embedding vector
        """
        super(TokenEmbedding, self).__init__(
            num_embeddings=vocab_size,
            embedding_dim=embed_size,
            padding_idx=0,
        )

In [None]:
class PositionalEmbedding(nn.Module):
    """Fixed sinusoidal position encodings, stored as a non-trainable buffer."""

    def __init__(self, d_model, max_len=512):
        """
        :param d_model: size of each position vector (odd sizes are supported)
        :param max_len: number of positions that are precomputed
        """
        super().__init__()

        # Compute the positional encodings once in log space.
        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
        angles = position * div_term

        # The table is registered as a buffer below, so it never receives
        # gradients; no explicit requires_grad handling is needed.
        pe = torch.zeros(max_len, d_model).float()
        pe[:, 0::2] = torch.sin(angles)
        # For an odd d_model there is one fewer cosine column than sine
        # columns, so drop the surplus frequency instead of failing on a
        # shape mismatch.
        pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])

        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Return the encodings for the first ``x.size(1)`` positions.

        :param x: tensor whose second dimension is the sequence length
        :return: slice of the precomputed table with a leading axis of size 1
        """
        return self.pe[:, :x.size(1)]

In [None]:
class SegmentEmbedding(nn.Embedding):
    """Segment embedding with three entries; index 0 is the padding entry."""

    def __init__(self, embed_size=512):
        """
        :param embed_size: size of each embedding vector
        """
        super(SegmentEmbedding, self).__init__(
            num_embeddings=3,
            embedding_dim=embed_size,
            padding_idx=0,
        )

In [None]:
class BERTEmbedding(nn.Module):
    """Input embedding: sum of token, positional and segment embeddings, then dropout."""

    def __init__(self, vocab_size, embed_size, dropout=0.1):
        """
        :param vocab_size: number of entries in the token embedding table
        :param embed_size: size of every embedding vector
        :param dropout: dropout probability applied to the summed embedding
        """
        super().__init__()
        self.token = TokenEmbedding(vocab_size=vocab_size, embed_size=embed_size)
        # The positional and segment embeddings take their width from the
        # token embedding so the three can be added together.
        width = self.token.embedding_dim
        self.position = PositionalEmbedding(d_model=width)
        self.segment = SegmentEmbedding(embed_size=width)
        self.dropout = nn.Dropout(p=dropout)
        self.embed_size = embed_size

    def forward(self, sequence, segment_label):
        """Embed token ids and segment labels.

        :param sequence: token-id tensor; also determines how many positions are used
        :param segment_label: segment-id tensor looked up in the segment embedding
        :return: summed embeddings after dropout
        """
        token_vectors = self.token(sequence)
        position_vectors = self.position(sequence)
        segment_vectors = self.segment(segment_label)
        return self.dropout(token_vectors + position_vectors + segment_vectors)

### The Complete BERT Model

Combining all parts of the model for MLM and NSP (Masked LM and Next Sentence Prediction)

In [None]:
class BERT(nn.Module):
    """BERT encoder: an input embedding followed by a stack of transformer blocks."""

    def __init__(self, vocab_size, hidden=768, n_layers=12, attn_heads=12, dropout=0.1):
        """
        :param vocab_size: number of tokens in the vocabulary
        :param hidden: hidden size of the model
        :param n_layers: number of transformer blocks
        :param attn_heads: number of attention heads per block
        :param dropout: dropout probability handed to every transformer block
        """
        super().__init__()
        self.hidden = hidden
        self.n_layers = n_layers
        self.attn_heads = attn_heads

        # The feed-forward network is four times as wide as the hidden size.
        self.feed_forward_hidden = hidden * 4

        # Sum of token, positional and segment embeddings.
        self.embedding = BERTEmbedding(vocab_size=vocab_size, embed_size=hidden)

        # The deep part of the network: n_layers identical encoder blocks.
        blocks = []
        for _ in range(n_layers):
            blocks.append(TransformerBlock(hidden, attn_heads, hidden * 4, dropout))
        self.transformer_blocks = nn.ModuleList(blocks)

    def forward(self, x, segment_info):
        """Encode a batch of token ids.

        :param x: token-id tensor of shape (batch, seq_len); ids that are not
            greater than 0 are treated as padding
        :param segment_info: segment labels passed to the embedding layer
        :return: output of the last transformer block
        """
        # Padding mask shaped (batch, 1, seq_len, seq_len): each row repeats
        # the "is a real token" flags of the whole sequence.
        seq_len = x.size(1)
        is_token = x > 0
        mask = is_token.unsqueeze(1).repeat(1, seq_len, 1).unsqueeze(1)

        # Token ids -> vectors.
        hidden_states = self.embedding(x, segment_info)

        # Feed the vectors through every block in order.
        for block in self.transformer_blocks:
            hidden_states = block.forward(hidden_states, mask)

        return hidden_states

In [None]:
class BERTLM(nn.Module):
    """
    BERT pre-training model: a BERT encoder with a next-sentence-prediction
    head and a masked-language-model head on top.
    """

    def __init__(self, bert: BERT, vocab_size):
        """
        :param bert: BERT encoder to be trained
        :param vocab_size: vocabulary size used by the masked-LM head
        """
        super().__init__()
        self.bert = bert
        encoder_width = self.bert.hidden
        self.next_sentence = NextSentencePrediction(encoder_width)
        self.mask_lm = MaskedLanguageModel(encoder_width, vocab_size)

    def forward(self, x, segment_label):
        """Return ``(next_sentence_output, masked_lm_output)`` for the batch.

        :param x: token ids passed to the encoder
        :param segment_label: segment labels passed to the encoder
        """
        encoded = self.bert(x, segment_label)
        nsp_output = self.next_sentence(encoded)
        mlm_output = self.mask_lm(encoded)
        return nsp_output, mlm_output


class NextSentencePrediction(nn.Module):
    """
    Two-class classifier (is_next / is_not_next) applied to the first position
    of the encoder output.
    """

    def __init__(self, hidden):
        """
        :param hidden: size of the encoder output vectors
        """
        super().__init__()
        self.linear = nn.Linear(hidden, 2)
        self.softmax = nn.LogSoftmax(dim=-1)

    def forward(self, x):
        """Return log-probabilities over the two classes.

        :param x: encoder output; only ``x[:, 0]`` is used
        """
        first_position = x[:, 0]
        logits = self.linear(first_position)
        return self.softmax(logits)


class MaskedLanguageModel(nn.Module):
    """
    Masked-token prediction head: classifies every position of the encoder
    output over the whole vocabulary.
    """

    def __init__(self, hidden, vocab_size):
        """
        :param hidden: size of the encoder output vectors
        :param vocab_size: number of classes (vocabulary size)
        """
        super().__init__()
        self.linear = nn.Linear(hidden, vocab_size)
        self.softmax = nn.LogSoftmax(dim=-1)

    def forward(self, x):
        """Return log-probabilities over the vocabulary for every position.

        :param x: encoder output whose last dimension has size ``hidden``
        """
        logits = self.linear(x)
        return self.softmax(logits)

## Training the model

### Optimizer and Learning Rate Scheduling

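We implement a custom learning rate scheduler with a linear warmup followed by inverse-square-root decay: `lr = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)`. This is the schedule from "Attention Is All You Need"; the original BERT paper uses a linear warmup followed by linear decay instead.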

In [None]:
class ScheduledOptim:
    """Optimizer wrapper that sets the learning rate before every step.

    The learning rate is
    ``d_model ** -0.5 * min(step ** -0.5, step * n_warmup_steps ** -1.5)``:
    it grows linearly for ``n_warmup_steps`` steps and then decays with the
    inverse square root of the step number. Any learning rate configured on
    the wrapped optimizer is overwritten on every step.
    """

    def __init__(self, optimizer, d_model, n_warmup_steps):
        """
        :param optimizer: wrapped optimizer whose ``param_groups`` are updated
        :param d_model: model size; sets the base learning rate ``d_model ** -0.5``
        :param n_warmup_steps: number of warmup steps
        """
        self._optimizer = optimizer
        self.n_warmup_steps = n_warmup_steps
        self.n_current_steps = 0
        # Stored as a plain float so the scheduler state contains no NumPy scalars.
        self.init_lr = float(np.power(d_model, -0.5))

    def step_and_update_lr(self):
        "Step with the inner optimizer"
        self._update_learning_rate()
        self._optimizer.step()

    def zero_grad(self):
        "Zero out the gradients by the inner optimizer"
        self._optimizer.zero_grad()

    def state_dict(self):
        """Return the scheduler state (step counter and schedule settings).

        The wrapped optimizer's own state is not included; save it separately
        with ``optimizer.state_dict()``.
        """
        return {
            'n_current_steps': self.n_current_steps,
            'n_warmup_steps': self.n_warmup_steps,
            'init_lr': self.init_lr,
        }

    def load_state_dict(self, state_dict):
        """Restore the state produced by :meth:`state_dict`."""
        self.n_current_steps = state_dict['n_current_steps']
        self.n_warmup_steps = state_dict['n_warmup_steps']
        self.init_lr = state_dict['init_lr']

    def _get_lr_scale(self):
        # Linear ramp during warmup, inverse-square-root decay afterwards.
        return np.min([
            np.power(self.n_current_steps, -0.5),
            np.power(self.n_warmup_steps, -1.5) * self.n_current_steps])

    def _update_learning_rate(self):
        ''' Learning rate scheduling per step '''

        self.n_current_steps += 1
        # Convert to a Python float so optimizer.state_dict() holds only
        # built-in types rather than a NumPy scalar.
        lr = float(self.init_lr * self._get_lr_scale())

        for param_group in self._optimizer.param_groups:
            param_group['lr'] = lr

### Training and Evaluation

The following classes handle the training loop and data management:
1. `BERTTrainer`: Manages the training process
2. `BERTDataset`: Handles data preprocessing and loading

In [None]:
class BERTTrainer:
    """Pre-trains a BERT encoder on masked-LM and next-sentence prediction.

    The trainer wraps the encoder in a ``BERTLM`` model, optimizes it with
    Adam driven by ``ScheduledOptim``, and reports the summed loss and the
    next-sentence accuracy while iterating over the data loaders.
    """

    def __init__(self, bert: BERT, vocab_size: int,
                 train_dataloader: DataLoader, test_dataloader: DataLoader = None,
                 lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01, warmup_steps=10000,
                 with_cuda: bool = True, cuda_devices=None, log_freq: int = 10):
        """
        :param bert: BERT encoder to train
        :param vocab_size: vocabulary size for the masked-LM head
        :param train_dataloader: loader yielding training batches
        :param test_dataloader: optional loader yielding test batches
        :param lr: initial Adam learning rate (overwritten by the scheduler on every step)
        :param betas: Adam betas
        :param weight_decay: Adam weight decay
        :param warmup_steps: warmup steps for the learning-rate schedule
        :param with_cuda: use CUDA when it is available
        :param cuda_devices: device ids handed to ``nn.DataParallel``
        :param log_freq: log every ``log_freq`` iterations
        """
        # Setup cuda device for BERT training, argument -c, --cuda should be true
        cuda_condition = torch.cuda.is_available() and with_cuda
        self.device = torch.device("cuda:0" if cuda_condition else "cpu")

        # This BERT model will be saved every epoch
        self.bert = bert
        # Initialize the BERT Language Model, with BERT model
        self.model = BERTLM(bert, vocab_size).to(self.device)

        # Distributed GPU training if CUDA can detect more than 1 GPU
        if with_cuda and torch.cuda.device_count() > 1:
            print("Using %d GPUS for BERT" % torch.cuda.device_count())
            self.model = nn.DataParallel(self.model, device_ids=cuda_devices)

        # Setting the train and test data loader
        self.train_data = train_dataloader
        self.test_data = test_dataloader

        # Setting the Adam optimizer with hyper-param
        self.optim = Adam(self.model.parameters(), lr=lr, betas=betas, weight_decay=weight_decay)
        self.optim_schedule = ScheduledOptim(self.optim, self.bert.hidden, n_warmup_steps=warmup_steps)

        # Masked-LM loss: label 0 marks positions that were not selected for
        # prediction (and padding), so those targets are ignored.
        self.criterion = nn.NLLLoss(ignore_index=0)
        # Next-sentence loss: class 0 means "is not next" and is a real label,
        # so it must NOT be ignored. Reusing the masked-LM criterion here would
        # drop every negative pair from the loss.
        self.nsp_criterion = nn.NLLLoss()

        self.log_freq = log_freq

        print("Total Parameters:", sum([p.nelement() for p in self.model.parameters()]))

    def train(self, epoch):
        """Run one training epoch over the training loader."""
        self.iteration(epoch, self.train_data)

    def test(self, epoch):
        """Run one evaluation epoch over the test loader."""
        self.iteration(epoch, self.test_data, train=False)

    def iteration(self, epoch, data_loader, train=True):
        """
        Loop over the data_loader for training or testing.
        The backward pass and optimizer step only run when ``train`` is true.

        :param epoch: current epoch index
        :param data_loader: torch.utils.data.DataLoader for iteration
        :param train: boolean value of is train or test
        :return: None
        """
        str_code = "train" if train else "test"

        # Dropout must only be active while training.
        if train:
            self.model.train()
        else:
            self.model.eval()

        # Setting the tqdm progress bar
        data_iter = tqdm.tqdm(enumerate(data_loader),
                              desc="EP_%s:%d" % (str_code, epoch),
                              total=len(data_loader),
                              bar_format="{l_bar}{r_bar}")

        avg_loss = 0.0
        total_correct = 0
        total_element = 0

        # Gradients are only needed for the training pass.
        with torch.set_grad_enabled(train):
            for i, data in data_iter:
                # 0. batch_data will be sent into the device(GPU or cpu)
                data = {key: value.to(self.device) for key, value in data.items()}

                # 1. forward the next_sentence_prediction and masked_lm model
                next_sent_output, mask_lm_output = self.model(data["bert_input"], data["segment_label"])

                # 2-1. NLL(negative log likelihood) loss of is_next classification result
                next_loss = self.nsp_criterion(next_sent_output, data["is_next"])

                # 2-2. NLLLoss of predicting masked token word
                mask_loss = self.criterion(mask_lm_output.transpose(1, 2), data["bert_label"])

                # 2-3. Adding next_loss and mask_loss : 3.4 Pre-training Procedure
                loss = next_loss + mask_loss

                # 3. backward and optimization only in train
                if train:
                    self.optim_schedule.zero_grad()
                    loss.backward()
                    self.optim_schedule.step_and_update_lr()

                # next sentence prediction accuracy
                correct = next_sent_output.argmax(dim=-1).eq(data["is_next"]).sum().item()
                avg_loss += loss.item()
                total_correct += correct
                total_element += data["is_next"].nelement()

                if i % self.log_freq == 0:
                    post_fix = {
                        "epoch": epoch,
                        "iter": i,
                        "avg_loss": avg_loss / (i + 1),
                        "avg_acc": total_correct / total_element * 100,
                        "loss": loss.item()
                    }
                    data_iter.write(str(post_fix))

        # max(..., 1) keeps the summary printable for an empty loader.
        print("EP%d_%s, avg_loss=" % (epoch, str_code), avg_loss / max(len(data_loader), 1), "total_acc=",
              total_correct * 100.0 / max(total_element, 1))

    def save(self, epoch, file_path="output/bert_trained.model"):
        """
        Saving the current BERT model on file_path

        :param epoch: current epoch number
        :param file_path: model output path which gonna be file_path+"ep%d" % epoch
        :return: final_output_path
        """
        output_path = file_path + ".ep%d" % epoch
        # The encoder is moved to the CPU for saving and back afterwards.
        torch.save(self.bert.cpu(), output_path)
        self.bert.to(self.device)
        print("EP:%d Model Saved on:" % epoch, output_path)
        return output_path

In [None]:
class BERTWikiTextTrainer:
    """
    Trainer class specifically designed for BERT pretraining on WikiText data.
    Handles the training loop, optimization, logging and checkpointing.
    """
    def __init__(self,
                 bert_model: nn.Module,
                 vocab_size: int,
                 train_dataloader: DataLoader,
                 valid_dataloader: Optional[DataLoader] = None,
                 lr: float = 1e-4,
                 betas: tuple = (0.9, 0.999),
                 weight_decay: float = 0.01,
                 warmup_steps: int = 10000,
                 device: Optional[str] = None,
                 log_freq: int = 10):
        """
        Initialize the trainer with model and training parameters.

        Args:
            bert_model: The BERT model to train
            vocab_size: Size of the vocabulary
            train_dataloader: DataLoader for training data
            valid_dataloader: Optional DataLoader for validation data
            lr: Initial Adam learning rate (overwritten by the scheduler on every step)
            betas: Adam optimizer betas
            weight_decay: Weight decay for optimization
            warmup_steps: Number of warmup steps for learning rate scheduling
            device: Device to train on ('cuda' or 'cpu')
            log_freq: How often to log training metrics
        """
        # Setup device
        self.device = device if device else ('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {self.device}")

        # Initialize model
        self.bert = bert_model
        self.model = BERTLM(bert_model, vocab_size).to(self.device)

        # Setup multi-GPU if available (str() also covers torch.device inputs)
        if str(self.device).startswith('cuda') and torch.cuda.device_count() > 1:
            print(f"Using {torch.cuda.device_count()} GPUs for training")
            self.model = nn.DataParallel(self.model)

        # Store data loaders
        self.train_dataloader = train_dataloader
        self.valid_dataloader = valid_dataloader

        # Setup optimizer with learning rate scheduling
        self.optimizer = Adam(self.model.parameters(), lr=lr, betas=betas, weight_decay=weight_decay)
        self.optim_schedule = ScheduledOptim(self.optimizer, self.bert.hidden, warmup_steps)

        # Masked-LM loss: label 0 marks positions that were not selected for
        # prediction (and padding), so those targets are ignored.
        self.criterion = nn.NLLLoss(ignore_index=0)
        # Next-sentence loss: class 0 means "is not next" and is a real label.
        # Using ignore_index=0 here would remove every negative pair from the
        # loss, so this task gets its own criterion.
        self.nsp_criterion = nn.NLLLoss()

        self.log_freq = log_freq
        print(f"Total Parameters: {sum([p.nelement() for p in self.model.parameters()])}")

    def train(self, epochs: int, save_path: str):
        """
        Train the model for specified number of epochs.

        With a validation loader, a checkpoint is written whenever the
        validation loss improves; without one, a checkpoint is written after
        every epoch.

        Args:
            epochs: Number of epochs to train
            save_path: Where to save model checkpoints
        """
        best_loss = float('inf')

        for epoch in range(epochs):
            # Training phase
            train_stats = self.run_epoch(epoch, is_training=True)
            print(f"\nEpoch {epoch} Training Stats:")
            self._print_stats(train_stats)

            # Validation phase
            if self.valid_dataloader:
                with torch.no_grad():
                    valid_stats = self.run_epoch(epoch, is_training=False)
                print(f"\nEpoch {epoch} Validation Stats:")
                self._print_stats(valid_stats)

                # Save best model
                if valid_stats['avg_loss'] < best_loss:
                    best_loss = valid_stats['avg_loss']
                    self.save(save_path, epoch)
                    print(f"New best model saved with validation loss: {best_loss:.4f}")
            else:
                # No validation set: save a checkpoint after every epoch
                self.save(save_path, epoch)

    def run_epoch(self, epoch: int, is_training: bool) -> Dict[str, float]:
        """
        Run one epoch of training or validation.

        Args:
            epoch: Current epoch number
            is_training: Whether this is a training or validation pass

        Returns:
            Dictionary containing epoch statistics
        """
        mode = "train" if is_training else "valid"
        data_loader = self.train_dataloader if is_training else self.valid_dataloader

        # Set model mode
        if is_training:
            self.model.train()
        else:
            self.model.eval()

        # Initialize statistics
        stats = {
            'total_loss': 0.0,
            'mlm_loss': 0.0,
            'nsp_loss': 0.0,
            'correct_nsp': 0,
            'total_nsp': 0
        }

        # Progress bar
        data_iter = tqdm.tqdm(
            enumerate(data_loader),
            desc=f"EP_{mode}:{epoch}",
            total=len(data_loader),
            bar_format="{l_bar}{r_bar}"
        )

        # Gradients are only needed for the training pass, even when this
        # method is called directly rather than through train().
        with torch.set_grad_enabled(is_training):
            for i, data in data_iter:
                # Move data to device
                data = {key: value.to(self.device) for key, value in data.items()}

                # Forward pass
                next_sent_output, mask_lm_output = self.model(
                    data["bert_input"],
                    data["segment_label"]
                )

                # Calculate losses
                next_loss = self.nsp_criterion(next_sent_output, data["is_next"])
                mask_loss = self.criterion(
                    mask_lm_output.transpose(1, 2),
                    data["bert_label"]
                )
                loss = next_loss + mask_loss

                if is_training:
                    # Backward pass and optimization
                    self.optim_schedule.zero_grad()
                    loss.backward()
                    self.optim_schedule.step_and_update_lr()

                # Update statistics
                stats['total_loss'] += loss.item()
                stats['mlm_loss'] += mask_loss.item()
                stats['nsp_loss'] += next_loss.item()
                stats['correct_nsp'] += next_sent_output.argmax(dim=-1).eq(data["is_next"]).sum().item()
                stats['total_nsp'] += data["is_next"].nelement()

                # Log progress
                if i % self.log_freq == 0:
                    current_stats = {
                        'avg_loss': stats['total_loss'] / (i + 1),
                        'nsp_acc': stats['correct_nsp'] / stats['total_nsp'] * 100,
                        'lr': self.optimizer.param_groups[0]['lr']
                    }
                    data_iter.write(
                        f"Step: {i}, Loss: {current_stats['avg_loss']:.4f}, "
                        f"NSP Acc: {current_stats['nsp_acc']:.2f}%, "
                        f"LR: {current_stats['lr']:.6f}"
                    )

        # Calculate final statistics
        num_steps = len(data_loader)
        return {
            'avg_loss': stats['total_loss'] / num_steps,
            'avg_mlm_loss': stats['mlm_loss'] / num_steps,
            'avg_nsp_loss': stats['nsp_loss'] / num_steps,
            'nsp_accuracy': stats['correct_nsp'] / stats['total_nsp'] * 100
        }

    def _print_stats(self, stats: Dict[str, float]):
        """Print training/validation statistics in a formatted way."""
        print(f"  Average Loss: {stats['avg_loss']:.4f}")
        print(f"  MLM Loss: {stats['avg_mlm_loss']:.4f}")
        print(f"  NSP Loss: {stats['avg_nsp_loss']:.4f}")
        print(f"  NSP Accuracy: {stats['nsp_accuracy']:.2f}%")

    def save(self, path: str, epoch: int):
        """Save a checkpoint to ``f"{path}_epoch_{epoch}.pt"``.

        The checkpoint holds the epoch number, the encoder weights (the
        pre-training heads are not included), the optimizer state and the
        learning-rate schedule state.
        """
        # The scheduler state is assembled from the attributes ScheduledOptim
        # sets in its constructor, so saving does not depend on the scheduler
        # providing a state_dict() method.
        scheduler_state = {
            'n_current_steps': self.optim_schedule.n_current_steps,
            'n_warmup_steps': self.optim_schedule.n_warmup_steps,
            'init_lr': float(self.optim_schedule.init_lr),
        }
        torch.save({
            'epoch': epoch,
            'model_state_dict': self.bert.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'scheduler_state_dict': scheduler_state
        }, f"{path}_epoch_{epoch}.pt")


In [None]:
class BERTDataset(Dataset):
    """Sentence-pair dataset for BERT pre-training.

    Every line of the corpus file is read as ``sentence_1<TAB>sentence_2``.
    Each item is a dict of tensors with the keys ``bert_input``,
    ``bert_label``, ``segment_label`` and ``is_next``.

    With ``on_memory=True`` the whole corpus is loaded into ``self.lines``.
    With ``on_memory=False`` the corpus is streamed from disk through two
    open file handles (``self.file`` for sequential pairs and
    ``self.random_file`` for random second sentences); in that mode
    ``__getitem__`` ignores the requested index and returns the next line.
    """

    def __init__(self, corpus_path, vocab, seq_len, encoding="utf-8", corpus_lines=None, on_memory=True):
        """
        :param corpus_path: path of the tab-separated corpus file
        :param vocab: vocabulary object providing ``stoi``, ``__len__`` and the
            ``pad_index``/``unk_index``/``eos_index``/``sos_index``/``mask_index`` attributes
        :param seq_len: fixed length of every returned sequence
        :param encoding: text encoding of the corpus file
        :param corpus_lines: number of lines in the corpus; counted when omitted
        :param on_memory: load the whole corpus into memory
        """
        self.vocab = vocab
        self.seq_len = seq_len

        self.on_memory = on_memory
        self.corpus_lines = corpus_lines
        self.corpus_path = corpus_path
        self.encoding = encoding

        with open(corpus_path, "r", encoding=encoding) as f:
            if on_memory:
                # rstrip("\n") rather than line[:-1]: a final line without a
                # trailing newline must not lose its last character.
                self.lines = [line.rstrip("\n").split("\t")
                              for line in tqdm.tqdm(f, desc="Loading Dataset", total=corpus_lines)]
                self.corpus_lines = len(self.lines)
            elif self.corpus_lines is None:
                # Count the lines once so __len__ works in streaming mode.
                self.corpus_lines = sum(1 for _ in tqdm.tqdm(f, desc="Loading Dataset"))

        if not on_memory:
            self.file = open(corpus_path, "r", encoding=encoding)
            self.random_file = self._open_at_random_offset()

    def _open_corpus(self):
        """Open the corpus file for reading from the first line."""
        return open(self.corpus_path, "r", encoding=self.encoding)

    def _open_at_random_offset(self):
        """Open the corpus and skip a random number of lines (at most 1000)."""
        handle = self._open_corpus()
        max_skip = min(self.corpus_lines, 1000)
        for _ in range(random.randint(0, max_skip)):
            if next(handle, None) is None:
                break
        return handle

    def __len__(self):
        return self.corpus_lines

    def __getitem__(self, item):
        t1, t2, is_next_label = self.random_sent(item)
        t1_random, t1_label = self.random_word(t1)
        t2_random, t2_label = self.random_word(t2)

        # [CLS] tag = SOS tag, [SEP] tag = EOS tag
        t1 = [self.vocab.sos_index] + t1_random + [self.vocab.eos_index]
        t2 = t2_random + [self.vocab.eos_index]

        # The special tokens are never prediction targets.
        t1_label = [self.vocab.pad_index] + t1_label + [self.vocab.pad_index]
        t2_label = t2_label + [self.vocab.pad_index]

        # Segment 1 covers the first sentence, segment 2 the second one.
        segment_label = ([1] * len(t1) + [2] * len(t2))[:self.seq_len]
        bert_input = (t1 + t2)[:self.seq_len]
        bert_label = (t1_label + t2_label)[:self.seq_len]

        # Right-pad everything to exactly seq_len entries.
        padding = [self.vocab.pad_index] * (self.seq_len - len(bert_input))
        bert_input.extend(padding)
        bert_label.extend(padding)
        segment_label.extend(padding)

        output = {"bert_input": bert_input,
                  "bert_label": bert_label,
                  "segment_label": segment_label,
                  "is_next": is_next_label}

        return {key: torch.tensor(value) for key, value in output.items()}

    def random_word(self, sentence):
        """Convert a sentence to token ids and apply masked-LM corruption.

        About 15% of the tokens are selected; of those 80% become the mask
        token, 10% a random vocabulary index and 10% stay unchanged.

        :return: ``(token_ids, labels)`` where ``labels`` holds the original
            token id at selected positions and 0 everywhere else
        """
        tokens = sentence.split()
        output_label = []

        for i, token in enumerate(tokens):
            token_id = self.vocab.stoi.get(token, self.vocab.unk_index)
            prob = random.random()
            if prob < 0.15:
                # Rescale to [0, 1) to choose among the three corruptions.
                prob /= 0.15

                # 80% randomly change token to mask token
                if prob < 0.8:
                    tokens[i] = self.vocab.mask_index

                # 10% randomly change token to random token
                elif prob < 0.9:
                    tokens[i] = random.randrange(len(self.vocab))

                # 10% keep the current token
                else:
                    tokens[i] = token_id

                output_label.append(token_id)

            else:
                tokens[i] = token_id
                output_label.append(0)

        return tokens, output_label

    def random_sent(self, index):
        """Return ``(sentence_1, sentence_2, label)`` for the given index.

        With probability about one half the true second sentence is kept
        (label 1); otherwise it is replaced by a random one (label 0).
        """
        t1, t2 = self.get_corpus_line(index)

        # output_text, label(isNotNext:0, isNext:1)
        if random.random() > 0.5:
            return t1, t2, 1
        else:
            return t1, self.get_random_line(), 0

    def get_corpus_line(self, item):
        """Return the sentence pair for ``item`` (or the next streamed pair)."""
        if self.on_memory:
            return self.lines[item][0], self.lines[item][1]

        # Iterating a file raises StopIteration at the end instead of
        # returning None, so ask next() for an explicit sentinel.
        line = next(self.file, None)
        if line is None:
            # End of corpus: start again from the first line.
            self.file.close()
            self.file = self._open_corpus()
            line = next(self.file)

        t1, t2 = line.rstrip("\n").split("\t")
        return t1, t2

    def get_random_line(self):
        """Return the second sentence of a randomly chosen corpus line."""
        if self.on_memory:
            return self.lines[random.randrange(len(self.lines))][1]

        line = next(self.random_file, None)
        if line is None:
            # End of corpus: reopen at a fresh random offset.
            self.random_file.close()
            self.random_file = self._open_at_random_offset()
            line = next(self.random_file, None)
        if line is None:
            # The random offset itself landed on the end of the file.
            self.random_file.close()
            self.random_file = self._open_corpus()
            line = next(self.random_file)
        return line.rstrip("\n").split("\t")[1]

## Vocabulary Management

We implement vocabulary handling with special token support and methods for converting between tokens and indices.

In [None]:
class TorchVocab(object):
    """Vocabulary built from token frequencies.

    Attributes:
        freqs: the counter object passed to the constructor
        itos: list mapping index -> token (specials first)
        stoi: dict mapping token -> index
        vectors: ``None`` unless ``vectors`` is given to the constructor
    """

    def __init__(self, counter, max_size=None, min_freq=1, specials=['<pad>', '<oov>'],
                 vectors=None, unk_init=None, vectors_cache=None):
        """
        :param counter: token -> frequency mapping
        :param max_size: maximum number of non-special tokens, or None for no limit
        :param min_freq: minimum frequency for a token to be included (at least 1)
        :param specials: tokens placed at the start of the vocabulary
        :param vectors: forwarded to ``self.load_vectors`` when not None
        :param unk_init: only allowed together with ``vectors``
        :param vectors_cache: only allowed together with ``vectors``
        """
        self.freqs = counter
        remaining = counter.copy()
        min_freq = max(min_freq, 1)

        self.itos = list(specials)
        # Special tokens keep their fixed slots at the front, so they are
        # taken out of the frequency ranking.
        for special in specials:
            del remaining[special]

        size_limit = None if max_size is None else max_size + len(self.itos)

        # Alphabetical order first; the stable sort by descending frequency
        # then keeps that order among tokens with equal counts.
        alphabetical = sorted(remaining.items(), key=lambda pair: pair[0])
        ranked = sorted(alphabetical, key=lambda pair: pair[1], reverse=True)

        for word, freq in ranked:
            vocab_is_full = len(self.itos) == size_limit
            if vocab_is_full or freq < min_freq:
                break
            self.itos.append(word)

        # stoi is simply the inverse of itos
        self.stoi = dict(zip(self.itos, range(len(self.itos))))

        self.vectors = None
        if vectors is None:
            assert unk_init is None and vectors_cache is None
        else:
            self.load_vectors(vectors, unk_init=unk_init, cache=vectors_cache)

    def __eq__(self, other):
        """Two vocabularies are equal when freqs, stoi, itos and vectors all match."""
        return not (self.freqs != other.freqs
                    or self.stoi != other.stoi
                    or self.itos != other.itos
                    or self.vectors != other.vectors)

    def __len__(self):
        """Number of tokens, special tokens included."""
        return len(self.itos)

    def vocab_rerank(self):
        """Rebuild ``stoi`` from the current order of ``itos``."""
        self.stoi = dict(zip(self.itos, range(len(self.itos))))

    def extend(self, v, sort=False):
        """Append the tokens of vocabulary ``v`` that are not present yet.

        :param v: vocabulary whose ``itos`` entries are added
        :param sort: add the new tokens in sorted order instead of ``v``'s order
        """
        candidates = sorted(v.itos) if sort else v.itos
        for token in candidates:
            if token in self.stoi:
                continue
            self.stoi[token] = len(self.itos)
            self.itos.append(token)


class Vocab(TorchVocab):
    """Vocabulary with five fixed special tokens occupying indices 0-4.

    The index attributes (``pad_index`` ... ``mask_index``) match the position
    of the corresponding token in the ``specials`` list handed to ``TorchVocab``.
    """

    def __init__(self, counter, max_size=None, min_freq=1):
        """Create the vocabulary from a token-frequency ``counter``.

        ``max_size`` and ``min_freq`` are forwarded unchanged to ``TorchVocab``.
        """
        special_tokens = ["<pad>", "<unk>", "<eos>", "<sos>", "<mask>"]
        # Same order as special_tokens: pad=0, unk=1, eos=2, sos=3, mask=4.
        (self.pad_index, self.unk_index, self.eos_index,
         self.sos_index, self.mask_index) = range(len(special_tokens))
        super().__init__(counter, specials=special_tokens,
                         max_size=max_size, min_freq=min_freq)

    def to_seq(self, sentece, seq_len, with_eos=False, with_sos=False) -> list:
        """Placeholder with no implementation here; returns ``None``."""

    def from_seq(self, seq, join=False, with_pad=False):
        """Placeholder with no implementation here; returns ``None``."""

    @staticmethod
    def load_vocab(vocab_path: str) -> 'Vocab':
        """Unpickle and return the object stored at ``vocab_path``.

        Unpickling can execute arbitrary code, so only load trusted files.
        """
        with open(vocab_path, "rb") as handle:
            loaded = pickle.load(handle)
        return loaded

    def save_vocab(self, vocab_path):
        """Pickle this vocabulary object to ``vocab_path``."""
        with open(vocab_path, "wb") as handle:
            pickle.dump(self, handle)


# Building Vocab with text files
class WordVocab(Vocab):
    """Word-level vocabulary built by whitespace-tokenizing a corpus."""

    def __init__(self, texts, max_size=None, min_freq=1):
        """Count the words in ``texts`` and build the vocabulary.

        Args:
            texts: Iterable of lines. Each item is either a string, which is
                split on whitespace, or an already tokenized list of words.
            max_size: Forwarded to ``Vocab``.
            min_freq: Forwarded to ``Vocab``.
        """
        print("Building Vocab")
        counter = Counter()
        for line in tqdm.tqdm(texts):
            # ``split()`` without arguments splits on any run of whitespace, so
            # the tab between the two sentences of a pair separates words.
            # Deleting the tab first would glue the last word of the first
            # sentence to the first word of the second one.
            words = line if isinstance(line, list) else line.split()
            counter.update(words)
        super().__init__(counter, max_size=max_size, min_freq=min_freq)

    def to_seq(self, sentence, seq_len=None, with_eos=False, with_sos=False, with_len=False):
        """Convert a sentence into a list of vocabulary indices.

        Args:
            sentence: A string (split on whitespace) or a sequence of words.
            seq_len: When given, the result is padded with ``pad_index`` or
                truncated to exactly this length.
            with_eos: Append ``eos_index`` before padding/truncation.
            with_sos: Prepend ``sos_index`` before padding/truncation.
            with_len: Also return the length before padding/truncation.

        Returns:
            The index list, or ``(index_list, original_length)`` when
            ``with_len`` is true. Unknown words map to ``unk_index``.
        """
        if isinstance(sentence, str):
            sentence = sentence.split()

        seq = [self.stoi.get(word, self.unk_index) for word in sentence]

        if with_eos:
            seq += [self.eos_index]  # <eos> is index 2
        if with_sos:
            seq = [self.sos_index] + seq

        # Length including the optional <sos>/<eos>, before padding or truncation.
        origin_seq_len = len(seq)

        if seq_len is None:
            pass
        elif len(seq) <= seq_len:
            seq += [self.pad_index for _ in range(seq_len - len(seq))]
        else:
            # Truncation cuts from the end, so an appended <eos> may be dropped.
            seq = seq[:seq_len]

        return (seq, origin_seq_len) if with_len else seq

    def from_seq(self, seq, join=False, with_pad=False):
        """Convert a sequence of indices back into words.

        Args:
            seq: Iterable of integer indices.
            join: Return one space-joined string instead of a list.
            with_pad: When True, indices equal to ``pad_index`` are skipped;
                when False (default) every index is converted. Note that the
                flag name reads the other way round.

        Returns:
            List of words, or a single string when ``join`` is true. Indices
            at or beyond the vocabulary size are rendered as ``"<idx>"``.
        """
        words = [self.itos[idx]
                 if idx < len(self.itos)
                 else "<%d>" % idx
                 for idx in seq
                 if not with_pad or idx != self.pad_index]

        return " ".join(words) if join else words

    @staticmethod
    def load_vocab(vocab_path: str) -> 'WordVocab':
        """Unpickle and return the object stored at ``vocab_path``.

        Unpickling can execute arbitrary code, so only load trusted files.
        """
        with open(vocab_path, "rb") as f:
            return pickle.load(f)


def build(corpus_path: str, output_path: str, vocab_size=None, encoding: str = "utf-8",
          min_freq: int=1):
    """Build a ``WordVocab`` from a text corpus and pickle it to ``output_path``.

    Args:
        corpus_path: Text file read line by line.
        output_path: Destination passed to ``save_vocab``.
        vocab_size: Passed to ``WordVocab`` as ``max_size``.
        encoding: Text encoding used to open the corpus.
        min_freq: Passed to ``WordVocab`` as ``min_freq``.

    Prints the vocabulary size; returns nothing.
    """
    with open(corpus_path, "r", encoding=encoding) as corpus_file:
        word_vocab = WordVocab(corpus_file, max_size=vocab_size, min_freq=min_freq)

    vocab_len = len(word_vocab)
    print("VOCAB SIZE:", vocab_len)
    word_vocab.save_vocab(output_path)

In [None]:
pwd

'/content/drive/My Drive/Colab Notebooks'

In [None]:
# Build the word vocabulary from the small corpus and pickle it to
# ./vocab_train.txt (a pickle file despite the .txt suffix).
build("corpus.small", "./vocab_train.txt")

Building Vocab


120000it [00:02, 46247.97it/s]


VOCAB SIZE: 244514


## Helper functions for training

1. `train_small_bert`: a scaled-down configuration that trains in a few hours
2. `train_bert_wikitext`: for full-scale training

In [None]:
def train_small_bert(epochs: int=10):
    """Train the scaled-down BERT configuration on the small corpus.

    All settings are hard-coded below; only the number of epochs is a
    parameter. The function loads the pickled vocabulary, builds the training
    dataset and loader, creates the model and trainer, then trains and calls
    ``trainer.save`` after every epoch. No test dataset is configured, so the
    evaluation branch is skipped.

    Args:
        epochs: Number of training epochs.
    """
    # Configuration, grouped by concern and merged into one flat dict.
    path_cfg = {
        'train_dataset': '/content/drive/MyDrive/Colab Notebooks/corpus.small',
        'vocab_path': './vocab_train.txt',
        'output_path': '/content/drive/MyDrive/Colab Notebooks/bert.model',
        'test_dataset': None,
    }
    model_cfg = {
        'hidden': 256,
        'layers': 8,
        'attn_heads': 8,
        'seq_len': 20,
    }
    loop_cfg = {
        'batch_size': 64,
        'epochs': epochs,
        'num_workers': 10,
    }
    runtime_cfg = {
        'with_cuda': True,
        'log_freq': 10,
        'corpus_lines': None,
        'cuda_devices': None,
        'on_memory': True,
    }
    optimizer_cfg = {
        'lr': 1e-3,
        'adam_weight_decay': 0.01,
        'adam_beta1': 0.9,
        'adam_beta2': 0.999,
    }
    config = {**path_cfg, **model_cfg, **loop_cfg, **runtime_cfg, **optimizer_cfg}

    print("Loading Vocab", config['vocab_path'])
    vocab = WordVocab.load_vocab(config['vocab_path'])
    vocab_size = len(vocab)
    print("Vocab Size: ", vocab_size)

    print("Loading Train Dataset", config['train_dataset'])
    train_dataset = BERTDataset(
        config['train_dataset'],
        vocab,
        seq_len=config['seq_len'],
        corpus_lines=config['corpus_lines'],
        on_memory=config['on_memory'],
    )

    print("Loading Test Dataset", config['test_dataset'])
    if config['test_dataset']:
        test_dataset = BERTDataset(
            config['test_dataset'],
            vocab,
            seq_len=config['seq_len'],
            on_memory=config['on_memory'],
        )
    else:
        test_dataset = None

    print("Creating Dataloader")
    loader_options = dict(batch_size=config['batch_size'], num_workers=config['num_workers'])
    train_data_loader = DataLoader(train_dataset, **loader_options)
    if test_dataset:
        test_data_loader = DataLoader(test_dataset, **loader_options)
    else:
        test_data_loader = None

    print("Building BERT model")
    bert = BERT(
        vocab_size,
        hidden=config['hidden'],
        n_layers=config['layers'],
        attn_heads=config['attn_heads'],
    )

    print("Creating BERT Trainer")
    trainer = BERTTrainer(
        bert,
        vocab_size,
        train_dataloader=train_data_loader,
        test_dataloader=test_data_loader,
        lr=config['lr'],
        betas=(config['adam_beta1'], config['adam_beta2']),
        weight_decay=config['adam_weight_decay'],
        with_cuda=config['with_cuda'],
        cuda_devices=config['cuda_devices'],
        log_freq=config['log_freq'],
    )

    print("Training Start")
    for epoch in range(config['epochs']):
        trainer.train(epoch)
        # A checkpoint is written after every epoch.
        trainer.save(epoch, config['output_path'])

        if test_data_loader:
            trainer.test(epoch)


In [None]:
# Run the scaled-down training with the default of 10 epochs.
train_small_bert()

## Model Inference

Now that the small BERT model has been trained (earlier in this notebook), let us run inference on some sample data to see how the model performs.

In [None]:
def load_vocab(vocab_path):
    """Unpickle and return the vocabulary object stored at ``vocab_path``.

    Unpickling can execute arbitrary code, so only load trusted files.
    """
    with open(vocab_path, "rb") as handle:
        return pickle.load(handle)


def load_bert_model(model_path, device='cuda' if torch.cuda.is_available() else 'cpu'):
    """
    Load the saved BERT model and switch it to evaluation mode.

    Args:
        model_path: Path of a checkpoint that contains a whole pickled module
            (the result is used as a module, not as a state_dict).
        device: Target passed to ``torch.load`` as ``map_location``. The
            default is chosen once, when the function is defined.

    Returns:
        The loaded module, in eval mode.

    Note:
        The checkpoint is unpickled in full, which can execute arbitrary code.
        Only load checkpoints you created yourself or otherwise trust.
    """
    # weights_only is spelled out because a fully pickled module cannot be
    # loaded in weights-only mode; relying on the default produces a
    # FutureWarning on some PyTorch versions and an error on versions where
    # the default is True.
    model = torch.load(model_path, map_location=device, weights_only=False)
    model.eval()  # Set to evaluation mode
    return model


# Load the vocabulary
# Both paths below are relative, so they resolve against the current working directory.
vocab = load_vocab("vocab_train.txt")
# NOTE(review): "bert.model.ep9" is presumably the checkpoint written after the
# last of the 10 training epochs (epoch index 9) — confirm against BERTTrainer.save.
model = load_bert_model("bert.model.ep9")

# Create a tokenization function
def tokenize_text(text, vocab, seq_len=20):
    """
    Encode whitespace-separated text as fixed-length id tensors.

    The token ids are ``sos_index``, one id per word (``unk_index`` for words
    missing from ``vocab.stoi``), then ``eos_index``. The sequence is padded
    with ``pad_index`` or cut from the end to exactly ``seq_len`` entries, so
    the trailing ``eos_index`` is lost for long inputs.

    Args:
        text (str): Input text
        vocab: Vocabulary object providing ``stoi`` and the special indices
        seq_len (int): Length of the returned sequences
    Returns:
        tuple: (token_ids, segment_ids), two tensors of shape (1, seq_len).
        Every segment id is 1, padding positions included.
    """
    # NOTE(review): padding positions also receive segment id 1; confirm this
    # matches what the training dataset emits for padding.
    ids = [vocab.sos_index]
    ids.extend(vocab.stoi.get(word, vocab.unk_index) for word in text.split())
    ids.append(vocab.eos_index)

    missing = seq_len - len(ids)
    if missing > 0:
        ids.extend([vocab.pad_index] * missing)
    else:
        ids = ids[:seq_len]

    segments = [1] * seq_len

    return torch.tensor([ids]), torch.tensor([segments])

# Create inference function
def get_bert_embeddings(model, text, vocab, seq_len=20, device='cuda' if torch.cuda.is_available() else 'cpu'):
    """
    Run the model on one piece of text without tracking gradients.

    Args:
        model: BERT model; it is put in eval mode and moved to ``device``
        text (str): Input text, encoded with ``tokenize_text``
        vocab: Vocabulary object
        seq_len (int): Sequence length passed to ``tokenize_text``
        device (str): Device to run inference on
    Returns:
        The output of ``model(token_ids, segment_ids)``; the recorded run with
        ``seq_len=20`` shows shape (1, 20, 256).
    """
    model.eval()
    model = model.to(device)

    token_ids, segment_ids = tokenize_text(text, vocab, seq_len)
    inputs = (token_ids.to(device), segment_ids.to(device))

    with torch.no_grad():
        return model(*inputs)

# Let's test it with a sample sentence
# Smoke test: embed one short sentence with the loaded model and print the
# shape of the result (the recorded run shows torch.Size([1, 20, 256])).
test_text = "this is a test sentence"
embeddings = get_bert_embeddings(model, test_text, vocab)
print(f"Shape of embeddings: {embeddings.shape}")

  model = torch.load(model_path, map_location=device)


Shape of embeddings: torch.Size([1, 20, 256])


In [None]:
class NextSentencePrediction(nn.Module):
    """Two-class classification head applied to the first sequence position."""

    def __init__(self, hidden):
        """Create a linear layer from ``hidden`` features to 2 classes."""
        super().__init__()
        self.linear = nn.Linear(hidden, 2)
        self.softmax = nn.LogSoftmax(dim=-1)

    def forward(self, x):
        """Return log-probabilities over the 2 classes.

        ``x`` is indexed as ``x[:, 0]``, i.e. the entry at position 0 of the
        second dimension is selected for every batch element.
        """
        first_position = x[:, 0]
        logits = self.linear(first_position)
        return self.softmax(logits)

def predict_next_sentence(model, sentence1, sentence2, vocab, seq_len=20, device='cuda' if torch.cuda.is_available() else 'cpu', nsp_head=None):
    """
    Predict if sentence2 follows sentence1

    Args:
        model: BERT encoder; must expose ``hidden`` when ``nsp_head`` is omitted
        sentence1 (str): First sentence
        sentence2 (str): Candidate continuation
        vocab: Vocabulary object providing ``itos`` and ``eos_index``
        seq_len (int): Sequence length used for encoding
        device (str): Device to run inference on
        nsp_head: Optional trained classification head applied to the encoder
            output. When omitted, a new ``NextSentencePrediction`` head with
            randomly initialised weights is created on every call and a warning
            is emitted, because its output carries no learned information.
    Returns:
        float: exp of the head's log-probability for class index 1 of the
        first batch element. NOTE(review): class 1 is presumably the
        "is next" label; confirm against the training labels.
    """
    import warnings

    # Separate the sentences with the vocabulary's own end-of-sentence token.
    # A literal "[SEP]" is not one of this vocabulary's special tokens and
    # would be encoded as <unk>.
    separator = vocab.itos[vocab.eos_index]
    combined_text = sentence1 + " " + separator + " " + sentence2

    # Get BERT embeddings
    embeddings = get_bert_embeddings(model, combined_text, vocab, seq_len=seq_len, device=device)

    if nsp_head is None:
        warnings.warn(
            "predict_next_sentence was called without a trained nsp_head; "
            "using a randomly initialised head, so the returned value is not meaningful.",
            RuntimeWarning,
            stacklevel=2,
        )
        nsp_head = NextSentencePrediction(model.hidden)
    nsp_head = nsp_head.to(device)
    nsp_head.eval()

    # Inference only: no gradients are needed for the head either.
    with torch.no_grad():
        prediction = nsp_head(embeddings)

    # The head returns log-probabilities; exponentiate to get probabilities.
    probs = torch.exp(prediction)
    return probs[0][1].item()



def test_next_sentence_prediction():
    """Print the next-sentence score for four hard-coded sentence pairs.

    Uses the notebook-level ``model`` and ``vocab`` variables.
    """
    cat_sentence = "the cat sat on the mat"
    rain_sentence = "it is raining outside"
    # Each first sentence is paired with two different second sentences.
    pairs = (
        (cat_sentence, "it was taking a nap"),
        (cat_sentence, "dogs love to play fetch"),
        (rain_sentence, "I need an umbrella"),
        (rain_sentence, "the cat likes milk"),
    )

    print("\nNext Sentence Prediction:")
    for sent1, sent2 in pairs:
        prob = predict_next_sentence(model, sent1, sent2, vocab)
        report = (
            f"\nSentence 1: {sent1}",
            f"Sentence 2: {sent2}",
            f"Probability that sentence 2 follows sentence 1: {prob}",
        )
        for report_line in report:
            print(report_line)

# Run the demo on the sample pairs defined in test_next_sentence_prediction().
print("\nTesting Next Sentence Prediction...")
test_next_sentence_prediction()


Testing Next Sentence Prediction...

Next Sentence Prediction:

Sentence 1: the cat sat on the mat
Sentence 2: it was taking a nap
Probability that sentence 2 follows sentence 1: 0.9739370942115784

Sentence 1: the cat sat on the mat
Sentence 2: dogs love to play fetch
Probability that sentence 2 follows sentence 1: 0.051843009889125824

Sentence 1: it is raining outside
Sentence 2: I need an umbrella
Probability that sentence 2 follows sentence 1: 0.9872457385063171

Sentence 1: it is raining outside
Sentence 2: the cat likes milk
Probability that sentence 2 follows sentence 1: 0.09107743948698044


## Training on the WikiText Dataset

We load the `Salesforce/wikitext` dataset from Hugging Face and preprocess it into tab-separated sentence pairs for next sentence prediction.

In [None]:
def clean_wikitext(text):
    """Clean one raw WikiText line.

    Args:
        text (str): A single line of the raw dataset.

    Returns:
        str: The cleaned line with single spaces between tokens, or ``''`` for
        section-header lines and for lines that are empty after cleaning.
    """
    # Imported locally so the function does not depend on an import made in
    # another notebook cell.
    import re

    # Remove section headers. Header lines may carry leading whitespace
    # (" = Title = "), so strip it before testing for the marker.
    if text.lstrip().startswith('='):
        return ''

    # Re-join tokens that the dataset splits around "@-@", "@,@" and "@.@":
    # "well @-@ known" -> "well-known", "1 @,@ 000" -> "1,000", "3 @.@ 5" -> "3.5".
    text = re.sub(r'\s*@([-,.])@\s*', r'\1', text)
    text = re.sub(r'\( Japanese : .+?\)', '', text)  # Remove Japanese translations
    text = re.sub(r'\s+', ' ', text)  # Normalize whitespace
    text = re.sub(r' , ', ', ', text)  # Fix spacing around punctuation
    text = re.sub(r' \. ', '. ', text)

    # Remove citations and special characters
    text = re.sub(r'\[\d+\]', '', text)
    text = re.sub(r'[^\w\s.,!?-]', ' ', text)

    # The two removals above can leave runs of spaces behind; collapse them again.
    text = re.sub(r'\s+', ' ', text)

    return text.strip()

def process_wikitext_to_pairs(output_file, split="train", max_pairs=None, seed=None):
    """Write tab-separated sentence pairs built from WikiText-103 to a file.

    Every dataset line is cleaned with ``clean_wikitext`` and split into
    sentences; sentences of 20 characters or fewer are dropped. For each pair
    of consecutive sentences one line ``first<TAB>second`` is produced. With
    probability 0.5 (and only when the line has more than two sentences) an
    extra line pairs the first sentence with another sentence of the same
    line that is neither itself nor its successor.

    Args:
        output_file: Path of the UTF-8 text file to write.
        split: Dataset split expression passed to ``load_dataset``.
        max_pairs: Optional upper bound on the number of lines written.
        seed: Optional seed for the random sampling. When ``None`` the global
            ``random`` module state is used.

    Returns:
        int: Number of pair lines written to ``output_file``.
    """
    # NOTE(review): the random pairs are written in the same unlabeled
    # "a<TAB>b" format as the consecutive pairs, so a reader of the file cannot
    # tell them apart; confirm the dataset class treats them as intended.

    # A private generator keeps seeded runs reproducible without touching the
    # global random state.
    rng = random.Random(seed) if seed is not None else random

    dataset = load_dataset("Salesforce/wikitext", "wikitext-103-raw-v1", split=split)
    pairs = []

    for item in dataset:
        text = clean_wikitext(item['text'])
        if not text:
            continue

        sentences = [s.strip() for s in sent_tokenize(text)]
        sentences = [s for s in sentences if len(s) > 20]  # Filter short sentences

        for i in range(len(sentences) - 1):
            # Consecutive pairs (positive examples)
            pairs.append(f"{sentences[i]}\t{sentences[i+1]}\n")

            # Random pairs (negative examples)
            if rng.random() < 0.5 and len(sentences) > 2:
                random_idx = rng.choice([j for j in range(len(sentences)) if j != i and j != i+1])
                pairs.append(f"{sentences[i]}\t{sentences[random_idx]}\n")

        # The limit is checked once per dataset line, so it can be overshot here.
        if max_pairs and len(pairs) >= max_pairs:
            break

    # Trim before writing so that the returned count equals the lines written.
    if max_pairs:
        pairs = pairs[:max_pairs]

    # Write pairs to file
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(pairs)

    return len(pairs)

In [None]:
# Build sentence pairs from the first 20% of the train split and write them to
# wikitext_pairs.small; the cell output is the function's return value.
process_wikitext_to_pairs("wikitext_pairs.small", split="train[:20%]")

884013

## Load the WikiText Vocabulary

Reusing the `WordVocab` class, we build the vocabulary from the generated sentence-pair file and save it to disk:

In [None]:
# Build the vocabulary from the sentence-pair file. This rebinds the
# notebook-level `vocab` variable that the cells below use.
# NOTE(review): with max_size=None and min_freq=1 every distinct token is kept
# (VOCAB SIZE 855975 in the recorded output); consider a max_size or a higher
# min_freq to keep the vocabulary-sized layers of the model manageable.
with open("wikitext_pairs.small", "r", encoding="utf-8") as f:
    vocab = WordVocab(f, max_size=None, min_freq=1)

print("VOCAB SIZE:", len(vocab))
# The vocabulary is pickled by save_vocab (the .txt suffix notwithstanding).
vocab.save_vocab("./wikitext_vocab.txt")

Building Vocab


884013it [00:15, 58606.58it/s]


VOCAB SIZE: 855975


In [None]:
# Dataset over the sentence-pair file, encoded with the WikiText vocabulary.
train_dataset = BERTDataset(corpus_path="wikitext_pairs.small",
                           vocab=vocab,
                           seq_len=20,
                           corpus_lines=None,
                           on_memory=True)

# NOTE(review): no shuffle argument is passed, so the DataLoader default
# (shuffle=False) applies; confirm that training in file order is intended.
train_dataloader = DataLoader(train_dataset, batch_size=32, num_workers=2)


# Same hidden size and head count as train_small_bert, but 6 layers instead of 8.
bert = BERT(vocab_size=len(vocab),
           hidden=256,
           n_layers=6,
           attn_heads=8)

# No test dataloader is supplied for this run.
# NOTE(review): with_cuda=True requests GPU training; behaviour on a CPU-only
# runtime depends on BERTTrainer — confirm.
trainer = BERTTrainer(bert=bert,
                     vocab_size=len(vocab),
                     train_dataloader=train_dataloader,
                     test_dataloader=None,
                     lr=1e-3,
                     betas=(0.9, 0.999),
                     weight_decay=0.01,
                     with_cuda=True,
                     log_freq=10)

Loading Dataset: 884013it [00:02, 363331.02it/s]


Total Parameters: 443855017


In [None]:
# Train on the WikiText pairs and call trainer.save after every epoch.
print("Training Start")
for epoch in range(10):  # 10 epochs
    trainer.train(epoch)
    trainer.save(epoch, "./wikitext_trained.model")

Training Start


EP_train:0:   0%|| 2/27626 [00:00<2:29:15,  3.08it/s]

{'epoch': 0, 'iter': 0, 'avg_loss': 15.758881568908691, 'avg_acc': 59.375, 'loss': 15.758881568908691}


EP_train:0:   0%|| 12/27626 [00:02<1:05:52,  6.99it/s]

{'epoch': 0, 'iter': 10, 'avg_loss': 15.898591301657937, 'avg_acc': 50.0, 'loss': 16.01879119873047}


EP_train:0:   0%|| 22/27626 [00:03<1:04:38,  7.12it/s]

{'epoch': 0, 'iter': 20, 'avg_loss': 15.831828843979608, 'avg_acc': 50.44642857142857, 'loss': 15.968213081359863}


EP_train:0:   0%|| 32/27626 [00:04<1:04:55,  7.08it/s]

{'epoch': 0, 'iter': 30, 'avg_loss': 15.742893465103641, 'avg_acc': 50.201612903225815, 'loss': 15.40041732788086}


EP_train:0:   0%|| 42/27626 [00:06<1:05:31,  7.02it/s]

{'epoch': 0, 'iter': 40, 'avg_loss': 15.671437705435403, 'avg_acc': 50.0, 'loss': 15.284172058105469}


EP_train:0:   0%|| 52/27626 [00:07<1:04:24,  7.14it/s]

{'epoch': 0, 'iter': 50, 'avg_loss': 15.560054797752231, 'avg_acc': 50.857843137254896, 'loss': 15.01026725769043}


EP_train:0:   0%|| 62/27626 [00:09<1:04:25,  7.13it/s]

{'epoch': 0, 'iter': 60, 'avg_loss': 15.472316241655193, 'avg_acc': 50.409836065573764, 'loss': 14.83167839050293}


EP_train:0:   0%|| 72/27626 [00:10<1:04:24,  7.13it/s]

{'epoch': 0, 'iter': 70, 'avg_loss': 15.38633938910256, 'avg_acc': 51.012323943661976, 'loss': 14.82754135131836}


EP_train:0:   0%|| 82/27626 [00:11<1:04:23,  7.13it/s]

{'epoch': 0, 'iter': 80, 'avg_loss': 15.310890515645346, 'avg_acc': 51.27314814814815, 'loss': 14.71745777130127}


EP_train:0:   0%|| 92/27626 [00:13<1:04:17,  7.14it/s]

{'epoch': 0, 'iter': 90, 'avg_loss': 15.239903167053894, 'avg_acc': 50.755494505494504, 'loss': 14.480834007263184}


EP_train:0:   0%|| 102/27626 [00:14<1:04:09,  7.15it/s]

{'epoch': 0, 'iter': 100, 'avg_loss': 15.180365496342725, 'avg_acc': 50.58787128712871, 'loss': 14.60338306427002}


EP_train:0:   0%|| 112/27626 [00:16<1:04:18,  7.13it/s]

{'epoch': 0, 'iter': 110, 'avg_loss': 15.128594493006801, 'avg_acc': 50.9009009009009, 'loss': 14.728401184082031}


EP_train:0:   0%|| 122/27626 [00:17<1:05:00,  7.05it/s]

{'epoch': 0, 'iter': 120, 'avg_loss': 15.081092243352213, 'avg_acc': 50.6198347107438, 'loss': 14.582793235778809}


EP_train:0:   0%|| 132/27626 [00:18<1:04:58,  7.05it/s]

{'epoch': 0, 'iter': 130, 'avg_loss': 15.032273219741938, 'avg_acc': 50.286259541984734, 'loss': 14.51500415802002}


EP_train:0:   1%|| 142/27626 [00:20<1:04:39,  7.08it/s]

{'epoch': 0, 'iter': 140, 'avg_loss': 14.98866098146912, 'avg_acc': 50.6427304964539, 'loss': 14.352850914001465}


EP_train:0:   1%|| 152/27626 [00:21<1:05:07,  7.03it/s]

{'epoch': 0, 'iter': 150, 'avg_loss': 14.949133727724188, 'avg_acc': 50.579470198675494, 'loss': 14.302196502685547}


EP_train:0:   1%|| 162/27626 [00:23<1:04:35,  7.09it/s]

{'epoch': 0, 'iter': 160, 'avg_loss': 14.904745676502678, 'avg_acc': 50.621118012422365, 'loss': 14.29261589050293}


EP_train:0:   1%|| 172/27626 [00:24<1:04:30,  7.09it/s]

{'epoch': 0, 'iter': 170, 'avg_loss': 14.859345006663897, 'avg_acc': 50.45687134502924, 'loss': 13.975199699401855}


EP_train:0:   1%|| 182/27626 [00:25<1:04:28,  7.09it/s]

{'epoch': 0, 'iter': 180, 'avg_loss': 14.818429346242663, 'avg_acc': 50.362569060773474, 'loss': 14.200885772705078}


EP_train:0:   1%|| 192/27626 [00:27<1:04:28,  7.09it/s]

{'epoch': 0, 'iter': 190, 'avg_loss': 14.779296255860654, 'avg_acc': 50.44175392670157, 'loss': 14.013199806213379}


EP_train:0:   1%|| 202/27626 [00:28<1:04:53,  7.04it/s]

{'epoch': 0, 'iter': 200, 'avg_loss': 14.74197603576812, 'avg_acc': 50.17101990049751, 'loss': 14.06632137298584}


EP_train:0:   1%|| 212/27626 [00:30<1:04:38,  7.07it/s]

{'epoch': 0, 'iter': 210, 'avg_loss': 14.70194018395591, 'avg_acc': 50.22215639810427, 'loss': 13.531367301940918}


EP_train:0:   1%|| 222/27626 [00:31<1:05:33,  6.97it/s]

{'epoch': 0, 'iter': 220, 'avg_loss': 14.66347871322977, 'avg_acc': 50.282805429864254, 'loss': 13.716456413269043}


EP_train:0:   1%|| 232/27626 [00:33<1:04:23,  7.09it/s]

{'epoch': 0, 'iter': 230, 'avg_loss': 14.624971216375178, 'avg_acc': 50.243506493506494, 'loss': 13.732596397399902}


EP_train:0:   1%|| 242/27626 [00:34<1:04:29,  7.08it/s]

{'epoch': 0, 'iter': 240, 'avg_loss': 14.589185611835655, 'avg_acc': 50.1945020746888, 'loss': 13.77857780456543}


EP_train:0:   1%|| 252/27626 [00:35<1:04:15,  7.10it/s]

{'epoch': 0, 'iter': 250, 'avg_loss': 14.548834143406841, 'avg_acc': 50.13695219123506, 'loss': 13.557279586791992}


EP_train:0:   1%|| 262/27626 [00:37<1:04:15,  7.10it/s]

{'epoch': 0, 'iter': 260, 'avg_loss': 14.514890086148434, 'avg_acc': 50.09578544061303, 'loss': 13.42916202545166}


EP_train:0:   1%|| 272/27626 [00:38<1:04:11,  7.10it/s]

{'epoch': 0, 'iter': 270, 'avg_loss': 14.480213299008753, 'avg_acc': 50.0, 'loss': 13.71973705291748}


EP_train:0:   1%|| 282/27626 [00:40<1:04:18,  7.09it/s]

{'epoch': 0, 'iter': 280, 'avg_loss': 14.438388712465551, 'avg_acc': 50.11120996441281, 'loss': 13.398752212524414}


EP_train:0:   1%|| 292/27626 [00:41<1:04:14,  7.09it/s]

{'epoch': 0, 'iter': 290, 'avg_loss': 14.398256695147643, 'avg_acc': 50.139604810996566, 'loss': 13.449653625488281}


EP_train:0:   1%|| 302/27626 [00:42<1:04:34,  7.05it/s]

{'epoch': 0, 'iter': 300, 'avg_loss': 14.370078397351643, 'avg_acc': 50.1764950166113, 'loss': 12.711112976074219}


EP_train:0:   1%|| 312/27626 [00:44<1:04:24,  7.07it/s]

{'epoch': 0, 'iter': 310, 'avg_loss': 14.337968660704192, 'avg_acc': 50.23110932475884, 'loss': 13.480631828308105}


EP_train:0:   1%|| 322/27626 [00:45<1:04:12,  7.09it/s]

{'epoch': 0, 'iter': 320, 'avg_loss': 14.302806179842845, 'avg_acc': 50.2433800623053, 'loss': 12.771651268005371}


EP_train:0:   1%|| 332/27626 [00:47<1:04:01,  7.11it/s]

{'epoch': 0, 'iter': 330, 'avg_loss': 14.267085715002883, 'avg_acc': 50.188821752265866, 'loss': 12.583415031433105}


EP_train:0:   1%|| 342/27626 [00:48<1:04:03,  7.10it/s]

{'epoch': 0, 'iter': 340, 'avg_loss': 14.235116032910836, 'avg_acc': 50.26576246334311, 'loss': 13.719797134399414}


EP_train:0:   1%|| 352/27626 [00:49<1:04:03,  7.10it/s]

{'epoch': 0, 'iter': 350, 'avg_loss': 14.204848990481125, 'avg_acc': 50.20477207977208, 'loss': 13.183366775512695}


EP_train:0:   1%|| 362/27626 [00:51<1:04:21,  7.06it/s]

{'epoch': 0, 'iter': 360, 'avg_loss': 14.172848569389195, 'avg_acc': 50.302977839335185, 'loss': 12.324145317077637}


EP_train:0:   1%|| 372/27626 [00:52<1:04:22,  7.06it/s]

{'epoch': 0, 'iter': 370, 'avg_loss': 14.13667271400719, 'avg_acc': 50.35377358490566, 'loss': 12.996867179870605}


EP_train:0:   1%|| 382/27626 [00:54<1:03:56,  7.10it/s]

{'epoch': 0, 'iter': 380, 'avg_loss': 14.096873576246848, 'avg_acc': 50.21325459317585, 'loss': 12.918309211730957}


EP_train:0:   1%|| 392/27626 [00:55<1:04:17,  7.06it/s]

{'epoch': 0, 'iter': 390, 'avg_loss': 14.066172175395215, 'avg_acc': 50.18382352941176, 'loss': 12.771383285522461}


EP_train:0:   1%|| 402/27626 [00:56<1:04:31,  7.03it/s]

{'epoch': 0, 'iter': 400, 'avg_loss': 14.034121237490837, 'avg_acc': 50.062344139650875, 'loss': 12.80611801147461}


EP_train:0:   1%|| 412/27626 [00:58<1:04:22,  7.05it/s]

{'epoch': 0, 'iter': 410, 'avg_loss': 13.999260990579053, 'avg_acc': 50.030413625304135, 'loss': 12.444829940795898}


EP_train:0:   2%|| 422/27626 [00:59<1:04:16,  7.05it/s]

{'epoch': 0, 'iter': 420, 'avg_loss': 13.965178165752928, 'avg_acc': 50.044536817102134, 'loss': 12.771843910217285}


EP_train:0:   2%|| 432/27626 [01:01<1:03:59,  7.08it/s]

{'epoch': 0, 'iter': 430, 'avg_loss': 13.92787862985704, 'avg_acc': 50.05800464037124, 'loss': 12.775333404541016}


EP_train:0:   2%|| 442/27626 [01:02<1:03:53,  7.09it/s]

{'epoch': 0, 'iter': 440, 'avg_loss': 13.893017243365852, 'avg_acc': 49.86536281179138, 'loss': 12.072834014892578}


EP_train:0:   2%|| 452/27626 [01:03<1:03:49,  7.10it/s]

{'epoch': 0, 'iter': 450, 'avg_loss': 13.863765456459738, 'avg_acc': 49.958425720620845, 'loss': 13.131207466125488}


EP_train:0:   2%|| 462/27626 [01:05<1:03:59,  7.08it/s]

{'epoch': 0, 'iter': 460, 'avg_loss': 13.833058040726469, 'avg_acc': 49.898318872017356, 'loss': 12.546855926513672}


EP_train:0:   2%|| 472/27626 [01:06<1:03:37,  7.11it/s]

{'epoch': 0, 'iter': 470, 'avg_loss': 13.798252362860742, 'avg_acc': 49.87393842887474, 'loss': 11.655359268188477}


EP_train:0:   2%|| 482/27626 [01:08<1:03:42,  7.10it/s]

{'epoch': 0, 'iter': 480, 'avg_loss': 13.765728718525654, 'avg_acc': 49.974012474012476, 'loss': 12.399679183959961}


EP_train:0:   2%|| 492/27626 [01:09<1:04:24,  7.02it/s]

{'epoch': 0, 'iter': 490, 'avg_loss': 13.72031602451612, 'avg_acc': 49.98727087576375, 'loss': 11.916863441467285}


EP_train:0:   2%|| 502/27626 [01:11<1:03:43,  7.09it/s]

{'epoch': 0, 'iter': 500, 'avg_loss': 13.68347326533761, 'avg_acc': 50.0561377245509, 'loss': 12.539313316345215}


EP_train:0:   2%|| 512/27626 [01:12<1:03:22,  7.13it/s]

{'epoch': 0, 'iter': 510, 'avg_loss': 13.647976024276822, 'avg_acc': 50.06727005870842, 'loss': 11.232673645019531}


EP_train:0:   2%|| 522/27626 [01:13<1:03:29,  7.12it/s]

{'epoch': 0, 'iter': 520, 'avg_loss': 13.610788116528298, 'avg_acc': 49.97600767754319, 'loss': 11.741682052612305}


EP_train:0:   2%|| 532/27626 [01:15<1:03:10,  7.15it/s]

{'epoch': 0, 'iter': 530, 'avg_loss': 13.575887042474836, 'avg_acc': 50.01765536723164, 'loss': 11.127168655395508}


EP_train:0:   2%|| 542/27626 [01:16<1:03:38,  7.09it/s]

{'epoch': 0, 'iter': 540, 'avg_loss': 13.545266578025606, 'avg_acc': 50.06353974121996, 'loss': 11.44643497467041}


EP_train:0:   2%|| 552/27626 [01:18<1:03:20,  7.12it/s]

{'epoch': 0, 'iter': 550, 'avg_loss': 13.511201593707131, 'avg_acc': 50.08507259528131, 'loss': 11.7476167678833}


EP_train:0:   2%|| 562/27626 [01:19<1:03:31,  7.10it/s]

{'epoch': 0, 'iter': 560, 'avg_loss': 13.475663688306078, 'avg_acc': 50.07798573975044, 'loss': 11.251593589782715}


EP_train:0:   2%|| 572/27626 [01:20<1:04:19,  7.01it/s]

{'epoch': 0, 'iter': 570, 'avg_loss': 13.441088160366185, 'avg_acc': 50.08209281961471, 'loss': 11.868295669555664}


EP_train:0:   2%|| 582/27626 [01:22<1:04:03,  7.04it/s]

{'epoch': 0, 'iter': 580, 'avg_loss': 13.409720926399691, 'avg_acc': 50.123709122203095, 'loss': 11.715993881225586}


EP_train:0:   2%|| 592/27626 [01:23<1:04:01,  7.04it/s]

{'epoch': 0, 'iter': 590, 'avg_loss': 13.38513608632354, 'avg_acc': 50.132191201353635, 'loss': 12.78143310546875}


EP_train:0:   2%|| 602/27626 [01:25<1:04:21,  7.00it/s]

{'epoch': 0, 'iter': 600, 'avg_loss': 13.357962487739652, 'avg_acc': 50.17678868552413, 'loss': 11.33453369140625}


EP_train:0:   2%|| 612/27626 [01:26<1:04:08,  7.02it/s]

{'epoch': 0, 'iter': 610, 'avg_loss': 13.323193453337675, 'avg_acc': 50.16878068739771, 'loss': 9.535526275634766}


EP_train:0:   2%|| 622/27626 [01:27<1:03:54,  7.04it/s]

{'epoch': 0, 'iter': 620, 'avg_loss': 13.286618470761702, 'avg_acc': 50.22644927536232, 'loss': 10.434592247009277}


EP_train:0:   2%|| 632/27626 [01:29<1:03:37,  7.07it/s]

{'epoch': 0, 'iter': 630, 'avg_loss': 13.258895147054584, 'avg_acc': 50.20305071315373, 'loss': 11.671636581420898}


EP_train:0:   2%|| 642/27626 [01:30<1:03:40,  7.06it/s]

{'epoch': 0, 'iter': 640, 'avg_loss': 13.219094093429876, 'avg_acc': 50.15600624024961, 'loss': 10.493030548095703}


EP_train:0:   2%|| 652/27626 [01:32<1:04:02,  7.02it/s]

{'epoch': 0, 'iter': 650, 'avg_loss': 13.194244802272815, 'avg_acc': 50.08160522273426, 'loss': 11.997251510620117}


EP_train:0:   2%|| 662/27626 [01:33<1:03:35,  7.07it/s]

{'epoch': 0, 'iter': 660, 'avg_loss': 13.163058241989898, 'avg_acc': 50.08509833585476, 'loss': 10.8340425491333}


EP_train:0:   2%|| 672/27626 [01:35<1:06:26,  6.76it/s]

{'epoch': 0, 'iter': 670, 'avg_loss': 13.136741149265614, 'avg_acc': 50.16300298062594, 'loss': 11.225499153137207}


EP_train:0:   2%|| 682/27626 [01:36<1:03:39,  7.05it/s]

{'epoch': 0, 'iter': 680, 'avg_loss': 13.10123251188169, 'avg_acc': 50.1606093979442, 'loss': 9.975610733032227}


EP_train:0:   3%|| 692/27626 [01:37<1:03:17,  7.09it/s]

{'epoch': 0, 'iter': 690, 'avg_loss': 13.068343025903108, 'avg_acc': 50.226121562952244, 'loss': 11.783873558044434}


EP_train:0:   3%|| 702/27626 [01:39<1:03:43,  7.04it/s]

{'epoch': 0, 'iter': 700, 'avg_loss': 13.038004048030489, 'avg_acc': 50.21843794579173, 'loss': 11.598884582519531}


EP_train:0:   3%|| 712/27626 [01:40<1:03:19,  7.08it/s]

{'epoch': 0, 'iter': 710, 'avg_loss': 13.006945805878244, 'avg_acc': 50.27689873417721, 'loss': 10.1358642578125}


EP_train:0:   3%|| 722/27626 [01:42<1:03:40,  7.04it/s]

{'epoch': 0, 'iter': 720, 'avg_loss': 12.975578773699587, 'avg_acc': 50.19504160887656, 'loss': 10.35428237915039}


EP_train:0:   3%|| 732/27626 [01:43<1:03:25,  7.07it/s]

{'epoch': 0, 'iter': 730, 'avg_loss': 12.948969577470981, 'avg_acc': 50.200923392612864, 'loss': 10.726326942443848}


EP_train:0:   3%|| 742/27626 [01:44<1:03:30,  7.06it/s]

{'epoch': 0, 'iter': 740, 'avg_loss': 12.92061691181219, 'avg_acc': 50.22351551956815, 'loss': 10.990951538085938}


EP_train:0:   3%|| 752/27626 [01:46<1:03:13,  7.08it/s]

{'epoch': 0, 'iter': 750, 'avg_loss': 12.893380047001946, 'avg_acc': 50.22053928095872, 'loss': 10.678084373474121}


EP_train:0:   3%|| 762/27626 [01:47<1:03:39,  7.03it/s]

{'epoch': 0, 'iter': 760, 'avg_loss': 12.86203060200274, 'avg_acc': 50.18478975032852, 'loss': 10.355402946472168}


EP_train:0:   3%|| 772/27626 [01:49<1:03:11,  7.08it/s]

{'epoch': 0, 'iter': 770, 'avg_loss': 12.832937303684101, 'avg_acc': 50.14591439688716, 'loss': 10.500639915466309}


EP_train:0:   3%|| 782/27626 [01:50<1:03:02,  7.10it/s]

{'epoch': 0, 'iter': 780, 'avg_loss': 12.806746995708549, 'avg_acc': 50.16805377720871, 'loss': 10.569637298583984}


EP_train:0:   3%|| 792/27626 [01:51<1:03:08,  7.08it/s]

{'epoch': 0, 'iter': 790, 'avg_loss': 12.778123159927002, 'avg_acc': 50.15407711757269, 'loss': 9.597249984741211}


EP_train:0:   3%|| 802/27626 [01:53<1:02:51,  7.11it/s]

{'epoch': 0, 'iter': 800, 'avg_loss': 12.746428427773617, 'avg_acc': 50.22627965043696, 'loss': 10.29221248626709}


EP_train:0:   3%|| 812/27626 [01:54<1:03:03,  7.09it/s]

{'epoch': 0, 'iter': 810, 'avg_loss': 12.71949160231321, 'avg_acc': 50.2119297163995, 'loss': 10.5111665725708}


EP_train:0:   3%|| 822/27626 [01:56<1:02:24,  7.16it/s]

{'epoch': 0, 'iter': 820, 'avg_loss': 12.690143922069332, 'avg_acc': 50.26644336175396, 'loss': 10.29017162322998}


EP_train:0:   3%|| 832/27626 [01:57<1:02:50,  7.11it/s]

{'epoch': 0, 'iter': 830, 'avg_loss': 12.66311263579037, 'avg_acc': 50.251955475330924, 'loss': 9.9949369430542}


EP_train:0:   3%|| 842/27626 [01:58<1:03:18,  7.05it/s]

{'epoch': 0, 'iter': 840, 'avg_loss': 12.633627499184625, 'avg_acc': 50.25639120095124, 'loss': 9.54300594329834}


EP_train:0:   3%|| 852/27626 [02:00<1:02:58,  7.09it/s]

{'epoch': 0, 'iter': 850, 'avg_loss': 12.60659778944614, 'avg_acc': 50.28642773207991, 'loss': 9.7623872756958}


EP_train:0:   3%|| 862/27626 [02:01<1:02:29,  7.14it/s]

{'epoch': 0, 'iter': 860, 'avg_loss': 12.576392891515008, 'avg_acc': 50.29036004645761, 'loss': 10.289046287536621}


EP_train:0:   3%|| 872/27626 [02:03<1:02:55,  7.09it/s]

{'epoch': 0, 'iter': 870, 'avg_loss': 12.551939992762323, 'avg_acc': 50.330080367393805, 'loss': 10.328657150268555}


EP_train:0:   3%|| 882/27626 [02:04<1:02:27,  7.14it/s]

{'epoch': 0, 'iter': 880, 'avg_loss': 12.52171572707974, 'avg_acc': 50.31569239500568, 'loss': 9.989386558532715}


EP_train:0:   3%|| 892/27626 [02:05<1:03:03,  7.07it/s]

{'epoch': 0, 'iter': 890, 'avg_loss': 12.494224100937078, 'avg_acc': 50.32617845117845, 'loss': 10.585389137268066}


EP_train:0:   3%|| 902/27626 [02:07<1:02:36,  7.11it/s]

{'epoch': 0, 'iter': 900, 'avg_loss': 12.466189717875999, 'avg_acc': 50.332963374028864, 'loss': 9.342923164367676}


EP_train:0:   3%|| 912/27626 [02:08<1:02:34,  7.11it/s]

{'epoch': 0, 'iter': 910, 'avg_loss': 12.442492778162531, 'avg_acc': 50.336169045005484, 'loss': 10.445684432983398}


EP_train:0:   3%|| 922/27626 [02:10<1:02:46,  7.09it/s]

{'epoch': 0, 'iter': 920, 'avg_loss': 12.418932389747049, 'avg_acc': 50.3528773072747, 'loss': 9.957566261291504}


EP_train:0:   3%|| 932/27626 [02:11<1:02:39,  7.10it/s]

{'epoch': 0, 'iter': 930, 'avg_loss': 12.394726521730679, 'avg_acc': 50.30209452201934, 'loss': 9.670595169067383}


EP_train:0:   3%|| 942/27626 [02:12<1:02:55,  7.07it/s]

{'epoch': 0, 'iter': 940, 'avg_loss': 12.369225265875887, 'avg_acc': 50.29224229543039, 'loss': 9.78545093536377}


EP_train:0:   3%|| 952/27626 [02:14<1:02:46,  7.08it/s]

{'epoch': 0, 'iter': 950, 'avg_loss': 12.344209235800053, 'avg_acc': 50.26616719242902, 'loss': 9.369765281677246}


EP_train:0:   3%|| 962/27626 [02:15<1:02:51,  7.07it/s]

{'epoch': 0, 'iter': 960, 'avg_loss': 12.318785644594762, 'avg_acc': 50.25039021852237, 'loss': 9.211615562438965}


EP_train:0:   4%|| 972/27626 [02:17<1:02:35,  7.10it/s]

{'epoch': 0, 'iter': 970, 'avg_loss': 12.293677936007876, 'avg_acc': 50.205973223480946, 'loss': 10.276629447937012}


EP_train:0:   4%|| 982/27626 [02:18<1:02:37,  7.09it/s]

{'epoch': 0, 'iter': 980, 'avg_loss': 12.269307969663972, 'avg_acc': 50.2229867482161, 'loss': 10.449034690856934}


EP_train:0:   4%|| 992/27626 [02:19<1:02:46,  7.07it/s]

{'epoch': 0, 'iter': 990, 'avg_loss': 12.24475733374972, 'avg_acc': 50.16397578203835, 'loss': 10.13376235961914}


EP_train:0:   4%|| 1002/27626 [02:21<1:02:58,  7.05it/s]

{'epoch': 0, 'iter': 1000, 'avg_loss': 12.221235553463261, 'avg_acc': 50.1717032967033, 'loss': 9.626214981079102}


EP_train:0:   4%|| 1012/27626 [02:22<1:02:28,  7.10it/s]

{'epoch': 0, 'iter': 1010, 'avg_loss': 12.195732107266238, 'avg_acc': 50.163822947576655, 'loss': 10.158151626586914}


EP_train:0:   4%|| 1022/27626 [02:24<1:03:21,  7.00it/s]

{'epoch': 0, 'iter': 1020, 'avg_loss': 12.173448781191894, 'avg_acc': 50.165279138099905, 'loss': 9.76537036895752}


EP_train:0:   4%|| 1032/27626 [02:25<1:02:51,  7.05it/s]

{'epoch': 0, 'iter': 1030, 'avg_loss': 12.15225029269199, 'avg_acc': 50.18792434529583, 'loss': 10.741121292114258}


EP_train:0:   4%|| 1042/27626 [02:27<1:02:31,  7.09it/s]

{'epoch': 0, 'iter': 1040, 'avg_loss': 12.127345706269102, 'avg_acc': 50.19512487992315, 'loss': 10.693388938903809}


EP_train:0:   4%|| 1052/27626 [02:28<1:02:06,  7.13it/s]

{'epoch': 0, 'iter': 1050, 'avg_loss': 12.106839767078577, 'avg_acc': 50.139747859181725, 'loss': 10.162407875061035}


EP_train:0:   4%|| 1062/27626 [02:29<1:02:19,  7.10it/s]

{'epoch': 0, 'iter': 1060, 'avg_loss': 12.087862515427053, 'avg_acc': 50.15021206409048, 'loss': 9.719466209411621}


EP_train:0:   4%|| 1072/27626 [02:31<1:02:40,  7.06it/s]

{'epoch': 0, 'iter': 1070, 'avg_loss': 12.06623370796581, 'avg_acc': 50.15172735760971, 'loss': 10.299542427062988}


EP_train:0:   4%|| 1082/27626 [02:32<1:02:39,  7.06it/s]

{'epoch': 0, 'iter': 1080, 'avg_loss': 12.040426377782548, 'avg_acc': 50.19657724329325, 'loss': 10.79741096496582}


EP_train:0:   4%|| 1092/27626 [02:34<1:02:14,  7.10it/s]

{'epoch': 0, 'iter': 1090, 'avg_loss': 12.019001057779537, 'avg_acc': 50.20050412465628, 'loss': 10.270380973815918}


EP_train:0:   4%|| 1102/27626 [02:35<1:01:56,  7.14it/s]

{'epoch': 0, 'iter': 1100, 'avg_loss': 12.001243773208326, 'avg_acc': 50.17029972752044, 'loss': 9.361181259155273}


EP_train:0:   4%|| 1112/27626 [02:36<1:03:13,  6.99it/s]

{'epoch': 0, 'iter': 1110, 'avg_loss': 11.981365823616969, 'avg_acc': 50.18845634563457, 'loss': 10.337897300720215}


EP_train:0:   4%|| 1122/27626 [02:38<1:02:57,  7.02it/s]

{'epoch': 0, 'iter': 1120, 'avg_loss': 11.961049784303881, 'avg_acc': 50.20628902765388, 'loss': 9.692481994628906}


EP_train:0:   4%|| 1132/27626 [02:39<1:02:13,  7.10it/s]

{'epoch': 0, 'iter': 1130, 'avg_loss': 11.940543894847867, 'avg_acc': 50.21828028293546, 'loss': 9.137368202209473}


EP_train:0:   4%|| 1142/27626 [02:41<1:02:02,  7.11it/s]

{'epoch': 0, 'iter': 1140, 'avg_loss': 11.918436642177893, 'avg_acc': 50.16706836108676, 'loss': 8.995661735534668}


EP_train:0:   4%|| 1152/27626 [02:42<1:01:49,  7.14it/s]

{'epoch': 0, 'iter': 1150, 'avg_loss': 11.901087041733888, 'avg_acc': 50.135751520417024, 'loss': 10.169839859008789}


EP_train:0:   4%|| 1162/27626 [02:43<1:01:53,  7.13it/s]

{'epoch': 0, 'iter': 1160, 'avg_loss': 11.880270746019152, 'avg_acc': 50.11035745047373, 'loss': 8.028094291687012}


EP_train:0:   4%|| 1172/27626 [02:45<1:01:38,  7.15it/s]

{'epoch': 0, 'iter': 1170, 'avg_loss': 11.862520715500125, 'avg_acc': 50.14677625960717, 'loss': 9.79719352722168}


EP_train:0:   4%|| 1182/27626 [02:46<1:01:58,  7.11it/s]

{'epoch': 0, 'iter': 1180, 'avg_loss': 11.844619520835005, 'avg_acc': 50.193162574089754, 'loss': 9.582707405090332}


EP_train:0:   4%|| 1192/27626 [02:48<1:02:06,  7.09it/s]

{'epoch': 0, 'iter': 1190, 'avg_loss': 11.82418396130617, 'avg_acc': 50.178421494542405, 'loss': 10.154824256896973}


EP_train:0:   4%|| 1202/27626 [02:49<1:02:45,  7.02it/s]

{'epoch': 0, 'iter': 1200, 'avg_loss': 11.804589439887588, 'avg_acc': 50.166527893422156, 'loss': 9.133466720581055}


EP_train:0:   4%|| 1212/27626 [02:50<1:02:50,  7.00it/s]

{'epoch': 0, 'iter': 1210, 'avg_loss': 11.787945196118658, 'avg_acc': 50.18063583815029, 'loss': 9.030220031738281}


EP_train:0:   4%|| 1222/27626 [02:52<1:02:08,  7.08it/s]

{'epoch': 0, 'iter': 1220, 'avg_loss': 11.76998587537261, 'avg_acc': 50.16891891891891, 'loss': 10.061993598937988}


EP_train:0:   4%|| 1232/27626 [02:53<1:02:17,  7.06it/s]

{'epoch': 0, 'iter': 1230, 'avg_loss': 11.752428341454166, 'avg_acc': 50.13454508529651, 'loss': 9.615365028381348}


EP_train:0:   4%|| 1242/27626 [02:55<1:02:07,  7.08it/s]

{'epoch': 0, 'iter': 1240, 'avg_loss': 11.734285296210352, 'avg_acc': 50.11583400483482, 'loss': 9.352178573608398}


EP_train:0:   5%|| 1252/27626 [02:56<1:02:19,  7.05it/s]

{'epoch': 0, 'iter': 1250, 'avg_loss': 11.715115487718467, 'avg_acc': 50.107414068745, 'loss': 8.97008991241455}


EP_train:0:   5%|| 1262/27626 [02:57<1:02:09,  7.07it/s]

{'epoch': 0, 'iter': 1260, 'avg_loss': 11.697295519399983, 'avg_acc': 50.08178033306899, 'loss': 9.688865661621094}


EP_train:0:   5%|| 1272/27626 [02:59<1:02:06,  7.07it/s]

{'epoch': 0, 'iter': 1270, 'avg_loss': 11.680277719355118, 'avg_acc': 50.090971675845786, 'loss': 9.360459327697754}


EP_train:0:   5%|| 1282/27626 [03:00<1:01:44,  7.11it/s]

{'epoch': 0, 'iter': 1280, 'avg_loss': 11.662550116218132, 'avg_acc': 50.11953551912568, 'loss': 9.137365341186523}


EP_train:0:   5%|| 1292/27626 [03:02<1:01:53,  7.09it/s]

{'epoch': 0, 'iter': 1290, 'avg_loss': 11.645193385856269, 'avg_acc': 50.14765685515105, 'loss': 9.533892631530762}


EP_train:0:   5%|| 1302/27626 [03:03<1:02:25,  7.03it/s]

{'epoch': 0, 'iter': 1300, 'avg_loss': 11.62662484112563, 'avg_acc': 50.1609338970023, 'loss': 10.097856521606445}


EP_train:0:   5%|| 1312/27626 [03:04<1:01:42,  7.11it/s]

{'epoch': 0, 'iter': 1310, 'avg_loss': 11.611717129553847, 'avg_acc': 50.15493897787948, 'loss': 9.655461311340332}


EP_train:0:   5%|| 1322/27626 [03:06<1:01:40,  7.11it/s]

{'epoch': 0, 'iter': 1320, 'avg_loss': 11.59388328988294, 'avg_acc': 50.12301286903861, 'loss': 9.658428192138672}


EP_train:0:   5%|| 1332/27626 [03:07<1:01:22,  7.14it/s]

{'epoch': 0, 'iter': 1330, 'avg_loss': 11.579423406444754, 'avg_acc': 50.14321938392187, 'loss': 10.87463092803955}


EP_train:0:   5%|| 1342/27626 [03:09<1:01:26,  7.13it/s]

{'epoch': 0, 'iter': 1340, 'avg_loss': 11.563007115784375, 'avg_acc': 50.151472781506335, 'loss': 8.873336791992188}


EP_train:0:   5%|| 1352/27626 [03:10<1:01:12,  7.15it/s]

{'epoch': 0, 'iter': 1350, 'avg_loss': 11.547042584083947, 'avg_acc': 50.1549777942265, 'loss': 9.319939613342285}


EP_train:0:   5%|| 1362/27626 [03:11<1:01:33,  7.11it/s]

{'epoch': 0, 'iter': 1360, 'avg_loss': 11.531475413993034, 'avg_acc': 50.14924687729611, 'loss': 9.436972618103027}


EP_train:0:   5%|| 1372/27626 [03:13<1:01:11,  7.15it/s]

{'epoch': 0, 'iter': 1370, 'avg_loss': 11.514844321230743, 'avg_acc': 50.18690736688548, 'loss': 8.658235549926758}


EP_train:0:   5%|| 1382/27626 [03:14<1:01:44,  7.08it/s]

{'epoch': 0, 'iter': 1380, 'avg_loss': 11.49614426780662, 'avg_acc': 50.18555394641564, 'loss': 9.98475456237793}


EP_train:0:   5%|| 1392/27626 [03:16<1:02:04,  7.04it/s]

{'epoch': 0, 'iter': 1390, 'avg_loss': 11.477700001725527, 'avg_acc': 50.20443925233645, 'loss': 8.975102424621582}


EP_train:0:   5%|| 1402/27626 [03:17<1:01:44,  7.08it/s]

{'epoch': 0, 'iter': 1400, 'avg_loss': 11.461872844165091, 'avg_acc': 50.17175231977159, 'loss': 9.416696548461914}


EP_train:0:   5%|| 1412/27626 [03:18<1:01:41,  7.08it/s]

{'epoch': 0, 'iter': 1410, 'avg_loss': 11.447271399764961, 'avg_acc': 50.190467753366406, 'loss': 8.970702171325684}


EP_train:0:   5%|| 1422/27626 [03:20<1:01:18,  7.12it/s]

{'epoch': 0, 'iter': 1420, 'avg_loss': 11.430729913006862, 'avg_acc': 50.206720619282194, 'loss': 8.978960037231445}


EP_train:0:   5%|| 1432/27626 [03:21<1:01:19,  7.12it/s]

{'epoch': 0, 'iter': 1430, 'avg_loss': 11.414492487824106, 'avg_acc': 50.194357092942, 'loss': 9.226767539978027}


EP_train:0:   5%|| 1442/27626 [03:23<1:01:13,  7.13it/s]

{'epoch': 0, 'iter': 1440, 'avg_loss': 11.396978707548481, 'avg_acc': 50.18867106176267, 'loss': 8.966302871704102}


EP_train:0:   5%|| 1452/27626 [03:24<1:01:21,  7.11it/s]

{'epoch': 0, 'iter': 1450, 'avg_loss': 11.382513780087788, 'avg_acc': 50.18306340454859, 'loss': 9.465744018554688}


EP_train:0:   5%|| 1462/27626 [03:25<1:01:04,  7.14it/s]

{'epoch': 0, 'iter': 1460, 'avg_loss': 11.366932135587845, 'avg_acc': 50.17967145790554, 'loss': 10.076478958129883}


EP_train:0:   5%|| 1472/27626 [03:27<1:01:30,  7.09it/s]

{'epoch': 0, 'iter': 1470, 'avg_loss': 11.354374438544506, 'avg_acc': 50.161454792658056, 'loss': 9.54269027709961}


EP_train:0:   5%|| 1482/27626 [03:28<1:01:41,  7.06it/s]

{'epoch': 0, 'iter': 1480, 'avg_loss': 11.339005620635739, 'avg_acc': 50.13926401080351, 'loss': 9.19810676574707}


EP_train:0:   5%|| 1492/27626 [03:30<1:01:18,  7.10it/s]

{'epoch': 0, 'iter': 1490, 'avg_loss': 11.32718039646315, 'avg_acc': 50.12785043594903, 'loss': 8.656728744506836}


EP_train:0:   5%|| 1502/27626 [03:31<1:01:24,  7.09it/s]

{'epoch': 0, 'iter': 1500, 'avg_loss': 11.313163262697, 'avg_acc': 50.126998667554965, 'loss': 8.893803596496582}


EP_train:0:   5%|| 1512/27626 [03:32<1:01:19,  7.10it/s]

{'epoch': 0, 'iter': 1510, 'avg_loss': 11.300566156678922, 'avg_acc': 50.0992720052945, 'loss': 9.840021133422852}


EP_train:0:   6%|| 1522/27626 [03:34<1:01:05,  7.12it/s]

{'epoch': 0, 'iter': 1520, 'avg_loss': 11.286315184686625, 'avg_acc': 50.10272846811308, 'loss': 8.948793411254883}


EP_train:0:   6%|| 1532/27626 [03:35<1:01:03,  7.12it/s]

{'epoch': 0, 'iter': 1530, 'avg_loss': 11.27148008969467, 'avg_acc': 50.091851730894845, 'loss': 9.40364933013916}


EP_train:0:   6%|| 1542/27626 [03:37<1:01:17,  7.09it/s]

{'epoch': 0, 'iter': 1540, 'avg_loss': 11.257946248645522, 'avg_acc': 50.05880921479559, 'loss': 8.883793830871582}


EP_train:0:   6%|| 1552/27626 [03:38<1:01:07,  7.11it/s]

{'epoch': 0, 'iter': 1550, 'avg_loss': 11.242925872962603, 'avg_acc': 50.0523855577047, 'loss': 9.217411041259766}


EP_train:0:   6%|| 1562/27626 [03:39<1:01:33,  7.06it/s]

{'epoch': 0, 'iter': 1560, 'avg_loss': 11.22985186903695, 'avg_acc': 50.08007687379885, 'loss': 8.037100791931152}


EP_train:0:   6%|| 1572/27626 [03:41<1:01:21,  7.08it/s]

{'epoch': 0, 'iter': 1570, 'avg_loss': 11.216912802277971, 'avg_acc': 50.061664544875875, 'loss': 8.87069034576416}


EP_train:0:   6%|| 1582/27626 [03:42<1:01:17,  7.08it/s]

{'epoch': 0, 'iter': 1580, 'avg_loss': 11.202940041773083, 'avg_acc': 50.06720430107527, 'loss': 8.650654792785645}


EP_train:0:   6%|| 1592/27626 [03:44<1:01:01,  7.11it/s]

{'epoch': 0, 'iter': 1590, 'avg_loss': 11.187961054177947, 'avg_acc': 50.09035197988686, 'loss': 9.08853816986084}


EP_train:0:   6%|| 1602/27626 [03:45<1:00:48,  7.13it/s]

{'epoch': 0, 'iter': 1600, 'avg_loss': 11.174577919711625, 'avg_acc': 50.10345096814491, 'loss': 9.385326385498047}


EP_train:0:   6%|| 1612/27626 [03:46<1:00:50,  7.13it/s]

{'epoch': 0, 'iter': 1610, 'avg_loss': 11.162795292376584, 'avg_acc': 50.10668839230292, 'loss': 8.685715675354004}


EP_train:0:   6%|| 1622/27626 [03:48<1:00:48,  7.13it/s]

{'epoch': 0, 'iter': 1620, 'avg_loss': 11.14982941972849, 'avg_acc': 50.096391116594695, 'loss': 8.327402114868164}


EP_train:0:   6%|| 1632/27626 [03:49<1:00:43,  7.13it/s]

{'epoch': 0, 'iter': 1630, 'avg_loss': 11.135012650036652, 'avg_acc': 50.107296137339056, 'loss': 8.629804611206055}


EP_train:0:   6%|| 1642/27626 [03:51<1:00:54,  7.11it/s]

{'epoch': 0, 'iter': 1640, 'avg_loss': 11.122529992237823, 'avg_acc': 50.118068251066425, 'loss': 8.885116577148438}


EP_train:0:   6%|| 1652/27626 [03:52<1:01:26,  7.05it/s]

{'epoch': 0, 'iter': 1650, 'avg_loss': 11.108483549611341, 'avg_acc': 50.11735311932163, 'loss': 8.905417442321777}


EP_train:0:   6%|| 1662/27626 [03:54<1:01:38,  7.02it/s]

{'epoch': 0, 'iter': 1660, 'avg_loss': 11.098786433298569, 'avg_acc': 50.12229078868151, 'loss': 9.806475639343262}


EP_train:0:   6%|| 1672/27626 [03:55<1:01:47,  7.00it/s]

{'epoch': 0, 'iter': 1670, 'avg_loss': 11.087660304947859, 'avg_acc': 50.11968880909635, 'loss': 9.47118091583252}


EP_train:0:   6%|| 1682/27626 [03:56<1:01:29,  7.03it/s]

{'epoch': 0, 'iter': 1680, 'avg_loss': 11.075213917375391, 'avg_acc': 50.12269482450922, 'loss': 8.40909194946289}


EP_train:0:   6%|| 1692/27626 [03:58<1:01:03,  7.08it/s]

{'epoch': 0, 'iter': 1690, 'avg_loss': 11.063805382185473, 'avg_acc': 50.11642519219397, 'loss': 9.908269882202148}


EP_train:0:   6%|| 1702/27626 [03:59<1:01:11,  7.06it/s]

{'epoch': 0, 'iter': 1700, 'avg_loss': 11.05392431890453, 'avg_acc': 50.10839212228101, 'loss': 9.696602821350098}


EP_train:0:   6%|| 1712/27626 [04:01<1:00:57,  7.09it/s]

{'epoch': 0, 'iter': 1710, 'avg_loss': 11.042477892547376, 'avg_acc': 50.115064289888956, 'loss': 8.896991729736328}


EP_train:0:   6%|| 1722/27626 [04:02<1:01:19,  7.04it/s]

{'epoch': 0, 'iter': 1720, 'avg_loss': 11.033913171824711, 'avg_acc': 50.08897443346891, 'loss': 9.495255470275879}


EP_train:0:   6%|| 1732/27626 [04:03<1:00:59,  7.08it/s]

{'epoch': 0, 'iter': 1730, 'avg_loss': 11.023935262899327, 'avg_acc': 50.09207105719238, 'loss': 9.008366584777832}


EP_train:0:   6%|| 1742/27626 [04:05<1:00:58,  7.07it/s]

{'epoch': 0, 'iter': 1740, 'avg_loss': 11.012582004378405, 'avg_acc': 50.08974727168294, 'loss': 9.538820266723633}


EP_train:0:   6%|| 1752/27626 [04:06<1:01:26,  7.02it/s]

{'epoch': 0, 'iter': 1750, 'avg_loss': 10.99985652021651, 'avg_acc': 50.09637350085665, 'loss': 9.10500431060791}


EP_train:0:   6%|| 1762/27626 [04:08<1:01:17,  7.03it/s]

{'epoch': 0, 'iter': 1760, 'avg_loss': 10.988311641937898, 'avg_acc': 50.09405167518456, 'loss': 9.545127868652344}


EP_train:0:   6%|| 1772/27626 [04:09<1:00:51,  7.08it/s]

{'epoch': 0, 'iter': 1770, 'avg_loss': 10.976565695966723, 'avg_acc': 50.077639751552795, 'loss': 9.219009399414062}


EP_train:0:   6%|| 1782/27626 [04:10<1:00:56,  7.07it/s]

{'epoch': 0, 'iter': 1780, 'avg_loss': 10.965640797366056, 'avg_acc': 50.06316676024706, 'loss': 9.421607971191406}


EP_train:0:   6%|| 1792/27626 [04:12<1:00:42,  7.09it/s]

{'epoch': 0, 'iter': 1790, 'avg_loss': 10.955961349355249, 'avg_acc': 50.06455890563931, 'loss': 8.64771556854248}


EP_train:0:   7%|| 1802/27626 [04:13<1:00:50,  7.07it/s]

{'epoch': 0, 'iter': 1800, 'avg_loss': 10.945816267205238, 'avg_acc': 50.064200444197674, 'loss': 8.700065612792969}


EP_train:0:   7%|| 1812/27626 [04:15<1:00:43,  7.09it/s]

{'epoch': 0, 'iter': 1810, 'avg_loss': 10.934653658158604, 'avg_acc': 50.06729707344009, 'loss': 9.013691902160645}


EP_train:0:   7%|| 1822/27626 [04:16<1:00:42,  7.08it/s]

{'epoch': 0, 'iter': 1820, 'avg_loss': 10.923536615932074, 'avg_acc': 50.09266886326195, 'loss': 9.34122085571289}


EP_train:0:   7%|| 1832/27626 [04:17<1:00:59,  7.05it/s]

{'epoch': 0, 'iter': 1830, 'avg_loss': 10.912619681907833, 'avg_acc': 50.11776351720372, 'loss': 9.495980262756348}


EP_train:0:   7%|| 1842/27626 [04:19<1:00:44,  7.08it/s]

{'epoch': 0, 'iter': 1840, 'avg_loss': 10.902572762894929, 'avg_acc': 50.11372895165671, 'loss': 9.226567268371582}


EP_train:0:   7%|| 1852/27626 [04:20<1:00:35,  7.09it/s]

{'epoch': 0, 'iter': 1850, 'avg_loss': 10.892977172010722, 'avg_acc': 50.10636142625607, 'loss': 9.564340591430664}


EP_train:0:   7%|| 1862/27626 [04:22<1:00:36,  7.08it/s]

{'epoch': 0, 'iter': 1860, 'avg_loss': 10.88257421107141, 'avg_acc': 50.11418592154756, 'loss': 8.656015396118164}


EP_train:0:   7%|| 1872/27626 [04:23<1:00:28,  7.10it/s]

{'epoch': 0, 'iter': 1870, 'avg_loss': 10.871396046408607, 'avg_acc': 50.141969535008016, 'loss': 9.311346054077148}


EP_train:0:   7%|| 1882/27626 [04:24<1:00:46,  7.06it/s]

{'epoch': 0, 'iter': 1880, 'avg_loss': 10.862560053191116, 'avg_acc': 50.12958532695375, 'loss': 9.275178909301758}


EP_train:0:   7%|| 1892/27626 [04:26<1:00:42,  7.07it/s]

{'epoch': 0, 'iter': 1890, 'avg_loss': 10.854935921417349, 'avg_acc': 50.12228979375991, 'loss': 9.418560981750488}


EP_train:0:   7%|| 1902/27626 [04:27<1:00:37,  7.07it/s]

{'epoch': 0, 'iter': 1900, 'avg_loss': 10.845300844504543, 'avg_acc': 50.113427143608625, 'loss': 9.564167022705078}


EP_train:0:   7%|| 1912/27626 [04:29<1:00:36,  7.07it/s]

{'epoch': 0, 'iter': 1910, 'avg_loss': 10.835125980546872, 'avg_acc': 50.10302197802198, 'loss': 8.111794471740723}


EP_train:0:   7%|| 1922/27626 [04:30<1:01:01,  7.02it/s]

{'epoch': 0, 'iter': 1920, 'avg_loss': 10.82480235623544, 'avg_acc': 50.081337844872465, 'loss': 9.044294357299805}


EP_train:0:   7%|| 1932/27626 [04:32<1:00:55,  7.03it/s]

{'epoch': 0, 'iter': 1930, 'avg_loss': 10.81490258785309, 'avg_acc': 50.08253495598136, 'loss': 9.812376022338867}


EP_train:0:   7%|| 1942/27626 [04:33<1:00:43,  7.05it/s]

{'epoch': 0, 'iter': 1940, 'avg_loss': 10.806063262167081, 'avg_acc': 50.082109737248835, 'loss': 9.14581298828125}


EP_train:0:   7%|| 1952/27626 [04:34<1:00:27,  7.08it/s]

{'epoch': 0, 'iter': 1950, 'avg_loss': 10.795971328328658, 'avg_acc': 50.10090978985136, 'loss': 8.561115264892578}


EP_train:0:   7%|| 1962/27626 [04:36<1:00:14,  7.10it/s]

{'epoch': 0, 'iter': 1960, 'avg_loss': 10.785245253929126, 'avg_acc': 50.08445945945946, 'loss': 8.230433464050293}


EP_train:0:   7%|| 1972/27626 [04:37<1:00:20,  7.09it/s]

{'epoch': 0, 'iter': 1970, 'avg_loss': 10.774523733836753, 'avg_acc': 50.10939878234398, 'loss': 10.108555793762207}


EP_train:0:   7%|| 1982/27626 [04:39<1:00:15,  7.09it/s]

{'epoch': 0, 'iter': 1980, 'avg_loss': 10.765527765898918, 'avg_acc': 50.121466431095406, 'loss': 8.707840919494629}


EP_train:0:   7%|| 1992/27626 [04:40<59:45,  7.15it/s]

{'epoch': 0, 'iter': 1990, 'avg_loss': 10.754782756209552, 'avg_acc': 50.14126067302863, 'loss': 8.699431419372559}


EP_train:0:   7%|| 2002/27626 [04:41<1:00:01,  7.11it/s]

{'epoch': 0, 'iter': 2000, 'avg_loss': 10.742795717352811, 'avg_acc': 50.143678160919535, 'loss': 8.619141578674316}


EP_train:0:   7%|| 2012/27626 [04:43<1:00:05,  7.10it/s]

{'epoch': 0, 'iter': 2010, 'avg_loss': 10.733233129841839, 'avg_acc': 50.17248881153655, 'loss': 9.001572608947754}


EP_train:0:   7%|| 2022/27626 [04:44<1:00:12,  7.09it/s]

{'epoch': 0, 'iter': 2020, 'avg_loss': 10.723445136378391, 'avg_acc': 50.1700890648194, 'loss': 8.820048332214355}


EP_train:0:   7%|| 2032/27626 [04:46<1:00:36,  7.04it/s]

{'epoch': 0, 'iter': 2030, 'avg_loss': 10.714827669717478, 'avg_acc': 50.150787789266374, 'loss': 9.073273658752441}


EP_train:0:   7%|| 2042/27626 [04:47<1:00:13,  7.08it/s]

{'epoch': 0, 'iter': 2040, 'avg_loss': 10.7061995499745, 'avg_acc': 50.143924546790785, 'loss': 7.735583305358887}


EP_train:0:   7%|| 2052/27626 [04:48<1:01:03,  6.98it/s]

{'epoch': 0, 'iter': 2050, 'avg_loss': 10.697031922017231, 'avg_acc': 50.149317406143346, 'loss': 8.16777229309082}


EP_train:0:   7%|| 2062/27626 [04:50<1:00:14,  7.07it/s]

{'epoch': 0, 'iter': 2060, 'avg_loss': 10.687349849277073, 'avg_acc': 50.162239204269774, 'loss': 8.329235076904297}


EP_train:0:   8%|| 2072/27626 [04:51<59:41,  7.14it/s]

{'epoch': 0, 'iter': 2070, 'avg_loss': 10.67815614356792, 'avg_acc': 50.17503621438918, 'loss': 8.621030807495117}


EP_train:0:   8%|| 2082/27626 [04:53<59:53,  7.11it/s]

{'epoch': 0, 'iter': 2080, 'avg_loss': 10.668745607323855, 'avg_acc': 50.153171552138396, 'loss': 8.162529945373535}


EP_train:0:   8%|| 2092/27626 [04:54<59:45,  7.12it/s]

{'epoch': 0, 'iter': 2090, 'avg_loss': 10.659497898654582, 'avg_acc': 50.15094452415112, 'loss': 8.039162635803223}


EP_train:0:   8%|| 2102/27626 [04:55<59:36,  7.14it/s]

{'epoch': 0, 'iter': 2100, 'avg_loss': 10.65279423969238, 'avg_acc': 50.171049500237984, 'loss': 8.94189167022705}


EP_train:0:   8%|| 2112/27626 [04:57<1:00:11,  7.06it/s]

{'epoch': 0, 'iter': 2110, 'avg_loss': 10.644476345284524, 'avg_acc': 50.17468024632875, 'loss': 9.481986045837402}


EP_train:0:   8%|| 2122/27626 [04:58<1:00:10,  7.06it/s]

{'epoch': 0, 'iter': 2120, 'avg_loss': 10.637461213557465, 'avg_acc': 50.1753300330033, 'loss': 9.132853507995605}


EP_train:0:   8%|| 2132/27626 [05:00<59:55,  7.09it/s]  

{'epoch': 0, 'iter': 2130, 'avg_loss': 10.629163211301591, 'avg_acc': 50.180373064289064, 'loss': 9.050668716430664}


EP_train:0:   8%|| 2142/27626 [05:01<59:59,  7.08it/s]

{'epoch': 0, 'iter': 2140, 'avg_loss': 10.620788340811304, 'avg_acc': 50.17953059318076, 'loss': 9.919368743896484}


EP_train:0:   8%|| 2152/27626 [05:02<1:00:00,  7.08it/s]

{'epoch': 0, 'iter': 2150, 'avg_loss': 10.612879334577901, 'avg_acc': 50.16997907949791, 'loss': 8.657045364379883}


EP_train:0:   8%|| 2162/27626 [05:04<1:00:05,  7.06it/s]

{'epoch': 0, 'iter': 2160, 'avg_loss': 10.603560017855395, 'avg_acc': 50.187991670522905, 'loss': 8.710673332214355}


EP_train:0:   8%|| 2172/27626 [05:05<59:32,  7.13it/s]

{'epoch': 0, 'iter': 2170, 'avg_loss': 10.595550221707184, 'avg_acc': 50.17704974666053, 'loss': 8.581340789794922}


EP_train:0:   8%|| 2182/27626 [05:07<59:57,  7.07it/s]

{'epoch': 0, 'iter': 2180, 'avg_loss': 10.588568319262086, 'avg_acc': 50.181969280146724, 'loss': 9.095762252807617}


EP_train:0:   8%|| 2192/27626 [05:08<59:44,  7.10it/s]

{'epoch': 0, 'iter': 2190, 'avg_loss': 10.58080000553105, 'avg_acc': 50.18256503879507, 'loss': 8.397492408752441}


EP_train:0:   8%|| 2202/27626 [05:09<59:47,  7.09it/s]

{'epoch': 0, 'iter': 2200, 'avg_loss': 10.571719224428058, 'avg_acc': 50.18457519309405, 'loss': 9.726082801818848}


EP_train:0:   8%|| 2212/27626 [05:11<1:00:06,  7.05it/s]

{'epoch': 0, 'iter': 2210, 'avg_loss': 10.563722521834976, 'avg_acc': 50.187980551786524, 'loss': 8.5801420211792}


EP_train:0:   8%|| 2222/27626 [05:12<59:34,  7.11it/s]

{'epoch': 0, 'iter': 2220, 'avg_loss': 10.556139394432938, 'avg_acc': 50.19135524538496, 'loss': 8.609789848327637}


EP_train:0:   8%|| 2232/27626 [05:14<59:38,  7.10it/s]

{'epoch': 0, 'iter': 2230, 'avg_loss': 10.54803115655787, 'avg_acc': 50.17368892873151, 'loss': 8.954029083251953}


EP_train:0:   8%|| 2242/27626 [05:15<59:22,  7.12it/s]

{'epoch': 0, 'iter': 2240, 'avg_loss': 10.540445762465756, 'avg_acc': 50.18128067826863, 'loss': 8.53884220123291}


EP_train:0:   8%|| 2252/27626 [05:16<59:30,  7.11it/s]

{'epoch': 0, 'iter': 2250, 'avg_loss': 10.53216033022121, 'avg_acc': 50.179087072412266, 'loss': 9.166839599609375}


EP_train:0:   8%|| 2262/27626 [05:18<59:19,  7.13it/s]

{'epoch': 0, 'iter': 2260, 'avg_loss': 10.526255796568313, 'avg_acc': 50.18520566121185, 'loss': 9.48712158203125}


EP_train:0:   8%|| 2272/27626 [05:19<59:23,  7.11it/s]

{'epoch': 0, 'iter': 2270, 'avg_loss': 10.519468550218038, 'avg_acc': 50.15274108322325, 'loss': 10.050043106079102}


EP_train:0:   8%|| 2282/27626 [05:21<59:26,  7.11it/s]

{'epoch': 0, 'iter': 2280, 'avg_loss': 10.513647119186782, 'avg_acc': 50.14385138097326, 'loss': 9.618915557861328}


EP_train:0:   8%|| 2292/27626 [05:22<59:37,  7.08it/s]

{'epoch': 0, 'iter': 2290, 'avg_loss': 10.507125759998923, 'avg_acc': 50.13503928415539, 'loss': 8.19811725616455}


EP_train:0:   8%|| 2302/27626 [05:24<59:57,  7.04it/s]  

{'epoch': 0, 'iter': 2300, 'avg_loss': 10.499005167031486, 'avg_acc': 50.14667535853976, 'loss': 8.380976676940918}


EP_train:0:   8%|| 2312/27626 [05:25<59:30,  7.09it/s]

{'epoch': 0, 'iter': 2310, 'avg_loss': 10.491411887737952, 'avg_acc': 50.12846170488966, 'loss': 9.671371459960938}


EP_train:0:   8%|| 2322/27626 [05:26<59:28,  7.09it/s]

{'epoch': 0, 'iter': 2320, 'avg_loss': 10.485489449794823, 'avg_acc': 50.12252261956054, 'loss': 8.942007064819336}


EP_train:0:   8%|| 2332/27626 [05:28<59:29,  7.09it/s]

{'epoch': 0, 'iter': 2330, 'avg_loss': 10.479417689416296, 'avg_acc': 50.11127198627199, 'loss': 8.859347343444824}


EP_train:0:   8%|| 2342/27626 [05:29<59:29,  7.08it/s]

{'epoch': 0, 'iter': 2340, 'avg_loss': 10.472735562012879, 'avg_acc': 50.106791969243915, 'loss': 9.108819007873535}


EP_train:0:   9%|| 2352/27626 [05:31<59:13,  7.11it/s]

{'epoch': 0, 'iter': 2350, 'avg_loss': 10.465110292641572, 'avg_acc': 50.12760527435134, 'loss': 8.383101463317871}


EP_train:0:   9%|| 2362/27626 [05:32<59:09,  7.12it/s]

{'epoch': 0, 'iter': 2360, 'avg_loss': 10.458533563537145, 'avg_acc': 50.120446844557385, 'loss': 7.983753204345703}


EP_train:0:   9%|| 2372/27626 [05:33<59:42,  7.05it/s]

{'epoch': 0, 'iter': 2370, 'avg_loss': 10.451021969946865, 'avg_acc': 50.11993884436946, 'loss': 9.737271308898926}


EP_train:0:   9%|| 2382/27626 [05:35<59:25,  7.08it/s]

{'epoch': 0, 'iter': 2380, 'avg_loss': 10.444408731768984, 'avg_acc': 50.12337253254935, 'loss': 9.245409965515137}


EP_train:0:   9%|| 2392/27626 [05:36<59:52,  7.02it/s]

{'epoch': 0, 'iter': 2390, 'avg_loss': 10.438293578882988, 'avg_acc': 50.11762860727729, 'loss': 9.149643898010254}


EP_train:0:   9%|| 2402/27626 [05:38<59:15,  7.09it/s]

{'epoch': 0, 'iter': 2400, 'avg_loss': 10.432367791339091, 'avg_acc': 50.109329446064145, 'loss': 8.899062156677246}


EP_train:0:   9%|| 2412/27626 [05:39<58:53,  7.14it/s]

{'epoch': 0, 'iter': 2410, 'avg_loss': 10.42601461837718, 'avg_acc': 50.11665284114475, 'loss': 8.951769828796387}


EP_train:0:   9%|| 2422/27626 [05:40<1:00:27,  6.95it/s]

{'epoch': 0, 'iter': 2420, 'avg_loss': 10.420653035748058, 'avg_acc': 50.12391573729864, 'loss': 8.503026008605957}


EP_train:0:   9%|| 2432/27626 [05:42<59:13,  7.09it/s]

{'epoch': 0, 'iter': 2430, 'avg_loss': 10.413112675491453, 'avg_acc': 50.125976964212256, 'loss': 8.715977668762207}


EP_train:0:   9%|| 2442/27626 [05:43<59:08,  7.10it/s]

{'epoch': 0, 'iter': 2440, 'avg_loss': 10.405400636790961, 'avg_acc': 50.13314215485457, 'loss': 8.388286590576172}


EP_train:0:   9%|| 2452/27626 [05:45<59:27,  7.06it/s]

{'epoch': 0, 'iter': 2450, 'avg_loss': 10.398281941750447, 'avg_acc': 50.122399020807826, 'loss': 8.791048049926758}


EP_train:0:   9%|| 2462/27626 [05:46<59:06,  7.10it/s]

{'epoch': 0, 'iter': 2460, 'avg_loss': 10.391255627120234, 'avg_acc': 50.14475822836245, 'loss': 8.553326606750488}


EP_train:0:   9%|| 2472/27626 [05:47<59:16,  7.07it/s]

{'epoch': 0, 'iter': 2470, 'avg_loss': 10.385670612911037, 'avg_acc': 50.13784904896803, 'loss': 7.910526275634766}


EP_train:0:   9%|| 2482/27626 [05:49<59:47,  7.01it/s]

{'epoch': 0, 'iter': 2480, 'avg_loss': 10.379122681408244, 'avg_acc': 50.12595727529222, 'loss': 8.885004043579102}


EP_train:0:   9%|| 2492/27626 [05:50<59:21,  7.06it/s]

{'epoch': 0, 'iter': 2490, 'avg_loss': 10.37273244961157, 'avg_acc': 50.10412484945805, 'loss': 8.621467590332031}


EP_train:0:   9%|| 2502/27626 [05:52<59:27,  7.04it/s]

{'epoch': 0, 'iter': 2500, 'avg_loss': 10.367028033147093, 'avg_acc': 50.10620751699321, 'loss': 10.077850341796875}


EP_train:0:   9%|| 2512/27626 [05:53<59:13,  7.07it/s]

{'epoch': 0, 'iter': 2510, 'avg_loss': 10.359512011210123, 'avg_acc': 50.10827359617682, 'loss': 8.073406219482422}


EP_train:0:   9%|| 2522/27626 [05:55<59:30,  7.03it/s]

{'epoch': 0, 'iter': 2520, 'avg_loss': 10.352591427580226, 'avg_acc': 50.11280245934153, 'loss': 8.566123962402344}


EP_train:0:   9%|| 2532/27626 [05:56<59:19,  7.05it/s]

{'epoch': 0, 'iter': 2530, 'avg_loss': 10.346082868429098, 'avg_acc': 50.116060845515605, 'loss': 9.186424255371094}


EP_train:0:   9%|| 2542/27626 [05:57<59:23,  7.04it/s]

{'epoch': 0, 'iter': 2540, 'avg_loss': 10.340106485772912, 'avg_acc': 50.09838646202282, 'loss': 8.499229431152344}


EP_train:0:   9%|| 2552/27626 [05:59<59:17,  7.05it/s]

{'epoch': 0, 'iter': 2550, 'avg_loss': 10.333098110990028, 'avg_acc': 50.07717561740493, 'loss': 8.385838508605957}


EP_train:0:   9%|| 2562/27626 [06:00<59:16,  7.05it/s]

{'epoch': 0, 'iter': 2560, 'avg_loss': 10.327916875941266, 'avg_acc': 50.08541585318235, 'loss': 8.837742805480957}


EP_train:0:   9%|| 2572/27626 [06:02<59:25,  7.03it/s]

{'epoch': 0, 'iter': 2570, 'avg_loss': 10.322745186306076, 'avg_acc': 50.103315830416186, 'loss': 8.959938049316406}


EP_train:0:   9%|| 2582/27626 [06:03<58:55,  7.08it/s]

{'epoch': 0, 'iter': 2580, 'avg_loss': 10.315751806426908, 'avg_acc': 50.10654784967067, 'loss': 8.64196491241455}


EP_train:0:   9%|| 2592/27626 [06:04<58:41,  7.11it/s]

{'epoch': 0, 'iter': 2590, 'avg_loss': 10.311126501575854, 'avg_acc': 50.10131223465844, 'loss': 9.654808044433594}


EP_train:0:   9%|| 2602/27626 [06:06<58:36,  7.12it/s]

{'epoch': 0, 'iter': 2600, 'avg_loss': 10.305587482012404, 'avg_acc': 50.10933294886583, 'loss': 8.072824478149414}


EP_train:0:   9%|| 2612/27626 [06:07<58:49,  7.09it/s]

{'epoch': 0, 'iter': 2610, 'avg_loss': 10.299994784997642, 'avg_acc': 50.11011106855611, 'loss': 9.044325828552246}


EP_train:0:   9%|| 2622/27626 [06:09<58:44,  7.09it/s]

{'epoch': 0, 'iter': 2620, 'avg_loss': 10.293011259277646, 'avg_acc': 50.11326783670354, 'loss': 8.58096981048584}


EP_train:0:  10%|| 2632/27626 [06:10<58:35,  7.11it/s]

{'epoch': 0, 'iter': 2630, 'avg_loss': 10.286757599849839, 'avg_acc': 50.116400608133794, 'loss': 8.647611618041992}


EP_train:0:  10%|| 2642/27626 [06:11<58:58,  7.06it/s]

{'epoch': 0, 'iter': 2640, 'avg_loss': 10.282345424769098, 'avg_acc': 50.118326391518366, 'loss': 9.69143295288086}


EP_train:0:  10%|| 2652/27626 [06:13<58:49,  7.08it/s]

{'epoch': 0, 'iter': 2650, 'avg_loss': 10.278150872885618, 'avg_acc': 50.104913240286685, 'loss': 9.288968086242676}


EP_train:0:  10%|| 2662/27626 [06:14<58:44,  7.08it/s]

{'epoch': 0, 'iter': 2660, 'avg_loss': 10.273905661624879, 'avg_acc': 50.09277527245396, 'loss': 9.009281158447266}


EP_train:0:  10%|| 2672/27626 [06:16<58:39,  7.09it/s]

{'epoch': 0, 'iter': 2670, 'avg_loss': 10.268385190188951, 'avg_acc': 50.08540808685885, 'loss': 8.35171127319336}


EP_train:0:  10%|| 2682/27626 [06:17<58:22,  7.12it/s]

{'epoch': 0, 'iter': 2680, 'avg_loss': 10.262924106118396, 'avg_acc': 50.0874207385304, 'loss': 9.180389404296875}


EP_train:0:  10%|| 2692/27626 [06:18<59:01,  7.04it/s]

{'epoch': 0, 'iter': 2690, 'avg_loss': 10.257278114568342, 'avg_acc': 50.08477331846897, 'loss': 9.659748077392578}


EP_train:0:  10%|| 2702/27626 [06:20<58:33,  7.09it/s]

{'epoch': 0, 'iter': 2700, 'avg_loss': 10.251647863066758, 'avg_acc': 50.11454091077378, 'loss': 8.164639472961426}


EP_train:0:  10%|| 2712/27626 [06:21<58:11,  7.14it/s]

{'epoch': 0, 'iter': 2710, 'avg_loss': 10.245686594035432, 'avg_acc': 50.10720213943194, 'loss': 9.119787216186523}


EP_train:0:  10%|| 2722/27626 [06:23<58:27,  7.10it/s]

{'epoch': 0, 'iter': 2720, 'avg_loss': 10.240463924162619, 'avg_acc': 50.10221425946343, 'loss': 8.604619026184082}


EP_train:0:  10%|| 2732/27626 [06:24<58:47,  7.06it/s]

{'epoch': 0, 'iter': 2730, 'avg_loss': 10.234430951628655, 'avg_acc': 50.08353167337971, 'loss': 8.55744743347168}


EP_train:0:  10%|| 2742/27626 [06:25<58:35,  7.08it/s]

{'epoch': 0, 'iter': 2740, 'avg_loss': 10.23095975849412, 'avg_acc': 50.092347683327255, 'loss': 9.702105522155762}


EP_train:0:  10%|| 2752/27626 [06:27<58:48,  7.05it/s]

{'epoch': 0, 'iter': 2750, 'avg_loss': 10.22609270619982, 'avg_acc': 50.09541984732825, 'loss': 8.135649681091309}


EP_train:0:  10%|| 2762/27626 [06:28<58:23,  7.10it/s]

{'epoch': 0, 'iter': 2760, 'avg_loss': 10.221607398054212, 'avg_acc': 50.08601955813111, 'loss': 9.615049362182617}


EP_train:0:  10%|| 2772/27626 [06:30<58:33,  7.07it/s]

{'epoch': 0, 'iter': 2770, 'avg_loss': 10.21632995849824, 'avg_acc': 50.07555936485023, 'loss': 8.178322792053223}


EP_train:0:  10%|| 2782/27626 [06:31<58:10,  7.12it/s]

{'epoch': 0, 'iter': 2780, 'avg_loss': 10.209941706954039, 'avg_acc': 50.071916576770946, 'loss': 8.859466552734375}


EP_train:0:  10%|| 2792/27626 [06:32<58:05,  7.12it/s]

{'epoch': 0, 'iter': 2790, 'avg_loss': 10.205551161641333, 'avg_acc': 50.06829989251165, 'loss': 8.791864395141602}


EP_train:0:  10%|| 2802/27626 [06:34<58:33,  7.06it/s]

{'epoch': 0, 'iter': 2800, 'avg_loss': 10.201433772318962, 'avg_acc': 50.06582470546233, 'loss': 7.976152420043945}


EP_train:0:  10%|| 2812/27626 [06:35<58:25,  7.08it/s]

{'epoch': 0, 'iter': 2810, 'avg_loss': 10.197415240419076, 'avg_acc': 50.06003201707577, 'loss': 8.773226737976074}


EP_train:0:  10%|| 2822/27626 [06:37<58:24,  7.08it/s]

{'epoch': 0, 'iter': 2820, 'avg_loss': 10.191256520765887, 'avg_acc': 50.05538816022687, 'loss': 8.434308052062988}


EP_train:0:  10%|| 2832/27626 [06:38<58:29,  7.07it/s]

{'epoch': 0, 'iter': 2830, 'avg_loss': 10.18591031923769, 'avg_acc': 50.075061815612855, 'loss': 8.79059886932373}


EP_train:0:  10%|| 2842/27626 [06:39<58:30,  7.06it/s]

{'epoch': 0, 'iter': 2840, 'avg_loss': 10.180393623659537, 'avg_acc': 50.07149771207321, 'loss': 8.38939094543457}


EP_train:0:  10%|| 2852/27626 [06:41<58:17,  7.08it/s]

{'epoch': 0, 'iter': 2850, 'avg_loss': 10.175954782766278, 'avg_acc': 50.0559014380919, 'loss': 9.041672706604004}


EP_train:0:  10%|| 2862/27626 [06:42<58:29,  7.06it/s]

{'epoch': 0, 'iter': 2860, 'avg_loss': 10.170789920462882, 'avg_acc': 50.0622596994058, 'loss': 9.182700157165527}


EP_train:0:  10%|| 2872/27626 [06:44<57:50,  7.13it/s]

{'epoch': 0, 'iter': 2870, 'avg_loss': 10.165866657152113, 'avg_acc': 50.05442354580285, 'loss': 8.100512504577637}


EP_train:0:  10%|| 2882/27626 [06:45<57:49,  7.13it/s]

{'epoch': 0, 'iter': 2880, 'avg_loss': 10.162073740815172, 'avg_acc': 50.04772648385977, 'loss': 9.240229606628418}


EP_train:0:  10%|| 2892/27626 [06:46<58:14,  7.08it/s]

{'epoch': 0, 'iter': 2890, 'avg_loss': 10.157499050937364, 'avg_acc': 50.05512798339675, 'loss': 7.911416530609131}


EP_train:0:  11%|| 2902/27626 [06:48<58:03,  7.10it/s]

{'epoch': 0, 'iter': 2900, 'avg_loss': 10.152858325342851, 'avg_acc': 50.085099965529125, 'loss': 9.093060493469238}


EP_train:0:  11%|| 2912/27626 [06:49<57:47,  7.13it/s]

{'epoch': 0, 'iter': 2910, 'avg_loss': 10.148063030328098, 'avg_acc': 50.081587083476464, 'loss': 8.369112968444824}


EP_train:0:  11%|| 2922/27626 [06:51<58:10,  7.08it/s]

{'epoch': 0, 'iter': 2920, 'avg_loss': 10.142355227707103, 'avg_acc': 50.09735535775419, 'loss': 7.864046096801758}


EP_train:0:  11%|| 2932/27626 [06:52<58:32,  7.03it/s]

{'epoch': 0, 'iter': 2930, 'avg_loss': 10.136814104091263, 'avg_acc': 50.087427499147054, 'loss': 8.108458518981934}


EP_train:0:  11%|| 2942/27626 [06:54<57:58,  7.10it/s]

{'epoch': 0, 'iter': 2940, 'avg_loss': 10.132133960237473, 'avg_acc': 50.109444066643995, 'loss': 8.922804832458496}


EP_train:0:  11%|| 2952/27626 [06:55<58:06,  7.08it/s]

{'epoch': 0, 'iter': 2950, 'avg_loss': 10.126542755160562, 'avg_acc': 50.090011860386305, 'loss': 8.186217308044434}


EP_train:0:  11%|| 2962/27626 [06:56<57:46,  7.12it/s]

{'epoch': 0, 'iter': 2960, 'avg_loss': 10.122862684771, 'avg_acc': 50.09815096251267, 'loss': 9.669595718383789}


EP_train:0:  11%|| 2972/27626 [06:58<58:15,  7.05it/s]

{'epoch': 0, 'iter': 2970, 'avg_loss': 10.119418113410614, 'avg_acc': 50.10833894311679, 'loss': 8.948036193847656}


EP_train:0:  11%|| 2982/27626 [06:59<58:13,  7.05it/s]

{'epoch': 0, 'iter': 2980, 'avg_loss': 10.1158599696756, 'avg_acc': 50.115313653136525, 'loss': 8.573132514953613}


EP_train:0:  11%|| 2992/27626 [07:01<57:32,  7.14it/s]

{'epoch': 0, 'iter': 2990, 'avg_loss': 10.11160186715287, 'avg_acc': 50.10865931126713, 'loss': 8.332199096679688}


EP_train:0:  11%|| 3002/27626 [07:02<57:37,  7.12it/s]

{'epoch': 0, 'iter': 3000, 'avg_loss': 10.108222597879157, 'avg_acc': 50.11350383205598, 'loss': 8.448087692260742}


EP_train:0:  11%|| 3012/27626 [07:03<58:07,  7.06it/s]

{'epoch': 0, 'iter': 3010, 'avg_loss': 10.103863792299077, 'avg_acc': 50.103786117568916, 'loss': 9.548453330993652}


EP_train:0:  11%|| 3022/27626 [07:05<57:45,  7.10it/s]

{'epoch': 0, 'iter': 3020, 'avg_loss': 10.10021112818309, 'avg_acc': 50.10033929162528, 'loss': 8.594254493713379}


EP_train:0:  11%|| 3032/27626 [07:06<58:04,  7.06it/s]

{'epoch': 0, 'iter': 3030, 'avg_loss': 10.095595702917338, 'avg_acc': 50.10103926096998, 'loss': 8.82009506225586}


EP_train:0:  11%|| 3042/27626 [07:08<57:38,  7.11it/s]

{'epoch': 0, 'iter': 3040, 'avg_loss': 10.090492727097145, 'avg_acc': 50.10481749424531, 'loss': 8.758447647094727}


EP_train:0:  11%|| 3052/27626 [07:09<57:47,  7.09it/s]

{'epoch': 0, 'iter': 3050, 'avg_loss': 10.085433314800731, 'avg_acc': 50.095255653883974, 'loss': 8.02202033996582}


EP_train:0:  11%|| 3062/27626 [07:10<57:54,  7.07it/s]

{'epoch': 0, 'iter': 3060, 'avg_loss': 10.08085561071257, 'avg_acc': 50.08677719699445, 'loss': 8.863821983337402}


EP_train:0:  11%|| 3072/27626 [07:12<57:27,  7.12it/s]

{'epoch': 0, 'iter': 3070, 'avg_loss': 10.077071246088463, 'avg_acc': 50.089547378704005, 'loss': 8.736980438232422}


EP_train:0:  11%|| 3082/27626 [07:13<57:33,  7.11it/s]

{'epoch': 0, 'iter': 3080, 'avg_loss': 10.073771559700722, 'avg_acc': 50.080128205128204, 'loss': 8.879733085632324}


EP_train:0:  11%|| 3092/27626 [07:15<57:58,  7.05it/s]

{'epoch': 0, 'iter': 3090, 'avg_loss': 10.069300294578056, 'avg_acc': 50.08290197347137, 'loss': 8.168357849121094}


EP_train:0:  11%|| 3102/27626 [07:16<57:57,  7.05it/s]

{'epoch': 0, 'iter': 3100, 'avg_loss': 10.065725896097852, 'avg_acc': 50.07054176072234, 'loss': 9.428130149841309}


EP_train:0:  11%|| 3112/27626 [07:17<58:34,  6.98it/s]

{'epoch': 0, 'iter': 3110, 'avg_loss': 10.062210426955927, 'avg_acc': 50.06629701060752, 'loss': 9.138813972473145}


EP_train:0:  11%|| 3122/27626 [07:19<57:28,  7.11it/s]

{'epoch': 0, 'iter': 3120, 'avg_loss': 10.05878573554863, 'avg_acc': 50.057073053508496, 'loss': 8.775389671325684}


EP_train:0:  11%|| 3132/27626 [07:20<57:19,  7.12it/s]

{'epoch': 0, 'iter': 3130, 'avg_loss': 10.054892038925887, 'avg_acc': 50.062879271798145, 'loss': 8.07006549835205}


EP_train:0:  11%|| 3142/27626 [07:22<57:41,  7.07it/s]

{'epoch': 0, 'iter': 3140, 'avg_loss': 10.051026438421902, 'avg_acc': 50.060689270932826, 'loss': 9.274894714355469}


EP_train:0:  11%|| 3152/27626 [07:23<57:28,  7.10it/s]

{'epoch': 0, 'iter': 3150, 'avg_loss': 10.047227182149205, 'avg_acc': 50.057521421770865, 'loss': 9.203720092773438}


EP_train:0:  11%|| 3162/27626 [07:24<57:19,  7.11it/s]

{'epoch': 0, 'iter': 3160, 'avg_loss': 10.043225279943508, 'avg_acc': 50.06327111673521, 'loss': 8.462244033813477}


EP_train:0:  11%|| 3172/27626 [07:26<57:25,  7.10it/s]

{'epoch': 0, 'iter': 3170, 'avg_loss': 10.03806180587066, 'avg_acc': 50.06504257332072, 'loss': 9.149855613708496}


EP_train:0:  12%|| 3182/27626 [07:27<57:22,  7.10it/s]

{'epoch': 0, 'iter': 3180, 'avg_loss': 10.034389368583106, 'avg_acc': 50.071714869537885, 'loss': 9.371418952941895}


EP_train:0:  12%|| 3192/27626 [07:29<57:33,  7.07it/s]

{'epoch': 0, 'iter': 3190, 'avg_loss': 10.031207472431515, 'avg_acc': 50.0852005640865, 'loss': 8.996804237365723}


EP_train:0:  12%|| 3202/27626 [07:30<57:51,  7.04it/s]

{'epoch': 0, 'iter': 3200, 'avg_loss': 10.027510572395933, 'avg_acc': 50.08591065292096, 'loss': 8.301584243774414}


EP_train:0:  12%|| 3212/27626 [07:31<57:21,  7.09it/s]

{'epoch': 0, 'iter': 3210, 'avg_loss': 10.023720411896445, 'avg_acc': 50.10218779196512, 'loss': 8.588292121887207}


EP_train:0:  12%|| 3222/27626 [07:33<57:17,  7.10it/s]

{'epoch': 0, 'iter': 3220, 'avg_loss': 10.020117991239305, 'avg_acc': 50.11448307978888, 'loss': 8.922195434570312}


EP_train:0:  12%|| 3232/27626 [07:34<57:10,  7.11it/s]

{'epoch': 0, 'iter': 3230, 'avg_loss': 10.016196023934546, 'avg_acc': 50.113161559888574, 'loss': 8.90870475769043}


EP_train:0:  12%|| 3242/27626 [07:36<57:16,  7.10it/s]

{'epoch': 0, 'iter': 3240, 'avg_loss': 10.012256568760387, 'avg_acc': 50.12245448935514, 'loss': 8.456840515136719}


EP_train:0:  12%|| 3252/27626 [07:37<57:38,  7.05it/s]

{'epoch': 0, 'iter': 3250, 'avg_loss': 10.008628364710836, 'avg_acc': 50.131690249154104, 'loss': 8.403711318969727}


EP_train:0:  12%|| 3262/27626 [07:38<56:59,  7.12it/s]

{'epoch': 0, 'iter': 3260, 'avg_loss': 10.004506560345801, 'avg_acc': 50.132244710211594, 'loss': 9.027348518371582}


EP_train:0:  12%|| 3272/27626 [07:40<57:07,  7.11it/s]

{'epoch': 0, 'iter': 3270, 'avg_loss': 10.000754970689316, 'avg_acc': 50.1366172424335, 'loss': 8.492703437805176}


EP_train:0:  12%|| 3282/27626 [07:41<57:19,  7.08it/s]

{'epoch': 0, 'iter': 3280, 'avg_loss': 9.997342575908034, 'avg_acc': 50.14096312099969, 'loss': 9.333148002624512}


EP_train:0:  12%|| 3292/27626 [07:43<57:27,  7.06it/s]

{'epoch': 0, 'iter': 3290, 'avg_loss': 9.994433473722074, 'avg_acc': 50.12724096019448, 'loss': 8.892034530639648}


EP_train:0:  12%|| 3302/27626 [07:44<57:34,  7.04it/s]

{'epoch': 0, 'iter': 3300, 'avg_loss': 9.989952833065297, 'avg_acc': 50.11833535292336, 'loss': 8.433587074279785}


EP_train:0:  12%|| 3312/27626 [07:45<57:14,  7.08it/s]

{'epoch': 0, 'iter': 3310, 'avg_loss': 9.984823579480013, 'avg_acc': 50.114202657807304, 'loss': 8.591121673583984}


EP_train:0:  12%|| 3322/27626 [07:47<56:43,  7.14it/s]

{'epoch': 0, 'iter': 3320, 'avg_loss': 9.981040170055598, 'avg_acc': 50.11197681421259, 'loss': 9.044299125671387}


EP_train:0:  12%|| 3332/27626 [07:48<57:26,  7.05it/s]

{'epoch': 0, 'iter': 3330, 'avg_loss': 9.977710738478391, 'avg_acc': 50.11070249174422, 'loss': 8.383726119995117}


EP_train:0:  12%|| 3342/27626 [07:50<57:04,  7.09it/s]

{'epoch': 0, 'iter': 3340, 'avg_loss': 9.974117076100793, 'avg_acc': 50.11972463334331, 'loss': 8.444853782653809}


EP_train:0:  12%|| 3352/27626 [07:51<56:51,  7.11it/s]

{'epoch': 0, 'iter': 3350, 'avg_loss': 9.970602045767842, 'avg_acc': 50.124030140256636, 'loss': 9.392969131469727}


EP_train:0:  12%|| 3362/27626 [07:52<56:45,  7.12it/s]

{'epoch': 0, 'iter': 3360, 'avg_loss': 9.966470813580973, 'avg_acc': 50.10878458792026, 'loss': 7.928455829620361}


EP_train:0:  12%|| 3372/27626 [07:54<57:07,  7.08it/s]

{'epoch': 0, 'iter': 3370, 'avg_loss': 9.9622884475665, 'avg_acc': 50.102899733016905, 'loss': 8.712390899658203}


EP_train:0:  12%|| 3382/27626 [07:55<57:19,  7.05it/s]

{'epoch': 0, 'iter': 3380, 'avg_loss': 9.958628279751697, 'avg_acc': 50.09889825495416, 'loss': 8.772539138793945}


EP_train:0:  12%|| 3392/27626 [07:57<56:51,  7.10it/s]

{'epoch': 0, 'iter': 3390, 'avg_loss': 9.954777287860354, 'avg_acc': 50.09123414921852, 'loss': 8.56447982788086}


EP_train:0:  12%|| 3402/27626 [07:58<56:58,  7.09it/s]

{'epoch': 0, 'iter': 3400, 'avg_loss': 9.952043914542553, 'avg_acc': 50.10015436636284, 'loss': 8.442137718200684}


EP_train:0:  12%|| 3412/27626 [07:59<57:01,  7.08it/s]

{'epoch': 0, 'iter': 3410, 'avg_loss': 9.947879632593635, 'avg_acc': 50.108106127235416, 'loss': 8.988296508789062}


EP_train:0:  12%|| 3422/27626 [08:01<56:55,  7.09it/s]

{'epoch': 0, 'iter': 3420, 'avg_loss': 9.944267308492474, 'avg_acc': 50.09682841274481, 'loss': 7.906386852264404}


EP_train:0:  12%|| 3432/27626 [08:02<57:23,  7.03it/s]

{'epoch': 0, 'iter': 3430, 'avg_loss': 9.940912861408489, 'avg_acc': 50.09290294374817, 'loss': 8.308733940124512}


EP_train:0:  12%|| 3442/27626 [08:04<56:56,  7.08it/s]

{'epoch': 0, 'iter': 3440, 'avg_loss': 9.936156675171901, 'avg_acc': 50.08900029061319, 'loss': 7.811049461364746}


EP_train:0:  12%|| 3452/27626 [08:05<56:47,  7.09it/s]

{'epoch': 0, 'iter': 3450, 'avg_loss': 9.932624854754103, 'avg_acc': 50.07244277021153, 'loss': 8.570446968078613}


EP_train:0:  13%|| 3462/27626 [08:07<57:25,  7.01it/s]

{'epoch': 0, 'iter': 3460, 'avg_loss': 9.929123454133888, 'avg_acc': 50.08035972262353, 'loss': 8.863738059997559}


EP_train:0:  13%|| 3472/27626 [08:08<57:16,  7.03it/s]

{'epoch': 0, 'iter': 3470, 'avg_loss': 9.925388580164007, 'avg_acc': 50.07382598674733, 'loss': 9.333295822143555}


EP_train:0:  13%|| 3482/27626 [08:09<56:32,  7.12it/s]

{'epoch': 0, 'iter': 3480, 'avg_loss': 9.922183909932624, 'avg_acc': 50.060147945992526, 'loss': 8.293523788452148}


EP_train:0:  13%|| 3492/27626 [08:11<56:30,  7.12it/s]

{'epoch': 0, 'iter': 3490, 'avg_loss': 9.91930940546644, 'avg_acc': 50.05191922085363, 'loss': 9.243512153625488}


EP_train:0:  13%|| 3502/27626 [08:12<56:51,  7.07it/s]

{'epoch': 0, 'iter': 3500, 'avg_loss': 9.917133482753533, 'avg_acc': 50.06158954584404, 'loss': 9.985819816589355}


EP_train:0:  13%|| 3512/27626 [08:14<56:23,  7.13it/s]

{'epoch': 0, 'iter': 3510, 'avg_loss': 9.91426828585945, 'avg_acc': 50.06853460552549, 'loss': 8.720821380615234}


EP_train:0:  13%|| 3522/27626 [08:15<56:12,  7.15it/s]

{'epoch': 0, 'iter': 3520, 'avg_loss': 9.911556922654745, 'avg_acc': 50.079877875603515, 'loss': 9.325620651245117}


EP_train:0:  13%|| 3532/27626 [08:16<56:25,  7.12it/s]

{'epoch': 0, 'iter': 3530, 'avg_loss': 9.908118785187261, 'avg_acc': 50.0672613990371, 'loss': 8.354520797729492}


EP_train:0:  13%|| 3542/27626 [08:18<56:42,  7.08it/s]

{'epoch': 0, 'iter': 3540, 'avg_loss': 9.904935988959739, 'avg_acc': 50.06442389155605, 'loss': 9.18877124786377}


EP_train:0:  13%|| 3552/27626 [08:19<56:51,  7.06it/s]

{'epoch': 0, 'iter': 3550, 'avg_loss': 9.901373254517642, 'avg_acc': 50.07128273725711, 'loss': 8.292579650878906}


EP_train:0:  13%|| 3562/27626 [08:21<57:18,  7.00it/s]

{'epoch': 0, 'iter': 3560, 'avg_loss': 9.898089385976688, 'avg_acc': 50.068449873631, 'loss': 8.197916030883789}


EP_train:0:  13%|| 3572/27626 [08:22<56:32,  7.09it/s]

{'epoch': 0, 'iter': 3570, 'avg_loss': 9.893410998878275, 'avg_acc': 50.08050966115933, 'loss': 8.409220695495605}


EP_train:0:  13%|| 3582/27626 [08:23<56:30,  7.09it/s]

{'epoch': 0, 'iter': 3580, 'avg_loss': 9.890021674761524, 'avg_acc': 50.075048869030994, 'loss': 7.850034236907959}


EP_train:0:  13%|| 3592/27626 [08:25<56:15,  7.12it/s]

{'epoch': 0, 'iter': 3590, 'avg_loss': 9.88699544554997, 'avg_acc': 50.075710108604845, 'loss': 8.724764823913574}


EP_train:0:  13%|| 3602/27626 [08:26<56:20,  7.11it/s]

{'epoch': 0, 'iter': 3600, 'avg_loss': 9.884293323290146, 'avg_acc': 50.08417800610941, 'loss': 8.981524467468262}


EP_train:0:  13%|| 3612/27626 [08:28<56:25,  7.09it/s]

{'epoch': 0, 'iter': 3610, 'avg_loss': 9.880133827582386, 'avg_acc': 50.09779147050678, 'loss': 8.39699935913086}


EP_train:0:  13%|| 3622/27626 [08:29<56:23,  7.10it/s]

{'epoch': 0, 'iter': 3620, 'avg_loss': 9.87787679492094, 'avg_acc': 50.08198702016018, 'loss': 9.352567672729492}


EP_train:0:  13%|| 3632/27626 [08:30<56:16,  7.11it/s]

{'epoch': 0, 'iter': 3630, 'avg_loss': 9.874987794623431, 'avg_acc': 50.079179289451936, 'loss': 8.27973461151123}


EP_train:0:  13%|| 3642/27626 [08:32<56:30,  7.07it/s]

{'epoch': 0, 'iter': 3640, 'avg_loss': 9.87213140383948, 'avg_acc': 50.0832532271354, 'loss': 9.11184024810791}


EP_train:0:  13%|| 3652/27626 [08:33<56:48,  7.03it/s]

{'epoch': 0, 'iter': 3650, 'avg_loss': 9.868884612945164, 'avg_acc': 50.07788961928239, 'loss': 8.827699661254883}


EP_train:0:  13%|| 3662/27626 [08:35<56:15,  7.10it/s]

{'epoch': 0, 'iter': 3660, 'avg_loss': 9.865450942688906, 'avg_acc': 50.0751160885004, 'loss': 8.361952781677246}


EP_train:0:  13%|| 3672/27626 [08:36<56:29,  7.07it/s]

{'epoch': 0, 'iter': 3670, 'avg_loss': 9.862324571258785, 'avg_acc': 50.069803868155816, 'loss': 8.964025497436523}


EP_train:0:  13%|| 3682/27626 [08:37<56:29,  7.06it/s]

{'epoch': 0, 'iter': 3680, 'avg_loss': 9.858840967643134, 'avg_acc': 50.05857783211084, 'loss': 9.786574363708496}


EP_train:0:  13%|| 3692/27626 [08:39<56:19,  7.08it/s]

{'epoch': 0, 'iter': 3690, 'avg_loss': 9.85529194805752, 'avg_acc': 50.0634990517475, 'loss': 8.089377403259277}


EP_train:0:  13%|| 3702/27626 [08:40<56:07,  7.10it/s]

{'epoch': 0, 'iter': 3700, 'avg_loss': 9.852980331935228, 'avg_acc': 50.05150634963523, 'loss': 8.845104217529297}


EP_train:0:  13%|| 3712/27626 [08:42<56:12,  7.09it/s]

{'epoch': 0, 'iter': 3710, 'avg_loss': 9.850128498272701, 'avg_acc': 50.0614726488817, 'loss': 8.864105224609375}


EP_train:0:  13%|| 3722/27626 [08:43<56:13,  7.08it/s]

{'epoch': 0, 'iter': 3720, 'avg_loss': 9.84707744555331, 'avg_acc': 50.06886589626445, 'loss': 9.034167289733887}


EP_train:0:  14%|| 3732/27626 [08:44<57:00,  6.99it/s]

{'epoch': 0, 'iter': 3730, 'avg_loss': 9.843597362137965, 'avg_acc': 50.07538193513803, 'loss': 7.767045497894287}


EP_train:0:  14%|| 3742/27626 [08:46<56:36,  7.03it/s]

{'epoch': 0, 'iter': 3740, 'avg_loss': 9.840360451815314, 'avg_acc': 50.07267441860465, 'loss': 8.650851249694824}


EP_train:0:  14%|| 3752/27626 [08:47<56:08,  7.09it/s]

{'epoch': 0, 'iter': 3750, 'avg_loss': 9.837650665185572, 'avg_acc': 50.07581311650227, 'loss': 8.934982299804688}


EP_train:0:  14%|| 3762/27626 [08:49<55:55,  7.11it/s]

{'epoch': 0, 'iter': 3760, 'avg_loss': 9.834444967998655, 'avg_acc': 50.07561153948418, 'loss': 8.67723274230957}


EP_train:0:  14%|| 3772/27626 [08:50<56:07,  7.08it/s]

{'epoch': 0, 'iter': 3770, 'avg_loss': 9.831177903941379, 'avg_acc': 50.07623972421108, 'loss': 9.129868507385254}


EP_train:0:  14%|| 3782/27626 [08:51<56:08,  7.08it/s]

{'epoch': 0, 'iter': 3780, 'avg_loss': 9.829607419758302, 'avg_acc': 50.06942607775721, 'loss': 9.406314849853516}


EP_train:0:  14%|| 3792/27626 [08:53<56:00,  7.09it/s]

{'epoch': 0, 'iter': 3790, 'avg_loss': 9.826809380998464, 'avg_acc': 50.069242943814295, 'loss': 8.582762718200684}


EP_train:0:  14%|| 3802/27626 [08:54<56:07,  7.07it/s]

{'epoch': 0, 'iter': 3800, 'avg_loss': 9.824844660178137, 'avg_acc': 50.07728229413312, 'loss': 10.053214073181152}


EP_train:0:  14%|| 3812/27626 [08:56<56:04,  7.08it/s]

{'epoch': 0, 'iter': 3810, 'avg_loss': 9.82111686535377, 'avg_acc': 50.06887955917082, 'loss': 8.098583221435547}


EP_train:0:  14%|| 3822/27626 [08:57<55:57,  7.09it/s]

{'epoch': 0, 'iter': 3820, 'avg_loss': 9.81740511040886, 'avg_acc': 50.074424234493584, 'loss': 8.787955284118652}


EP_train:0:  14%|| 3832/27626 [08:59<56:42,  6.99it/s]

{'epoch': 0, 'iter': 3830, 'avg_loss': 9.814180390948696, 'avg_acc': 50.0644413991125, 'loss': 8.017888069152832}


EP_train:0:  14%|| 3842/27626 [09:00<55:41,  7.12it/s]

{'epoch': 0, 'iter': 3840, 'avg_loss': 9.81098623357692, 'avg_acc': 50.06346003644884, 'loss': 9.261310577392578}


EP_train:0:  14%|| 3852/27626 [09:01<55:36,  7.12it/s]

{'epoch': 0, 'iter': 3850, 'avg_loss': 9.808801586861364, 'avg_acc': 50.07303297844715, 'loss': 8.988430976867676}


EP_train:0:  14%|| 3862/27626 [09:03<55:48,  7.10it/s]

{'epoch': 0, 'iter': 3860, 'avg_loss': 9.806584451335047, 'avg_acc': 50.078509453509454, 'loss': 8.823877334594727}


EP_train:0:  14%|| 3872/27626 [09:04<55:31,  7.13it/s]

{'epoch': 0, 'iter': 3870, 'avg_loss': 9.803839811888261, 'avg_acc': 50.08153577886851, 'loss': 7.878249168395996}


EP_train:0:  14%|| 3882/27626 [09:06<55:50,  7.09it/s]

{'epoch': 0, 'iter': 3880, 'avg_loss': 9.800537876540993, 'avg_acc': 50.074078845658335, 'loss': 9.270371437072754}


EP_train:0:  14%|| 3892/27626 [09:07<56:05,  7.05it/s]

{'epoch': 0, 'iter': 3890, 'avg_loss': 9.79808842544536, 'avg_acc': 50.063447699820095, 'loss': 9.352724075317383}


EP_train:0:  14%|| 3902/27626 [09:08<55:44,  7.09it/s]

{'epoch': 0, 'iter': 3900, 'avg_loss': 9.794931991868555, 'avg_acc': 50.05687644193797, 'loss': 8.478650093078613}


EP_train:0:  14%|| 3912/27626 [09:10<55:50,  7.08it/s]

{'epoch': 0, 'iter': 3910, 'avg_loss': 9.792985161440173, 'avg_acc': 50.052735873178214, 'loss': 9.15049934387207}


EP_train:0:  14%|| 3922/27626 [09:11<55:58,  7.06it/s]

{'epoch': 0, 'iter': 3920, 'avg_loss': 9.79083749816844, 'avg_acc': 50.051007396072436, 'loss': 9.42595100402832}


EP_train:0:  14%|| 3932/27626 [09:13<55:34,  7.11it/s]

{'epoch': 0, 'iter': 3930, 'avg_loss': 9.78863919051177, 'avg_acc': 50.05405749173239, 'loss': 8.513544082641602}


EP_train:0:  14%|| 3942/27626 [09:14<56:10,  7.03it/s]

{'epoch': 0, 'iter': 3940, 'avg_loss': 9.78522497698366, 'avg_acc': 50.059470946460294, 'loss': 8.351840019226074}


EP_train:0:  14%|| 3952/27626 [09:15<55:41,  7.09it/s]

{'epoch': 0, 'iter': 3950, 'avg_loss': 9.783321331616023, 'avg_acc': 50.06406605922551, 'loss': 9.032096862792969}


EP_train:0:  14%|| 3962/27626 [09:17<55:38,  7.09it/s]

{'epoch': 0, 'iter': 3960, 'avg_loss': 9.781781301809001, 'avg_acc': 50.067060085836914, 'loss': 9.320123672485352}


EP_train:0:  14%|| 3972/27626 [09:18<55:45,  7.07it/s]

{'epoch': 0, 'iter': 3970, 'avg_loss': 9.778537314354217, 'avg_acc': 50.070039032989165, 'loss': 8.92551040649414}


EP_train:0:  14%|| 3982/27626 [09:20<55:39,  7.08it/s]

{'epoch': 0, 'iter': 3980, 'avg_loss': 9.776175130045434, 'avg_acc': 50.071433057020855, 'loss': 8.721574783325195}


EP_train:0:  14%|| 3992/27626 [09:21<55:50,  7.05it/s]

{'epoch': 0, 'iter': 3990, 'avg_loss': 9.77315307816477, 'avg_acc': 50.0822162365322, 'loss': 8.776571273803711}


EP_train:0:  14%|| 4002/27626 [09:22<55:55,  7.04it/s]

{'epoch': 0, 'iter': 4000, 'avg_loss': 9.770067381221216, 'avg_acc': 50.079667583104225, 'loss': 8.494660377502441}


EP_train:0:  15%|| 4012/27626 [09:24<56:08,  7.01it/s]

{'epoch': 0, 'iter': 4010, 'avg_loss': 9.768017400443421, 'avg_acc': 50.07791074545002, 'loss': 9.493911743164062}


EP_train:0:  15%|| 4022/27626 [09:25<55:35,  7.08it/s]

{'epoch': 0, 'iter': 4020, 'avg_loss': 9.765747402473872, 'avg_acc': 50.08859736383984, 'loss': 8.34725570678711}


EP_train:0:  15%|| 4032/27626 [09:27<55:16,  7.11it/s]

{'epoch': 0, 'iter': 4030, 'avg_loss': 9.7625900254123, 'avg_acc': 50.089152815678496, 'loss': 9.030362129211426}


EP_train:0:  15%|| 4042/27626 [09:28<55:09,  7.13it/s]

{'epoch': 0, 'iter': 4040, 'avg_loss': 9.76007294731546, 'avg_acc': 50.08661222469686, 'loss': 8.295331001281738}


EP_train:0:  15%|| 4052/27626 [09:29<55:30,  7.08it/s]

{'epoch': 0, 'iter': 4050, 'avg_loss': 9.757801752389433, 'avg_acc': 50.09488397926438, 'loss': 9.163006782531738}


EP_train:0:  15%|| 4062/27626 [09:31<55:19,  7.10it/s]

{'epoch': 0, 'iter': 4060, 'avg_loss': 9.755610528470143, 'avg_acc': 50.07541245998522, 'loss': 8.331343650817871}


EP_train:0:  15%|| 4072/27626 [09:32<55:04,  7.13it/s]

{'epoch': 0, 'iter': 4070, 'avg_loss': 9.753341063736288, 'avg_acc': 50.07599484156226, 'loss': 8.713326454162598}


EP_train:0:  15%|| 4082/27626 [09:34<55:03,  7.13it/s]

{'epoch': 0, 'iter': 4080, 'avg_loss': 9.751059366014477, 'avg_acc': 50.0712141631953, 'loss': 8.856331825256348}


EP_train:0:  15%|| 4092/27626 [09:35<55:26,  7.07it/s]

{'epoch': 0, 'iter': 4090, 'avg_loss': 9.748219281180043, 'avg_acc': 50.06569298460034, 'loss': 8.415985107421875}


EP_train:0:  15%|| 4102/27626 [09:36<55:54,  7.01it/s]

{'epoch': 0, 'iter': 4100, 'avg_loss': 9.745608809516941, 'avg_acc': 50.06477078761278, 'loss': 9.28286075592041}


EP_train:0:  15%|| 4112/27626 [09:38<55:50,  7.02it/s]

{'epoch': 0, 'iter': 4110, 'avg_loss': 9.74351579085721, 'avg_acc': 50.06461323279008, 'loss': 8.773903846740723}


EP_train:0:  15%|| 4122/27626 [09:39<55:14,  7.09it/s]

{'epoch': 0, 'iter': 4120, 'avg_loss': 9.741392161307536, 'avg_acc': 50.069006309148264, 'loss': 9.196390151977539}


EP_train:0:  15%|| 4132/27626 [09:41<55:35,  7.04it/s]

{'epoch': 0, 'iter': 4130, 'avg_loss': 9.73755850938526, 'avg_acc': 50.055979181796175, 'loss': 8.856904983520508}


EP_train:0:  15%|| 4142/27626 [09:42<55:14,  7.09it/s]

{'epoch': 0, 'iter': 4140, 'avg_loss': 9.734251077726134, 'avg_acc': 50.06565443129679, 'loss': 7.5727996826171875}


EP_train:0:  15%|| 4152/27626 [09:43<55:06,  7.10it/s]

{'epoch': 0, 'iter': 4150, 'avg_loss': 9.730873883646444, 'avg_acc': 50.07151891110576, 'loss': 8.293368339538574}


EP_train:0:  15%|| 4162/27626 [09:45<55:13,  7.08it/s]

{'epoch': 0, 'iter': 4160, 'avg_loss': 9.727900823759533, 'avg_acc': 50.07284907474164, 'loss': 8.597936630249023}


EP_train:0:  15%|| 4172/27626 [09:46<55:39,  7.02it/s]

{'epoch': 0, 'iter': 4170, 'avg_loss': 9.725375483802571, 'avg_acc': 50.07417286022536, 'loss': 8.353729248046875}


EP_train:0:  15%|| 4182/27626 [09:48<55:21,  7.06it/s]

{'epoch': 0, 'iter': 4180, 'avg_loss': 9.723353798895241, 'avg_acc': 50.067268596029656, 'loss': 9.691327095031738}


EP_train:0:  15%|| 4192/27626 [09:49<55:32,  7.03it/s]

{'epoch': 0, 'iter': 4190, 'avg_loss': 9.720879636553812, 'avg_acc': 50.05965163445478, 'loss': 8.69965934753418}


EP_train:0:  15%|| 4202/27626 [09:51<55:10,  7.08it/s]

{'epoch': 0, 'iter': 4200, 'avg_loss': 9.718304882104043, 'avg_acc': 50.05950964056177, 'loss': 9.26876449584961}


EP_train:0:  15%|| 4212/27626 [09:52<55:10,  7.07it/s]

{'epoch': 0, 'iter': 4210, 'avg_loss': 9.715346212257996, 'avg_acc': 50.05565780099739, 'loss': 8.123138427734375}


EP_train:0:  15%|| 4222/27626 [09:53<55:10,  7.07it/s]

{'epoch': 0, 'iter': 4220, 'avg_loss': 9.713112958995293, 'avg_acc': 50.057746979388774, 'loss': 8.565118789672852}


EP_train:0:  15%|| 4232/27626 [09:55<54:41,  7.13it/s]

{'epoch': 0, 'iter': 4230, 'avg_loss': 9.710286440789572, 'avg_acc': 50.048747341054124, 'loss': 7.407374858856201}


EP_train:0:  15%|| 4242/27626 [09:56<54:54,  7.10it/s]

{'epoch': 0, 'iter': 4240, 'avg_loss': 9.707984921617964, 'avg_acc': 50.041263852864894, 'loss': 7.676700115203857}


EP_train:0:  15%|| 4252/27626 [09:58<54:53,  7.10it/s]

{'epoch': 0, 'iter': 4250, 'avg_loss': 9.705404804106797, 'avg_acc': 50.03822629969419, 'loss': 8.313894271850586}


EP_train:0:  15%|| 4262/27626 [09:59<54:47,  7.11it/s]

{'epoch': 0, 'iter': 4260, 'avg_loss': 9.703170302663434, 'avg_acc': 50.03960337948838, 'loss': 9.19677448272705}


EP_train:0:  15%|| 4272/27626 [10:00<55:26,  7.02it/s]

{'epoch': 0, 'iter': 4270, 'avg_loss': 9.701182644195843, 'avg_acc': 50.0395106532428, 'loss': 8.41950511932373}


EP_train:0:  15%|| 4282/27626 [10:02<55:14,  7.04it/s]

{'epoch': 0, 'iter': 4280, 'avg_loss': 9.698613644537316, 'avg_acc': 50.02481896753095, 'loss': 9.56389045715332}


EP_train:0:  16%|| 4292/27626 [10:03<54:44,  7.10it/s]

{'epoch': 0, 'iter': 4290, 'avg_loss': 9.696929101426127, 'avg_acc': 50.02985900722442, 'loss': 9.902262687683105}


EP_train:0:  16%|| 4302/27626 [10:05<55:00,  7.07it/s]

{'epoch': 0, 'iter': 4300, 'avg_loss': 9.69437474069194, 'avg_acc': 50.03342245989305, 'loss': 8.688980102539062}


EP_train:0:  16%|| 4312/27626 [10:06<54:37,  7.11it/s]

{'epoch': 0, 'iter': 4310, 'avg_loss': 9.691094137570941, 'avg_acc': 50.02537114358617, 'loss': 8.375460624694824}


EP_train:0:  16%|| 4322/27626 [10:07<54:52,  7.08it/s]

{'epoch': 0, 'iter': 4320, 'avg_loss': 9.689052734626605, 'avg_acc': 50.03399097431151, 'loss': 8.656213760375977}


EP_train:0:  16%|| 4332/27626 [10:09<54:50,  7.08it/s]

{'epoch': 0, 'iter': 4330, 'avg_loss': 9.686927814897613, 'avg_acc': 50.031026321865625, 'loss': 8.552188873291016}


EP_train:0:  16%|| 4342/27626 [10:10<54:41,  7.09it/s]

{'epoch': 0, 'iter': 4340, 'avg_loss': 9.684384421361973, 'avg_acc': 50.02591568762958, 'loss': 7.955320835113525}


EP_train:0:  16%|| 4352/27626 [10:12<54:31,  7.11it/s]

{'epoch': 0, 'iter': 4350, 'avg_loss': 9.682382719512919, 'avg_acc': 50.015800965295334, 'loss': 8.108888626098633}


EP_train:0:  16%|| 4362/27626 [10:13<54:42,  7.09it/s]

{'epoch': 0, 'iter': 4360, 'avg_loss': 9.678911824260275, 'avg_acc': 50.01361499656042, 'loss': 8.110590934753418}


EP_train:0:  16%|| 4372/27626 [10:14<55:02,  7.04it/s]

{'epoch': 0, 'iter': 4370, 'avg_loss': 9.676354523464353, 'avg_acc': 50.01501372683597, 'loss': 8.572260856628418}


EP_train:0:  16%|| 4382/27626 [10:16<54:49,  7.07it/s]

{'epoch': 0, 'iter': 4380, 'avg_loss': 9.673871095200647, 'avg_acc': 50.01925930152933, 'loss': 8.745767593383789}


EP_train:0:  16%|| 4392/27626 [10:17<54:47,  7.07it/s]

{'epoch': 0, 'iter': 4390, 'avg_loss': 9.671561866218152, 'avg_acc': 50.02063880664996, 'loss': 9.265955924987793}


EP_train:0:  16%|| 4402/27626 [10:19<54:30,  7.10it/s]

{'epoch': 0, 'iter': 4400, 'avg_loss': 9.669584804327751, 'avg_acc': 50.02059191092933, 'loss': 8.373910903930664}


EP_train:0:  16%|| 4412/27626 [10:20<54:23,  7.11it/s]

{'epoch': 0, 'iter': 4410, 'avg_loss': 9.6672411803836, 'avg_acc': 50.01133529811834, 'loss': 8.02772045135498}


EP_train:0:  16%|| 4422/27626 [10:21<54:38,  7.08it/s]

{'epoch': 0, 'iter': 4420, 'avg_loss': 9.665484775059149, 'avg_acc': 50.00636168287718, 'loss': 8.850403785705566}


EP_train:0:  16%|| 4432/27626 [10:23<54:22,  7.11it/s]

{'epoch': 0, 'iter': 4430, 'avg_loss': 9.663521167253215, 'avg_acc': 49.99788422477996, 'loss': 8.785938262939453}


EP_train:0:  16%|| 4442/27626 [10:24<54:28,  7.09it/s]

{'epoch': 0, 'iter': 4440, 'avg_loss': 9.660998902646197, 'avg_acc': 50.00703670344517, 'loss': 8.522087097167969}


EP_train:0:  16%|| 4452/27626 [10:26<54:47,  7.05it/s]

{'epoch': 0, 'iter': 4450, 'avg_loss': 9.659043901182512, 'avg_acc': 50.012637609525946, 'loss': 8.403515815734863}


EP_train:0:  16%|| 4462/27626 [10:27<54:46,  7.05it/s]

{'epoch': 0, 'iter': 4460, 'avg_loss': 9.656696084956101, 'avg_acc': 50.004903609056264, 'loss': 9.323217391967773}


EP_train:0:  16%|| 4472/27626 [10:28<54:25,  7.09it/s]

{'epoch': 0, 'iter': 4470, 'avg_loss': 9.654848075504939, 'avg_acc': 50.01118318049653, 'loss': 9.957796096801758}


EP_train:0:  16%|| 4482/27626 [10:30<54:02,  7.14it/s]

{'epoch': 0, 'iter': 4480, 'avg_loss': 9.652699002621564, 'avg_acc': 50.00627650078108, 'loss': 8.60007381439209}


EP_train:0:  16%|| 4492/27626 [10:31<54:01,  7.14it/s]

{'epoch': 0, 'iter': 4490, 'avg_loss': 9.650585303139882, 'avg_acc': 50.01391672233355, 'loss': 9.124258041381836}


EP_train:0:  16%|| 4502/27626 [10:33<54:15,  7.10it/s]

{'epoch': 0, 'iter': 4500, 'avg_loss': 9.647287854934316, 'avg_acc': 50.01944012441679, 'loss': 8.308867454528809}


EP_train:0:  16%|| 4512/27626 [10:34<54:47,  7.03it/s]

{'epoch': 0, 'iter': 4510, 'avg_loss': 9.64537943072817, 'avg_acc': 50.01593327421857, 'loss': 9.395551681518555}


EP_train:0:  16%|| 4522/27626 [10:35<54:27,  7.07it/s]

{'epoch': 0, 'iter': 4520, 'avg_loss': 9.643010104026871, 'avg_acc': 50.016589250165886, 'loss': 8.317902565002441}


EP_train:0:  16%|| 4532/27626 [10:37<54:10,  7.11it/s]

{'epoch': 0, 'iter': 4530, 'avg_loss': 9.640437812843063, 'avg_acc': 50.013793864489074, 'loss': 8.81576156616211}


EP_train:0:  16%|| 4542/27626 [10:38<54:40,  7.04it/s]

{'epoch': 0, 'iter': 4540, 'avg_loss': 9.63840895028819, 'avg_acc': 50.016516185862145, 'loss': 8.816279411315918}


EP_train:0:  16%|| 4552/27626 [10:40<54:42,  7.03it/s]

{'epoch': 0, 'iter': 4550, 'avg_loss': 9.635916925005745, 'avg_acc': 50.01922654361679, 'loss': 8.47067642211914}


EP_train:0:  17%|| 4562/27626 [10:41<54:20,  7.07it/s]

{'epoch': 0, 'iter': 4560, 'avg_loss': 9.63387736989803, 'avg_acc': 50.03014689761017, 'loss': 9.227890968322754}


EP_train:0:  17%|| 4572/27626 [10:43<54:15,  7.08it/s]

{'epoch': 0, 'iter': 4570, 'avg_loss': 9.632965072034303, 'avg_acc': 50.025978998031064, 'loss': 10.380985260009766}


EP_train:0:  17%|| 4582/27626 [10:44<54:06,  7.10it/s]

{'epoch': 0, 'iter': 4580, 'avg_loss': 9.631398346506854, 'avg_acc': 50.040929927963326, 'loss': 9.5642671585083}


EP_train:0:  17%|| 4592/27626 [10:45<54:17,  7.07it/s]

{'epoch': 0, 'iter': 4590, 'avg_loss': 9.629484070902738, 'avg_acc': 50.0435634937922, 'loss': 7.674310684204102}


EP_train:0:  17%|| 4602/27626 [10:47<53:55,  7.12it/s]

{'epoch': 0, 'iter': 4600, 'avg_loss': 9.627920231283346, 'avg_acc': 50.047544012171265, 'loss': 9.206869125366211}


EP_train:0:  17%|| 4612/27626 [10:48<54:35,  7.03it/s]

{'epoch': 0, 'iter': 4610, 'avg_loss': 9.62632723698464, 'avg_acc': 50.0555736282802, 'loss': 9.265951156616211}


EP_train:0:  17%|| 4622/27626 [10:50<54:11,  7.07it/s]

{'epoch': 0, 'iter': 4620, 'avg_loss': 9.62346600247936, 'avg_acc': 50.0554533650725, 'loss': 8.305229187011719}


EP_train:0:  17%|| 4632/27626 [10:51<53:59,  7.10it/s]

{'epoch': 0, 'iter': 4630, 'avg_loss': 9.62142221893149, 'avg_acc': 50.060732023321094, 'loss': 8.538291931152344}


EP_train:0:  17%|| 4642/27626 [10:52<54:52,  6.98it/s]

{'epoch': 0, 'iter': 4640, 'avg_loss': 9.619840162988625, 'avg_acc': 50.063294548588665, 'loss': 8.967377662658691}


EP_train:0:  17%|| 4652/27626 [10:54<53:58,  7.09it/s]

{'epoch': 0, 'iter': 4650, 'avg_loss': 9.61774711223398, 'avg_acc': 50.06517415609546, 'loss': 9.171889305114746}


EP_train:0:  17%|| 4662/27626 [10:55<54:02,  7.08it/s]

{'epoch': 0, 'iter': 4660, 'avg_loss': 9.616197352807324, 'avg_acc': 50.06436387041408, 'loss': 8.657854080200195}


EP_train:0:  17%|| 4672/27626 [10:57<53:47,  7.11it/s]

{'epoch': 0, 'iter': 4670, 'avg_loss': 9.613857865461163, 'avg_acc': 50.06890922714622, 'loss': 8.306705474853516}


EP_train:0:  17%|| 4682/27626 [10:58<53:45,  7.11it/s]

{'epoch': 0, 'iter': 4680, 'avg_loss': 9.6116611986582, 'avg_acc': 50.07009720145268, 'loss': 7.872117519378662}


EP_train:0:  17%|| 4692/27626 [10:59<54:16,  7.04it/s]

{'epoch': 0, 'iter': 4690, 'avg_loss': 9.609976208405666, 'avg_acc': 50.06595075676829, 'loss': 8.412466049194336}


EP_train:0:  17%|| 4702/27626 [11:01<53:47,  7.10it/s]

{'epoch': 0, 'iter': 4700, 'avg_loss': 9.609187528849207, 'avg_acc': 50.0511859178898, 'loss': 9.046127319335938}


EP_train:0:  17%|| 4712/27626 [11:02<53:48,  7.10it/s]

{'epoch': 0, 'iter': 4710, 'avg_loss': 9.607561887588453, 'avg_acc': 50.0537306304394, 'loss': 9.14288330078125}


EP_train:0:  17%|| 4722/27626 [11:04<54:05,  7.06it/s]

{'epoch': 0, 'iter': 4720, 'avg_loss': 9.60516715741617, 'avg_acc': 50.05295488244016, 'loss': 8.273347854614258}


EP_train:0:  17%|| 4732/27626 [11:05<54:27,  7.01it/s]

{'epoch': 0, 'iter': 4730, 'avg_loss': 9.603876151385455, 'avg_acc': 50.04954026632847, 'loss': 9.446646690368652}


EP_train:0:  17%|| 4742/27626 [11:06<53:47,  7.09it/s]

{'epoch': 0, 'iter': 4740, 'avg_loss': 9.60241479155535, 'avg_acc': 50.04745834212192, 'loss': 8.795557022094727}


EP_train:0:  17%|| 4752/27626 [11:08<54:12,  7.03it/s]

{'epoch': 0, 'iter': 4750, 'avg_loss': 9.60052423812394, 'avg_acc': 50.04867396337613, 'loss': 9.111350059509277}


EP_train:0:  17%|| 4762/27626 [11:09<53:26,  7.13it/s]

{'epoch': 0, 'iter': 4760, 'avg_loss': 9.598369393071264, 'avg_acc': 50.04528985507246, 'loss': 8.246519088745117}


EP_train:0:  17%|| 4772/27626 [11:11<53:43,  7.09it/s]

{'epoch': 0, 'iter': 4770, 'avg_loss': 9.596590386324522, 'avg_acc': 50.05305491511214, 'loss': 8.46528434753418}


EP_train:0:  17%|| 4782/27626 [11:12<53:39,  7.10it/s]

{'epoch': 0, 'iter': 4780, 'avg_loss': 9.59485396359461, 'avg_acc': 50.05032942898975, 'loss': 8.653675079345703}


EP_train:0:  17%|| 4792/27626 [11:13<53:29,  7.11it/s]

{'epoch': 0, 'iter': 4790, 'avg_loss': 9.592246756875124, 'avg_acc': 50.046963055729485, 'loss': 8.609888076782227}


EP_train:0:  17%|| 4802/27626 [11:15<53:55,  7.05it/s]

{'epoch': 0, 'iter': 4800, 'avg_loss': 9.590143341294082, 'avg_acc': 50.047516142470315, 'loss': 8.88916301727295}


EP_train:0:  17%|| 4812/27626 [11:16<53:45,  7.07it/s]

{'epoch': 0, 'iter': 4810, 'avg_loss': 9.58818489894181, 'avg_acc': 50.044819164414875, 'loss': 8.407393455505371}


EP_train:0:  17%|| 4822/27626 [11:18<53:57,  7.04it/s]

{'epoch': 0, 'iter': 4820, 'avg_loss': 9.585858686364457, 'avg_acc': 50.03241028832193, 'loss': 9.16295337677002}


EP_train:0:  17%|| 4832/27626 [11:19<53:39,  7.08it/s]

{'epoch': 0, 'iter': 4830, 'avg_loss': 9.58402008468432, 'avg_acc': 50.032990064168914, 'loss': 9.632918357849121}


EP_train:0:  18%|| 4842/27626 [11:21<53:23,  7.11it/s]

{'epoch': 0, 'iter': 4840, 'avg_loss': 9.5815590672492, 'avg_acc': 50.02711216690766, 'loss': 7.287034511566162}


EP_train:0:  18%|| 4852/27626 [11:22<53:23,  7.11it/s]

{'epoch': 0, 'iter': 4850, 'avg_loss': 9.57933326623681, 'avg_acc': 50.02770047412904, 'loss': 8.29706859588623}


EP_train:0:  18%|| 4862/27626 [11:23<53:17,  7.12it/s]

{'epoch': 0, 'iter': 4860, 'avg_loss': 9.577275898201037, 'avg_acc': 50.02571487348282, 'loss': 7.771999835968018}


EP_train:0:  18%|| 4872/27626 [11:25<53:22,  7.10it/s]

{'epoch': 0, 'iter': 4870, 'avg_loss': 9.575766081292143, 'avg_acc': 50.02052966536645, 'loss': 8.64338207244873}


EP_train:0:  18%|| 4882/27626 [11:26<53:41,  7.06it/s]

{'epoch': 0, 'iter': 4880, 'avg_loss': 9.573329521871644, 'avg_acc': 50.027530219217375, 'loss': 8.239713668823242}


EP_train:0:  18%|| 4892/27626 [11:28<53:24,  7.09it/s]

{'epoch': 0, 'iter': 4890, 'avg_loss': 9.571521618253637, 'avg_acc': 50.0325853608669, 'loss': 8.419611930847168}


EP_train:0:  18%|| 4902/27626 [11:29<53:59,  7.02it/s]

{'epoch': 0, 'iter': 4900, 'avg_loss': 9.569334334583823, 'avg_acc': 50.03124362375026, 'loss': 8.387290954589844}


EP_train:0:  18%|| 4912/27626 [11:30<53:48,  7.04it/s]

{'epoch': 0, 'iter': 4910, 'avg_loss': 9.567590140441943, 'avg_acc': 50.02799837100387, 'loss': 7.998661994934082}


EP_train:0:  18%|| 4922/27626 [11:32<53:25,  7.08it/s]

{'epoch': 0, 'iter': 4920, 'avg_loss': 9.565884004107241, 'avg_acc': 50.02159114001219, 'loss': 9.020760536193848}


EP_train:0:  18%|| 4932/27626 [11:33<53:49,  7.03it/s]

{'epoch': 0, 'iter': 4930, 'avg_loss': 9.564635088999308, 'avg_acc': 50.00823869397688, 'loss': 8.295653343200684}


EP_train:0:  18%|| 4942/27626 [11:35<53:18,  7.09it/s]

{'epoch': 0, 'iter': 4940, 'avg_loss': 9.562889217244688, 'avg_acc': 50.00948694596236, 'loss': 9.371074676513672}


EP_train:0:  18%|| 4952/27626 [11:36<53:02,  7.13it/s]

{'epoch': 0, 'iter': 4950, 'avg_loss': 9.561291718526311, 'avg_acc': 50.00631185619066, 'loss': 9.166695594787598}


EP_train:0:  18%|| 4962/27626 [11:37<53:19,  7.08it/s]

{'epoch': 0, 'iter': 4960, 'avg_loss': 9.559414406741634, 'avg_acc': 50.00818887321105, 'loss': 7.81808614730835}


EP_train:0:  18%|| 4972/27626 [11:39<53:32,  7.05it/s]

{'epoch': 0, 'iter': 4970, 'avg_loss': 9.557394693708833, 'avg_acc': 50.010058338362505, 'loss': 7.570525169372559}


EP_train:0:  18%|| 4982/27626 [11:40<53:22,  7.07it/s]

{'epoch': 0, 'iter': 4980, 'avg_loss': 9.555267937856849, 'avg_acc': 50.018821521782776, 'loss': 8.277118682861328}


EP_train:0:  18%|| 4992/27626 [11:42<53:31,  7.05it/s]

{'epoch': 0, 'iter': 4990, 'avg_loss': 9.55336032145918, 'avg_acc': 50.02504508114606, 'loss': 7.754993438720703}


EP_train:0:  18%|| 5002/27626 [11:43<53:27,  7.05it/s]

{'epoch': 0, 'iter': 5000, 'avg_loss': 9.551847116729302, 'avg_acc': 50.01624675064987, 'loss': 9.675779342651367}


EP_train:0:  18%|| 5012/27626 [11:44<53:25,  7.05it/s]

{'epoch': 0, 'iter': 5010, 'avg_loss': 9.549909811680294, 'avg_acc': 50.02120335262422, 'loss': 8.812675476074219}


EP_train:0:  18%|| 5022/27626 [11:46<53:12,  7.08it/s]

{'epoch': 0, 'iter': 5020, 'avg_loss': 9.548338349894493, 'avg_acc': 50.021783509261105, 'loss': 9.211851119995117}


EP_train:0:  18%|| 5032/27626 [11:47<52:47,  7.13it/s]

{'epoch': 0, 'iter': 5030, 'avg_loss': 9.546435733553288, 'avg_acc': 50.01987676406281, 'loss': 8.40908145904541}


EP_train:0:  18%|| 5042/27626 [11:49<54:09,  6.95it/s]

{'epoch': 0, 'iter': 5040, 'avg_loss': 9.54393812447071, 'avg_acc': 50.02851616742709, 'loss': 8.2642240524292}


EP_train:0:  18%|| 5052/27626 [11:50<53:15,  7.06it/s]

{'epoch': 0, 'iter': 5050, 'avg_loss': 9.542190593579008, 'avg_acc': 50.03217184715898, 'loss': 8.424660682678223}


EP_train:0:  18%|| 5062/27626 [11:51<53:24,  7.04it/s]

{'epoch': 0, 'iter': 5060, 'avg_loss': 9.540451938562821, 'avg_acc': 50.03272574590002, 'loss': 9.9910249710083}


EP_train:0:  18%|| 5072/27626 [11:53<53:17,  7.05it/s]

{'epoch': 0, 'iter': 5070, 'avg_loss': 9.53849323184691, 'avg_acc': 50.03820745415105, 'loss': 8.235963821411133}


EP_train:0:  18%|| 5082/27626 [11:54<53:03,  7.08it/s]

{'epoch': 0, 'iter': 5080, 'avg_loss': 9.536168274882272, 'avg_acc': 50.03874729383979, 'loss': 8.496068000793457}


EP_train:0:  18%|| 5092/27626 [11:56<53:24,  7.03it/s]

{'epoch': 0, 'iter': 5090, 'avg_loss': 9.534445760573696, 'avg_acc': 50.03498821449617, 'loss': 9.1970853805542}


EP_train:0:  18%|| 5102/27626 [11:57<53:18,  7.04it/s]

{'epoch': 0, 'iter': 5100, 'avg_loss': 9.532470279851397, 'avg_acc': 50.030631248774746, 'loss': 9.243886947631836}


EP_train:0:  19%|| 5112/27626 [11:59<53:12,  7.05it/s]

{'epoch': 0, 'iter': 5110, 'avg_loss': 9.530467502759688, 'avg_acc': 50.03118274310311, 'loss': 9.070610046386719}


EP_train:0:  19%|| 5122/27626 [12:00<53:44,  6.98it/s]

{'epoch': 0, 'iter': 5120, 'avg_loss': 9.52868534917595, 'avg_acc': 50.031121851200936, 'loss': 8.075543403625488}


EP_train:0:  19%|| 5132/27626 [12:01<53:24,  7.02it/s]

{'epoch': 0, 'iter': 5130, 'avg_loss': 9.526546655493949, 'avg_acc': 50.02557980900409, 'loss': 8.744109153747559}


EP_train:0:  19%|| 5142/27626 [12:03<53:17,  7.03it/s]

{'epoch': 0, 'iter': 5140, 'avg_loss': 9.524437445329104, 'avg_acc': 50.02431433573234, 'loss': 8.991199493408203}


EP_train:0:  19%|| 5152/27626 [12:04<53:17,  7.03it/s]

{'epoch': 0, 'iter': 5150, 'avg_loss': 9.522541881880466, 'avg_acc': 50.016986992816925, 'loss': 9.13493537902832}


EP_train:0:  19%|| 5162/27626 [12:06<53:33,  6.99it/s]

{'epoch': 0, 'iter': 5160, 'avg_loss': 9.521234262684846, 'avg_acc': 50.01998159271459, 'loss': 9.614943504333496}


EP_train:0:  19%|| 5172/27626 [12:07<53:31,  6.99it/s]

{'epoch': 0, 'iter': 5170, 'avg_loss': 9.519796927693134, 'avg_acc': 50.017525623670466, 'loss': 8.526399612426758}


EP_train:0:  19%|| 5182/27626 [12:08<53:19,  7.01it/s]

{'epoch': 0, 'iter': 5180, 'avg_loss': 9.519244243177846, 'avg_acc': 50.004825323296664, 'loss': 9.22443675994873}


EP_train:0:  19%|| 5192/27626 [12:10<53:01,  7.05it/s]

{'epoch': 0, 'iter': 5190, 'avg_loss': 9.518066040469694, 'avg_acc': 50.00722404161048, 'loss': 9.21334171295166}


EP_train:0:  19%|| 5202/27626 [12:11<53:08,  7.03it/s]

{'epoch': 0, 'iter': 5200, 'avg_loss': 9.51656451405894, 'avg_acc': 50.004205921938095, 'loss': 9.214075088500977}


EP_train:0:  19%|| 5212/27626 [12:13<53:06,  7.03it/s]

{'epoch': 0, 'iter': 5210, 'avg_loss': 9.51463639733888, 'avg_acc': 49.99820092112838, 'loss': 8.126720428466797}


EP_train:0:  19%|| 5222/27626 [12:14<52:59,  7.05it/s]

{'epoch': 0, 'iter': 5220, 'avg_loss': 9.513579630710183, 'avg_acc': 49.996408733959015, 'loss': 8.877941131591797}


EP_train:0:  19%|| 5232/27626 [12:16<53:04,  7.03it/s]

{'epoch': 0, 'iter': 5230, 'avg_loss': 9.512317847986791, 'avg_acc': 49.995818199197096, 'loss': 9.97971248626709}


EP_train:0:  19%|| 5242/27626 [12:17<52:59,  7.04it/s]

{'epoch': 0, 'iter': 5240, 'avg_loss': 9.510528766888285, 'avg_acc': 50.0023850410227, 'loss': 8.686554908752441}


EP_train:0:  19%|| 5252/27626 [12:18<52:52,  7.05it/s]

{'epoch': 0, 'iter': 5250, 'avg_loss': 9.508392756608663, 'avg_acc': 50.00297562369072, 'loss': 8.183978080749512}


EP_train:0:  19%|| 5262/27626 [12:20<52:53,  7.05it/s]

{'epoch': 0, 'iter': 5260, 'avg_loss': 9.506132993515035, 'avg_acc': 49.99940600646265, 'loss': 9.372201919555664}


EP_train:0:  19%|| 5272/27626 [12:21<53:09,  7.01it/s]

{'epoch': 0, 'iter': 5270, 'avg_loss': 9.503845701541513, 'avg_acc': 50.0, 'loss': 7.988766193389893}


EP_train:0:  19%|| 5282/27626 [12:23<52:34,  7.08it/s]

{'epoch': 0, 'iter': 5280, 'avg_loss': 9.502036289878921, 'avg_acc': 49.997633024048476, 'loss': 8.568975448608398}


EP_train:0:  19%|| 5292/27626 [12:24<52:29,  7.09it/s]

{'epoch': 0, 'iter': 5290, 'avg_loss': 9.500292350474883, 'avg_acc': 49.999409374409375, 'loss': 8.000275611877441}


EP_train:0:  19%|| 5302/27626 [12:25<52:40,  7.06it/s]

{'epoch': 0, 'iter': 5300, 'avg_loss': 9.499029636652914, 'avg_acc': 50.00117902282588, 'loss': 8.298759460449219}


EP_train:0:  19%|| 5312/27626 [12:27<52:39,  7.06it/s]

{'epoch': 0, 'iter': 5310, 'avg_loss': 9.497972642111701, 'avg_acc': 49.991173978535116, 'loss': 9.365602493286133}


EP_train:0:  19%|| 5322/27626 [12:28<52:40,  7.06it/s]

{'epoch': 0, 'iter': 5320, 'avg_loss': 9.497406887663104, 'avg_acc': 49.99823811313663, 'loss': 9.730905532836914}


EP_train:0:  19%|| 5332/27626 [12:30<52:35,  7.06it/s]

{'epoch': 0, 'iter': 5330, 'avg_loss': 9.496490229497683, 'avg_acc': 50.00117238791971, 'loss': 9.11010456085205}


EP_train:0:  19%|| 5342/27626 [12:31<52:41,  7.05it/s]

{'epoch': 0, 'iter': 5340, 'avg_loss': 9.495041377444858, 'avg_acc': 50.00702115708668, 'loss': 8.65925121307373}


EP_train:0:  19%|| 5352/27626 [12:32<52:50,  7.03it/s]

{'epoch': 0, 'iter': 5350, 'avg_loss': 9.493584222335544, 'avg_acc': 50.00525602691086, 'loss': 9.993731498718262}


EP_train:0:  19%|| 5362/27626 [12:34<52:57,  7.01it/s]

{'epoch': 0, 'iter': 5360, 'avg_loss': 9.493029428626858, 'avg_acc': 50.00582913635516, 'loss': 9.021489143371582}


EP_train:0:  19%|| 5372/27626 [12:35<52:30,  7.06it/s]

{'epoch': 0, 'iter': 5370, 'avg_loss': 9.492130383058447, 'avg_acc': 50.00058182833737, 'loss': 8.885570526123047}


EP_train:0:  19%|| 5382/27626 [12:37<52:29,  7.06it/s]

{'epoch': 0, 'iter': 5380, 'avg_loss': 9.490314264763601, 'avg_acc': 49.99593477048876, 'loss': 7.642246246337891}


EP_train:0:  20%|| 5392/27626 [12:38<52:36,  7.04it/s]

{'epoch': 0, 'iter': 5390, 'avg_loss': 9.489194062441184, 'avg_acc': 49.99710165089965, 'loss': 9.37608814239502}


EP_train:0:  20%|| 5402/27626 [12:39<52:38,  7.04it/s]

{'epoch': 0, 'iter': 5400, 'avg_loss': 9.488027629135406, 'avg_acc': 50.00173578966858, 'loss': 9.099709510803223}


EP_train:0:  20%|| 5412/27626 [12:41<52:46,  7.02it/s]

{'epoch': 0, 'iter': 5410, 'avg_loss': 9.486591543381987, 'avg_acc': 49.996534836444276, 'loss': 8.711895942687988}


EP_train:0:  20%|| 5422/27626 [12:42<52:25,  7.06it/s]

{'epoch': 0, 'iter': 5420, 'avg_loss': 9.484999185205716, 'avg_acc': 49.98904722375946, 'loss': 8.28442668914795}


EP_train:0:  20%|| 5432/27626 [12:44<52:41,  7.02it/s]

{'epoch': 0, 'iter': 5430, 'avg_loss': 9.483485552514374, 'avg_acc': 49.99251979377647, 'loss': 8.09554672241211}


EP_train:0:  20%|| 5442/27626 [12:45<52:50,  7.00it/s]

{'epoch': 0, 'iter': 5440, 'avg_loss': 9.482432476542185, 'avg_acc': 49.99023616982172, 'loss': 9.308969497680664}


EP_train:0:  20%|| 5452/27626 [12:47<52:46,  7.00it/s]

{'epoch': 0, 'iter': 5450, 'avg_loss': 9.481111227142945, 'avg_acc': 49.9805081636397, 'loss': 8.585583686828613}


EP_train:0:  20%|| 5462/27626 [12:48<53:05,  6.96it/s]

{'epoch': 0, 'iter': 5460, 'avg_loss': 9.48019730889782, 'avg_acc': 49.98626625160227, 'loss': 9.104783058166504}


EP_train:0:  20%|| 5472/27626 [12:49<52:41,  7.01it/s]

{'epoch': 0, 'iter': 5470, 'avg_loss': 9.47919742439199, 'avg_acc': 49.99086090294279, 'loss': 9.2890043258667}


EP_train:0:  20%|| 5482/27626 [12:51<52:33,  7.02it/s]

{'epoch': 0, 'iter': 5480, 'avg_loss': 9.477868141037904, 'avg_acc': 49.99315818281336, 'loss': 8.65937614440918}


EP_train:0:  20%|| 5492/27626 [12:52<52:29,  7.03it/s]

{'epoch': 0, 'iter': 5490, 'avg_loss': 9.476426681823536, 'avg_acc': 49.990894190493535, 'loss': 8.181615829467773}


EP_train:0:  20%|| 5502/27626 [12:54<51:56,  7.10it/s]

{'epoch': 0, 'iter': 5500, 'avg_loss': 9.474609597945859, 'avg_acc': 49.986366115251776, 'loss': 8.669001579284668}


EP_train:0:  20%|| 5512/27626 [12:55<52:36,  7.00it/s]

{'epoch': 0, 'iter': 5510, 'avg_loss': 9.473018550890108, 'avg_acc': 49.99432952277264, 'loss': 8.7035493850708}


EP_train:0:  20%|| 5522/27626 [12:56<52:24,  7.03it/s]

{'epoch': 0, 'iter': 5520, 'avg_loss': 9.471984315190577, 'avg_acc': 49.999433979351565, 'loss': 8.719152450561523}


EP_train:0:  20%|| 5532/27626 [12:58<52:19,  7.04it/s]

{'epoch': 0, 'iter': 5530, 'avg_loss': 9.470290969237563, 'avg_acc': 49.99265503525583, 'loss': 8.807846069335938}


EP_train:0:  20%|| 5542/27626 [12:59<52:42,  6.98it/s]

{'epoch': 0, 'iter': 5540, 'avg_loss': 9.468869728552393, 'avg_acc': 49.98928442519401, 'loss': 8.222285270690918}


EP_train:0:  20%|| 5552/27626 [13:01<52:11,  7.05it/s]

{'epoch': 0, 'iter': 5550, 'avg_loss': 9.467923619987255, 'avg_acc': 49.98817780580075, 'loss': 8.501246452331543}


EP_train:0:  20%|| 5562/27626 [13:02<52:01,  7.07it/s]

{'epoch': 0, 'iter': 5560, 'avg_loss': 9.466835988680465, 'avg_acc': 49.98932296349577, 'loss': 8.133245468139648}


EP_train:0:  20%|| 5572/27626 [13:04<51:44,  7.10it/s]

{'epoch': 0, 'iter': 5570, 'avg_loss': 9.465662647419185, 'avg_acc': 49.989903069466884, 'loss': 8.34537124633789}


EP_train:0:  20%|| 5582/27626 [13:05<51:51,  7.09it/s]

{'epoch': 0, 'iter': 5580, 'avg_loss': 9.464499648637059, 'avg_acc': 49.99664038702741, 'loss': 9.215526580810547}


EP_train:0:  20%|| 5592/27626 [13:06<51:46,  7.09it/s]

{'epoch': 0, 'iter': 5590, 'avg_loss': 9.464064333951018, 'avg_acc': 49.99329279198712, 'loss': 8.18297004699707}


EP_train:0:  20%|| 5602/27626 [13:08<51:32,  7.12it/s]

{'epoch': 0, 'iter': 5600, 'avg_loss': 9.462369954324242, 'avg_acc': 49.998326191751474, 'loss': 7.989068984985352}


EP_train:0:  20%|| 5612/27626 [13:09<51:41,  7.10it/s]

{'epoch': 0, 'iter': 5610, 'avg_loss': 9.460868605146475, 'avg_acc': 49.99832917483514, 'loss': 7.65060567855835}


EP_train:0:  20%|| 5622/27626 [13:11<51:59,  7.05it/s]

{'epoch': 0, 'iter': 5620, 'avg_loss': 9.459236701767844, 'avg_acc': 50.00611545988258, 'loss': 8.821197509765625}


EP_train:0:  20%|| 5632/27626 [13:12<52:19,  7.00it/s]

{'epoch': 0, 'iter': 5630, 'avg_loss': 9.458213231085546, 'avg_acc': 50.009989344698994, 'loss': 8.683985710144043}


EP_train:0:  20%|| 5642/27626 [13:13<51:48,  7.07it/s]

{'epoch': 0, 'iter': 5640, 'avg_loss': 9.456812940829824, 'avg_acc': 50.01163357560716, 'loss': 8.563172340393066}


EP_train:0:  20%|| 5652/27626 [13:15<51:41,  7.09it/s]

{'epoch': 0, 'iter': 5650, 'avg_loss': 9.455550984943088, 'avg_acc': 50.0243319766413, 'loss': 8.41899299621582}


EP_train:0:  20%|| 5662/27626 [13:16<51:29,  7.11it/s]

{'epoch': 0, 'iter': 5660, 'avg_loss': 9.453893854053296, 'avg_acc': 50.01876876876877, 'loss': 7.936224937438965}


EP_train:0:  21%|| 5672/27626 [13:18<51:37,  7.09it/s]

{'epoch': 0, 'iter': 5670, 'avg_loss': 9.452454367930077, 'avg_acc': 50.022041967906894, 'loss': 9.220389366149902}


EP_train:0:  21%|| 5682/27626 [13:19<51:37,  7.08it/s]

{'epoch': 0, 'iter': 5680, 'avg_loss': 9.451297204609048, 'avg_acc': 50.0253036437247, 'loss': 8.146620750427246}


EP_train:0:  21%|| 5692/27626 [13:20<51:46,  7.06it/s]

{'epoch': 0, 'iter': 5690, 'avg_loss': 9.449995651958991, 'avg_acc': 50.022513617993326, 'loss': 8.570819854736328}


EP_train:0:  21%|| 5702/27626 [13:22<52:06,  7.01it/s]

{'epoch': 0, 'iter': 5700, 'avg_loss': 9.44886599078845, 'avg_acc': 50.02247412734609, 'loss': 8.856593132019043}


EP_train:0:  21%|| 5712/27626 [13:23<51:52,  7.04it/s]

{'epoch': 0, 'iter': 5710, 'avg_loss': 9.448077603702348, 'avg_acc': 50.02188758536158, 'loss': 9.940138816833496}


EP_train:0:  21%|| 5722/27626 [13:25<52:01,  7.02it/s]

{'epoch': 0, 'iter': 5720, 'avg_loss': 9.446217352809283, 'avg_acc': 50.018571927984624, 'loss': 8.666725158691406}


EP_train:0:  21%|| 5732/27626 [13:26<51:40,  7.06it/s]

{'epoch': 0, 'iter': 5730, 'avg_loss': 9.444520659855243, 'avg_acc': 50.01690368173094, 'loss': 8.385797500610352}


EP_train:0:  21%|| 5742/27626 [13:27<51:26,  7.09it/s]

{'epoch': 0, 'iter': 5740, 'avg_loss': 9.44268836594691, 'avg_acc': 50.01306392614527, 'loss': 7.992169380187988}


EP_train:0:  21%|| 5752/27626 [13:29<51:43,  7.05it/s]

{'epoch': 0, 'iter': 5750, 'avg_loss': 9.441366956744313, 'avg_acc': 50.02064858285515, 'loss': 8.302849769592285}


EP_train:0:  21%|| 5762/27626 [13:30<51:26,  7.08it/s]

{'epoch': 0, 'iter': 5760, 'avg_loss': 9.440050309781626, 'avg_acc': 50.023324943586175, 'loss': 8.567888259887695}


EP_train:0:  21%|| 5772/27626 [13:32<51:26,  7.08it/s]

{'epoch': 0, 'iter': 5770, 'avg_loss': 9.439164451453038, 'avg_acc': 50.028158031537, 'loss': 8.783404350280762}


EP_train:0:  21%|| 5782/27626 [13:33<51:29,  7.07it/s]

{'epoch': 0, 'iter': 5780, 'avg_loss': 9.437565182179569, 'avg_acc': 50.03081214322782, 'loss': 8.667815208435059}


EP_train:0:  21%|| 5792/27626 [13:35<51:26,  7.07it/s]

{'epoch': 0, 'iter': 5790, 'avg_loss': 9.436197770566155, 'avg_acc': 50.03831376273528, 'loss': 9.111662864685059}


EP_train:0:  21%|| 5802/27626 [13:36<51:37,  7.05it/s]

{'epoch': 0, 'iter': 5800, 'avg_loss': 9.434977451453351, 'avg_acc': 50.05009912084123, 'loss': 7.6666669845581055}


EP_train:0:  21%|| 5812/27626 [13:37<51:29,  7.06it/s]

{'epoch': 0, 'iter': 5810, 'avg_loss': 9.43392307625808, 'avg_acc': 50.04302185510239, 'loss': 8.846334457397461}


EP_train:0:  21%|| 5822/27626 [13:39<51:09,  7.10it/s]

{'epoch': 0, 'iter': 5820, 'avg_loss': 9.431985211155297, 'avg_acc': 50.04187424841092, 'loss': 8.1309175491333}


EP_train:0:  21%|| 5832/27626 [13:40<51:07,  7.10it/s]

{'epoch': 0, 'iter': 5830, 'avg_loss': 9.431237715592088, 'avg_acc': 50.04823357914594, 'loss': 10.034786224365234}


EP_train:0:  21%|| 5842/27626 [13:42<51:18,  7.08it/s]

{'epoch': 0, 'iter': 5840, 'avg_loss': 9.430410915522355, 'avg_acc': 50.04119585687382, 'loss': 8.68908405303955}


EP_train:0:  21%|| 5852/27626 [13:43<51:18,  7.07it/s]

{'epoch': 0, 'iter': 5850, 'avg_loss': 9.428962515268829, 'avg_acc': 50.04486412579047, 'loss': 8.864840507507324}


EP_train:0:  21%|| 5862/27626 [13:44<51:25,  7.05it/s]

{'epoch': 0, 'iter': 5860, 'avg_loss': 9.427614708724557, 'avg_acc': 50.051718989933455, 'loss': 7.955208778381348}


EP_train:0:  21%|| 5872/27626 [13:46<51:41,  7.01it/s]

{'epoch': 0, 'iter': 5870, 'avg_loss': 9.426605704857733, 'avg_acc': 50.06227644353602, 'loss': 8.58615493774414}


EP_train:0:  21%|| 5882/27626 [13:47<51:26,  7.04it/s]

{'epoch': 0, 'iter': 5880, 'avg_loss': 9.425463982533282, 'avg_acc': 50.068547015813635, 'loss': 8.340123176574707}


EP_train:0:  21%|| 5892/27626 [13:49<51:16,  7.06it/s]

{'epoch': 0, 'iter': 5890, 'avg_loss': 9.423555881579246, 'avg_acc': 50.06577830589034, 'loss': 8.754596710205078}


EP_train:0:  21%|| 5902/27626 [13:50<51:35,  7.02it/s]

{'epoch': 0, 'iter': 5900, 'avg_loss': 9.422567856026957, 'avg_acc': 50.06354855109304, 'loss': 9.121841430664062}


EP_train:0:  21%|| 5912/27626 [13:51<51:35,  7.02it/s]

{'epoch': 0, 'iter': 5910, 'avg_loss': 9.421843663223543, 'avg_acc': 50.06185501607173, 'loss': 8.751786231994629}


EP_train:0:  21%|| 5922/27626 [13:53<51:09,  7.07it/s]

{'epoch': 0, 'iter': 5920, 'avg_loss': 9.420156111408781, 'avg_acc': 50.0680839385239, 'loss': 7.754256725311279}


EP_train:0:  21%|| 5932/27626 [13:54<50:56,  7.10it/s]

{'epoch': 0, 'iter': 5930, 'avg_loss': 9.419183289295223, 'avg_acc': 50.07060360816051, 'loss': 9.395367622375488}


EP_train:0:  22%|| 5942/27626 [13:56<50:49,  7.11it/s]

{'epoch': 0, 'iter': 5940, 'avg_loss': 9.417891359433565, 'avg_acc': 50.071536778320144, 'loss': 7.953742980957031}


EP_train:0:  22%|| 5952/27626 [13:57<51:15,  7.05it/s]

{'epoch': 0, 'iter': 5950, 'avg_loss': 9.416558892761753, 'avg_acc': 50.08191900520921, 'loss': 8.202475547790527}


EP_train:0:  22%|| 5962/27626 [13:58<51:29,  7.01it/s]

{'epoch': 0, 'iter': 5960, 'avg_loss': 9.41580967326389, 'avg_acc': 50.088072471061906, 'loss': 8.684943199157715}


EP_train:0:  22%|| 5972/27626 [14:00<50:59,  7.08it/s]

{'epoch': 0, 'iter': 5970, 'avg_loss': 9.415111463007227, 'avg_acc': 50.087401607770886, 'loss': 9.445688247680664}


EP_train:0:  22%|| 5982/27626 [14:01<51:04,  7.06it/s]

{'epoch': 0, 'iter': 5980, 'avg_loss': 9.414215611685998, 'avg_acc': 50.08777796355125, 'loss': 8.152979850769043}


EP_train:0:  22%|| 5992/27626 [14:03<51:09,  7.05it/s]

{'epoch': 0, 'iter': 5990, 'avg_loss': 9.412446317002562, 'avg_acc': 50.082415289601066, 'loss': 8.257299423217773}


EP_train:0:  22%|| 6002/27626 [14:04<50:50,  7.09it/s]

{'epoch': 0, 'iter': 6000, 'avg_loss': 9.410975720997236, 'avg_acc': 50.077591234794205, 'loss': 8.462518692016602}


EP_train:0:  22%|| 6012/27626 [14:05<50:42,  7.10it/s]

{'epoch': 0, 'iter': 6010, 'avg_loss': 9.40974921336132, 'avg_acc': 50.080061553818, 'loss': 8.631196975708008}


EP_train:0:  22%|| 6022/27626 [14:07<50:54,  7.07it/s]

{'epoch': 0, 'iter': 6020, 'avg_loss': 9.40808506705717, 'avg_acc': 50.073700381996346, 'loss': 7.697455883026123}


EP_train:0:  22%|| 6032/27626 [14:08<50:50,  7.08it/s]

{'epoch': 0, 'iter': 6030, 'avg_loss': 9.406944318783063, 'avg_acc': 50.07513264798541, 'loss': 8.494808197021484}


EP_train:0:  22%|| 6042/27626 [14:10<51:09,  7.03it/s]

{'epoch': 0, 'iter': 6040, 'avg_loss': 9.405692962083846, 'avg_acc': 50.07190448601225, 'loss': 8.390291213989258}


EP_train:0:  22%|| 6052/27626 [14:11<50:43,  7.09it/s]

{'epoch': 0, 'iter': 6050, 'avg_loss': 9.40486906595692, 'avg_acc': 50.07230209882664, 'loss': 8.35804271697998}


EP_train:0:  22%|| 6062/27626 [14:13<51:11,  7.02it/s]

{'epoch': 0, 'iter': 6060, 'avg_loss': 9.403650140077874, 'avg_acc': 50.08043227190233, 'loss': 8.373706817626953}


EP_train:0:  22%|| 6072/27626 [14:14<50:55,  7.05it/s]

{'epoch': 0, 'iter': 6070, 'avg_loss': 9.4020219400557, 'avg_acc': 50.07875555921595, 'loss': 7.927149772644043}


EP_train:0:  22%|| 6082/27626 [14:15<51:17,  7.00it/s]

{'epoch': 0, 'iter': 6080, 'avg_loss': 9.400544764787385, 'avg_acc': 50.07502877816149, 'loss': 8.825338363647461}


EP_train:0:  22%|| 6092/27626 [14:17<50:36,  7.09it/s]

{'epoch': 0, 'iter': 6090, 'avg_loss': 9.399049382044595, 'avg_acc': 50.07285339024791, 'loss': 7.506089210510254}


EP_train:0:  22%|| 6102/27626 [14:18<50:52,  7.05it/s]

{'epoch': 0, 'iter': 6100, 'avg_loss': 9.398366185763374, 'avg_acc': 50.068636289132925, 'loss': 8.103838920593262}


EP_train:0:  22%|| 6112/27626 [14:20<50:45,  7.06it/s]

{'epoch': 0, 'iter': 6110, 'avg_loss': 9.396996496391266, 'avg_acc': 50.06545573555883, 'loss': 9.164725303649902}


EP_train:0:  22%|| 6122/27626 [14:21<50:59,  7.03it/s]

{'epoch': 0, 'iter': 6120, 'avg_loss': 9.395937176121546, 'avg_acc': 50.069943636660675, 'loss': 9.242197036743164}


EP_train:0:  22%|| 6132/27626 [14:22<50:46,  7.05it/s]

{'epoch': 0, 'iter': 6130, 'avg_loss': 9.394860307148747, 'avg_acc': 50.069319849942914, 'loss': 8.307205200195312}


EP_train:0:  22%|| 6142/27626 [14:24<50:49,  7.04it/s]

{'epoch': 0, 'iter': 6140, 'avg_loss': 9.393629730378663, 'avg_acc': 50.07531346686207, 'loss': 9.173666000366211}


EP_train:0:  22%|| 6152/27626 [14:25<50:32,  7.08it/s]

{'epoch': 0, 'iter': 6150, 'avg_loss': 9.392742788785842, 'avg_acc': 50.07112664607381, 'loss': 8.173057556152344}


EP_train:0:  22%|| 6162/27626 [14:27<50:34,  7.07it/s]

{'epoch': 0, 'iter': 6160, 'avg_loss': 9.392190609866779, 'avg_acc': 50.0770978737218, 'loss': 8.297392845153809}


EP_train:0:  22%|| 6172/27626 [14:28<50:46,  7.04it/s]

{'epoch': 0, 'iter': 6170, 'avg_loss': 9.392466181314777, 'avg_acc': 50.07393453249068, 'loss': 9.706412315368652}


EP_train:0:  22%|| 6182/27626 [14:29<50:22,  7.09it/s]

{'epoch': 0, 'iter': 6180, 'avg_loss': 9.391553544512151, 'avg_acc': 50.07280375343795, 'loss': 8.933459281921387}


EP_train:0:  22%|| 6192/27626 [14:31<50:30,  7.07it/s]

{'epoch': 0, 'iter': 6190, 'avg_loss': 9.390244235940058, 'avg_acc': 50.07016233241802, 'loss': 8.4580078125}


EP_train:0:  22%|| 6202/27626 [14:32<50:30,  7.07it/s]

{'epoch': 0, 'iter': 6200, 'avg_loss': 9.389491284245388, 'avg_acc': 50.073576842444766, 'loss': 9.005253791809082}


EP_train:0:  22%|| 6212/27626 [14:34<50:52,  7.01it/s]

{'epoch': 0, 'iter': 6210, 'avg_loss': 9.388556557027178, 'avg_acc': 50.07245210111093, 'loss': 9.03138256072998}


EP_train:0:  23%|| 6222/27626 [14:35<50:32,  7.06it/s]

{'epoch': 0, 'iter': 6220, 'avg_loss': 9.387120161192955, 'avg_acc': 50.07384262980228, 'loss': 8.656801223754883}


EP_train:0:  23%|| 6232/27626 [14:36<50:11,  7.10it/s]

{'epoch': 0, 'iter': 6230, 'avg_loss': 9.385990069280217, 'avg_acc': 50.077736318407965, 'loss': 8.667984008789062}


EP_train:0:  23%|| 6242/27626 [14:38<50:00,  7.13it/s]

{'epoch': 0, 'iter': 6240, 'avg_loss': 9.38509785343947, 'avg_acc': 50.08161752924211, 'loss': 8.411903381347656}


EP_train:0:  23%|| 6252/27626 [14:39<50:17,  7.08it/s]

{'epoch': 0, 'iter': 6250, 'avg_loss': 9.384047917415002, 'avg_acc': 50.08398656215005, 'loss': 8.293266296386719}


EP_train:0:  23%|| 6262/27626 [14:41<50:34,  7.04it/s]

{'epoch': 0, 'iter': 6260, 'avg_loss': 9.383020092705685, 'avg_acc': 50.09034099984028, 'loss': 9.19913101196289}


EP_train:0:  23%|| 6272/27626 [14:42<50:29,  7.05it/s]

{'epoch': 0, 'iter': 6270, 'avg_loss': 9.381490262192301, 'avg_acc': 50.08621033328018, 'loss': 7.523767948150635}


EP_train:0:  23%|| 6282/27626 [14:44<50:12,  7.09it/s]

{'epoch': 0, 'iter': 6280, 'avg_loss': 9.381130029775216, 'avg_acc': 50.08308788409489, 'loss': 9.865399360656738}


EP_train:0:  23%|| 6292/27626 [14:45<50:16,  7.07it/s]

{'epoch': 0, 'iter': 6290, 'avg_loss': 9.380071374271353, 'avg_acc': 50.08295580988714, 'loss': 8.437209129333496}


EP_train:0:  23%|| 6302/27626 [14:46<50:03,  7.10it/s]

{'epoch': 0, 'iter': 6300, 'avg_loss': 9.378562844380408, 'avg_acc': 50.07588081256943, 'loss': 7.975605010986328}


EP_train:0:  23%|| 6312/27626 [14:48<50:20,  7.06it/s]

{'epoch': 0, 'iter': 6310, 'avg_loss': 9.376547132575782, 'avg_acc': 50.068333069244176, 'loss': 8.245085716247559}


EP_train:0:  23%|| 6322/27626 [14:49<50:04,  7.09it/s]

{'epoch': 0, 'iter': 6320, 'avg_loss': 9.375620836623956, 'avg_acc': 50.058337288403735, 'loss': 8.414769172668457}


EP_train:0:  23%|| 6332/27626 [14:51<50:06,  7.08it/s]

{'epoch': 0, 'iter': 6330, 'avg_loss': 9.374794439199205, 'avg_acc': 50.06170036329174, 'loss': 8.467070579528809}


EP_train:0:  23%|| 6342/27626 [14:52<50:34,  7.01it/s]

{'epoch': 0, 'iter': 6340, 'avg_loss': 9.373740861965192, 'avg_acc': 50.0680097776376, 'loss': 8.795066833496094}


EP_train:0:  23%|| 6352/27626 [14:53<50:59,  6.95it/s]

{'epoch': 0, 'iter': 6350, 'avg_loss': 9.373050920420079, 'avg_acc': 50.06839474098567, 'loss': 8.090502738952637}


EP_train:0:  23%|| 6362/27626 [14:55<50:14,  7.05it/s]

{'epoch': 0, 'iter': 6360, 'avg_loss': 9.37198258623202, 'avg_acc': 50.069269768904256, 'loss': 9.456826210021973}


EP_train:0:  23%|| 6372/27626 [14:56<50:09,  7.06it/s]

{'epoch': 0, 'iter': 6370, 'avg_loss': 9.371806540718971, 'avg_acc': 50.07553759221472, 'loss': 9.08250904083252}


EP_train:0:  23%|| 6382/27626 [14:58<50:19,  7.04it/s]

{'epoch': 0, 'iter': 6380, 'avg_loss': 9.370781405594931, 'avg_acc': 50.07982682965052, 'loss': 8.923532485961914}


EP_train:0:  23%|| 6392/27626 [14:59<50:11,  7.05it/s]

{'epoch': 0, 'iter': 6390, 'avg_loss': 9.369161578303602, 'avg_acc': 50.08067986230637, 'loss': 8.400429725646973}


EP_train:0:  23%|| 6402/27626 [15:00<49:59,  7.08it/s]

{'epoch': 0, 'iter': 6400, 'avg_loss': 9.36806251346795, 'avg_acc': 50.07615997500391, 'loss': 8.4295015335083}


EP_train:0:  23%|| 6412/27626 [15:02<49:54,  7.08it/s]

{'epoch': 0, 'iter': 6410, 'avg_loss': 9.367323864493684, 'avg_acc': 50.079940726875684, 'loss': 9.71828556060791}


EP_train:0:  23%|| 6422/27626 [15:03<49:58,  7.07it/s]

{'epoch': 0, 'iter': 6420, 'avg_loss': 9.366772013686376, 'avg_acc': 50.08468307117272, 'loss': 8.571573257446289}


EP_train:0:  23%|| 6432/27626 [15:05<50:03,  7.06it/s]

{'epoch': 0, 'iter': 6430, 'avg_loss': 9.365489032290398, 'avg_acc': 50.08600917431193, 'loss': 8.273640632629395}


EP_train:0:  23%|| 6442/27626 [15:06<50:08,  7.04it/s]

{'epoch': 0, 'iter': 6440, 'avg_loss': 9.36428864537846, 'avg_acc': 50.08830150597733, 'loss': 8.901249885559082}


EP_train:0:  23%|| 6452/27626 [15:07<50:02,  7.05it/s]

{'epoch': 0, 'iter': 6450, 'avg_loss': 9.362907642210164, 'avg_acc': 50.08864904665943, 'loss': 9.044879913330078}


EP_train:0:  23%|| 6462/27626 [15:09<49:43,  7.09it/s]

{'epoch': 0, 'iter': 6460, 'avg_loss': 9.36197106916826, 'avg_acc': 50.087544497755765, 'loss': 8.152427673339844}


EP_train:0:  23%|| 6472/27626 [15:10<49:38,  7.10it/s]

{'epoch': 0, 'iter': 6470, 'avg_loss': 9.360972143107487, 'avg_acc': 50.09465306753207, 'loss': 8.716438293457031}


EP_train:0:  23%|| 6482/27626 [15:12<49:29,  7.12it/s]

{'epoch': 0, 'iter': 6480, 'avg_loss': 9.360123511983774, 'avg_acc': 50.0892030550841, 'loss': 8.343852996826172}


EP_train:0:  23%|| 6492/27626 [15:13<49:35,  7.10it/s]

{'epoch': 0, 'iter': 6490, 'avg_loss': 9.359676815882759, 'avg_acc': 50.08906562933292, 'loss': 9.826181411743164}


EP_train:0:  24%|| 6502/27626 [15:14<49:42,  7.08it/s]

{'epoch': 0, 'iter': 6500, 'avg_loss': 9.35840350887332, 'avg_acc': 50.09085140747578, 'loss': 8.707426071166992}


EP_train:0:  24%|| 6512/27626 [15:16<50:00,  7.04it/s]

{'epoch': 0, 'iter': 6510, 'avg_loss': 9.35758798767978, 'avg_acc': 50.086872216249425, 'loss': 8.612217903137207}


EP_train:0:  24%|| 6522/27626 [15:17<50:00,  7.03it/s]

{'epoch': 0, 'iter': 6520, 'avg_loss': 9.356591865376924, 'avg_acc': 50.08769743904309, 'loss': 9.231439590454102}


EP_train:0:  24%|| 6532/27626 [15:19<50:21,  6.98it/s]

{'epoch': 0, 'iter': 6530, 'avg_loss': 9.35560890931372, 'avg_acc': 50.08756316031236, 'loss': 9.29750919342041}


EP_train:0:  24%|| 6542/27626 [15:20<49:52,  7.05it/s]

{'epoch': 0, 'iter': 6540, 'avg_loss': 9.354086893338565, 'avg_acc': 50.07882968964989, 'loss': 7.9958367347717285}


EP_train:0:  24%|| 6552/27626 [15:22<49:23,  7.11it/s]

{'epoch': 0, 'iter': 6550, 'avg_loss': 9.352797975032098, 'avg_acc': 50.07012288200274, 'loss': 8.589356422424316}


EP_train:0:  24%|| 6562/27626 [15:23<49:32,  7.09it/s]

{'epoch': 0, 'iter': 6560, 'avg_loss': 9.35184575915064, 'avg_acc': 50.074302697759485, 'loss': 8.955336570739746}


EP_train:0:  24%|| 6572/27626 [15:24<49:35,  7.08it/s]

{'epoch': 0, 'iter': 6570, 'avg_loss': 9.351273009410969, 'avg_acc': 50.071811748592296, 'loss': 8.166312217712402}


EP_train:0:  24%|| 6582/27626 [15:26<49:18,  7.11it/s]

{'epoch': 0, 'iter': 6580, 'avg_loss': 9.349955923451132, 'avg_acc': 50.072652332472266, 'loss': 8.951957702636719}


EP_train:0:  24%|| 6592/27626 [15:27<49:25,  7.09it/s]

{'epoch': 0, 'iter': 6590, 'avg_loss': 9.349207264829742, 'avg_acc': 50.06543013199818, 'loss': 8.732563018798828}


EP_train:0:  24%|| 6602/27626 [15:29<49:33,  7.07it/s]

{'epoch': 0, 'iter': 6600, 'avg_loss': 9.347812711750084, 'avg_acc': 50.06438418421452, 'loss': 7.599575042724609}


EP_train:0:  24%|| 6612/27626 [15:30<50:00,  7.00it/s]

{'epoch': 0, 'iter': 6610, 'avg_loss': 9.347236137566128, 'avg_acc': 50.06948646195735, 'loss': 9.00065803527832}


EP_train:0:  24%|| 6622/27626 [15:31<50:06,  6.99it/s]

{'epoch': 0, 'iter': 6620, 'avg_loss': 9.346792542489702, 'avg_acc': 50.0755172934602, 'loss': 9.312726974487305}


EP_train:0:  24%|| 6632/27626 [15:33<49:37,  7.05it/s]

{'epoch': 0, 'iter': 6630, 'avg_loss': 9.345695780945514, 'avg_acc': 50.07775976474137, 'loss': 8.40833568572998}


EP_train:0:  24%|| 6642/27626 [15:34<49:37,  7.05it/s]

{'epoch': 0, 'iter': 6640, 'avg_loss': 9.344832372672585, 'avg_acc': 50.071995934347235, 'loss': 9.07229995727539}


EP_train:0:  24%|| 6652/27626 [15:36<49:28,  7.07it/s]

{'epoch': 0, 'iter': 6650, 'avg_loss': 9.34349572542036, 'avg_acc': 50.07188768606225, 'loss': 9.677656173706055}


EP_train:0:  24%|| 6662/27626 [15:37<49:50,  7.01it/s]

{'epoch': 0, 'iter': 6660, 'avg_loss': 9.342215331856007, 'avg_acc': 50.07318720912776, 'loss': 8.051966667175293}


EP_train:0:  24%|| 6672/27626 [15:38<49:12,  7.10it/s]

{'epoch': 0, 'iter': 6670, 'avg_loss': 9.341522292707692, 'avg_acc': 50.07260905411483, 'loss': 9.535323143005371}


EP_train:0:  24%|| 6682/27626 [15:40<49:05,  7.11it/s]

{'epoch': 0, 'iter': 6680, 'avg_loss': 9.340769097719733, 'avg_acc': 50.0804520281395, 'loss': 8.479681015014648}


EP_train:0:  24%|| 6692/27626 [15:41<49:29,  7.05it/s]

{'epoch': 0, 'iter': 6690, 'avg_loss': 9.339775787699011, 'avg_acc': 50.07192497384546, 'loss': 8.228310585021973}


EP_train:0:  24%|| 6702/27626 [15:43<49:37,  7.03it/s]

{'epoch': 0, 'iter': 6700, 'avg_loss': 9.339012632155095, 'avg_acc': 50.06855320101478, 'loss': 9.064294815063477}


EP_train:0:  24%|| 6712/27626 [15:44<49:34,  7.03it/s]

{'epoch': 0, 'iter': 6710, 'avg_loss': 9.338099488556926, 'avg_acc': 50.072641931157804, 'loss': 10.007623672485352}


EP_train:0:  24%|| 6722/27626 [15:45<49:25,  7.05it/s]

{'epoch': 0, 'iter': 6720, 'avg_loss': 9.337196036084674, 'avg_acc': 50.076253533700346, 'loss': 9.33234691619873}


EP_train:0:  24%|| 6732/27626 [15:47<49:08,  7.09it/s]

{'epoch': 0, 'iter': 6730, 'avg_loss': 9.335942738478755, 'avg_acc': 50.07567597682365, 'loss': 7.855754852294922}


EP_train:0:  24%|| 6742/27626 [15:48<49:23,  7.05it/s]

{'epoch': 0, 'iter': 6740, 'avg_loss': 9.335433068959476, 'avg_acc': 50.08159026850616, 'loss': 9.55894660949707}


EP_train:0:  24%|| 6752/27626 [15:50<49:16,  7.06it/s]

{'epoch': 0, 'iter': 6750, 'avg_loss': 9.334546860898447, 'avg_acc': 50.07776625685084, 'loss': 9.365265846252441}


EP_train:0:  24%|| 6762/27626 [15:51<49:09,  7.07it/s]

{'epoch': 0, 'iter': 6760, 'avg_loss': 9.333888821375474, 'avg_acc': 50.07811344475669, 'loss': 9.002694129943848}


EP_train:0:  25%|| 6772/27626 [15:52<49:19,  7.05it/s]

{'epoch': 0, 'iter': 6770, 'avg_loss': 9.33344620423098, 'avg_acc': 50.08076724265249, 'loss': 8.60117244720459}


EP_train:0:  25%|| 6782/27626 [15:54<49:16,  7.05it/s]

{'epoch': 0, 'iter': 6780, 'avg_loss': 9.332499055801952, 'avg_acc': 50.08156982745907, 'loss': 9.154598236083984}


EP_train:0:  25%|| 6792/27626 [15:55<50:00,  6.94it/s]

{'epoch': 0, 'iter': 6790, 'avg_loss': 9.331354553895045, 'avg_acc': 50.08467088793992, 'loss': 8.665295600891113}


EP_train:0:  25%|| 6802/27626 [15:57<49:28,  7.01it/s]

{'epoch': 0, 'iter': 6800, 'avg_loss': 9.330873824308732, 'avg_acc': 50.08408689898545, 'loss': 8.696505546569824}


EP_train:0:  25%|| 6812/27626 [15:58<49:01,  7.08it/s]

{'epoch': 0, 'iter': 6810, 'avg_loss': 9.33011415116396, 'avg_acc': 50.07983409191014, 'loss': 8.670230865478516}


EP_train:0:  25%|| 6822/27626 [16:00<48:58,  7.08it/s]

{'epoch': 0, 'iter': 6820, 'avg_loss': 9.32918908564231, 'avg_acc': 50.07238674681132, 'loss': 8.058876991271973}


EP_train:0:  25%|| 6832/27626 [16:01<48:59,  7.07it/s]

{'epoch': 0, 'iter': 6830, 'avg_loss': 9.327742916967733, 'avg_acc': 50.075940565071, 'loss': 8.734321594238281}


EP_train:0:  25%|| 6842/27626 [16:02<48:53,  7.08it/s]

{'epoch': 0, 'iter': 6840, 'avg_loss': 9.326534964713833, 'avg_acc': 50.079027189007455, 'loss': 9.055152893066406}


EP_train:0:  25%|| 6852/27626 [16:04<49:01,  7.06it/s]

{'epoch': 0, 'iter': 6850, 'avg_loss': 9.325696984723857, 'avg_acc': 50.08575390453949, 'loss': 9.690035820007324}


EP_train:0:  25%|| 6862/27626 [16:05<49:00,  7.06it/s]

{'epoch': 0, 'iter': 6860, 'avg_loss': 9.324706915741567, 'avg_acc': 50.091094592624984, 'loss': 7.433672904968262}


EP_train:0:  25%|| 6872/27626 [16:07<49:02,  7.05it/s]

{'epoch': 0, 'iter': 6870, 'avg_loss': 9.324057126284652, 'avg_acc': 50.093690874690736, 'loss': 9.249162673950195}


EP_train:0:  25%|| 6882/27626 [16:08<48:56,  7.06it/s]

{'epoch': 0, 'iter': 6880, 'avg_loss': 9.323058482398649, 'avg_acc': 50.093554715884316, 'loss': 7.960327625274658}


EP_train:0:  25%|| 6892/27626 [16:09<49:13,  7.02it/s]

{'epoch': 0, 'iter': 6890, 'avg_loss': 9.322272745657514, 'avg_acc': 50.08162821070962, 'loss': 7.985804557800293}


EP_train:0:  25%|| 6902/27626 [16:11<48:43,  7.09it/s]

{'epoch': 0, 'iter': 6900, 'avg_loss': 9.321091404135306, 'avg_acc': 50.07879292856108, 'loss': 8.305270195007324}


EP_train:0:  25%|| 6912/27626 [16:12<48:41,  7.09it/s]

{'epoch': 0, 'iter': 6910, 'avg_loss': 9.320194860940523, 'avg_acc': 50.07551367385328, 'loss': 8.713544845581055}


EP_train:0:  25%|| 6922/27626 [16:14<48:51,  7.06it/s]

{'epoch': 0, 'iter': 6920, 'avg_loss': 9.319365743990666, 'avg_acc': 50.07495304146799, 'loss': 8.60042953491211}


EP_train:0:  25%|| 6932/27626 [16:15<48:41,  7.08it/s]

{'epoch': 0, 'iter': 6930, 'avg_loss': 9.318592238780516, 'avg_acc': 50.07349228105612, 'loss': 8.526636123657227}


EP_train:0:  25%|| 6942/27626 [16:17<48:45,  7.07it/s]

{'epoch': 0, 'iter': 6940, 'avg_loss': 9.317746724163209, 'avg_acc': 50.08058997262642, 'loss': 9.393463134765625}


EP_train:0:  25%|| 6952/27626 [16:18<48:55,  7.04it/s]

{'epoch': 0, 'iter': 6950, 'avg_loss': 9.317407254263097, 'avg_acc': 50.080024456912675, 'loss': 8.262190818786621}


EP_train:0:  25%|| 6962/27626 [16:19<48:27,  7.11it/s]

{'epoch': 0, 'iter': 6960, 'avg_loss': 9.316687795044173, 'avg_acc': 50.07631805775033, 'loss': 7.937877178192139}


EP_train:0:  25%|| 6972/27626 [16:21<49:01,  7.02it/s]

{'epoch': 0, 'iter': 6970, 'avg_loss': 9.315978579807787, 'avg_acc': 50.076208578396205, 'loss': 9.12935733795166}


EP_train:0:  25%|| 6982/27626 [16:22<49:15,  6.99it/s]

{'epoch': 0, 'iter': 6980, 'avg_loss': 9.315506124612579, 'avg_acc': 50.078337630711935, 'loss': 9.142693519592285}


EP_train:0:  25%|| 6992/27626 [16:24<48:28,  7.09it/s]

{'epoch': 0, 'iter': 6990, 'avg_loss': 9.315090463322097, 'avg_acc': 50.08090759547991, 'loss': 9.017463684082031}


EP_train:0:  25%|| 7002/27626 [16:25<48:43,  7.05it/s]

{'epoch': 0, 'iter': 7000, 'avg_loss': 9.314211885582768, 'avg_acc': 50.07989930009998, 'loss': 8.949833869934082}


EP_train:0:  25%|| 7012/27626 [16:26<48:23,  7.10it/s]

{'epoch': 0, 'iter': 7010, 'avg_loss': 9.31267481097786, 'avg_acc': 50.074882327770645, 'loss': 8.09934139251709}


EP_train:0:  25%|| 7022/27626 [16:28<48:47,  7.04it/s]

{'epoch': 0, 'iter': 7020, 'avg_loss': 9.311770101266612, 'avg_acc': 50.07833641931349, 'loss': 8.633938789367676}


EP_train:0:  25%|| 7032/27626 [16:29<48:44,  7.04it/s]

{'epoch': 0, 'iter': 7030, 'avg_loss': 9.311172910306993, 'avg_acc': 50.07555824207083, 'loss': 7.592393398284912}


EP_train:0:  25%|| 7042/27626 [16:31<48:28,  7.08it/s]

{'epoch': 0, 'iter': 7040, 'avg_loss': 9.310577249276403, 'avg_acc': 50.067905837239024, 'loss': 8.961878776550293}


EP_train:0:  26%|| 7052/27626 [16:32<48:39,  7.05it/s]

{'epoch': 0, 'iter': 7050, 'avg_loss': 9.309757061840507, 'avg_acc': 50.06293433555524, 'loss': 9.258963584899902}


EP_train:0:  26%|| 7062/27626 [16:33<48:46,  7.03it/s]

{'epoch': 0, 'iter': 7060, 'avg_loss': 9.308631805805659, 'avg_acc': 50.06018977481943, 'loss': 8.987332344055176}


EP_train:0:  26%|| 7072/27626 [16:35<48:59,  6.99it/s]

{'epoch': 0, 'iter': 7070, 'avg_loss': 9.308447052879156, 'avg_acc': 50.06010465280723, 'loss': 9.12551212310791}


EP_train:0:  26%|| 7082/27626 [16:36<48:24,  7.07it/s]

{'epoch': 0, 'iter': 7080, 'avg_loss': 9.307314194018112, 'avg_acc': 50.05472390905239, 'loss': 9.161901473999023}


EP_train:0:  26%|| 7092/27626 [16:38<48:06,  7.11it/s]

{'epoch': 0, 'iter': 7090, 'avg_loss': 9.30703012053275, 'avg_acc': 50.05685023268932, 'loss': 9.093966484069824}


EP_train:0:  26%|| 7102/27626 [16:39<48:27,  7.06it/s]

{'epoch': 0, 'iter': 7100, 'avg_loss': 9.306354509946445, 'avg_acc': 50.062931277284896, 'loss': 8.421285629272461}


EP_train:0:  26%|| 7112/27626 [16:40<48:19,  7.08it/s]

{'epoch': 0, 'iter': 7110, 'avg_loss': 9.305292166531665, 'avg_acc': 50.0654795387428, 'loss': 8.084753036499023}


EP_train:0:  26%|| 7122/27626 [16:42<48:22,  7.06it/s]

{'epoch': 0, 'iter': 7120, 'avg_loss': 9.30452280837387, 'avg_acc': 50.06450990029491, 'loss': 7.299344539642334}


EP_train:0:  26%|| 7132/27626 [16:43<48:09,  7.09it/s]

{'epoch': 0, 'iter': 7130, 'avg_loss': 9.30349877625892, 'avg_acc': 50.05653134202777, 'loss': 9.122325897216797}


EP_train:0:  26%|| 7142/27626 [16:45<48:22,  7.06it/s]

{'epoch': 0, 'iter': 7140, 'avg_loss': 9.303066438293778, 'avg_acc': 50.053826494888675, 'loss': 8.104721069335938}


EP_train:0:  26%|| 7152/27626 [16:46<48:17,  7.07it/s]

{'epoch': 0, 'iter': 7150, 'avg_loss': 9.301898677571472, 'avg_acc': 50.04850720178996, 'loss': 9.411327362060547}


EP_train:0:  26%|| 7162/27626 [16:48<48:39,  7.01it/s]

{'epoch': 0, 'iter': 7160, 'avg_loss': 9.30139605457299, 'avg_acc': 50.04494833123866, 'loss': 8.637813568115234}


EP_train:0:  26%|| 7172/27626 [16:49<48:28,  7.03it/s]

{'epoch': 0, 'iter': 7170, 'avg_loss': 9.300584890506304, 'avg_acc': 50.04357830149212, 'loss': 9.027467727661133}


EP_train:0:  26%|| 7182/27626 [16:50<48:35,  7.01it/s]

{'epoch': 0, 'iter': 7180, 'avg_loss': 9.300207953884017, 'avg_acc': 50.050045258320566, 'loss': 9.253747940063477}


EP_train:0:  26%|| 7192/27626 [16:52<48:33,  7.01it/s]

{'epoch': 0, 'iter': 7190, 'avg_loss': 9.29911946938708, 'avg_acc': 50.044326241134755, 'loss': 7.885655403137207}


EP_train:0:  26%|| 7202/27626 [16:53<48:07,  7.07it/s]

{'epoch': 0, 'iter': 7200, 'avg_loss': 9.297994110389247, 'avg_acc': 50.03688723788363, 'loss': 8.212052345275879}


EP_train:0:  26%|| 7212/27626 [16:55<47:57,  7.10it/s]

{'epoch': 0, 'iter': 7210, 'avg_loss': 9.29706813650643, 'avg_acc': 50.030768964082654, 'loss': 9.54250431060791}


EP_train:0:  26%|| 7222/27626 [16:56<48:01,  7.08it/s]

{'epoch': 0, 'iter': 7220, 'avg_loss': 9.296791459727924, 'avg_acc': 50.029860822600746, 'loss': 9.345937728881836}


EP_train:0:  26%|| 7232/27626 [16:57<47:55,  7.09it/s]

{'epoch': 0, 'iter': 7230, 'avg_loss': 9.296276153526998, 'avg_acc': 50.02679435762688, 'loss': 8.627212524414062}


EP_train:0:  26%|| 7242/27626 [16:59<48:17,  7.04it/s]

{'epoch': 0, 'iter': 7240, 'avg_loss': 9.295418513708558, 'avg_acc': 50.025031073056205, 'loss': 7.875092029571533}


EP_train:0:  26%|| 7252/27626 [17:00<48:40,  6.98it/s]

{'epoch': 0, 'iter': 7250, 'avg_loss': 9.294309592283177, 'avg_acc': 50.02241070197214, 'loss': 8.04541015625}


EP_train:0:  26%|| 7262/27626 [17:02<47:56,  7.08it/s]

{'epoch': 0, 'iter': 7260, 'avg_loss': 9.293575645494586, 'avg_acc': 50.019367167056885, 'loss': 7.9869303703308105}


EP_train:0:  26%|| 7272/27626 [17:03<47:55,  7.08it/s]

{'epoch': 0, 'iter': 7270, 'avg_loss': 9.292819383367025, 'avg_acc': 50.018910741301056, 'loss': 7.954293251037598}


EP_train:0:  26%|| 7282/27626 [17:04<47:53,  7.08it/s]

{'epoch': 0, 'iter': 7280, 'avg_loss': 9.29178689964, 'avg_acc': 50.01673877214669, 'loss': 8.499815940856934}


EP_train:0:  26%|| 7292/27626 [17:06<47:45,  7.10it/s]

{'epoch': 0, 'iter': 7290, 'avg_loss': 9.290273031143965, 'avg_acc': 50.016715814017275, 'loss': 8.452376365661621}


EP_train:0:  26%|| 7302/27626 [17:07<47:42,  7.10it/s]

{'epoch': 0, 'iter': 7300, 'avg_loss': 9.289725476593992, 'avg_acc': 50.01112861251883, 'loss': 9.927275657653809}


EP_train:0:  26%|| 7312/27626 [17:09<47:29,  7.13it/s]

{'epoch': 0, 'iter': 7310, 'avg_loss': 9.288844910093216, 'avg_acc': 50.004701819176574, 'loss': 9.042323112487793}


EP_train:0:  27%|| 7322/27626 [17:10<47:52,  7.07it/s]

{'epoch': 0, 'iter': 7320, 'avg_loss': 9.288079758221913, 'avg_acc': 50.00469539680371, 'loss': 8.79045581817627}


EP_train:0:  27%|| 7332/27626 [17:11<48:13,  7.01it/s]

{'epoch': 0, 'iter': 7330, 'avg_loss': 9.287566987379659, 'avg_acc': 50.00170508798254, 'loss': 9.62297534942627}


EP_train:0:  27%|| 7342/27626 [17:13<48:06,  7.03it/s]

{'epoch': 0, 'iter': 7340, 'avg_loss': 9.286898814792318, 'avg_acc': 50.0051082958725, 'loss': 8.872343063354492}


EP_train:0:  27%|| 7352/27626 [17:14<48:07,  7.02it/s]

{'epoch': 0, 'iter': 7350, 'avg_loss': 9.28669179778345, 'avg_acc': 50.00170044891852, 'loss': 9.408259391784668}


EP_train:0:  27%|| 7362/27626 [17:16<47:47,  7.07it/s]

{'epoch': 0, 'iter': 7360, 'avg_loss': 9.285938739711833, 'avg_acc': 50.00976429832903, 'loss': 8.726524353027344}


EP_train:0:  27%|| 7372/27626 [17:17<47:38,  7.08it/s]

{'epoch': 0, 'iter': 7370, 'avg_loss': 9.285095350241955, 'avg_acc': 50.0055114638448, 'loss': 8.80098819732666}


EP_train:0:  27%|| 7382/27626 [17:19<47:32,  7.10it/s]

{'epoch': 0, 'iter': 7380, 'avg_loss': 9.284448590257306, 'avg_acc': 50.001693537461044, 'loss': 9.526082992553711}


EP_train:0:  27%|| 7392/27626 [17:20<47:24,  7.11it/s]

{'epoch': 0, 'iter': 7390, 'avg_loss': 9.28370065815538, 'avg_acc': 50.003805303747804, 'loss': 8.993468284606934}


EP_train:0:  27%|| 7402/27626 [17:21<47:26,  7.10it/s]

{'epoch': 0, 'iter': 7400, 'avg_loss': 9.28298522594603, 'avg_acc': 50.00295568166464, 'loss': 8.086629867553711}


EP_train:0:  27%|| 7412/27626 [17:23<47:34,  7.08it/s]

{'epoch': 0, 'iter': 7410, 'avg_loss': 9.281837756777563, 'avg_acc': 50.00759006881662, 'loss': 8.763191223144531}


EP_train:0:  27%|| 7422/27626 [17:24<47:39,  7.06it/s]

{'epoch': 0, 'iter': 7420, 'avg_loss': 9.280793761230354, 'avg_acc': 50.01010645465571, 'loss': 8.737188339233398}


EP_train:0:  27%|| 7432/27626 [17:26<48:00,  7.01it/s]

{'epoch': 0, 'iter': 7430, 'avg_loss': 9.279885955941964, 'avg_acc': 50.007990176288516, 'loss': 7.684420585632324}


EP_train:0:  27%|| 7442/27626 [17:27<47:54,  7.02it/s]

{'epoch': 0, 'iter': 7440, 'avg_loss': 9.279101634118494, 'avg_acc': 50.01175917215428, 'loss': 9.591631889343262}


EP_train:0:  27%|| 7452/27626 [17:28<47:44,  7.04it/s]

{'epoch': 0, 'iter': 7450, 'avg_loss': 9.27842688717277, 'avg_acc': 50.00796872902966, 'loss': 8.854307174682617}


EP_train:0:  27%|| 7462/27626 [17:30<47:23,  7.09it/s]

{'epoch': 0, 'iter': 7460, 'avg_loss': 9.27751180119368, 'avg_acc': 50.00502613590672, 'loss': 8.679423332214355}


EP_train:0:  27%|| 7472/27626 [17:31<47:18,  7.10it/s]

{'epoch': 0, 'iter': 7470, 'avg_loss': 9.27638557978127, 'avg_acc': 50.00460112434748, 'loss': 8.684418678283691}


EP_train:0:  27%|| 7482/27626 [17:33<47:22,  7.09it/s]

{'epoch': 0, 'iter': 7480, 'avg_loss': 9.275916740515385, 'avg_acc': 50.00208862451544, 'loss': 8.731322288513184}


EP_train:0:  27%|| 7492/27626 [17:34<47:41,  7.04it/s]

{'epoch': 0, 'iter': 7490, 'avg_loss': 9.275610488241815, 'avg_acc': 50.00083433453477, 'loss': 8.981796264648438}


EP_train:0:  27%|| 7502/27626 [17:35<47:24,  7.07it/s]

{'epoch': 0, 'iter': 7500, 'avg_loss': 9.274916183660864, 'avg_acc': 49.99875016664445, 'loss': 9.198843002319336}


EP_train:0:  27%|| 7512/27626 [17:37<47:41,  7.03it/s]

{'epoch': 0, 'iter': 7510, 'avg_loss': 9.274754717479833, 'avg_acc': 49.99750366129677, 'loss': 8.863139152526855}


EP_train:0:  27%|| 7522/27626 [17:38<47:27,  7.06it/s]

{'epoch': 0, 'iter': 7520, 'avg_loss': 9.27375785151531, 'avg_acc': 49.99376745113682, 'loss': 8.493372917175293}


EP_train:0:  27%|| 7532/27626 [17:40<47:29,  7.05it/s]

{'epoch': 0, 'iter': 7530, 'avg_loss': 9.27265131772712, 'avg_acc': 49.99128601779312, 'loss': 7.673415660858154}


EP_train:0:  27%|| 7542/27626 [17:41<47:20,  7.07it/s]

{'epoch': 0, 'iter': 7540, 'avg_loss': 9.271907139545315, 'avg_acc': 49.98715356053574, 'loss': 8.498356819152832}


EP_train:0:  27%|| 7552/27626 [17:42<47:12,  7.09it/s]

{'epoch': 0, 'iter': 7550, 'avg_loss': 9.271478186662806, 'avg_acc': 49.97889352403655, 'loss': 8.74553108215332}


EP_train:0:  27%|| 7562/27626 [17:44<47:25,  7.05it/s]

{'epoch': 0, 'iter': 7560, 'avg_loss': 9.27063872999906, 'avg_acc': 49.98222787991006, 'loss': 8.427693367004395}


EP_train:0:  27%|| 7572/27626 [17:45<47:18,  7.07it/s]

{'epoch': 0, 'iter': 7570, 'avg_loss': 9.270306909372463, 'avg_acc': 49.98101307621186, 'loss': 9.036469459533691}


EP_train:0:  27%|| 7582/27626 [17:47<47:23,  7.05it/s]

{'epoch': 0, 'iter': 7580, 'avg_loss': 9.269806671482232, 'avg_acc': 49.983099195356814, 'loss': 8.56503677368164}


EP_train:0:  27%|| 7592/27626 [17:48<47:30,  7.03it/s]

{'epoch': 0, 'iter': 7590, 'avg_loss': 9.269112737168438, 'avg_acc': 49.98229811619023, 'loss': 8.893960952758789}


EP_train:0:  28%|| 7602/27626 [17:50<47:11,  7.07it/s]

{'epoch': 0, 'iter': 7600, 'avg_loss': 9.26860025841505, 'avg_acc': 49.97574332324694, 'loss': 9.01713752746582}


EP_train:0:  28%|| 7612/27626 [17:51<47:30,  7.02it/s]

{'epoch': 0, 'iter': 7610, 'avg_loss': 9.26777127311916, 'avg_acc': 49.97207988437788, 'loss': 8.932263374328613}


EP_train:0:  28%|| 7622/27626 [17:52<47:26,  7.03it/s]

{'epoch': 0, 'iter': 7620, 'avg_loss': 9.266806003131073, 'avg_acc': 49.97293662249049, 'loss': 8.99924087524414}


EP_train:0:  28%|| 7632/27626 [17:54<47:02,  7.08it/s]

{'epoch': 0, 'iter': 7630, 'avg_loss': 9.26609439809862, 'avg_acc': 49.96723889398506, 'loss': 8.494616508483887}


EP_train:0:  28%|| 7642/27626 [17:55<47:09,  7.06it/s]

{'epoch': 0, 'iter': 7640, 'avg_loss': 9.265322018942742, 'avg_acc': 49.96441892422458, 'loss': 7.824680805206299}


EP_train:0:  28%|| 7652/27626 [17:57<47:12,  7.05it/s]

{'epoch': 0, 'iter': 7650, 'avg_loss': 9.26485015470021, 'avg_acc': 49.957930335903804, 'loss': 8.520512580871582}


EP_train:0:  28%|| 7662/27626 [17:58<47:08,  7.06it/s]

{'epoch': 0, 'iter': 7660, 'avg_loss': 9.264121359780445, 'avg_acc': 49.957577339772875, 'loss': 7.997520446777344}


EP_train:0:  28%|| 7672/27626 [17:59<47:26,  7.01it/s]

{'epoch': 0, 'iter': 7670, 'avg_loss': 9.262967509748478, 'avg_acc': 49.9560031286664, 'loss': 8.548715591430664}


EP_train:0:  28%|| 7682/27626 [18:01<47:06,  7.06it/s]

{'epoch': 0, 'iter': 7680, 'avg_loss': 9.261611671035542, 'avg_acc': 49.96053573753417, 'loss': 7.669604301452637}


EP_train:0:  28%|| 7692/27626 [18:02<47:20,  7.02it/s]

{'epoch': 0, 'iter': 7690, 'avg_loss': 9.260160260540877, 'avg_acc': 49.95652385905604, 'loss': 9.65174388885498}


EP_train:0:  28%|| 7702/27626 [18:04<47:29,  6.99it/s]

{'epoch': 0, 'iter': 7700, 'avg_loss': 9.259283473779282, 'avg_acc': 49.95130502532138, 'loss': 8.603461265563965}


EP_train:0:  28%|| 7712/27626 [18:05<46:52,  7.08it/s]

{'epoch': 0, 'iter': 7710, 'avg_loss': 9.258536217293376, 'avg_acc': 49.95258397095059, 'loss': 8.123151779174805}


EP_train:0:  28%|| 7722/27626 [18:06<47:03,  7.05it/s]

{'epoch': 0, 'iter': 7720, 'avg_loss': 9.257674335429835, 'avg_acc': 49.94738375858049, 'loss': 10.266033172607422}


EP_train:0:  28%|| 7732/27626 [18:08<47:13,  7.02it/s]

{'epoch': 0, 'iter': 7730, 'avg_loss': 9.257109534337996, 'avg_acc': 49.94785603414824, 'loss': 9.42551326751709}


EP_train:0:  28%|| 7742/27626 [18:09<46:46,  7.08it/s]

{'epoch': 0, 'iter': 7740, 'avg_loss': 9.256789257846403, 'avg_acc': 49.94751970029712, 'loss': 8.93420696258545}


EP_train:0:  28%|| 7752/27626 [18:11<46:48,  7.08it/s]

{'epoch': 0, 'iter': 7750, 'avg_loss': 9.255933911765625, 'avg_acc': 49.95565088375694, 'loss': 8.531023979187012}


EP_train:0:  28%|| 7762/27626 [18:12<46:56,  7.05it/s]

{'epoch': 0, 'iter': 7760, 'avg_loss': 9.254788424424667, 'avg_acc': 49.95329210153331, 'loss': 7.910821914672852}


EP_train:0:  28%|| 7772/27626 [18:13<47:05,  7.03it/s]

{'epoch': 0, 'iter': 7770, 'avg_loss': 9.254329362564174, 'avg_acc': 49.95214579848153, 'loss': 9.149149894714355}


EP_train:0:  28%|| 7782/27626 [18:15<47:03,  7.03it/s]

{'epoch': 0, 'iter': 7780, 'avg_loss': 9.253586666394959, 'avg_acc': 49.94297005526282, 'loss': 9.00759220123291}


EP_train:0:  28%|| 7792/27626 [18:16<47:02,  7.03it/s]

{'epoch': 0, 'iter': 7790, 'avg_loss': 9.252382704541766, 'avg_acc': 49.937427801309205, 'loss': 7.5682783126831055}


EP_train:0:  28%|| 7802/27626 [18:18<46:53,  7.05it/s]

{'epoch': 0, 'iter': 7800, 'avg_loss': 9.251282459068811, 'avg_acc': 49.93270093577746, 'loss': 7.659228801727295}


EP_train:0:  28%|| 7812/27626 [18:19<46:57,  7.03it/s]

{'epoch': 0, 'iter': 7810, 'avg_loss': 9.250398110311691, 'avg_acc': 49.93318717193701, 'loss': 8.468793869018555}


EP_train:0:  28%|| 7822/27626 [18:21<46:35,  7.08it/s]

{'epoch': 0, 'iter': 7820, 'avg_loss': 9.24971853822504, 'avg_acc': 49.93007607722797, 'loss': 8.872614860534668}


EP_train:0:  28%|| 7832/27626 [18:22<46:54,  7.03it/s]

{'epoch': 0, 'iter': 7830, 'avg_loss': 9.249020903644213, 'avg_acc': 49.92657387306858, 'loss': 9.17139720916748}


EP_train:0:  28%|| 7842/27626 [18:23<46:46,  7.05it/s]

{'epoch': 0, 'iter': 7840, 'avg_loss': 9.248429511247462, 'avg_acc': 49.923877694171665, 'loss': 7.848311901092529}


EP_train:0:  28%|| 7852/27626 [18:25<46:34,  7.08it/s]

{'epoch': 0, 'iter': 7850, 'avg_loss': 9.247654213478361, 'avg_acc': 49.92357661444402, 'loss': 9.486109733581543}


EP_train:0:  28%|| 7862/27626 [18:26<47:06,  6.99it/s]

{'epoch': 0, 'iter': 7860, 'avg_loss': 9.246972641861783, 'avg_acc': 49.92486642920748, 'loss': 8.337392807006836}


EP_train:0:  28%|| 7872/27626 [18:28<46:41,  7.05it/s]

{'epoch': 0, 'iter': 7870, 'avg_loss': 9.246069208667839, 'avg_acc': 49.92575593952484, 'loss': 8.621678352355957}


EP_train:0:  29%|| 7882/27626 [18:29<46:45,  7.04it/s]

{'epoch': 0, 'iter': 7880, 'avg_loss': 9.245507169444464, 'avg_acc': 49.924264052785176, 'loss': 9.05597972869873}


EP_train:0:  29%|| 7892/27626 [18:30<46:49,  7.02it/s]

{'epoch': 0, 'iter': 7890, 'avg_loss': 9.24501676085693, 'avg_acc': 49.92475605119757, 'loss': 9.098847389221191}


EP_train:0:  29%|| 7902/27626 [18:32<46:35,  7.05it/s]

{'epoch': 0, 'iter': 7900, 'avg_loss': 9.244748241734525, 'avg_acc': 49.92761992152892, 'loss': 8.952409744262695}


EP_train:0:  29%|| 7912/27626 [18:33<46:35,  7.05it/s]

{'epoch': 0, 'iter': 7910, 'avg_loss': 9.244367304319619, 'avg_acc': 49.9257363165213, 'loss': 8.347408294677734}


EP_train:0:  29%|| 7922/27626 [18:35<46:53,  7.00it/s]

{'epoch': 0, 'iter': 7920, 'avg_loss': 9.243759413648021, 'avg_acc': 49.92622459285444, 'loss': 8.544831275939941}


EP_train:0:  29%|| 7932/27626 [18:36<46:24,  7.07it/s]

{'epoch': 0, 'iter': 7930, 'avg_loss': 9.243070982996896, 'avg_acc': 49.92828773168579, 'loss': 7.946597576141357}


EP_train:0:  29%|| 7942/27626 [18:38<46:23,  7.07it/s]

{'epoch': 0, 'iter': 7940, 'avg_loss': 9.2431814658253, 'avg_acc': 49.93231331066617, 'loss': 9.241533279418945}


EP_train:0:  29%|| 7952/27626 [18:39<46:14,  7.09it/s]

{'epoch': 0, 'iter': 7950, 'avg_loss': 9.242740340839166, 'avg_acc': 49.93239844044774, 'loss': 8.479009628295898}


EP_train:0:  29%|| 7962/27626 [18:40<46:41,  7.02it/s]

{'epoch': 0, 'iter': 7960, 'avg_loss': 9.242533000715563, 'avg_acc': 49.93091320185906, 'loss': 9.00955581665039}


EP_train:0:  29%|| 7972/27626 [18:42<46:31,  7.04it/s]

{'epoch': 0, 'iter': 7970, 'avg_loss': 9.242010039706523, 'avg_acc': 49.92825555137373, 'loss': 8.404994010925293}


EP_train:0:  29%|| 7982/27626 [18:43<46:25,  7.05it/s]

{'epoch': 0, 'iter': 7980, 'avg_loss': 9.24124866290105, 'avg_acc': 49.92482145094599, 'loss': 7.108347415924072}


EP_train:0:  29%|| 7992/27626 [18:45<46:20,  7.06it/s]

{'epoch': 0, 'iter': 7990, 'avg_loss': 9.240377118479786, 'avg_acc': 49.92647978976348, 'loss': 8.941048622131348}


EP_train:0:  29%|| 8002/27626 [18:46<46:25,  7.04it/s]

{'epoch': 0, 'iter': 8000, 'avg_loss': 9.239762690734482, 'avg_acc': 49.9246187976503, 'loss': 8.3854398727417}


EP_train:0:  29%|| 8012/27626 [18:47<46:07,  7.09it/s]

{'epoch': 0, 'iter': 8010, 'avg_loss': 9.239210544729513, 'avg_acc': 49.92978404693547, 'loss': 8.872156143188477}


EP_train:0:  29%|| 8022/27626 [18:49<46:00,  7.10it/s]

{'epoch': 0, 'iter': 8020, 'avg_loss': 9.238950337247203, 'avg_acc': 49.92597556414412, 'loss': 8.306689262390137}


EP_train:0:  29%|| 8032/27626 [18:50<46:30,  7.02it/s]

{'epoch': 0, 'iter': 8030, 'avg_loss': 9.23777930108062, 'avg_acc': 49.92528950317519, 'loss': 9.129643440246582}


EP_train:0:  29%|| 8042/27626 [18:52<46:06,  7.08it/s]

{'epoch': 0, 'iter': 8040, 'avg_loss': 9.237097391722259, 'avg_acc': 49.92849148115906, 'loss': 9.161697387695312}


EP_train:0:  29%|| 8052/27626 [18:53<46:29,  7.02it/s]

{'epoch': 0, 'iter': 8050, 'avg_loss': 9.236843421139076, 'avg_acc': 49.928968451124085, 'loss': 8.340310096740723}


EP_train:0:  29%|| 8062/27626 [18:54<46:35,  7.00it/s]

{'epoch': 0, 'iter': 8060, 'avg_loss': 9.23605490109418, 'avg_acc': 49.92944423768763, 'loss': 9.255105018615723}


EP_train:0:  29%|| 8072/27626 [18:56<46:06,  7.07it/s]

{'epoch': 0, 'iter': 8070, 'avg_loss': 9.235210657681014, 'avg_acc': 49.926821335646146, 'loss': 8.267786026000977}


EP_train:0:  29%|| 8082/27626 [18:57<46:12,  7.05it/s]

{'epoch': 0, 'iter': 8080, 'avg_loss': 9.23475715898963, 'avg_acc': 49.92227137730479, 'loss': 8.965476036071777}


EP_train:0:  29%|| 8092/27626 [18:59<50:59,  6.39it/s]

{'epoch': 0, 'iter': 8090, 'avg_loss': 9.234056476019704, 'avg_acc': 49.92816091954023, 'loss': 8.347697257995605}


EP_train:0:  29%|| 8102/27626 [19:00<46:10,  7.05it/s]

{'epoch': 0, 'iter': 8100, 'avg_loss': 9.23313870322276, 'avg_acc': 49.92863535366004, 'loss': 9.058774948120117}


EP_train:0:  29%|| 8112/27626 [19:02<46:00,  7.07it/s]

{'epoch': 0, 'iter': 8110, 'avg_loss': 9.231639156416803, 'avg_acc': 49.92795278017507, 'loss': 8.32516860961914}


EP_train:0:  29%|| 8122/27626 [19:03<45:39,  7.12it/s]

{'epoch': 0, 'iter': 8120, 'avg_loss': 9.231278562821744, 'avg_acc': 49.93188954562246, 'loss': 9.304234504699707}


EP_train:0:  29%|| 8132/27626 [19:04<46:13,  7.03it/s]

{'epoch': 0, 'iter': 8130, 'avg_loss': 9.230691795726791, 'avg_acc': 49.93696962243266, 'loss': 8.190122604370117}


EP_train:0:  29%|| 8142/27626 [19:06<46:01,  7.05it/s]

{'epoch': 0, 'iter': 8140, 'avg_loss': 9.229979256478945, 'avg_acc': 49.93781476477091, 'loss': 8.893988609313965}


EP_train:0:  30%|| 8152/27626 [19:07<46:36,  6.96it/s]

{'epoch': 0, 'iter': 8150, 'avg_loss': 9.22915608305797, 'avg_acc': 49.940574776101094, 'loss': 8.702690124511719}


EP_train:0:  30%|| 8162/27626 [19:09<46:07,  7.03it/s]

{'epoch': 0, 'iter': 8160, 'avg_loss': 9.228788307668122, 'avg_acc': 49.94141342972675, 'loss': 8.946524620056152}


EP_train:0:  30%|| 8172/27626 [19:10<45:41,  7.10it/s]

{'epoch': 0, 'iter': 8170, 'avg_loss': 9.228466421602814, 'avg_acc': 49.941102680210506, 'loss': 9.007464408874512}


EP_train:0:  30%|| 8182/27626 [19:11<45:30,  7.12it/s]

{'epoch': 0, 'iter': 8180, 'avg_loss': 9.22817672119355, 'avg_acc': 49.94079269038015, 'loss': 8.862618446350098}


EP_train:0:  30%|| 8192/27626 [19:13<46:02,  7.03it/s]

{'epoch': 0, 'iter': 8190, 'avg_loss': 9.227204073481094, 'avg_acc': 49.94010194115492, 'loss': 8.564364433288574}


EP_train:0:  30%|| 8202/27626 [19:14<45:45,  7.07it/s]

{'epoch': 0, 'iter': 8200, 'avg_loss': 9.226464769663192, 'avg_acc': 49.938269723204485, 'loss': 8.671210289001465}


EP_train:0:  30%|| 8212/27626 [19:16<46:04,  7.02it/s]

{'epoch': 0, 'iter': 8210, 'avg_loss': 9.226321788874255, 'avg_acc': 49.94291194738765, 'loss': 8.997956275939941}


EP_train:0:  30%|| 8222/27626 [19:17<45:47,  7.06it/s]

{'epoch': 0, 'iter': 8220, 'avg_loss': 9.225654744736515, 'avg_acc': 49.9448820094879, 'loss': 8.72983169555664}


EP_train:0:  30%|| 8232/27626 [19:18<45:47,  7.06it/s]

{'epoch': 0, 'iter': 8230, 'avg_loss': 9.224820515318987, 'avg_acc': 49.94760660916049, 'loss': 8.598383903503418}


EP_train:0:  30%|| 8242/27626 [19:20<46:04,  7.01it/s]

{'epoch': 0, 'iter': 8240, 'avg_loss': 9.224298079895345, 'avg_acc': 49.9484285887635, 'loss': 8.671939849853516}


EP_train:0:  30%|| 8252/27626 [19:21<45:40,  7.07it/s]

{'epoch': 0, 'iter': 8250, 'avg_loss': 9.22362576171335, 'avg_acc': 49.955687189431586, 'loss': 9.037999153137207}


EP_train:0:  30%|| 8262/27626 [19:23<45:38,  7.07it/s]

{'epoch': 0, 'iter': 8260, 'avg_loss': 9.222968681256024, 'avg_acc': 49.958767098414235, 'loss': 8.42019271850586}


EP_train:0:  30%|| 8272/27626 [19:24<45:52,  7.03it/s]

{'epoch': 0, 'iter': 8270, 'avg_loss': 9.221241969610267, 'avg_acc': 49.95654999395478, 'loss': 7.965289115905762}


EP_train:0:  30%|| 8282/27626 [19:25<45:22,  7.10it/s]

{'epoch': 0, 'iter': 8280, 'avg_loss': 9.220632870205069, 'avg_acc': 49.95697983335346, 'loss': 8.606620788574219}


EP_train:0:  30%|| 8292/27626 [19:27<45:44,  7.04it/s]

{'epoch': 0, 'iter': 8290, 'avg_loss': 9.220286142206382, 'avg_acc': 49.95439331805572, 'loss': 9.075989723205566}


EP_train:0:  30%|| 8302/27626 [19:28<45:29,  7.08it/s]

{'epoch': 0, 'iter': 8300, 'avg_loss': 9.219985865466697, 'avg_acc': 49.95670702325021, 'loss': 9.660665512084961}


EP_train:0:  30%|| 8312/27626 [19:30<45:21,  7.10it/s]

{'epoch': 0, 'iter': 8310, 'avg_loss': 9.220121727395956, 'avg_acc': 49.954127060522204, 'loss': 8.61227798461914}


EP_train:0:  30%|| 8322/27626 [19:31<45:41,  7.04it/s]

{'epoch': 0, 'iter': 8320, 'avg_loss': 9.219630861035936, 'avg_acc': 49.95230441052758, 'loss': 9.605027198791504}


EP_train:0:  30%|| 8332/27626 [19:33<45:35,  7.05it/s]

{'epoch': 0, 'iter': 8330, 'avg_loss': 9.218972643860361, 'avg_acc': 49.951986556235745, 'loss': 9.358207702636719}


EP_train:0:  30%|| 8342/27626 [19:34<45:35,  7.05it/s]

{'epoch': 0, 'iter': 8340, 'avg_loss': 9.218522685249566, 'avg_acc': 49.95616532789833, 'loss': 8.082335472106934}


EP_train:0:  30%|| 8352/27626 [19:35<45:31,  7.06it/s]

{'epoch': 0, 'iter': 8350, 'avg_loss': 9.217762400641183, 'avg_acc': 49.95546940486169, 'loss': 8.39009952545166}


EP_train:0:  30%|| 8362/27626 [19:37<45:46,  7.01it/s]

{'epoch': 0, 'iter': 8360, 'avg_loss': 9.216418968406451, 'avg_acc': 49.95627018299246, 'loss': 9.583067893981934}


EP_train:0:  30%|| 8372/27626 [19:38<45:33,  7.04it/s]

{'epoch': 0, 'iter': 8370, 'avg_loss': 9.216260801975395, 'avg_acc': 49.95968223629196, 'loss': 8.921066284179688}


EP_train:0:  30%|| 8382/27626 [19:40<45:07,  7.11it/s]

{'epoch': 0, 'iter': 8380, 'avg_loss': 9.215319166944727, 'avg_acc': 49.96122181123971, 'loss': 7.674213409423828}


EP_train:0:  30%|| 8392/27626 [19:41<45:09,  7.10it/s]

{'epoch': 0, 'iter': 8390, 'avg_loss': 9.21447095034475, 'avg_acc': 49.959405911095224, 'loss': 8.201611518859863}


EP_train:0:  30%|| 8402/27626 [19:42<45:09,  7.10it/s]

{'epoch': 0, 'iter': 8400, 'avg_loss': 9.21365630826756, 'avg_acc': 49.96205808832282, 'loss': 9.113852500915527}


EP_train:0:  30%|| 8412/27626 [19:44<45:14,  7.08it/s]

{'epoch': 0, 'iter': 8410, 'avg_loss': 9.213364342449253, 'avg_acc': 49.95838782546665, 'loss': 8.6699800491333}


EP_train:0:  30%|| 8422/27626 [19:45<45:29,  7.04it/s]

{'epoch': 0, 'iter': 8420, 'avg_loss': 9.213088866438694, 'avg_acc': 49.9588083363021, 'loss': 9.786027908325195}


EP_train:0:  31%|| 8432/27626 [19:47<45:11,  7.08it/s]

{'epoch': 0, 'iter': 8430, 'avg_loss': 9.21244231306521, 'avg_acc': 49.954038666824815, 'loss': 8.939797401428223}


EP_train:0:  31%|| 8442/27626 [19:48<45:29,  7.03it/s]

{'epoch': 0, 'iter': 8440, 'avg_loss': 9.211793357508821, 'avg_acc': 49.953722900130316, 'loss': 8.328137397766113}


EP_train:0:  31%|| 8452/27626 [19:49<45:12,  7.07it/s]

{'epoch': 0, 'iter': 8450, 'avg_loss': 9.210773874155771, 'avg_acc': 49.953038101999766, 'loss': 8.21739387512207}


EP_train:0:  31%|| 8462/27626 [19:51<45:19,  7.05it/s]

{'epoch': 0, 'iter': 8460, 'avg_loss': 9.21055854729284, 'avg_acc': 49.95235492258598, 'loss': 9.120570182800293}


EP_train:0:  31%|| 8472/27626 [19:52<45:01,  7.09it/s]

{'epoch': 0, 'iter': 8470, 'avg_loss': 9.210148237627156, 'avg_acc': 49.94945992208712, 'loss': 8.501566886901855}


EP_train:0:  31%|| 8482/27626 [19:54<45:22,  7.03it/s]

{'epoch': 0, 'iter': 8480, 'avg_loss': 9.209828729520442, 'avg_acc': 49.950624926305856, 'loss': 9.39992618560791}


EP_train:0:  31%|| 8492/27626 [19:55<45:08,  7.06it/s]

{'epoch': 0, 'iter': 8490, 'avg_loss': 9.209125434955007, 'avg_acc': 49.95325933341185, 'loss': 9.466200828552246}


EP_train:0:  31%|| 8502/27626 [19:56<45:02,  7.08it/s]

{'epoch': 0, 'iter': 8500, 'avg_loss': 9.208547836331084, 'avg_acc': 49.95588754264204, 'loss': 8.611934661865234}


EP_train:0:  31%|| 8512/27626 [19:58<45:10,  7.05it/s]

{'epoch': 0, 'iter': 8510, 'avg_loss': 9.208338559381186, 'avg_acc': 49.96144695100458, 'loss': 9.487333297729492}


EP_train:0:  31%|| 8522/27626 [19:59<44:56,  7.09it/s]

{'epoch': 0, 'iter': 8520, 'avg_loss': 9.208109144788997, 'avg_acc': 49.956357821851896, 'loss': 9.029441833496094}


EP_train:0:  31%|| 8532/27626 [20:01<45:13,  7.04it/s]

{'epoch': 0, 'iter': 8530, 'avg_loss': 9.20757334303037, 'avg_acc': 49.954211112413546, 'loss': 10.1520357131958}


EP_train:0:  31%|| 8542/27626 [20:02<44:45,  7.11it/s]

{'epoch': 0, 'iter': 8540, 'avg_loss': 9.206944808651784, 'avg_acc': 49.955362369745934, 'loss': 8.131619453430176}


EP_train:0:  31%|| 8552/27626 [20:04<44:55,  7.08it/s]

{'epoch': 0, 'iter': 8550, 'avg_loss': 9.2062505973095, 'avg_acc': 49.95285639106537, 'loss': 8.918551445007324}


EP_train:0:  31%|| 8562/27626 [20:05<44:56,  7.07it/s]

{'epoch': 0, 'iter': 8560, 'avg_loss': 9.205975146929163, 'avg_acc': 49.956561733442356, 'loss': 8.686370849609375}


EP_train:0:  31%|| 8572/27626 [20:06<44:54,  7.07it/s]

{'epoch': 0, 'iter': 8570, 'avg_loss': 9.205171790204767, 'avg_acc': 49.951143390502864, 'loss': 8.441367149353027}


EP_train:0:  31%|| 8582/27626 [20:08<44:40,  7.11it/s]

{'epoch': 0, 'iter': 8580, 'avg_loss': 9.204254654123154, 'avg_acc': 49.95156450297168, 'loss': 8.551544189453125}


EP_train:0:  31%|| 8592/27626 [20:09<44:55,  7.06it/s]

{'epoch': 0, 'iter': 8590, 'avg_loss': 9.20324087958857, 'avg_acc': 49.9548946571994, 'loss': 8.216986656188965}


EP_train:0:  31%|| 8602/27626 [20:11<45:13,  7.01it/s]

{'epoch': 0, 'iter': 8600, 'avg_loss': 9.202378810942, 'avg_acc': 49.95494709917451, 'loss': 8.277366638183594}


EP_train:0:  31%|| 8612/27626 [20:12<44:57,  7.05it/s]

{'epoch': 0, 'iter': 8610, 'avg_loss': 9.201891233109524, 'avg_acc': 49.95391069562188, 'loss': 9.014440536499023}


EP_train:0:  31%|| 8622/27626 [20:13<44:41,  7.09it/s]

{'epoch': 0, 'iter': 8620, 'avg_loss': 9.201176070514322, 'avg_acc': 49.95940146154739, 'loss': 8.86920166015625}


EP_train:0:  31%|| 8632/27626 [20:15<44:27,  7.12it/s]

{'epoch': 0, 'iter': 8630, 'avg_loss': 9.200567113056255, 'avg_acc': 49.963431236241455, 'loss': 8.700163841247559}


EP_train:0:  31%|| 8642/27626 [20:16<44:41,  7.08it/s]

{'epoch': 0, 'iter': 8640, 'avg_loss': 9.200762847109727, 'avg_acc': 49.96709003587548, 'loss': 9.234945297241211}


EP_train:0:  31%|| 8652/27626 [20:18<44:29,  7.11it/s]

{'epoch': 0, 'iter': 8650, 'avg_loss': 9.200247739893952, 'avg_acc': 49.970017917003815, 'loss': 7.950128078460693}


EP_train:0:  31%|| 8662/27626 [20:19<44:33,  7.09it/s]

{'epoch': 0, 'iter': 8660, 'avg_loss': 9.199671164683197, 'avg_acc': 49.969691721510216, 'loss': 8.517095565795898}


EP_train:0:  31%|| 8672/27626 [20:20<44:33,  7.09it/s]

{'epoch': 0, 'iter': 8670, 'avg_loss': 9.199142414810556, 'avg_acc': 49.96864548494983, 'loss': 9.078207015991211}


EP_train:0:  31%|| 8682/27626 [20:22<44:44,  7.06it/s]

{'epoch': 0, 'iter': 8680, 'avg_loss': 9.198798333866614, 'avg_acc': 49.976961179587605, 'loss': 8.548835754394531}


EP_train:0:  31%|| 8692/27626 [20:23<44:45,  7.05it/s]

{'epoch': 0, 'iter': 8690, 'avg_loss': 9.198154818708556, 'avg_acc': 49.97411114946496, 'loss': 8.978450775146484}


EP_train:0:  31%|| 8702/27626 [20:25<44:24,  7.10it/s]

{'epoch': 0, 'iter': 8700, 'avg_loss': 9.197215716902681, 'avg_acc': 49.97414090334444, 'loss': 7.47410249710083}


EP_train:0:  32%|| 8712/27626 [20:26<44:31,  7.08it/s]

{'epoch': 0, 'iter': 8710, 'avg_loss': 9.196559746713456, 'avg_acc': 49.96986568706234, 'loss': 9.165771484375}


EP_train:0:  32%|| 8722/27626 [20:27<44:32,  7.07it/s]

{'epoch': 0, 'iter': 8720, 'avg_loss': 9.196012380049051, 'avg_acc': 49.969900240798076, 'loss': 8.353922843933105}


EP_train:0:  32%|| 8732/27626 [20:29<44:34,  7.06it/s]

{'epoch': 0, 'iter': 8730, 'avg_loss': 9.195847487479684, 'avg_acc': 49.97029263543695, 'loss': 8.827853202819824}


EP_train:0:  32%|| 8742/27626 [20:30<44:17,  7.11it/s]

{'epoch': 0, 'iter': 8740, 'avg_loss': 9.195094918893767, 'avg_acc': 49.97318670632651, 'loss': 8.021416664123535}


EP_train:0:  32%|| 8752/27626 [20:32<44:21,  7.09it/s]

{'epoch': 0, 'iter': 8750, 'avg_loss': 9.19474405104086, 'avg_acc': 49.97678836704377, 'loss': 8.799062728881836}


EP_train:0:  32%|| 8762/27626 [20:33<44:31,  7.06it/s]

{'epoch': 0, 'iter': 8760, 'avg_loss': 9.194152351765174, 'avg_acc': 49.97645816687593, 'loss': 8.307282447814941}


EP_train:0:  32%|| 8772/27626 [20:35<44:57,  6.99it/s]

{'epoch': 0, 'iter': 8770, 'avg_loss': 9.193919034820233, 'avg_acc': 49.97612871964428, 'loss': 9.007185935974121}


EP_train:0:  32%|| 8782/27626 [20:36<44:42,  7.02it/s]

{'epoch': 0, 'iter': 8780, 'avg_loss': 9.193001190033932, 'avg_acc': 49.97046179250655, 'loss': 8.353606224060059}


EP_train:0:  32%|| 8792/27626 [20:37<44:21,  7.08it/s]

{'epoch': 0, 'iter': 8790, 'avg_loss': 9.190533332186828, 'avg_acc': 49.975116596519165, 'loss': 7.167230606079102}


EP_train:0:  32%|| 8802/27626 [20:39<44:22,  7.07it/s]

{'epoch': 0, 'iter': 8800, 'avg_loss': 9.189126569348945, 'avg_acc': 49.97230428360414, 'loss': 8.967540740966797}


EP_train:0:  32%|| 8812/27626 [20:40<44:22,  7.07it/s]

{'epoch': 0, 'iter': 8810, 'avg_loss': 9.1889118508575, 'avg_acc': 49.97304505731472, 'loss': 9.436572074890137}


EP_train:0:  32%|| 8822/27626 [20:42<44:14,  7.08it/s]

{'epoch': 0, 'iter': 8820, 'avg_loss': 9.18824872639752, 'avg_acc': 49.97484695612742, 'loss': 8.271608352661133}


EP_train:0:  32%|| 8832/27626 [20:43<44:15,  7.08it/s]

{'epoch': 0, 'iter': 8830, 'avg_loss': 9.187800611675886, 'avg_acc': 49.9695674329068, 'loss': 9.817404747009277}


EP_train:0:  32%|| 8842/27626 [20:44<44:05,  7.10it/s]

{'epoch': 0, 'iter': 8840, 'avg_loss': 9.187504830666223, 'avg_acc': 49.96783452098179, 'loss': 8.913326263427734}


EP_train:0:  32%|| 8852/27626 [20:46<44:08,  7.09it/s]

{'epoch': 0, 'iter': 8850, 'avg_loss': 9.187429114070104, 'avg_acc': 49.96822392949949, 'loss': 8.802435874938965}


EP_train:0:  32%|| 8862/27626 [20:47<44:28,  7.03it/s]

{'epoch': 0, 'iter': 8860, 'avg_loss': 9.1866340723846, 'avg_acc': 49.967554452093445, 'loss': 8.39644718170166}


EP_train:0:  32%|| 8872/27626 [20:49<44:25,  7.04it/s]

{'epoch': 0, 'iter': 8870, 'avg_loss': 9.1858469433398, 'avg_acc': 49.96900011272686, 'loss': 8.286822319030762}


EP_train:0:  32%|| 8882/27626 [20:50<44:00,  7.10it/s]

{'epoch': 0, 'iter': 8880, 'avg_loss': 9.185421691044482, 'avg_acc': 49.96833126900124, 'loss': 8.775022506713867}


EP_train:0:  32%|| 8892/27626 [20:51<44:20,  7.04it/s]

{'epoch': 0, 'iter': 8890, 'avg_loss': 9.184835725929537, 'avg_acc': 49.97082724103026, 'loss': 8.400321960449219}


EP_train:0:  32%|| 8902/27626 [20:53<44:14,  7.05it/s]

{'epoch': 0, 'iter': 8900, 'avg_loss': 9.183948110293946, 'avg_acc': 49.97015784743287, 'loss': 8.684196472167969}


EP_train:0:  32%|| 8912/27626 [20:54<44:06,  7.07it/s]

{'epoch': 0, 'iter': 8910, 'avg_loss': 9.183258709743253, 'avg_acc': 49.97124340702502, 'loss': 7.5003252029418945}


EP_train:0:  32%|| 8922/27626 [20:56<43:56,  7.09it/s]

{'epoch': 0, 'iter': 8920, 'avg_loss': 9.18281440271977, 'avg_acc': 49.971976235848, 'loss': 8.701074600219727}


EP_train:0:  32%|| 8932/27626 [20:57<44:11,  7.05it/s]

{'epoch': 0, 'iter': 8930, 'avg_loss': 9.182571103278926, 'avg_acc': 49.972707423580786, 'loss': 9.149260520935059}


EP_train:0:  32%|| 8942/27626 [20:58<44:12,  7.04it/s]

{'epoch': 0, 'iter': 8940, 'avg_loss': 9.18214100823948, 'avg_acc': 49.97413600268427, 'loss': 8.983709335327148}


EP_train:0:  32%|| 8952/27626 [21:00<44:10,  7.05it/s]

{'epoch': 0, 'iter': 8950, 'avg_loss': 9.182015224989932, 'avg_acc': 49.97346665177076, 'loss': 9.740386009216309}


EP_train:0:  32%|| 8962/27626 [21:01<44:18,  7.02it/s]

{'epoch': 0, 'iter': 8960, 'avg_loss': 9.181484479268818, 'avg_acc': 49.976286128780266, 'loss': 8.241610527038574}


EP_train:0:  32%|| 8972/27626 [21:03<44:09,  7.04it/s]

{'epoch': 0, 'iter': 8970, 'avg_loss': 9.181272975896968, 'avg_acc': 49.9700423587114, 'loss': 8.555252075195312}


EP_train:0:  33%|| 8982/27626 [21:04<44:10,  7.03it/s]

{'epoch': 0, 'iter': 8980, 'avg_loss': 9.181075421277988, 'avg_acc': 49.97425119697139, 'loss': 9.142104148864746}


EP_train:0:  33%|| 8992/27626 [21:06<43:39,  7.11it/s]

{'epoch': 0, 'iter': 8990, 'avg_loss': 9.180767643718191, 'avg_acc': 49.97532254476699, 'loss': 8.802631378173828}


EP_train:0:  33%|| 9002/27626 [21:07<44:01,  7.05it/s]

{'epoch': 0, 'iter': 9000, 'avg_loss': 9.180050217010036, 'avg_acc': 49.97430841017665, 'loss': 8.69701099395752}


EP_train:0:  33%|| 9012/27626 [21:08<44:09,  7.03it/s]

{'epoch': 0, 'iter': 9010, 'avg_loss': 9.179757700790882, 'avg_acc': 49.97364332482521, 'loss': 8.428069114685059}


EP_train:0:  33%|| 9022/27626 [21:10<43:50,  7.07it/s]

{'epoch': 0, 'iter': 9020, 'avg_loss': 9.179533380215164, 'avg_acc': 49.9736725418468, 'loss': 9.033162117004395}


EP_train:0:  33%|| 9032/27626 [21:11<43:43,  7.09it/s]

{'epoch': 0, 'iter': 9030, 'avg_loss': 9.178958055677636, 'avg_acc': 49.96954933008526, 'loss': 8.970980644226074}


EP_train:0:  33%|| 9042/27626 [21:13<43:41,  7.09it/s]

{'epoch': 0, 'iter': 9040, 'avg_loss': 9.178241378687979, 'avg_acc': 49.97615031523062, 'loss': 8.376876831054688}


EP_train:0:  33%|| 9052/27626 [21:14<44:13,  7.00it/s]

{'epoch': 0, 'iter': 9050, 'avg_loss': 9.177610625457849, 'avg_acc': 49.97272400839686, 'loss': 9.03488540649414}


EP_train:0:  33%|| 9062/27626 [21:15<43:39,  7.09it/s]

{'epoch': 0, 'iter': 9060, 'avg_loss': 9.177383949630059, 'avg_acc': 49.972409226354706, 'loss': 9.222376823425293}


EP_train:0:  33%|| 9072/27626 [21:17<43:31,  7.10it/s]

{'epoch': 0, 'iter': 9070, 'avg_loss': 9.1766904694363, 'avg_acc': 49.97278414728255, 'loss': 8.319846153259277}


EP_train:0:  33%|| 9082/27626 [21:18<43:42,  7.07it/s]

{'epoch': 0, 'iter': 9080, 'avg_loss': 9.176367278876723, 'avg_acc': 49.96627574055721, 'loss': 8.642008781433105}


EP_train:0:  33%|| 9092/27626 [21:20<43:40,  7.07it/s]

{'epoch': 0, 'iter': 9090, 'avg_loss': 9.176063863479216, 'avg_acc': 49.966656583434165, 'loss': 8.224504470825195}


EP_train:0:  33%|| 9102/27626 [21:21<44:08,  7.00it/s]

{'epoch': 0, 'iter': 9100, 'avg_loss': 9.17572758001569, 'avg_acc': 49.96257279419844, 'loss': 8.239908218383789}


EP_train:0:  33%|| 9112/27626 [21:22<43:38,  7.07it/s]

{'epoch': 0, 'iter': 9110, 'avg_loss': 9.175319529851487, 'avg_acc': 49.961241905389095, 'loss': 8.577110290527344}


EP_train:0:  33%|| 9122/27626 [21:24<43:36,  7.07it/s]

{'epoch': 0, 'iter': 9120, 'avg_loss': 9.174499172636873, 'avg_acc': 49.96334009428791, 'loss': 7.810338973999023}


EP_train:0:  33%|| 9132/27626 [21:25<43:50,  7.03it/s]

{'epoch': 0, 'iter': 9130, 'avg_loss': 9.17384920474303, 'avg_acc': 49.96132679881722, 'loss': 7.876558303833008}


EP_train:0:  33%|| 9142/27626 [21:27<43:41,  7.05it/s]

{'epoch': 0, 'iter': 9140, 'avg_loss': 9.172834838509964, 'avg_acc': 49.961027239908105, 'loss': 9.488619804382324}


EP_train:0:  33%|| 9152/27626 [21:28<43:35,  7.06it/s]

{'epoch': 0, 'iter': 9150, 'avg_loss': 9.172424837482808, 'avg_acc': 49.96243579936619, 'loss': 8.339027404785156}


EP_train:0:  33%|| 9162/27626 [21:30<43:34,  7.06it/s]

{'epoch': 0, 'iter': 9160, 'avg_loss': 9.172109512376572, 'avg_acc': 49.95770112433141, 'loss': 9.527191162109375}


EP_train:0:  33%|| 9172/27626 [21:31<43:35,  7.05it/s]

{'epoch': 0, 'iter': 9170, 'avg_loss': 9.17157380399703, 'avg_acc': 49.95570275869589, 'loss': 8.808655738830566}


EP_train:0:  33%|| 9182/27626 [21:32<43:39,  7.04it/s]

{'epoch': 0, 'iter': 9180, 'avg_loss': 9.17094077400205, 'avg_acc': 49.95575100751552, 'loss': 8.460819244384766}


EP_train:0:  33%|| 9192/27626 [21:34<43:14,  7.10it/s]

{'epoch': 0, 'iter': 9190, 'avg_loss': 9.17021371941872, 'avg_acc': 49.95647916439996, 'loss': 8.74915885925293}


EP_train:0:  33%|| 9202/27626 [21:35<43:15,  7.10it/s]

{'epoch': 0, 'iter': 9200, 'avg_loss': 9.16993955876073, 'avg_acc': 49.951771546571024, 'loss': 8.610346794128418}


EP_train:0:  33%|| 9212/27626 [21:37<43:43,  7.02it/s]

{'epoch': 0, 'iter': 9210, 'avg_loss': 9.169798729150772, 'avg_acc': 49.95318097926392, 'loss': 9.421597480773926}


EP_train:0:  33%|| 9222/27626 [21:38<43:32,  7.05it/s]

{'epoch': 0, 'iter': 9220, 'avg_loss': 9.16895284596861, 'avg_acc': 49.95865415898493, 'loss': 8.4690523147583}


EP_train:0:  33%|| 9232/27626 [21:39<43:35,  7.03it/s]

{'epoch': 0, 'iter': 9230, 'avg_loss': 9.168328254171989, 'avg_acc': 49.96276134763298, 'loss': 8.670120239257812}


EP_train:0:  33%|| 9242/27626 [21:41<43:35,  7.03it/s]

{'epoch': 0, 'iter': 9240, 'avg_loss': 9.167764597493273, 'avg_acc': 49.96550697976409, 'loss': 9.380606651306152}


EP_train:0:  33%|| 9252/27626 [21:42<43:15,  7.08it/s]

{'epoch': 0, 'iter': 9250, 'avg_loss': 9.167255063686303, 'avg_acc': 49.961828450978274, 'loss': 9.946184158325195}


EP_train:0:  34%|| 9262/27626 [21:44<43:18,  7.07it/s]

{'epoch': 0, 'iter': 9260, 'avg_loss': 9.167246039311992, 'avg_acc': 49.9628819781881, 'loss': 8.886548042297363}


EP_train:0:  34%|| 9272/27626 [21:45<43:09,  7.09it/s]

{'epoch': 0, 'iter': 9270, 'avg_loss': 9.166887526964244, 'avg_acc': 49.96595566821271, 'loss': 9.12498664855957}


EP_train:0:  34%|| 9282/27626 [21:46<43:08,  7.09it/s]

{'epoch': 0, 'iter': 9280, 'avg_loss': 9.166591161284618, 'avg_acc': 49.96801260640017, 'loss': 8.40958023071289}


EP_train:0:  34%|| 9292/27626 [21:48<43:29,  7.03it/s]

{'epoch': 0, 'iter': 9290, 'avg_loss': 9.166109548678966, 'avg_acc': 49.97006511677968, 'loss': 9.77795696258545}


EP_train:0:  34%|| 9302/27626 [21:49<43:22,  7.04it/s]

{'epoch': 0, 'iter': 9300, 'avg_loss': 9.165683391978312, 'avg_acc': 49.97076927212128, 'loss': 8.497159957885742}


EP_train:0:  34%|| 9312/27626 [21:51<43:22,  7.04it/s]

{'epoch': 0, 'iter': 9310, 'avg_loss': 9.16563900783794, 'avg_acc': 49.97348566212007, 'loss': 10.373003959655762}


EP_train:0:  34%|| 9322/27626 [21:52<43:17,  7.05it/s]

{'epoch': 0, 'iter': 9320, 'avg_loss': 9.165642381059728, 'avg_acc': 49.97083199227551, 'loss': 9.460188865661621}


EP_train:0:  34%|| 9332/27626 [21:53<42:55,  7.10it/s]

{'epoch': 0, 'iter': 9330, 'avg_loss': 9.165219443798218, 'avg_acc': 49.970528346372305, 'loss': 9.479405403137207}


EP_train:0:  34%|| 9342/27626 [21:55<43:23,  7.02it/s]

{'epoch': 0, 'iter': 9340, 'avg_loss': 9.164824419620906, 'avg_acc': 49.969221710737614, 'loss': 8.994332313537598}


EP_train:0:  34%|| 9352/27626 [21:56<43:04,  7.07it/s]

{'epoch': 0, 'iter': 9350, 'avg_loss': 9.164792593730706, 'avg_acc': 49.972262324885044, 'loss': 8.339398384094238}


EP_train:0:  34%|| 9362/27626 [21:58<43:06,  7.06it/s]

{'epoch': 0, 'iter': 9360, 'avg_loss': 9.164344559086654, 'avg_acc': 49.97262578784318, 'loss': 8.900479316711426}


EP_train:0:  34%|| 9372/27626 [21:59<43:20,  7.02it/s]

{'epoch': 0, 'iter': 9370, 'avg_loss': 9.164126438413202, 'avg_acc': 49.97232152385018, 'loss': 9.136321067810059}


EP_train:0:  34%|| 9382/27626 [22:01<43:11,  7.04it/s]

{'epoch': 0, 'iter': 9380, 'avg_loss': 9.163856804212037, 'avg_acc': 49.969352947446964, 'loss': 8.07640552520752}


EP_train:0:  34%|| 9392/27626 [22:02<43:05,  7.05it/s]

{'epoch': 0, 'iter': 9390, 'avg_loss': 9.163503883872826, 'avg_acc': 49.97071664359493, 'loss': 8.965388298034668}


EP_train:0:  34%|| 9402/27626 [22:03<42:57,  7.07it/s]

{'epoch': 0, 'iter': 9400, 'avg_loss': 9.163105099039653, 'avg_acc': 49.97706361025423, 'loss': 8.323478698730469}


EP_train:0:  34%|| 9412/27626 [22:05<43:07,  7.04it/s]

{'epoch': 0, 'iter': 9410, 'avg_loss': 9.16252114569871, 'avg_acc': 49.97908033152694, 'loss': 8.929147720336914}


EP_train:0:  34%|| 9422/27626 [22:06<43:02,  7.05it/s]

{'epoch': 0, 'iter': 9420, 'avg_loss': 9.162265287995933, 'avg_acc': 49.982419594522874, 'loss': 8.68398666381836}


EP_train:0:  34%|| 9432/27626 [22:08<43:15,  7.01it/s]

{'epoch': 0, 'iter': 9430, 'avg_loss': 9.162043030321907, 'avg_acc': 49.98011875728979, 'loss': 9.601490020751953}


EP_train:0:  34%|| 9442/27626 [22:09<42:53,  7.07it/s]

{'epoch': 0, 'iter': 9440, 'avg_loss': 9.161478999256772, 'avg_acc': 49.983118843342865, 'loss': 9.333917617797852}


EP_train:0:  34%|| 9452/27626 [22:10<42:57,  7.05it/s]

{'epoch': 0, 'iter': 9450, 'avg_loss': 9.161071961953544, 'avg_acc': 49.977515606814094, 'loss': 9.138171195983887}


EP_train:0:  34%|| 9462/27626 [22:12<43:02,  7.03it/s]

{'epoch': 0, 'iter': 9460, 'avg_loss': 9.161136577149776, 'avg_acc': 49.97985149561357, 'loss': 9.677850723266602}


EP_train:0:  34%|| 9472/27626 [22:13<42:37,  7.10it/s]

{'epoch': 0, 'iter': 9470, 'avg_loss': 9.160796420474249, 'avg_acc': 49.98449213388238, 'loss': 9.648709297180176}


EP_train:0:  34%|| 9482/27626 [22:15<42:55,  7.04it/s]

{'epoch': 0, 'iter': 9480, 'avg_loss': 9.159981747566805, 'avg_acc': 49.97989399852336, 'loss': 8.875212669372559}


EP_train:0:  34%|| 9492/27626 [22:16<43:08,  7.01it/s]

{'epoch': 0, 'iter': 9490, 'avg_loss': 9.15961174894065, 'avg_acc': 49.98287851648931, 'loss': 8.45970630645752}


EP_train:0:  34%|| 9502/27626 [22:18<42:54,  7.04it/s]

{'epoch': 0, 'iter': 9500, 'avg_loss': 9.159026355671138, 'avg_acc': 49.98190979896853, 'loss': 8.200881958007812}


EP_train:0:  34%|| 9512/27626 [22:19<42:54,  7.04it/s]

{'epoch': 0, 'iter': 9510, 'avg_loss': 9.158355730488127, 'avg_acc': 49.9756860477342, 'loss': 8.995759963989258}


EP_train:0:  34%|| 9522/27626 [22:20<42:57,  7.02it/s]

{'epoch': 0, 'iter': 9520, 'avg_loss': 9.15808559823795, 'avg_acc': 49.97505514126667, 'loss': 9.140189170837402}


EP_train:0:  35%|| 9532/27626 [22:22<42:38,  7.07it/s]

{'epoch': 0, 'iter': 9530, 'avg_loss': 9.157629303586482, 'avg_acc': 49.97540919106075, 'loss': 8.65384578704834}


EP_train:0:  35%|| 9542/27626 [22:23<42:47,  7.04it/s]

{'epoch': 0, 'iter': 9540, 'avg_loss': 9.157222643899113, 'avg_acc': 49.97641756629284, 'loss': 8.873558044433594}


EP_train:0:  35%|| 9552/27626 [22:25<42:31,  7.08it/s]

{'epoch': 0, 'iter': 9550, 'avg_loss': 9.156721773078687, 'avg_acc': 49.97807821170558, 'loss': 8.19920825958252}


EP_train:0:  35%|| 9562/27626 [22:26<42:49,  7.03it/s]

{'epoch': 0, 'iter': 9560, 'avg_loss': 9.156298398946621, 'avg_acc': 49.97581319945613, 'loss': 8.364148139953613}


EP_train:0:  35%|| 9572/27626 [22:27<42:36,  7.06it/s]

{'epoch': 0, 'iter': 9570, 'avg_loss': 9.155775930151373, 'avg_acc': 49.97485894890816, 'loss': 8.303669929504395}


EP_train:0:  35%|| 9582/27626 [22:29<42:37,  7.05it/s]

{'epoch': 0, 'iter': 9580, 'avg_loss': 9.155396752436616, 'avg_acc': 49.97292819121177, 'loss': 7.38705587387085}


EP_train:0:  35%|| 9592/27626 [22:30<42:55,  7.00it/s]

{'epoch': 0, 'iter': 9590, 'avg_loss': 9.154712383621067, 'avg_acc': 49.9710014597018, 'loss': 9.12217903137207}


EP_train:0:  35%|| 9602/27626 [22:32<42:37,  7.05it/s]

{'epoch': 0, 'iter': 9600, 'avg_loss': 9.154067932621984, 'avg_acc': 49.971357150296846, 'loss': 7.963227272033691}


EP_train:0:  35%|| 9612/27626 [22:33<42:16,  7.10it/s]

{'epoch': 0, 'iter': 9610, 'avg_loss': 9.153283300688532, 'avg_acc': 49.967485173239, 'loss': 7.9293532371521}


EP_train:0:  35%|| 9622/27626 [22:34<42:38,  7.04it/s]

{'epoch': 0, 'iter': 9620, 'avg_loss': 9.152818605169017, 'avg_acc': 49.96459567612514, 'loss': 8.04373836517334}


EP_train:0:  35%|| 9632/27626 [22:36<42:22,  7.08it/s]

{'epoch': 0, 'iter': 9630, 'avg_loss': 9.15245692235586, 'avg_acc': 49.96203665247638, 'loss': 9.4730863571167}


EP_train:0:  35%|| 9642/27626 [22:37<42:22,  7.07it/s]

{'epoch': 0, 'iter': 9640, 'avg_loss': 9.152059939649266, 'avg_acc': 49.96240016595789, 'loss': 8.392743110656738}


EP_train:0:  35%|| 9652/27626 [22:39<42:22,  7.07it/s]

{'epoch': 0, 'iter': 9650, 'avg_loss': 9.151791349147567, 'avg_acc': 49.96114392290954, 'loss': 8.780277252197266}


EP_train:0:  35%|| 9662/27626 [22:40<42:39,  7.02it/s]

{'epoch': 0, 'iter': 9660, 'avg_loss': 9.15152442387426, 'avg_acc': 49.9631249353069, 'loss': 8.710046768188477}


EP_train:0:  35%|| 9672/27626 [22:41<42:31,  7.04it/s]

{'epoch': 0, 'iter': 9670, 'avg_loss': 9.150938424412264, 'avg_acc': 49.967363767966084, 'loss': 8.010290145874023}


EP_train:0:  35%|| 9682/27626 [22:43<42:36,  7.02it/s]

{'epoch': 0, 'iter': 9680, 'avg_loss': 9.150461241918437, 'avg_acc': 49.96642908790414, 'loss': 8.972789764404297}


EP_train:0:  35%|| 9692/27626 [22:44<42:10,  7.09it/s]

{'epoch': 0, 'iter': 9690, 'avg_loss': 9.14992983842361, 'avg_acc': 49.97130069136312, 'loss': 8.564676284790039}


EP_train:0:  35%|| 9702/27626 [22:46<42:26,  7.04it/s]

{'epoch': 0, 'iter': 9700, 'avg_loss': 9.14965739632105, 'avg_acc': 49.965854035666425, 'loss': 9.095841407775879}


EP_train:0:  35%|| 9712/27626 [22:47<42:21,  7.05it/s]

{'epoch': 0, 'iter': 9710, 'avg_loss': 9.149352108090486, 'avg_acc': 49.9665327978581, 'loss': 9.122136116027832}


EP_train:0:  35%|| 9722/27626 [22:49<42:16,  7.06it/s]

{'epoch': 0, 'iter': 9720, 'avg_loss': 9.14914006311797, 'avg_acc': 49.970424853410144, 'loss': 8.824911117553711}


EP_train:0:  35%|| 9732/27626 [22:50<42:10,  7.07it/s]

{'epoch': 0, 'iter': 9730, 'avg_loss': 9.148908050561806, 'avg_acc': 49.971097523378894, 'loss': 9.327131271362305}


EP_train:0:  35%|| 9742/27626 [22:51<42:10,  7.07it/s]

{'epoch': 0, 'iter': 9740, 'avg_loss': 9.14839253402591, 'avg_acc': 49.970485576429525, 'loss': 8.88982105255127}


EP_train:0:  35%|| 9752/27626 [22:53<41:57,  7.10it/s]

{'epoch': 0, 'iter': 9750, 'avg_loss': 9.148038709460742, 'avg_acc': 49.9663496051687, 'loss': 8.976934432983398}


EP_train:0:  35%|| 9762/27626 [22:54<42:23,  7.02it/s]

{'epoch': 0, 'iter': 9760, 'avg_loss': 9.148191703268402, 'avg_acc': 49.96158180514292, 'loss': 8.154457092285156}


EP_train:0:  35%|| 9772/27626 [22:56<42:06,  7.07it/s]

{'epoch': 0, 'iter': 9770, 'avg_loss': 9.147909217963699, 'avg_acc': 49.962260771671275, 'loss': 8.33369255065918}


EP_train:0:  35%|| 9782/27626 [22:57<42:15,  7.04it/s]

{'epoch': 0, 'iter': 9780, 'avg_loss': 9.147210559380857, 'avg_acc': 49.96517482874962, 'loss': 9.181011199951172}


EP_train:0:  35%|| 9792/27626 [22:58<41:47,  7.11it/s]

{'epoch': 0, 'iter': 9790, 'avg_loss': 9.1465584846308, 'avg_acc': 49.96967878664079, 'loss': 7.99799919128418}


EP_train:0:  35%|| 9802/27626 [23:00<41:53,  7.09it/s]

{'epoch': 0, 'iter': 9800, 'avg_loss': 9.146009442465925, 'avg_acc': 49.96907203346597, 'loss': 8.908113479614258}


EP_train:0:  36%|| 9812/27626 [23:01<42:06,  7.05it/s]

{'epoch': 0, 'iter': 9810, 'avg_loss': 9.14562414467924, 'avg_acc': 49.96305167668943, 'loss': 8.659314155578613}


EP_train:0:  36%|| 9822/27626 [23:03<42:02,  7.06it/s]

{'epoch': 0, 'iter': 9820, 'avg_loss': 9.145403155604663, 'avg_acc': 49.95927095000509, 'loss': 8.723772048950195}


EP_train:0:  36%|| 9832/27626 [23:04<41:51,  7.08it/s]

{'epoch': 0, 'iter': 9830, 'avg_loss': 9.14487271580033, 'avg_acc': 49.955180042722006, 'loss': 8.607924461364746}


EP_train:0:  36%|| 9842/27626 [23:05<41:52,  7.08it/s]

{'epoch': 0, 'iter': 9840, 'avg_loss': 9.144445404920646, 'avg_acc': 49.95649578294889, 'loss': 9.217674255371094}


EP_train:0:  36%|| 9852/27626 [23:07<42:22,  6.99it/s]

{'epoch': 0, 'iter': 9850, 'avg_loss': 9.143978728460015, 'avg_acc': 49.954953811795754, 'loss': 8.994341850280762}


EP_train:0:  36%|| 9862/27626 [23:08<41:54,  7.06it/s]

{'epoch': 0, 'iter': 9860, 'avg_loss': 9.143353072229209, 'avg_acc': 49.95373187303519, 'loss': 8.582772254943848}


EP_train:0:  36%|| 9872/27626 [23:10<41:39,  7.10it/s]

{'epoch': 0, 'iter': 9870, 'avg_loss': 9.142996745369933, 'avg_acc': 49.95029632256104, 'loss': 8.425048828125}


EP_train:0:  36%|| 9882/27626 [23:11<41:32,  7.12it/s]

{'epoch': 0, 'iter': 9880, 'avg_loss': 9.142623925882244, 'avg_acc': 49.94876530715515, 'loss': 8.724610328674316}


EP_train:0:  36%|| 9892/27626 [23:13<42:14,  7.00it/s]

{'epoch': 0, 'iter': 9890, 'avg_loss': 9.142175619479993, 'avg_acc': 49.94944899403498, 'loss': 8.397117614746094}


EP_train:0:  36%|| 9902/27626 [23:14<41:39,  7.09it/s]

{'epoch': 0, 'iter': 9900, 'avg_loss': 9.141143808267584, 'avg_acc': 49.95297192202808, 'loss': 7.965013027191162}


EP_train:0:  36%|| 9912/27626 [23:15<41:37,  7.09it/s]

{'epoch': 0, 'iter': 9910, 'avg_loss': 9.140430476467998, 'avg_acc': 49.948289779033395, 'loss': 8.338580131530762}


EP_train:0:  36%|| 9922/27626 [23:17<41:56,  7.04it/s]

{'epoch': 0, 'iter': 9920, 'avg_loss': 9.13985515032525, 'avg_acc': 49.95086180828546, 'loss': 8.971556663513184}


EP_train:0:  36%|| 9932/27626 [23:18<42:17,  6.97it/s]

{'epoch': 0, 'iter': 9930, 'avg_loss': 9.139488382265535, 'avg_acc': 49.944303192024975, 'loss': 9.007868766784668}


EP_train:0:  36%|| 9942/27626 [23:20<42:13,  6.98it/s]

{'epoch': 0, 'iter': 9940, 'avg_loss': 9.1388767675763, 'avg_acc': 49.9402726083895, 'loss': 9.391692161560059}


EP_train:0:  36%|| 9952/27626 [23:21<42:04,  7.00it/s]

{'epoch': 0, 'iter': 9950, 'avg_loss': 9.138451864510706, 'avg_acc': 49.9397045523063, 'loss': 8.882909774780273}


EP_train:0:  36%|| 9962/27626 [23:22<41:31,  7.09it/s]

{'epoch': 0, 'iter': 9960, 'avg_loss': 9.138191517260333, 'avg_acc': 49.94101997791386, 'loss': 8.830229759216309}


EP_train:0:  36%|| 9972/27626 [23:24<41:20,  7.12it/s]

{'epoch': 0, 'iter': 9970, 'avg_loss': 9.137904973365325, 'avg_acc': 49.94013890281817, 'loss': 7.993310928344727}


EP_train:0:  36%|| 9982/27626 [23:25<41:39,  7.06it/s]

{'epoch': 0, 'iter': 9980, 'avg_loss': 9.137540704895148, 'avg_acc': 49.938007213706044, 'loss': 7.557921409606934}


EP_train:0:  36%|| 9992/27626 [23:27<41:35,  7.07it/s]

{'epoch': 0, 'iter': 9990, 'avg_loss': 9.137553115278116, 'avg_acc': 49.93838204383946, 'loss': 10.087565422058105}


EP_train:0:  36%|| 10002/27626 [23:28<41:42,  7.04it/s]

{'epoch': 0, 'iter': 10000, 'avg_loss': 9.137211540391142, 'avg_acc': 49.93750624937506, 'loss': 9.539741516113281}


EP_train:0:  36%|| 10012/27626 [23:29<41:40,  7.04it/s]

{'epoch': 0, 'iter': 10010, 'avg_loss': 9.136804731393978, 'avg_acc': 49.93694436120268, 'loss': 8.533758163452148}


EP_train:0:  36%|| 10022/27626 [23:31<41:57,  6.99it/s]

{'epoch': 0, 'iter': 10020, 'avg_loss': 9.136505632578476, 'avg_acc': 49.942620496956394, 'loss': 8.127501487731934}


EP_train:0:  36%|| 10032/27626 [23:32<41:38,  7.04it/s]

{'epoch': 0, 'iter': 10030, 'avg_loss': 9.13593186891016, 'avg_acc': 49.94049695942578, 'loss': 8.898238182067871}


EP_train:0:  36%|| 10042/27626 [23:34<41:32,  7.05it/s]

{'epoch': 0, 'iter': 10040, 'avg_loss': 9.135630739956854, 'avg_acc': 49.94335723533513, 'loss': 8.98228931427002}


EP_train:0:  36%|| 10052/27626 [23:35<41:18,  7.09it/s]

{'epoch': 0, 'iter': 10050, 'avg_loss': 9.135569283860487, 'avg_acc': 49.94621181971943, 'loss': 9.26754379272461}


EP_train:0:  36%|| 10062/27626 [23:36<41:30,  7.05it/s]

{'epoch': 0, 'iter': 10060, 'avg_loss': 9.134978141714525, 'avg_acc': 49.947197097704006, 'loss': 7.982196807861328}


EP_train:0:  36%|| 10072/27626 [23:38<41:30,  7.05it/s]

{'epoch': 0, 'iter': 10070, 'avg_loss': 9.134355632021391, 'avg_acc': 49.949421606593184, 'loss': 9.087942123413086}


EP_train:0:  36%|| 10082/27626 [23:39<41:19,  7.08it/s]

{'epoch': 0, 'iter': 10080, 'avg_loss': 9.133864118967509, 'avg_acc': 49.950091756770156, 'loss': 8.48410701751709}


EP_train:0:  37%|| 10092/27626 [23:41<41:29,  7.04it/s]

{'epoch': 0, 'iter': 10090, 'avg_loss': 9.13370994552372, 'avg_acc': 49.94921216925974, 'loss': 8.511932373046875}


EP_train:0:  37%|| 10102/27626 [23:42<41:17,  7.07it/s]

{'epoch': 0, 'iter': 10100, 'avg_loss': 9.133499141448016, 'avg_acc': 49.947406197406195, 'loss': 9.498332023620605}


EP_train:0:  37%|| 10112/27626 [23:44<41:34,  7.02it/s]

{'epoch': 0, 'iter': 10110, 'avg_loss': 9.133547067005548, 'avg_acc': 49.94467658985264, 'loss': 9.245339393615723}


EP_train:0:  37%|| 10122/27626 [23:45<41:19,  7.06it/s]

{'epoch': 0, 'iter': 10120, 'avg_loss': 9.133023795538794, 'avg_acc': 49.94812765537002, 'loss': 7.767118453979492}


EP_train:0:  37%|| 10132/27626 [23:46<41:32,  7.02it/s]

{'epoch': 0, 'iter': 10130, 'avg_loss': 9.131963664293878, 'avg_acc': 49.94879577534301, 'loss': 7.71051025390625}


EP_train:0:  37%|| 10142/27626 [23:48<41:09,  7.08it/s]

{'epoch': 0, 'iter': 10140, 'avg_loss': 9.131732107983833, 'avg_acc': 49.94792180258357, 'loss': 9.163822174072266}


EP_train:0:  37%|| 10152/27626 [23:49<41:01,  7.10it/s]

{'epoch': 0, 'iter': 10150, 'avg_loss': 9.131534152190188, 'avg_acc': 49.94581814599547, 'loss': 9.393497467041016}


EP_train:0:  37%|| 10162/27626 [23:51<41:19,  7.04it/s]

{'epoch': 0, 'iter': 10160, 'avg_loss': 9.131200962490881, 'avg_acc': 49.945871469343565, 'loss': 8.005656242370605}


EP_train:0:  37%|| 10172/27626 [23:52<41:18,  7.04it/s]

{'epoch': 0, 'iter': 10170, 'avg_loss': 9.130695434484425, 'avg_acc': 49.943466719103334, 'loss': 8.936915397644043}


EP_train:0:  37%|| 10182/27626 [23:53<41:14,  7.05it/s]

{'epoch': 0, 'iter': 10180, 'avg_loss': 9.130389751613802, 'avg_acc': 49.94290835870739, 'loss': 7.794127941131592}


EP_train:0:  37%|| 10192/27626 [23:55<41:09,  7.06it/s]

{'epoch': 0, 'iter': 10190, 'avg_loss': 9.129829507945109, 'avg_acc': 49.942657737219115, 'loss': 8.169963836669922}


EP_train:0:  37%|| 10202/27626 [23:56<41:11,  7.05it/s]

{'epoch': 0, 'iter': 10200, 'avg_loss': 9.1296307417753, 'avg_acc': 49.94455200470542, 'loss': 8.569510459899902}


EP_train:0:  37%|| 10212/27626 [23:58<41:18,  7.03it/s]

{'epoch': 0, 'iter': 10210, 'avg_loss': 9.128587002922792, 'avg_acc': 49.943382136911175, 'loss': 9.413740158081055}


EP_train:0:  37%|| 10222/27626 [23:59<41:01,  7.07it/s]

{'epoch': 0, 'iter': 10220, 'avg_loss': 9.127931336905903, 'avg_acc': 49.94038009979454, 'loss': 8.777609825134277}


EP_train:0:  37%|| 10232/27626 [24:00<40:51,  7.10it/s]

{'epoch': 0, 'iter': 10230, 'avg_loss': 9.127205798125154, 'avg_acc': 49.94227103899912, 'loss': 8.00256061553955}


EP_train:0:  37%|| 10242/27626 [24:02<41:01,  7.06it/s]

{'epoch': 0, 'iter': 10240, 'avg_loss': 9.127233859722345, 'avg_acc': 49.93805536568694, 'loss': 9.308756828308105}


EP_train:0:  37%|| 10252/27626 [24:03<41:08,  7.04it/s]

{'epoch': 0, 'iter': 10250, 'avg_loss': 9.127010448027118, 'avg_acc': 49.93567700712126, 'loss': 8.995709419250488}


EP_train:0:  37%|| 10262/27626 [24:05<41:07,  7.04it/s]

{'epoch': 0, 'iter': 10260, 'avg_loss': 9.127026822155488, 'avg_acc': 49.934216937920276, 'loss': 8.858564376831055}


EP_train:0:  37%|| 10272/27626 [24:06<41:07,  7.03it/s]

{'epoch': 0, 'iter': 10270, 'avg_loss': 9.126794411718015, 'avg_acc': 49.93671502287995, 'loss': 8.768500328063965}


EP_train:0:  37%|| 10282/27626 [24:08<41:00,  7.05it/s]

{'epoch': 0, 'iter': 10280, 'avg_loss': 9.12627264855263, 'avg_acc': 49.92796177414648, 'loss': 9.404650688171387}


EP_train:0:  37%|| 10292/27626 [24:09<41:23,  6.98it/s]

{'epoch': 0, 'iter': 10290, 'avg_loss': 9.125821668544996, 'avg_acc': 49.926513458361676, 'loss': 9.35813045501709}


EP_train:0:  37%|| 10302/27626 [24:10<41:14,  7.00it/s]

{'epoch': 0, 'iter': 10300, 'avg_loss': 9.125369941895476, 'avg_acc': 49.929618483642365, 'loss': 8.837700843811035}


EP_train:0:  37%|| 10312/27626 [24:12<41:07,  7.02it/s]

{'epoch': 0, 'iter': 10310, 'avg_loss': 9.12539912370784, 'avg_acc': 49.93211133740665, 'loss': 8.177351951599121}


EP_train:0:  37%|| 10322/27626 [24:13<40:34,  7.11it/s]

{'epoch': 0, 'iter': 10320, 'avg_loss': 9.12538883083378, 'avg_acc': 49.92854374576107, 'loss': 9.236393928527832}


EP_train:0:  37%|| 10332/27626 [24:15<40:32,  7.11it/s]

{'epoch': 0, 'iter': 10330, 'avg_loss': 9.125054747007578, 'avg_acc': 49.927705449617655, 'loss': 8.457121849060059}


EP_train:0:  37%|| 10342/27626 [24:16<41:02,  7.02it/s]

{'epoch': 0, 'iter': 10340, 'avg_loss': 9.124401189870293, 'avg_acc': 49.92656657963446, 'loss': 8.623557090759277}


EP_train:0:  37%|| 10352/27626 [24:17<40:51,  7.05it/s]

{'epoch': 0, 'iter': 10350, 'avg_loss': 9.124214936887432, 'avg_acc': 49.920599458989464, 'loss': 8.616133689880371}


EP_train:0:  38%|| 10362/27626 [24:19<40:36,  7.08it/s]

{'epoch': 0, 'iter': 10360, 'avg_loss': 9.123899370095362, 'avg_acc': 49.923390599362996, 'loss': 9.892584800720215}


EP_train:0:  38%|| 10372/27626 [24:20<40:39,  7.07it/s]

{'epoch': 0, 'iter': 10370, 'avg_loss': 9.123333581077157, 'avg_acc': 49.92195786327259, 'loss': 8.483146667480469}


EP_train:0:  38%|| 10382/27626 [24:22<41:05,  7.00it/s]

{'epoch': 0, 'iter': 10380, 'avg_loss': 9.122942719727327, 'avg_acc': 49.92022685675754, 'loss': 8.529109001159668}


EP_train:0:  38%|| 10392/27626 [24:23<40:43,  7.05it/s]

{'epoch': 0, 'iter': 10390, 'avg_loss': 9.122464376858368, 'avg_acc': 49.91819844095852, 'loss': 9.05970287322998}


EP_train:0:  38%|| 10402/27626 [24:24<40:31,  7.08it/s]

{'epoch': 0, 'iter': 10400, 'avg_loss': 9.12246715397849, 'avg_acc': 49.91887799250072, 'loss': 9.013622283935547}


EP_train:0:  38%|| 10412/27626 [24:26<40:33,  7.07it/s]

{'epoch': 0, 'iter': 10410, 'avg_loss': 9.12200411892844, 'avg_acc': 49.921657381615596, 'loss': 8.546854972839355}


EP_train:0:  38%|| 10422/27626 [24:27<40:21,  7.11it/s]

{'epoch': 0, 'iter': 10420, 'avg_loss': 9.121586512357052, 'avg_acc': 49.92293206026293, 'loss': 8.879776954650879}


EP_train:0:  38%|| 10432/27626 [24:29<40:48,  7.02it/s]

{'epoch': 0, 'iter': 10430, 'avg_loss': 9.121159764724043, 'avg_acc': 49.92180759275237, 'loss': 9.109542846679688}


EP_train:0:  38%|| 10442/27626 [24:30<40:26,  7.08it/s]

{'epoch': 0, 'iter': 10440, 'avg_loss': 9.120799136246292, 'avg_acc': 49.92038597835457, 'loss': 8.13494873046875}


EP_train:0:  38%|| 10452/27626 [24:32<40:50,  7.01it/s]

{'epoch': 0, 'iter': 10450, 'avg_loss': 9.120530179215098, 'avg_acc': 49.919266098937904, 'loss': 9.2682466506958}


EP_train:0:  38%|| 10462/27626 [24:33<40:17,  7.10it/s]

{'epoch': 0, 'iter': 10460, 'avg_loss': 9.120553236916603, 'avg_acc': 49.91934327502151, 'loss': 8.622931480407715}


EP_train:0:  38%|| 10472/27626 [24:34<40:40,  7.03it/s]

{'epoch': 0, 'iter': 10470, 'avg_loss': 9.120256833526492, 'avg_acc': 49.922106293572725, 'loss': 9.292465209960938}


EP_train:0:  38%|| 10482/27626 [24:36<40:40,  7.03it/s]

{'epoch': 0, 'iter': 10480, 'avg_loss': 9.119993562610103, 'avg_acc': 49.922776929682286, 'loss': 8.305083274841309}


EP_train:0:  38%|| 10492/27626 [24:37<40:43,  7.01it/s]

{'epoch': 0, 'iter': 10490, 'avg_loss': 9.119524395916962, 'avg_acc': 49.92285053855686, 'loss': 9.117176055908203}


EP_train:0:  38%|| 10502/27626 [24:39<40:18,  7.08it/s]

{'epoch': 0, 'iter': 10500, 'avg_loss': 9.119384416658848, 'avg_acc': 49.92560232358823, 'loss': 9.393997192382812}


EP_train:0:  38%|| 10512/27626 [24:40<40:24,  7.06it/s]

{'epoch': 0, 'iter': 10510, 'avg_loss': 9.119186411574434, 'avg_acc': 49.92507848920179, 'loss': 8.718387603759766}


EP_train:0:  38%|| 10522/27626 [24:41<40:20,  7.07it/s]

{'epoch': 0, 'iter': 10520, 'avg_loss': 9.118763986099312, 'avg_acc': 49.92099135063207, 'loss': 9.13523006439209}


EP_train:0:  38%|| 10532/27626 [24:43<40:16,  7.07it/s]

{'epoch': 0, 'iter': 10530, 'avg_loss': 9.118355277053041, 'avg_acc': 49.919285917766594, 'loss': 8.115361213684082}


EP_train:0:  38%|| 10542/27626 [24:44<40:32,  7.02it/s]

{'epoch': 0, 'iter': 10540, 'avg_loss': 9.11799814424912, 'avg_acc': 49.91669433640072, 'loss': 8.830209732055664}


EP_train:0:  38%|| 10552/27626 [24:46<40:25,  7.04it/s]

{'epoch': 0, 'iter': 10550, 'avg_loss': 9.117222822104548, 'avg_acc': 49.91381148706284, 'loss': 8.156338691711426}


EP_train:0:  38%|| 10562/27626 [24:47<40:23,  7.04it/s]

{'epoch': 0, 'iter': 10560, 'avg_loss': 9.11701138495225, 'avg_acc': 49.91211769718777, 'loss': 8.950528144836426}


EP_train:0:  38%|| 10572/27626 [24:48<40:16,  7.06it/s]

{'epoch': 0, 'iter': 10570, 'avg_loss': 9.116834601800226, 'avg_acc': 49.912496452558884, 'loss': 9.185863494873047}


EP_train:0:  38%|| 10582/27626 [24:50<40:30,  7.01it/s]

{'epoch': 0, 'iter': 10580, 'avg_loss': 9.116234478320937, 'avg_acc': 49.912874492013984, 'loss': 8.111724853515625}


EP_train:0:  38%|| 10592/27626 [24:51<40:25,  7.02it/s]

{'epoch': 0, 'iter': 10590, 'avg_loss': 9.115850344470513, 'avg_acc': 49.910891322821264, 'loss': 8.10110092163086}


EP_train:0:  38%|| 10602/27626 [24:53<40:10,  7.06it/s]

{'epoch': 0, 'iter': 10600, 'avg_loss': 9.115544298641773, 'avg_acc': 49.91156494670314, 'loss': 8.451888084411621}


EP_train:0:  38%|| 10612/27626 [24:54<40:00,  7.09it/s]

{'epoch': 0, 'iter': 10610, 'avg_loss': 9.115337201511137, 'avg_acc': 49.91076477240599, 'loss': 8.737105369567871}


EP_train:0:  38%|| 10622/27626 [24:56<40:03,  7.07it/s]

{'epoch': 0, 'iter': 10620, 'avg_loss': 9.11474911936056, 'avg_acc': 49.91379107428679, 'loss': 8.838823318481445}


EP_train:0:  38%|| 10632/27626 [24:57<40:01,  7.08it/s]

{'epoch': 0, 'iter': 10630, 'avg_loss': 9.114279373290229, 'avg_acc': 49.91651773116358, 'loss': 8.434798240661621}


EP_train:0:  39%|| 10642/27626 [24:58<39:56,  7.09it/s]

{'epoch': 0, 'iter': 10640, 'avg_loss': 9.11358652320104, 'avg_acc': 49.91483413213044, 'loss': 8.281490325927734}


EP_train:0:  39%|| 10652/27626 [25:00<39:58,  7.08it/s]

{'epoch': 0, 'iter': 10650, 'avg_loss': 9.113404396654529, 'avg_acc': 49.918141489062066, 'loss': 8.48619270324707}


EP_train:0:  39%|| 10662/27626 [25:01<40:29,  6.98it/s]

{'epoch': 0, 'iter': 10660, 'avg_loss': 9.11291781859241, 'avg_acc': 49.921442641403246, 'loss': 7.425472736358643}


EP_train:0:  39%|| 10672/27626 [25:03<40:16,  7.02it/s]

{'epoch': 0, 'iter': 10670, 'avg_loss': 9.112432938240515, 'avg_acc': 49.922101958579326, 'loss': 9.049818992614746}


EP_train:0:  39%|| 10682/27626 [25:04<40:14,  7.02it/s]

{'epoch': 0, 'iter': 10680, 'avg_loss': 9.112121054255866, 'avg_acc': 49.92276004119465, 'loss': 8.853002548217773}


EP_train:0:  39%|| 10692/27626 [25:05<39:55,  7.07it/s]

{'epoch': 0, 'iter': 10690, 'avg_loss': 9.111963464506616, 'avg_acc': 49.92487840239454, 'loss': 8.118548393249512}


EP_train:0:  39%|| 10702/27626 [25:07<39:48,  7.08it/s]

{'epoch': 0, 'iter': 10700, 'avg_loss': 9.112012449062581, 'avg_acc': 49.930205121016726, 'loss': 8.693436622619629}


EP_train:0:  39%|| 10712/27626 [25:08<39:53,  7.07it/s]

{'epoch': 0, 'iter': 10710, 'avg_loss': 9.11171644803089, 'avg_acc': 49.932896088133695, 'loss': 8.689835548400879}


EP_train:0:  39%|| 10722/27626 [25:10<39:54,  7.06it/s]

{'epoch': 0, 'iter': 10720, 'avg_loss': 9.111497599668727, 'avg_acc': 49.93208422721761, 'loss': 8.26938533782959}


EP_train:0:  39%|| 10732/27626 [25:11<39:56,  7.05it/s]

{'epoch': 0, 'iter': 10730, 'avg_loss': 9.110889829979195, 'avg_acc': 49.93098266703942, 'loss': 8.500104904174805}


EP_train:0:  39%|| 10742/27626 [25:12<39:59,  7.04it/s]

{'epoch': 0, 'iter': 10740, 'avg_loss': 9.110476882239537, 'avg_acc': 49.92842845172702, 'loss': 8.92861557006836}


EP_train:0:  39%|| 10752/27626 [25:14<40:03,  7.02it/s]

{'epoch': 0, 'iter': 10750, 'avg_loss': 9.10995556660424, 'avg_acc': 49.9319830713422, 'loss': 8.90259838104248}


EP_train:0:  39%|| 10762/27626 [25:15<39:57,  7.03it/s]

{'epoch': 0, 'iter': 10760, 'avg_loss': 9.109503828718166, 'avg_acc': 49.929142273022954, 'loss': 8.599579811096191}


EP_train:0:  39%|| 10772/27626 [25:17<39:49,  7.05it/s]

{'epoch': 0, 'iter': 10770, 'avg_loss': 9.109522195305512, 'avg_acc': 49.92775740414075, 'loss': 8.940935134887695}


EP_train:0:  39%|| 10782/27626 [25:18<39:29,  7.11it/s]

{'epoch': 0, 'iter': 10780, 'avg_loss': 9.109048448149874, 'avg_acc': 49.92637510435024, 'loss': 8.70739459991455}


EP_train:0:  39%|| 10792/27626 [25:19<39:36,  7.08it/s]

{'epoch': 0, 'iter': 10790, 'avg_loss': 9.108327003661413, 'avg_acc': 49.927601705124644, 'loss': 7.723453044891357}


EP_train:0:  39%|| 10802/27626 [25:21<39:54,  7.03it/s]

{'epoch': 0, 'iter': 10800, 'avg_loss': 9.107847935998233, 'avg_acc': 49.92390750856402, 'loss': 9.301270484924316}


EP_train:0:  39%|| 10812/27626 [25:22<39:40,  7.06it/s]

{'epoch': 0, 'iter': 10810, 'avg_loss': 9.107422905067061, 'avg_acc': 49.92339977800388, 'loss': 8.21299934387207}


EP_train:0:  39%|| 10822/27626 [25:24<39:29,  7.09it/s]

{'epoch': 0, 'iter': 10820, 'avg_loss': 9.107156653510435, 'avg_acc': 49.92462572775159, 'loss': 9.065022468566895}


EP_train:0:  39%|| 10832/27626 [25:25<40:01,  6.99it/s]

{'epoch': 0, 'iter': 10830, 'avg_loss': 9.106890017235008, 'avg_acc': 49.92382974794572, 'loss': 9.170870780944824}


EP_train:0:  39%|| 10842/27626 [25:27<40:14,  6.95it/s]

{'epoch': 0, 'iter': 10840, 'avg_loss': 9.106913591176752, 'avg_acc': 49.92390000922424, 'loss': 9.122191429138184}


EP_train:0:  39%|| 10852/27626 [25:28<39:44,  7.03it/s]

{'epoch': 0, 'iter': 10850, 'avg_loss': 9.106838467263723, 'avg_acc': 49.92310616533038, 'loss': 9.111783027648926}


EP_train:0:  39%|| 10862/27626 [25:29<39:25,  7.09it/s]

{'epoch': 0, 'iter': 10860, 'avg_loss': 9.106345095431188, 'avg_acc': 49.92404014363318, 'loss': 8.719316482543945}


EP_train:0:  39%|| 10872/27626 [25:31<39:18,  7.10it/s]

{'epoch': 0, 'iter': 10870, 'avg_loss': 9.106102455266539, 'avg_acc': 49.92525986569773, 'loss': 8.622892379760742}


EP_train:0:  39%|| 10882/27626 [25:32<39:45,  7.02it/s]

{'epoch': 0, 'iter': 10880, 'avg_loss': 9.105702682733865, 'avg_acc': 49.92820053303924, 'loss': 9.758846282958984}


EP_train:0:  39%|| 10892/27626 [25:34<39:44,  7.02it/s]

{'epoch': 0, 'iter': 10890, 'avg_loss': 9.10557758458969, 'avg_acc': 49.92367551189055, 'loss': 8.374289512634277}


EP_train:0:  39%|| 10902/27626 [25:35<39:38,  7.03it/s]

{'epoch': 0, 'iter': 10900, 'avg_loss': 9.105191926017463, 'avg_acc': 49.92460554077607, 'loss': 9.075369834899902}


EP_train:0:  39%|| 10912/27626 [25:36<39:29,  7.05it/s]

{'epoch': 0, 'iter': 10910, 'avg_loss': 9.105136654661267, 'avg_acc': 49.92467464027129, 'loss': 8.905190467834473}


EP_train:0:  40%|| 10922/27626 [25:38<39:42,  7.01it/s]

{'epoch': 0, 'iter': 10920, 'avg_loss': 9.105262923662004, 'avg_acc': 49.92417132130757, 'loss': 9.280706405639648}


EP_train:0:  40%|| 10932/27626 [25:39<39:45,  7.00it/s]

{'epoch': 0, 'iter': 10930, 'avg_loss': 9.10452356064552, 'avg_acc': 49.923954807428416, 'loss': 7.3630218505859375}


EP_train:0:  40%|| 10942/27626 [25:41<39:21,  7.07it/s]

{'epoch': 0, 'iter': 10940, 'avg_loss': 9.104336229454566, 'avg_acc': 49.92345306644731, 'loss': 9.3167085647583}


EP_train:0:  40%|| 10952/27626 [25:42<39:03,  7.11it/s]

{'epoch': 0, 'iter': 10950, 'avg_loss': 9.10430898484373, 'avg_acc': 49.920669345265274, 'loss': 8.931753158569336}


EP_train:0:  40%|| 10962/27626 [25:43<39:00,  7.12it/s]

{'epoch': 0, 'iter': 10960, 'avg_loss': 9.104318548821476, 'avg_acc': 49.922452330991696, 'loss': 9.090095520019531}


EP_train:0:  40%|| 10972/27626 [25:45<39:20,  7.05it/s]

{'epoch': 0, 'iter': 10970, 'avg_loss': 9.104005826203023, 'avg_acc': 49.91796554552913, 'loss': 9.380842208862305}


EP_train:0:  40%|| 10982/27626 [25:46<39:16,  7.06it/s]

{'epoch': 0, 'iter': 10980, 'avg_loss': 9.103722534026836, 'avg_acc': 49.9194631636463, 'loss': 8.710979461669922}


EP_train:0:  40%|| 10992/27626 [25:48<39:48,  6.97it/s]

{'epoch': 0, 'iter': 10990, 'avg_loss': 9.104037607977556, 'avg_acc': 49.921526703666636, 'loss': 9.748669624328613}


EP_train:0:  40%|| 11002/27626 [25:49<39:25,  7.03it/s]

{'epoch': 0, 'iter': 11000, 'avg_loss': 9.10392089154913, 'avg_acc': 49.921313971457145, 'loss': 9.016061782836914}


EP_train:0:  40%|| 11012/27626 [25:51<39:19,  7.04it/s]

{'epoch': 0, 'iter': 11010, 'avg_loss': 9.103618685701825, 'avg_acc': 49.92138543274907, 'loss': 8.40804386138916}


EP_train:0:  40%|| 11022/27626 [25:52<39:14,  7.05it/s]

{'epoch': 0, 'iter': 11020, 'avg_loss': 9.103369989127756, 'avg_acc': 49.920606115597494, 'loss': 9.80597972869873}


EP_train:0:  40%|| 11032/27626 [25:53<39:11,  7.06it/s]

{'epoch': 0, 'iter': 11030, 'avg_loss': 9.103001511979866, 'avg_acc': 49.92067808902185, 'loss': 8.056609153747559}


EP_train:0:  40%|| 11042/27626 [25:55<39:05,  7.07it/s]

{'epoch': 0, 'iter': 11040, 'avg_loss': 9.102553707891893, 'avg_acc': 49.92018386015759, 'loss': 8.025794982910156}


EP_train:0:  40%|| 11052/27626 [25:56<38:58,  7.09it/s]

{'epoch': 0, 'iter': 11050, 'avg_loss': 9.1018565763619, 'avg_acc': 49.924215003167134, 'loss': 7.327312469482422}


EP_train:0:  40%|| 11062/27626 [25:58<39:02,  7.07it/s]

{'epoch': 0, 'iter': 11060, 'avg_loss': 9.101198192186375, 'avg_acc': 49.92852138143025, 'loss': 8.535848617553711}


EP_train:0:  40%|| 11072/27626 [25:59<38:57,  7.08it/s]

{'epoch': 0, 'iter': 11070, 'avg_loss': 9.100835278066297, 'avg_acc': 49.926892331316054, 'loss': 8.719460487365723}


EP_train:0:  40%|| 11082/27626 [26:00<39:16,  7.02it/s]

{'epoch': 0, 'iter': 11080, 'avg_loss': 9.100259763886164, 'avg_acc': 49.92385615016695, 'loss': 8.374984741210938}


EP_train:0:  40%|| 11092/27626 [26:02<38:55,  7.08it/s]

{'epoch': 0, 'iter': 11090, 'avg_loss': 9.100230594588922, 'avg_acc': 49.922234243981606, 'loss': 8.404423713684082}


EP_train:0:  40%|| 11102/27626 [26:03<39:09,  7.03it/s]

{'epoch': 0, 'iter': 11100, 'avg_loss': 9.099785977897854, 'avg_acc': 49.92568237095757, 'loss': 9.190093994140625}


EP_train:0:  40%|| 11112/27626 [26:05<39:06,  7.04it/s]

{'epoch': 0, 'iter': 11110, 'avg_loss': 9.099476549265118, 'avg_acc': 49.930249302493024, 'loss': 8.990033149719238}


EP_train:0:  40%|| 11122/27626 [26:06<39:00,  7.05it/s]

{'epoch': 0, 'iter': 11120, 'avg_loss': 9.099359048698116, 'avg_acc': 49.93284102149087, 'loss': 9.318083763122559}


EP_train:0:  40%|| 11132/27626 [26:07<38:55,  7.06it/s]

{'epoch': 0, 'iter': 11130, 'avg_loss': 9.098764436562613, 'avg_acc': 49.92869014464109, 'loss': 8.937261581420898}


EP_train:0:  40%|| 11142/27626 [26:09<40:57,  6.71it/s]

{'epoch': 0, 'iter': 11140, 'avg_loss': 9.09890308175542, 'avg_acc': 49.925668701193786, 'loss': 9.776413917541504}


EP_train:0:  40%|| 11152/27626 [26:10<38:42,  7.09it/s]

{'epoch': 0, 'iter': 11150, 'avg_loss': 9.098683405687922, 'avg_acc': 49.92545511613308, 'loss': 8.341947555541992}


EP_train:0:  40%|| 11162/27626 [26:12<39:03,  7.02it/s]

{'epoch': 0, 'iter': 11160, 'avg_loss': 9.09824317775419, 'avg_acc': 49.93084177045068, 'loss': 9.200618743896484}


EP_train:0:  40%|| 11172/27626 [26:13<39:13,  6.99it/s]

{'epoch': 0, 'iter': 11170, 'avg_loss': 9.098197771947778, 'avg_acc': 49.93342135887566, 'loss': 8.938274383544922}


EP_train:0:  40%|| 11182/27626 [26:15<38:45,  7.07it/s]

{'epoch': 0, 'iter': 11180, 'avg_loss': 9.097689450122985, 'avg_acc': 49.934039889097576, 'loss': 8.47052001953125}


EP_train:0:  41%|| 11192/27626 [26:16<38:57,  7.03it/s]

{'epoch': 0, 'iter': 11190, 'avg_loss': 9.097441803619695, 'avg_acc': 49.933261102671786, 'loss': 8.632728576660156}


EP_train:0:  41%|| 11202/27626 [26:17<39:01,  7.01it/s]

{'epoch': 0, 'iter': 11200, 'avg_loss': 9.096643458220717, 'avg_acc': 49.930530756182485, 'loss': 8.166586875915527}


EP_train:0:  41%|| 11212/27626 [26:19<38:56,  7.03it/s]

{'epoch': 0, 'iter': 11210, 'avg_loss': 9.096495712359555, 'avg_acc': 49.93031397734368, 'loss': 8.448441505432129}


EP_train:0:  41%|| 11222/27626 [26:20<38:26,  7.11it/s]

{'epoch': 0, 'iter': 11220, 'avg_loss': 9.095878882349075, 'avg_acc': 49.92759112378576, 'loss': 8.786137580871582}


EP_train:0:  41%|| 11232/27626 [26:22<38:35,  7.08it/s]

{'epoch': 0, 'iter': 11230, 'avg_loss': 9.095600625390457, 'avg_acc': 49.92737734841065, 'loss': 8.515942573547363}


EP_train:0:  41%|| 11242/27626 [26:23<38:41,  7.06it/s]

{'epoch': 0, 'iter': 11240, 'avg_loss': 9.095256154790233, 'avg_acc': 49.92855395427453, 'loss': 7.323642253875732}


EP_train:0:  41%|| 11252/27626 [26:24<38:39,  7.06it/s]

{'epoch': 0, 'iter': 11250, 'avg_loss': 9.094887899341673, 'avg_acc': 49.928895209314724, 'loss': 8.563337326049805}


EP_train:0:  41%|| 11262/27626 [26:26<38:27,  7.09it/s]

{'epoch': 0, 'iter': 11260, 'avg_loss': 9.094740237364833, 'avg_acc': 49.92729331320487, 'loss': 9.837759017944336}


EP_train:0:  41%|| 11272/27626 [26:27<38:38,  7.05it/s]

{'epoch': 0, 'iter': 11270, 'avg_loss': 9.09450321114589, 'avg_acc': 49.9295759027593, 'loss': 8.756321907043457}


EP_train:0:  41%|| 11282/27626 [26:29<38:49,  7.01it/s]

{'epoch': 0, 'iter': 11280, 'avg_loss': 9.09396141846228, 'avg_acc': 49.92991534438436, 'loss': 8.949711799621582}


EP_train:0:  41%|| 11292/27626 [26:30<38:29,  7.07it/s]

{'epoch': 0, 'iter': 11290, 'avg_loss': 9.093819213646231, 'avg_acc': 49.93053095385706, 'loss': 9.193882942199707}


EP_train:0:  41%|| 11302/27626 [26:32<38:44,  7.02it/s]

{'epoch': 0, 'iter': 11300, 'avg_loss': 9.093527501435968, 'avg_acc': 49.929486328643485, 'loss': 9.224150657653809}


EP_train:0:  41%|| 11312/27626 [26:33<38:26,  7.07it/s]

{'epoch': 0, 'iter': 11310, 'avg_loss': 9.092958017239129, 'avg_acc': 49.93065378834763, 'loss': 7.960420608520508}


EP_train:0:  41%|| 11322/27626 [26:34<38:42,  7.02it/s]

{'epoch': 0, 'iter': 11320, 'avg_loss': 9.09264574896667, 'avg_acc': 49.931267114212524, 'loss': 8.609371185302734}


EP_train:0:  41%|| 11332/27626 [26:36<38:39,  7.03it/s]

{'epoch': 0, 'iter': 11330, 'avg_loss': 9.092210982885504, 'avg_acc': 49.93105198129027, 'loss': 8.98764705657959}


EP_train:0:  41%|| 11342/27626 [26:37<38:36,  7.03it/s]

{'epoch': 0, 'iter': 11340, 'avg_loss': 9.091875586178807, 'avg_acc': 49.927255092143554, 'loss': 9.207594871520996}


EP_train:0:  41%|| 11352/27626 [26:39<38:27,  7.05it/s]

{'epoch': 0, 'iter': 11350, 'avg_loss': 9.091607052348762, 'avg_acc': 49.926217954365256, 'loss': 8.918025970458984}


EP_train:0:  41%|| 11362/27626 [26:40<38:33,  7.03it/s]

{'epoch': 0, 'iter': 11360, 'avg_loss': 9.0914139278799, 'avg_acc': 49.92050655752134, 'loss': 9.323076248168945}


EP_train:0:  41%|| 11372/27626 [26:41<38:27,  7.05it/s]

{'epoch': 0, 'iter': 11370, 'avg_loss': 9.090826816456518, 'avg_acc': 49.91535485005716, 'loss': 8.601324081420898}


EP_train:0:  41%|| 11382/27626 [26:43<38:30,  7.03it/s]

{'epoch': 0, 'iter': 11380, 'avg_loss': 9.090445346922774, 'avg_acc': 49.912958000175735, 'loss': 7.793630123138428}


EP_train:0:  41%|| 11392/27626 [26:44<38:09,  7.09it/s]

{'epoch': 0, 'iter': 11390, 'avg_loss': 9.089809200173674, 'avg_acc': 49.9160521464314, 'loss': 8.316272735595703}


EP_train:0:  41%|| 11402/27626 [26:46<38:19,  7.05it/s]

{'epoch': 0, 'iter': 11400, 'avg_loss': 9.08952388607842, 'avg_acc': 49.915303482150684, 'loss': 9.09857177734375}


EP_train:0:  41%|| 11412/27626 [26:47<38:33,  7.01it/s]

{'epoch': 0, 'iter': 11410, 'avg_loss': 9.089528242677343, 'avg_acc': 49.91893786697047, 'loss': 9.243690490722656}


EP_train:0:  41%|| 11422/27626 [26:48<38:08,  7.08it/s]

{'epoch': 0, 'iter': 11420, 'avg_loss': 9.089141313963518, 'avg_acc': 49.91572541808949, 'loss': 8.614136695861816}


EP_train:0:  41%|| 11432/27626 [26:50<38:00,  7.10it/s]

{'epoch': 0, 'iter': 11430, 'avg_loss': 9.088637773889056, 'avg_acc': 49.91798617793719, 'loss': 7.635489463806152}


EP_train:0:  41%|| 11442/27626 [26:51<38:05,  7.08it/s]

{'epoch': 0, 'iter': 11440, 'avg_loss': 9.08793392382284, 'avg_acc': 49.92160868805174, 'loss': 8.583497047424316}


EP_train:0:  41%|| 11452/27626 [26:53<38:04,  7.08it/s]

{'epoch': 0, 'iter': 11450, 'avg_loss': 9.087813934335832, 'avg_acc': 49.916492009431494, 'loss': 7.7943267822265625}


EP_train:0:  41%|| 11462/27626 [26:54<38:08,  7.06it/s]

{'epoch': 0, 'iter': 11460, 'avg_loss': 9.08736339733349, 'avg_acc': 49.914656225460256, 'loss': 7.728584289550781}


EP_train:0:  42%|| 11472/27626 [26:55<38:12,  7.05it/s]

{'epoch': 0, 'iter': 11470, 'avg_loss': 9.087218257753406, 'avg_acc': 49.91200636387412, 'loss': 9.10474967956543}


EP_train:0:  42%|| 11482/27626 [26:57<38:21,  7.01it/s]

{'epoch': 0, 'iter': 11480, 'avg_loss': 9.086920689709999, 'avg_acc': 49.91208300670673, 'loss': 8.784162521362305}


EP_train:0:  42%|| 11492/27626 [26:58<38:20,  7.01it/s]

{'epoch': 0, 'iter': 11490, 'avg_loss': 9.08640180618808, 'avg_acc': 49.91297537203028, 'loss': 8.885151863098145}


EP_train:0:  42%|| 11502/27626 [27:00<38:17,  7.02it/s]

{'epoch': 0, 'iter': 11500, 'avg_loss': 9.08636029249025, 'avg_acc': 49.91250760803408, 'loss': 9.498026847839355}


EP_train:0:  42%|| 11512/27626 [27:01<38:10,  7.04it/s]

{'epoch': 0, 'iter': 11510, 'avg_loss': 9.08613212536734, 'avg_acc': 49.91394101294414, 'loss': 9.452071189880371}


EP_train:0:  42%|| 11522/27626 [27:03<37:58,  7.07it/s]

{'epoch': 0, 'iter': 11520, 'avg_loss': 9.085900125792431, 'avg_acc': 49.90859083412898, 'loss': 8.697354316711426}


EP_train:0:  42%|| 11532/27626 [27:04<37:54,  7.08it/s]

{'epoch': 0, 'iter': 11530, 'avg_loss': 9.085274559555604, 'avg_acc': 49.9040629607146, 'loss': 8.424384117126465}


EP_train:0:  42%|| 11542/27626 [27:05<38:00,  7.05it/s]

{'epoch': 0, 'iter': 11540, 'avg_loss': 9.085005851420359, 'avg_acc': 49.90712459925483, 'loss': 8.326406478881836}


EP_train:0:  42%|| 11552/27626 [27:07<38:37,  6.94it/s]

{'epoch': 0, 'iter': 11550, 'avg_loss': 9.084855188632794, 'avg_acc': 49.90747554324301, 'loss': 9.744867324829102}


EP_train:0:  42%|| 11562/27626 [27:08<38:11,  7.01it/s]

{'epoch': 0, 'iter': 11560, 'avg_loss': 9.084505388041947, 'avg_acc': 49.90701496410345, 'loss': 8.16092586517334}


EP_train:0:  42%|| 11572/27626 [27:10<38:09,  7.01it/s]

{'epoch': 0, 'iter': 11570, 'avg_loss': 9.084305427303352, 'avg_acc': 49.904934750669774, 'loss': 8.974926948547363}


EP_train:0:  42%|| 11582/27626 [27:11<37:58,  7.04it/s]

{'epoch': 0, 'iter': 11580, 'avg_loss': 9.083960960940905, 'avg_acc': 49.90420732233831, 'loss': 9.137361526489258}


EP_train:0:  42%|| 11592/27626 [27:12<37:49,  7.06it/s]

{'epoch': 0, 'iter': 11590, 'avg_loss': 9.083805772559058, 'avg_acc': 49.90455957208179, 'loss': 9.317590713500977}


EP_train:0:  42%|| 11602/27626 [27:14<37:47,  7.07it/s]

{'epoch': 0, 'iter': 11600, 'avg_loss': 9.08337115633129, 'avg_acc': 49.90652745452978, 'loss': 9.020715713500977}


EP_train:0:  42%|| 11612/27626 [27:15<37:38,  7.09it/s]

{'epoch': 0, 'iter': 11610, 'avg_loss': 9.082791667033428, 'avg_acc': 49.908222805959866, 'loss': 8.449617385864258}


EP_train:0:  42%|| 11622/27626 [27:17<37:39,  7.08it/s]

{'epoch': 0, 'iter': 11620, 'avg_loss': 9.082283677276015, 'avg_acc': 49.90830178125807, 'loss': 8.544905662536621}


EP_train:0:  42%|| 11632/27626 [27:18<37:38,  7.08it/s]

{'epoch': 0, 'iter': 11630, 'avg_loss': 9.08178776553343, 'avg_acc': 49.906231192502794, 'loss': 7.304301738739014}


EP_train:0:  42%|| 11642/27626 [27:20<37:43,  7.06it/s]

{'epoch': 0, 'iter': 11640, 'avg_loss': 9.08130904598218, 'avg_acc': 49.90792242934456, 'loss': 8.61725902557373}


EP_train:0:  42%|| 11652/27626 [27:21<38:25,  6.93it/s]

{'epoch': 0, 'iter': 11650, 'avg_loss': 9.080764099145977, 'avg_acc': 49.911756501587845, 'loss': 9.133153915405273}


EP_train:0:  42%|| 11662/27626 [27:22<37:47,  7.04it/s]

{'epoch': 0, 'iter': 11660, 'avg_loss': 9.08061176315097, 'avg_acc': 49.912100162936284, 'loss': 8.379230499267578}


EP_train:0:  42%|| 11672/27626 [27:24<37:28,  7.09it/s]

{'epoch': 0, 'iter': 11670, 'avg_loss': 9.080599625469551, 'avg_acc': 49.912175477679725, 'loss': 9.700620651245117}


EP_train:0:  42%|| 11682/27626 [27:25<37:34,  7.07it/s]

{'epoch': 0, 'iter': 11680, 'avg_loss': 9.080482597333113, 'avg_acc': 49.91144807807551, 'loss': 9.00360107421875}


EP_train:0:  42%|| 11692/27626 [27:27<37:35,  7.07it/s]

{'epoch': 0, 'iter': 11690, 'avg_loss': 9.0802044804842, 'avg_acc': 49.91339491916859, 'loss': 8.828899383544922}


EP_train:0:  42%|| 11702/27626 [27:28<37:31,  7.07it/s]

{'epoch': 0, 'iter': 11700, 'avg_loss': 9.079958284912879, 'avg_acc': 49.908661652850185, 'loss': 8.868147850036621}


EP_train:0:  42%|| 11712/27626 [27:29<37:27,  7.08it/s]

{'epoch': 0, 'iter': 11710, 'avg_loss': 9.07928012398873, 'avg_acc': 49.90660490137478, 'loss': 8.461719512939453}


EP_train:0:  42%|| 11722/27626 [27:31<37:17,  7.11it/s]

{'epoch': 0, 'iter': 11720, 'avg_loss': 9.078497042337402, 'avg_acc': 49.90561812132071, 'loss': 9.042703628540039}


EP_train:0:  42%|| 11732/27626 [27:32<37:57,  6.98it/s]

{'epoch': 0, 'iter': 11730, 'avg_loss': 9.078290181903418, 'avg_acc': 49.90676412923025, 'loss': 8.199474334716797}


EP_train:0:  43%|| 11742/27626 [27:34<37:42,  7.02it/s]

{'epoch': 0, 'iter': 11740, 'avg_loss': 9.077851086233048, 'avg_acc': 49.904448087897116, 'loss': 8.34837818145752}


EP_train:0:  43%|| 11752/27626 [27:35<37:34,  7.04it/s]

{'epoch': 0, 'iter': 11750, 'avg_loss': 9.077372073518521, 'avg_acc': 49.902667858054635, 'loss': 8.950180053710938}


EP_train:0:  43%|| 11762/27626 [27:36<37:16,  7.09it/s]

{'epoch': 0, 'iter': 11760, 'avg_loss': 9.07705648565118, 'avg_acc': 49.9035477425389, 'loss': 8.562723159790039}


EP_train:0:  43%|| 11772/27626 [27:38<37:20,  7.08it/s]

{'epoch': 0, 'iter': 11770, 'avg_loss': 9.077171669447091, 'avg_acc': 49.90442613201937, 'loss': 8.207462310791016}


EP_train:0:  43%|| 11782/27626 [27:39<37:26,  7.05it/s]

{'epoch': 0, 'iter': 11780, 'avg_loss': 9.076866761400161, 'avg_acc': 49.902650454121044, 'loss': 8.266396522521973}


EP_train:0:  43%|| 11792/27626 [27:41<37:23,  7.06it/s]

{'epoch': 0, 'iter': 11790, 'avg_loss': 9.076578492497925, 'avg_acc': 49.898492494275295, 'loss': 8.637679100036621}


EP_train:0:  43%|| 11802/27626 [27:42<37:05,  7.11it/s]

{'epoch': 0, 'iter': 11800, 'avg_loss': 9.07631786022537, 'avg_acc': 49.89937293449708, 'loss': 9.272819519042969}


EP_train:0:  43%|| 11812/27626 [27:43<37:37,  7.00it/s]

{'epoch': 0, 'iter': 11810, 'avg_loss': 9.075945572792126, 'avg_acc': 49.899193548387096, 'loss': 8.237942695617676}


EP_train:0:  43%|| 11822/27626 [27:45<37:30,  7.02it/s]

{'epoch': 0, 'iter': 11820, 'avg_loss': 9.075620064728556, 'avg_acc': 49.89716394552068, 'loss': 8.28050708770752}


EP_train:0:  43%|| 11832/27626 [27:46<37:39,  6.99it/s]

{'epoch': 0, 'iter': 11830, 'avg_loss': 9.075459831495094, 'avg_acc': 49.89540191023582, 'loss': 9.718329429626465}


EP_train:0:  43%|| 11842/27626 [27:48<37:15,  7.06it/s]

{'epoch': 0, 'iter': 11840, 'avg_loss': 9.075052088272047, 'avg_acc': 49.894962418714634, 'loss': 8.295151710510254}


EP_train:0:  43%|| 11852/27626 [27:49<37:11,  7.07it/s]

{'epoch': 0, 'iter': 11850, 'avg_loss': 9.074811937919439, 'avg_acc': 49.891095688127585, 'loss': 8.692584037780762}


EP_train:0:  43%|| 11862/27626 [27:51<37:09,  7.07it/s]

{'epoch': 0, 'iter': 11860, 'avg_loss': 9.074528817050664, 'avg_acc': 49.89171444228986, 'loss': 9.110633850097656}


EP_train:0:  43%|| 11872/27626 [27:52<37:04,  7.08it/s]

{'epoch': 0, 'iter': 11870, 'avg_loss': 9.074217010795612, 'avg_acc': 49.89180566085418, 'loss': 8.908883094787598}


EP_train:0:  43%|| 11882/27626 [27:53<37:20,  7.03it/s]

{'epoch': 0, 'iter': 11880, 'avg_loss': 9.073961467436293, 'avg_acc': 49.88795135089639, 'loss': 8.645734786987305}


EP_train:0:  43%|| 11892/27626 [27:55<37:44,  6.95it/s]

{'epoch': 0, 'iter': 11890, 'avg_loss': 9.073540873146971, 'avg_acc': 49.88515473887814, 'loss': 8.88793659210205}


EP_train:0:  43%|| 11902/27626 [27:56<37:15,  7.03it/s]

{'epoch': 0, 'iter': 11900, 'avg_loss': 9.073325962511875, 'avg_acc': 49.884200907486765, 'loss': 8.647221565246582}


EP_train:0:  43%|| 11912/27626 [27:58<37:23,  7.00it/s]

{'epoch': 0, 'iter': 11910, 'avg_loss': 9.073015315257855, 'avg_acc': 49.88482285282512, 'loss': 8.946771621704102}


EP_train:0:  43%|| 11922/27626 [27:59<37:20,  7.01it/s]

{'epoch': 0, 'iter': 11920, 'avg_loss': 9.072869266842488, 'avg_acc': 49.887278751782574, 'loss': 9.158638954162598}


EP_train:0:  43%|| 11932/27626 [28:00<37:12,  7.03it/s]

{'epoch': 0, 'iter': 11930, 'avg_loss': 9.07253233543756, 'avg_acc': 49.88684938395776, 'loss': 8.403075218200684}


EP_train:0:  43%|| 11942/27626 [28:02<36:53,  7.09it/s]

{'epoch': 0, 'iter': 11940, 'avg_loss': 9.07215162857365, 'avg_acc': 49.88694414203166, 'loss': 9.544572830200195}


EP_train:0:  43%|| 11952/27626 [28:03<37:06,  7.04it/s]

{'epoch': 0, 'iter': 11950, 'avg_loss': 9.071828947337778, 'avg_acc': 49.88834616350096, 'loss': 7.773604393005371}


EP_train:0:  43%|| 11962/27626 [28:05<36:50,  7.09it/s]

{'epoch': 0, 'iter': 11960, 'avg_loss': 9.071052293336548, 'avg_acc': 49.89157470111195, 'loss': 8.202868461608887}


EP_train:0:  43%|| 11972/27626 [28:06<37:16,  7.00it/s]

{'epoch': 0, 'iter': 11970, 'avg_loss': 9.07068355252157, 'avg_acc': 49.89218736947623, 'loss': 8.213794708251953}


EP_train:0:  43%|| 11982/27626 [28:08<36:44,  7.10it/s]

{'epoch': 0, 'iter': 11980, 'avg_loss': 9.070565069669993, 'avg_acc': 49.89279901510725, 'loss': 9.477519989013672}


EP_train:0:  43%|| 11992/27626 [28:09<36:58,  7.05it/s]

{'epoch': 0, 'iter': 11990, 'avg_loss': 9.070566716041847, 'avg_acc': 49.89836127095322, 'loss': 9.587798118591309}


EP_train:0:  43%|| 12002/27626 [28:10<37:00,  7.04it/s]

{'epoch': 0, 'iter': 12000, 'avg_loss': 9.070319066772003, 'avg_acc': 49.89922714773769, 'loss': 8.369134902954102}


EP_train:0:  43%|| 12012/27626 [28:12<37:17,  6.98it/s]

{'epoch': 0, 'iter': 12010, 'avg_loss': 9.06996266028633, 'avg_acc': 49.89488801931562, 'loss': 8.73951530456543}


EP_train:0:  44%|| 12022/27626 [28:13<36:58,  7.03it/s]

{'epoch': 0, 'iter': 12020, 'avg_loss': 9.069712681179682, 'avg_acc': 49.89341568921055, 'loss': 8.08935260772705}


EP_train:0:  44%|| 12032/27626 [28:15<36:47,  7.06it/s]

{'epoch': 0, 'iter': 12030, 'avg_loss': 9.069474016873976, 'avg_acc': 49.89480300889369, 'loss': 8.672188758850098}


EP_train:0:  44%|| 12042/27626 [28:16<36:55,  7.04it/s]

{'epoch': 0, 'iter': 12040, 'avg_loss': 9.069113785321012, 'avg_acc': 49.89359272485674, 'loss': 9.164188385009766}


EP_train:0:  44%|| 12052/27626 [28:17<36:53,  7.04it/s]

{'epoch': 0, 'iter': 12050, 'avg_loss': 9.068823861676167, 'avg_acc': 49.89757074101735, 'loss': 8.403473854064941}


EP_train:0:  44%|| 12062/27626 [28:19<36:36,  7.09it/s]

{'epoch': 0, 'iter': 12060, 'avg_loss': 9.068402931174475, 'avg_acc': 49.89506467125446, 'loss': 8.54821491241455}


EP_train:0:  44%|| 12072/27626 [28:20<36:57,  7.01it/s]

{'epoch': 0, 'iter': 12070, 'avg_loss': 9.068039489556798, 'avg_acc': 49.89644602766962, 'loss': 8.398828506469727}


EP_train:0:  44%|| 12082/27626 [28:22<36:45,  7.05it/s]

{'epoch': 0, 'iter': 12080, 'avg_loss': 9.067672074724236, 'avg_acc': 49.89549706150153, 'loss': 9.654969215393066}


EP_train:0:  44%|| 12092/27626 [28:23<36:55,  7.01it/s]

{'epoch': 0, 'iter': 12090, 'avg_loss': 9.0673354617144, 'avg_acc': 49.89584194855678, 'loss': 9.244443893432617}


EP_train:0:  44%|| 12102/27626 [28:24<36:50,  7.02it/s]

{'epoch': 0, 'iter': 12100, 'avg_loss': 9.06673147453571, 'avg_acc': 49.89463680687546, 'loss': 8.358598709106445}


EP_train:0:  44%|| 12112/27626 [28:26<36:26,  7.09it/s]

{'epoch': 0, 'iter': 12110, 'avg_loss': 9.065884008699735, 'avg_acc': 49.89291759557427, 'loss': 8.92417049407959}


EP_train:0:  44%|| 12122/27626 [28:27<36:36,  7.06it/s]

{'epoch': 0, 'iter': 12120, 'avg_loss': 9.065643450635738, 'avg_acc': 49.89377939113934, 'loss': 10.069080352783203}


EP_train:0:  44%|| 12132/27626 [28:29<36:31,  7.07it/s]

{'epoch': 0, 'iter': 12130, 'avg_loss': 9.065507012594367, 'avg_acc': 49.89489737037343, 'loss': 9.17023754119873}


EP_train:0:  44%|| 12142/27626 [28:30<36:27,  7.08it/s]

{'epoch': 0, 'iter': 12140, 'avg_loss': 9.064985347298812, 'avg_acc': 49.89575611564121, 'loss': 8.008484840393066}


EP_train:0:  44%|| 12152/27626 [28:32<36:26,  7.08it/s]

{'epoch': 0, 'iter': 12150, 'avg_loss': 9.064618633643226, 'avg_acc': 49.8917270183524, 'loss': 9.099666595458984}


EP_train:0:  44%|| 12162/27626 [28:33<36:43,  7.02it/s]

{'epoch': 0, 'iter': 12160, 'avg_loss': 9.064045433856558, 'avg_acc': 49.89310089630787, 'loss': 8.773534774780273}


EP_train:0:  44%|| 12172/27626 [28:34<36:24,  7.08it/s]

{'epoch': 0, 'iter': 12170, 'avg_loss': 9.06370507308387, 'avg_acc': 49.891904937967304, 'loss': 8.666248321533203}


EP_train:0:  44%|| 12182/27626 [28:36<36:22,  7.08it/s]

{'epoch': 0, 'iter': 12180, 'avg_loss': 9.063529989009087, 'avg_acc': 49.89378950825055, 'loss': 8.53098201751709}


EP_train:0:  44%|| 12192/27626 [28:37<36:36,  7.03it/s]

{'epoch': 0, 'iter': 12190, 'avg_loss': 9.063247749348013, 'avg_acc': 49.89669633336067, 'loss': 9.164246559143066}


EP_train:0:  44%|| 12202/27626 [28:39<36:33,  7.03it/s]

{'epoch': 0, 'iter': 12200, 'avg_loss': 9.062907773425819, 'avg_acc': 49.89345135644619, 'loss': 7.889560222625732}


EP_train:0:  44%|| 12212/27626 [28:40<36:27,  7.05it/s]

{'epoch': 0, 'iter': 12210, 'avg_loss': 9.061953199845979, 'avg_acc': 49.892259028744576, 'loss': 8.401585578918457}


EP_train:0:  44%|| 12222/27626 [28:41<36:22,  7.06it/s]

{'epoch': 0, 'iter': 12220, 'avg_loss': 9.061721538972586, 'avg_acc': 49.89362572620898, 'loss': 8.145573616027832}


EP_train:0:  44%|| 12232/27626 [28:43<36:18,  7.07it/s]

{'epoch': 0, 'iter': 12230, 'avg_loss': 9.061352931861544, 'avg_acc': 49.891157714005395, 'loss': 8.794034957885742}


EP_train:0:  44%|| 12242/27626 [28:44<36:33,  7.01it/s]

{'epoch': 0, 'iter': 12240, 'avg_loss': 9.061136513186401, 'avg_acc': 49.89073605097622, 'loss': 9.08257007598877}


EP_train:0:  44%|| 12252/27626 [28:46<36:04,  7.10it/s]

{'epoch': 0, 'iter': 12250, 'avg_loss': 9.060650875632417, 'avg_acc': 49.89235572606318, 'loss': 8.20905590057373}


EP_train:0:  44%|| 12262/27626 [28:47<36:09,  7.08it/s]

{'epoch': 0, 'iter': 12260, 'avg_loss': 9.060399696786295, 'avg_acc': 49.88989478835332, 'loss': 9.341692924499512}


EP_train:0:  44%|| 12272/27626 [28:48<36:25,  7.03it/s]

{'epoch': 0, 'iter': 12270, 'avg_loss': 9.06010305769781, 'avg_acc': 49.88743786162497, 'loss': 8.425370216369629}


EP_train:0:  44%|| 12282/27626 [28:50<36:28,  7.01it/s]

{'epoch': 0, 'iter': 12280, 'avg_loss': 9.059769221581442, 'avg_acc': 49.888038433352335, 'loss': 9.296881675720215}


EP_train:0:  44%|| 12292/27626 [28:51<36:30,  7.00it/s]

{'epoch': 0, 'iter': 12290, 'avg_loss': 9.05927800476556, 'avg_acc': 49.890163534293386, 'loss': 8.494956016540527}


EP_train:0:  45%|| 12302/27626 [28:53<36:26,  7.01it/s]

{'epoch': 0, 'iter': 12300, 'avg_loss': 9.058708553478569, 'avg_acc': 49.89177709129339, 'loss': 8.708099365234375}


EP_train:0:  45%|| 12312/27626 [28:54<36:07,  7.07it/s]

{'epoch': 0, 'iter': 12310, 'avg_loss': 9.058591190200557, 'avg_acc': 49.89008813256437, 'loss': 9.8234281539917}


EP_train:0:  45%|| 12322/27626 [28:56<36:03,  7.07it/s]

{'epoch': 0, 'iter': 12320, 'avg_loss': 9.058524547231999, 'avg_acc': 49.889670075480886, 'loss': 8.338456153869629}


EP_train:0:  45%|| 12332/27626 [28:57<36:13,  7.04it/s]

{'epoch': 0, 'iter': 12330, 'avg_loss': 9.05843119845175, 'avg_acc': 49.88823899116049, 'loss': 7.590373516082764}


EP_train:0:  45%|| 12342/27626 [28:58<35:54,  7.09it/s]

{'epoch': 0, 'iter': 12340, 'avg_loss': 9.058756834876643, 'avg_acc': 49.8847844583097, 'loss': 8.666838645935059}


EP_train:0:  45%|| 12352/27626 [29:00<35:53,  7.09it/s]

{'epoch': 0, 'iter': 12350, 'avg_loss': 9.058570163335485, 'avg_acc': 49.886901870293904, 'loss': 8.30395221710205}


EP_train:0:  45%|| 12362/27626 [29:01<36:09,  7.03it/s]

{'epoch': 0, 'iter': 12360, 'avg_loss': 9.058475404502833, 'avg_acc': 49.888510233799856, 'loss': 8.919029235839844}


EP_train:0:  45%|| 12372/27626 [29:03<36:24,  6.98it/s]

{'epoch': 0, 'iter': 12370, 'avg_loss': 9.05784654409168, 'avg_acc': 49.88531646592838, 'loss': 8.356314659118652}


EP_train:0:  45%|| 12382/27626 [29:04<36:13,  7.01it/s]

{'epoch': 0, 'iter': 12380, 'avg_loss': 9.057420165865576, 'avg_acc': 49.88667110895727, 'loss': 7.977809906005859}


EP_train:0:  45%|| 12392/27626 [29:05<36:01,  7.05it/s]

{'epoch': 0, 'iter': 12390, 'avg_loss': 9.056945410375878, 'avg_acc': 49.88524937454604, 'loss': 8.256976127624512}


EP_train:0:  45%|| 12402/27626 [29:07<36:02,  7.04it/s]

{'epoch': 0, 'iter': 12400, 'avg_loss': 9.056454117072608, 'avg_acc': 49.88609789533102, 'loss': 9.063920974731445}


EP_train:0:  45%|| 12412/27626 [29:08<36:04,  7.03it/s]

{'epoch': 0, 'iter': 12410, 'avg_loss': 9.05626363990943, 'avg_acc': 49.88618967045363, 'loss': 9.162028312683105}


EP_train:0:  45%|| 12422/27626 [29:10<35:47,  7.08it/s]

{'epoch': 0, 'iter': 12420, 'avg_loss': 9.055911687940476, 'avg_acc': 49.886029707752996, 'loss': 9.115365982055664}


EP_train:0:  45%|| 12432/27626 [29:11<35:49,  7.07it/s]

{'epoch': 0, 'iter': 12430, 'avg_loss': 9.055656910556367, 'avg_acc': 49.8861213900732, 'loss': 8.101680755615234}


EP_train:0:  45%|| 12442/27626 [29:12<36:04,  7.01it/s]

{'epoch': 0, 'iter': 12440, 'avg_loss': 9.05537598842728, 'avg_acc': 49.887971224178116, 'loss': 9.439163208007812}


EP_train:0:  45%|| 12452/27626 [29:14<35:50,  7.06it/s]

{'epoch': 0, 'iter': 12450, 'avg_loss': 9.054738205509985, 'avg_acc': 49.89082202232753, 'loss': 9.171059608459473}


EP_train:0:  45%|| 12462/27626 [29:15<36:10,  6.98it/s]

{'epoch': 0, 'iter': 12460, 'avg_loss': 9.054431559788684, 'avg_acc': 49.89517293957147, 'loss': 8.650873184204102}


EP_train:0:  45%|| 12472/27626 [29:17<36:06,  7.00it/s]

{'epoch': 0, 'iter': 12470, 'avg_loss': 9.054165022058328, 'avg_acc': 49.896760484323636, 'loss': 8.395402908325195}


EP_train:0:  45%|| 12482/27626 [29:18<35:54,  7.03it/s]

{'epoch': 0, 'iter': 12480, 'avg_loss': 9.053803399323405, 'avg_acc': 49.89634244050957, 'loss': 8.518107414245605}


EP_train:0:  45%|| 12492/27626 [29:20<36:01,  7.00it/s]

{'epoch': 0, 'iter': 12490, 'avg_loss': 9.053737900921536, 'avg_acc': 49.89292290449123, 'loss': 8.933588981628418}


EP_train:0:  45%|| 12502/27626 [29:21<35:50,  7.03it/s]

{'epoch': 0, 'iter': 12500, 'avg_loss': 9.053356883373464, 'avg_acc': 49.88725901927846, 'loss': 8.642651557922363}


EP_train:0:  45%|| 12512/27626 [29:22<35:45,  7.04it/s]

{'epoch': 0, 'iter': 12510, 'avg_loss': 9.053188104136328, 'avg_acc': 49.88834825353688, 'loss': 9.112890243530273}


EP_train:0:  45%|| 12522/27626 [29:24<35:50,  7.03it/s]

{'epoch': 0, 'iter': 12520, 'avg_loss': 9.05287839676871, 'avg_acc': 49.89043407076112, 'loss': 8.136731147766113}


EP_train:0:  45%|| 12532/27626 [29:25<36:08,  6.96it/s]

{'epoch': 0, 'iter': 12530, 'avg_loss': 9.052023826027114, 'avg_acc': 49.890521506663475, 'loss': 7.993747711181641}


EP_train:0:  45%|| 12542/27626 [29:27<35:47,  7.02it/s]

{'epoch': 0, 'iter': 12540, 'avg_loss': 9.05166023236296, 'avg_acc': 49.89135635116817, 'loss': 9.046502113342285}


EP_train:0:  45%|| 12552/27626 [29:28<35:59,  6.98it/s]

{'epoch': 0, 'iter': 12550, 'avg_loss': 9.051367110269933, 'avg_acc': 49.887957134889646, 'loss': 9.126099586486816}


EP_train:0:  45%|| 12562/27626 [29:29<35:22,  7.10it/s]

{'epoch': 0, 'iter': 12560, 'avg_loss': 9.05135513435797, 'avg_acc': 49.89351962423374, 'loss': 9.628264427185059}


EP_train:0:  46%|| 12572/27626 [29:31<35:32,  7.06it/s]

{'epoch': 0, 'iter': 12570, 'avg_loss': 9.051115656456641, 'avg_acc': 49.89633879564076, 'loss': 8.722557067871094}


EP_train:0:  46%|| 12582/27626 [29:32<35:27,  7.07it/s]

{'epoch': 0, 'iter': 12580, 'avg_loss': 9.050826810139862, 'avg_acc': 49.89865670455449, 'loss': 9.428290367126465}


EP_train:0:  46%|| 12592/27626 [29:34<35:27,  7.07it/s]

{'epoch': 0, 'iter': 12590, 'avg_loss': 9.050735817769041, 'avg_acc': 49.89799261377174, 'loss': 8.481995582580566}


EP_train:0:  46%|| 12602/27626 [29:35<35:23,  7.08it/s]

{'epoch': 0, 'iter': 12600, 'avg_loss': 9.050373284648916, 'avg_acc': 49.89782556939926, 'loss': 9.07541275024414}


EP_train:0:  46%|| 12612/27626 [29:37<35:22,  7.08it/s]

{'epoch': 0, 'iter': 12610, 'avg_loss': 9.049963371805305, 'avg_acc': 49.89641979224487, 'loss': 8.45289134979248}


EP_train:0:  46%|| 12622/27626 [29:38<35:59,  6.95it/s]

{'epoch': 0, 'iter': 12620, 'avg_loss': 9.049887351024058, 'avg_acc': 49.89328302036289, 'loss': 8.993840217590332}


EP_train:0:  46%|| 12632/27626 [29:39<35:41,  7.00it/s]

{'epoch': 0, 'iter': 12630, 'avg_loss': 9.049360842104905, 'avg_acc': 49.895594173066264, 'loss': 8.758841514587402}


EP_train:0:  46%|| 12642/27626 [29:41<35:47,  6.98it/s]

{'epoch': 0, 'iter': 12640, 'avg_loss': 9.049127363609783, 'avg_acc': 49.89691282335259, 'loss': 8.379945755004883}


EP_train:0:  46%|| 12652/27626 [29:42<35:34,  7.02it/s]

{'epoch': 0, 'iter': 12650, 'avg_loss': 9.04858600491168, 'avg_acc': 49.894771164334834, 'loss': 7.635091781616211}


EP_train:0:  46%|| 12662/27626 [29:44<35:44,  6.98it/s]

{'epoch': 0, 'iter': 12660, 'avg_loss': 9.048340135215348, 'avg_acc': 49.89485427691336, 'loss': 8.180620193481445}


EP_train:0:  46%|| 12672/27626 [29:45<35:26,  7.03it/s]

{'epoch': 0, 'iter': 12670, 'avg_loss': 9.048162129051203, 'avg_acc': 49.89617038907742, 'loss': 8.698142051696777}


EP_train:0:  46%|| 12682/27626 [29:46<35:37,  6.99it/s]

{'epoch': 0, 'iter': 12680, 'avg_loss': 9.047714368767396, 'avg_acc': 49.89674513051021, 'loss': 8.186507225036621}


EP_train:0:  46%|| 12692/27626 [29:48<35:38,  6.98it/s]

{'epoch': 0, 'iter': 12690, 'avg_loss': 9.04698573236403, 'avg_acc': 49.898303916161055, 'loss': 7.758645534515381}


EP_train:0:  46%|| 12702/27626 [29:49<35:24,  7.03it/s]

{'epoch': 0, 'iter': 12700, 'avg_loss': 9.046601919853428, 'avg_acc': 49.89715376741989, 'loss': 8.820587158203125}


EP_train:0:  46%|| 12712/27626 [29:51<35:20,  7.03it/s]

{'epoch': 0, 'iter': 12710, 'avg_loss': 9.046251317066922, 'avg_acc': 49.89723467862481, 'loss': 8.241711616516113}


EP_train:0:  46%|| 12722/27626 [29:52<35:40,  6.96it/s]

{'epoch': 0, 'iter': 12720, 'avg_loss': 9.045968560038212, 'avg_acc': 49.895104551528966, 'loss': 9.019758224487305}


EP_train:0:  46%|| 12732/27626 [29:54<35:42,  6.95it/s]

{'epoch': 0, 'iter': 12730, 'avg_loss': 9.045912975284013, 'avg_acc': 49.8924868431388, 'loss': 8.920825004577637}


EP_train:0:  46%|| 12742/27626 [29:55<35:28,  6.99it/s]

{'epoch': 0, 'iter': 12740, 'avg_loss': 9.045769815345395, 'avg_acc': 49.896250294325405, 'loss': 8.749359130859375}


EP_train:0:  46%|| 12752/27626 [29:56<35:45,  6.93it/s]

{'epoch': 0, 'iter': 12750, 'avg_loss': 9.045530889840343, 'avg_acc': 49.89706689671398, 'loss': 9.463019371032715}


EP_train:0:  46%|| 12762/27626 [29:58<35:13,  7.03it/s]

{'epoch': 0, 'iter': 12760, 'avg_loss': 9.04533441118495, 'avg_acc': 49.89714755896873, 'loss': 9.548100471496582}


EP_train:0:  46%|| 12772/27626 [29:59<35:18,  7.01it/s]

{'epoch': 0, 'iter': 12770, 'avg_loss': 9.045079667588181, 'avg_acc': 49.89771748492679, 'loss': 7.9474592208862305}


EP_train:0:  46%|| 12782/27626 [30:01<35:11,  7.03it/s]

{'epoch': 0, 'iter': 12780, 'avg_loss': 9.045090663266121, 'avg_acc': 49.89486346921211, 'loss': 8.457125663757324}


EP_train:0:  46%|| 12792/27626 [30:02<35:13,  7.02it/s]

{'epoch': 0, 'iter': 12790, 'avg_loss': 9.044808462463694, 'avg_acc': 49.894457040106325, 'loss': 8.314189910888672}


EP_train:0:  46%|| 12802/27626 [30:04<35:07,  7.03it/s]

{'epoch': 0, 'iter': 12800, 'avg_loss': 9.044342149673854, 'avg_acc': 49.89283063823139, 'loss': 8.451231956481934}


EP_train:0:  46%|| 12812/27626 [30:05<35:24,  6.97it/s]

{'epoch': 0, 'iter': 12810, 'avg_loss': 9.044113606984531, 'avg_acc': 49.89096284443057, 'loss': 9.325166702270508}


EP_train:0:  46%|| 12822/27626 [30:06<35:44,  6.90it/s]

{'epoch': 0, 'iter': 12820, 'avg_loss': 9.043887476106207, 'avg_acc': 49.89202285313158, 'loss': 8.62743091583252}


EP_train:0:  46%|| 12832/27626 [30:08<35:22,  6.97it/s]

{'epoch': 0, 'iter': 12830, 'avg_loss': 9.04367725156621, 'avg_acc': 49.89259410801964, 'loss': 9.311393737792969}


EP_train:0:  46%|| 12842/27626 [30:09<35:10,  7.01it/s]

{'epoch': 0, 'iter': 12840, 'avg_loss': 9.043462398703731, 'avg_acc': 49.89121758430029, 'loss': 8.5867280960083}


EP_train:0:  47%|| 12852/27626 [30:11<35:05,  7.02it/s]

{'epoch': 0, 'iter': 12850, 'avg_loss': 9.04334090698666, 'avg_acc': 49.8900863746012, 'loss': 9.677510261535645}


EP_train:0:  47%|| 12862/27626 [30:12<35:18,  6.97it/s]

{'epoch': 0, 'iter': 12860, 'avg_loss': 9.043372281592626, 'avg_acc': 49.89381657724905, 'loss': 9.610703468322754}


EP_train:0:  47%|| 12872/27626 [30:13<35:13,  6.98it/s]

{'epoch': 0, 'iter': 12870, 'avg_loss': 9.04314265411575, 'avg_acc': 49.89317069380779, 'loss': 8.555148124694824}


EP_train:0:  47%|| 12882/27626 [30:15<36:25,  6.75it/s]

{'epoch': 0, 'iter': 12880, 'avg_loss': 9.042819233842447, 'avg_acc': 49.89398144553994, 'loss': 8.473136901855469}


EP_train:0:  47%|| 12892/27626 [30:16<35:02,  7.01it/s]

{'epoch': 0, 'iter': 12890, 'avg_loss': 9.042483997885023, 'avg_acc': 49.89115468156078, 'loss': 8.330901145935059}


EP_train:0:  47%|| 12902/27626 [30:18<35:03,  7.00it/s]

{'epoch': 0, 'iter': 12900, 'avg_loss': 9.042223043177868, 'avg_acc': 49.89220796837455, 'loss': 8.567276000976562}


EP_train:0:  47%|| 12912/27626 [30:19<35:29,  6.91it/s]

{'epoch': 0, 'iter': 12910, 'avg_loss': 9.042067746744546, 'avg_acc': 49.89471187359616, 'loss': 9.293447494506836}


EP_train:0:  47%|| 12922/27626 [30:21<34:44,  7.06it/s]

{'epoch': 0, 'iter': 12920, 'avg_loss': 9.04169747456891, 'avg_acc': 49.89503521399273, 'loss': 8.77061653137207}


EP_train:0:  47%|| 12932/27626 [30:22<34:37,  7.07it/s]

{'epoch': 0, 'iter': 12930, 'avg_loss': 9.041204379501147, 'avg_acc': 49.89342471579924, 'loss': 8.409605979919434}


EP_train:0:  47%|| 12942/27626 [30:23<34:47,  7.03it/s]

{'epoch': 0, 'iter': 12940, 'avg_loss': 9.040709201689527, 'avg_acc': 49.89471447337918, 'loss': 8.330552101135254}


EP_train:0:  47%|| 12952/27626 [30:25<34:47,  7.03it/s]

{'epoch': 0, 'iter': 12950, 'avg_loss': 9.040398342969675, 'avg_acc': 49.8909350629295, 'loss': 8.278817176818848}


EP_train:0:  47%|| 12962/27626 [30:26<34:45,  7.03it/s]

{'epoch': 0, 'iter': 12960, 'avg_loss': 9.039540456314622, 'avg_acc': 49.88860813208858, 'loss': 7.655327320098877}


EP_train:0:  47%|| 12972/27626 [30:28<34:39,  7.05it/s]

{'epoch': 0, 'iter': 12970, 'avg_loss': 9.039108471939903, 'avg_acc': 49.8874893994295, 'loss': 8.741569519042969}


EP_train:0:  47%|| 12982/27626 [30:29<34:48,  7.01it/s]

{'epoch': 0, 'iter': 12980, 'avg_loss': 9.038955998874373, 'avg_acc': 49.88950196440952, 'loss': 8.465060234069824}


EP_train:0:  47%|| 12992/27626 [30:31<34:50,  7.00it/s]

{'epoch': 0, 'iter': 12990, 'avg_loss': 9.038688347563715, 'avg_acc': 49.887422061427145, 'loss': 9.031696319580078}


EP_train:0:  47%|| 13002/27626 [30:32<34:48,  7.00it/s]

{'epoch': 0, 'iter': 13000, 'avg_loss': 9.038462005113788, 'avg_acc': 49.887749019306206, 'loss': 8.809706687927246}


EP_train:0:  47%|| 13012/27626 [30:33<34:32,  7.05it/s]

{'epoch': 0, 'iter': 13010, 'avg_loss': 9.038157426754799, 'avg_acc': 49.88879601875336, 'loss': 8.541372299194336}


EP_train:0:  47%|| 13022/27626 [30:35<34:57,  6.96it/s]

{'epoch': 0, 'iter': 13020, 'avg_loss': 9.038180892693266, 'avg_acc': 49.891281391598184, 'loss': 9.337852478027344}


EP_train:0:  47%|| 13032/27626 [30:36<34:35,  7.03it/s]

{'epoch': 0, 'iter': 13030, 'avg_loss': 9.038087760429685, 'avg_acc': 49.890165758575705, 'loss': 8.864408493041992}


EP_train:0:  47%|| 13042/27626 [30:38<34:33,  7.03it/s]

{'epoch': 0, 'iter': 13040, 'avg_loss': 9.037913352212204, 'avg_acc': 49.89432367149759, 'loss': 9.45022201538086}


EP_train:0:  47%|| 13052/27626 [30:39<34:31,  7.03it/s]

{'epoch': 0, 'iter': 13050, 'avg_loss': 9.037559113331877, 'avg_acc': 49.894404643322346, 'loss': 8.521064758300781}


EP_train:0:  47%|| 13062/27626 [30:41<34:35,  7.02it/s]

{'epoch': 0, 'iter': 13060, 'avg_loss': 9.037319085981514, 'avg_acc': 49.89568180078095, 'loss': 8.731908798217773}


EP_train:0:  47%|| 13072/27626 [30:42<34:45,  6.98it/s]

{'epoch': 0, 'iter': 13070, 'avg_loss': 9.037053933587584, 'avg_acc': 49.896717925177875, 'loss': 9.064742088317871}


EP_train:0:  47%|| 13082/27626 [30:43<35:01,  6.92it/s]

{'epoch': 0, 'iter': 13080, 'avg_loss': 9.03665775757355, 'avg_acc': 49.89488571210152, 'loss': 8.415471076965332}


EP_train:0:  47%|| 13092/27626 [30:45<34:46,  6.96it/s]

{'epoch': 0, 'iter': 13090, 'avg_loss': 9.036249338155415, 'avg_acc': 49.89353372546024, 'loss': 9.052997589111328}


EP_train:0:  47%|| 13102/27626 [30:46<34:38,  6.99it/s]

{'epoch': 0, 'iter': 13100, 'avg_loss': 9.035656940990728, 'avg_acc': 49.89433058545149, 'loss': 8.419657707214355}


EP_train:0:  47%|| 13112/27626 [30:48<34:48,  6.95it/s]

{'epoch': 0, 'iter': 13110, 'avg_loss': 9.035453229062881, 'avg_acc': 49.89202768667531, 'loss': 8.8367919921875}


EP_train:0:  47%|| 13122/27626 [30:49<34:02,  7.10it/s]

{'epoch': 0, 'iter': 13120, 'avg_loss': 9.035592031010076, 'avg_acc': 49.8925863120189, 'loss': 9.92344856262207}


EP_train:0:  48%|| 13132/27626 [30:50<34:36,  6.98it/s]

{'epoch': 0, 'iter': 13130, 'avg_loss': 9.035385760610243, 'avg_acc': 49.89433401873429, 'loss': 8.573955535888672}


EP_train:0:  48%|| 13142/27626 [30:52<34:36,  6.97it/s]

{'epoch': 0, 'iter': 13140, 'avg_loss': 9.03505870394398, 'avg_acc': 49.895127844151894, 'loss': 8.073640823364258}


EP_train:0:  48%|| 13152/27626 [30:53<34:03,  7.08it/s]

{'epoch': 0, 'iter': 13150, 'avg_loss': 9.034734170433778, 'avg_acc': 49.89378184168504, 'loss': 8.79941177368164}


EP_train:0:  48%|| 13162/27626 [30:55<34:09,  7.06it/s]

{'epoch': 0, 'iter': 13160, 'avg_loss': 9.034300625337837, 'avg_acc': 49.895287212217916, 'loss': 8.458416938781738}


EP_train:0:  48%|| 13172/27626 [30:56<34:13,  7.04it/s]

{'epoch': 0, 'iter': 13170, 'avg_loss': 9.034017637264538, 'avg_acc': 49.89797661529117, 'loss': 9.102965354919434}


EP_train:0:  48%|| 13182/27626 [30:58<34:12,  7.04it/s]

{'epoch': 0, 'iter': 13180, 'avg_loss': 9.033711902072005, 'avg_acc': 49.900187770275394, 'loss': 9.248832702636719}


EP_train:0:  48%|| 13192/27626 [30:59<34:08,  7.05it/s]

{'epoch': 0, 'iter': 13190, 'avg_loss': 9.033436383645084, 'avg_acc': 49.902869380638315, 'loss': 7.452517986297607}


EP_train:0:  48%|| 13202/27626 [31:00<33:57,  7.08it/s]

{'epoch': 0, 'iter': 13200, 'avg_loss': 9.033223739926287, 'avg_acc': 49.89962881599879, 'loss': 8.566514015197754}


EP_train:0:  48%|| 13212/27626 [31:02<34:00,  7.06it/s]

{'epoch': 0, 'iter': 13210, 'avg_loss': 9.032758551175945, 'avg_acc': 49.8994682461585, 'loss': 7.946786880493164}


EP_train:0:  48%|| 13222/27626 [31:03<34:04,  7.05it/s]

{'epoch': 0, 'iter': 13220, 'avg_loss': 9.032434349927101, 'avg_acc': 49.899307919219424, 'loss': 9.231027603149414}


EP_train:0:  48%|| 13232/27626 [31:05<34:03,  7.04it/s]

{'epoch': 0, 'iter': 13230, 'avg_loss': 9.032195083210775, 'avg_acc': 49.90174589978082, 'loss': 8.587580680847168}


EP_train:0:  48%|| 13242/27626 [31:06<33:55,  7.06it/s]

{'epoch': 0, 'iter': 13240, 'avg_loss': 9.03201746153747, 'avg_acc': 49.90252813231629, 'loss': 9.137360572814941}


EP_train:0:  48%|| 13252/27626 [31:07<34:07,  7.02it/s]

{'epoch': 0, 'iter': 13250, 'avg_loss': 9.03182816179768, 'avg_acc': 49.90307335295449, 'loss': 8.661392211914062}


EP_train:0:  48%|| 13262/27626 [31:09<34:23,  6.96it/s]

{'epoch': 0, 'iter': 13260, 'avg_loss': 9.031669037382164, 'avg_acc': 49.90008295000377, 'loss': 7.9719085693359375}


EP_train:0:  48%|| 13272/27626 [31:10<34:16,  6.98it/s]

{'epoch': 0, 'iter': 13270, 'avg_loss': 9.031338222276005, 'avg_acc': 49.89874538467335, 'loss': 8.676554679870605}


EP_train:0:  48%|| 13282/27626 [31:12<33:54,  7.05it/s]

{'epoch': 0, 'iter': 13280, 'avg_loss': 9.03078307090112, 'avg_acc': 49.89858632633084, 'loss': 8.761312484741211}


EP_train:0:  48%|| 13292/27626 [31:13<33:47,  7.07it/s]

{'epoch': 0, 'iter': 13290, 'avg_loss': 9.030641841651642, 'avg_acc': 49.89795726431419, 'loss': 8.601421356201172}


EP_train:0:  48%|| 13302/27626 [31:14<33:47,  7.06it/s]

{'epoch': 0, 'iter': 13300, 'avg_loss': 9.030381617228095, 'avg_acc': 49.897094203443345, 'loss': 9.11792278289795}


EP_train:0:  48%|| 13312/27626 [31:16<33:59,  7.02it/s]

{'epoch': 0, 'iter': 13310, 'avg_loss': 9.03042725830359, 'avg_acc': 49.89787581699346, 'loss': 9.422428131103516}


EP_train:0:  48%|| 13322/27626 [31:17<33:50,  7.04it/s]

{'epoch': 0, 'iter': 13320, 'avg_loss': 9.029983139997416, 'avg_acc': 49.89795248104497, 'loss': 8.764791488647461}


EP_train:0:  48%|| 13332/27626 [31:19<33:51,  7.03it/s]

{'epoch': 0, 'iter': 13330, 'avg_loss': 9.029865486589127, 'avg_acc': 49.89709136598905, 'loss': 8.586978912353516}


EP_train:0:  48%|| 13342/27626 [31:20<33:52,  7.03it/s]

{'epoch': 0, 'iter': 13340, 'avg_loss': 9.029546605674824, 'avg_acc': 49.89435761936886, 'loss': 7.887180805206299}


EP_train:0:  48%|| 13352/27626 [31:22<33:57,  7.01it/s]

{'epoch': 0, 'iter': 13350, 'avg_loss': 9.029289894484016, 'avg_acc': 49.894436746311136, 'loss': 7.887979030609131}


EP_train:0:  48%|| 13362/27626 [31:23<34:13,  6.95it/s]

{'epoch': 0, 'iter': 13360, 'avg_loss': 9.029093085091214, 'avg_acc': 49.894983534166606, 'loss': 8.547239303588867}


EP_train:0:  48%|| 13372/27626 [31:24<34:04,  6.97it/s]

{'epoch': 0, 'iter': 13370, 'avg_loss': 9.028899429570354, 'avg_acc': 49.89459464512752, 'loss': 8.654641151428223}


EP_train:0:  48%|| 13382/27626 [31:26<33:51,  7.01it/s]

{'epoch': 0, 'iter': 13380, 'avg_loss': 9.028624893776033, 'avg_acc': 49.89327217696734, 'loss': 8.542830467224121}


EP_train:0:  48%|| 13392/27626 [31:27<33:44,  7.03it/s]

{'epoch': 0, 'iter': 13390, 'avg_loss': 9.028646973158477, 'avg_acc': 49.89428534090061, 'loss': 9.525449752807617}


EP_train:0:  49%|| 13402/27626 [31:29<33:46,  7.02it/s]

{'epoch': 0, 'iter': 13400, 'avg_loss': 9.028302256913872, 'avg_acc': 49.896462950526086, 'loss': 8.556829452514648}


EP_train:0:  49%|| 13412/27626 [31:30<33:26,  7.08it/s]

{'epoch': 0, 'iter': 13410, 'avg_loss': 9.028227176933639, 'avg_acc': 49.89723920662143, 'loss': 9.078081130981445}


EP_train:0:  49%|| 13422/27626 [31:31<33:32,  7.06it/s]

{'epoch': 0, 'iter': 13420, 'avg_loss': 9.027888412509695, 'avg_acc': 49.89964421429104, 'loss': 8.72221565246582}


EP_train:0:  49%|| 13432/27626 [31:33<33:37,  7.04it/s]

{'epoch': 0, 'iter': 13430, 'avg_loss': 9.02749572074668, 'avg_acc': 49.897159556250465, 'loss': 8.514941215515137}


EP_train:0:  49%|| 13442/27626 [31:34<33:42,  7.01it/s]

{'epoch': 0, 'iter': 13440, 'avg_loss': 9.027063565225475, 'avg_acc': 49.893981102596534, 'loss': 9.154021263122559}


EP_train:0:  49%|| 13452/27626 [31:36<33:46,  6.99it/s]

{'epoch': 0, 'iter': 13450, 'avg_loss': 9.027030922490756, 'avg_acc': 49.89522154486655, 'loss': 8.97023868560791}


EP_train:0:  49%|| 13462/27626 [31:37<33:35,  7.03it/s]

{'epoch': 0, 'iter': 13460, 'avg_loss': 9.026378312460531, 'avg_acc': 49.89460292697422, 'loss': 8.375057220458984}


EP_train:0:  49%|| 13472/27626 [31:39<33:23,  7.06it/s]

{'epoch': 0, 'iter': 13470, 'avg_loss': 9.026044747141137, 'avg_acc': 49.894449187142754, 'loss': 8.05813217163086}


EP_train:0:  49%|| 13482/27626 [31:40<33:16,  7.09it/s]

{'epoch': 0, 'iter': 13480, 'avg_loss': 9.025844146303214, 'avg_acc': 49.891977598101036, 'loss': 8.075977325439453}


EP_train:0:  49%|| 13492/27626 [31:41<33:14,  7.09it/s]

{'epoch': 0, 'iter': 13490, 'avg_loss': 9.025695641764841, 'avg_acc': 49.89252093988585, 'loss': 8.577004432678223}


EP_train:0:  49%|| 13502/27626 [31:43<33:11,  7.09it/s]

{'epoch': 0, 'iter': 13500, 'avg_loss': 9.025404003247289, 'avg_acc': 49.89537812013925, 'loss': 9.091025352478027}


EP_train:0:  49%|| 13512/27626 [31:44<33:13,  7.08it/s]

{'epoch': 0, 'iter': 13510, 'avg_loss': 9.025151498417772, 'avg_acc': 49.89915624306121, 'loss': 8.91897964477539}


EP_train:0:  49%|| 13522/27626 [31:46<33:22,  7.04it/s]

{'epoch': 0, 'iter': 13520, 'avg_loss': 9.024751959057939, 'avg_acc': 49.89923082612233, 'loss': 9.293044090270996}


EP_train:0:  49%|| 13532/27626 [31:47<33:23,  7.04it/s]

{'epoch': 0, 'iter': 13530, 'avg_loss': 9.02464052212581, 'avg_acc': 49.90115290813687, 'loss': 8.830883026123047}


EP_train:0:  49%|| 13542/27626 [31:48<33:28,  7.01it/s]

{'epoch': 0, 'iter': 13540, 'avg_loss': 9.024469685179472, 'avg_acc': 49.901918248282996, 'loss': 8.6787109375}


EP_train:0:  49%|| 13552/27626 [31:50<33:18,  7.04it/s]

{'epoch': 0, 'iter': 13550, 'avg_loss': 9.024213204022466, 'avg_acc': 49.90568039259095, 'loss': 7.9766716957092285}


EP_train:0:  49%|| 13562/27626 [31:51<33:21,  7.03it/s]

{'epoch': 0, 'iter': 13560, 'avg_loss': 9.023824105720923, 'avg_acc': 49.90229334119903, 'loss': 8.10588550567627}


EP_train:0:  49%|| 13572/27626 [31:53<33:25,  7.01it/s]

{'epoch': 0, 'iter': 13570, 'avg_loss': 9.023345970104337, 'avg_acc': 49.90213506742318, 'loss': 8.163877487182617}


EP_train:0:  49%|| 13582/27626 [31:54<33:05,  7.07it/s]

{'epoch': 0, 'iter': 13580, 'avg_loss': 9.023003758724778, 'avg_acc': 49.89829541270893, 'loss': 8.653712272644043}


EP_train:0:  49%|| 13592/27626 [31:56<33:04,  7.07it/s]

{'epoch': 0, 'iter': 13590, 'avg_loss': 9.022933318049919, 'avg_acc': 49.89997976602163, 'loss': 9.161174774169922}


EP_train:0:  49%|| 13602/27626 [31:57<33:15,  7.03it/s]

{'epoch': 0, 'iter': 13600, 'avg_loss': 9.022943146745384, 'avg_acc': 49.8961473421072, 'loss': 9.04403305053711}


EP_train:0:  49%|| 13612/27626 [31:58<32:52,  7.10it/s]

{'epoch': 0, 'iter': 13610, 'avg_loss': 9.022590126843081, 'avg_acc': 49.89783079861876, 'loss': 8.964309692382812}


EP_train:0:  49%|| 13622/27626 [32:00<33:09,  7.04it/s]

{'epoch': 0, 'iter': 13620, 'avg_loss': 9.022523740184928, 'avg_acc': 49.89905293297115, 'loss': 8.642704010009766}


EP_train:0:  49%|| 13632/27626 [32:01<33:21,  6.99it/s]

{'epoch': 0, 'iter': 13630, 'avg_loss': 9.022244688263118, 'avg_acc': 49.898897733108356, 'loss': 8.608332633972168}


EP_train:0:  49%|| 13642/27626 [32:03<32:54,  7.08it/s]

{'epoch': 0, 'iter': 13640, 'avg_loss': 9.022104888356996, 'avg_acc': 49.90034638223004, 'loss': 9.259824752807617}


EP_train:0:  49%|| 13652/27626 [32:04<33:06,  7.03it/s]

{'epoch': 0, 'iter': 13650, 'avg_loss': 9.021731554793837, 'avg_acc': 49.9017929089444, 'loss': 9.369072914123535}


EP_train:0:  49%|| 13662/27626 [32:05<33:04,  7.04it/s]

{'epoch': 0, 'iter': 13660, 'avg_loss': 9.021626343394468, 'avg_acc': 49.901178537442355, 'loss': 8.747482299804688}


EP_train:0:  49%|| 13672/27626 [32:07<32:54,  7.07it/s]

{'epoch': 0, 'iter': 13670, 'avg_loss': 9.021505487888613, 'avg_acc': 49.9023937532002, 'loss': 9.293362617492676}


EP_train:0:  50%|| 13682/27626 [32:08<32:49,  7.08it/s]

{'epoch': 0, 'iter': 13680, 'avg_loss': 9.021321517596354, 'avg_acc': 49.904064030407135, 'loss': 8.917535781860352}


EP_train:0:  50%|| 13692/27626 [32:10<32:57,  7.05it/s]

{'epoch': 0, 'iter': 13690, 'avg_loss': 9.020911361343822, 'avg_acc': 49.90596011978672, 'loss': 8.410572052001953}


EP_train:0:  50%|| 13702/27626 [32:11<33:04,  7.02it/s]

{'epoch': 0, 'iter': 13700, 'avg_loss': 9.020605364312882, 'avg_acc': 49.90648492810744, 'loss': 8.027795791625977}


EP_train:0:  50%|| 13712/27626 [32:12<33:03,  7.02it/s]

{'epoch': 0, 'iter': 13710, 'avg_loss': 9.020330781444889, 'avg_acc': 49.90655313252133, 'loss': 8.138457298278809}


EP_train:0:  50%|| 13722/27626 [32:14<33:02,  7.01it/s]

{'epoch': 0, 'iter': 13720, 'avg_loss': 9.020095412684777, 'avg_acc': 49.90776000291524, 'loss': 8.964658737182617}


EP_train:0:  50%|| 13732/27626 [32:15<32:45,  7.07it/s]

{'epoch': 0, 'iter': 13730, 'avg_loss': 9.020331032355505, 'avg_acc': 49.90555130726094, 'loss': 9.32386302947998}


EP_train:0:  50%|| 13742/27626 [32:17<32:53,  7.04it/s]

{'epoch': 0, 'iter': 13740, 'avg_loss': 9.020077481973242, 'avg_acc': 49.90425551269922, 'loss': 8.418988227844238}


EP_train:0:  50%|| 13752/27626 [32:18<33:14,  6.96it/s]

{'epoch': 0, 'iter': 13750, 'avg_loss': 9.020189407617169, 'avg_acc': 49.905461420987564, 'loss': 10.206938743591309}


EP_train:0:  50%|| 13762/27626 [32:20<33:04,  6.99it/s]

{'epoch': 0, 'iter': 13760, 'avg_loss': 9.02011529516233, 'avg_acc': 49.90371339292203, 'loss': 9.10594367980957}


EP_train:0:  50%|| 13772/27626 [32:21<32:46,  7.04it/s]

{'epoch': 0, 'iter': 13770, 'avg_loss': 9.019749156651903, 'avg_acc': 49.90060634667054, 'loss': 8.393464088439941}


EP_train:0:  50%|| 13782/27626 [32:22<32:38,  7.07it/s]

{'epoch': 0, 'iter': 13780, 'avg_loss': 9.019856577021283, 'avg_acc': 49.90249256222335, 'loss': 8.719330787658691}


EP_train:0:  50%|| 13792/27626 [32:24<32:48,  7.03it/s]

{'epoch': 0, 'iter': 13790, 'avg_loss': 9.019756198819714, 'avg_acc': 49.90075048944964, 'loss': 8.729082107543945}


EP_train:0:  50%|| 13802/27626 [32:25<32:41,  7.05it/s]

{'epoch': 0, 'iter': 13800, 'avg_loss': 9.019512652452507, 'avg_acc': 49.89833164263459, 'loss': 8.885072708129883}


EP_train:0:  50%|| 13812/27626 [32:27<32:54,  7.00it/s]

{'epoch': 0, 'iter': 13810, 'avg_loss': 9.018971249382412, 'avg_acc': 49.901799290420676, 'loss': 9.533635139465332}


EP_train:0:  50%|| 13822/27626 [32:28<32:47,  7.02it/s]

{'epoch': 0, 'iter': 13820, 'avg_loss': 9.018642354982736, 'avg_acc': 49.90141813182838, 'loss': 9.117254257202148}


EP_train:0:  50%|| 13832/27626 [32:29<32:36,  7.05it/s]

{'epoch': 0, 'iter': 13830, 'avg_loss': 9.018480728232984, 'avg_acc': 49.90103752440171, 'loss': 8.784799575805664}


EP_train:0:  50%|| 13842/27626 [32:31<32:33,  7.06it/s]

{'epoch': 0, 'iter': 13840, 'avg_loss': 9.018339988718694, 'avg_acc': 49.90133480239867, 'loss': 8.199874877929688}


EP_train:0:  50%|| 13852/27626 [32:32<32:34,  7.05it/s]

{'epoch': 0, 'iter': 13850, 'avg_loss': 9.018008784966884, 'avg_acc': 49.902534113060426, 'loss': 9.074857711791992}


EP_train:0:  50%|| 13862/27626 [32:34<32:42,  7.01it/s]

{'epoch': 0, 'iter': 13860, 'avg_loss': 9.01785868107434, 'avg_acc': 49.90328078782195, 'loss': 9.281158447265625}


EP_train:0:  50%|| 13872/27626 [32:35<32:42,  7.01it/s]

{'epoch': 0, 'iter': 13870, 'avg_loss': 9.017609105905299, 'avg_acc': 49.90695515824382, 'loss': 8.616751670837402}


EP_train:0:  50%|| 13882/27626 [32:37<32:31,  7.04it/s]

{'epoch': 0, 'iter': 13880, 'avg_loss': 9.017184897618527, 'avg_acc': 49.908598083711546, 'loss': 8.40850830078125}


EP_train:0:  50%|| 13892/27626 [32:38<32:44,  6.99it/s]

{'epoch': 0, 'iter': 13890, 'avg_loss': 9.016769590975786, 'avg_acc': 49.90483946440141, 'loss': 8.921957015991211}


EP_train:0:  50%|| 13902/27626 [32:39<32:33,  7.02it/s]

{'epoch': 0, 'iter': 13900, 'avg_loss': 9.016326663185561, 'avg_acc': 49.90580713617725, 'loss': 7.898500919342041}


EP_train:0:  50%|| 13912/27626 [32:41<32:31,  7.03it/s]

{'epoch': 0, 'iter': 13910, 'avg_loss': 9.015912190783453, 'avg_acc': 49.90924448278341, 'loss': 9.355093955993652}


EP_train:0:  50%|| 13922/27626 [32:42<32:30,  7.02it/s]

{'epoch': 0, 'iter': 13920, 'avg_loss': 9.016117957857574, 'avg_acc': 49.90796279002945, 'loss': 8.626228332519531}


EP_train:0:  50%|| 13932/27626 [32:44<32:29,  7.02it/s]

{'epoch': 0, 'iter': 13930, 'avg_loss': 9.015923958894005, 'avg_acc': 49.90982341540449, 'loss': 8.999001502990723}


EP_train:0:  50%|| 13942/27626 [32:45<32:30,  7.02it/s]

{'epoch': 0, 'iter': 13940, 'avg_loss': 9.015310382665158, 'avg_acc': 49.90988809984936, 'loss': 7.655409812927246}


EP_train:0:  51%|| 13952/27626 [32:46<32:34,  7.00it/s]

{'epoch': 0, 'iter': 13950, 'avg_loss': 9.014960240911014, 'avg_acc': 49.90860870188517, 'loss': 8.52100658416748}


EP_train:0:  51%|| 13962/27626 [32:48<32:17,  7.05it/s]

{'epoch': 0, 'iter': 13960, 'avg_loss': 9.014583043694863, 'avg_acc': 49.91046486641358, 'loss': 8.409804344177246}


EP_train:0:  51%|| 13972/27626 [32:49<32:39,  6.97it/s]

{'epoch': 0, 'iter': 13970, 'avg_loss': 9.014318535607952, 'avg_acc': 49.91075263044878, 'loss': 8.403058052062988}


EP_train:0:  51%|| 13982/27626 [32:51<32:38,  6.97it/s]

{'epoch': 0, 'iter': 13980, 'avg_loss': 9.014231116276648, 'avg_acc': 49.91081646520277, 'loss': 8.199332237243652}


EP_train:0:  51%|| 13992/27626 [32:52<32:17,  7.04it/s]

{'epoch': 0, 'iter': 13990, 'avg_loss': 9.014191281709445, 'avg_acc': 49.912667071688944, 'loss': 8.99669075012207}


EP_train:0:  51%|| 14002/27626 [32:54<32:24,  7.01it/s]

{'epoch': 0, 'iter': 14000, 'avg_loss': 9.014069997006745, 'avg_acc': 49.91049746446682, 'loss': 8.783075332641602}


EP_train:0:  51%|| 14012/27626 [32:55<32:18,  7.02it/s]

{'epoch': 0, 'iter': 14010, 'avg_loss': 9.013849794205264, 'avg_acc': 49.910561344657765, 'loss': 8.493701934814453}


EP_train:0:  51%|| 14022/27626 [32:56<32:11,  7.05it/s]

{'epoch': 0, 'iter': 14020, 'avg_loss': 9.013537329891491, 'avg_acc': 49.91352257328293, 'loss': 9.589258193969727}


EP_train:0:  51%|| 14032/27626 [32:58<32:15,  7.02it/s]

{'epoch': 0, 'iter': 14030, 'avg_loss': 9.013194016096437, 'avg_acc': 49.91157971634239, 'loss': 8.417098045349121}


EP_train:0:  51%|| 14042/27626 [32:59<32:23,  6.99it/s]

{'epoch': 0, 'iter': 14040, 'avg_loss': 9.012835285040602, 'avg_acc': 49.909417064311654, 'loss': 8.357192993164062}


EP_train:0:  51%|| 14052/27626 [33:01<32:22,  6.99it/s]

{'epoch': 0, 'iter': 14050, 'avg_loss': 9.01259851397997, 'avg_acc': 49.909036723364885, 'loss': 8.667867660522461}


EP_train:0:  51%|| 14062/27626 [33:02<32:14,  7.01it/s]

{'epoch': 0, 'iter': 14060, 'avg_loss': 9.012363783674468, 'avg_acc': 49.907323447834436, 'loss': 9.599146842956543}


EP_train:0:  51%|| 14072/27626 [33:04<32:32,  6.94it/s]

{'epoch': 0, 'iter': 14070, 'avg_loss': 9.011966357394819, 'avg_acc': 49.903391727666836, 'loss': 8.171976089477539}


EP_train:0:  51%|| 14082/27626 [33:05<32:22,  6.97it/s]

{'epoch': 0, 'iter': 14080, 'avg_loss': 9.011685599980051, 'avg_acc': 49.90235068532065, 'loss': 9.032641410827637}


EP_train:0:  51%|| 14092/27626 [33:06<32:01,  7.04it/s]

{'epoch': 0, 'iter': 14090, 'avg_loss': 9.011595228161486, 'avg_acc': 49.90530303030303, 'loss': 8.724821090698242}


EP_train:0:  51%|| 14102/27626 [33:08<31:52,  7.07it/s]

{'epoch': 0, 'iter': 14100, 'avg_loss': 9.011193838857368, 'avg_acc': 49.90603503297638, 'loss': 8.839302062988281}


EP_train:0:  51%|| 14112/27626 [33:09<32:04,  7.02it/s]

{'epoch': 0, 'iter': 14110, 'avg_loss': 9.010768241407886, 'avg_acc': 49.905880164410746, 'loss': 8.745220184326172}


EP_train:0:  51%|| 14122/27626 [33:11<32:02,  7.03it/s]

{'epoch': 0, 'iter': 14120, 'avg_loss': 9.010594815845218, 'avg_acc': 49.89952907017916, 'loss': 8.804182052612305}


EP_train:0:  51%|| 14132/27626 [33:12<32:10,  6.99it/s]

{'epoch': 0, 'iter': 14130, 'avg_loss': 9.010152760723596, 'avg_acc': 49.90225390984361, 'loss': 7.997747421264648}


EP_train:0:  51%|| 14142/27626 [33:13<31:56,  7.03it/s]

{'epoch': 0, 'iter': 14140, 'avg_loss': 9.009870928388093, 'avg_acc': 49.90298599816138, 'loss': 7.871623992919922}


EP_train:0:  51%|| 14152/27626 [33:15<32:09,  6.98it/s]

{'epoch': 0, 'iter': 14150, 'avg_loss': 9.00981638381173, 'avg_acc': 49.89930040279839, 'loss': 10.02517032623291}


EP_train:0:  51%|| 14162/27626 [33:16<32:08,  6.98it/s]

{'epoch': 0, 'iter': 14160, 'avg_loss': 9.009462490382147, 'avg_acc': 49.899592189817106, 'loss': 8.095551490783691}


EP_train:0:  51%|| 14172/27626 [33:18<31:55,  7.02it/s]

{'epoch': 0, 'iter': 14170, 'avg_loss': 9.00913387529011, 'avg_acc': 49.89811939877214, 'loss': 9.647296905517578}


EP_train:0:  51%|| 14182/27626 [33:19<32:31,  6.89it/s]

{'epoch': 0, 'iter': 14180, 'avg_loss': 9.00857840505005, 'avg_acc': 49.898191241802415, 'loss': 7.854417324066162}


EP_train:0:  51%|| 14192/27626 [33:21<31:51,  7.03it/s]

{'epoch': 0, 'iter': 14190, 'avg_loss': 9.008262565864937, 'avg_acc': 49.89936403354239, 'loss': 7.9347429275512695}


EP_train:0:  51%|| 14202/27626 [33:22<31:54,  7.01it/s]

{'epoch': 0, 'iter': 14200, 'avg_loss': 9.008044962227894, 'avg_acc': 49.898554679247944, 'loss': 7.936628341674805}


EP_train:0:  51%|| 14212/27626 [33:23<31:47,  7.03it/s]

{'epoch': 0, 'iter': 14210, 'avg_loss': 9.007781969939755, 'avg_acc': 49.903683766096684, 'loss': 7.969570636749268}


EP_train:0:  51%|| 14222/27626 [33:25<32:02,  6.97it/s]

{'epoch': 0, 'iter': 14220, 'avg_loss': 9.007367924206804, 'avg_acc': 49.90572920329091, 'loss': 8.02736759185791}


EP_train:0:  52%|| 14232/27626 [33:26<31:32,  7.08it/s]

{'epoch': 0, 'iter': 14230, 'avg_loss': 9.006932608049757, 'avg_acc': 49.90491708242569, 'loss': 8.789233207702637}


EP_train:0:  52%|| 14242/27626 [33:28<31:52,  7.00it/s]

{'epoch': 0, 'iter': 14240, 'avg_loss': 9.006914978892555, 'avg_acc': 49.904983849448776, 'loss': 7.771748065948486}


EP_train:0:  52%|| 14252/27626 [33:29<31:48,  7.01it/s]

{'epoch': 0, 'iter': 14250, 'avg_loss': 9.00679079613496, 'avg_acc': 49.90461195705564, 'loss': 8.309422492980957}


EP_train:0:  52%|| 14262/27626 [33:30<31:39,  7.03it/s]

{'epoch': 0, 'iter': 14260, 'avg_loss': 9.006793827238512, 'avg_acc': 49.90993794264077, 'loss': 8.962149620056152}


EP_train:0:  52%|| 14272/27626 [33:32<31:27,  7.07it/s]

{'epoch': 0, 'iter': 14270, 'avg_loss': 9.006435329165141, 'avg_acc': 49.91087695326186, 'loss': 8.43726921081543}


EP_train:0:  52%|| 14282/27626 [33:33<31:27,  7.07it/s]

{'epoch': 0, 'iter': 14280, 'avg_loss': 9.00620800139448, 'avg_acc': 49.91203347104545, 'loss': 7.387600898742676}


EP_train:0:  52%|| 14292/27626 [33:35<31:41,  7.01it/s]

{'epoch': 0, 'iter': 14290, 'avg_loss': 9.005903228303048, 'avg_acc': 49.91078301028619, 'loss': 8.532092094421387}


EP_train:0:  52%|| 14302/27626 [33:36<31:28,  7.06it/s]

{'epoch': 0, 'iter': 14300, 'avg_loss': 9.005575668794293, 'avg_acc': 49.90975281448849, 'loss': 8.2914457321167}


EP_train:0:  52%|| 14312/27626 [33:38<31:32,  7.04it/s]

{'epoch': 0, 'iter': 14310, 'avg_loss': 9.005585985075093, 'avg_acc': 49.9126546013556, 'loss': 8.079468727111816}


EP_train:0:  52%|| 14322/27626 [33:39<31:26,  7.05it/s]

{'epoch': 0, 'iter': 14320, 'avg_loss': 9.00538547145081, 'avg_acc': 49.90922421618602, 'loss': 8.572792053222656}


EP_train:0:  52%|| 14332/27626 [33:40<31:41,  6.99it/s]

{'epoch': 0, 'iter': 14330, 'avg_loss': 9.005260441243529, 'avg_acc': 49.910813969716, 'loss': 9.654206275939941}


EP_train:0:  52%|| 14342/27626 [33:42<31:22,  7.06it/s]

{'epoch': 0, 'iter': 14340, 'avg_loss': 9.005000621834757, 'avg_acc': 49.90956871905725, 'loss': 8.64378833770752}


EP_train:0:  52%|| 14352/27626 [33:43<31:28,  7.03it/s]

{'epoch': 0, 'iter': 14350, 'avg_loss': 9.004847932315423, 'avg_acc': 49.90941397811999, 'loss': 8.969680786132812}


EP_train:0:  52%|| 14362/27626 [33:45<31:06,  7.10it/s]

{'epoch': 0, 'iter': 14360, 'avg_loss': 9.004758979938678, 'avg_acc': 49.90882424622241, 'loss': 8.44307804107666}


EP_train:0:  52%|| 14372/27626 [33:46<31:13,  7.08it/s]

{'epoch': 0, 'iter': 14370, 'avg_loss': 9.004600803286852, 'avg_acc': 49.90867023867511, 'loss': 8.14327621459961}


EP_train:0:  52%|| 14382/27626 [33:47<31:29,  7.01it/s]

{'epoch': 0, 'iter': 14380, 'avg_loss': 9.004253101401929, 'avg_acc': 49.91025485014951, 'loss': 7.509406566619873}


EP_train:0:  52%|| 14392/27626 [33:49<31:06,  7.09it/s]

{'epoch': 0, 'iter': 14390, 'avg_loss': 9.004059278147405, 'avg_acc': 49.90792856646515, 'loss': 8.145931243896484}


EP_train:0:  52%|| 14402/27626 [33:50<31:13,  7.06it/s]

{'epoch': 0, 'iter': 14400, 'avg_loss': 9.003897431575377, 'avg_acc': 49.91016248871606, 'loss': 9.273735046386719}


EP_train:0:  52%|| 14412/27626 [33:52<31:13,  7.05it/s]

{'epoch': 0, 'iter': 14410, 'avg_loss': 9.003775030409683, 'avg_acc': 49.9134775518701, 'loss': 8.641776084899902}


EP_train:0:  52%|| 14422/27626 [33:53<31:22,  7.02it/s]

{'epoch': 0, 'iter': 14420, 'avg_loss': 9.003619451346257, 'avg_acc': 49.91202066430899, 'loss': 8.844185829162598}


EP_train:0:  52%|| 14432/27626 [33:54<31:21,  7.01it/s]

{'epoch': 0, 'iter': 14430, 'avg_loss': 9.003284895827324, 'avg_acc': 49.912298177534474, 'loss': 8.159483909606934}


EP_train:0:  52%|| 14442/27626 [33:56<31:03,  7.07it/s]

{'epoch': 0, 'iter': 14440, 'avg_loss': 9.002890049986096, 'avg_acc': 49.91084412436812, 'loss': 7.909985542297363}


EP_train:0:  52%|| 14452/27626 [33:57<31:06,  7.06it/s]

{'epoch': 0, 'iter': 14450, 'avg_loss': 9.00268826756244, 'avg_acc': 49.91155456369801, 'loss': 8.48714828491211}


EP_train:0:  52%|| 14462/27626 [33:59<31:10,  7.04it/s]

{'epoch': 0, 'iter': 14460, 'avg_loss': 9.002511952990245, 'avg_acc': 49.91204792199709, 'loss': 8.832839965820312}


EP_train:0:  52%|| 14472/27626 [34:00<31:18,  7.00it/s]

{'epoch': 0, 'iter': 14470, 'avg_loss': 9.002400755478611, 'avg_acc': 49.911460852739964, 'loss': 8.739886283874512}


EP_train:0:  52%|| 14482/27626 [34:02<31:10,  7.03it/s]

{'epoch': 0, 'iter': 14480, 'avg_loss': 9.002151689825116, 'avg_acc': 49.91475899454458, 'loss': 7.464079856872559}


EP_train:0:  52%|| 14492/27626 [34:03<31:03,  7.05it/s]

{'epoch': 0, 'iter': 14490, 'avg_loss': 9.0017722522848, 'avg_acc': 49.914170864674624, 'loss': 7.8250813484191895}


EP_train:0:  52%|| 14502/27626 [34:04<30:51,  7.09it/s]

{'epoch': 0, 'iter': 14500, 'avg_loss': 9.001547553803821, 'avg_acc': 49.913152541204056, 'loss': 8.853063583374023}


EP_train:0:  53%|| 14512/27626 [34:06<30:53,  7.08it/s]

{'epoch': 0, 'iter': 14510, 'avg_loss': 9.00109006298639, 'avg_acc': 49.913643098339186, 'loss': 8.300747871398926}


EP_train:0:  53%|| 14522/27626 [34:07<30:55,  7.06it/s]

{'epoch': 0, 'iter': 14520, 'avg_loss': 9.000782714321236, 'avg_acc': 49.90939845740651, 'loss': 8.969597816467285}


EP_train:0:  53%|| 14532/27626 [34:09<30:48,  7.09it/s]

{'epoch': 0, 'iter': 14530, 'avg_loss': 9.000722523344646, 'avg_acc': 49.907955405684405, 'loss': 9.389505386352539}


EP_train:0:  53%|| 14542/27626 [34:10<30:49,  7.07it/s]

{'epoch': 0, 'iter': 14540, 'avg_loss': 9.000465046514238, 'avg_acc': 49.90801870572863, 'loss': 8.790227890014648}


EP_train:0:  53%|| 14552/27626 [34:11<30:52,  7.06it/s]

{'epoch': 0, 'iter': 14550, 'avg_loss': 9.000393914058932, 'avg_acc': 49.906578585664214, 'loss': 7.760885715484619}


EP_train:0:  53%|| 14562/27626 [34:13<30:47,  7.07it/s]

{'epoch': 0, 'iter': 14560, 'avg_loss': 9.000240604576685, 'avg_acc': 49.90299429984205, 'loss': 8.96242618560791}


EP_train:0:  53%|| 14572/27626 [34:14<30:57,  7.03it/s]

{'epoch': 0, 'iter': 14570, 'avg_loss': 8.999928904204761, 'avg_acc': 49.90306087433944, 'loss': 8.313457489013672}


EP_train:0:  53%|| 14582/27626 [34:16<31:00,  7.01it/s]

{'epoch': 0, 'iter': 14580, 'avg_loss': 8.999844562205322, 'avg_acc': 49.904627597558466, 'loss': 7.872659206390381}


EP_train:0:  53%|| 14592/27626 [34:17<30:52,  7.04it/s]

{'epoch': 0, 'iter': 14590, 'avg_loss': 8.999609654291737, 'avg_acc': 49.90490713453499, 'loss': 8.41164493560791}


EP_train:0:  53%|| 14602/27626 [34:18<30:56,  7.02it/s]

{'epoch': 0, 'iter': 14600, 'avg_loss': 8.999378424949429, 'avg_acc': 49.905614341483464, 'loss': 8.118141174316406}


EP_train:0:  53%|| 14612/27626 [34:20<30:51,  7.03it/s]

{'epoch': 0, 'iter': 14610, 'avg_loss': 8.999098676796788, 'avg_acc': 49.904181780850045, 'loss': 9.427227973937988}


EP_train:0:  53%|| 14622/27626 [34:21<30:39,  7.07it/s]

{'epoch': 0, 'iter': 14620, 'avg_loss': 8.99884898241368, 'avg_acc': 49.90488851651734, 'loss': 8.473490715026855}


EP_train:0:  53%|| 14632/27626 [34:23<30:41,  7.06it/s]

{'epoch': 0, 'iter': 14630, 'avg_loss': 8.99860250712761, 'avg_acc': 49.90559428610485, 'loss': 8.48759651184082}


EP_train:0:  53%|| 14642/27626 [34:24<30:55,  7.00it/s]

{'epoch': 0, 'iter': 14640, 'avg_loss': 8.998425203352937, 'avg_acc': 49.902670582610476, 'loss': 8.517017364501953}


EP_train:0:  53%|| 14652/27626 [34:26<30:37,  7.06it/s]

{'epoch': 0, 'iter': 14650, 'avg_loss': 8.998281638294054, 'avg_acc': 49.90167053443451, 'loss': 8.51966381072998}


EP_train:0:  53%|| 14662/27626 [34:27<30:37,  7.05it/s]

{'epoch': 0, 'iter': 14660, 'avg_loss': 8.997988515673958, 'avg_acc': 49.903655957983766, 'loss': 8.525336265563965}


EP_train:0:  53%|| 14672/27626 [34:28<30:37,  7.05it/s]

{'epoch': 0, 'iter': 14670, 'avg_loss': 8.997714336384343, 'avg_acc': 49.90457364869471, 'loss': 7.921025276184082}


EP_train:0:  53%|| 14682/27626 [34:30<30:49,  7.00it/s]

{'epoch': 0, 'iter': 14680, 'avg_loss': 8.997405135560918, 'avg_acc': 49.907618690824876, 'loss': 8.379803657531738}


EP_train:0:  53%|| 14692/27626 [34:31<30:50,  6.99it/s]

{'epoch': 0, 'iter': 14690, 'avg_loss': 8.997462666584047, 'avg_acc': 49.9108723027704, 'loss': 9.402937889099121}


EP_train:0:  53%|| 14702/27626 [34:33<30:49,  6.99it/s]

{'epoch': 0, 'iter': 14700, 'avg_loss': 8.997329105009765, 'avg_acc': 49.910932929732674, 'loss': 8.55679702758789}


EP_train:0:  53%|| 14712/27626 [34:34<30:31,  7.05it/s]

{'epoch': 0, 'iter': 14710, 'avg_loss': 8.997106115965993, 'avg_acc': 49.906532526680714, 'loss': 8.723653793334961}


EP_train:0:  53%|| 14722/27626 [34:35<30:36,  7.03it/s]

{'epoch': 0, 'iter': 14720, 'avg_loss': 8.996859241848641, 'avg_acc': 49.9061714557435, 'loss': 9.32755184173584}


EP_train:0:  53%|| 14732/27626 [34:37<30:33,  7.03it/s]

{'epoch': 0, 'iter': 14730, 'avg_loss': 8.996581619378707, 'avg_acc': 49.90729583870749, 'loss': 8.678864479064941}


EP_train:0:  53%|| 14742/27626 [34:38<30:29,  7.04it/s]

{'epoch': 0, 'iter': 14740, 'avg_loss': 8.996521624867963, 'avg_acc': 49.906298758564546, 'loss': 9.193909645080566}


EP_train:0:  53%|| 14752/27626 [34:40<30:27,  7.04it/s]

{'epoch': 0, 'iter': 14750, 'avg_loss': 8.996084663101993, 'avg_acc': 49.90932818114026, 'loss': 8.255688667297363}


EP_train:0:  53%|| 14762/27626 [34:41<30:30,  7.03it/s]

{'epoch': 0, 'iter': 14760, 'avg_loss': 8.995978265021536, 'avg_acc': 49.9091779012262, 'loss': 8.49348258972168}


EP_train:0:  53%|| 14772/27626 [34:43<30:30,  7.02it/s]

{'epoch': 0, 'iter': 14770, 'avg_loss': 8.995915895712157, 'avg_acc': 49.91072033037709, 'loss': 8.683850288391113}


EP_train:0:  54%|| 14782/27626 [34:44<30:32,  7.01it/s]

{'epoch': 0, 'iter': 14780, 'avg_loss': 8.995784550604805, 'avg_acc': 49.91162641228604, 'loss': 7.885533332824707}


EP_train:0:  54%|| 14792/27626 [34:45<30:45,  6.95it/s]

{'epoch': 0, 'iter': 14790, 'avg_loss': 8.995654480749547, 'avg_acc': 49.914855317422756, 'loss': 8.292766571044922}


EP_train:0:  54%|| 14802/27626 [34:47<30:48,  6.94it/s]

{'epoch': 0, 'iter': 14800, 'avg_loss': 8.995369868281403, 'avg_acc': 49.91744645632052, 'loss': 9.337564468383789}


EP_train:0:  54%|| 14812/27626 [34:48<30:23,  7.03it/s]

{'epoch': 0, 'iter': 14810, 'avg_loss': 8.995184199234224, 'avg_acc': 49.917291202484634, 'loss': 8.198590278625488}


EP_train:0:  54%|| 14822/27626 [34:50<30:16,  7.05it/s]

{'epoch': 0, 'iter': 14820, 'avg_loss': 8.994831094663962, 'avg_acc': 49.91671445921328, 'loss': 8.364233016967773}


EP_train:0:  54%|| 14832/27626 [34:51<30:12,  7.06it/s]

{'epoch': 0, 'iter': 14830, 'avg_loss': 8.994558347146837, 'avg_acc': 49.918456274020635, 'loss': 8.784258842468262}


EP_train:0:  54%|| 14842/27626 [34:53<30:11,  7.06it/s]

{'epoch': 0, 'iter': 14840, 'avg_loss': 8.99420913092636, 'avg_acc': 49.91724782696584, 'loss': 10.184247016906738}


EP_train:0:  54%|| 14852/27626 [34:54<30:15,  7.04it/s]

{'epoch': 0, 'iter': 14850, 'avg_loss': 8.994070854486258, 'avg_acc': 49.91961820752811, 'loss': 8.379830360412598}


EP_train:0:  54%|| 14862/27626 [34:55<30:02,  7.08it/s]

{'epoch': 0, 'iter': 14860, 'avg_loss': 8.993761368630816, 'avg_acc': 49.92261624385977, 'loss': 7.943248748779297}


EP_train:0:  54%|| 14872/27626 [34:57<30:14,  7.03it/s]

{'epoch': 0, 'iter': 14870, 'avg_loss': 8.99365709810922, 'avg_acc': 49.92224799946204, 'loss': 8.135612487792969}


EP_train:0:  54%|| 14882/27626 [34:58<30:31,  6.96it/s]

{'epoch': 0, 'iter': 14880, 'avg_loss': 8.993229464364383, 'avg_acc': 49.9216702506552, 'loss': 8.439640998840332}


EP_train:0:  54%|| 14892/27626 [35:00<30:22,  6.99it/s]

{'epoch': 0, 'iter': 14890, 'avg_loss': 8.993092271657382, 'avg_acc': 49.924451010677586, 'loss': 8.249754905700684}


EP_train:0:  54%|| 14902/27626 [35:01<30:19,  6.99it/s]

{'epoch': 0, 'iter': 14900, 'avg_loss': 8.992857927636376, 'avg_acc': 49.925550298637674, 'loss': 8.425715446472168}


EP_train:0:  54%|| 14912/27626 [35:02<30:18,  6.99it/s]

{'epoch': 0, 'iter': 14910, 'avg_loss': 8.992591788324102, 'avg_acc': 49.92455234390718, 'loss': 8.247777938842773}


EP_train:0:  54%|| 14922/27626 [35:04<30:15,  7.00it/s]

{'epoch': 0, 'iter': 14920, 'avg_loss': 8.992212477625795, 'avg_acc': 49.92355572682796, 'loss': 8.669017791748047}


EP_train:0:  54%|| 14932/27626 [35:05<30:03,  7.04it/s]

{'epoch': 0, 'iter': 14930, 'avg_loss': 8.992090254089916, 'avg_acc': 49.91983959547251, 'loss': 8.954379081726074}


EP_train:0:  54%|| 14942/27626 [35:07<29:57,  7.06it/s]

{'epoch': 0, 'iter': 14940, 'avg_loss': 8.991840476962938, 'avg_acc': 49.923030586975436, 'loss': 8.737282752990723}


EP_train:0:  54%|| 14952/27626 [35:08<30:04,  7.02it/s]

{'epoch': 0, 'iter': 14950, 'avg_loss': 8.99185379190466, 'avg_acc': 49.923709116447064, 'loss': 9.528324127197266}


EP_train:0:  54%|| 14962/27626 [35:10<30:17,  6.97it/s]

{'epoch': 0, 'iter': 14960, 'avg_loss': 8.99178423507672, 'avg_acc': 49.92501336809037, 'loss': 8.734441757202148}


EP_train:0:  54%|| 14972/27626 [35:11<30:11,  6.99it/s]

{'epoch': 0, 'iter': 14970, 'avg_loss': 8.991630764063625, 'avg_acc': 49.92694208803687, 'loss': 9.039972305297852}


EP_train:0:  54%|| 14982/27626 [35:12<29:55,  7.04it/s]

{'epoch': 0, 'iter': 14980, 'avg_loss': 8.991350160581822, 'avg_acc': 49.92553067218476, 'loss': 8.594707489013672}


EP_train:0:  54%|| 14992/27626 [35:14<30:06,  6.99it/s]

{'epoch': 0, 'iter': 14990, 'avg_loss': 8.991064440885193, 'avg_acc': 49.925788806617305, 'loss': 8.489269256591797}


EP_train:0:  54%|| 15002/27626 [35:15<30:03,  7.00it/s]

{'epoch': 0, 'iter': 15000, 'avg_loss': 8.990721510899924, 'avg_acc': 49.92583827744817, 'loss': 8.1621732711792}


EP_train:0:  54%|| 15012/27626 [35:17<29:57,  7.02it/s]

{'epoch': 0, 'iter': 15010, 'avg_loss': 8.990461661491473, 'avg_acc': 49.92692858570382, 'loss': 8.678791999816895}


EP_train:0:  54%|| 15022/27626 [35:18<29:47,  7.05it/s]

{'epoch': 0, 'iter': 15020, 'avg_loss': 8.990016388612187, 'avg_acc': 49.92760135809866, 'loss': 8.399358749389648}


EP_train:0:  54%|| 15032/27626 [35:19<29:59,  7.00it/s]

{'epoch': 0, 'iter': 15030, 'avg_loss': 8.989755775818892, 'avg_acc': 49.9272337169849, 'loss': 9.260595321655273}


EP_train:0:  54%|| 15042/27626 [35:21<29:51,  7.02it/s]

{'epoch': 0, 'iter': 15040, 'avg_loss': 8.989649270698512, 'avg_acc': 49.92832092281098, 'loss': 8.849528312683105}


EP_train:0:  54%|| 15052/27626 [35:22<30:05,  6.96it/s]

{'epoch': 0, 'iter': 15050, 'avg_loss': 8.989479641068156, 'avg_acc': 49.92276260713574, 'loss': 8.475908279418945}


EP_train:0:  55%|| 15062/27626 [35:24<30:16,  6.92it/s]

{'epoch': 0, 'iter': 15060, 'avg_loss': 8.989138932130258, 'avg_acc': 49.923436358807514, 'loss': 9.362631797790527}


EP_train:0:  55%|| 15072/27626 [35:25<29:51,  7.01it/s]

{'epoch': 0, 'iter': 15070, 'avg_loss': 8.989151968097618, 'avg_acc': 49.92079158649061, 'loss': 8.751030921936035}


EP_train:0:  55%|| 15082/27626 [35:27<29:50,  7.01it/s]

{'epoch': 0, 'iter': 15080, 'avg_loss': 8.989017064940843, 'avg_acc': 49.919186393475236, 'loss': 8.630149841308594}


EP_train:0:  55%|| 15092/27626 [35:28<29:41,  7.04it/s]

{'epoch': 0, 'iter': 15090, 'avg_loss': 8.98874390976036, 'avg_acc': 49.92193194619309, 'loss': 8.959553718566895}


EP_train:0:  55%|| 15102/27626 [35:29<29:42,  7.03it/s]

{'epoch': 0, 'iter': 15100, 'avg_loss': 8.988751532502398, 'avg_acc': 49.92343222303159, 'loss': 9.099769592285156}


EP_train:0:  55%|| 15112/27626 [35:31<29:51,  6.98it/s]

{'epoch': 0, 'iter': 15110, 'avg_loss': 8.988461584852686, 'avg_acc': 49.92493051419496, 'loss': 8.783862113952637}


EP_train:0:  55%|| 15122/27626 [35:32<29:50,  6.98it/s]

{'epoch': 0, 'iter': 15120, 'avg_loss': 8.988365431199604, 'avg_acc': 49.92394682891343, 'loss': 8.091276168823242}


EP_train:0:  55%|| 15132/27626 [35:34<29:40,  7.02it/s]

{'epoch': 0, 'iter': 15130, 'avg_loss': 8.9881610960176, 'avg_acc': 49.92193179565131, 'loss': 8.261129379272461}


EP_train:0:  55%|| 15142/27626 [35:35<29:38,  7.02it/s]

{'epoch': 0, 'iter': 15140, 'avg_loss': 8.987870879331059, 'avg_acc': 49.92569843471369, 'loss': 8.542465209960938}


EP_train:0:  55%|| 15152/27626 [35:37<29:39,  7.01it/s]

{'epoch': 0, 'iter': 15150, 'avg_loss': 8.987661331323546, 'avg_acc': 49.927191274503336, 'loss': 9.34850025177002}


EP_train:0:  55%|| 15162/27626 [35:38<29:41,  6.99it/s]

{'epoch': 0, 'iter': 15160, 'avg_loss': 8.987555925421976, 'avg_acc': 49.92579645142141, 'loss': 7.96134614944458}


EP_train:0:  55%|| 15172/27626 [35:39<29:24,  7.06it/s]

{'epoch': 0, 'iter': 15170, 'avg_loss': 8.987122615382727, 'avg_acc': 49.926669303275986, 'loss': 8.741496086120605}


EP_train:0:  55%|| 15182/27626 [35:41<29:38,  7.00it/s]

{'epoch': 0, 'iter': 15180, 'avg_loss': 8.986481231880175, 'avg_acc': 49.92712930636981, 'loss': 8.004685401916504}


EP_train:0:  55%|| 15192/27626 [35:42<29:28,  7.03it/s]

{'epoch': 0, 'iter': 15190, 'avg_loss': 8.986415651283869, 'avg_acc': 49.9275887038378, 'loss': 8.943603515625}


EP_train:0:  55%|| 15202/27626 [35:44<29:25,  7.04it/s]

{'epoch': 0, 'iter': 15200, 'avg_loss': 8.98642706929065, 'avg_acc': 49.92496381816986, 'loss': 9.151333808898926}


EP_train:0:  55%|| 15212/27626 [35:45<29:30,  7.01it/s]

{'epoch': 0, 'iter': 15210, 'avg_loss': 8.986146048754678, 'avg_acc': 49.92604036552495, 'loss': 8.970010757446289}


EP_train:0:  55%|| 15222/27626 [35:46<29:27,  7.02it/s]

{'epoch': 0, 'iter': 15220, 'avg_loss': 8.985897262731886, 'avg_acc': 49.926088956047565, 'loss': 9.478981018066406}


EP_train:0:  55%|| 15232/27626 [35:48<29:22,  7.03it/s]

{'epoch': 0, 'iter': 15230, 'avg_loss': 8.98571743090744, 'avg_acc': 49.92531678812947, 'loss': 8.352945327758789}


EP_train:0:  55%|| 15242/27626 [35:49<29:21,  7.03it/s]

{'epoch': 0, 'iter': 15240, 'avg_loss': 8.985302561654889, 'avg_acc': 49.924955711567485, 'loss': 8.34819507598877}


EP_train:0:  55%|| 15252/27626 [35:51<29:22,  7.02it/s]

{'epoch': 0, 'iter': 15250, 'avg_loss': 8.985040397536489, 'avg_acc': 49.92602944069242, 'loss': 9.445276260375977}


EP_train:0:  55%|| 15262/27626 [35:52<29:21,  7.02it/s]

{'epoch': 0, 'iter': 15260, 'avg_loss': 8.984706182325505, 'avg_acc': 49.9264874516742, 'loss': 9.01373291015625}


EP_train:0:  55%|| 15272/27626 [35:54<29:21,  7.01it/s]

{'epoch': 0, 'iter': 15270, 'avg_loss': 8.984695354322032, 'avg_acc': 49.92653559033462, 'loss': 8.902932167053223}


EP_train:0:  55%|| 15282/27626 [35:55<29:22,  7.00it/s]

{'epoch': 0, 'iter': 15280, 'avg_loss': 8.984605118223731, 'avg_acc': 49.923720633466395, 'loss': 8.878170013427734}


EP_train:0:  55%|| 15292/27626 [35:56<29:23,  6.99it/s]

{'epoch': 0, 'iter': 15290, 'avg_loss': 8.98447508472355, 'avg_acc': 49.923361781440065, 'loss': 9.375251770019531}


EP_train:0:  55%|| 15302/27626 [35:58<29:14,  7.02it/s]

{'epoch': 0, 'iter': 15300, 'avg_loss': 8.984113262865359, 'avg_acc': 49.92157375334945, 'loss': 7.967818737030029}


EP_train:0:  55%|| 15312/27626 [35:59<29:08,  7.04it/s]

{'epoch': 0, 'iter': 15310, 'avg_loss': 8.983869734053966, 'avg_acc': 49.922441382012934, 'loss': 9.000292778015137}


EP_train:0:  55%|| 15322/27626 [36:01<29:09,  7.03it/s]

{'epoch': 0, 'iter': 15320, 'avg_loss': 8.983557447706483, 'avg_acc': 49.920656288753996, 'loss': 8.19374942779541}


EP_train:0:  55%|| 15332/27626 [36:02<29:03,  7.05it/s]

{'epoch': 0, 'iter': 15330, 'avg_loss': 8.983338737711733, 'avg_acc': 49.92131954862696, 'loss': 8.363142013549805}


EP_train:0:  56%|| 15342/27626 [36:03<29:04,  7.04it/s]

{'epoch': 0, 'iter': 15340, 'avg_loss': 8.983230527784933, 'avg_acc': 49.921370836320975, 'loss': 8.658377647399902}


EP_train:0:  56%|| 15352/27626 [36:05<29:05,  7.03it/s]

{'epoch': 0, 'iter': 15350, 'avg_loss': 8.983164886256924, 'avg_acc': 49.92081134779493, 'loss': 8.26772689819336}


EP_train:0:  56%|| 15362/27626 [36:06<29:03,  7.04it/s]

{'epoch': 0, 'iter': 15360, 'avg_loss': 8.982883328052324, 'avg_acc': 49.919642275893494, 'loss': 9.005949974060059}


EP_train:0:  56%|| 15372/27626 [36:08<29:11,  7.00it/s]

{'epoch': 0, 'iter': 15370, 'avg_loss': 8.982763021600773, 'avg_acc': 49.921930908854335, 'loss': 8.472023963928223}


EP_train:0:  56%|| 15382/27626 [36:09<29:21,  6.95it/s]

{'epoch': 0, 'iter': 15380, 'avg_loss': 8.982599358031315, 'avg_acc': 49.92381022040179, 'loss': 8.76876163482666}


EP_train:0:  56%|| 15392/27626 [36:11<28:57,  7.04it/s]

{'epoch': 0, 'iter': 15390, 'avg_loss': 8.982433699762199, 'avg_acc': 49.923656682476775, 'loss': 9.270179748535156}


EP_train:0:  56%|| 15402/27626 [36:12<28:56,  7.04it/s]

{'epoch': 0, 'iter': 15400, 'avg_loss': 8.98229720785853, 'avg_acc': 49.925532432958896, 'loss': 8.787821769714355}


EP_train:0:  56%|| 15412/27626 [36:13<29:03,  7.00it/s]

{'epoch': 0, 'iter': 15410, 'avg_loss': 8.981974843490379, 'avg_acc': 49.927202971903185, 'loss': 8.964844703674316}


EP_train:0:  56%|| 15422/27626 [36:15<29:13,  6.96it/s]

{'epoch': 0, 'iter': 15420, 'avg_loss': 8.981579211008517, 'avg_acc': 49.926844886842616, 'loss': 7.842829704284668}


EP_train:0:  56%|| 15432/27626 [36:16<29:10,  6.97it/s]

{'epoch': 0, 'iter': 15430, 'avg_loss': 8.981586865956674, 'avg_acc': 49.92689229473138, 'loss': 9.29256534576416}


EP_train:0:  56%|| 15442/27626 [36:18<29:09,  6.97it/s]

{'epoch': 0, 'iter': 15440, 'avg_loss': 8.981425053421276, 'avg_acc': 49.92572534162295, 'loss': 7.860743999481201}


EP_train:0:  56%|| 15452/27626 [36:19<28:58,  7.00it/s]

{'epoch': 0, 'iter': 15450, 'avg_loss': 8.981122484115186, 'avg_acc': 49.922941880784414, 'loss': 8.55908489227295}


EP_train:0:  56%|| 15462/27626 [36:21<28:52,  7.02it/s]

{'epoch': 0, 'iter': 15460, 'avg_loss': 8.98095542105718, 'avg_acc': 49.92198111377013, 'loss': 7.902390480041504}


EP_train:0:  56%|| 15472/27626 [36:22<28:55,  7.00it/s]

{'epoch': 0, 'iter': 15470, 'avg_loss': 8.980761873091009, 'avg_acc': 49.92384946028052, 'loss': 7.918674945831299}


EP_train:0:  56%|| 15482/27626 [36:23<29:33,  6.85it/s]

{'epoch': 0, 'iter': 15480, 'avg_loss': 8.980635562305586, 'avg_acc': 49.92430237064789, 'loss': 8.841654777526855}


EP_train:0:  56%|| 15492/27626 [36:25<28:50,  7.01it/s]

{'epoch': 0, 'iter': 15490, 'avg_loss': 8.980673532644179, 'avg_acc': 49.92354431605449, 'loss': 9.990289688110352}


EP_train:0:  56%|| 15502/27626 [36:26<28:44,  7.03it/s]

{'epoch': 0, 'iter': 15500, 'avg_loss': 8.980805733259105, 'avg_acc': 49.92359363912006, 'loss': 9.039467811584473}


EP_train:0:  56%|| 15512/27626 [36:28<28:54,  6.99it/s]

{'epoch': 0, 'iter': 15510, 'avg_loss': 8.980705586909847, 'avg_acc': 49.92243407904068, 'loss': 8.327225685119629}


EP_train:0:  56%|| 15522/27626 [36:29<28:50,  7.00it/s]

{'epoch': 0, 'iter': 15520, 'avg_loss': 8.980424082914384, 'avg_acc': 49.92449745506089, 'loss': 8.94599723815918}


EP_train:0:  56%|| 15532/27626 [36:31<28:40,  7.03it/s]

{'epoch': 0, 'iter': 15530, 'avg_loss': 8.980446368618992, 'avg_acc': 49.92313759577619, 'loss': 8.744193077087402}


EP_train:0:  56%|| 15542/27626 [36:32<28:36,  7.04it/s]

{'epoch': 0, 'iter': 15540, 'avg_loss': 8.980191066714452, 'avg_acc': 49.924996782703815, 'loss': 8.763131141662598}


EP_train:0:  56%|| 15552/27626 [36:33<28:42,  7.01it/s]

{'epoch': 0, 'iter': 15550, 'avg_loss': 8.980297637303138, 'avg_acc': 49.92283454440229, 'loss': 9.534400939941406}


EP_train:0:  56%|| 15562/27626 [36:35<28:35,  7.03it/s]

{'epoch': 0, 'iter': 15560, 'avg_loss': 8.980034093088896, 'avg_acc': 49.92308495597969, 'loss': 8.62797737121582}


EP_train:0:  56%|| 15572/27626 [36:36<28:33,  7.03it/s]

{'epoch': 0, 'iter': 15570, 'avg_loss': 8.979659109505743, 'avg_acc': 49.92734891786013, 'loss': 9.22562026977539}


EP_train:0:  56%|| 15582/27626 [36:38<28:41,  7.00it/s]

{'epoch': 0, 'iter': 15580, 'avg_loss': 8.979503250238364, 'avg_acc': 49.924788203581286, 'loss': 8.388724327087402}


EP_train:0:  56%|| 15592/27626 [36:39<28:38,  7.00it/s]

{'epoch': 0, 'iter': 15590, 'avg_loss': 8.979328344986259, 'avg_acc': 49.924435571804246, 'loss': 9.085089683532715}


EP_train:0:  56%|| 15602/27626 [36:40<28:31,  7.02it/s]

{'epoch': 0, 'iter': 15600, 'avg_loss': 8.979004033765198, 'avg_acc': 49.922681238382154, 'loss': 8.636582374572754}


EP_train:0:  57%|| 15612/27626 [36:42<28:37,  7.00it/s]

{'epoch': 0, 'iter': 15610, 'avg_loss': 8.978958670100734, 'avg_acc': 49.921729869963485, 'loss': 8.407330513000488}


EP_train:0:  57%|| 15622/27626 [36:43<28:34,  7.00it/s]

{'epoch': 0, 'iter': 15620, 'avg_loss': 8.978918510487318, 'avg_acc': 49.92077971960822, 'loss': 9.118141174316406}


EP_train:0:  57%|| 15632/27626 [36:45<28:27,  7.03it/s]

{'epoch': 0, 'iter': 15630, 'avg_loss': 8.978640969293329, 'avg_acc': 49.9214301708144, 'loss': 7.1128716468811035}


EP_train:0:  57%|| 15642/27626 [36:46<28:31,  7.00it/s]

{'epoch': 0, 'iter': 15640, 'avg_loss': 8.978258097936228, 'avg_acc': 49.92507672143725, 'loss': 8.046061515808105}


EP_train:0:  57%|| 15652/27626 [36:48<28:33,  6.99it/s]

{'epoch': 0, 'iter': 15650, 'avg_loss': 8.977974362727073, 'avg_acc': 49.929117947734966, 'loss': 8.74798583984375}


EP_train:0:  57%|| 15662/27626 [36:49<28:12,  7.07it/s]

{'epoch': 0, 'iter': 15660, 'avg_loss': 8.977791618398076, 'avg_acc': 49.929562288487325, 'loss': 9.09779167175293}


EP_train:0:  57%|| 15672/27626 [36:50<28:40,  6.95it/s]

{'epoch': 0, 'iter': 15670, 'avg_loss': 8.977737461719583, 'avg_acc': 49.9304048880097, 'loss': 8.669668197631836}


EP_train:0:  57%|| 15682/27626 [36:52<28:26,  7.00it/s]

{'epoch': 0, 'iter': 15680, 'avg_loss': 8.977650895729013, 'avg_acc': 49.932043555895675, 'loss': 8.1657133102417}


EP_train:0:  57%|| 15692/27626 [36:53<28:23,  7.01it/s]

{'epoch': 0, 'iter': 15690, 'avg_loss': 8.977291314766456, 'avg_acc': 49.93328181760245, 'loss': 8.237515449523926}


EP_train:0:  57%|| 15702/27626 [36:55<28:28,  6.98it/s]

{'epoch': 0, 'iter': 15700, 'avg_loss': 8.977136499782587, 'avg_acc': 49.933125278644674, 'loss': 7.674647331237793}


EP_train:0:  57%|| 15712/27626 [36:56<28:13,  7.03it/s]

{'epoch': 0, 'iter': 15710, 'avg_loss': 8.976926372398903, 'avg_acc': 49.93058207625231, 'loss': 8.455260276794434}


EP_train:0:  57%|| 15722/27626 [36:58<28:09,  7.05it/s]

{'epoch': 0, 'iter': 15720, 'avg_loss': 8.976650772078955, 'avg_acc': 49.93261401946441, 'loss': 8.675986289978027}


EP_train:0:  57%|| 15732/27626 [36:59<28:16,  7.01it/s]

{'epoch': 0, 'iter': 15730, 'avg_loss': 8.976457562269875, 'avg_acc': 49.9316635941771, 'loss': 7.185781478881836}


EP_train:0:  57%|| 15742/27626 [37:00<28:14,  7.01it/s]

{'epoch': 0, 'iter': 15740, 'avg_loss': 8.976499759547327, 'avg_acc': 49.92872911504987, 'loss': 8.95641803741455}


EP_train:0:  57%|| 15752/27626 [37:02<27:59,  7.07it/s]

{'epoch': 0, 'iter': 15750, 'avg_loss': 8.976042597371505, 'avg_acc': 49.93095676464986, 'loss': 8.355629920959473}


EP_train:0:  57%|| 15762/27626 [37:03<28:05,  7.04it/s]

{'epoch': 0, 'iter': 15760, 'avg_loss': 8.975863068231313, 'avg_acc': 49.93278503902037, 'loss': 8.953508377075195}


EP_train:0:  57%|| 15772/27626 [37:05<28:02,  7.05it/s]

{'epoch': 0, 'iter': 15770, 'avg_loss': 8.975568422026402, 'avg_acc': 49.93183691585822, 'loss': 8.302124977111816}


EP_train:0:  57%|| 15782/27626 [37:06<28:09,  7.01it/s]

{'epoch': 0, 'iter': 15780, 'avg_loss': 8.975316658392279, 'avg_acc': 49.93069197135796, 'loss': 8.6475191116333}


EP_train:0:  57%|| 15792/27626 [37:07<27:59,  7.05it/s]

{'epoch': 0, 'iter': 15790, 'avg_loss': 8.975002814997035, 'avg_acc': 49.93034006712684, 'loss': 8.775202751159668}


EP_train:0:  57%|| 15802/27626 [37:09<27:54,  7.06it/s]

{'epoch': 0, 'iter': 15800, 'avg_loss': 8.975378376389974, 'avg_acc': 49.932361875830644, 'loss': 9.609068870544434}


EP_train:0:  57%|| 15812/27626 [37:10<28:00,  7.03it/s]

{'epoch': 0, 'iter': 15810, 'avg_loss': 8.975454447127845, 'avg_acc': 49.93299759660996, 'loss': 9.028328895568848}


EP_train:0:  57%|| 15822/27626 [37:12<27:59,  7.03it/s]

{'epoch': 0, 'iter': 15820, 'avg_loss': 8.975243529992538, 'avg_acc': 49.93145976866191, 'loss': 9.243317604064941}


EP_train:0:  57%|| 15832/27626 [37:13<27:58,  7.03it/s]

{'epoch': 0, 'iter': 15830, 'avg_loss': 8.975001085023662, 'avg_acc': 49.930121281030885, 'loss': 8.082385063171387}


EP_train:0:  57%|| 15842/27626 [37:15<28:05,  6.99it/s]

{'epoch': 0, 'iter': 15840, 'avg_loss': 8.974800742174457, 'avg_acc': 49.926811754308446, 'loss': 9.078043937683105}


EP_train:0:  57%|| 15852/27626 [37:16<28:09,  6.97it/s]

{'epoch': 0, 'iter': 15850, 'avg_loss': 8.974708044042377, 'avg_acc': 49.92882941139361, 'loss': 8.654239654541016}


EP_train:0:  57%|| 15862/27626 [37:17<28:07,  6.97it/s]

{'epoch': 0, 'iter': 15860, 'avg_loss': 8.974328110042327, 'avg_acc': 49.93104154845218, 'loss': 8.243338584899902}


EP_train:0:  57%|| 15872/27626 [37:19<28:04,  6.98it/s]

{'epoch': 0, 'iter': 15870, 'avg_loss': 8.974000388451842, 'avg_acc': 49.93285709785143, 'loss': 8.374494552612305}


EP_train:0:  57%|| 15882/27626 [37:20<27:51,  7.03it/s]

{'epoch': 0, 'iter': 15880, 'avg_loss': 8.973718558245947, 'avg_acc': 49.9327026005919, 'loss': 8.697569847106934}


EP_train:0:  58%|| 15892/27626 [37:22<27:42,  7.06it/s]

{'epoch': 0, 'iter': 15890, 'avg_loss': 8.973448278020113, 'avg_acc': 49.933531558743944, 'loss': 8.050457954406738}


EP_train:0:  58%|| 15902/27626 [37:23<27:40,  7.06it/s]

{'epoch': 0, 'iter': 15900, 'avg_loss': 8.972946441441195, 'avg_acc': 49.93514558832778, 'loss': 7.933885097503662}


EP_train:0:  58%|| 15912/27626 [37:24<27:34,  7.08it/s]

{'epoch': 0, 'iter': 15910, 'avg_loss': 8.972933576165614, 'avg_acc': 49.93577556407517, 'loss': 9.170628547668457}


EP_train:0:  58%|| 15922/27626 [37:26<27:43,  7.03it/s]

{'epoch': 0, 'iter': 15920, 'avg_loss': 8.972783914800491, 'avg_acc': 49.93620846680485, 'loss': 8.81771469116211}


EP_train:0:  58%|| 15932/27626 [37:27<27:43,  7.03it/s]

{'epoch': 0, 'iter': 15930, 'avg_loss': 8.972796957069749, 'avg_acc': 49.93644466762915, 'loss': 8.544251441955566}


EP_train:0:  58%|| 15942/27626 [37:29<27:50,  6.99it/s]

{'epoch': 0, 'iter': 15940, 'avg_loss': 8.97264969740015, 'avg_acc': 49.93511228906593, 'loss': 8.95313549041748}


EP_train:0:  58%|| 15952/27626 [37:30<27:43,  7.02it/s]

{'epoch': 0, 'iter': 15950, 'avg_loss': 8.97244221244469, 'avg_acc': 49.93397749357407, 'loss': 8.488048553466797}


EP_train:0:  58%|| 15962/27626 [37:32<27:46,  7.00it/s]

{'epoch': 0, 'iter': 15960, 'avg_loss': 8.972118714946037, 'avg_acc': 49.934606227679964, 'loss': 7.805495738983154}


EP_train:0:  58%|| 15972/27626 [37:33<27:25,  7.08it/s]

{'epoch': 0, 'iter': 15970, 'avg_loss': 8.971742817145032, 'avg_acc': 49.93269050153403, 'loss': 9.689239501953125}


EP_train:0:  58%|| 15982/27626 [37:34<27:17,  7.11it/s]

{'epoch': 0, 'iter': 15980, 'avg_loss': 8.971527137712188, 'avg_acc': 49.932928164695575, 'loss': 9.381072044372559}


EP_train:0:  58%|| 15992/27626 [37:36<27:14,  7.12it/s]

{'epoch': 0, 'iter': 15990, 'avg_loss': 8.971381259213528, 'avg_acc': 49.931797573635166, 'loss': 9.108549118041992}


EP_train:0:  58%|| 16002/27626 [37:37<27:20,  7.09it/s]

{'epoch': 0, 'iter': 16000, 'avg_loss': 8.971231866751557, 'avg_acc': 49.930863696019, 'loss': 8.86982250213623}


EP_train:0:  58%|| 16012/27626 [37:39<27:32,  7.03it/s]

{'epoch': 0, 'iter': 16010, 'avg_loss': 8.971083956322882, 'avg_acc': 49.928955093373304, 'loss': 8.673478126525879}


EP_train:0:  58%|| 16022/27626 [37:40<27:30,  7.03it/s]

{'epoch': 0, 'iter': 16020, 'avg_loss': 8.971042667016517, 'avg_acc': 49.93055989014419, 'loss': 9.41133975982666}


EP_train:0:  58%|| 16032/27626 [37:41<27:27,  7.04it/s]

{'epoch': 0, 'iter': 16030, 'avg_loss': 8.970749272580944, 'avg_acc': 49.93079814110162, 'loss': 9.353981018066406}


EP_train:0:  58%|| 16042/27626 [37:43<27:18,  7.07it/s]

{'epoch': 0, 'iter': 16040, 'avg_loss': 8.97036058633273, 'avg_acc': 49.92947758867901, 'loss': 8.421128273010254}


EP_train:0:  58%|| 16052/27626 [37:44<27:32,  7.00it/s]

{'epoch': 0, 'iter': 16050, 'avg_loss': 8.969892997176066, 'avg_acc': 49.92971621705813, 'loss': 7.903543949127197}


EP_train:0:  58%|| 16062/27626 [37:46<27:12,  7.08it/s]

{'epoch': 0, 'iter': 16060, 'avg_loss': 8.96991536466013, 'avg_acc': 49.93170568457755, 'loss': 8.93148136138916}


EP_train:0:  58%|| 16072/27626 [37:47<27:15,  7.06it/s]

{'epoch': 0, 'iter': 16070, 'avg_loss': 8.969899420316601, 'avg_acc': 49.93252597847054, 'loss': 8.663202285766602}


EP_train:0:  58%|| 16082/27626 [37:48<27:15,  7.06it/s]

{'epoch': 0, 'iter': 16080, 'avg_loss': 8.969693173935784, 'avg_acc': 49.93256793731733, 'loss': 8.576431274414062}


EP_train:0:  58%|| 16092/27626 [37:50<27:16,  7.05it/s]

{'epoch': 0, 'iter': 16090, 'avg_loss': 8.969619911354581, 'avg_acc': 49.9320272201852, 'loss': 8.650687217712402}


EP_train:0:  58%|| 16102/27626 [37:51<27:19,  7.03it/s]

{'epoch': 0, 'iter': 16100, 'avg_loss': 8.969134061960364, 'avg_acc': 49.92974038879573, 'loss': 8.428609848022461}


EP_train:0:  58%|| 16112/27626 [37:53<27:30,  6.98it/s]

{'epoch': 0, 'iter': 16110, 'avg_loss': 8.968914620962588, 'avg_acc': 49.93055986593011, 'loss': 8.0941801071167}


EP_train:0:  58%|| 16122/27626 [37:54<27:21,  7.01it/s]

{'epoch': 0, 'iter': 16120, 'avg_loss': 8.968706465160851, 'avg_acc': 49.93234755908442, 'loss': 8.203816413879395}


EP_train:0:  58%|| 16132/27626 [37:56<27:26,  6.98it/s]

{'epoch': 0, 'iter': 16130, 'avg_loss': 8.968441617369882, 'avg_acc': 49.93374558303887, 'loss': 8.414074897766113}


EP_train:0:  58%|| 16142/27626 [37:57<27:27,  6.97it/s]

{'epoch': 0, 'iter': 16140, 'avg_loss': 8.968385955819292, 'avg_acc': 49.933786630320306, 'loss': 9.034833908081055}


EP_train:0:  58%|| 16152/27626 [37:58<27:10,  7.04it/s]

{'epoch': 0, 'iter': 16150, 'avg_loss': 8.96820117011468, 'avg_acc': 49.93498854560089, 'loss': 8.858651161193848}


EP_train:0:  59%|| 16162/27626 [38:00<27:02,  7.07it/s]

{'epoch': 0, 'iter': 16160, 'avg_loss': 8.967946352945992, 'avg_acc': 49.93425530598354, 'loss': 8.81361198425293}


EP_train:0:  59%|| 16172/27626 [38:01<27:05,  7.05it/s]

{'epoch': 0, 'iter': 16170, 'avg_loss': 8.967684976083437, 'avg_acc': 49.93506895059056, 'loss': 9.075830459594727}


EP_train:0:  59%|| 16182/27626 [38:03<26:57,  7.07it/s]

{'epoch': 0, 'iter': 16180, 'avg_loss': 8.967538541521884, 'avg_acc': 49.93665410048823, 'loss': 8.864054679870605}


EP_train:0:  59%|| 16192/27626 [38:04<27:09,  7.02it/s]

{'epoch': 0, 'iter': 16190, 'avg_loss': 8.967360135454753, 'avg_acc': 49.94093941078377, 'loss': 8.134456634521484}


EP_train:0:  59%|| 16202/27626 [38:05<27:09,  7.01it/s]

{'epoch': 0, 'iter': 16200, 'avg_loss': 8.967165162081955, 'avg_acc': 49.940204308376025, 'loss': 9.433353424072266}


EP_train:0:  59%|| 16212/27626 [38:07<27:09,  7.01it/s]

{'epoch': 0, 'iter': 16210, 'avg_loss': 8.96717031873741, 'avg_acc': 49.939662883227435, 'loss': 8.297226905822754}


EP_train:0:  59%|| 16222/27626 [38:08<27:05,  7.01it/s]

{'epoch': 0, 'iter': 16220, 'avg_loss': 8.966898082503628, 'avg_acc': 49.94181924665557, 'loss': 8.167913436889648}


EP_train:0:  59%|| 16232/27626 [38:10<26:59,  7.04it/s]

{'epoch': 0, 'iter': 16230, 'avg_loss': 8.966509956432485, 'avg_acc': 49.940699895262156, 'loss': 8.213201522827148}


EP_train:0:  59%|| 16242/27626 [38:11<27:04,  7.01it/s]

{'epoch': 0, 'iter': 16240, 'avg_loss': 8.966356947844666, 'avg_acc': 49.94381503601995, 'loss': 8.384092330932617}


EP_train:0:  59%|| 16252/27626 [38:13<26:52,  7.06it/s]

{'epoch': 0, 'iter': 16250, 'avg_loss': 8.966071187411886, 'avg_acc': 49.94231124238509, 'loss': 8.733893394470215}


EP_train:0:  59%|| 16262/27626 [38:14<26:38,  7.11it/s]

{'epoch': 0, 'iter': 16260, 'avg_loss': 8.965666349960072, 'avg_acc': 49.94253889674682, 'loss': 9.32483196258545}


EP_train:0:  59%|| 16272/27626 [38:15<26:59,  7.01it/s]

{'epoch': 0, 'iter': 16270, 'avg_loss': 8.965556197230622, 'avg_acc': 49.94180597381845, 'loss': 8.895469665527344}


EP_train:0:  59%|| 16282/27626 [38:17<26:55,  7.02it/s]

{'epoch': 0, 'iter': 16280, 'avg_loss': 8.96555084008036, 'avg_acc': 49.942033658866166, 'loss': 8.880979537963867}


EP_train:0:  59%|| 16292/27626 [38:18<26:43,  7.07it/s]

{'epoch': 0, 'iter': 16290, 'avg_loss': 8.9655525035295, 'avg_acc': 49.93900006138359, 'loss': 8.023225784301758}


EP_train:0:  59%|| 16302/27626 [38:20<26:38,  7.09it/s]

{'epoch': 0, 'iter': 16300, 'avg_loss': 8.964921612467812, 'avg_acc': 49.937503834120605, 'loss': 7.690956115722656}


EP_train:0:  59%|| 16312/27626 [38:21<26:45,  7.05it/s]

{'epoch': 0, 'iter': 16310, 'avg_loss': 8.964652734577207, 'avg_acc': 49.937733737968244, 'loss': 8.55981159210205}


EP_train:0:  59%|| 16322/27626 [38:22<26:40,  7.06it/s]

{'epoch': 0, 'iter': 16320, 'avg_loss': 8.964540823484255, 'avg_acc': 49.93911218675326, 'loss': 8.06276798248291}


EP_train:0:  59%|| 16332/27626 [38:24<26:29,  7.10it/s]

{'epoch': 0, 'iter': 16330, 'avg_loss': 8.964262449940641, 'avg_acc': 49.94201977833568, 'loss': 8.532048225402832}


EP_train:0:  59%|| 16342/27626 [38:25<26:32,  7.09it/s]

{'epoch': 0, 'iter': 16340, 'avg_loss': 8.964204967901003, 'avg_acc': 49.94435010097301, 'loss': 9.416180610656738}


EP_train:0:  59%|| 16352/27626 [38:27<26:32,  7.08it/s]

{'epoch': 0, 'iter': 16350, 'avg_loss': 8.964205713711284, 'avg_acc': 49.94476637514525, 'loss': 8.073246955871582}


EP_train:0:  59%|| 16362/27626 [38:28<26:53,  6.98it/s]

{'epoch': 0, 'iter': 16360, 'avg_loss': 8.963946906302482, 'avg_acc': 49.943654116496546, 'loss': 7.499354839324951}


EP_train:0:  59%|| 16372/27626 [38:30<26:36,  7.05it/s]

{'epoch': 0, 'iter': 16370, 'avg_loss': 8.96352799369393, 'avg_acc': 49.94445207989738, 'loss': 8.34434700012207}


EP_train:0:  59%|| 16382/27626 [38:31<26:29,  7.07it/s]

{'epoch': 0, 'iter': 16380, 'avg_loss': 8.96284751447884, 'avg_acc': 49.94467675966058, 'loss': 8.774670600891113}


EP_train:0:  59%|| 16392/27626 [38:32<26:31,  7.06it/s]

{'epoch': 0, 'iter': 16390, 'avg_loss': 8.962752468509894, 'avg_acc': 49.94585443231042, 'loss': 8.98336410522461}


EP_train:0:  59%|| 16402/27626 [38:34<26:35,  7.03it/s]

{'epoch': 0, 'iter': 16400, 'avg_loss': 8.962802762265946, 'avg_acc': 49.94645905737455, 'loss': 8.780610084533691}


EP_train:0:  59%|| 16412/27626 [38:35<26:24,  7.08it/s]

{'epoch': 0, 'iter': 16410, 'avg_loss': 8.96294030163422, 'avg_acc': 49.944587471817684, 'loss': 9.12891960144043}


EP_train:0:  59%|| 16422/27626 [38:37<26:28,  7.05it/s]

{'epoch': 0, 'iter': 16420, 'avg_loss': 8.962813124253916, 'avg_acc': 49.94385999634614, 'loss': 9.785408973693848}


EP_train:0:  59%|| 16432/27626 [38:38<26:24,  7.07it/s]

{'epoch': 0, 'iter': 16430, 'avg_loss': 8.962488335357595, 'avg_acc': 49.944274542024225, 'loss': 8.677225112915039}


EP_train:0:  60%|| 16442/27626 [38:39<26:32,  7.02it/s]

{'epoch': 0, 'iter': 16440, 'avg_loss': 8.962166121011668, 'avg_acc': 49.94506873061249, 'loss': 8.39291763305664}


EP_train:0:  60%|| 16452/27626 [38:41<26:24,  7.05it/s]

{'epoch': 0, 'iter': 16450, 'avg_loss': 8.961886370553774, 'avg_acc': 49.94491216339432, 'loss': 8.366620063781738}


EP_train:0:  60%|| 16462/27626 [38:42<26:25,  7.04it/s]

{'epoch': 0, 'iter': 16460, 'avg_loss': 8.961610968076302, 'avg_acc': 49.9477932689387, 'loss': 8.090845108032227}


EP_train:0:  60%|| 16472/27626 [38:44<26:22,  7.05it/s]

{'epoch': 0, 'iter': 16470, 'avg_loss': 8.961363944349579, 'avg_acc': 49.94611741849311, 'loss': 8.488398551940918}


EP_train:0:  60%|| 16482/27626 [38:45<26:24,  7.03it/s]

{'epoch': 0, 'iter': 16480, 'avg_loss': 8.961209419506947, 'avg_acc': 49.94368515259996, 'loss': 8.775314331054688}


EP_train:0:  60%|| 16492/27626 [38:46<26:15,  7.07it/s]

{'epoch': 0, 'iter': 16490, 'avg_loss': 8.960873064353766, 'avg_acc': 49.94125583651689, 'loss': 8.897412300109863}


EP_train:0:  60%|| 16502/27626 [38:48<26:32,  6.98it/s]

{'epoch': 0, 'iter': 16500, 'avg_loss': 8.960567036106198, 'avg_acc': 49.941859584267625, 'loss': 8.437763214111328}


EP_train:0:  60%|| 16512/27626 [38:49<26:21,  7.03it/s]

{'epoch': 0, 'iter': 16510, 'avg_loss': 8.960254816188492, 'avg_acc': 49.94208406516867, 'loss': 9.465380668640137}


EP_train:0:  60%|| 16522/27626 [38:51<26:14,  7.05it/s]

{'epoch': 0, 'iter': 16520, 'avg_loss': 8.960301746504408, 'avg_acc': 49.94306488711337, 'loss': 9.320980072021484}


EP_train:0:  60%|| 16532/27626 [38:52<26:16,  7.04it/s]

{'epoch': 0, 'iter': 16530, 'avg_loss': 8.960070522645951, 'avg_acc': 49.94253221220737, 'loss': 9.071500778198242}


EP_train:0:  60%|| 16542/27626 [38:54<26:13,  7.04it/s]

{'epoch': 0, 'iter': 16540, 'avg_loss': 8.959910061532227, 'avg_acc': 49.94426727525543, 'loss': 8.568106651306152}


EP_train:0:  60%|| 16552/27626 [38:55<26:18,  7.02it/s]

{'epoch': 0, 'iter': 16550, 'avg_loss': 8.959593477065336, 'avg_acc': 49.94543381064588, 'loss': 8.168609619140625}


EP_train:0:  60%|| 16562/27626 [38:56<26:19,  7.01it/s]

{'epoch': 0, 'iter': 16560, 'avg_loss': 8.959394848294163, 'avg_acc': 49.94622154459272, 'loss': 8.938488960266113}


EP_train:0:  60%|| 16572/27626 [38:58<26:11,  7.04it/s]

{'epoch': 0, 'iter': 16570, 'avg_loss': 8.95926332410449, 'avg_acc': 49.947762657654934, 'loss': 8.124114036560059}


EP_train:0:  60%|| 16582/27626 [38:59<26:16,  7.01it/s]

{'epoch': 0, 'iter': 16580, 'avg_loss': 8.959338271454687, 'avg_acc': 49.94835956818044, 'loss': 8.674858093261719}


EP_train:0:  60%|| 16592/27626 [39:01<26:17,  6.99it/s]

{'epoch': 0, 'iter': 16590, 'avg_loss': 8.959097778780624, 'avg_acc': 49.94914411427882, 'loss': 9.297585487365723}


EP_train:0:  60%|| 16602/27626 [39:02<26:17,  6.99it/s]

{'epoch': 0, 'iter': 16600, 'avg_loss': 8.959051505778685, 'avg_acc': 49.94729233178724, 'loss': 9.022933006286621}


EP_train:0:  60%|| 16612/27626 [39:04<26:01,  7.05it/s]

{'epoch': 0, 'iter': 16610, 'avg_loss': 8.958631392927849, 'avg_acc': 49.9486409608091, 'loss': 8.64704418182373}


EP_train:0:  60%|| 16622/27626 [39:05<26:03,  7.04it/s]

{'epoch': 0, 'iter': 16620, 'avg_loss': 8.958649700323846, 'avg_acc': 49.94867186089886, 'loss': 9.33200454711914}


EP_train:0:  60%|| 16632/27626 [39:06<26:10,  7.00it/s]

{'epoch': 0, 'iter': 16630, 'avg_loss': 8.9587867899499, 'avg_acc': 49.94889062593951, 'loss': 9.53686237335205}


EP_train:0:  60%|| 16642/27626 [39:08<26:00,  7.04it/s]

{'epoch': 0, 'iter': 16640, 'avg_loss': 8.95882032356127, 'avg_acc': 49.94967249564329, 'loss': 8.739973068237305}


EP_train:0:  60%|| 16652/27626 [39:09<26:02,  7.03it/s]

{'epoch': 0, 'iter': 16650, 'avg_loss': 8.958684308916505, 'avg_acc': 49.95101645546814, 'loss': 8.184438705444336}


EP_train:0:  60%|| 16662/27626 [39:11<26:06,  7.00it/s]

{'epoch': 0, 'iter': 16660, 'avg_loss': 8.958266405031523, 'avg_acc': 49.950858291819216, 'loss': 6.972849369049072}


EP_train:0:  60%|| 16672/27626 [39:12<26:07,  6.99it/s]

{'epoch': 0, 'iter': 16670, 'avg_loss': 8.95768729366586, 'avg_acc': 49.951450122968026, 'loss': 8.278058052062988}


EP_train:0:  60%|| 16682/27626 [39:13<26:07,  6.98it/s]

{'epoch': 0, 'iter': 16680, 'avg_loss': 8.957674691677237, 'avg_acc': 49.950729872309815, 'loss': 8.962014198303223}


EP_train:0:  60%|| 16692/27626 [39:15<25:55,  7.03it/s]

{'epoch': 0, 'iter': 16690, 'avg_loss': 8.95760203062056, 'avg_acc': 49.951133844586906, 'loss': 9.217912673950195}


EP_train:0:  60%|| 16702/27626 [39:16<25:41,  7.09it/s]

{'epoch': 0, 'iter': 16700, 'avg_loss': 8.957468065956554, 'avg_acc': 49.95041464582959, 'loss': 9.226701736450195}


EP_train:0:  60%|| 16712/27626 [39:18<26:03,  6.98it/s]

{'epoch': 0, 'iter': 16710, 'avg_loss': 8.95719557705591, 'avg_acc': 49.94763927951649, 'loss': 8.661416053771973}


EP_train:0:  61%|| 16722/27626 [39:19<25:57,  7.00it/s]

{'epoch': 0, 'iter': 16720, 'avg_loss': 8.957186908658045, 'avg_acc': 49.950473954907004, 'loss': 8.708425521850586}


EP_train:0:  61%|| 16732/27626 [39:21<25:37,  7.09it/s]

{'epoch': 0, 'iter': 16730, 'avg_loss': 8.95703314905507, 'avg_acc': 49.95069033530572, 'loss': 8.887109756469727}


EP_train:0:  61%|| 16742/27626 [39:22<25:37,  7.08it/s]

{'epoch': 0, 'iter': 16740, 'avg_loss': 8.957066226251934, 'avg_acc': 49.953146466758255, 'loss': 9.7052583694458}


EP_train:0:  61%|| 16752/27626 [39:23<25:42,  7.05it/s]

{'epoch': 0, 'iter': 16750, 'avg_loss': 8.956917826634479, 'avg_acc': 49.955413109665095, 'loss': 8.544300079345703}


EP_train:0:  61%|| 16762/27626 [39:25<25:46,  7.02it/s]

{'epoch': 0, 'iter': 16760, 'avg_loss': 8.956749246040937, 'avg_acc': 49.95711771374023, 'loss': 8.8781156539917}


EP_train:0:  61%|| 16772/27626 [39:26<25:55,  6.98it/s]

{'epoch': 0, 'iter': 16770, 'avg_loss': 8.956662170078523, 'avg_acc': 49.95695694949616, 'loss': 8.998269081115723}


EP_train:0:  61%|| 16782/27626 [39:28<25:33,  7.07it/s]

{'epoch': 0, 'iter': 16780, 'avg_loss': 8.956302547346857, 'avg_acc': 49.9562377093141, 'loss': 8.55037784576416}


EP_train:0:  61%|| 16792/27626 [39:29<25:37,  7.05it/s]

{'epoch': 0, 'iter': 16790, 'avg_loss': 8.95627942436554, 'avg_acc': 49.95700821868858, 'loss': 8.642857551574707}


EP_train:0:  61%|| 16802/27626 [39:30<25:38,  7.03it/s]

{'epoch': 0, 'iter': 16800, 'avg_loss': 8.956002999129703, 'avg_acc': 49.9577778108446, 'loss': 8.420184135437012}


EP_train:0:  61%|| 16812/27626 [39:32<25:34,  7.05it/s]

{'epoch': 0, 'iter': 16810, 'avg_loss': 8.955879385556011, 'avg_acc': 49.96096305990125, 'loss': 8.619912147521973}


EP_train:0:  61%|| 16822/27626 [39:33<25:26,  7.08it/s]

{'epoch': 0, 'iter': 16820, 'avg_loss': 8.955745633635962, 'avg_acc': 49.96210094524701, 'loss': 8.875606536865234}


EP_train:0:  61%|| 16832/27626 [39:35<25:20,  7.10it/s]

{'epoch': 0, 'iter': 16830, 'avg_loss': 8.95561585121207, 'avg_acc': 49.96398015566514, 'loss': 8.384859085083008}


EP_train:0:  61%|| 16842/27626 [39:36<25:28,  7.06it/s]

{'epoch': 0, 'iter': 16840, 'avg_loss': 8.955261765972649, 'avg_acc': 49.96641381153138, 'loss': 8.15221118927002}


EP_train:0:  61%|| 16852/27626 [39:37<25:34,  7.02it/s]

{'epoch': 0, 'iter': 16850, 'avg_loss': 8.955100260617916, 'avg_acc': 49.96773188534805, 'loss': 8.483078002929688}


EP_train:0:  61%|| 16862/27626 [39:39<25:35,  7.01it/s]

{'epoch': 0, 'iter': 16860, 'avg_loss': 8.954744179939844, 'avg_acc': 49.96904839570607, 'loss': 7.727576732635498}


EP_train:0:  61%|| 16872/27626 [39:40<25:23,  7.06it/s]

{'epoch': 0, 'iter': 16870, 'avg_loss': 8.95448718430781, 'avg_acc': 49.96851105447217, 'loss': 9.3524169921875}


EP_train:0:  61%|| 16882/27626 [39:42<25:17,  7.08it/s]

{'epoch': 0, 'iter': 16880, 'avg_loss': 8.954439286082343, 'avg_acc': 49.96964042414549, 'loss': 9.535521507263184}


EP_train:0:  61%|| 16892/27626 [39:43<25:18,  7.07it/s]

{'epoch': 0, 'iter': 16890, 'avg_loss': 8.954346848087493, 'avg_acc': 49.970028417500444, 'loss': 8.42708683013916}


EP_train:0:  61%|| 16902/27626 [39:45<25:22,  7.04it/s]

{'epoch': 0, 'iter': 16900, 'avg_loss': 8.954069535694604, 'avg_acc': 49.970415951718834, 'loss': 8.781960487365723}


EP_train:0:  61%|| 16912/27626 [39:46<25:16,  7.07it/s]

{'epoch': 0, 'iter': 16910, 'avg_loss': 8.953923510162957, 'avg_acc': 49.971172609544084, 'loss': 8.80080795288086}


EP_train:0:  61%|| 16922/27626 [39:47<25:26,  7.01it/s]

{'epoch': 0, 'iter': 16920, 'avg_loss': 8.953761689143432, 'avg_acc': 49.97303646356598, 'loss': 9.482091903686523}


EP_train:0:  61%|| 16932/27626 [39:49<25:23,  7.02it/s]

{'epoch': 0, 'iter': 16930, 'avg_loss': 8.95354817051747, 'avg_acc': 49.97360610714075, 'loss': 8.387296676635742}


EP_train:0:  61%|| 16942/27626 [39:50<25:35,  6.96it/s]

{'epoch': 0, 'iter': 16940, 'avg_loss': 8.953464468679083, 'avg_acc': 49.976573106664304, 'loss': 8.782428741455078}


EP_train:0:  61%|| 16952/27626 [39:52<25:07,  7.08it/s]

{'epoch': 0, 'iter': 16950, 'avg_loss': 8.95317114656228, 'avg_acc': 49.97677128193027, 'loss': 8.624083518981934}


EP_train:0:  61%|| 16962/27626 [39:53<25:06,  7.08it/s]

{'epoch': 0, 'iter': 16960, 'avg_loss': 8.952926636979193, 'avg_acc': 49.97512676139378, 'loss': 7.897619724273682}


EP_train:0:  61%|| 16972/27626 [39:54<25:08,  7.06it/s]

{'epoch': 0, 'iter': 16970, 'avg_loss': 8.95255325030342, 'avg_acc': 49.974036591833126, 'loss': 8.148046493530273}


EP_train:0:  61%|| 16982/27626 [39:56<25:09,  7.05it/s]

{'epoch': 0, 'iter': 16980, 'avg_loss': 8.952257950042963, 'avg_acc': 49.97405188151463, 'loss': 8.592649459838867}


EP_train:0:  62%|| 16992/27626 [39:57<25:05,  7.06it/s]

{'epoch': 0, 'iter': 16990, 'avg_loss': 8.952025842837816, 'avg_acc': 49.97314754870225, 'loss': 7.712778568267822}


EP_train:0:  62%|| 17002/27626 [39:59<25:06,  7.05it/s]

{'epoch': 0, 'iter': 17000, 'avg_loss': 8.95159754033131, 'avg_acc': 49.97573672136933, 'loss': 7.85106086730957}


EP_train:0:  62%|| 17012/27626 [40:00<25:15,  7.00it/s]

{'epoch': 0, 'iter': 17010, 'avg_loss': 8.951403966902229, 'avg_acc': 49.97593468931868, 'loss': 8.094218254089355}


EP_train:0:  62%|| 17022/27626 [40:02<25:17,  6.99it/s]

{'epoch': 0, 'iter': 17020, 'avg_loss': 8.95120303119982, 'avg_acc': 49.97558163445156, 'loss': 8.64980411529541}


EP_train:0:  62%|| 17032/27626 [40:03<25:18,  6.98it/s]

{'epoch': 0, 'iter': 17030, 'avg_loss': 8.951008565174869, 'avg_acc': 49.97889877282603, 'loss': 9.010028839111328}


EP_train:0:  62%|| 17042/27626 [40:04<25:03,  7.04it/s]

{'epoch': 0, 'iter': 17040, 'avg_loss': 8.950743212273728, 'avg_acc': 49.977627486649844, 'loss': 8.670612335205078}


EP_train:0:  62%|| 17052/27626 [40:06<24:55,  7.07it/s]

{'epoch': 0, 'iter': 17050, 'avg_loss': 8.950446205749923, 'avg_acc': 49.97672423904756, 'loss': 8.84521198272705}


EP_train:0:  62%|| 17062/27626 [40:07<25:02,  7.03it/s]

{'epoch': 0, 'iter': 17060, 'avg_loss': 8.950229920519803, 'avg_acc': 49.975822050290134, 'loss': 9.232312202453613}


EP_train:0:  62%|| 17072/27626 [40:09<25:06,  7.01it/s]

{'epoch': 0, 'iter': 17070, 'avg_loss': 8.950128537812054, 'avg_acc': 49.977483744361784, 'loss': 9.190937042236328}


EP_train:0:  62%|| 17082/27626 [40:10<24:43,  7.11it/s]

{'epoch': 0, 'iter': 17080, 'avg_loss': 8.949886962922339, 'avg_acc': 49.97749692640946, 'loss': 9.051454544067383}


EP_train:0:  62%|| 17092/27626 [40:11<24:57,  7.03it/s]

{'epoch': 0, 'iter': 17090, 'avg_loss': 8.949829940903035, 'avg_acc': 49.976230179626704, 'loss': 9.147516250610352}


EP_train:0:  62%|| 17102/27626 [40:13<24:55,  7.04it/s]

{'epoch': 0, 'iter': 17100, 'avg_loss': 8.949686950274039, 'avg_acc': 49.97770598210631, 'loss': 8.94599437713623}


EP_train:0:  62%|| 17112/27626 [40:14<24:56,  7.03it/s]

{'epoch': 0, 'iter': 17110, 'avg_loss': 8.949392877604039, 'avg_acc': 49.97571006954591, 'loss': 7.909980773925781}


EP_train:0:  62%|| 17122/27626 [40:16<24:51,  7.04it/s]

{'epoch': 0, 'iter': 17120, 'avg_loss': 8.949063628001593, 'avg_acc': 49.978279598154316, 'loss': 7.733606338500977}


EP_train:0:  62%|| 17132/27626 [40:17<24:55,  7.02it/s]

{'epoch': 0, 'iter': 17130, 'avg_loss': 8.948691430118858, 'avg_acc': 49.97701535228533, 'loss': 9.686214447021484}


EP_train:0:  62%|| 17142/27626 [40:18<24:41,  7.08it/s]

{'epoch': 0, 'iter': 17140, 'avg_loss': 8.948647961789826, 'avg_acc': 49.976846450032085, 'loss': 8.38188362121582}


EP_train:0:  62%|| 17152/27626 [40:20<24:39,  7.08it/s]

{'epoch': 0, 'iter': 17150, 'avg_loss': 8.948461895940964, 'avg_acc': 49.976131129380214, 'loss': 8.670502662658691}


EP_train:0:  62%|| 17162/27626 [40:21<24:42,  7.06it/s]

{'epoch': 0, 'iter': 17160, 'avg_loss': 8.94815247665022, 'avg_acc': 49.975962939222654, 'loss': 8.047712326049805}


EP_train:0:  62%|| 17172/27626 [40:23<24:43,  7.05it/s]

{'epoch': 0, 'iter': 17170, 'avg_loss': 8.947870268179399, 'avg_acc': 49.97779686681032, 'loss': 8.553495407104492}


EP_train:0:  62%|| 17182/27626 [40:24<24:45,  7.03it/s]

{'epoch': 0, 'iter': 17180, 'avg_loss': 8.947746000594654, 'avg_acc': 49.9781735638205, 'loss': 9.045076370239258}


EP_train:0:  62%|| 17192/27626 [40:26<24:44,  7.03it/s]

{'epoch': 0, 'iter': 17190, 'avg_loss': 8.947774570820794, 'avg_acc': 49.97873160374615, 'loss': 8.243329048156738}


EP_train:0:  62%|| 17202/27626 [40:27<24:43,  7.03it/s]

{'epoch': 0, 'iter': 17200, 'avg_loss': 8.947550937842646, 'avg_acc': 49.97729056450206, 'loss': 8.44717025756836}


EP_train:0:  62%|| 17212/27626 [40:28<24:40,  7.03it/s]

{'epoch': 0, 'iter': 17210, 'avg_loss': 8.94727288785246, 'avg_acc': 49.9763959095927, 'loss': 8.82593059539795}


EP_train:0:  62%|| 17222/27626 [40:30<24:47,  7.00it/s]

{'epoch': 0, 'iter': 17220, 'avg_loss': 8.947186149888237, 'avg_acc': 49.97532082922014, 'loss': 9.725166320800781}


EP_train:0:  62%|| 17232/27626 [40:31<24:38,  7.03it/s]

{'epoch': 0, 'iter': 17230, 'avg_loss': 8.947017322046815, 'avg_acc': 49.97733010272184, 'loss': 7.944258689880371}


EP_train:0:  62%|| 17242/27626 [40:33<24:40,  7.02it/s]

{'epoch': 0, 'iter': 17240, 'avg_loss': 8.946944561007957, 'avg_acc': 49.9749869497129, 'loss': 8.053788185119629}


EP_train:0:  62%|| 17252/27626 [40:34<24:39,  7.01it/s]

{'epoch': 0, 'iter': 17250, 'avg_loss': 8.946859310153021, 'avg_acc': 49.97373340675903, 'loss': 8.875345230102539}


EP_train:0:  62%|| 17262/27626 [40:35<24:35,  7.02it/s]

{'epoch': 0, 'iter': 17260, 'avg_loss': 8.946767526998642, 'avg_acc': 49.972300272290134, 'loss': 8.230639457702637}


EP_train:0:  63%|| 17272/27626 [40:37<24:30,  7.04it/s]

{'epoch': 0, 'iter': 17270, 'avg_loss': 8.946620118327425, 'avg_acc': 49.97322100631116, 'loss': 8.949405670166016}


EP_train:0:  63%|| 17282/27626 [40:38<24:18,  7.09it/s]

{'epoch': 0, 'iter': 17280, 'avg_loss': 8.946484886919084, 'avg_acc': 49.97414067472947, 'loss': 8.373735427856445}


EP_train:0:  63%|| 17292/27626 [40:40<24:33,  7.01it/s]

{'epoch': 0, 'iter': 17290, 'avg_loss': 8.946358068097865, 'avg_acc': 49.97758949742641, 'loss': 8.102439880371094}


EP_train:0:  63%|| 17302/27626 [40:41<24:26,  7.04it/s]

{'epoch': 0, 'iter': 17300, 'avg_loss': 8.946405275964535, 'avg_acc': 49.97651869834114, 'loss': 9.267716407775879}


EP_train:0:  63%|| 17312/27626 [40:43<24:30,  7.01it/s]

{'epoch': 0, 'iter': 17310, 'avg_loss': 8.946084424490932, 'avg_acc': 49.97562965744324, 'loss': 8.217906951904297}


EP_train:0:  63%|| 17322/27626 [40:44<24:26,  7.03it/s]

{'epoch': 0, 'iter': 17320, 'avg_loss': 8.945902493167821, 'avg_acc': 49.97690664511287, 'loss': 8.072060585021973}


EP_train:0:  63%|| 17332/27626 [40:45<24:19,  7.05it/s]

{'epoch': 0, 'iter': 17330, 'avg_loss': 8.945873551701352, 'avg_acc': 49.97818215913681, 'loss': 8.0144681930542}


EP_train:0:  63%|| 17342/27626 [40:47<24:25,  7.02it/s]

{'epoch': 0, 'iter': 17340, 'avg_loss': 8.945729171560886, 'avg_acc': 49.97693327951099, 'loss': 8.33975601196289}


EP_train:0:  63%|| 17352/27626 [40:48<24:20,  7.04it/s]

{'epoch': 0, 'iter': 17350, 'avg_loss': 8.94566055124936, 'avg_acc': 49.976046049219065, 'loss': 8.821157455444336}


EP_train:0:  63%|| 17362/27626 [40:50<24:07,  7.09it/s]

{'epoch': 0, 'iter': 17360, 'avg_loss': 8.945471571431264, 'avg_acc': 49.975519843327, 'loss': 8.405231475830078}


EP_train:0:  63%|| 17372/27626 [40:51<24:15,  7.05it/s]

{'epoch': 0, 'iter': 17370, 'avg_loss': 8.945429283763781, 'avg_acc': 49.97445455068793, 'loss': 8.002241134643555}


EP_train:0:  63%|| 17382/27626 [40:52<24:20,  7.01it/s]

{'epoch': 0, 'iter': 17380, 'avg_loss': 8.945071841320532, 'avg_acc': 49.97608739428111, 'loss': 8.381595611572266}


EP_train:0:  63%|| 17392/27626 [40:54<24:13,  7.04it/s]

{'epoch': 0, 'iter': 17390, 'avg_loss': 8.944864945485438, 'avg_acc': 49.97592145362544, 'loss': 9.471964836120605}


EP_train:0:  63%|| 17402/27626 [40:55<24:11,  7.04it/s]

{'epoch': 0, 'iter': 17400, 'avg_loss': 8.94474499938502, 'avg_acc': 49.97449859203494, 'loss': 9.13742733001709}


EP_train:0:  63%|| 17412/27626 [40:57<24:09,  7.05it/s]

{'epoch': 0, 'iter': 17410, 'avg_loss': 8.944565853637938, 'avg_acc': 49.97505169145942, 'loss': 8.364912986755371}


EP_train:0:  63%|| 17422/27626 [40:58<24:06,  7.06it/s]

{'epoch': 0, 'iter': 17420, 'avg_loss': 8.94423393157325, 'avg_acc': 49.97668044314334, 'loss': 8.451186180114746}


EP_train:0:  63%|| 17432/27626 [40:59<24:06,  7.05it/s]

{'epoch': 0, 'iter': 17430, 'avg_loss': 8.944114703660468, 'avg_acc': 49.975438873271756, 'loss': 9.354939460754395}


EP_train:0:  63%|| 17442/27626 [41:01<23:59,  7.08it/s]

{'epoch': 0, 'iter': 17440, 'avg_loss': 8.943994896488858, 'avg_acc': 49.9743779026432, 'loss': 8.51047134399414}


EP_train:0:  63%|| 17452/27626 [41:02<23:58,  7.07it/s]

{'epoch': 0, 'iter': 17450, 'avg_loss': 8.944015730541487, 'avg_acc': 49.97152741963211, 'loss': 9.118717193603516}


EP_train:0:  63%|| 17462/27626 [41:04<23:50,  7.11it/s]

{'epoch': 0, 'iter': 17460, 'avg_loss': 8.943963633719115, 'avg_acc': 49.97046990435828, 'loss': 8.423352241516113}


EP_train:0:  63%|| 17472/27626 [41:05<24:09,  7.00it/s]

{'epoch': 0, 'iter': 17470, 'avg_loss': 8.943754725543302, 'avg_acc': 49.97048680670826, 'loss': 8.334210395812988}


EP_train:0:  63%|| 17482/27626 [41:07<24:08,  7.00it/s]

{'epoch': 0, 'iter': 17480, 'avg_loss': 8.943427039995788, 'avg_acc': 49.96925233110234, 'loss': 8.357187271118164}


EP_train:0:  63%|| 17492/27626 [41:08<24:08,  7.00it/s]

{'epoch': 0, 'iter': 17490, 'avg_loss': 8.943216872988145, 'avg_acc': 49.97069921674004, 'loss': 8.842442512512207}


EP_train:0:  63%|| 17502/27626 [41:09<24:09,  6.98it/s]

{'epoch': 0, 'iter': 17500, 'avg_loss': 8.94301499928633, 'avg_acc': 49.968930346837325, 'loss': 8.704845428466797}


EP_train:0:  63%|| 17512/27626 [41:11<23:59,  7.02it/s]

{'epoch': 0, 'iter': 17510, 'avg_loss': 8.942998039341893, 'avg_acc': 49.96805579350123, 'loss': 8.575141906738281}


EP_train:0:  63%|| 17522/27626 [41:12<23:47,  7.08it/s]

{'epoch': 0, 'iter': 17520, 'avg_loss': 8.942808218085597, 'avg_acc': 49.968609097654245, 'loss': 8.970468521118164}


EP_train:0:  63%|| 17532/27626 [41:14<23:50,  7.06it/s]

{'epoch': 0, 'iter': 17530, 'avg_loss': 8.942357929983276, 'avg_acc': 49.97147909417603, 'loss': 7.830226421356201}


EP_train:0:  63%|| 17542/27626 [41:15<23:43,  7.09it/s]

{'epoch': 0, 'iter': 17540, 'avg_loss': 8.942179042429494, 'avg_acc': 49.97274243201642, 'loss': 8.930282592773438}


EP_train:0:  64%|| 17552/27626 [41:16<23:49,  7.05it/s]

{'epoch': 0, 'iter': 17550, 'avg_loss': 8.94197689241904, 'avg_acc': 49.97257990997664, 'loss': 8.207015037536621}


EP_train:0:  64%|| 17562/27626 [41:18<23:50,  7.03it/s]

{'epoch': 0, 'iter': 17560, 'avg_loss': 8.941747072325311, 'avg_acc': 49.97277347531462, 'loss': 9.432695388793945}


EP_train:0:  64%|| 17572/27626 [41:19<23:54,  7.01it/s]

{'epoch': 0, 'iter': 17570, 'avg_loss': 8.941727005446902, 'avg_acc': 49.972433270730185, 'loss': 8.935779571533203}


EP_train:0:  64%|| 17582/27626 [41:21<24:04,  6.95it/s]

{'epoch': 0, 'iter': 17580, 'avg_loss': 8.941677417617345, 'avg_acc': 49.97191570445367, 'loss': 8.129056930541992}


EP_train:0:  64%|| 17592/27626 [41:22<23:41,  7.06it/s]

{'epoch': 0, 'iter': 17590, 'avg_loss': 8.941499042006798, 'avg_acc': 49.97246461258598, 'loss': 8.842215538024902}


EP_train:0:  64%|| 17602/27626 [41:23<23:36,  7.08it/s]

{'epoch': 0, 'iter': 17600, 'avg_loss': 8.941412299473539, 'avg_acc': 49.970349696039996, 'loss': 8.52616024017334}


EP_train:0:  64%|| 17612/27626 [41:25<23:29,  7.11it/s]

{'epoch': 0, 'iter': 17610, 'avg_loss': 8.941371103708805, 'avg_acc': 49.97072142410993, 'loss': 7.815285682678223}


EP_train:0:  64%|| 17622/27626 [41:26<23:49,  7.00it/s]

{'epoch': 0, 'iter': 17620, 'avg_loss': 8.94101332619623, 'avg_acc': 49.968432552068556, 'loss': 8.022841453552246}


EP_train:0:  64%|| 17632/27626 [41:28<23:34,  7.07it/s]

{'epoch': 0, 'iter': 17630, 'avg_loss': 8.94047704442645, 'avg_acc': 49.96845045658216, 'loss': 7.432144641876221}


EP_train:0:  64%|| 17642/27626 [41:29<23:28,  7.09it/s]

{'epoch': 0, 'iter': 17640, 'avg_loss': 8.940269464079021, 'avg_acc': 49.96953120571396, 'loss': 8.812713623046875}


EP_train:0:  64%|| 17652/27626 [41:31<23:40,  7.02it/s]

{'epoch': 0, 'iter': 17650, 'avg_loss': 8.939982947138347, 'avg_acc': 49.97096481785734, 'loss': 9.152883529663086}


EP_train:0:  64%|| 17662/27626 [41:32<23:46,  6.99it/s]

{'epoch': 0, 'iter': 17660, 'avg_loss': 8.939822173740929, 'avg_acc': 49.96956570975596, 'loss': 9.094018936157227}


EP_train:0:  64%|| 17672/27626 [41:33<23:32,  7.05it/s]

{'epoch': 0, 'iter': 17670, 'avg_loss': 8.939735984176213, 'avg_acc': 49.970113462735554, 'loss': 9.16689682006836}


EP_train:0:  64%|| 17682/27626 [41:35<23:29,  7.06it/s]

{'epoch': 0, 'iter': 17680, 'avg_loss': 8.939703382954097, 'avg_acc': 49.97066059612013, 'loss': 8.73012924194336}


EP_train:0:  64%|| 17692/27626 [41:36<23:26,  7.06it/s]

{'epoch': 0, 'iter': 17690, 'avg_loss': 8.939532011698507, 'avg_acc': 49.97332683285286, 'loss': 8.803055763244629}


EP_train:0:  64%|| 17702/27626 [41:38<23:25,  7.06it/s]

{'epoch': 0, 'iter': 17700, 'avg_loss': 8.939474018743015, 'avg_acc': 49.97281227049319, 'loss': 8.296055793762207}


EP_train:0:  64%|| 17712/27626 [41:39<23:35,  7.00it/s]

{'epoch': 0, 'iter': 17710, 'avg_loss': 8.939160102333558, 'avg_acc': 49.97582716955564, 'loss': 8.114949226379395}


EP_train:0:  64%|| 17722/27626 [41:40<23:16,  7.09it/s]

{'epoch': 0, 'iter': 17720, 'avg_loss': 8.93898216727275, 'avg_acc': 49.97830963263924, 'loss': 8.742164611816406}


EP_train:0:  64%|| 17732/27626 [41:42<23:16,  7.09it/s]

{'epoch': 0, 'iter': 17730, 'avg_loss': 8.938945492702548, 'avg_acc': 49.979203090632225, 'loss': 9.273048400878906}


EP_train:0:  64%|| 17742/27626 [41:43<23:30,  7.01it/s]

{'epoch': 0, 'iter': 17740, 'avg_loss': 8.938593602625307, 'avg_acc': 49.976396482723636, 'loss': 8.122444152832031}


EP_train:0:  64%|| 17752/27626 [41:45<23:28,  7.01it/s]

{'epoch': 0, 'iter': 17750, 'avg_loss': 8.938317524803221, 'avg_acc': 49.976057686890876, 'loss': 8.57018756866455}


EP_train:0:  64%|| 17762/27626 [41:46<23:26,  7.01it/s]

{'epoch': 0, 'iter': 17760, 'avg_loss': 8.938201428453072, 'avg_acc': 49.975367377962954, 'loss': 9.188807487487793}


EP_train:0:  64%|| 17772/27626 [41:47<23:14,  7.07it/s]

{'epoch': 0, 'iter': 17770, 'avg_loss': 8.937994772820003, 'avg_acc': 49.977315570311184, 'loss': 9.039320945739746}


EP_train:0:  64%|| 17782/27626 [41:49<23:09,  7.08it/s]

{'epoch': 0, 'iter': 17780, 'avg_loss': 8.937719879318802, 'avg_acc': 49.97732832799055, 'loss': 8.552730560302734}


EP_train:0:  64%|| 17792/27626 [41:50<23:03,  7.11it/s]

{'epoch': 0, 'iter': 17790, 'avg_loss': 8.937548682712583, 'avg_acc': 49.97681411949863, 'loss': 8.390826225280762}


EP_train:0:  64%|| 17802/27626 [41:52<22:58,  7.13it/s]

{'epoch': 0, 'iter': 17800, 'avg_loss': 8.937365013805309, 'avg_acc': 49.97559828099545, 'loss': 9.071516990661621}


EP_train:0:  64%|| 17812/27626 [41:53<22:59,  7.11it/s]

{'epoch': 0, 'iter': 17810, 'avg_loss': 8.937191628678912, 'avg_acc': 49.97350654090169, 'loss': 8.850693702697754}


EP_train:0:  65%|| 17822/27626 [41:55<23:08,  7.06it/s]

{'epoch': 0, 'iter': 17820, 'avg_loss': 8.936938880797113, 'avg_acc': 49.97492424667527, 'loss': 8.683331489562988}


EP_train:0:  65%|| 17832/27626 [41:56<23:09,  7.05it/s]

{'epoch': 0, 'iter': 17830, 'avg_loss': 8.936765204181407, 'avg_acc': 49.973886770231616, 'loss': 7.9673051834106445}


EP_train:0:  65%|| 17842/27626 [41:57<23:18,  6.99it/s]

{'epoch': 0, 'iter': 17840, 'avg_loss': 8.936400992126918, 'avg_acc': 49.97355109018552, 'loss': 8.1141939163208}


EP_train:0:  65%|| 17852/27626 [41:59<23:06,  7.05it/s]

{'epoch': 0, 'iter': 17850, 'avg_loss': 8.936248832507317, 'avg_acc': 49.9747913282169, 'loss': 8.664273262023926}


EP_train:0:  65%|| 17862/27626 [42:00<22:59,  7.08it/s]

{'epoch': 0, 'iter': 17860, 'avg_loss': 8.93598408862155, 'avg_acc': 49.97393063098371, 'loss': 9.411645889282227}


EP_train:0:  65%|| 17872/27626 [42:02<23:04,  7.04it/s]

{'epoch': 0, 'iter': 17870, 'avg_loss': 8.935953219718307, 'avg_acc': 49.97569386156343, 'loss': 8.938459396362305}


EP_train:0:  65%|| 17882/27626 [42:03<22:50,  7.11it/s]

{'epoch': 0, 'iter': 17880, 'avg_loss': 8.93606441556044, 'avg_acc': 49.977629886471675, 'loss': 8.633835792541504}


EP_train:0:  65%|| 17892/27626 [42:04<22:52,  7.09it/s]

{'epoch': 0, 'iter': 17890, 'avg_loss': 8.93616037319201, 'avg_acc': 49.97851573416801, 'loss': 9.59203052520752}


EP_train:0:  65%|| 17902/27626 [42:06<22:59,  7.05it/s]

{'epoch': 0, 'iter': 17900, 'avg_loss': 8.936176609682654, 'avg_acc': 49.97748030836266, 'loss': 9.143491744995117}


EP_train:0:  65%|| 17912/27626 [42:07<22:51,  7.08it/s]

{'epoch': 0, 'iter': 17910, 'avg_loss': 8.935905369194261, 'avg_acc': 49.97801630283066, 'loss': 9.69775104522705}


EP_train:0:  65%|| 17922/27626 [42:09<22:58,  7.04it/s]

{'epoch': 0, 'iter': 17920, 'avg_loss': 8.935767376544613, 'avg_acc': 49.97820294626416, 'loss': 8.962224006652832}


EP_train:0:  65%|| 17932/27626 [42:10<23:01,  7.02it/s]

{'epoch': 0, 'iter': 17930, 'avg_loss': 8.935996404475187, 'avg_acc': 49.97804082315543, 'loss': 9.737544059753418}


EP_train:0:  65%|| 17942/27626 [42:11<22:57,  7.03it/s]

{'epoch': 0, 'iter': 17940, 'avg_loss': 8.93604022035577, 'avg_acc': 49.9764854244468, 'loss': 8.677916526794434}


EP_train:0:  65%|| 17952/27626 [42:13<22:42,  7.10it/s]

{'epoch': 0, 'iter': 17950, 'avg_loss': 8.935915890540075, 'avg_acc': 49.975454013703974, 'loss': 8.564939498901367}


EP_train:0:  65%|| 17962/27626 [42:14<22:50,  7.05it/s]

{'epoch': 0, 'iter': 17960, 'avg_loss': 8.935901904834896, 'avg_acc': 49.97390178720561, 'loss': 8.002103805541992}


EP_train:0:  65%|| 17972/27626 [42:16<22:41,  7.09it/s]

{'epoch': 0, 'iter': 17970, 'avg_loss': 8.935654128893317, 'avg_acc': 49.97443798341773, 'loss': 8.267971992492676}


EP_train:0:  65%|| 17982/27626 [42:17<22:38,  7.10it/s]

{'epoch': 0, 'iter': 17980, 'avg_loss': 8.935402388806326, 'avg_acc': 49.973235637617485, 'loss': 8.644037246704102}


EP_train:0:  65%|| 17992/27626 [42:18<22:51,  7.03it/s]

{'epoch': 0, 'iter': 17990, 'avg_loss': 8.935225142995828, 'avg_acc': 49.97029764882441, 'loss': 9.158393859863281}


EP_train:0:  65%|| 18002/27626 [42:20<22:52,  7.01it/s]

{'epoch': 0, 'iter': 18000, 'avg_loss': 8.934915221444065, 'avg_acc': 49.96892533748125, 'loss': 8.615914344787598}


EP_train:0:  65%|| 18012/27626 [42:21<22:49,  7.02it/s]

{'epoch': 0, 'iter': 18010, 'avg_loss': 8.934625216336817, 'avg_acc': 49.9685955804786, 'loss': 8.71900749206543}


EP_train:0:  65%|| 18022/27626 [42:23<22:40,  7.06it/s]

{'epoch': 0, 'iter': 18020, 'avg_loss': 8.934379521091522, 'avg_acc': 49.97104073025914, 'loss': 8.480673789978027}


EP_train:0:  65%|| 18032/27626 [42:24<22:43,  7.03it/s]

{'epoch': 0, 'iter': 18030, 'avg_loss': 8.934135407604424, 'avg_acc': 49.97105679108203, 'loss': 8.12226676940918}


EP_train:0:  65%|| 18042/27626 [42:26<22:33,  7.08it/s]

{'epoch': 0, 'iter': 18040, 'avg_loss': 8.934111849100454, 'avg_acc': 49.972458566598306, 'loss': 9.034911155700684}


EP_train:0:  65%|| 18052/27626 [42:27<22:43,  7.02it/s]

{'epoch': 0, 'iter': 18050, 'avg_loss': 8.934081971919422, 'avg_acc': 49.972820065370335, 'loss': 8.655217170715332}


EP_train:0:  65%|| 18062/27626 [42:28<22:36,  7.05it/s]

{'epoch': 0, 'iter': 18060, 'avg_loss': 8.934032863242887, 'avg_acc': 49.97162394108853, 'loss': 9.055633544921875}


EP_train:0:  65%|| 18072/27626 [42:30<22:25,  7.10it/s]

{'epoch': 0, 'iter': 18070, 'avg_loss': 8.933690700512008, 'avg_acc': 49.971120856621106, 'loss': 8.236084938049316}


EP_train:0:  65%|| 18082/27626 [42:31<22:33,  7.05it/s]

{'epoch': 0, 'iter': 18080, 'avg_loss': 8.933619319453097, 'avg_acc': 49.97009982854931, 'loss': 9.268937110900879}


EP_train:0:  65%|| 18092/27626 [42:33<22:56,  6.93it/s]

{'epoch': 0, 'iter': 18090, 'avg_loss': 8.933377408314474, 'avg_acc': 49.96907992924658, 'loss': 8.472496032714844}


EP_train:0:  66%|| 18102/27626 [42:34<22:37,  7.02it/s]

{'epoch': 0, 'iter': 18100, 'avg_loss': 8.93321738617966, 'avg_acc': 49.97220457433291, 'loss': 9.210378646850586}


EP_train:0:  66%|| 18112/27626 [42:36<22:40,  6.99it/s]

{'epoch': 0, 'iter': 18110, 'avg_loss': 8.933011575035056, 'avg_acc': 49.970839545027886, 'loss': 8.469016075134277}


EP_train:0:  66%|| 18122/27626 [42:37<22:26,  7.06it/s]

{'epoch': 0, 'iter': 18120, 'avg_loss': 8.932698195504665, 'avg_acc': 49.96982092599746, 'loss': 8.936331748962402}


EP_train:0:  66%|| 18132/27626 [42:38<22:22,  7.07it/s]

{'epoch': 0, 'iter': 18130, 'avg_loss': 8.93270073438807, 'avg_acc': 49.96949285753682, 'loss': 9.413637161254883}


EP_train:0:  66%|| 18142/27626 [42:40<22:18,  7.08it/s]

{'epoch': 0, 'iter': 18140, 'avg_loss': 8.932684625511323, 'avg_acc': 49.96933741249104, 'loss': 9.921698570251465}


EP_train:0:  66%|| 18152/27626 [42:41<22:17,  7.08it/s]

{'epoch': 0, 'iter': 18150, 'avg_loss': 8.93254501979467, 'avg_acc': 49.9669439700292, 'loss': 8.705018043518066}


EP_train:0:  66%|| 18162/27626 [42:43<22:16,  7.08it/s]

{'epoch': 0, 'iter': 18160, 'avg_loss': 8.93242051248134, 'avg_acc': 49.96747838775398, 'loss': 8.86812973022461}


EP_train:0:  66%|| 18172/27626 [42:44<22:21,  7.05it/s]

{'epoch': 0, 'iter': 18170, 'avg_loss': 8.932365763870347, 'avg_acc': 49.965604534698144, 'loss': 9.419852256774902}


EP_train:0:  66%|| 18182/27626 [42:45<22:16,  7.07it/s]

{'epoch': 0, 'iter': 18180, 'avg_loss': 8.93224700885561, 'avg_acc': 49.96390462570815, 'loss': 8.386767387390137}


EP_train:0:  66%|| 18192/27626 [42:47<22:33,  6.97it/s]

{'epoch': 0, 'iter': 18190, 'avg_loss': 8.932054955519863, 'avg_acc': 49.96237837392117, 'loss': 8.875889778137207}


EP_train:0:  66%|| 18202/27626 [42:48<22:25,  7.01it/s]

{'epoch': 0, 'iter': 18200, 'avg_loss': 8.931784026472524, 'avg_acc': 49.96188396241965, 'loss': 7.816653251647949}


EP_train:0:  66%|| 18212/27626 [42:50<22:23,  7.00it/s]

{'epoch': 0, 'iter': 18210, 'avg_loss': 8.931510167134718, 'avg_acc': 49.96310608972599, 'loss': 9.55915641784668}


EP_train:0:  66%|| 18222/27626 [42:51<22:14,  7.05it/s]

{'epoch': 0, 'iter': 18220, 'avg_loss': 8.93136150358956, 'avg_acc': 49.96278332693046, 'loss': 8.709259986877441}


EP_train:0:  66%|| 18232/27626 [42:52<22:17,  7.02it/s]

{'epoch': 0, 'iter': 18230, 'avg_loss': 8.931174429410719, 'avg_acc': 49.963660797542644, 'loss': 8.506089210510254}


EP_train:0:  66%|| 18242/27626 [42:54<22:02,  7.10it/s]

{'epoch': 0, 'iter': 18240, 'avg_loss': 8.931070523842092, 'avg_acc': 49.96522257551669, 'loss': 8.57809829711914}


EP_train:0:  66%|| 18252/27626 [42:55<22:04,  7.08it/s]

{'epoch': 0, 'iter': 18250, 'avg_loss': 8.930940467137832, 'avg_acc': 49.96472796011177, 'loss': 8.523139953613281}


EP_train:0:  66%|| 18262/27626 [42:57<22:11,  7.03it/s]

{'epoch': 0, 'iter': 18260, 'avg_loss': 8.930946608896905, 'avg_acc': 49.965945183724884, 'loss': 8.9864501953125}


EP_train:0:  66%|| 18272/27626 [42:58<22:07,  7.05it/s]

{'epoch': 0, 'iter': 18270, 'avg_loss': 8.93066719601251, 'avg_acc': 49.96425346177002, 'loss': 7.7161054611206055}


EP_train:0:  66%|| 18282/27626 [43:00<22:07,  7.04it/s]

{'epoch': 0, 'iter': 18280, 'avg_loss': 8.930218373625161, 'avg_acc': 49.96393113068213, 'loss': 8.696572303771973}


EP_train:0:  66%|| 18292/27626 [43:01<22:04,  7.04it/s]

{'epoch': 0, 'iter': 18290, 'avg_loss': 8.930015135225906, 'avg_acc': 49.965146793505, 'loss': 8.052227020263672}


EP_train:0:  66%|| 18302/27626 [43:02<22:00,  7.06it/s]

{'epoch': 0, 'iter': 18300, 'avg_loss': 8.929868888375564, 'avg_acc': 49.96362903666466, 'loss': 8.66238021850586}


EP_train:0:  66%|| 18312/27626 [43:04<21:55,  7.08it/s]

{'epoch': 0, 'iter': 18310, 'avg_loss': 8.92984059800821, 'avg_acc': 49.96160095024849, 'loss': 8.82557487487793}


EP_train:0:  66%|| 18322/27626 [43:05<22:02,  7.04it/s]

{'epoch': 0, 'iter': 18320, 'avg_loss': 8.929962813063653, 'avg_acc': 49.961963047868565, 'loss': 8.904389381408691}


EP_train:0:  66%|| 18332/27626 [43:07<21:57,  7.05it/s]

{'epoch': 0, 'iter': 18330, 'avg_loss': 8.929804074238147, 'avg_acc': 49.963006655392505, 'loss': 8.812355995178223}


EP_train:0:  66%|| 18342/27626 [43:08<21:57,  7.05it/s]

{'epoch': 0, 'iter': 18340, 'avg_loss': 8.929574027393462, 'avg_acc': 49.961152608909, 'loss': 8.801645278930664}


EP_train:0:  66%|| 18352/27626 [43:09<21:52,  7.06it/s]

{'epoch': 0, 'iter': 18350, 'avg_loss': 8.92933110513646, 'avg_acc': 49.963046972917006, 'loss': 8.60921573638916}


EP_train:0:  66%|| 18362/27626 [43:11<21:58,  7.03it/s]

{'epoch': 0, 'iter': 18360, 'avg_loss': 8.929203525217236, 'avg_acc': 49.962045912532, 'loss': 9.116233825683594}


EP_train:0:  67%|| 18372/27626 [43:12<21:56,  7.03it/s]

{'epoch': 0, 'iter': 18370, 'avg_loss': 8.929044503196453, 'avg_acc': 49.961045941973765, 'loss': 8.341423034667969}


EP_train:0:  67%|| 18382/27626 [43:14<21:56,  7.02it/s]

{'epoch': 0, 'iter': 18380, 'avg_loss': 8.928921104054684, 'avg_acc': 49.95987704695066, 'loss': 8.384428977966309}


EP_train:0:  67%|| 18392/27626 [43:15<21:46,  7.07it/s]

{'epoch': 0, 'iter': 18390, 'avg_loss': 8.928819783132816, 'avg_acc': 49.960068783644175, 'loss': 8.556859016418457}


EP_train:0:  67%|| 18402/27626 [43:16<21:49,  7.05it/s]

{'epoch': 0, 'iter': 18400, 'avg_loss': 8.928788521815743, 'avg_acc': 49.96093962284658, 'loss': 9.220185279846191}


EP_train:0:  67%|| 18412/27626 [43:18<21:49,  7.04it/s]

{'epoch': 0, 'iter': 18410, 'avg_loss': 8.92864795725943, 'avg_acc': 49.96248845798708, 'loss': 8.958009719848633}


EP_train:0:  67%|| 18422/27626 [43:19<21:43,  7.06it/s]

{'epoch': 0, 'iter': 18420, 'avg_loss': 8.928609051861532, 'avg_acc': 49.963017751479285, 'loss': 7.816459655761719}


EP_train:0:  67%|| 18432/27626 [43:21<21:40,  7.07it/s]

{'epoch': 0, 'iter': 18430, 'avg_loss': 8.928295504968583, 'avg_acc': 49.96066409852965, 'loss': 7.780099868774414}


EP_train:0:  67%|| 18442/27626 [43:22<21:47,  7.02it/s]

{'epoch': 0, 'iter': 18440, 'avg_loss': 8.928113084314935, 'avg_acc': 49.959329754351714, 'loss': 8.167320251464844}


EP_train:0:  67%|| 18452/27626 [43:24<21:47,  7.01it/s]

{'epoch': 0, 'iter': 18450, 'avg_loss': 8.927879010117232, 'avg_acc': 49.95596444637147, 'loss': 8.591848373413086}


EP_train:0:  67%|| 18462/27626 [43:25<21:49,  7.00it/s]

{'epoch': 0, 'iter': 18460, 'avg_loss': 8.927897000647297, 'avg_acc': 49.95531119657657, 'loss': 8.269553184509277}


EP_train:0:  67%|| 18472/27626 [43:26<21:46,  7.00it/s]

{'epoch': 0, 'iter': 18470, 'avg_loss': 8.927739760670597, 'avg_acc': 49.95465865410644, 'loss': 8.877274513244629}


EP_train:0:  67%|| 18482/27626 [43:28<21:44,  7.01it/s]

{'epoch': 0, 'iter': 18480, 'avg_loss': 8.927532226387463, 'avg_acc': 49.95485228072074, 'loss': 8.559508323669434}


EP_train:0:  67%|| 18492/27626 [43:29<21:31,  7.07it/s]

{'epoch': 0, 'iter': 18490, 'avg_loss': 8.927463379399308, 'avg_acc': 49.95724271267103, 'loss': 8.723028182983398}


EP_train:0:  67%|| 18502/27626 [43:31<21:34,  7.05it/s]

{'epoch': 0, 'iter': 18500, 'avg_loss': 8.927329610279525, 'avg_acc': 49.95760364304632, 'loss': 8.015806198120117}


EP_train:0:  67%|| 18512/27626 [43:32<21:28,  7.07it/s]

{'epoch': 0, 'iter': 18510, 'avg_loss': 8.927182028149485, 'avg_acc': 49.957795364918155, 'loss': 7.863504886627197}


EP_train:0:  67%|| 18522/27626 [43:33<21:48,  6.96it/s]

{'epoch': 0, 'iter': 18520, 'avg_loss': 8.92706405660142, 'avg_acc': 49.95781815236759, 'loss': 8.366250038146973}


EP_train:0:  67%|| 18532/27626 [43:35<21:30,  7.05it/s]

{'epoch': 0, 'iter': 18530, 'avg_loss': 8.926894845093303, 'avg_acc': 49.95784091522314, 'loss': 7.719793319702148}


EP_train:0:  67%|| 18542/27626 [43:36<21:31,  7.03it/s]

{'epoch': 0, 'iter': 18540, 'avg_loss': 8.92668468392427, 'avg_acc': 49.95853783506823, 'loss': 8.603797912597656}


EP_train:0:  67%|| 18552/27626 [43:38<21:38,  6.99it/s]

{'epoch': 0, 'iter': 18550, 'avg_loss': 8.92680104791196, 'avg_acc': 49.956538731065706, 'loss': 8.875823974609375}


EP_train:0:  67%|| 18562/27626 [43:39<21:34,  7.00it/s]

{'epoch': 0, 'iter': 18560, 'avg_loss': 8.926609848730074, 'avg_acc': 49.95572032756856, 'loss': 8.8691987991333}


EP_train:0:  67%|| 18572/27626 [43:40<21:16,  7.09it/s]

{'epoch': 0, 'iter': 18570, 'avg_loss': 8.926403353026954, 'avg_acc': 49.957090355931285, 'loss': 8.614982604980469}


EP_train:0:  67%|| 18582/27626 [43:42<21:25,  7.04it/s]

{'epoch': 0, 'iter': 18580, 'avg_loss': 8.92626007121241, 'avg_acc': 49.9577861794306, 'loss': 8.706321716308594}


EP_train:0:  67%|| 18592/27626 [43:43<21:23,  7.04it/s]

{'epoch': 0, 'iter': 18590, 'avg_loss': 8.925947636851616, 'avg_acc': 49.95747270184498, 'loss': 8.287166595458984}


EP_train:0:  67%|| 18602/27626 [43:45<21:15,  7.07it/s]

{'epoch': 0, 'iter': 18600, 'avg_loss': 8.925674752505657, 'avg_acc': 49.95615155099188, 'loss': 9.577744483947754}


EP_train:0:  67%|| 18612/27626 [43:46<21:09,  7.10it/s]

{'epoch': 0, 'iter': 18610, 'avg_loss': 8.925558794435748, 'avg_acc': 49.956678845843854, 'loss': 8.884344100952148}


EP_train:0:  67%|| 18622/27626 [43:48<21:11,  7.08it/s]

{'epoch': 0, 'iter': 18620, 'avg_loss': 8.925444130712892, 'avg_acc': 49.95905160839912, 'loss': 8.60030460357666}


EP_train:0:  67%|| 18632/27626 [43:49<21:16,  7.05it/s]

{'epoch': 0, 'iter': 18630, 'avg_loss': 8.925246941768812, 'avg_acc': 49.95974451183511, 'loss': 8.527795791625977}


EP_train:0:  67%|| 18642/27626 [43:50<21:17,  7.03it/s]

{'epoch': 0, 'iter': 18640, 'avg_loss': 8.925067167618225, 'avg_acc': 49.96244836650394, 'loss': 8.196202278137207}


EP_train:0:  68%|| 18652/27626 [43:52<21:25,  6.98it/s]

{'epoch': 0, 'iter': 18650, 'avg_loss': 8.924728877652676, 'avg_acc': 49.96263605168624, 'loss': 7.7520575523376465}


EP_train:0:  68%|| 18662/27626 [43:53<21:04,  7.09it/s]

{'epoch': 0, 'iter': 18660, 'avg_loss': 8.924718080651152, 'avg_acc': 49.964665612775306, 'loss': 8.250060081481934}


EP_train:0:  68%|| 18672/27626 [43:55<21:08,  7.06it/s]

{'epoch': 0, 'iter': 18670, 'avg_loss': 8.924658231478308, 'avg_acc': 49.963010818917034, 'loss': 8.92019271850586}


EP_train:0:  68%|| 18682/27626 [43:56<21:02,  7.08it/s]

{'epoch': 0, 'iter': 18680, 'avg_loss': 8.924662921778504, 'avg_acc': 49.96303061934586, 'loss': 9.219502449035645}


EP_train:0:  68%|| 18692/27626 [43:57<21:07,  7.05it/s]

{'epoch': 0, 'iter': 18690, 'avg_loss': 8.924553556877488, 'avg_acc': 49.964053555186986, 'loss': 8.562278747558594}


EP_train:0:  68%|| 18702/27626 [43:59<21:06,  7.05it/s]

{'epoch': 0, 'iter': 18700, 'avg_loss': 8.924256716263587, 'avg_acc': 49.96407277685685, 'loss': 8.272028923034668}


EP_train:0:  68%|| 18712/27626 [44:00<21:01,  7.07it/s]

{'epoch': 0, 'iter': 18710, 'avg_loss': 8.924026092033678, 'avg_acc': 49.961753781198226, 'loss': 9.136309623718262}


EP_train:0:  68%|| 18722/27626 [44:02<21:03,  7.05it/s]

{'epoch': 0, 'iter': 18720, 'avg_loss': 8.923904561581287, 'avg_acc': 49.961607285935585, 'loss': 7.086485385894775}


EP_train:0:  68%|| 18732/27626 [44:03<20:59,  7.06it/s]

{'epoch': 0, 'iter': 18730, 'avg_loss': 8.923545535099143, 'avg_acc': 49.963462975815496, 'loss': 8.272289276123047}


EP_train:0:  68%|| 18742/27626 [44:05<21:03,  7.03it/s]

{'epoch': 0, 'iter': 18740, 'avg_loss': 8.923291496797736, 'avg_acc': 49.96331572488128, 'loss': 8.434407234191895}


EP_train:0:  68%|| 18752/27626 [44:06<20:57,  7.05it/s]

{'epoch': 0, 'iter': 18750, 'avg_loss': 8.923322763743576, 'avg_acc': 49.966501786571385, 'loss': 8.18376350402832}


EP_train:0:  68%|| 18762/27626 [44:07<20:54,  7.06it/s]

{'epoch': 0, 'iter': 18760, 'avg_loss': 8.923350489436293, 'avg_acc': 49.96885160705719, 'loss': 8.38908576965332}


EP_train:0:  68%|| 18772/27626 [44:09<20:47,  7.10it/s]

{'epoch': 0, 'iter': 18770, 'avg_loss': 8.92322967124985, 'avg_acc': 49.969700601992436, 'loss': 8.469579696655273}


EP_train:0:  68%|| 18782/27626 [44:10<20:50,  7.07it/s]

{'epoch': 0, 'iter': 18780, 'avg_loss': 8.922932383592794, 'avg_acc': 49.968052819338695, 'loss': 7.650664806365967}


EP_train:0:  68%|| 18792/27626 [44:12<20:51,  7.06it/s]

{'epoch': 0, 'iter': 18790, 'avg_loss': 8.922647035519685, 'avg_acc': 49.96823612367623, 'loss': 8.942036628723145}


EP_train:0:  68%|| 18802/27626 [44:13<20:53,  7.04it/s]

{'epoch': 0, 'iter': 18800, 'avg_loss': 8.922452545135581, 'avg_acc': 49.96808680389341, 'loss': 8.341215133666992}


EP_train:0:  68%|| 18812/27626 [44:14<20:42,  7.09it/s]

{'epoch': 0, 'iter': 18810, 'avg_loss': 8.922312055032831, 'avg_acc': 49.96826989527404, 'loss': 8.695045471191406}


EP_train:0:  68%|| 18822/27626 [44:16<20:48,  7.05it/s]

{'epoch': 0, 'iter': 18820, 'avg_loss': 8.922065267172195, 'avg_acc': 49.97061128526646, 'loss': 8.476241111755371}


EP_train:0:  68%|| 18832/27626 [44:17<21:02,  6.97it/s]

{'epoch': 0, 'iter': 18830, 'avg_loss': 8.921969404033705, 'avg_acc': 49.96880144442674, 'loss': 9.11434555053711}


EP_train:0:  68%|| 18842/27626 [44:19<20:47,  7.04it/s]

{'epoch': 0, 'iter': 18840, 'avg_loss': 8.92192446505351, 'avg_acc': 49.96682766307521, 'loss': 7.91393518447876}


EP_train:0:  68%|| 18852/27626 [44:20<20:40,  7.07it/s]

{'epoch': 0, 'iter': 18850, 'avg_loss': 8.921596917933567, 'avg_acc': 49.965519070606334, 'loss': 8.185752868652344}


EP_train:0:  68%|| 18862/27626 [44:21<20:44,  7.04it/s]

{'epoch': 0, 'iter': 18860, 'avg_loss': 8.921541070060837, 'avg_acc': 49.96338343672127, 'loss': 9.007736206054688}


EP_train:0:  68%|| 18872/27626 [44:23<20:34,  7.09it/s]

{'epoch': 0, 'iter': 18870, 'avg_loss': 8.921415074705086, 'avg_acc': 49.9619124582693, 'loss': 9.569897651672363}


EP_train:0:  68%|| 18882/27626 [44:24<20:40,  7.05it/s]

{'epoch': 0, 'iter': 18880, 'avg_loss': 8.92128138905593, 'avg_acc': 49.96226365128965, 'loss': 8.236536026000977}


EP_train:0:  68%|| 18892/27626 [44:26<20:38,  7.05it/s]

{'epoch': 0, 'iter': 18890, 'avg_loss': 8.921012821952736, 'avg_acc': 49.96459954475676, 'loss': 8.511008262634277}


EP_train:0:  68%|| 18902/27626 [44:27<20:48,  6.99it/s]

{'epoch': 0, 'iter': 18900, 'avg_loss': 8.921134773823743, 'avg_acc': 49.96643696100735, 'loss': 9.425281524658203}


EP_train:0:  68%|| 18912/27626 [44:28<20:44,  7.00it/s]

{'epoch': 0, 'iter': 18910, 'avg_loss': 8.92137749615808, 'avg_acc': 49.967280947596635, 'loss': 9.284300804138184}


EP_train:0:  68%|| 18922/27626 [44:30<20:35,  7.04it/s]

{'epoch': 0, 'iter': 18920, 'avg_loss': 8.921275563777094, 'avg_acc': 49.96828920247344, 'loss': 8.484882354736328}


EP_train:0:  69%|| 18932/27626 [44:31<20:31,  7.06it/s]

{'epoch': 0, 'iter': 18930, 'avg_loss': 8.921089785695107, 'avg_acc': 49.96665522159421, 'loss': 9.314109802246094}


EP_train:0:  69%|| 18942/27626 [44:33<20:38,  7.01it/s]

{'epoch': 0, 'iter': 18940, 'avg_loss': 8.920822898750352, 'avg_acc': 49.9671677841719, 'loss': 8.771401405334473}


EP_train:0:  69%|| 18952/27626 [44:34<20:27,  7.07it/s]

{'epoch': 0, 'iter': 18950, 'avg_loss': 8.920681664625526, 'avg_acc': 49.96685531106538, 'loss': 8.833670616149902}


EP_train:0:  69%|| 18962/27626 [44:36<20:23,  7.08it/s]

{'epoch': 0, 'iter': 18960, 'avg_loss': 8.920700577063485, 'avg_acc': 49.96736722746691, 'loss': 8.834755897521973}


EP_train:0:  69%|| 18972/27626 [44:37<20:22,  7.08it/s]

{'epoch': 0, 'iter': 18970, 'avg_loss': 8.920512606112206, 'avg_acc': 49.96540772758421, 'loss': 8.346437454223633}


EP_train:0:  69%|| 18982/27626 [44:38<20:24,  7.06it/s]

{'epoch': 0, 'iter': 18980, 'avg_loss': 8.920364485896869, 'avg_acc': 49.96279173910752, 'loss': 9.51174259185791}


EP_train:0:  69%|| 18992/27626 [44:40<20:19,  7.08it/s]

{'epoch': 0, 'iter': 18990, 'avg_loss': 8.920218942264679, 'avg_acc': 49.96116581538624, 'loss': 8.822458267211914}


EP_train:0:  69%|| 19002/27626 [44:41<20:31,  7.01it/s]

{'epoch': 0, 'iter': 19000, 'avg_loss': 8.920127821422854, 'avg_acc': 49.95691016262302, 'loss': 8.668975830078125}


EP_train:0:  69%|| 19012/27626 [44:43<20:27,  7.02it/s]

{'epoch': 0, 'iter': 19010, 'avg_loss': 8.920012894010853, 'avg_acc': 49.95627531429172, 'loss': 9.355428695678711}


EP_train:0:  69%|| 19022/27626 [44:44<20:16,  7.07it/s]

{'epoch': 0, 'iter': 19020, 'avg_loss': 8.919648761973885, 'avg_acc': 49.955312549287626, 'loss': 8.17465877532959}


EP_train:0:  69%|| 19032/27626 [44:45<20:14,  7.08it/s]

{'epoch': 0, 'iter': 19030, 'avg_loss': 8.91939730223539, 'avg_acc': 49.95599285376491, 'loss': 8.010303497314453}


EP_train:0:  69%|| 19042/27626 [44:47<20:14,  7.07it/s]

{'epoch': 0, 'iter': 19040, 'avg_loss': 8.919769651955788, 'avg_acc': 49.956180085079566, 'loss': 8.910884857177734}


EP_train:0:  69%|| 19052/27626 [44:48<20:20,  7.03it/s]

{'epoch': 0, 'iter': 19050, 'avg_loss': 8.919502208792414, 'avg_acc': 49.95767938690882, 'loss': 8.11986255645752}


EP_train:0:  69%|| 19062/27626 [44:50<20:08,  7.09it/s]

{'epoch': 0, 'iter': 19060, 'avg_loss': 8.919384869553037, 'avg_acc': 49.957045800325275, 'loss': 8.246253967285156}


EP_train:0:  69%|| 19072/27626 [44:51<20:15,  7.04it/s]

{'epoch': 0, 'iter': 19070, 'avg_loss': 8.91932560709838, 'avg_acc': 49.95624901683184, 'loss': 9.106637001037598}


EP_train:0:  69%|| 19082/27626 [44:52<20:11,  7.05it/s]

{'epoch': 0, 'iter': 19080, 'avg_loss': 8.919243374958421, 'avg_acc': 49.956927047848644, 'loss': 9.084819793701172}


EP_train:0:  69%|| 19092/27626 [44:54<20:15,  7.02it/s]

{'epoch': 0, 'iter': 19090, 'avg_loss': 8.919139422813872, 'avg_acc': 49.95825912733749, 'loss': 8.90604019165039}


EP_train:0:  69%|| 19102/27626 [44:55<20:13,  7.03it/s]

{'epoch': 0, 'iter': 19100, 'avg_loss': 8.919053647126349, 'avg_acc': 49.95893539605256, 'loss': 8.979228973388672}


EP_train:0:  69%|| 19112/27626 [44:57<19:59,  7.10it/s]

{'epoch': 0, 'iter': 19110, 'avg_loss': 8.918754631530721, 'avg_acc': 49.95895688347025, 'loss': 8.350829124450684}


EP_train:0:  69%|| 19122/27626 [44:58<20:04,  7.06it/s]

{'epoch': 0, 'iter': 19120, 'avg_loss': 8.918730548445472, 'avg_acc': 49.958488049788194, 'loss': 8.296046257019043}


EP_train:0:  69%|| 19132/27626 [45:00<20:07,  7.04it/s]

{'epoch': 0, 'iter': 19130, 'avg_loss': 8.91846873406166, 'avg_acc': 49.956712926663535, 'loss': 8.632737159729004}


EP_train:0:  69%|| 19142/27626 [45:01<20:04,  7.04it/s]

{'epoch': 0, 'iter': 19140, 'avg_loss': 8.918139626380574, 'avg_acc': 49.955755968862654, 'loss': 7.971134662628174}


EP_train:0:  69%|| 19152/27626 [45:02<20:10,  7.00it/s]

{'epoch': 0, 'iter': 19150, 'avg_loss': 8.918105623328799, 'avg_acc': 49.956758132734585, 'loss': 9.122230529785156}


EP_train:0:  69%|| 19162/27626 [45:04<20:04,  7.02it/s]

{'epoch': 0, 'iter': 19160, 'avg_loss': 8.918044777642175, 'avg_acc': 49.956617608684304, 'loss': 8.720015525817871}


EP_train:0:  69%|| 19172/27626 [45:05<19:58,  7.05it/s]

{'epoch': 0, 'iter': 19170, 'avg_loss': 8.917917430994688, 'avg_acc': 49.95941135047728, 'loss': 8.176385879516602}


EP_train:0:  69%|| 19182/27626 [45:07<19:55,  7.06it/s]

{'epoch': 0, 'iter': 19180, 'avg_loss': 8.917685088546511, 'avg_acc': 49.960898806110215, 'loss': 7.7461676597595215}


EP_train:0:  69%|| 19192/27626 [45:08<20:07,  6.99it/s]

{'epoch': 0, 'iter': 19190, 'avg_loss': 8.9174835218893, 'avg_acc': 49.96515293627221, 'loss': 8.547518730163574}


EP_train:0:  70%|| 19202/27626 [45:09<19:57,  7.04it/s]

{'epoch': 0, 'iter': 19200, 'avg_loss': 8.917281624054104, 'avg_acc': 49.965171084839334, 'loss': 8.022214889526367}


EP_train:0:  70%|| 19212/27626 [45:11<19:43,  7.11it/s]

{'epoch': 0, 'iter': 19210, 'avg_loss': 8.917239179815235, 'avg_acc': 49.961773202852534, 'loss': 8.916892051696777}


EP_train:0:  70%|| 19222/27626 [45:12<19:49,  7.07it/s]

{'epoch': 0, 'iter': 19220, 'avg_loss': 8.917000480264054, 'avg_acc': 49.96146792570626, 'loss': 8.687557220458984}


EP_train:0:  70%|| 19232/27626 [45:14<19:53,  7.03it/s]

{'epoch': 0, 'iter': 19230, 'avg_loss': 8.916591892090684, 'avg_acc': 49.96327544069471, 'loss': 8.332778930664062}


EP_train:0:  70%|| 19242/27626 [45:15<19:49,  7.05it/s]

{'epoch': 0, 'iter': 19240, 'avg_loss': 8.916093665530854, 'avg_acc': 49.96329452731147, 'loss': 7.565632343292236}


EP_train:0:  70%|| 19252/27626 [45:17<19:43,  7.08it/s]

{'epoch': 0, 'iter': 19250, 'avg_loss': 8.915918548747067, 'avg_acc': 49.962826606410054, 'loss': 9.264540672302246}


EP_train:0:  70%|| 19262/27626 [45:18<19:43,  7.07it/s]

{'epoch': 0, 'iter': 19260, 'avg_loss': 8.915788973462735, 'avg_acc': 49.9641438658429, 'loss': 8.674271583557129}


EP_train:0:  70%|| 19272/27626 [45:19<19:49,  7.02it/s]

{'epoch': 0, 'iter': 19270, 'avg_loss': 8.915614284740846, 'avg_acc': 49.963189507550204, 'loss': 8.71690845489502}


EP_train:0:  70%|| 19282/27626 [45:21<19:53,  6.99it/s]

{'epoch': 0, 'iter': 19280, 'avg_loss': 8.915521995536915, 'avg_acc': 49.964181059073695, 'loss': 8.294001579284668}


EP_train:0:  70%|| 19292/27626 [45:22<19:36,  7.09it/s]

{'epoch': 0, 'iter': 19290, 'avg_loss': 8.91534595567117, 'avg_acc': 49.96436161940802, 'loss': 8.964494705200195}


EP_train:0:  70%|| 19302/27626 [45:24<19:42,  7.04it/s]

{'epoch': 0, 'iter': 19300, 'avg_loss': 8.915160877458431, 'avg_acc': 49.965351536189836, 'loss': 7.735630512237549}


EP_train:0:  70%|| 19312/27626 [45:25<19:45,  7.01it/s]

{'epoch': 0, 'iter': 19310, 'avg_loss': 8.914856149213096, 'avg_acc': 49.96472217906893, 'loss': 8.347247123718262}


EP_train:0:  70%|| 19322/27626 [45:26<19:44,  7.01it/s]

{'epoch': 0, 'iter': 19320, 'avg_loss': 8.914705569264953, 'avg_acc': 49.96441695564412, 'loss': 8.55616283416748}


EP_train:0:  70%|| 19332/27626 [45:28<19:36,  7.05it/s]

{'epoch': 0, 'iter': 19330, 'avg_loss': 8.914533566222374, 'avg_acc': 49.9631421033573, 'loss': 8.592585563659668}


EP_train:0:  70%|| 19342/27626 [45:29<19:32,  7.07it/s]

{'epoch': 0, 'iter': 19340, 'avg_loss': 8.914407670192798, 'avg_acc': 49.96332273408821, 'loss': 8.596784591674805}


EP_train:0:  70%|| 19352/27626 [45:31<19:25,  7.10it/s]

{'epoch': 0, 'iter': 19350, 'avg_loss': 8.91423755923905, 'avg_acc': 49.96221125523228, 'loss': 8.610102653503418}


EP_train:0:  70%|| 19362/27626 [45:32<19:30,  7.06it/s]

{'epoch': 0, 'iter': 19360, 'avg_loss': 8.913966806754674, 'avg_acc': 49.95980966892206, 'loss': 9.650405883789062}


EP_train:0:  70%|| 19372/27626 [45:33<19:36,  7.01it/s]

{'epoch': 0, 'iter': 19370, 'avg_loss': 8.913890351939475, 'avg_acc': 49.961443652883176, 'loss': 8.816671371459961}


EP_train:0:  70%|| 19382/27626 [45:35<19:25,  7.08it/s]

{'epoch': 0, 'iter': 19380, 'avg_loss': 8.913711134294172, 'avg_acc': 49.961786027552755, 'loss': 8.907629013061523}


EP_train:0:  70%|| 19392/27626 [45:36<19:28,  7.05it/s]

{'epoch': 0, 'iter': 19390, 'avg_loss': 8.913457289775364, 'avg_acc': 49.961966891857045, 'loss': 9.325807571411133}


EP_train:0:  70%|| 19402/27626 [45:38<19:26,  7.05it/s]

{'epoch': 0, 'iter': 19400, 'avg_loss': 8.91337022630955, 'avg_acc': 49.95957038296995, 'loss': 8.944461822509766}


EP_train:0:  70%|| 19412/27626 [45:39<19:22,  7.07it/s]

{'epoch': 0, 'iter': 19410, 'avg_loss': 8.913122957490105, 'avg_acc': 49.959108237597235, 'loss': 9.521671295166016}


EP_train:0:  70%|| 19422/27626 [45:41<19:18,  7.08it/s]

{'epoch': 0, 'iter': 19420, 'avg_loss': 8.912772355453589, 'avg_acc': 49.95880747644302, 'loss': 7.724039554595947}


EP_train:0:  70%|| 19432/27626 [45:42<19:14,  7.10it/s]

{'epoch': 0, 'iter': 19430, 'avg_loss': 8.912620282714265, 'avg_acc': 49.95673794452164, 'loss': 8.437173843383789}


EP_train:0:  70%|| 19442/27626 [45:43<19:21,  7.05it/s]

{'epoch': 0, 'iter': 19440, 'avg_loss': 8.91246491382105, 'avg_acc': 49.95676019752071, 'loss': 8.228205680847168}


EP_train:0:  70%|| 19452/27626 [45:45<19:13,  7.08it/s]

{'epoch': 0, 'iter': 19450, 'avg_loss': 8.912387364174577, 'avg_acc': 49.95678242763868, 'loss': 8.585502624511719}


EP_train:0:  70%|| 19462/27626 [45:46<19:22,  7.03it/s]

{'epoch': 0, 'iter': 19460, 'avg_loss': 8.912615695253782, 'avg_acc': 49.957286367607004, 'loss': 9.113651275634766}


EP_train:0:  70%|| 19472/27626 [45:48<19:05,  7.12it/s]

{'epoch': 0, 'iter': 19470, 'avg_loss': 8.912588625217243, 'avg_acc': 49.95762929484875, 'loss': 8.416156768798828}


EP_train:0:  71%|| 19482/27626 [45:49<19:14,  7.05it/s]

{'epoch': 0, 'iter': 19480, 'avg_loss': 8.912697280630011, 'avg_acc': 49.95652815563883, 'loss': 9.93895435333252}


EP_train:0:  71%|| 19492/27626 [45:50<19:16,  7.04it/s]

{'epoch': 0, 'iter': 19490, 'avg_loss': 8.912553243239293, 'avg_acc': 49.95799343286645, 'loss': 9.365998268127441}


EP_train:0:  71%|| 19502/27626 [45:52<19:07,  7.08it/s]

{'epoch': 0, 'iter': 19500, 'avg_loss': 8.912350292224394, 'avg_acc': 49.9567329880519, 'loss': 8.130964279174805}


EP_train:0:  71%|| 19512/27626 [45:53<19:05,  7.08it/s]

{'epoch': 0, 'iter': 19510, 'avg_loss': 8.912018799108493, 'avg_acc': 49.95579416739275, 'loss': 7.720586776733398}


EP_train:0:  71%|| 19522/27626 [45:55<19:16,  7.01it/s]

{'epoch': 0, 'iter': 19520, 'avg_loss': 8.912080326894227, 'avg_acc': 49.95725756877209, 'loss': 8.81626033782959}


EP_train:0:  71%|| 19532/27626 [45:56<19:06,  7.06it/s]

{'epoch': 0, 'iter': 19530, 'avg_loss': 8.912029764215356, 'avg_acc': 49.957759459321075, 'loss': 9.00972843170166}


EP_train:0:  71%|| 19542/27626 [45:57<19:11,  7.02it/s]

{'epoch': 0, 'iter': 19540, 'avg_loss': 8.912199454836832, 'avg_acc': 49.95602195384064, 'loss': 9.1668062210083}


EP_train:0:  71%|| 19552/27626 [45:59<19:02,  7.07it/s]

{'epoch': 0, 'iter': 19550, 'avg_loss': 8.912070177307605, 'avg_acc': 49.957482993197274, 'loss': 8.469464302062988}


EP_train:0:  71%|| 19562/27626 [46:00<19:05,  7.04it/s]

{'epoch': 0, 'iter': 19560, 'avg_loss': 8.91191159303395, 'avg_acc': 49.95766448545575, 'loss': 8.903637886047363}


EP_train:0:  71%|| 19572/27626 [46:02<19:00,  7.06it/s]

{'epoch': 0, 'iter': 19570, 'avg_loss': 8.911758269574644, 'avg_acc': 49.95976189259619, 'loss': 9.225603103637695}


EP_train:0:  71%|| 19582/27626 [46:03<19:03,  7.04it/s]

{'epoch': 0, 'iter': 19580, 'avg_loss': 8.911595227142305, 'avg_acc': 49.96089959654768, 'loss': 9.24776554107666}


EP_train:0:  71%|| 19592/27626 [46:05<18:52,  7.10it/s]

{'epoch': 0, 'iter': 19590, 'avg_loss': 8.911461850998199, 'avg_acc': 49.960600530856006, 'loss': 8.361603736877441}


EP_train:0:  71%|| 19602/27626 [46:06<18:50,  7.10it/s]

{'epoch': 0, 'iter': 19600, 'avg_loss': 8.911372178793258, 'avg_acc': 49.961736646089484, 'loss': 8.64310359954834}


EP_train:0:  71%|| 19612/27626 [46:07<18:58,  7.04it/s]

{'epoch': 0, 'iter': 19610, 'avg_loss': 8.911039985282097, 'avg_acc': 49.960162663811126, 'loss': 9.333405494689941}


EP_train:0:  71%|| 19622/27626 [46:09<19:01,  7.01it/s]

{'epoch': 0, 'iter': 19620, 'avg_loss': 8.910897403868882, 'avg_acc': 49.96018296722899, 'loss': 8.969942092895508}


EP_train:0:  71%|| 19632/27626 [46:10<18:57,  7.03it/s]

{'epoch': 0, 'iter': 19630, 'avg_loss': 8.91082710039536, 'avg_acc': 49.96163593296317, 'loss': 9.703271865844727}


EP_train:0:  71%|| 19642/27626 [46:12<18:57,  7.02it/s]

{'epoch': 0, 'iter': 19640, 'avg_loss': 8.910735063570822, 'avg_acc': 49.96022351204114, 'loss': 7.676272869110107}


EP_train:0:  71%|| 19652/27626 [46:13<18:52,  7.04it/s]

{'epoch': 0, 'iter': 19650, 'avg_loss': 8.910468395451154, 'avg_acc': 49.9589715536105, 'loss': 8.78624439239502}


EP_train:0:  71%|| 19662/27626 [46:14<18:47,  7.06it/s]

{'epoch': 0, 'iter': 19660, 'avg_loss': 8.91050748648997, 'avg_acc': 49.96010503026296, 'loss': 8.796335220336914}


EP_train:0:  71%|| 19672/27626 [46:16<18:51,  7.03it/s]

{'epoch': 0, 'iter': 19670, 'avg_loss': 8.910470301337371, 'avg_acc': 49.958060088455085, 'loss': 9.59526538848877}


EP_train:0:  71%|| 19682/27626 [46:17<18:53,  7.01it/s]

{'epoch': 0, 'iter': 19680, 'avg_loss': 8.910406183247934, 'avg_acc': 49.96062191961791, 'loss': 8.807282447814941}


EP_train:0:  71%|| 19692/27626 [46:19<18:41,  7.07it/s]

{'epoch': 0, 'iter': 19690, 'avg_loss': 8.910284204532918, 'avg_acc': 49.96048321568229, 'loss': 8.293961524963379}


EP_train:0:  71%|| 19702/27626 [46:20<18:44,  7.05it/s]

{'epoch': 0, 'iter': 19700, 'avg_loss': 8.910070991608087, 'avg_acc': 49.95986878838637, 'loss': 9.333383560180664}


EP_train:0:  71%|| 19712/27626 [46:21<18:35,  7.09it/s]

{'epoch': 0, 'iter': 19710, 'avg_loss': 8.909903760454945, 'avg_acc': 49.95814519811273, 'loss': 8.437387466430664}


EP_train:0:  71%|| 19722/27626 [46:23<18:47,  7.01it/s]

{'epoch': 0, 'iter': 19720, 'avg_loss': 8.909704881110713, 'avg_acc': 49.95468029004614, 'loss': 8.027613639831543}


EP_train:0:  71%|| 19732/27626 [46:24<18:44,  7.02it/s]

{'epoch': 0, 'iter': 19730, 'avg_loss': 8.90953688889532, 'avg_acc': 49.95375297754802, 'loss': 7.9611430168151855}


EP_train:0:  71%|| 19742/27626 [46:26<18:43,  7.02it/s]

{'epoch': 0, 'iter': 19740, 'avg_loss': 8.909345644367338, 'avg_acc': 49.95156020465022, 'loss': 8.039088249206543}


EP_train:0:  71%|| 19752/27626 [46:27<18:39,  7.04it/s]

{'epoch': 0, 'iter': 19750, 'avg_loss': 8.909065054070284, 'avg_acc': 49.953799807604675, 'loss': 7.262459754943848}


EP_train:0:  72%|| 19762/27626 [46:29<18:33,  7.07it/s]

{'epoch': 0, 'iter': 19760, 'avg_loss': 8.908836256956429, 'avg_acc': 49.9530324882344, 'loss': 9.072627067565918}


EP_train:0:  72%|| 19772/27626 [46:30<18:28,  7.08it/s]

{'epoch': 0, 'iter': 19770, 'avg_loss': 8.908543120650432, 'avg_acc': 49.95210788528653, 'loss': 8.350383758544922}


EP_train:0:  72%|| 19782/27626 [46:31<18:28,  7.08it/s]

{'epoch': 0, 'iter': 19780, 'avg_loss': 8.908279308227296, 'avg_acc': 49.95371189525302, 'loss': 8.244230270385742}


EP_train:0:  72%|| 19792/27626 [46:33<18:31,  7.05it/s]

{'epoch': 0, 'iter': 19790, 'avg_loss': 8.907910005722597, 'avg_acc': 49.95247208327017, 'loss': 8.697555541992188}


EP_train:0:  72%|| 19802/27626 [46:34<18:25,  7.08it/s]

{'epoch': 0, 'iter': 19800, 'avg_loss': 8.907731866073165, 'avg_acc': 49.95738851573152, 'loss': 8.994622230529785}


EP_train:0:  72%|| 19812/27626 [46:36<18:34,  7.01it/s]

{'epoch': 0, 'iter': 19810, 'avg_loss': 8.907297932802592, 'avg_acc': 49.95472843369845, 'loss': 7.962324619293213}


EP_train:0:  72%|| 19822/27626 [46:37<18:33,  7.01it/s]

{'epoch': 0, 'iter': 19820, 'avg_loss': 8.907294017817152, 'avg_acc': 49.9553819181676, 'loss': 7.856291770935059}


EP_train:0:  72%|| 19832/27626 [46:38<18:23,  7.06it/s]

{'epoch': 0, 'iter': 19830, 'avg_loss': 8.907241608105751, 'avg_acc': 49.954458927941104, 'loss': 7.991883277893066}


EP_train:0:  72%|| 19842/27626 [46:40<18:24,  7.05it/s]

{'epoch': 0, 'iter': 19840, 'avg_loss': 8.907212770872615, 'avg_acc': 49.9524343531072, 'loss': 9.23613166809082}


EP_train:0:  72%|| 19852/27626 [46:41<18:22,  7.05it/s]

{'epoch': 0, 'iter': 19850, 'avg_loss': 8.907082783762114, 'avg_acc': 49.94993954964485, 'loss': 9.082941055297852}


EP_train:0:  72%|| 19862/27626 [46:43<18:19,  7.06it/s]

{'epoch': 0, 'iter': 19860, 'avg_loss': 8.906794261394579, 'avg_acc': 49.95012209858516, 'loss': 7.61604642868042}


EP_train:0:  72%|| 19872/27626 [46:44<18:24,  7.02it/s]

{'epoch': 0, 'iter': 19870, 'avg_loss': 8.906497477813202, 'avg_acc': 49.948417291530376, 'loss': 7.198825359344482}


EP_train:0:  72%|| 19882/27626 [46:46<18:17,  7.05it/s]

{'epoch': 0, 'iter': 19880, 'avg_loss': 8.90631455304451, 'avg_acc': 49.94671419948695, 'loss': 8.218084335327148}


EP_train:0:  72%|| 19892/27626 [46:47<18:16,  7.06it/s]

{'epoch': 0, 'iter': 19890, 'avg_loss': 8.906172341539527, 'avg_acc': 49.948469156905134, 'loss': 8.864376068115234}


EP_train:0:  72%|| 19902/27626 [46:48<18:16,  7.04it/s]

{'epoch': 0, 'iter': 19900, 'avg_loss': 8.906252626450025, 'avg_acc': 49.9488091050701, 'loss': 9.257829666137695}


EP_train:0:  72%|| 19912/27626 [46:50<18:20,  7.01it/s]

{'epoch': 0, 'iter': 19910, 'avg_loss': 8.906052029813576, 'avg_acc': 49.94852091808548, 'loss': 9.196873664855957}


EP_train:0:  72%|| 19922/27626 [46:51<18:18,  7.01it/s]

{'epoch': 0, 'iter': 19920, 'avg_loss': 8.905683680343303, 'avg_acc': 49.94948797751117, 'loss': 8.509477615356445}


EP_train:0:  72%|| 19932/27626 [46:53<18:12,  7.04it/s]

{'epoch': 0, 'iter': 19930, 'avg_loss': 8.905465728669236, 'avg_acc': 49.949826902814706, 'loss': 9.117524147033691}


EP_train:0:  72%|| 19942/27626 [46:54<18:02,  7.10it/s]

{'epoch': 0, 'iter': 19940, 'avg_loss': 8.905241608805012, 'avg_acc': 49.95047891279274, 'loss': 7.761654376983643}


EP_train:0:  72%|| 19952/27626 [46:55<18:07,  7.06it/s]

{'epoch': 0, 'iter': 19950, 'avg_loss': 8.905108988861663, 'avg_acc': 49.95144353666483, 'loss': 8.984662055969238}


EP_train:0:  72%|| 19962/27626 [46:57<18:06,  7.05it/s]

{'epoch': 0, 'iter': 19960, 'avg_loss': 8.905109597126758, 'avg_acc': 49.94958919893793, 'loss': 9.635910034179688}


EP_train:0:  72%|| 19972/27626 [46:58<17:56,  7.11it/s]

{'epoch': 0, 'iter': 19970, 'avg_loss': 8.90499954745095, 'avg_acc': 49.95117920985429, 'loss': 8.105812072753906}


EP_train:0:  72%|| 19982/27626 [47:00<18:07,  7.03it/s]

{'epoch': 0, 'iter': 19980, 'avg_loss': 8.904795339745771, 'avg_acc': 49.947293678995045, 'loss': 8.527416229248047}


EP_train:0:  72%|| 19992/27626 [47:01<18:15,  6.97it/s]

{'epoch': 0, 'iter': 19990, 'avg_loss': 8.90469807647286, 'avg_acc': 49.94653844229903, 'loss': 9.197659492492676}


EP_train:0:  72%|| 20002/27626 [47:02<18:15,  6.96it/s]

{'epoch': 0, 'iter': 20000, 'avg_loss': 8.904454198242885, 'avg_acc': 49.95187740612969, 'loss': 8.780024528503418}


EP_train:0:  72%|| 20012/27626 [47:04<18:06,  7.01it/s]

{'epoch': 0, 'iter': 20010, 'avg_loss': 8.904447663319438, 'avg_acc': 49.953775423517065, 'loss': 9.19312572479248}


EP_train:0:  72%|| 20022/27626 [47:05<18:01,  7.03it/s]

{'epoch': 0, 'iter': 20020, 'avg_loss': 8.904308246902923, 'avg_acc': 49.955359372658705, 'loss': 8.78402328491211}


EP_train:0:  73%|| 20032/27626 [47:07<17:55,  7.06it/s]

{'epoch': 0, 'iter': 20030, 'avg_loss': 8.904185539605727, 'avg_acc': 49.95896984673755, 'loss': 8.367188453674316}


EP_train:0:  73%|| 20042/27626 [47:08<17:46,  7.11it/s]

{'epoch': 0, 'iter': 20040, 'avg_loss': 8.90410516026622, 'avg_acc': 49.95649543435956, 'loss': 9.209531784057617}


EP_train:0:  73%|| 20052/27626 [47:10<17:49,  7.08it/s]

{'epoch': 0, 'iter': 20050, 'avg_loss': 8.904144954505416, 'avg_acc': 49.95573786843549, 'loss': 7.993018627166748}


EP_train:0:  73%|| 20062/27626 [47:11<17:48,  7.08it/s]

{'epoch': 0, 'iter': 20060, 'avg_loss': 8.903985173530744, 'avg_acc': 49.957006131299536, 'loss': 8.853723526000977}


EP_train:0:  73%|| 20072/27626 [47:12<17:50,  7.06it/s]

{'epoch': 0, 'iter': 20070, 'avg_loss': 8.90384210030907, 'avg_acc': 49.9565604603657, 'loss': 9.137450218200684}


EP_train:0:  73%|| 20082/27626 [47:14<17:47,  7.06it/s]

{'epoch': 0, 'iter': 20080, 'avg_loss': 8.903845431812849, 'avg_acc': 49.95673771226532, 'loss': 8.540730476379395}


EP_train:0:  73%|| 20092/27626 [47:15<17:55,  7.00it/s]

{'epoch': 0, 'iter': 20090, 'avg_loss': 8.903547681515406, 'avg_acc': 49.95800358369419, 'loss': 8.676741600036621}


EP_train:0:  73%|| 20102/27626 [47:17<17:44,  7.07it/s]

{'epoch': 0, 'iter': 20100, 'avg_loss': 8.903494964493524, 'avg_acc': 49.957091686980746, 'loss': 9.66302490234375}


EP_train:0:  73%|| 20112/27626 [47:18<17:42,  7.07it/s]

{'epoch': 0, 'iter': 20110, 'avg_loss': 8.903244357824883, 'avg_acc': 49.95726841032271, 'loss': 8.20772933959961}


EP_train:0:  73%|| 20122/27626 [47:19<17:41,  7.07it/s]

{'epoch': 0, 'iter': 20120, 'avg_loss': 8.903074610026538, 'avg_acc': 49.95713433725958, 'loss': 8.53893756866455}


EP_train:0:  73%|| 20132/27626 [47:21<17:41,  7.06it/s]

{'epoch': 0, 'iter': 20130, 'avg_loss': 8.902834092941236, 'avg_acc': 49.956224231285084, 'loss': 8.393433570861816}


EP_train:0:  73%|| 20142/27626 [47:22<17:41,  7.05it/s]

{'epoch': 0, 'iter': 20140, 'avg_loss': 8.90272403207272, 'avg_acc': 49.95748721513331, 'loss': 9.189455032348633}


EP_train:0:  73%|| 20152/27626 [47:24<17:40,  7.05it/s]

{'epoch': 0, 'iter': 20150, 'avg_loss': 8.902711701436905, 'avg_acc': 49.95719815393777, 'loss': 8.178707122802734}


EP_train:0:  73%|| 20162/27626 [47:25<17:38,  7.05it/s]

{'epoch': 0, 'iter': 20160, 'avg_loss': 8.902524409150564, 'avg_acc': 49.95783939288726, 'loss': 8.652182579040527}


EP_train:0:  73%|| 20172/27626 [47:26<17:36,  7.06it/s]

{'epoch': 0, 'iter': 20170, 'avg_loss': 8.90210708813712, 'avg_acc': 49.95801521987011, 'loss': 7.767203330993652}


EP_train:0:  73%|| 20182/27626 [47:28<17:42,  7.01it/s]

{'epoch': 0, 'iter': 20180, 'avg_loss': 8.901882338870015, 'avg_acc': 49.958810267082896, 'loss': 8.10435676574707}


EP_train:0:  73%|| 20192/27626 [47:29<17:41,  7.00it/s]

{'epoch': 0, 'iter': 20190, 'avg_loss': 8.901768624141969, 'avg_acc': 49.958675895200834, 'loss': 9.065332412719727}


EP_train:0:  73%|| 20202/27626 [47:31<17:27,  7.09it/s]

{'epoch': 0, 'iter': 20200, 'avg_loss': 8.901597317501635, 'avg_acc': 49.956685312608286, 'loss': 8.81314468383789}


EP_train:0:  73%|| 20212/27626 [47:32<17:26,  7.08it/s]

{'epoch': 0, 'iter': 20210, 'avg_loss': 8.901354824014446, 'avg_acc': 49.95686136262431, 'loss': 8.893912315368652}


EP_train:0:  73%|| 20222/27626 [47:34<17:32,  7.04it/s]

{'epoch': 0, 'iter': 20220, 'avg_loss': 8.901219135257259, 'avg_acc': 49.958273576974435, 'loss': 8.273761749267578}


EP_train:0:  73%|| 20232/27626 [47:35<17:19,  7.11it/s]

{'epoch': 0, 'iter': 20230, 'avg_loss': 8.901013077916963, 'avg_acc': 49.9587575997232, 'loss': 8.378180503845215}


EP_train:0:  73%|| 20242/27626 [47:36<17:23,  7.07it/s]

{'epoch': 0, 'iter': 20240, 'avg_loss': 8.900646725801558, 'avg_acc': 49.957697248159675, 'loss': 9.049483299255371}


EP_train:0:  73%|| 20252/27626 [47:38<17:18,  7.10it/s]

{'epoch': 0, 'iter': 20250, 'avg_loss': 8.900501796907392, 'avg_acc': 49.95632931707076, 'loss': 8.752696990966797}


EP_train:0:  73%|| 20262/27626 [47:39<17:27,  7.03it/s]

{'epoch': 0, 'iter': 20260, 'avg_loss': 8.900245662338259, 'avg_acc': 49.95357460145106, 'loss': 8.250444412231445}


EP_train:0:  73%|| 20272/27626 [47:41<17:35,  6.97it/s]

{'epoch': 0, 'iter': 20270, 'avg_loss': 8.900180098371932, 'avg_acc': 49.95436830940753, 'loss': 9.433491706848145}


EP_train:0:  73%|| 20282/27626 [47:42<17:30,  6.99it/s]

{'epoch': 0, 'iter': 20280, 'avg_loss': 8.900070654288486, 'avg_acc': 49.95392855381884, 'loss': 9.386114120483398}


EP_train:0:  73%|| 20292/27626 [47:43<17:21,  7.04it/s]

{'epoch': 0, 'iter': 20290, 'avg_loss': 8.899985623873153, 'avg_acc': 49.95333522251244, 'loss': 8.895861625671387}


EP_train:0:  73%|| 20302/27626 [47:45<17:09,  7.11it/s]

{'epoch': 0, 'iter': 20300, 'avg_loss': 8.90002656350564, 'avg_acc': 49.952742475740116, 'loss': 8.621748924255371}


EP_train:0:  74%|| 20312/27626 [47:46<17:18,  7.04it/s]

{'epoch': 0, 'iter': 20310, 'avg_loss': 8.899880929815147, 'avg_acc': 49.95368888779479, 'loss': 8.56571102142334}


EP_train:0:  74%|| 20322/27626 [47:48<17:18,  7.03it/s]

{'epoch': 0, 'iter': 20320, 'avg_loss': 8.899732579068461, 'avg_acc': 49.95340411397077, 'loss': 8.523386001586914}


EP_train:0:  74%|| 20332/27626 [47:49<17:11,  7.07it/s]

{'epoch': 0, 'iter': 20330, 'avg_loss': 8.899307884953505, 'avg_acc': 49.95404185726231, 'loss': 8.368265151977539}


EP_train:0:  74%|| 20342/27626 [47:50<17:05,  7.10it/s]

{'epoch': 0, 'iter': 20340, 'avg_loss': 8.898996115258283, 'avg_acc': 49.95606164888648, 'loss': 8.33895492553711}


EP_train:0:  74%|| 20352/27626 [47:52<17:13,  7.04it/s]

{'epoch': 0, 'iter': 20350, 'avg_loss': 8.898670576484623, 'avg_acc': 49.954240577858585, 'loss': 7.771069526672363}


EP_train:0:  74%|| 20362/27626 [47:53<17:13,  7.03it/s]

{'epoch': 0, 'iter': 20360, 'avg_loss': 8.89876015362773, 'avg_acc': 49.95487697067924, 'loss': 9.455000877380371}


EP_train:0:  74%|| 20372/27626 [47:55<17:06,  7.07it/s]

{'epoch': 0, 'iter': 20370, 'avg_loss': 8.898612679873866, 'avg_acc': 49.95321167345737, 'loss': 8.90428638458252}


EP_train:0:  74%|| 20382/27626 [47:56<16:59,  7.11it/s]

{'epoch': 0, 'iter': 20380, 'avg_loss': 8.898507088418388, 'avg_acc': 49.954614592021976, 'loss': 9.665240287780762}


EP_train:0:  74%|| 20392/27626 [47:58<17:01,  7.08it/s]

{'epoch': 0, 'iter': 20390, 'avg_loss': 8.898317032520735, 'avg_acc': 49.95601613456917, 'loss': 8.695291519165039}


EP_train:0:  74%|| 20402/27626 [47:59<17:00,  7.08it/s]

{'epoch': 0, 'iter': 20400, 'avg_loss': 8.89807038493942, 'avg_acc': 49.95435272780746, 'loss': 8.818556785583496}


EP_train:0:  74%|| 20412/27626 [48:00<16:57,  7.09it/s]

{'epoch': 0, 'iter': 20410, 'avg_loss': 8.898021073981717, 'avg_acc': 49.95360957326931, 'loss': 8.490564346313477}


EP_train:0:  74%|| 20422/27626 [48:02<16:56,  7.09it/s]

{'epoch': 0, 'iter': 20420, 'avg_loss': 8.89797311043752, 'avg_acc': 49.951948974095295, 'loss': 9.110753059387207}


EP_train:0:  74%|| 20432/27626 [48:03<16:55,  7.08it/s]

{'epoch': 0, 'iter': 20430, 'avg_loss': 8.897772951043478, 'avg_acc': 49.95181953893593, 'loss': 7.7393999099731445}


EP_train:0:  74%|| 20442/27626 [48:05<17:00,  7.04it/s]

{'epoch': 0, 'iter': 20440, 'avg_loss': 8.897542496412434, 'avg_acc': 49.951843109436915, 'loss': 8.534634590148926}


EP_train:0:  74%|| 20452/27626 [48:06<17:08,  6.97it/s]

{'epoch': 0, 'iter': 20450, 'avg_loss': 8.897287830712598, 'avg_acc': 49.95171385262334, 'loss': 8.471400260925293}


EP_train:0:  74%|| 20462/27626 [48:07<16:58,  7.04it/s]

{'epoch': 0, 'iter': 20460, 'avg_loss': 8.897142324610298, 'avg_acc': 49.95143199257123, 'loss': 8.955671310424805}


EP_train:0:  74%|| 20472/27626 [48:09<16:54,  7.05it/s]

{'epoch': 0, 'iter': 20470, 'avg_loss': 8.897026529717904, 'avg_acc': 49.95176102779541, 'loss': 9.70468521118164}


EP_train:0:  74%|| 20482/27626 [48:10<16:47,  7.09it/s]

{'epoch': 0, 'iter': 20480, 'avg_loss': 8.896949642111178, 'avg_acc': 49.95132683950979, 'loss': 7.736584186553955}


EP_train:0:  74%|| 20492/27626 [48:12<16:46,  7.09it/s]

{'epoch': 0, 'iter': 20490, 'avg_loss': 8.896825208734217, 'avg_acc': 49.9502830510956, 'loss': 8.207000732421875}


EP_train:0:  74%|| 20502/27626 [48:13<16:45,  7.09it/s]

{'epoch': 0, 'iter': 20500, 'avg_loss': 8.896754194984121, 'avg_acc': 49.95122189161504, 'loss': 8.249916076660156}


EP_train:0:  74%|| 20512/27626 [48:14<16:44,  7.08it/s]

{'epoch': 0, 'iter': 20510, 'avg_loss': 8.896599619620245, 'avg_acc': 49.95078860123836, 'loss': 8.694276809692383}


EP_train:0:  74%|| 20522/27626 [48:16<16:42,  7.09it/s]

{'epoch': 0, 'iter': 20520, 'avg_loss': 8.896729845243392, 'avg_acc': 49.95279226158569, 'loss': 8.487935066223145}


EP_train:0:  74%|| 20532/27626 [48:17<16:43,  7.07it/s]

{'epoch': 0, 'iter': 20530, 'avg_loss': 8.89649149199955, 'avg_acc': 49.954337343529296, 'loss': 8.156394958496094}


EP_train:0:  74%|| 20542/27626 [48:19<16:50,  7.01it/s]

{'epoch': 0, 'iter': 20540, 'avg_loss': 8.896397209603903, 'avg_acc': 49.95314249549681, 'loss': 8.6337308883667}


EP_train:0:  74%|| 20552/27626 [48:20<16:42,  7.06it/s]

{'epoch': 0, 'iter': 20550, 'avg_loss': 8.89634676639961, 'avg_acc': 49.953925599727505, 'loss': 8.61253547668457}


EP_train:0:  74%|| 20562/27626 [48:22<16:45,  7.03it/s]

{'epoch': 0, 'iter': 20560, 'avg_loss': 8.896167335224536, 'avg_acc': 49.955467876076064, 'loss': 8.597994804382324}


EP_train:0:  74%|| 20572/27626 [48:23<16:37,  7.08it/s]

{'epoch': 0, 'iter': 20570, 'avg_loss': 8.896001704322956, 'avg_acc': 49.956249088522675, 'loss': 8.871514320373535}


EP_train:0:  75%|| 20582/27626 [48:24<16:34,  7.08it/s]

{'epoch': 0, 'iter': 20580, 'avg_loss': 8.896002777000076, 'avg_acc': 49.95763689810991, 'loss': 10.25887393951416}


EP_train:0:  75%|| 20592/27626 [48:26<16:38,  7.04it/s]

{'epoch': 0, 'iter': 20590, 'avg_loss': 8.89603063738437, 'avg_acc': 49.95902335972027, 'loss': 8.808643341064453}


EP_train:0:  75%|| 20602/27626 [48:27<16:37,  7.04it/s]

{'epoch': 0, 'iter': 20600, 'avg_loss': 8.895870622526841, 'avg_acc': 49.95813310033493, 'loss': 8.610272407531738}


EP_train:0:  75%|| 20612/27626 [48:29<16:33,  7.06it/s]

{'epoch': 0, 'iter': 20610, 'avg_loss': 8.89577420335873, 'avg_acc': 49.959517975838146, 'loss': 9.014808654785156}


EP_train:0:  75%|| 20622/27626 [48:30<16:35,  7.03it/s]

{'epoch': 0, 'iter': 20620, 'avg_loss': 8.895584258669697, 'avg_acc': 49.95923451820959, 'loss': 9.05699348449707}


EP_train:0:  75%|| 20632/27626 [48:31<16:38,  7.00it/s]

{'epoch': 0, 'iter': 20630, 'avg_loss': 8.895525310512289, 'avg_acc': 49.95728515340992, 'loss': 8.590836524963379}


EP_train:0:  75%|| 20642/27626 [48:33<16:33,  7.03it/s]

{'epoch': 0, 'iter': 20640, 'avg_loss': 8.895424012729846, 'avg_acc': 49.957003052177704, 'loss': 9.649967193603516}


EP_train:0:  75%|| 20652/27626 [48:34<16:35,  7.01it/s]

{'epoch': 0, 'iter': 20650, 'avg_loss': 8.89528627117623, 'avg_acc': 49.95717519732701, 'loss': 8.98125171661377}


EP_train:0:  75%|| 20662/27626 [48:36<16:21,  7.09it/s]

{'epoch': 0, 'iter': 20660, 'avg_loss': 8.895181533126088, 'avg_acc': 49.95901093848313, 'loss': 8.270030975341797}


EP_train:0:  75%|| 20672/27626 [48:37<16:20,  7.10it/s]

{'epoch': 0, 'iter': 20670, 'avg_loss': 8.894943431213697, 'avg_acc': 49.95857723380582, 'loss': 8.470471382141113}


EP_train:0:  75%|| 20682/27626 [48:38<16:24,  7.05it/s]

{'epoch': 0, 'iter': 20680, 'avg_loss': 8.89478723023271, 'avg_acc': 49.95799284367294, 'loss': 9.277094841003418}


EP_train:0:  75%|| 20692/27626 [48:40<16:16,  7.10it/s]

{'epoch': 0, 'iter': 20690, 'avg_loss': 8.894986232571176, 'avg_acc': 49.960882750954525, 'loss': 9.041006088256836}


EP_train:0:  75%|| 20702/27626 [48:41<16:26,  7.02it/s]

{'epoch': 0, 'iter': 20700, 'avg_loss': 8.89491457538578, 'avg_acc': 49.961656441717786, 'loss': 9.382613182067871}


EP_train:0:  75%|| 20712/27626 [48:43<16:29,  6.99it/s]

{'epoch': 0, 'iter': 20710, 'avg_loss': 8.894732759918769, 'avg_acc': 49.95926077929603, 'loss': 8.947739601135254}


EP_train:0:  75%|| 20722/27626 [48:44<16:28,  6.98it/s]

{'epoch': 0, 'iter': 20720, 'avg_loss': 8.8947056917189, 'avg_acc': 49.957319868732206, 'loss': 9.038653373718262}


EP_train:0:  75%|| 20732/27626 [48:46<16:20,  7.03it/s]

{'epoch': 0, 'iter': 20730, 'avg_loss': 8.894735304746112, 'avg_acc': 49.95824489894361, 'loss': 9.009629249572754}


EP_train:0:  75%|| 20742/27626 [48:47<16:10,  7.10it/s]

{'epoch': 0, 'iter': 20740, 'avg_loss': 8.894483016729412, 'avg_acc': 49.95871703389422, 'loss': 8.374960899353027}


EP_train:0:  75%|| 20752/27626 [48:48<16:07,  7.10it/s]

{'epoch': 0, 'iter': 20750, 'avg_loss': 8.894325480140104, 'avg_acc': 49.95828514288468, 'loss': 8.134242057800293}


EP_train:0:  75%|| 20762/27626 [48:50<16:17,  7.02it/s]

{'epoch': 0, 'iter': 20760, 'avg_loss': 8.89413926929966, 'avg_acc': 49.958455758393136, 'loss': 8.847269058227539}


EP_train:0:  75%|| 20772/27626 [48:51<16:06,  7.09it/s]

{'epoch': 0, 'iter': 20770, 'avg_loss': 8.894018341744161, 'avg_acc': 49.956369457416585, 'loss': 8.529605865478516}


EP_train:0:  75%|| 20782/27626 [48:53<16:01,  7.12it/s]

{'epoch': 0, 'iter': 20780, 'avg_loss': 8.893931461524403, 'avg_acc': 49.95578894182186, 'loss': 8.466268539428711}


EP_train:0:  75%|| 20792/27626 [48:54<16:09,  7.05it/s]

{'epoch': 0, 'iter': 20790, 'avg_loss': 8.893808453465246, 'avg_acc': 49.9559605117599, 'loss': 8.414236068725586}


EP_train:0:  75%|| 20802/27626 [48:55<16:12,  7.01it/s]

{'epoch': 0, 'iter': 20800, 'avg_loss': 8.893675072755581, 'avg_acc': 49.9550802846017, 'loss': 8.768961906433105}


EP_train:0:  75%|| 20812/27626 [48:57<16:14,  6.99it/s]

{'epoch': 0, 'iter': 20810, 'avg_loss': 8.893530895431487, 'avg_acc': 49.956903800874535, 'loss': 9.078097343444824}


EP_train:0:  75%|| 20822/27626 [48:58<16:08,  7.03it/s]

{'epoch': 0, 'iter': 20820, 'avg_loss': 8.893446374286475, 'avg_acc': 49.95557369963018, 'loss': 8.782073974609375}


EP_train:0:  75%|| 20832/27626 [49:00<15:58,  7.09it/s]

{'epoch': 0, 'iter': 20830, 'avg_loss': 8.893303395919979, 'avg_acc': 49.95499495943545, 'loss': 8.850032806396484}


EP_train:0:  75%|| 20842/27626 [49:01<16:12,  6.98it/s]

{'epoch': 0, 'iter': 20840, 'avg_loss': 8.893148347081187, 'avg_acc': 49.955316443548774, 'loss': 9.124618530273438}


EP_train:0:  75%|| 20852/27626 [49:03<16:06,  7.01it/s]

{'epoch': 0, 'iter': 20850, 'avg_loss': 8.893150481461484, 'avg_acc': 49.95458850894441, 'loss': 8.300689697265625}


EP_train:0:  76%|| 20862/27626 [49:04<15:59,  7.05it/s]

{'epoch': 0, 'iter': 20860, 'avg_loss': 8.89300593232201, 'avg_acc': 49.95161425626768, 'loss': 8.878637313842773}


EP_train:0:  76%|| 20872/27626 [49:05<15:57,  7.05it/s]

{'epoch': 0, 'iter': 20870, 'avg_loss': 8.892845810199354, 'avg_acc': 49.95313473240381, 'loss': 8.724170684814453}


EP_train:0:  76%|| 20882/27626 [49:07<15:56,  7.05it/s]

{'epoch': 0, 'iter': 20880, 'avg_loss': 8.89287769743654, 'avg_acc': 49.953007518796994, 'loss': 8.60142707824707}


EP_train:0:  76%|| 20892/27626 [49:08<15:58,  7.02it/s]

{'epoch': 0, 'iter': 20890, 'avg_loss': 8.892715256163127, 'avg_acc': 49.951833325355416, 'loss': 8.090757369995117}


EP_train:0:  76%|| 20902/27626 [49:10<15:59,  7.00it/s]

{'epoch': 0, 'iter': 20900, 'avg_loss': 8.892487746209918, 'avg_acc': 49.95200588488589, 'loss': 7.9459004402160645}


EP_train:0:  76%|| 20912/27626 [49:11<15:52,  7.05it/s]

{'epoch': 0, 'iter': 20910, 'avg_loss': 8.892382465882644, 'avg_acc': 49.953822151020994, 'loss': 8.583854675292969}


EP_train:0:  76%|| 20922/27626 [49:12<15:52,  7.04it/s]

{'epoch': 0, 'iter': 20920, 'avg_loss': 8.892093417396179, 'avg_acc': 49.95160365183309, 'loss': 8.252419471740723}


EP_train:0:  76%|| 20932/27626 [49:14<15:45,  7.08it/s]

{'epoch': 0, 'iter': 20930, 'avg_loss': 8.892100417456222, 'avg_acc': 49.95207467392862, 'loss': 9.379122734069824}


EP_train:0:  76%|| 20942/27626 [49:15<15:44,  7.07it/s]

{'epoch': 0, 'iter': 20940, 'avg_loss': 8.891984476514192, 'avg_acc': 49.95164987345399, 'loss': 8.22861385345459}


EP_train:0:  76%|| 20952/27626 [49:17<15:43,  7.08it/s]

{'epoch': 0, 'iter': 20950, 'avg_loss': 8.891992370915364, 'avg_acc': 49.95212042384612, 'loss': 9.36343765258789}


EP_train:0:  76%|| 20962/27626 [49:18<15:45,  7.05it/s]

{'epoch': 0, 'iter': 20960, 'avg_loss': 8.891917486851876, 'avg_acc': 49.95154692047135, 'loss': 8.605155944824219}


EP_train:0:  76%|| 20972/27626 [49:20<15:43,  7.05it/s]

{'epoch': 0, 'iter': 20970, 'avg_loss': 8.891900511998987, 'avg_acc': 49.95246411711411, 'loss': 8.044381141662598}


EP_train:0:  76%|| 20982/27626 [49:21<15:45,  7.03it/s]

{'epoch': 0, 'iter': 20980, 'avg_loss': 8.89182477131238, 'avg_acc': 49.95367832801106, 'loss': 8.80754566192627}


EP_train:0:  76%|| 20992/27626 [49:22<15:45,  7.02it/s]

{'epoch': 0, 'iter': 20990, 'avg_loss': 8.891633812307056, 'avg_acc': 49.95414701538755, 'loss': 8.80508041381836}


EP_train:0:  76%|| 21002/27626 [49:24<15:47,  6.99it/s]

{'epoch': 0, 'iter': 21000, 'avg_loss': 8.891569554513422, 'avg_acc': 49.95312723203657, 'loss': 7.825458526611328}


EP_train:0:  76%|| 21012/27626 [49:25<15:49,  6.96it/s]

{'epoch': 0, 'iter': 21010, 'avg_loss': 8.891319556111673, 'avg_acc': 49.95433939365094, 'loss': 8.600883483886719}


EP_train:0:  76%|| 21022/27626 [49:27<15:38,  7.03it/s]

{'epoch': 0, 'iter': 21020, 'avg_loss': 8.89125435643019, 'avg_acc': 49.955699062841916, 'loss': 8.434752464294434}


EP_train:0:  76%|| 21032/27626 [49:28<15:36,  7.04it/s]

{'epoch': 0, 'iter': 21030, 'avg_loss': 8.891028443505583, 'avg_acc': 49.957206029195, 'loss': 7.82462215423584}


EP_train:0:  76%|| 21042/27626 [49:29<15:35,  7.04it/s]

{'epoch': 0, 'iter': 21040, 'avg_loss': 8.890714046444188, 'avg_acc': 49.958860082695686, 'loss': 7.5648393630981445}


EP_train:0:  76%|| 21052/27626 [49:31<15:30,  7.06it/s]

{'epoch': 0, 'iter': 21050, 'avg_loss': 8.890522987405046, 'avg_acc': 49.955762196570234, 'loss': 8.736899375915527}


EP_train:0:  76%|| 21062/27626 [49:32<15:33,  7.03it/s]

{'epoch': 0, 'iter': 21060, 'avg_loss': 8.890372380023498, 'avg_acc': 49.95548644413845, 'loss': 8.127849578857422}


EP_train:0:  76%|| 21072/27626 [49:34<15:39,  6.98it/s]

{'epoch': 0, 'iter': 21070, 'avg_loss': 8.89014255276924, 'avg_acc': 49.95357956433012, 'loss': 8.888091087341309}


EP_train:0:  76%|| 21082/27626 [49:35<15:40,  6.96it/s]

{'epoch': 0, 'iter': 21080, 'avg_loss': 8.889876995518247, 'avg_acc': 49.951378018120586, 'loss': 9.155384063720703}


EP_train:0:  76%|| 21092/27626 [49:37<15:27,  7.04it/s]

{'epoch': 0, 'iter': 21090, 'avg_loss': 8.889869205575597, 'avg_acc': 49.95184557394149, 'loss': 9.103083610534668}


EP_train:0:  76%|| 21102/27626 [49:38<15:26,  7.04it/s]

{'epoch': 0, 'iter': 21100, 'avg_loss': 8.889882953702191, 'avg_acc': 49.950683616890196, 'loss': 8.238693237304688}


EP_train:0:  76%|| 21112/27626 [49:39<15:26,  7.03it/s]

{'epoch': 0, 'iter': 21110, 'avg_loss': 8.889806849665929, 'avg_acc': 49.9524833025437, 'loss': 8.873933792114258}


EP_train:0:  76%|| 21122/27626 [49:41<15:16,  7.10it/s]

{'epoch': 0, 'iter': 21120, 'avg_loss': 8.889641490772103, 'avg_acc': 49.95502106907817, 'loss': 8.449454307556152}


EP_train:0:  76%|| 21132/27626 [49:42<15:28,  7.00it/s]

{'epoch': 0, 'iter': 21130, 'avg_loss': 8.8896526321596, 'avg_acc': 49.95666911173158, 'loss': 9.18958568572998}


EP_train:0:  77%|| 21142/27626 [49:44<15:18,  7.06it/s]

{'epoch': 0, 'iter': 21140, 'avg_loss': 8.889533586214723, 'avg_acc': 49.95861122936474, 'loss': 8.834659576416016}


EP_train:0:  77%|| 21152/27626 [49:45<15:19,  7.04it/s]

{'epoch': 0, 'iter': 21150, 'avg_loss': 8.889422591586671, 'avg_acc': 49.95789206184105, 'loss': 7.888129234313965}


EP_train:0:  77%|| 21162/27626 [49:46<15:19,  7.03it/s]

{'epoch': 0, 'iter': 21160, 'avg_loss': 8.889266049882751, 'avg_acc': 49.95879802466802, 'loss': 8.405660629272461}


EP_train:0:  77%|| 21172/27626 [49:48<15:20,  7.01it/s]

{'epoch': 0, 'iter': 21170, 'avg_loss': 8.889174082931138, 'avg_acc': 49.95866987860753, 'loss': 8.751710891723633}


EP_train:0:  77%|| 21182/27626 [49:49<15:17,  7.02it/s]

{'epoch': 0, 'iter': 21180, 'avg_loss': 8.889119042715818, 'avg_acc': 49.95780416410934, 'loss': 7.956478118896484}


EP_train:0:  77%|| 21192/27626 [49:51<15:05,  7.11it/s]

{'epoch': 0, 'iter': 21190, 'avg_loss': 8.88890050231727, 'avg_acc': 49.96165825114435, 'loss': 8.206382751464844}


EP_train:0:  77%|| 21202/27626 [49:52<15:06,  7.09it/s]

{'epoch': 0, 'iter': 21200, 'avg_loss': 8.888389170976705, 'avg_acc': 49.96049714636102, 'loss': 8.672367095947266}


EP_train:0:  77%|| 21212/27626 [49:53<15:08,  7.06it/s]

{'epoch': 0, 'iter': 21210, 'avg_loss': 8.888388341052389, 'avg_acc': 49.961547074631085, 'loss': 9.003753662109375}


EP_train:0:  77%|| 21222/27626 [49:55<15:13,  7.01it/s]

{'epoch': 0, 'iter': 21220, 'avg_loss': 8.888200770965435, 'avg_acc': 49.96112341548466, 'loss': 8.014091491699219}


EP_train:0:  77%|| 21232/27626 [49:56<15:07,  7.04it/s]

{'epoch': 0, 'iter': 21230, 'avg_loss': 8.888077275953247, 'avg_acc': 49.96202486929489, 'loss': 9.720866203308105}


EP_train:0:  77%|| 21242/27626 [49:58<15:03,  7.07it/s]

{'epoch': 0, 'iter': 21240, 'avg_loss': 8.88800604613587, 'avg_acc': 49.962631232051216, 'loss': 8.540998458862305}


EP_train:0:  77%|| 21252/27626 [49:59<15:10,  7.00it/s]

{'epoch': 0, 'iter': 21250, 'avg_loss': 8.887878659427724, 'avg_acc': 49.961472401298764, 'loss': 7.477208137512207}


EP_train:0:  77%|| 21262/27626 [50:01<15:02,  7.05it/s]

{'epoch': 0, 'iter': 21260, 'avg_loss': 8.887608500685596, 'avg_acc': 49.96501810827336, 'loss': 8.08942985534668}


EP_train:0:  77%|| 21272/27626 [50:02<14:59,  7.06it/s]

{'epoch': 0, 'iter': 21270, 'avg_loss': 8.887422695700616, 'avg_acc': 49.96341850406657, 'loss': 9.6981782913208}


EP_train:0:  77%|| 21282/27626 [50:03<15:00,  7.04it/s]

{'epoch': 0, 'iter': 21280, 'avg_loss': 8.887371442398738, 'avg_acc': 49.963729383017714, 'loss': 9.114727020263672}


EP_train:0:  77%|| 21292/27626 [50:05<14:57,  7.06it/s]

{'epoch': 0, 'iter': 21290, 'avg_loss': 8.887398023131892, 'avg_acc': 49.961691559814, 'loss': 10.156418800354004}


EP_train:0:  77%|| 21302/27626 [50:06<15:01,  7.01it/s]

{'epoch': 0, 'iter': 21300, 'avg_loss': 8.887344744534433, 'avg_acc': 49.96288319797193, 'loss': 8.889026641845703}


EP_train:0:  77%|| 21312/27626 [50:08<14:53,  7.07it/s]

{'epoch': 0, 'iter': 21310, 'avg_loss': 8.887228961552745, 'avg_acc': 49.9631938904791, 'loss': 8.672916412353516}


EP_train:0:  77%|| 21322/27626 [50:09<14:59,  7.01it/s]

{'epoch': 0, 'iter': 21320, 'avg_loss': 8.887048349165346, 'avg_acc': 49.964237137094884, 'loss': 9.021279335021973}


EP_train:0:  77%|| 21332/27626 [50:10<14:56,  7.02it/s]

{'epoch': 0, 'iter': 21330, 'avg_loss': 8.887020294656566, 'avg_acc': 49.96483990436454, 'loss': 8.812941551208496}


EP_train:0:  77%|| 21342/27626 [50:12<14:53,  7.03it/s]

{'epoch': 0, 'iter': 21340, 'avg_loss': 8.886896991197956, 'avg_acc': 49.96602783374725, 'loss': 9.082174301147461}


EP_train:0:  77%|| 21352/27626 [50:13<14:58,  6.98it/s]

{'epoch': 0, 'iter': 21350, 'avg_loss': 8.886722780838431, 'avg_acc': 49.96560465551965, 'loss': 8.657125473022461}


EP_train:0:  77%|| 21362/27626 [50:15<14:49,  7.05it/s]

{'epoch': 0, 'iter': 21360, 'avg_loss': 8.886651417204662, 'avg_acc': 49.96664481999906, 'loss': 9.125426292419434}


EP_train:0:  77%|| 21372/27626 [50:16<14:49,  7.03it/s]

{'epoch': 0, 'iter': 21370, 'avg_loss': 8.886487879286115, 'avg_acc': 49.96578307051612, 'loss': 9.006454467773438}


EP_train:0:  77%|| 21382/27626 [50:18<14:41,  7.08it/s]

{'epoch': 0, 'iter': 21380, 'avg_loss': 8.886494538139614, 'avg_acc': 49.96638370515878, 'loss': 9.048680305480957}


EP_train:0:  77%|| 21392/27626 [50:19<14:47,  7.03it/s]

{'epoch': 0, 'iter': 21390, 'avg_loss': 8.886408226023349, 'avg_acc': 49.96742204665514, 'loss': 8.774886131286621}


EP_train:0:  77%|| 21402/27626 [50:20<14:45,  7.03it/s]

{'epoch': 0, 'iter': 21400, 'avg_loss': 8.886192084060617, 'avg_acc': 49.96845941778422, 'loss': 7.8732829093933105}


EP_train:0:  78%|| 21412/27626 [50:22<14:42,  7.04it/s]

{'epoch': 0, 'iter': 21410, 'avg_loss': 8.886047313684484, 'avg_acc': 49.96847414880202, 'loss': 8.121861457824707}


EP_train:0:  78%|| 21422/27626 [50:23<14:36,  7.08it/s]

{'epoch': 0, 'iter': 21420, 'avg_loss': 8.885968019768535, 'avg_acc': 49.96936417534195, 'loss': 9.076656341552734}


EP_train:0:  78%|| 21432/27626 [50:25<14:39,  7.04it/s]

{'epoch': 0, 'iter': 21430, 'avg_loss': 8.885771084374683, 'avg_acc': 49.97127408893658, 'loss': 7.616783618927002}


EP_train:0:  78%|| 21442/27626 [50:26<14:40,  7.03it/s]

{'epoch': 0, 'iter': 21440, 'avg_loss': 8.885863140210816, 'avg_acc': 49.97114173779208, 'loss': 9.384330749511719}


EP_train:0:  78%|| 21452/27626 [50:27<14:39,  7.02it/s]

{'epoch': 0, 'iter': 21450, 'avg_loss': 8.885696933295176, 'avg_acc': 49.97100951004615, 'loss': 8.002152442932129}


EP_train:0:  78%|| 21462/27626 [50:29<14:31,  7.07it/s]

{'epoch': 0, 'iter': 21460, 'avg_loss': 8.885454506338833, 'avg_acc': 49.9708774055263, 'loss': 8.293946266174316}


EP_train:0:  78%|| 21472/27626 [50:30<14:31,  7.06it/s]

{'epoch': 0, 'iter': 21470, 'avg_loss': 8.8852295273923, 'avg_acc': 49.97118205952214, 'loss': 8.686525344848633}


EP_train:0:  78%|| 21482/27626 [50:32<14:36,  7.01it/s]

{'epoch': 0, 'iter': 21480, 'avg_loss': 8.885059065139748, 'avg_acc': 49.971340952469625, 'loss': 8.387292861938477}


EP_train:0:  78%|| 21492/27626 [50:33<14:29,  7.05it/s]

{'epoch': 0, 'iter': 21490, 'avg_loss': 8.884892788668209, 'avg_acc': 49.97237215578615, 'loss': 8.434160232543945}


EP_train:0:  78%|| 21502/27626 [50:34<14:29,  7.04it/s]

{'epoch': 0, 'iter': 21500, 'avg_loss': 8.884854324584417, 'avg_acc': 49.97122226873169, 'loss': 9.046479225158691}


EP_train:0:  78%|| 21512/27626 [50:36<14:26,  7.06it/s]

{'epoch': 0, 'iter': 21510, 'avg_loss': 8.884736989588648, 'avg_acc': 49.96818488215332, 'loss': 8.841071128845215}


EP_train:0:  78%|| 21522/27626 [50:37<14:26,  7.05it/s]

{'epoch': 0, 'iter': 21520, 'avg_loss': 8.884686636828361, 'avg_acc': 49.969361321499925, 'loss': 8.354118347167969}


EP_train:0:  78%|| 21532/27626 [50:39<14:28,  7.02it/s]

{'epoch': 0, 'iter': 21530, 'avg_loss': 8.884416883231305, 'avg_acc': 49.9703915284938, 'loss': 8.716815948486328}


EP_train:0:  78%|| 21542/27626 [50:40<14:20,  7.07it/s]

{'epoch': 0, 'iter': 21540, 'avg_loss': 8.884235814091312, 'avg_acc': 49.970840490227936, 'loss': 8.48026180267334}


EP_train:0:  78%|| 21552/27626 [50:42<14:26,  7.01it/s]

{'epoch': 0, 'iter': 21550, 'avg_loss': 8.88412878710368, 'avg_acc': 49.97389912301053, 'loss': 9.203702926635742}


EP_train:0:  78%|| 21562/27626 [50:43<14:24,  7.01it/s]

{'epoch': 0, 'iter': 21560, 'avg_loss': 8.88407948844138, 'avg_acc': 49.971592226705624, 'loss': 9.028986930847168}


EP_train:0:  78%|| 21572/27626 [50:44<14:19,  7.05it/s]

{'epoch': 0, 'iter': 21570, 'avg_loss': 8.88395916888956, 'avg_acc': 49.971750266561585, 'loss': 8.201261520385742}


EP_train:0:  78%|| 21582/27626 [50:46<14:15,  7.06it/s]

{'epoch': 0, 'iter': 21580, 'avg_loss': 8.883863370084137, 'avg_acc': 49.96872248737315, 'loss': 8.662171363830566}


EP_train:0:  78%|| 21592/27626 [50:47<14:10,  7.09it/s]

{'epoch': 0, 'iter': 21590, 'avg_loss': 8.883657046297692, 'avg_acc': 49.969315918669814, 'loss': 8.208106994628906}


EP_train:0:  78%|| 21602/27626 [50:49<14:10,  7.09it/s]

{'epoch': 0, 'iter': 21600, 'avg_loss': 8.883607503457354, 'avg_acc': 49.96875144669228, 'loss': 8.834993362426758}


EP_train:0:  78%|| 21612/27626 [50:50<14:11,  7.06it/s]

{'epoch': 0, 'iter': 21610, 'avg_loss': 8.883555217572757, 'avg_acc': 49.96862130396557, 'loss': 8.662299156188965}


EP_train:0:  78%|| 21622/27626 [50:51<14:11,  7.05it/s]

{'epoch': 0, 'iter': 21620, 'avg_loss': 8.883483090424294, 'avg_acc': 49.96964756486749, 'loss': 7.844686031341553}


EP_train:0:  78%|| 21632/27626 [50:53<14:07,  7.07it/s]

{'epoch': 0, 'iter': 21630, 'avg_loss': 8.883270736871747, 'avg_acc': 49.96951712819565, 'loss': 9.061955451965332}


EP_train:0:  78%|| 21642/27626 [50:54<14:11,  7.03it/s]

{'epoch': 0, 'iter': 21640, 'avg_loss': 8.883106131853532, 'avg_acc': 49.97039762487871, 'loss': 7.65700101852417}


EP_train:0:  78%|| 21652/27626 [50:56<14:04,  7.07it/s]

{'epoch': 0, 'iter': 21650, 'avg_loss': 8.883020148674373, 'avg_acc': 49.97069996766893, 'loss': 9.575202941894531}


EP_train:0:  78%|| 21662/27626 [50:57<14:03,  7.07it/s]

{'epoch': 0, 'iter': 21660, 'avg_loss': 8.882899432822688, 'avg_acc': 49.972300447809424, 'loss': 8.84833812713623}


EP_train:0:  78%|| 21672/27626 [50:59<14:04,  7.05it/s]

{'epoch': 0, 'iter': 21670, 'avg_loss': 8.882739070994255, 'avg_acc': 49.971736421946375, 'loss': 8.583395957946777}


EP_train:0:  78%|| 21682/27626 [51:00<14:01,  7.06it/s]

{'epoch': 0, 'iter': 21680, 'avg_loss': 8.882601038104664, 'avg_acc': 49.97232599972326, 'loss': 8.388124465942383}


EP_train:0:  79%|| 21692/27626 [51:01<13:58,  7.08it/s]

{'epoch': 0, 'iter': 21690, 'avg_loss': 8.882663780973196, 'avg_acc': 49.97277096491632, 'loss': 9.153030395507812}


EP_train:0:  79%|| 21702/27626 [51:03<13:56,  7.08it/s]

{'epoch': 0, 'iter': 21700, 'avg_loss': 8.882517589302603, 'avg_acc': 49.97379153034422, 'loss': 8.908393859863281}


EP_train:0:  79%|| 21712/27626 [51:04<14:00,  7.04it/s]

{'epoch': 0, 'iter': 21710, 'avg_loss': 8.882457500237432, 'avg_acc': 49.97337179310028, 'loss': 8.142705917358398}


EP_train:0:  79%|| 21722/27626 [51:06<13:59,  7.03it/s]

{'epoch': 0, 'iter': 21720, 'avg_loss': 8.882415638999118, 'avg_acc': 49.97367179227476, 'loss': 9.157279014587402}


EP_train:0:  79%|| 21732/27626 [51:07<13:58,  7.03it/s]

{'epoch': 0, 'iter': 21730, 'avg_loss': 8.882305644921255, 'avg_acc': 49.97411531912935, 'loss': 8.813942909240723}


EP_train:0:  79%|| 21742/27626 [51:08<13:52,  7.07it/s]

{'epoch': 0, 'iter': 21740, 'avg_loss': 8.882090340421268, 'avg_acc': 49.9747021756129, 'loss': 9.525166511535645}


EP_train:0:  79%|| 21752/27626 [51:10<13:55,  7.03it/s]

{'epoch': 0, 'iter': 21750, 'avg_loss': 8.88193994162302, 'avg_acc': 49.975719507149094, 'loss': 8.423688888549805}


EP_train:0:  79%|| 21762/27626 [51:11<13:49,  7.07it/s]

{'epoch': 0, 'iter': 21760, 'avg_loss': 8.881867681168211, 'avg_acc': 49.97644869261523, 'loss': 9.7076416015625}


EP_train:0:  79%|| 21772/27626 [51:13<13:44,  7.10it/s]

{'epoch': 0, 'iter': 21770, 'avg_loss': 8.88172922077015, 'avg_acc': 49.97746428735474, 'loss': 8.406700134277344}


EP_train:0:  79%|| 21782/27626 [51:14<13:41,  7.11it/s]

{'epoch': 0, 'iter': 21780, 'avg_loss': 8.881605214495988, 'avg_acc': 49.977331160185486, 'loss': 7.927894115447998}


EP_train:0:  79%|| 21792/27626 [51:15<13:42,  7.10it/s]

{'epoch': 0, 'iter': 21790, 'avg_loss': 8.881336452338306, 'avg_acc': 49.97633770822817, 'loss': 8.669042587280273}


EP_train:0:  79%|| 21802/27626 [51:17<13:43,  7.07it/s]

{'epoch': 0, 'iter': 21800, 'avg_loss': 8.88095626364738, 'avg_acc': 49.974628457410205, 'loss': 8.111347198486328}


EP_train:0:  79%|| 21812/27626 [51:18<13:44,  7.06it/s]

{'epoch': 0, 'iter': 21810, 'avg_loss': 8.880938604649648, 'avg_acc': 49.97621612947595, 'loss': 9.742189407348633}


EP_train:0:  79%|| 21822/27626 [51:20<13:36,  7.11it/s]

{'epoch': 0, 'iter': 21820, 'avg_loss': 8.880797884683988, 'avg_acc': 49.977086293020484, 'loss': 8.952466011047363}


EP_train:0:  79%|| 21832/27626 [51:21<13:39,  7.07it/s]

{'epoch': 0, 'iter': 21830, 'avg_loss': 8.880651249024321, 'avg_acc': 49.97638106362512, 'loss': 9.049234390258789}


EP_train:0:  79%|| 21842/27626 [51:22<13:42,  7.04it/s]

{'epoch': 0, 'iter': 21840, 'avg_loss': 8.88049390129235, 'avg_acc': 49.97796575248386, 'loss': 8.258684158325195}


EP_train:0:  79%|| 21852/27626 [51:24<13:34,  7.09it/s]

{'epoch': 0, 'iter': 21850, 'avg_loss': 8.880398558247887, 'avg_acc': 49.97754679419707, 'loss': 10.229167938232422}


EP_train:0:  79%|| 21862/27626 [51:25<13:36,  7.06it/s]

{'epoch': 0, 'iter': 21860, 'avg_loss': 8.880127863480228, 'avg_acc': 49.9766993733132, 'loss': 7.945969581604004}


EP_train:0:  79%|| 21872/27626 [51:27<13:32,  7.08it/s]

{'epoch': 0, 'iter': 21870, 'avg_loss': 8.87999747176757, 'avg_acc': 49.97771020986695, 'loss': 8.674222946166992}


EP_train:0:  79%|| 21882/27626 [51:28<13:34,  7.05it/s]

{'epoch': 0, 'iter': 21880, 'avg_loss': 8.879731551106236, 'avg_acc': 49.977434760751336, 'loss': 7.956847190856934}


EP_train:0:  79%|| 21892/27626 [51:30<13:42,  6.97it/s]

{'epoch': 0, 'iter': 21890, 'avg_loss': 8.879706364933513, 'avg_acc': 49.97744506874972, 'loss': 7.778508186340332}


EP_train:0:  79%|| 21902/27626 [51:31<13:30,  7.06it/s]

{'epoch': 0, 'iter': 21900, 'avg_loss': 8.879539712651843, 'avg_acc': 49.97588580430117, 'loss': 8.106590270996094}


EP_train:0:  79%|| 21912/27626 [51:32<13:28,  7.06it/s]

{'epoch': 0, 'iter': 21910, 'avg_loss': 8.879331585350174, 'avg_acc': 49.976467299529915, 'loss': 8.64759349822998}


EP_train:0:  79%|| 21922/27626 [51:34<13:23,  7.10it/s]

{'epoch': 0, 'iter': 21920, 'avg_loss': 8.879274396477408, 'avg_acc': 49.97619292003102, 'loss': 8.681347846984863}


EP_train:0:  79%|| 21932/27626 [51:35<13:29,  7.03it/s]

{'epoch': 0, 'iter': 21930, 'avg_loss': 8.879424615976264, 'avg_acc': 49.97677374492727, 'loss': 8.963455200195312}


EP_train:0:  79%|| 21942/27626 [51:37<13:23,  7.08it/s]

{'epoch': 0, 'iter': 21940, 'avg_loss': 8.879230828974361, 'avg_acc': 49.9764994758671, 'loss': 8.43213176727295}


EP_train:0:  79%|| 21952/27626 [51:38<13:18,  7.11it/s]

{'epoch': 0, 'iter': 21950, 'avg_loss': 8.878998036971316, 'avg_acc': 49.978076169650585, 'loss': 7.952339172363281}


EP_train:0:  79%|| 21962/27626 [51:39<13:25,  7.03it/s]

{'epoch': 0, 'iter': 21960, 'avg_loss': 8.878863936450701, 'avg_acc': 49.97765925959656, 'loss': 8.056303977966309}


EP_train:0:  80%|| 21972/27626 [51:41<13:29,  6.98it/s]

{'epoch': 0, 'iter': 21970, 'avg_loss': 8.87875077973065, 'avg_acc': 49.976531564334806, 'loss': 9.030572891235352}


EP_train:0:  80%|| 21982/27626 [51:42<13:28,  6.98it/s]

{'epoch': 0, 'iter': 21980, 'avg_loss': 8.878727284951182, 'avg_acc': 49.97611573631773, 'loss': 9.432378768920898}


EP_train:0:  80%|| 21992/27626 [51:44<13:21,  7.03it/s]

{'epoch': 0, 'iter': 21990, 'avg_loss': 8.878835499625453, 'avg_acc': 49.97626870083216, 'loss': 10.078392028808594}


EP_train:0:  80%|| 22002/27626 [51:45<13:21,  7.02it/s]

{'epoch': 0, 'iter': 22000, 'avg_loss': 8.878776881397975, 'avg_acc': 49.97698968228717, 'loss': 8.895792961120605}


EP_train:0:  80%|| 22012/27626 [51:47<13:18,  7.03it/s]

{'epoch': 0, 'iter': 22010, 'avg_loss': 8.878684392689651, 'avg_acc': 49.97558039162237, 'loss': 9.216204643249512}


EP_train:0:  80%|| 22022/27626 [51:48<13:19,  7.01it/s]

{'epoch': 0, 'iter': 22020, 'avg_loss': 8.878603531940666, 'avg_acc': 49.97303710094909, 'loss': 9.126404762268066}


EP_train:0:  80%|| 22032/27626 [51:49<13:15,  7.03it/s]

{'epoch': 0, 'iter': 22030, 'avg_loss': 8.878397709068745, 'avg_acc': 49.97177272933593, 'loss': 8.612594604492188}


EP_train:0:  80%|| 22042/27626 [51:51<13:09,  7.08it/s]

{'epoch': 0, 'iter': 22040, 'avg_loss': 8.878379815352016, 'avg_acc': 49.972919785853634, 'loss': 8.971007347106934}


EP_train:0:  80%|| 22052/27626 [51:52<13:18,  6.98it/s]

{'epoch': 0, 'iter': 22050, 'avg_loss': 8.878261870480705, 'avg_acc': 49.97307378350188, 'loss': 8.399703025817871}


EP_train:0:  80%|| 22062/27626 [51:54<13:10,  7.03it/s]

{'epoch': 0, 'iter': 22060, 'avg_loss': 8.878144523314026, 'avg_acc': 49.97407755768097, 'loss': 8.41748332977295}


EP_train:0:  80%|| 22072/27626 [51:55<13:08,  7.04it/s]

{'epoch': 0, 'iter': 22070, 'avg_loss': 8.877970531494388, 'avg_acc': 49.97423089121472, 'loss': 8.619072914123535}


EP_train:0:  80%|| 22082/27626 [51:56<13:07,  7.04it/s]

{'epoch': 0, 'iter': 22080, 'avg_loss': 8.877874909749007, 'avg_acc': 49.97438408586567, 'loss': 8.33853530883789}


EP_train:0:  80%|| 22092/27626 [51:58<13:02,  7.07it/s]

{'epoch': 0, 'iter': 22090, 'avg_loss': 8.877797886653019, 'avg_acc': 49.97326399891359, 'loss': 7.441497325897217}


EP_train:0:  80%|| 22102/27626 [51:59<13:07,  7.01it/s]

{'epoch': 0, 'iter': 22100, 'avg_loss': 8.877628583076566, 'avg_acc': 49.97638681507624, 'loss': 9.004429817199707}


EP_train:0:  80%|| 22112/27626 [52:01<12:57,  7.09it/s]

{'epoch': 0, 'iter': 22110, 'avg_loss': 8.877700358371786, 'avg_acc': 49.97682149156528, 'loss': 9.66819953918457}


EP_train:0:  80%|| 22122/27626 [52:02<12:57,  7.08it/s]

{'epoch': 0, 'iter': 22120, 'avg_loss': 8.877500534273176, 'avg_acc': 49.97654943266579, 'loss': 8.798314094543457}


EP_train:0:  80%|| 22132/27626 [52:03<12:56,  7.08it/s]

{'epoch': 0, 'iter': 22130, 'avg_loss': 8.877471563577016, 'avg_acc': 49.975571596403235, 'loss': 8.890091896057129}


EP_train:0:  80%|| 22142/27626 [52:05<12:57,  7.05it/s]

{'epoch': 0, 'iter': 22140, 'avg_loss': 8.877338734278698, 'avg_acc': 49.97572377038074, 'loss': 8.80514907836914}


EP_train:0:  80%|| 22152/27626 [52:06<13:00,  7.01it/s]

{'epoch': 0, 'iter': 22150, 'avg_loss': 8.877195610568881, 'avg_acc': 49.976581192722676, 'loss': 8.459892272949219}


EP_train:0:  80%|| 22162/27626 [52:08<13:00,  7.00it/s]

{'epoch': 0, 'iter': 22160, 'avg_loss': 8.877116944610112, 'avg_acc': 49.97419453093272, 'loss': 8.885565757751465}


EP_train:0:  80%|| 22172/27626 [52:09<13:02,  6.97it/s]

{'epoch': 0, 'iter': 22170, 'avg_loss': 8.877179176482322, 'avg_acc': 49.97476996978034, 'loss': 8.41512680053711}


EP_train:0:  80%|| 22182/27626 [52:11<12:57,  7.00it/s]

{'epoch': 0, 'iter': 22180, 'avg_loss': 8.87719911394136, 'avg_acc': 49.97478134439385, 'loss': 8.263195037841797}


EP_train:0:  80%|| 22192/27626 [52:12<12:51,  7.05it/s]

{'epoch': 0, 'iter': 22190, 'avg_loss': 8.876946032178886, 'avg_acc': 49.97606011446082, 'loss': 8.352394104003906}


EP_train:0:  80%|| 22202/27626 [52:13<12:50,  7.04it/s]

{'epoch': 0, 'iter': 22200, 'avg_loss': 8.876696975020362, 'avg_acc': 49.976211657132566, 'loss': 8.434648513793945}


EP_train:0:  80%|| 22212/27626 [52:15<12:48,  7.04it/s]

{'epoch': 0, 'iter': 22210, 'avg_loss': 8.876649224770492, 'avg_acc': 49.97664445545, 'loss': 8.446800231933594}


EP_train:0:  80%|| 22222/27626 [52:16<12:49,  7.02it/s]

{'epoch': 0, 'iter': 22220, 'avg_loss': 8.876379549060738, 'avg_acc': 49.97496737320552, 'loss': 8.380899429321289}


EP_train:0:  80%|| 22232/27626 [52:18<12:44,  7.05it/s]

{'epoch': 0, 'iter': 22230, 'avg_loss': 8.876266412849525, 'avg_acc': 49.9742757860645, 'loss': 8.649480819702148}


EP_train:0:  81%|| 22242/27626 [52:19<12:46,  7.03it/s]

{'epoch': 0, 'iter': 22240, 'avg_loss': 8.87605014569221, 'avg_acc': 49.9737253270986, 'loss': 7.99875545501709}


EP_train:0:  81%|| 22252/27626 [52:21<12:52,  6.95it/s]

{'epoch': 0, 'iter': 22250, 'avg_loss': 8.87601752860011, 'avg_acc': 49.97486068041886, 'loss': 7.96292781829834}


EP_train:0:  81%|| 22262/27626 [52:22<12:49,  6.97it/s]

{'epoch': 0, 'iter': 22260, 'avg_loss': 8.875923208703913, 'avg_acc': 49.97543349355375, 'loss': 8.575376510620117}


EP_train:0:  81%|| 22272/27626 [52:23<12:42,  7.02it/s]

{'epoch': 0, 'iter': 22270, 'avg_loss': 8.87581516409492, 'avg_acc': 49.978250864352745, 'loss': 9.051079750061035}


EP_train:0:  81%|| 22282/27626 [52:25<12:38,  7.05it/s]

{'epoch': 0, 'iter': 22280, 'avg_loss': 8.875727062598576, 'avg_acc': 49.97798011758898, 'loss': 9.131099700927734}


EP_train:0:  81%|| 22292/27626 [52:26<12:34,  7.07it/s]

{'epoch': 0, 'iter': 22290, 'avg_loss': 8.8756153058687, 'avg_acc': 49.979391907047685, 'loss': 8.801709175109863}


EP_train:0:  81%|| 22302/27626 [52:28<12:39,  7.01it/s]

{'epoch': 0, 'iter': 22300, 'avg_loss': 8.875508292288712, 'avg_acc': 49.97785973723152, 'loss': 8.355379104614258}


EP_train:0:  81%|| 22312/27626 [52:29<12:35,  7.04it/s]

{'epoch': 0, 'iter': 22310, 'avg_loss': 8.875404753767707, 'avg_acc': 49.97969051140693, 'loss': 8.655141830444336}


EP_train:0:  81%|| 22322/27626 [52:30<12:30,  7.07it/s]

{'epoch': 0, 'iter': 22320, 'avg_loss': 8.875238017686394, 'avg_acc': 49.97871959141615, 'loss': 8.088507652282715}


EP_train:0:  81%|| 22332/27626 [52:32<12:32,  7.03it/s]

{'epoch': 0, 'iter': 22330, 'avg_loss': 8.875089115998486, 'avg_acc': 49.98152792082755, 'loss': 9.612732887268066}


EP_train:0:  81%|| 22342/27626 [52:33<12:33,  7.01it/s]

{'epoch': 0, 'iter': 22340, 'avg_loss': 8.874929766375153, 'avg_acc': 49.98083680229175, 'loss': 9.365436553955078}


EP_train:0:  81%|| 22352/27626 [52:35<12:34,  6.99it/s]

{'epoch': 0, 'iter': 22350, 'avg_loss': 8.874784684032942, 'avg_acc': 49.981544449912754, 'loss': 9.19753360748291}


EP_train:0:  81%|| 22362/27626 [52:36<12:28,  7.03it/s]

{'epoch': 0, 'iter': 22360, 'avg_loss': 8.874585031316881, 'avg_acc': 49.97875765842315, 'loss': 8.696150779724121}


EP_train:0:  81%|| 22372/27626 [52:38<12:24,  7.05it/s]

{'epoch': 0, 'iter': 22370, 'avg_loss': 8.874689795591404, 'avg_acc': 49.98030374145099, 'loss': 9.874429702758789}


EP_train:0:  81%|| 22382/27626 [52:39<12:26,  7.03it/s]

{'epoch': 0, 'iter': 22380, 'avg_loss': 8.874789631828525, 'avg_acc': 49.98003328716322, 'loss': 8.69356632232666}


EP_train:0:  81%|| 22392/27626 [52:40<12:24,  7.03it/s]

{'epoch': 0, 'iter': 22390, 'avg_loss': 8.87467091444812, 'avg_acc': 49.98241480952168, 'loss': 8.4462308883667}


EP_train:0:  81%|| 22402/27626 [52:42<12:13,  7.12it/s]

{'epoch': 0, 'iter': 22400, 'avg_loss': 8.87475619129419, 'avg_acc': 49.98130663809651, 'loss': 9.464797973632812}


EP_train:0:  81%|| 22412/27626 [52:43<12:16,  7.08it/s]

{'epoch': 0, 'iter': 22410, 'avg_loss': 8.874726526389654, 'avg_acc': 49.97950225335773, 'loss': 8.97551441192627}


EP_train:0:  81%|| 22422/27626 [52:45<12:16,  7.06it/s]

{'epoch': 0, 'iter': 22420, 'avg_loss': 8.874508251094737, 'avg_acc': 49.980068908612466, 'loss': 8.642715454101562}


EP_train:0:  81%|| 22432/27626 [52:46<12:22,  7.00it/s]

{'epoch': 0, 'iter': 22430, 'avg_loss': 8.874465950248256, 'avg_acc': 49.98202821987428, 'loss': 8.752689361572266}


EP_train:0:  81%|| 22442/27626 [52:47<12:17,  7.03it/s]

{'epoch': 0, 'iter': 22440, 'avg_loss': 8.87429238536514, 'avg_acc': 49.98203622833207, 'loss': 8.788719177246094}


EP_train:0:  81%|| 22452/27626 [52:49<12:14,  7.04it/s]

{'epoch': 0, 'iter': 22450, 'avg_loss': 8.874183744201416, 'avg_acc': 49.979817157364934, 'loss': 8.479281425476074}


EP_train:0:  81%|| 22462/27626 [52:50<12:16,  7.01it/s]

{'epoch': 0, 'iter': 22460, 'avg_loss': 8.874078622256429, 'avg_acc': 49.980382663283024, 'loss': 8.325560569763184}


EP_train:0:  81%|| 22472/27626 [52:52<12:07,  7.09it/s]

{'epoch': 0, 'iter': 22470, 'avg_loss': 8.873975800553309, 'avg_acc': 49.98039139335143, 'loss': 8.409172058105469}


EP_train:0:  81%|| 22482/27626 [52:53<12:13,  7.02it/s]

{'epoch': 0, 'iter': 22480, 'avg_loss': 8.87377490753816, 'avg_acc': 49.97901005293359, 'loss': 9.007753372192383}


EP_train:0:  81%|| 22492/27626 [52:54<12:06,  7.06it/s]

{'epoch': 0, 'iter': 22490, 'avg_loss': 8.873615016541217, 'avg_acc': 49.98026988573207, 'loss': 9.660706520080566}


EP_train:0:  81%|| 22502/27626 [52:56<12:05,  7.06it/s]

{'epoch': 0, 'iter': 22500, 'avg_loss': 8.87345128544468, 'avg_acc': 49.97847317896982, 'loss': 8.537278175354004}


EP_train:0:  81%|| 22512/27626 [52:57<12:13,  6.98it/s]

{'epoch': 0, 'iter': 22510, 'avg_loss': 8.873323340210698, 'avg_acc': 49.979176846874864, 'loss': 8.914583206176758}


EP_train:0:  82%|| 22522/27626 [52:59<12:14,  6.95it/s]

{'epoch': 0, 'iter': 22520, 'avg_loss': 8.87320528771027, 'avg_acc': 49.98043492740109, 'loss': 8.243733406066895}


EP_train:0:  82%|| 22532/27626 [53:00<12:03,  7.04it/s]

{'epoch': 0, 'iter': 22530, 'avg_loss': 8.873231302178137, 'avg_acc': 49.98044361102481, 'loss': 8.800335884094238}


EP_train:0:  82%|| 22542/27626 [53:02<12:03,  7.03it/s]

{'epoch': 0, 'iter': 22540, 'avg_loss': 8.873330994298817, 'avg_acc': 49.98183864957189, 'loss': 8.866232872009277}


EP_train:0:  82%|| 22552/27626 [53:03<11:57,  7.07it/s]

{'epoch': 0, 'iter': 22550, 'avg_loss': 8.873155393567055, 'avg_acc': 49.98337102567514, 'loss': 7.412181854248047}


EP_train:0:  82%|| 22562/27626 [53:04<11:55,  7.08it/s]

{'epoch': 0, 'iter': 22560, 'avg_loss': 8.87292669472619, 'avg_acc': 49.98365542307522, 'loss': 8.161529541015625}


EP_train:0:  82%|| 22572/27626 [53:06<11:53,  7.08it/s]

{'epoch': 0, 'iter': 22570, 'avg_loss': 8.872889944597677, 'avg_acc': 49.983662664480974, 'loss': 8.19939136505127}


EP_train:0:  82%|| 22582/27626 [53:07<11:56,  7.04it/s]

{'epoch': 0, 'iter': 22580, 'avg_loss': 8.872656939655911, 'avg_acc': 49.981594039236526, 'loss': 8.344734191894531}


EP_train:0:  82%|| 22592/27626 [53:09<11:55,  7.03it/s]

{'epoch': 0, 'iter': 22590, 'avg_loss': 8.872468911848465, 'avg_acc': 49.98118719844186, 'loss': 8.345930099487305}


EP_train:0:  82%|| 22602/27626 [53:10<11:51,  7.06it/s]

{'epoch': 0, 'iter': 22600, 'avg_loss': 8.872320625234623, 'avg_acc': 49.982301668067784, 'loss': 8.981143951416016}


EP_train:0:  82%|| 22612/27626 [53:11<12:03,  6.93it/s]

{'epoch': 0, 'iter': 22610, 'avg_loss': 8.872166719162914, 'avg_acc': 49.98106563177215, 'loss': 8.015788078308105}


EP_train:0:  82%|| 22622/27626 [53:13<11:54,  7.00it/s]

{'epoch': 0, 'iter': 22620, 'avg_loss': 8.872056483758666, 'avg_acc': 49.98052141815128, 'loss': 9.093313217163086}


EP_train:0:  82%|| 22632/27626 [53:14<11:50,  7.03it/s]

{'epoch': 0, 'iter': 22630, 'avg_loss': 8.871889212882543, 'avg_acc': 49.9810823648977, 'loss': 8.554649353027344}


EP_train:0:  82%|| 22642/27626 [53:16<11:44,  7.07it/s]

{'epoch': 0, 'iter': 22640, 'avg_loss': 8.871779738769456, 'avg_acc': 49.982885031579876, 'loss': 8.2164888381958}


EP_train:0:  82%|| 22652/27626 [53:17<11:42,  7.08it/s]

{'epoch': 0, 'iter': 22650, 'avg_loss': 8.871578926282456, 'avg_acc': 49.983996291554455, 'loss': 8.302047729492188}


EP_train:0:  82%|| 22662/27626 [53:19<11:45,  7.04it/s]

{'epoch': 0, 'iter': 22660, 'avg_loss': 8.871506985464508, 'avg_acc': 49.98483076651516, 'loss': 8.116374015808105}


EP_train:0:  82%|| 22672/27626 [53:20<11:40,  7.07it/s]

{'epoch': 0, 'iter': 22670, 'avg_loss': 8.87143503235107, 'avg_acc': 49.98359688588946, 'loss': 9.15774154663086}


EP_train:0:  82%|| 22682/27626 [53:21<11:39,  7.07it/s]

{'epoch': 0, 'iter': 22680, 'avg_loss': 8.871316490284345, 'avg_acc': 49.983052995899655, 'loss': 8.38078784942627}


EP_train:0:  82%|| 22692/27626 [53:23<11:43,  7.02it/s]

{'epoch': 0, 'iter': 22690, 'avg_loss': 8.871237741540693, 'avg_acc': 49.98333590410295, 'loss': 8.223383903503418}


EP_train:0:  82%|| 22702/27626 [53:24<11:47,  6.96it/s]

{'epoch': 0, 'iter': 22700, 'avg_loss': 8.871043505424351, 'avg_acc': 49.98403154046077, 'loss': 9.032246589660645}


EP_train:0:  82%|| 22712/27626 [53:26<11:39,  7.03it/s]

{'epoch': 0, 'iter': 22710, 'avg_loss': 8.870901933765532, 'avg_acc': 49.98431376865836, 'loss': 7.993358612060547}


EP_train:0:  82%|| 22722/27626 [53:27<11:36,  7.04it/s]

{'epoch': 0, 'iter': 22720, 'avg_loss': 8.870722216035347, 'avg_acc': 49.986108665991814, 'loss': 8.629026412963867}


EP_train:0:  82%|| 22732/27626 [53:28<11:33,  7.06it/s]

{'epoch': 0, 'iter': 22730, 'avg_loss': 8.870611994880413, 'avg_acc': 49.986252254630244, 'loss': 7.989510536193848}


EP_train:0:  82%|| 22742/27626 [53:30<11:36,  7.02it/s]

{'epoch': 0, 'iter': 22740, 'avg_loss': 8.870560590051147, 'avg_acc': 49.98337254298404, 'loss': 8.808082580566406}


EP_train:0:  82%|| 22752/27626 [53:31<11:24,  7.12it/s]

{'epoch': 0, 'iter': 22750, 'avg_loss': 8.870330936614982, 'avg_acc': 49.98104478924003, 'loss': 7.923558712005615}


EP_train:0:  82%|| 22762/27626 [53:33<11:28,  7.07it/s]

{'epoch': 0, 'iter': 22760, 'avg_loss': 8.87025187724787, 'avg_acc': 49.98077852466939, 'loss': 8.297080039978027}


EP_train:0:  82%|| 22772/27626 [53:34<11:23,  7.10it/s]

{'epoch': 0, 'iter': 22770, 'avg_loss': 8.870095388748279, 'avg_acc': 49.98325721312195, 'loss': 9.054610252380371}


EP_train:0:  82%|| 22782/27626 [53:35<11:25,  7.07it/s]

{'epoch': 0, 'iter': 22780, 'avg_loss': 8.870093286929643, 'avg_acc': 49.98230433255783, 'loss': 8.846470832824707}


EP_train:0:  83%|| 22792/27626 [53:37<11:25,  7.05it/s]

{'epoch': 0, 'iter': 22790, 'avg_loss': 8.870061760614892, 'avg_acc': 49.983271905576764, 'loss': 7.449544429779053}


EP_train:0:  83%|| 22802/27626 [53:38<11:25,  7.04it/s]

{'epoch': 0, 'iter': 22800, 'avg_loss': 8.869980168747256, 'avg_acc': 49.985472128415424, 'loss': 9.133330345153809}


EP_train:0:  83%|| 22812/27626 [53:40<11:17,  7.11it/s]

{'epoch': 0, 'iter': 22810, 'avg_loss': 8.86996225027026, 'avg_acc': 49.9858894831441, 'loss': 9.02950668334961}


EP_train:0:  83%|| 22822/27626 [53:41<11:22,  7.04it/s]

{'epoch': 0, 'iter': 22820, 'avg_loss': 8.86972881528342, 'avg_acc': 49.98370470180974, 'loss': 8.414616584777832}


EP_train:0:  83%|| 22832/27626 [53:43<11:18,  7.07it/s]

{'epoch': 0, 'iter': 22830, 'avg_loss': 8.869491975518722, 'avg_acc': 49.98302746266042, 'loss': 8.075881958007812}


EP_train:0:  83%|| 22842/27626 [53:44<11:16,  7.07it/s]

{'epoch': 0, 'iter': 22840, 'avg_loss': 8.869309262977602, 'avg_acc': 49.98330852414517, 'loss': 7.972130298614502}


EP_train:0:  83%|| 22852/27626 [53:45<11:21,  7.01it/s]

{'epoch': 0, 'iter': 22850, 'avg_loss': 8.869167484113838, 'avg_acc': 49.98126449608332, 'loss': 9.546504020690918}


EP_train:0:  83%|| 22862/27626 [53:47<11:12,  7.08it/s]

{'epoch': 0, 'iter': 22860, 'avg_loss': 8.868881241834401, 'avg_acc': 49.98181947421373, 'loss': 7.851141452789307}


EP_train:0:  83%|| 22872/27626 [53:48<11:17,  7.01it/s]

{'epoch': 0, 'iter': 22870, 'avg_loss': 8.868798350845843, 'avg_acc': 49.98005115648638, 'loss': 9.142814636230469}


EP_train:0:  83%|| 22882/27626 [53:50<11:16,  7.02it/s]

{'epoch': 0, 'iter': 22880, 'avg_loss': 8.868715608710254, 'avg_acc': 49.98019645120406, 'loss': 8.769149780273438}


EP_train:0:  83%|| 22892/27626 [53:51<11:12,  7.04it/s]

{'epoch': 0, 'iter': 22890, 'avg_loss': 8.868683954381124, 'avg_acc': 49.981979817395484, 'loss': 9.064438819885254}


EP_train:0:  83%|| 22902/27626 [53:52<11:09,  7.06it/s]

{'epoch': 0, 'iter': 22900, 'avg_loss': 8.868585642849657, 'avg_acc': 49.98116894458757, 'loss': 8.95000171661377}


EP_train:0:  83%|| 22912/27626 [53:54<11:04,  7.10it/s]

{'epoch': 0, 'iter': 22910, 'avg_loss': 8.868445712025395, 'avg_acc': 49.98213194535376, 'loss': 8.42575454711914}


EP_train:0:  83%|| 22922/27626 [53:55<11:00,  7.12it/s]

{'epoch': 0, 'iter': 22920, 'avg_loss': 8.868405558874581, 'avg_acc': 49.98050368657563, 'loss': 8.843639373779297}


EP_train:0:  83%|| 22932/27626 [53:57<11:06,  7.04it/s]

{'epoch': 0, 'iter': 22930, 'avg_loss': 8.86803563668077, 'avg_acc': 49.98037591033972, 'loss': 8.945191383361816}


EP_train:0:  83%|| 22942/27626 [53:58<11:03,  7.06it/s]

{'epoch': 0, 'iter': 22940, 'avg_loss': 8.867965348860913, 'avg_acc': 49.980520683492436, 'loss': 8.064812660217285}


EP_train:0:  83%|| 22952/27626 [53:59<11:03,  7.04it/s]

{'epoch': 0, 'iter': 22950, 'avg_loss': 8.867851966315898, 'avg_acc': 49.98120996906453, 'loss': 9.083014488220215}


EP_train:0:  83%|| 22962/27626 [54:01<11:05,  7.00it/s]

{'epoch': 0, 'iter': 22960, 'avg_loss': 8.867827023858004, 'avg_acc': 49.9825791559601, 'loss': 8.052501678466797}


EP_train:0:  83%|| 22972/27626 [54:02<11:11,  6.93it/s]

{'epoch': 0, 'iter': 22970, 'avg_loss': 8.867664067591909, 'avg_acc': 49.98245069870706, 'loss': 7.75170373916626}


EP_train:0:  83%|| 22982/27626 [54:04<10:54,  7.10it/s]

{'epoch': 0, 'iter': 22980, 'avg_loss': 8.867694143537058, 'avg_acc': 49.98055458857317, 'loss': 8.017993927001953}


EP_train:0:  83%|| 22992/27626 [54:05<10:57,  7.05it/s]

{'epoch': 0, 'iter': 22990, 'avg_loss': 8.867514063040506, 'avg_acc': 49.9802912009047, 'loss': 8.827008247375488}


EP_train:0:  83%|| 23002/27626 [54:07<10:54,  7.06it/s]

{'epoch': 0, 'iter': 23000, 'avg_loss': 8.867453426139509, 'avg_acc': 49.98138667884005, 'loss': 8.444463729858398}


EP_train:0:  83%|| 23012/27626 [54:08<10:48,  7.12it/s]

{'epoch': 0, 'iter': 23010, 'avg_loss': 8.867358068449857, 'avg_acc': 49.98139476771979, 'loss': 9.601885795593262}


EP_train:0:  83%|| 23022/27626 [54:09<10:53,  7.05it/s]

{'epoch': 0, 'iter': 23020, 'avg_loss': 8.86724043843189, 'avg_acc': 49.98099561270145, 'loss': 8.509716033935547}


EP_train:0:  83%|| 23032/27626 [54:11<10:47,  7.10it/s]

{'epoch': 0, 'iter': 23030, 'avg_loss': 8.867131893263467, 'avg_acc': 49.97883287742608, 'loss': 8.284624099731445}


EP_train:0:  83%|| 23042/27626 [54:12<10:53,  7.01it/s]

{'epoch': 0, 'iter': 23040, 'avg_loss': 8.86700371190143, 'avg_acc': 49.97897769194045, 'loss': 9.742012977600098}


EP_train:0:  83%|| 23052/27626 [54:14<10:54,  6.99it/s]

{'epoch': 0, 'iter': 23050, 'avg_loss': 8.866949816793946, 'avg_acc': 49.97871567394039, 'loss': 8.337526321411133}


EP_train:0:  83%|| 23062/27626 [54:15<10:50,  7.01it/s]

{'epoch': 0, 'iter': 23060, 'avg_loss': 8.866814632199901, 'avg_acc': 49.978860413685446, 'loss': 8.51453971862793}


EP_train:0:  84%|| 23072/27626 [54:16<10:47,  7.03it/s]

{'epoch': 0, 'iter': 23070, 'avg_loss': 8.866713094130526, 'avg_acc': 49.979817736552384, 'loss': 8.573563575744629}


EP_train:0:  84%|| 23082/27626 [54:18<10:45,  7.04it/s]

{'epoch': 0, 'iter': 23080, 'avg_loss': 8.866663190759782, 'avg_acc': 49.98131580087518, 'loss': 9.116622924804688}


EP_train:0:  84%|| 23092/27626 [54:19<10:39,  7.09it/s]

{'epoch': 0, 'iter': 23090, 'avg_loss': 8.866564036128086, 'avg_acc': 49.98118855831276, 'loss': 8.16337776184082}


EP_train:0:  84%|| 23102/27626 [54:21<10:39,  7.07it/s]

{'epoch': 0, 'iter': 23100, 'avg_loss': 8.866325051499354, 'avg_acc': 49.98024977273711, 'loss': 8.077309608459473}


EP_train:0:  84%|| 23112/27626 [54:22<10:35,  7.11it/s]

{'epoch': 0, 'iter': 23110, 'avg_loss': 8.866099241980583, 'avg_acc': 49.97836528060231, 'loss': 8.173721313476562}


EP_train:0:  84%|| 23122/27626 [54:23<10:36,  7.08it/s]

{'epoch': 0, 'iter': 23120, 'avg_loss': 8.866119192657234, 'avg_acc': 49.97972622291423, 'loss': 9.006514549255371}


EP_train:0:  84%|| 23132/27626 [54:25<10:39,  7.02it/s]

{'epoch': 0, 'iter': 23130, 'avg_loss': 8.866192542936112, 'avg_acc': 49.978248886775326, 'loss': 9.479329109191895}


EP_train:0:  84%|| 23142/27626 [54:26<10:37,  7.03it/s]

{'epoch': 0, 'iter': 23140, 'avg_loss': 8.866088032408236, 'avg_acc': 49.97636770234649, 'loss': 8.502284049987793}


EP_train:0:  84%|| 23152/27626 [54:28<10:40,  6.99it/s]

{'epoch': 0, 'iter': 23150, 'avg_loss': 8.865939032804384, 'avg_acc': 49.97826767742214, 'loss': 8.43204116821289}


EP_train:0:  84%|| 23162/27626 [54:29<10:33,  7.04it/s]

{'epoch': 0, 'iter': 23160, 'avg_loss': 8.865732489541777, 'avg_acc': 49.97814213548637, 'loss': 8.314845085144043}


EP_train:0:  84%|| 23172/27626 [54:31<10:26,  7.11it/s]

{'epoch': 0, 'iter': 23170, 'avg_loss': 8.86565321780348, 'avg_acc': 49.97882590306849, 'loss': 8.834226608276367}


EP_train:0:  84%|| 23182/27626 [54:32<10:27,  7.08it/s]

{'epoch': 0, 'iter': 23180, 'avg_loss': 8.86546102962113, 'avg_acc': 49.978565419956, 'loss': 7.4849629402160645}


EP_train:0:  84%|| 23192/27626 [54:33<10:28,  7.05it/s]

{'epoch': 0, 'iter': 23190, 'avg_loss': 8.865433318181836, 'avg_acc': 49.97938316588332, 'loss': 8.700873374938965}


EP_train:0:  84%|| 23202/27626 [54:35<10:29,  7.03it/s]

{'epoch': 0, 'iter': 23200, 'avg_loss': 8.86534121375542, 'avg_acc': 49.97844920477566, 'loss': 8.339468002319336}


EP_train:0:  84%|| 23212/27626 [54:36<10:25,  7.05it/s]

{'epoch': 0, 'iter': 23210, 'avg_loss': 8.86515885158294, 'avg_acc': 49.978458489509286, 'loss': 9.3640775680542}


EP_train:0:  84%|| 23222/27626 [54:38<10:23,  7.06it/s]

{'epoch': 0, 'iter': 23220, 'avg_loss': 8.865121097025865, 'avg_acc': 49.97792946040222, 'loss': 8.543745994567871}


EP_train:0:  84%|| 23232/27626 [54:39<10:23,  7.04it/s]

{'epoch': 0, 'iter': 23230, 'avg_loss': 8.864874978188851, 'avg_acc': 49.976324738495975, 'loss': 8.627500534057617}


EP_train:0:  84%|| 23242/27626 [54:40<10:28,  6.98it/s]

{'epoch': 0, 'iter': 23240, 'avg_loss': 8.864603977606324, 'avg_acc': 49.97835183511897, 'loss': 8.185689926147461}


EP_train:0:  84%|| 23252/27626 [54:42<10:17,  7.08it/s]

{'epoch': 0, 'iter': 23250, 'avg_loss': 8.86436999736768, 'avg_acc': 49.97809234011441, 'loss': 7.348614692687988}


EP_train:0:  84%|| 23262/27626 [54:43<10:14,  7.10it/s]

{'epoch': 0, 'iter': 23260, 'avg_loss': 8.864229390475742, 'avg_acc': 49.97662396285628, 'loss': 7.837515830993652}


EP_train:0:  84%|| 23272/27626 [54:45<10:14,  7.09it/s]

{'epoch': 0, 'iter': 23270, 'avg_loss': 8.864100326552675, 'avg_acc': 49.975828284130465, 'loss': 8.998871803283691}


EP_train:0:  84%|| 23282/27626 [54:46<10:15,  7.06it/s]

{'epoch': 0, 'iter': 23280, 'avg_loss': 8.864174290132668, 'avg_acc': 49.97583866672394, 'loss': 8.79109001159668}


EP_train:0:  84%|| 23292/27626 [54:47<10:15,  7.04it/s]

{'epoch': 0, 'iter': 23290, 'avg_loss': 8.863931252442415, 'avg_acc': 49.97517818041303, 'loss': 8.029346466064453}


EP_train:0:  84%|| 23302/27626 [54:49<10:14,  7.04it/s]

{'epoch': 0, 'iter': 23300, 'avg_loss': 8.863941648365484, 'avg_acc': 49.97357946010901, 'loss': 9.425357818603516}


EP_train:0:  84%|| 23312/27626 [54:50<10:10,  7.07it/s]

{'epoch': 0, 'iter': 23310, 'avg_loss': 8.863754937201923, 'avg_acc': 49.9725183389816, 'loss': 9.305949211120605}


EP_train:0:  84%|| 23322/27626 [54:52<10:09,  7.06it/s]

{'epoch': 0, 'iter': 23320, 'avg_loss': 8.863730258871314, 'avg_acc': 49.970788130869174, 'loss': 8.73305892944336}


EP_train:0:  84%|| 23332/27626 [54:53<10:08,  7.05it/s]

{'epoch': 0, 'iter': 23330, 'avg_loss': 8.863393521166856, 'avg_acc': 49.97080065149372, 'loss': 7.703790664672852}


EP_train:0:  84%|| 23342/27626 [54:55<10:05,  7.07it/s]

{'epoch': 0, 'iter': 23340, 'avg_loss': 8.862948010837167, 'avg_acc': 49.97175035345529, 'loss': 7.898183822631836}


EP_train:0:  85%|| 23352/27626 [54:56<10:04,  7.07it/s]

{'epoch': 0, 'iter': 23350, 'avg_loss': 8.862828094664899, 'avg_acc': 49.971762451286885, 'loss': 8.487936973571777}


EP_train:0:  85%|| 23362/27626 [54:57<10:00,  7.10it/s]

{'epoch': 0, 'iter': 23360, 'avg_loss': 8.862810457657524, 'avg_acc': 49.97177453876118, 'loss': 8.662127494812012}


EP_train:0:  85%|| 23372/27626 [54:59<10:01,  7.07it/s]

{'epoch': 0, 'iter': 23370, 'avg_loss': 8.862694120678464, 'avg_acc': 49.971519190449705, 'loss': 8.839826583862305}


EP_train:0:  85%|| 23382/27626 [55:00<10:04,  7.02it/s]

{'epoch': 0, 'iter': 23380, 'avg_loss': 8.862528482834984, 'avg_acc': 49.971130405029726, 'loss': 8.778553009033203}


EP_train:0:  85%|| 23392/27626 [55:02<09:59,  7.06it/s]

{'epoch': 0, 'iter': 23390, 'avg_loss': 8.862452586012843, 'avg_acc': 49.971142747210465, 'loss': 9.213194847106934}


EP_train:0:  85%|| 23402/27626 [55:03<10:00,  7.04it/s]

{'epoch': 0, 'iter': 23400, 'avg_loss': 8.862369994975365, 'avg_acc': 49.97062091363617, 'loss': 8.260519027709961}


EP_train:0:  85%|| 23412/27626 [55:04<09:57,  7.05it/s]

{'epoch': 0, 'iter': 23410, 'avg_loss': 8.862285200684193, 'avg_acc': 49.969565588825766, 'loss': 8.801857948303223}


EP_train:0:  85%|| 23422/27626 [55:06<09:59,  7.01it/s]

{'epoch': 0, 'iter': 23420, 'avg_loss': 8.86209024480464, 'avg_acc': 49.96891144699202, 'loss': 8.259106636047363}


EP_train:0:  85%|| 23432/27626 [55:07<09:52,  7.08it/s]

{'epoch': 0, 'iter': 23430, 'avg_loss': 8.862103387084469, 'avg_acc': 49.96905808544236, 'loss': 8.991950988769531}


EP_train:0:  85%|| 23442/27626 [55:09<09:52,  7.06it/s]

{'epoch': 0, 'iter': 23440, 'avg_loss': 8.862149525978404, 'avg_acc': 49.96987116590589, 'loss': 8.770855903625488}


EP_train:0:  85%|| 23452/27626 [55:10<09:52,  7.05it/s]

{'epoch': 0, 'iter': 23450, 'avg_loss': 8.862173394594441, 'avg_acc': 49.968684704277, 'loss': 8.658817291259766}


EP_train:0:  85%|| 23462/27626 [55:11<09:46,  7.11it/s]

{'epoch': 0, 'iter': 23460, 'avg_loss': 8.861965480858263, 'avg_acc': 49.968564852308084, 'loss': 8.638113021850586}


EP_train:0:  85%|| 23472/27626 [55:13<09:46,  7.08it/s]

{'epoch': 0, 'iter': 23470, 'avg_loss': 8.862002063796078, 'avg_acc': 49.96817881641174, 'loss': 9.098650932312012}


EP_train:0:  85%|| 23482/27626 [55:14<09:47,  7.06it/s]

{'epoch': 0, 'iter': 23480, 'avg_loss': 8.861917442095796, 'avg_acc': 49.96872471359823, 'loss': 8.733717918395996}


EP_train:0:  85%|| 23492/27626 [55:16<09:46,  7.05it/s]

{'epoch': 0, 'iter': 23490, 'avg_loss': 8.861843758080838, 'avg_acc': 49.96993529436806, 'loss': 9.192960739135742}


EP_train:0:  85%|| 23502/27626 [55:17<09:42,  7.07it/s]

{'epoch': 0, 'iter': 23500, 'avg_loss': 8.861733458132537, 'avg_acc': 49.971277817965195, 'loss': 8.021650314331055}


EP_train:0:  85%|| 23512/27626 [55:19<09:48,  6.99it/s]

{'epoch': 0, 'iter': 23510, 'avg_loss': 8.861773977701112, 'avg_acc': 49.972353366509296, 'loss': 8.39254379272461}


EP_train:0:  85%|| 23522/27626 [55:20<09:39,  7.08it/s]

{'epoch': 0, 'iter': 23520, 'avg_loss': 8.861659845047512, 'avg_acc': 49.97209940053569, 'loss': 8.07506275177002}


EP_train:0:  85%|| 23532/27626 [55:21<09:42,  7.03it/s]

{'epoch': 0, 'iter': 23530, 'avg_loss': 8.861609209653276, 'avg_acc': 49.971580043347075, 'loss': 8.664247512817383}


EP_train:0:  85%|| 23542/27626 [55:23<09:42,  7.02it/s]

{'epoch': 0, 'iter': 23540, 'avg_loss': 8.861549703495854, 'avg_acc': 49.973981564079686, 'loss': 8.296854019165039}


EP_train:0:  85%|| 23552/27626 [55:24<09:34,  7.10it/s]

{'epoch': 0, 'iter': 23550, 'avg_loss': 8.861567784376161, 'avg_acc': 49.97332915799754, 'loss': 10.82183837890625}


EP_train:0:  85%|| 23562/27626 [55:26<09:36,  7.05it/s]

{'epoch': 0, 'iter': 23560, 'avg_loss': 8.861613078299719, 'avg_acc': 49.97294257459361, 'loss': 8.481819152832031}


EP_train:0:  85%|| 23572/27626 [55:27<09:34,  7.05it/s]

{'epoch': 0, 'iter': 23570, 'avg_loss': 8.86149615445614, 'avg_acc': 49.974279835390945, 'loss': 8.184325218200684}


EP_train:0:  85%|| 23582/27626 [55:28<09:35,  7.03it/s]

{'epoch': 0, 'iter': 23580, 'avg_loss': 8.861355100564337, 'avg_acc': 49.977868835079086, 'loss': 8.388160705566406}


EP_train:0:  85%|| 23592/27626 [55:30<09:34,  7.02it/s]

{'epoch': 0, 'iter': 23590, 'avg_loss': 8.861255115432064, 'avg_acc': 49.97880547666483, 'loss': 8.805033683776855}


EP_train:0:  85%|| 23602/27626 [55:31<09:33,  7.02it/s]

{'epoch': 0, 'iter': 23600, 'avg_loss': 8.861104520012436, 'avg_acc': 49.978682047370874, 'loss': 7.513674736022949}


EP_train:0:  85%|| 23612/27626 [55:33<09:29,  7.05it/s]

{'epoch': 0, 'iter': 23610, 'avg_loss': 8.860911113950985, 'avg_acc': 49.97723518698912, 'loss': 8.658352851867676}


EP_train:0:  86%|| 23622/27626 [55:34<09:28,  7.04it/s]

{'epoch': 0, 'iter': 23620, 'avg_loss': 8.860702518221537, 'avg_acc': 49.978038609711696, 'loss': 8.223333358764648}


EP_train:0:  86%|| 23632/27626 [55:35<09:22,  7.10it/s]

{'epoch': 0, 'iter': 23630, 'avg_loss': 8.860614488644543, 'avg_acc': 49.977386695442426, 'loss': 9.224324226379395}


EP_train:0:  86%|| 23642/27626 [55:37<09:22,  7.09it/s]

{'epoch': 0, 'iter': 23640, 'avg_loss': 8.860483600728116, 'avg_acc': 49.976999703904234, 'loss': 8.393871307373047}


EP_train:0:  86%|| 23652/27626 [55:38<09:25,  7.03it/s]

{'epoch': 0, 'iter': 23650, 'avg_loss': 8.860440988993474, 'avg_acc': 49.976613039617774, 'loss': 8.818595886230469}


EP_train:0:  86%|| 23662/27626 [55:40<09:23,  7.03it/s]

{'epoch': 0, 'iter': 23660, 'avg_loss': 8.8603761871157, 'avg_acc': 49.9776795148134, 'loss': 9.311026573181152}


EP_train:0:  86%|| 23672/27626 [55:41<09:22,  7.03it/s]

{'epoch': 0, 'iter': 23670, 'avg_loss': 8.860198753343719, 'avg_acc': 49.9779529804402, 'loss': 7.733119010925293}


EP_train:0:  86%|| 23682/27626 [55:43<09:17,  7.08it/s]

{'epoch': 0, 'iter': 23680, 'avg_loss': 8.860170990725758, 'avg_acc': 49.97796229044382, 'loss': 9.323493957519531}


EP_train:0:  86%|| 23692/27626 [55:44<09:18,  7.04it/s]

{'epoch': 0, 'iter': 23690, 'avg_loss': 8.860014760807715, 'avg_acc': 49.97889493900637, 'loss': 8.342964172363281}


EP_train:0:  86%|| 23702/27626 [55:45<09:16,  7.05it/s]

{'epoch': 0, 'iter': 23700, 'avg_loss': 8.860083653491175, 'avg_acc': 49.977453482975406, 'loss': 8.62067699432373}


EP_train:0:  86%|| 23712/27626 [55:47<09:15,  7.04it/s]

{'epoch': 0, 'iter': 23710, 'avg_loss': 8.860042564057775, 'avg_acc': 49.97627683353718, 'loss': 8.682151794433594}


EP_train:0:  86%|| 23722/27626 [55:48<09:10,  7.10it/s]

{'epoch': 0, 'iter': 23720, 'avg_loss': 8.859991250726235, 'avg_acc': 49.974837696555795, 'loss': 8.936869621276855}


EP_train:0:  86%|| 23732/27626 [55:50<09:10,  7.07it/s]

{'epoch': 0, 'iter': 23730, 'avg_loss': 8.859835573074806, 'avg_acc': 49.97484829969239, 'loss': 8.10558032989502}


EP_train:0:  86%|| 23742/27626 [55:51<09:09,  7.06it/s]

{'epoch': 0, 'iter': 23740, 'avg_loss': 8.859662786680357, 'avg_acc': 49.97591192451877, 'loss': 8.726309776306152}


EP_train:0:  86%|| 23752/27626 [55:52<09:07,  7.08it/s]

{'epoch': 0, 'iter': 23750, 'avg_loss': 8.85954209519589, 'avg_acc': 49.97592206643931, 'loss': 8.55256462097168}


EP_train:0:  86%|| 23762/27626 [55:54<09:06,  7.07it/s]

{'epoch': 0, 'iter': 23760, 'avg_loss': 8.859398630601449, 'avg_acc': 49.97790497032953, 'loss': 8.104759216308594}


EP_train:0:  86%|| 23772/27626 [55:55<09:12,  6.97it/s]

{'epoch': 0, 'iter': 23770, 'avg_loss': 8.85937359380078, 'avg_acc': 49.97791426528123, 'loss': 8.600301742553711}


EP_train:0:  86%|| 23782/27626 [55:57<09:08,  7.01it/s]

{'epoch': 0, 'iter': 23780, 'avg_loss': 8.859274674091521, 'avg_acc': 49.977923552415795, 'loss': 8.249285697937012}


EP_train:0:  86%|| 23792/27626 [55:58<09:01,  7.08it/s]

{'epoch': 0, 'iter': 23790, 'avg_loss': 8.859204759193316, 'avg_acc': 49.97990311462318, 'loss': 8.619013786315918}


EP_train:0:  86%|| 23802/27626 [55:59<09:01,  7.06it/s]

{'epoch': 0, 'iter': 23800, 'avg_loss': 8.859085675443072, 'avg_acc': 49.979255073316246, 'loss': 8.056800842285156}


EP_train:0:  86%|| 23812/27626 [56:01<09:00,  7.05it/s]

{'epoch': 0, 'iter': 23810, 'avg_loss': 8.858602235110062, 'avg_acc': 49.98018247868632, 'loss': 8.19957447052002}


EP_train:0:  86%|| 23822/27626 [56:02<09:00,  7.04it/s]

{'epoch': 0, 'iter': 23820, 'avg_loss': 8.858718696396625, 'avg_acc': 49.98058435833928, 'loss': 8.959325790405273}


EP_train:0:  86%|| 23832/27626 [56:04<08:56,  7.07it/s]

{'epoch': 0, 'iter': 23830, 'avg_loss': 8.858552403498233, 'avg_acc': 49.980723637279176, 'loss': 8.269146919250488}


EP_train:0:  86%|| 23842/27626 [56:05<08:55,  7.06it/s]

{'epoch': 0, 'iter': 23840, 'avg_loss': 8.858538594143306, 'avg_acc': 49.98178033639529, 'loss': 8.124735832214355}


EP_train:0:  86%|| 23852/27626 [56:07<08:52,  7.09it/s]

{'epoch': 0, 'iter': 23850, 'avg_loss': 8.85849255407128, 'avg_acc': 49.979953670705626, 'loss': 8.729143142700195}


EP_train:0:  86%|| 23862/27626 [56:08<08:54,  7.05it/s]

{'epoch': 0, 'iter': 23860, 'avg_loss': 8.858292368480424, 'avg_acc': 49.97970013830099, 'loss': 7.886690139770508}


EP_train:0:  86%|| 23872/27626 [56:09<08:57,  6.99it/s]

{'epoch': 0, 'iter': 23870, 'avg_loss': 8.858273161366, 'avg_acc': 49.98127958610867, 'loss': 8.326717376708984}


EP_train:0:  86%|| 23882/27626 [56:11<08:49,  7.06it/s]

{'epoch': 0, 'iter': 23880, 'avg_loss': 8.858142780202362, 'avg_acc': 49.981810853816846, 'loss': 8.425847053527832}


EP_train:0:  86%|| 23892/27626 [56:12<08:50,  7.04it/s]

{'epoch': 0, 'iter': 23890, 'avg_loss': 8.85808248259634, 'avg_acc': 49.98090285044577, 'loss': 9.330142974853516}


EP_train:0:  87%|| 23902/27626 [56:14<08:49,  7.04it/s]

{'epoch': 0, 'iter': 23900, 'avg_loss': 8.85785864089893, 'avg_acc': 49.9819568218903, 'loss': 8.29707145690918}


EP_train:0:  87%|| 23912/27626 [56:15<08:45,  7.07it/s]

{'epoch': 0, 'iter': 23910, 'avg_loss': 8.857719604595408, 'avg_acc': 49.982879218769604, 'loss': 8.267291069030762}


EP_train:0:  87%|| 23922/27626 [56:16<08:48,  7.00it/s]

{'epoch': 0, 'iter': 23920, 'avg_loss': 8.857563279613066, 'avg_acc': 49.982886375987626, 'loss': 8.241082191467285}


EP_train:0:  87%|| 23932/27626 [56:18<08:41,  7.09it/s]

{'epoch': 0, 'iter': 23930, 'avg_loss': 8.857539654754426, 'avg_acc': 49.9834158622707, 'loss': 8.895694732666016}


EP_train:0:  87%|| 23942/27626 [56:19<08:39,  7.09it/s]

{'epoch': 0, 'iter': 23940, 'avg_loss': 8.857628118533258, 'avg_acc': 49.98342278935717, 'loss': 8.746870994567871}


EP_train:0:  87%|| 23952/27626 [56:21<08:38,  7.09it/s]

{'epoch': 0, 'iter': 23950, 'avg_loss': 8.85746767458092, 'avg_acc': 49.983951609536135, 'loss': 7.8811235427856445}


EP_train:0:  87%|| 23962/27626 [56:22<08:40,  7.04it/s]

{'epoch': 0, 'iter': 23960, 'avg_loss': 8.857227584405354, 'avg_acc': 49.98356704645048, 'loss': 8.458703994750977}


EP_train:0:  87%|| 23972/27626 [56:23<08:37,  7.06it/s]

{'epoch': 0, 'iter': 23970, 'avg_loss': 8.857237877264314, 'avg_acc': 49.9838346335155, 'loss': 8.712138175964355}


EP_train:0:  87%|| 23982/27626 [56:25<08:37,  7.05it/s]

{'epoch': 0, 'iter': 23980, 'avg_loss': 8.857187615050957, 'avg_acc': 49.98371106292482, 'loss': 8.719436645507812}


EP_train:0:  87%|| 23992/27626 [56:26<08:36,  7.03it/s]

{'epoch': 0, 'iter': 23990, 'avg_loss': 8.857083386724149, 'avg_acc': 49.98371785252803, 'loss': 8.36190414428711}


EP_train:0:  87%|| 24002/27626 [56:28<08:31,  7.09it/s]

{'epoch': 0, 'iter': 24000, 'avg_loss': 8.856992849662529, 'avg_acc': 49.98619849172951, 'loss': 8.774636268615723}


EP_train:0:  87%|| 24012/27626 [56:29<08:29,  7.09it/s]

{'epoch': 0, 'iter': 24010, 'avg_loss': 8.856951143486318, 'avg_acc': 49.985163050268625, 'loss': 8.27151870727539}


EP_train:0:  87%|| 24022/27626 [56:31<08:26,  7.11it/s]

{'epoch': 0, 'iter': 24020, 'avg_loss': 8.856881454293562, 'avg_acc': 49.984648848923854, 'loss': 8.502739906311035}


EP_train:0:  87%|| 24032/27626 [56:32<08:27,  7.09it/s]

{'epoch': 0, 'iter': 24030, 'avg_loss': 8.856866228233725, 'avg_acc': 49.98504535807915, 'loss': 9.227409362792969}


EP_train:0:  87%|| 24042/27626 [56:33<08:26,  7.07it/s]

{'epoch': 0, 'iter': 24040, 'avg_loss': 8.856684233205032, 'avg_acc': 49.984661619732954, 'loss': 9.548542022705078}


EP_train:0:  87%|| 24052/27626 [56:35<08:29,  7.01it/s]

{'epoch': 0, 'iter': 24050, 'avg_loss': 8.856629310420118, 'avg_acc': 49.98570745499148, 'loss': 8.28939437866211}


EP_train:0:  87%|| 24062/27626 [56:36<08:23,  7.08it/s]

{'epoch': 0, 'iter': 24060, 'avg_loss': 8.856528590313193, 'avg_acc': 49.98571339512073, 'loss': 8.715351104736328}


EP_train:0:  87%|| 24072/27626 [56:38<08:20,  7.10it/s]

{'epoch': 0, 'iter': 24070, 'avg_loss': 8.856315026131325, 'avg_acc': 49.98597897885422, 'loss': 8.347752571105957}


EP_train:0:  87%|| 24082/27626 [56:39<08:23,  7.04it/s]

{'epoch': 0, 'iter': 24080, 'avg_loss': 8.85630485939329, 'avg_acc': 49.98715273452099, 'loss': 9.268101692199707}


EP_train:0:  87%|| 24092/27626 [56:40<08:21,  7.05it/s]

{'epoch': 0, 'iter': 24090, 'avg_loss': 8.856123218925907, 'avg_acc': 49.988195799261135, 'loss': 8.63559341430664}


EP_train:0:  87%|| 24102/27626 [56:42<08:19,  7.06it/s]

{'epoch': 0, 'iter': 24100, 'avg_loss': 8.856034867551127, 'avg_acc': 49.9893676610929, 'loss': 9.25475025177002}


EP_train:0:  87%|| 24112/27626 [56:43<08:18,  7.05it/s]

{'epoch': 0, 'iter': 24110, 'avg_loss': 8.855870035078242, 'avg_acc': 49.99014972419228, 'loss': 8.596282005310059}


EP_train:0:  87%|| 24122/27626 [56:45<08:19,  7.01it/s]

{'epoch': 0, 'iter': 24120, 'avg_loss': 8.855749252893098, 'avg_acc': 49.99041291820405, 'loss': 8.886137962341309}


EP_train:0:  87%|| 24132/27626 [56:46<08:17,  7.03it/s]

{'epoch': 0, 'iter': 24130, 'avg_loss': 8.855652423144265, 'avg_acc': 49.99028738966474, 'loss': 8.845319747924805}


EP_train:0:  87%|| 24142/27626 [56:47<08:17,  7.00it/s]

{'epoch': 0, 'iter': 24140, 'avg_loss': 8.855639323506388, 'avg_acc': 49.989255830330144, 'loss': 9.461638450622559}


EP_train:0:  87%|| 24152/27626 [56:49<08:10,  7.08it/s]

{'epoch': 0, 'iter': 24150, 'avg_loss': 8.855485741877219, 'avg_acc': 49.99094240404124, 'loss': 8.629528045654297}


EP_train:0:  87%|| 24162/27626 [56:50<08:08,  7.09it/s]

{'epoch': 0, 'iter': 24160, 'avg_loss': 8.85533350399455, 'avg_acc': 49.989523405488185, 'loss': 8.316092491149902}


EP_train:0:  87%|| 24172/27626 [56:52<08:07,  7.08it/s]

{'epoch': 0, 'iter': 24170, 'avg_loss': 8.855393119081457, 'avg_acc': 49.99082061147656, 'loss': 9.134735107421875}


EP_train:0:  88%|| 24182/27626 [56:53<08:06,  7.08it/s]

{'epoch': 0, 'iter': 24180, 'avg_loss': 8.85539494451718, 'avg_acc': 49.99108287498449, 'loss': 8.715374946594238}


EP_train:0:  88%|| 24192/27626 [56:55<08:07,  7.04it/s]

{'epoch': 0, 'iter': 24190, 'avg_loss': 8.855369856543748, 'avg_acc': 49.99289508494895, 'loss': 8.556770324707031}


EP_train:0:  88%|| 24202/27626 [56:56<08:08,  7.01it/s]

{'epoch': 0, 'iter': 24200, 'avg_loss': 8.855357895872375, 'avg_acc': 49.99186500557828, 'loss': 8.818473815917969}


EP_train:0:  88%|| 24212/27626 [56:57<08:03,  7.06it/s]

{'epoch': 0, 'iter': 24210, 'avg_loss': 8.855243684239982, 'avg_acc': 49.99199743918054, 'loss': 7.922825336456299}


EP_train:0:  88%|| 24222/27626 [56:59<08:01,  7.06it/s]

{'epoch': 0, 'iter': 24220, 'avg_loss': 8.855232481745634, 'avg_acc': 49.9892913174518, 'loss': 8.815706253051758}


EP_train:0:  88%|| 24232/27626 [57:00<08:02,  7.04it/s]

{'epoch': 0, 'iter': 24230, 'avg_loss': 8.855268308998388, 'avg_acc': 49.99019850604597, 'loss': 9.954641342163086}


EP_train:0:  88%|| 24242/27626 [57:02<07:57,  7.08it/s]

{'epoch': 0, 'iter': 24240, 'avg_loss': 8.855145238029166, 'avg_acc': 49.989815807928714, 'loss': 9.3945951461792}


EP_train:0:  88%|| 24252/27626 [57:03<07:57,  7.07it/s]

{'epoch': 0, 'iter': 24250, 'avg_loss': 8.855123791830902, 'avg_acc': 49.989820007422374, 'loss': 7.813809871673584}


EP_train:0:  88%|| 24262/27626 [57:04<07:56,  7.06it/s]

{'epoch': 0, 'iter': 24260, 'avg_loss': 8.855023711164316, 'avg_acc': 49.98956658835168, 'loss': 9.27202033996582}


EP_train:0:  88%|| 24272/27626 [57:06<07:55,  7.06it/s]

{'epoch': 0, 'iter': 24270, 'avg_loss': 8.85487003060404, 'avg_acc': 49.989055869144245, 'loss': 8.260343551635742}


EP_train:0:  88%|| 24282/27626 [57:07<07:56,  7.02it/s]

{'epoch': 0, 'iter': 24280, 'avg_loss': 8.854814359449604, 'avg_acc': 49.990733495325564, 'loss': 9.632049560546875}


EP_train:0:  88%|| 24292/27626 [57:09<07:50,  7.08it/s]

{'epoch': 0, 'iter': 24290, 'avg_loss': 8.854705157999808, 'avg_acc': 49.99176649787987, 'loss': 8.822404861450195}


EP_train:0:  88%|| 24302/27626 [57:10<07:49,  7.08it/s]

{'epoch': 0, 'iter': 24300, 'avg_loss': 8.854683128709503, 'avg_acc': 49.99215567260607, 'loss': 9.263575553894043}


EP_train:0:  88%|| 24312/27626 [57:11<07:48,  7.07it/s]

{'epoch': 0, 'iter': 24310, 'avg_loss': 8.854748360834423, 'avg_acc': 49.99164472872362, 'loss': 9.334763526916504}


EP_train:0:  88%|| 24322/27626 [57:13<07:52,  6.99it/s]

{'epoch': 0, 'iter': 24320, 'avg_loss': 8.854642155776167, 'avg_acc': 49.993190041527896, 'loss': 9.257590293884277}


EP_train:0:  88%|| 24332/27626 [57:14<07:49,  7.02it/s]

{'epoch': 0, 'iter': 24330, 'avg_loss': 8.854427444298402, 'avg_acc': 49.99396346224981, 'loss': 8.270273208618164}


EP_train:0:  88%|| 24342/27626 [57:16<07:45,  7.06it/s]

{'epoch': 0, 'iter': 24340, 'avg_loss': 8.854393135212788, 'avg_acc': 49.99460786327595, 'loss': 8.862845420837402}


EP_train:0:  88%|| 24352/27626 [57:17<07:46,  7.02it/s]

{'epoch': 0, 'iter': 24350, 'avg_loss': 8.85434331279443, 'avg_acc': 49.99461007761488, 'loss': 9.076248168945312}


EP_train:0:  88%|| 24362/27626 [57:19<07:40,  7.08it/s]

{'epoch': 0, 'iter': 24360, 'avg_loss': 8.854187136795284, 'avg_acc': 49.99499712655474, 'loss': 8.293680191040039}


EP_train:0:  88%|| 24372/27626 [57:20<07:45,  6.99it/s]

{'epoch': 0, 'iter': 24370, 'avg_loss': 8.854052789112172, 'avg_acc': 49.99692257190924, 'loss': 7.706295013427734}


EP_train:0:  88%|| 24382/27626 [57:21<07:43,  7.00it/s]

{'epoch': 0, 'iter': 24380, 'avg_loss': 8.853940147581241, 'avg_acc': 49.99705200771093, 'loss': 8.459453582763672}


EP_train:0:  88%|| 24392/27626 [57:23<07:34,  7.12it/s]

{'epoch': 0, 'iter': 24390, 'avg_loss': 8.853931061617734, 'avg_acc': 49.998206305604526, 'loss': 8.334968566894531}


EP_train:0:  88%|| 24402/27626 [57:24<07:35,  7.07it/s]

{'epoch': 0, 'iter': 24400, 'avg_loss': 8.853878168780112, 'avg_acc': 49.99782283512971, 'loss': 8.613186836242676}


EP_train:0:  88%|| 24412/27626 [57:26<07:35,  7.05it/s]

{'epoch': 0, 'iter': 24410, 'avg_loss': 8.853660473970637, 'avg_acc': 50.00025603211667, 'loss': 8.210227012634277}


EP_train:0:  88%|| 24422/27626 [57:27<07:35,  7.03it/s]

{'epoch': 0, 'iter': 24420, 'avg_loss': 8.85342739268928, 'avg_acc': 49.997312763605095, 'loss': 8.626255989074707}


EP_train:0:  88%|| 24432/27626 [57:28<07:34,  7.02it/s]

{'epoch': 0, 'iter': 24430, 'avg_loss': 8.853196287442062, 'avg_acc': 49.99859297613688, 'loss': 8.290444374084473}


EP_train:0:  88%|| 24442/27626 [57:30<07:34,  7.01it/s]

{'epoch': 0, 'iter': 24440, 'avg_loss': 8.852875009446764, 'avg_acc': 49.99820997504194, 'loss': 7.489312171936035}


EP_train:0:  89%|| 24452/27626 [57:31<07:26,  7.11it/s]

{'epoch': 0, 'iter': 24450, 'avg_loss': 8.852625318385014, 'avg_acc': 50.000511226534705, 'loss': 9.235722541809082}


EP_train:0:  89%|| 24462/27626 [57:33<07:29,  7.03it/s]

{'epoch': 0, 'iter': 24460, 'avg_loss': 8.852717013678962, 'avg_acc': 50.000511017538116, 'loss': 9.321154594421387}


EP_train:0:  89%|| 24472/27626 [57:34<07:25,  7.08it/s]

{'epoch': 0, 'iter': 24470, 'avg_loss': 8.85263658461361, 'avg_acc': 50.00063851089044, 'loss': 8.180135726928711}


EP_train:0:  89%|| 24482/27626 [57:35<07:23,  7.09it/s]

{'epoch': 0, 'iter': 24480, 'avg_loss': 8.852611990459195, 'avg_acc': 50.00089355010008, 'loss': 8.813880920410156}


EP_train:0:  89%|| 24492/27626 [57:37<07:25,  7.03it/s]

{'epoch': 0, 'iter': 24490, 'avg_loss': 8.852476410485925, 'avg_acc': 50.001403576824146, 'loss': 8.934273719787598}


EP_train:0:  89%|| 24502/27626 [57:38<07:24,  7.02it/s]

{'epoch': 0, 'iter': 24500, 'avg_loss': 8.852340490077205, 'avg_acc': 49.999744908371085, 'loss': 8.828615188598633}


EP_train:0:  89%|| 24512/27626 [57:40<07:22,  7.04it/s]

{'epoch': 0, 'iter': 24510, 'avg_loss': 8.852345125585032, 'avg_acc': 50.00063746889152, 'loss': 8.942520141601562}


EP_train:0:  89%|| 24522/27626 [57:41<07:22,  7.02it/s]

{'epoch': 0, 'iter': 24520, 'avg_loss': 8.852211702424665, 'avg_acc': 49.998598140369474, 'loss': 8.787452697753906}


EP_train:0:  89%|| 24532/27626 [57:43<07:19,  7.04it/s]

{'epoch': 0, 'iter': 24530, 'avg_loss': 8.852244858249781, 'avg_acc': 49.9977069830011, 'loss': 9.923802375793457}


EP_train:0:  89%|| 24542/27626 [57:44<07:16,  7.06it/s]

{'epoch': 0, 'iter': 24540, 'avg_loss': 8.852297770715198, 'avg_acc': 49.99770791736278, 'loss': 9.941420555114746}


EP_train:0:  89%|| 24552/27626 [57:45<07:14,  7.07it/s]

{'epoch': 0, 'iter': 24550, 'avg_loss': 8.852228127789884, 'avg_acc': 49.998345281251275, 'loss': 8.671097755432129}


EP_train:0:  89%|| 24562/27626 [57:47<07:12,  7.08it/s]

{'epoch': 0, 'iter': 24560, 'avg_loss': 8.85199580305752, 'avg_acc': 49.99732808110419, 'loss': 8.006034851074219}


EP_train:0:  89%|| 24572/27626 [57:48<07:11,  7.07it/s]

{'epoch': 0, 'iter': 24570, 'avg_loss': 8.851814517335992, 'avg_acc': 49.99732916853201, 'loss': 8.091212272644043}


EP_train:0:  89%|| 24582/27626 [57:50<07:11,  7.06it/s]

{'epoch': 0, 'iter': 24580, 'avg_loss': 8.851772406002198, 'avg_acc': 49.99644034010008, 'loss': 8.664861679077148}


EP_train:0:  89%|| 24592/27626 [57:51<07:12,  7.02it/s]

{'epoch': 0, 'iter': 24590, 'avg_loss': 8.851653000012268, 'avg_acc': 49.99478976048148, 'loss': 8.59386920928955}


EP_train:0:  89%|| 24602/27626 [57:52<07:08,  7.05it/s]

{'epoch': 0, 'iter': 24600, 'avg_loss': 8.851455586038272, 'avg_acc': 49.99593512458843, 'loss': 8.192573547363281}


EP_train:0:  89%|| 24612/27626 [57:54<07:06,  7.07it/s]

{'epoch': 0, 'iter': 24610, 'avg_loss': 8.85139582106857, 'avg_acc': 49.996317703465934, 'loss': 8.836642265319824}


EP_train:0:  89%|| 24622/27626 [57:55<07:08,  7.01it/s]

{'epoch': 0, 'iter': 24620, 'avg_loss': 8.85137281891953, 'avg_acc': 49.994796109012626, 'loss': 8.478350639343262}


EP_train:0:  89%|| 24632/27626 [57:57<07:03,  7.07it/s]

{'epoch': 0, 'iter': 24630, 'avg_loss': 8.851357931090082, 'avg_acc': 49.99454447647273, 'loss': 8.594754219055176}


EP_train:0:  89%|| 24642/27626 [57:58<07:01,  7.07it/s]

{'epoch': 0, 'iter': 24640, 'avg_loss': 8.851244762353002, 'avg_acc': 49.99302483665436, 'loss': 8.73770809173584}


EP_train:0:  89%|| 24652/27626 [58:00<07:00,  7.08it/s]

{'epoch': 0, 'iter': 24650, 'avg_loss': 8.851103379283346, 'avg_acc': 49.993027666220435, 'loss': 8.013493537902832}


EP_train:0:  89%|| 24662/27626 [58:01<06:59,  7.06it/s]

{'epoch': 0, 'iter': 24660, 'avg_loss': 8.850967398660858, 'avg_acc': 49.99404423989295, 'loss': 8.549812316894531}


EP_train:0:  89%|| 24672/27626 [58:02<07:01,  7.01it/s]

{'epoch': 0, 'iter': 24670, 'avg_loss': 8.850901748773502, 'avg_acc': 49.99417332090309, 'loss': 7.62784481048584}


EP_train:0:  89%|| 24682/27626 [58:04<07:01,  6.99it/s]

{'epoch': 0, 'iter': 24680, 'avg_loss': 8.850732407323168, 'avg_acc': 49.995315222235725, 'loss': 8.652229309082031}


EP_train:0:  89%|| 24692/27626 [58:05<06:56,  7.05it/s]

{'epoch': 0, 'iter': 24690, 'avg_loss': 8.850703562379275, 'avg_acc': 49.99379834757604, 'loss': 9.385873794555664}


EP_train:0:  89%|| 24702/27626 [58:07<06:56,  7.03it/s]

{'epoch': 0, 'iter': 24700, 'avg_loss': 8.8505652876191, 'avg_acc': 49.99481296303793, 'loss': 8.300421714782715}


EP_train:0:  89%|| 24712/27626 [58:08<06:51,  7.07it/s]

{'epoch': 0, 'iter': 24710, 'avg_loss': 8.850547511074641, 'avg_acc': 49.993550443122494, 'loss': 9.413043975830078}


EP_train:0:  89%|| 24722/27626 [58:09<06:49,  7.10it/s]

{'epoch': 0, 'iter': 24720, 'avg_loss': 8.85043062202599, 'avg_acc': 49.99456433801222, 'loss': 8.330954551696777}


EP_train:0:  90%|| 24732/27626 [58:11<06:47,  7.10it/s]

{'epoch': 0, 'iter': 24730, 'avg_loss': 8.850241052275802, 'avg_acc': 49.99292386074158, 'loss': 7.901062488555908}


EP_train:0:  90%|| 24742/27626 [58:12<06:48,  7.05it/s]

{'epoch': 0, 'iter': 24740, 'avg_loss': 8.849974465184193, 'avg_acc': 49.99456873206419, 'loss': 8.668807983398438}


EP_train:0:  90%|| 24752/27626 [58:14<06:49,  7.01it/s]

{'epoch': 0, 'iter': 24750, 'avg_loss': 8.84984311020633, 'avg_acc': 49.99154074582845, 'loss': 8.316734313964844}


EP_train:0:  90%|| 24762/27626 [58:15<06:49,  6.99it/s]

{'epoch': 0, 'iter': 24760, 'avg_loss': 8.849764781065439, 'avg_acc': 49.99167036872501, 'loss': 9.330223083496094}


EP_train:0:  90%|| 24772/27626 [58:16<06:47,  7.00it/s]

{'epoch': 0, 'iter': 24770, 'avg_loss': 8.849733188286965, 'avg_acc': 49.993566065156834, 'loss': 8.359939575195312}


EP_train:0:  90%|| 24782/27626 [58:18<06:44,  7.04it/s]

{'epoch': 0, 'iter': 24780, 'avg_loss': 8.84964916035596, 'avg_acc': 49.99319034744361, 'loss': 8.90610408782959}


EP_train:0:  90%|| 24792/27626 [58:19<06:42,  7.05it/s]

{'epoch': 0, 'iter': 24790, 'avg_loss': 8.84944399534817, 'avg_acc': 49.99533600903553, 'loss': 8.10661506652832}


EP_train:0:  90%|| 24802/27626 [58:21<06:39,  7.07it/s]

{'epoch': 0, 'iter': 24800, 'avg_loss': 8.849249945490174, 'avg_acc': 49.99508588363372, 'loss': 8.275031089782715}


EP_train:0:  90%|| 24812/27626 [58:22<06:36,  7.11it/s]

{'epoch': 0, 'iter': 24810, 'avg_loss': 8.849028114386345, 'avg_acc': 49.99571762524687, 'loss': 8.316558837890625}


EP_train:0:  90%|| 24822/27626 [58:24<06:36,  7.08it/s]

{'epoch': 0, 'iter': 24820, 'avg_loss': 8.84895969394865, 'avg_acc': 49.995341646186695, 'loss': 9.18811321258545}


EP_train:0:  90%|| 24832/27626 [58:25<06:35,  7.06it/s]

{'epoch': 0, 'iter': 24830, 'avg_loss': 8.848747138159759, 'avg_acc': 49.99660202972092, 'loss': 9.234278678894043}


EP_train:0:  90%|| 24842/27626 [58:26<06:31,  7.11it/s]

{'epoch': 0, 'iter': 24840, 'avg_loss': 8.848644594241875, 'avg_acc': 49.99786139849442, 'loss': 8.362780570983887}


EP_train:0:  90%|| 24852/27626 [58:28<06:32,  7.06it/s]

{'epoch': 0, 'iter': 24850, 'avg_loss': 8.848643902998173, 'avg_acc': 49.999497002132706, 'loss': 8.555388450622559}


EP_train:0:  90%|| 24862/27626 [58:29<06:34,  7.01it/s]

{'epoch': 0, 'iter': 24860, 'avg_loss': 8.848530529634907, 'avg_acc': 50.000879892200636, 'loss': 7.913700580596924}


EP_train:0:  90%|| 24872/27626 [58:31<06:33,  7.00it/s]

{'epoch': 0, 'iter': 24870, 'avg_loss': 8.84830455890079, 'avg_acc': 50.00288991194564, 'loss': 8.182696342468262}


EP_train:0:  90%|| 24882/27626 [58:32<06:27,  7.07it/s]

{'epoch': 0, 'iter': 24880, 'avg_loss': 8.848161994709011, 'avg_acc': 50.00113038061171, 'loss': 8.139734268188477}


EP_train:0:  90%|| 24892/27626 [58:33<06:28,  7.03it/s]

{'epoch': 0, 'iter': 24890, 'avg_loss': 8.847976277407202, 'avg_acc': 50.0023854003455, 'loss': 8.400378227233887}


EP_train:0:  90%|| 24902/27626 [58:35<06:26,  7.05it/s]

{'epoch': 0, 'iter': 24900, 'avg_loss': 8.84782380392492, 'avg_acc': 50.001882454519894, 'loss': 8.06692886352539}


EP_train:0:  90%|| 24912/27626 [58:36<06:21,  7.12it/s]

{'epoch': 0, 'iter': 24910, 'avg_loss': 8.847811628242674, 'avg_acc': 50.0, 'loss': 9.048664093017578}


EP_train:0:  90%|| 24922/27626 [58:38<06:23,  7.05it/s]

{'epoch': 0, 'iter': 24920, 'avg_loss': 8.847799484756841, 'avg_acc': 50.00037618875647, 'loss': 8.782150268554688}


EP_train:0:  90%|| 24932/27626 [58:39<06:20,  7.08it/s]

{'epoch': 0, 'iter': 24930, 'avg_loss': 8.84759171048942, 'avg_acc': 50.000376037864505, 'loss': 8.208639144897461}


EP_train:0:  90%|| 24942/27626 [58:40<06:21,  7.03it/s]

{'epoch': 0, 'iter': 24940, 'avg_loss': 8.847554517487259, 'avg_acc': 50.00025059139569, 'loss': 8.747493743896484}


EP_train:0:  90%|| 24952/27626 [58:42<06:22,  6.99it/s]

{'epoch': 0, 'iter': 24950, 'avg_loss': 8.84749430820061, 'avg_acc': 50.00137770029257, 'loss': 8.729857444763184}


EP_train:0:  90%|| 24962/27626 [58:43<06:18,  7.04it/s]

{'epoch': 0, 'iter': 24960, 'avg_loss': 8.84735801297213, 'avg_acc': 50.00037558591403, 'loss': 9.09097671508789}


EP_train:0:  90%|| 24972/27626 [58:45<06:14,  7.08it/s]

{'epoch': 0, 'iter': 24970, 'avg_loss': 8.847139155094684, 'avg_acc': 50.00075087101037, 'loss': 8.782488822937012}


EP_train:0:  90%|| 24982/27626 [58:46<06:16,  7.03it/s]

{'epoch': 0, 'iter': 24980, 'avg_loss': 8.847043109834397, 'avg_acc': 50.001501140867056, 'loss': 8.757987022399902}


EP_train:0:  90%|| 24992/27626 [58:48<06:15,  7.02it/s]

{'epoch': 0, 'iter': 24990, 'avg_loss': 8.846955627063462, 'avg_acc': 50.0, 'loss': 9.315471649169922}


EP_train:0:  91%|| 25002/27626 [58:49<06:10,  7.07it/s]

{'epoch': 0, 'iter': 25000, 'avg_loss': 8.846805519907194, 'avg_acc': 49.998750049997994, 'loss': 8.999761581420898}


EP_train:0:  91%|| 25012/27626 [58:50<06:09,  7.08it/s]

{'epoch': 0, 'iter': 25010, 'avg_loss': 8.846583494206458, 'avg_acc': 49.998250769661354, 'loss': 8.072050094604492}


EP_train:0:  91%|| 25022/27626 [58:52<06:12,  6.98it/s]

{'epoch': 0, 'iter': 25020, 'avg_loss': 8.846399351902647, 'avg_acc': 50.00087426561688, 'loss': 7.154666900634766}


EP_train:0:  91%|| 25032/27626 [58:53<06:10,  6.99it/s]

{'epoch': 0, 'iter': 25030, 'avg_loss': 8.84624673932461, 'avg_acc': 49.99987515480804, 'loss': 9.12924861907959}


EP_train:0:  91%|| 25042/27626 [58:55<06:07,  7.03it/s]

{'epoch': 0, 'iter': 25040, 'avg_loss': 8.84620295126576, 'avg_acc': 50.00149754402779, 'loss': 8.091150283813477}


EP_train:0:  91%|| 25052/27626 [58:56<06:03,  7.08it/s]

{'epoch': 0, 'iter': 25050, 'avg_loss': 8.846101359347232, 'avg_acc': 50.00149694622969, 'loss': 8.150557518005371}


EP_train:0:  91%|| 25062/27626 [58:57<06:00,  7.11it/s]

{'epoch': 0, 'iter': 25060, 'avg_loss': 8.84606771639218, 'avg_acc': 50.00149634890866, 'loss': 8.402713775634766}


EP_train:0:  91%|| 25072/27626 [58:59<05:59,  7.10it/s]

{'epoch': 0, 'iter': 25070, 'avg_loss': 8.845823732905373, 'avg_acc': 50.00162039806948, 'loss': 8.090728759765625}


EP_train:0:  91%|| 25082/27626 [59:00<06:00,  7.06it/s]

{'epoch': 0, 'iter': 25080, 'avg_loss': 8.845653200581106, 'avg_acc': 50.00174434831147, 'loss': 9.126429557800293}


EP_train:0:  91%|| 25092/27626 [59:02<05:58,  7.07it/s]

{'epoch': 0, 'iter': 25090, 'avg_loss': 8.84556569027248, 'avg_acc': 50.0018681997529, 'loss': 8.703446388244629}


EP_train:0:  91%|| 25102/27626 [59:03<05:56,  7.08it/s]

{'epoch': 0, 'iter': 25100, 'avg_loss': 8.845472482185308, 'avg_acc': 50.00161846141587, 'loss': 9.05239200592041}


EP_train:0:  91%|| 25112/27626 [59:04<05:55,  7.08it/s]

{'epoch': 0, 'iter': 25110, 'avg_loss': 8.845561287032913, 'avg_acc': 50.0, 'loss': 10.028592109680176}


EP_train:0:  91%|| 25122/27626 [59:06<05:55,  7.05it/s]

{'epoch': 0, 'iter': 25120, 'avg_loss': 8.845430046579773, 'avg_acc': 50.0, 'loss': 8.676050186157227}


EP_train:0:  91%|| 25132/27626 [59:07<05:54,  7.04it/s]

{'epoch': 0, 'iter': 25130, 'avg_loss': 8.845356291342538, 'avg_acc': 50.00024869682862, 'loss': 8.575551986694336}


EP_train:0:  91%|| 25142/27626 [59:09<05:53,  7.03it/s]

{'epoch': 0, 'iter': 25140, 'avg_loss': 8.845378649537762, 'avg_acc': 50.0014915874468, 'loss': 8.701273918151855}


EP_train:0:  91%|| 25152/27626 [59:10<05:48,  7.10it/s]

{'epoch': 0, 'iter': 25150, 'avg_loss': 8.845450213991487, 'avg_acc': 50.001366744861045, 'loss': 8.737571716308594}


EP_train:0:  91%|| 25162/27626 [59:11<05:46,  7.11it/s]

{'epoch': 0, 'iter': 25160, 'avg_loss': 8.845197167382711, 'avg_acc': 50.00062100075514, 'loss': 8.134810447692871}


EP_train:0:  91%|| 25172/27626 [59:13<05:46,  7.07it/s]

{'epoch': 0, 'iter': 25170, 'avg_loss': 8.845118244945267, 'avg_acc': 50.00037245242541, 'loss': 8.288460731506348}


EP_train:0:  91%|| 25182/27626 [59:14<05:44,  7.08it/s]

{'epoch': 0, 'iter': 25180, 'avg_loss': 8.844926275161114, 'avg_acc': 50.00111691354593, 'loss': 8.621591567993164}


EP_train:0:  91%|| 25192/27626 [59:16<05:43,  7.08it/s]

{'epoch': 0, 'iter': 25190, 'avg_loss': 8.844880062610716, 'avg_acc': 50.00012405224088, 'loss': 9.09779167175293}


EP_train:0:  91%|| 25202/27626 [59:17<05:42,  7.08it/s]

{'epoch': 0, 'iter': 25200, 'avg_loss': 8.84474669181706, 'avg_acc': 50.000124003015756, 'loss': 7.366274356842041}


EP_train:0:  91%|| 25212/27626 [59:19<05:44,  7.00it/s]

{'epoch': 0, 'iter': 25210, 'avg_loss': 8.844681947405585, 'avg_acc': 50.000495815318715, 'loss': 8.033010482788086}


EP_train:0:  91%|| 25222/27626 [59:20<05:42,  7.01it/s]

{'epoch': 0, 'iter': 25220, 'avg_loss': 8.844496240680504, 'avg_acc': 50.003221521747754, 'loss': 7.703371524810791}


EP_train:0:  91%|| 25232/27626 [59:21<05:38,  7.08it/s]

{'epoch': 0, 'iter': 25230, 'avg_loss': 8.844358357222411, 'avg_acc': 50.001610122468385, 'loss': 8.019902229309082}


EP_train:0:  91%|| 25242/27626 [59:23<05:36,  7.07it/s]

{'epoch': 0, 'iter': 25240, 'avg_loss': 8.844437199535124, 'avg_acc': 50.00198090408462, 'loss': 8.97976016998291}


EP_train:0:  91%|| 25252/27626 [59:24<05:34,  7.10it/s]

{'epoch': 0, 'iter': 25250, 'avg_loss': 8.844300996562778, 'avg_acc': 50.00297017939883, 'loss': 8.52753734588623}


EP_train:0:  91%|| 25262/27626 [59:26<05:34,  7.06it/s]

{'epoch': 0, 'iter': 25260, 'avg_loss': 8.844258094163317, 'avg_acc': 50.002969003602395, 'loss': 8.432047843933105}


EP_train:0:  91%|| 25272/27626 [59:27<05:36,  7.00it/s]

{'epoch': 0, 'iter': 25270, 'avg_loss': 8.844078355546682, 'avg_acc': 50.003215147797874, 'loss': 8.518381118774414}


EP_train:0:  92%|| 25282/27626 [59:28<05:33,  7.03it/s]

{'epoch': 0, 'iter': 25280, 'avg_loss': 8.84397468640077, 'avg_acc': 50.004079150350066, 'loss': 9.149772644042969}


EP_train:0:  92%|| 25292/27626 [59:30<05:30,  7.06it/s]

{'epoch': 0, 'iter': 25290, 'avg_loss': 8.843806458266151, 'avg_acc': 50.003830413981255, 'loss': 8.931793212890625}


EP_train:0:  92%|| 25302/27626 [59:31<05:31,  7.02it/s]

{'epoch': 0, 'iter': 25300, 'avg_loss': 8.843656708556132, 'avg_acc': 50.00531105489902, 'loss': 7.306469440460205}


EP_train:0:  92%|| 25312/27626 [59:33<05:27,  7.07it/s]

{'epoch': 0, 'iter': 25310, 'avg_loss': 8.843378503538027, 'avg_acc': 50.006667061751806, 'loss': 7.754736423492432}


EP_train:0:  92%|| 25322/27626 [59:34<05:27,  7.03it/s]

{'epoch': 0, 'iter': 25320, 'avg_loss': 8.843252624193688, 'avg_acc': 50.00740492081671, 'loss': 9.094441413879395}


EP_train:0:  92%|| 25332/27626 [59:35<05:25,  7.05it/s]

{'epoch': 0, 'iter': 25330, 'avg_loss': 8.84310998981005, 'avg_acc': 50.00678516442304, 'loss': 7.980283260345459}


EP_train:0:  92%|| 25342/27626 [59:37<05:25,  7.01it/s]

{'epoch': 0, 'iter': 25340, 'avg_loss': 8.843042295040533, 'avg_acc': 50.00542598950317, 'loss': 8.912545204162598}


EP_train:0:  92%|| 25352/27626 [59:38<05:22,  7.05it/s]

{'epoch': 0, 'iter': 25350, 'avg_loss': 8.843049656958755, 'avg_acc': 50.007642696540564, 'loss': 9.384591102600098}


EP_train:0:  92%|| 25362/27626 [59:40<05:19,  7.08it/s]

{'epoch': 0, 'iter': 25360, 'avg_loss': 8.842908749543344, 'avg_acc': 50.00924155198927, 'loss': 8.107922554016113}


EP_train:0:  92%|| 25372/27626 [59:41<05:20,  7.03it/s]

{'epoch': 0, 'iter': 25370, 'avg_loss': 8.842776275424416, 'avg_acc': 50.00985377005242, 'loss': 8.137415885925293}


EP_train:0:  92%|| 25382/27626 [59:43<05:19,  7.02it/s]

{'epoch': 0, 'iter': 25380, 'avg_loss': 8.842525604407061, 'avg_acc': 50.01058862928962, 'loss': 7.346285343170166}


EP_train:0:  92%|| 25392/27626 [59:44<05:19,  6.99it/s]

{'epoch': 0, 'iter': 25390, 'avg_loss': 8.842495165881193, 'avg_acc': 50.00947678311213, 'loss': 9.01961898803711}


EP_train:0:  92%|| 25402/27626 [59:45<05:16,  7.03it/s]

{'epoch': 0, 'iter': 25400, 'avg_loss': 8.842287003470709, 'avg_acc': 50.00959607889454, 'loss': 8.894458770751953}


EP_train:0:  92%|| 25412/27626 [59:47<05:14,  7.05it/s]

{'epoch': 0, 'iter': 25410, 'avg_loss': 8.842151625163627, 'avg_acc': 50.00946932430838, 'loss': 8.126769065856934}


EP_train:0:  92%|| 25422/27626 [59:48<05:14,  7.01it/s]

{'epoch': 0, 'iter': 25420, 'avg_loss': 8.842081827284721, 'avg_acc': 50.00663821250148, 'loss': 8.65677547454834}


EP_train:0:  92%|| 25432/27626 [59:50<05:09,  7.10it/s]

{'epoch': 0, 'iter': 25430, 'avg_loss': 8.841977119652054, 'avg_acc': 50.005529668514804, 'loss': 8.429193496704102}


EP_train:0:  92%|| 25442/27626 [59:51<05:08,  7.09it/s]

{'epoch': 0, 'iter': 25440, 'avg_loss': 8.841985272797046, 'avg_acc': 50.00773849298377, 'loss': 9.575526237487793}


EP_train:0:  92%|| 25452/27626 [59:52<05:08,  7.05it/s]

{'epoch': 0, 'iter': 25450, 'avg_loss': 8.841975684906194, 'avg_acc': 50.00798102235669, 'loss': 9.31052017211914}


EP_train:0:  92%|| 25462/27626 [59:54<05:06,  7.07it/s]

{'epoch': 0, 'iter': 25460, 'avg_loss': 8.841771681120248, 'avg_acc': 50.008100624484506, 'loss': 8.123207092285156}


EP_train:0:  92%|| 25472/27626 [59:55<05:05,  7.05it/s]

{'epoch': 0, 'iter': 25470, 'avg_loss': 8.84164801992192, 'avg_acc': 50.00846550979545, 'loss': 8.505560874938965}


EP_train:0:  92%|| 25482/27626 [59:57<05:12,  6.87it/s]

{'epoch': 0, 'iter': 25480, 'avg_loss': 8.8415516406493, 'avg_acc': 50.008339547113536, 'loss': 8.959856033325195}


EP_train:0:  92%|| 25492/27626 [59:58<05:06,  6.97it/s]

{'epoch': 0, 'iter': 25490, 'avg_loss': 8.841550275645407, 'avg_acc': 50.00809109097328, 'loss': 8.105091094970703}


EP_train:0:  92%|| 25502/27626 [1:00:00<05:01,  7.05it/s]

{'epoch': 0, 'iter': 25500, 'avg_loss': 8.841524247407754, 'avg_acc': 50.00968099290224, 'loss': 8.756190299987793}


EP_train:0:  92%|| 25512/27626 [1:00:01<05:00,  7.04it/s]

{'epoch': 0, 'iter': 25510, 'avg_loss': 8.841534013577329, 'avg_acc': 50.008942221002705, 'loss': 8.906932830810547}


EP_train:0:  92%|| 25522/27626 [1:00:02<04:56,  7.09it/s]

{'epoch': 0, 'iter': 25520, 'avg_loss': 8.841318928435427, 'avg_acc': 50.010775439833864, 'loss': 8.08063793182373}


EP_train:0:  92%|| 25532/27626 [1:00:04<04:55,  7.08it/s]

{'epoch': 0, 'iter': 25530, 'avg_loss': 8.841201786092657, 'avg_acc': 50.01064881908268, 'loss': 8.820952415466309}


EP_train:0:  92%|| 25542/27626 [1:00:05<04:54,  7.08it/s]

{'epoch': 0, 'iter': 25540, 'avg_loss': 8.841088488958794, 'avg_acc': 50.00868701303786, 'loss': 8.7042875289917}


EP_train:0:  92%|| 25552/27626 [1:00:07<04:54,  7.04it/s]

{'epoch': 0, 'iter': 25550, 'avg_loss': 8.841094755416307, 'avg_acc': 50.01100739697076, 'loss': 8.585750579833984}


EP_train:0:  93%|| 25562/27626 [1:00:08<04:51,  7.07it/s]

{'epoch': 0, 'iter': 25560, 'avg_loss': 8.840830015140932, 'avg_acc': 50.0121033997105, 'loss': 8.128857612609863}


EP_train:0:  93%|| 25572/27626 [1:00:09<04:50,  7.06it/s]

{'epoch': 0, 'iter': 25570, 'avg_loss': 8.840717709398742, 'avg_acc': 50.00977670016816, 'loss': 7.958686351776123}


EP_train:0:  93%|| 25582/27626 [1:00:11<04:51,  7.02it/s]

{'epoch': 0, 'iter': 25580, 'avg_loss': 8.84050386767788, 'avg_acc': 50.009284234392716, 'loss': 7.630806922912598}


EP_train:0:  93%|| 25592/27626 [1:00:12<04:49,  7.02it/s]

{'epoch': 0, 'iter': 25590, 'avg_loss': 8.8404490341126, 'avg_acc': 50.009158493220276, 'loss': 7.789515495300293}


EP_train:0:  93%|| 25602/27626 [1:00:14<04:47,  7.05it/s]

{'epoch': 0, 'iter': 25600, 'avg_loss': 8.84023046202969, 'avg_acc': 50.00952111245655, 'loss': 8.242151260375977}


EP_train:0:  93%|| 25612/27626 [1:00:15<04:44,  7.07it/s]

{'epoch': 0, 'iter': 25610, 'avg_loss': 8.840123788972809, 'avg_acc': 50.00780914450822, 'loss': 8.385305404663086}


EP_train:0:  93%|| 25622/27626 [1:00:17<04:44,  7.03it/s]

{'epoch': 0, 'iter': 25620, 'avg_loss': 8.839940128391182, 'avg_acc': 50.00597654267983, 'loss': 8.566152572631836}


EP_train:0:  93%|| 25632/27626 [1:00:18<04:40,  7.11it/s]

{'epoch': 0, 'iter': 25630, 'avg_loss': 8.839877887752769, 'avg_acc': 50.005730365572944, 'loss': 8.08485221862793}


EP_train:0:  93%|| 25642/27626 [1:00:19<04:41,  7.04it/s]

{'epoch': 0, 'iter': 25640, 'avg_loss': 8.839748589233128, 'avg_acc': 50.004875004875004, 'loss': 8.732685089111328}


EP_train:0:  93%|| 25652/27626 [1:00:21<04:38,  7.08it/s]

{'epoch': 0, 'iter': 25650, 'avg_loss': 8.839908198452216, 'avg_acc': 50.005116759580524, 'loss': 8.705782890319824}


EP_train:0:  93%|| 25662/27626 [1:00:22<04:39,  7.03it/s]

{'epoch': 0, 'iter': 25660, 'avg_loss': 8.83981822128953, 'avg_acc': 50.007063247730024, 'loss': 8.097780227661133}


EP_train:0:  93%|| 25672/27626 [1:00:24<04:39,  7.00it/s]

{'epoch': 0, 'iter': 25670, 'avg_loss': 8.83973447020204, 'avg_acc': 50.00754742705777, 'loss': 9.293113708496094}


EP_train:0:  93%|| 25682/27626 [1:00:25<04:35,  7.05it/s]

{'epoch': 0, 'iter': 25680, 'avg_loss': 8.839582378584566, 'avg_acc': 50.00778785872825, 'loss': 9.027779579162598}


EP_train:0:  93%|| 25692/27626 [1:00:26<04:33,  7.06it/s]

{'epoch': 0, 'iter': 25690, 'avg_loss': 8.839696099808169, 'avg_acc': 50.00851465493753, 'loss': 10.305423736572266}


EP_train:0:  93%|| 25702/27626 [1:00:28<04:32,  7.06it/s]

{'epoch': 0, 'iter': 25700, 'avg_loss': 8.839724589895123, 'avg_acc': 50.009605657367416, 'loss': 8.621831893920898}


EP_train:0:  93%|| 25712/27626 [1:00:29<04:30,  7.07it/s]

{'epoch': 0, 'iter': 25710, 'avg_loss': 8.83969763298861, 'avg_acc': 50.01033118120649, 'loss': 8.730385780334473}


EP_train:0:  93%|| 25722/27626 [1:00:31<04:28,  7.08it/s]

{'epoch': 0, 'iter': 25720, 'avg_loss': 8.839629015087002, 'avg_acc': 50.01057015668131, 'loss': 8.653328895568848}


EP_train:0:  93%|| 25732/27626 [1:00:32<04:28,  7.06it/s]

{'epoch': 0, 'iter': 25730, 'avg_loss': 8.839737534212606, 'avg_acc': 50.01226633243947, 'loss': 9.14847469329834}


EP_train:0:  93%|| 25742/27626 [1:00:33<04:26,  7.07it/s]

{'epoch': 0, 'iter': 25740, 'avg_loss': 8.839784850228023, 'avg_acc': 50.01153315722, 'loss': 8.718018531799316}


EP_train:0:  93%|| 25752/27626 [1:00:35<04:25,  7.05it/s]

{'epoch': 0, 'iter': 25750, 'avg_loss': 8.839778557096304, 'avg_acc': 50.01152867849792, 'loss': 9.14820384979248}


EP_train:0:  93%|| 25762/27626 [1:00:36<04:25,  7.03it/s]

{'epoch': 0, 'iter': 25760, 'avg_loss': 8.83964631474262, 'avg_acc': 50.010189821823694, 'loss': 8.43433952331543}


EP_train:0:  93%|| 25772/27626 [1:00:38<04:20,  7.11it/s]

{'epoch': 0, 'iter': 25770, 'avg_loss': 8.839546247280026, 'avg_acc': 50.0098220868418, 'loss': 8.396746635437012}


EP_train:0:  93%|| 25782/27626 [1:00:39<04:21,  7.04it/s]

{'epoch': 0, 'iter': 25780, 'avg_loss': 8.839406983439286, 'avg_acc': 50.009090997246034, 'loss': 9.53994369506836}


EP_train:0:  93%|| 25792/27626 [1:00:40<04:20,  7.05it/s]

{'epoch': 0, 'iter': 25790, 'avg_loss': 8.839376551179322, 'avg_acc': 50.00884513977745, 'loss': 8.20372486114502}


EP_train:0:  93%|| 25802/27626 [1:00:42<04:17,  7.08it/s]

{'epoch': 0, 'iter': 25800, 'avg_loss': 8.839280012212868, 'avg_acc': 50.006903802178215, 'loss': 9.359969139099121}


EP_train:0:  93%|| 25812/27626 [1:00:43<04:16,  7.07it/s]

{'epoch': 0, 'iter': 25810, 'avg_loss': 8.839203914693176, 'avg_acc': 50.00799077912518, 'loss': 8.50164794921875}


EP_train:0:  93%|| 25822/27626 [1:00:45<04:14,  7.08it/s]

{'epoch': 0, 'iter': 25820, 'avg_loss': 8.839223505890086, 'avg_acc': 50.0060512760931, 'loss': 8.661457061767578}


EP_train:0:  94%|| 25832/27626 [1:00:46<04:13,  7.07it/s]

{'epoch': 0, 'iter': 25830, 'avg_loss': 8.839039276980028, 'avg_acc': 50.00629089079014, 'loss': 8.3480806350708}


EP_train:0:  94%|| 25842/27626 [1:00:48<04:13,  7.03it/s]

{'epoch': 0, 'iter': 25840, 'avg_loss': 8.83893791521432, 'avg_acc': 50.00616752447661, 'loss': 9.565309524536133}


EP_train:0:  94%|| 25852/27626 [1:00:49<04:12,  7.02it/s]

{'epoch': 0, 'iter': 25850, 'avg_loss': 8.838872493118075, 'avg_acc': 50.00640690882364, 'loss': 8.967732429504395}


EP_train:0:  94%|| 25862/27626 [1:00:50<04:10,  7.05it/s]

{'epoch': 0, 'iter': 25860, 'avg_loss': 8.83874135373073, 'avg_acc': 50.00809616797495, 'loss': 8.460477828979492}


EP_train:0:  94%|| 25872/27626 [1:00:52<04:09,  7.03it/s]

{'epoch': 0, 'iter': 25870, 'avg_loss': 8.838573663660751, 'avg_acc': 50.00748908043755, 'loss': 8.388055801391602}


EP_train:0:  94%|| 25882/27626 [1:00:53<04:06,  7.07it/s]

{'epoch': 0, 'iter': 25880, 'avg_loss': 8.838370405500553, 'avg_acc': 50.005312777713385, 'loss': 8.298693656921387}


EP_train:0:  94%|| 25892/27626 [1:00:55<04:06,  7.04it/s]

{'epoch': 0, 'iter': 25890, 'avg_loss': 8.83836609535148, 'avg_acc': 50.00543142404696, 'loss': 9.131940841674805}


EP_train:0:  94%|| 25902/27626 [1:00:56<04:05,  7.03it/s]

{'epoch': 0, 'iter': 25900, 'avg_loss': 8.838318055083192, 'avg_acc': 50.00530867534072, 'loss': 8.177205085754395}


EP_train:0:  94%|| 25912/27626 [1:00:57<04:03,  7.03it/s]

{'epoch': 0, 'iter': 25910, 'avg_loss': 8.838121474674853, 'avg_acc': 50.00578904712284, 'loss': 8.43263053894043}


EP_train:0:  94%|| 25922/27626 [1:00:59<04:03,  6.99it/s]

{'epoch': 0, 'iter': 25920, 'avg_loss': 8.838049557155575, 'avg_acc': 50.005907372400756, 'loss': 8.975359916687012}


EP_train:0:  94%|| 25932/27626 [1:01:00<04:02,  6.98it/s]

{'epoch': 0, 'iter': 25930, 'avg_loss': 8.838019089832844, 'avg_acc': 50.005664070032005, 'loss': 8.627286911010742}


EP_train:0:  94%|| 25942/27626 [1:01:02<04:00,  7.02it/s]

{'epoch': 0, 'iter': 25940, 'avg_loss': 8.837967573293609, 'avg_acc': 50.003854901507275, 'loss': 7.652379989624023}


EP_train:0:  94%|| 25952/27626 [1:01:03<03:57,  7.06it/s]

{'epoch': 0, 'iter': 25950, 'avg_loss': 8.837898193997486, 'avg_acc': 50.005418866325, 'loss': 9.328130722045898}


EP_train:0:  94%|| 25962/27626 [1:01:05<03:54,  7.08it/s]

{'epoch': 0, 'iter': 25960, 'avg_loss': 8.837925413798159, 'avg_acc': 50.005176033280684, 'loss': 8.158038139343262}


EP_train:0:  94%|| 25972/27626 [1:01:06<03:55,  7.03it/s]

{'epoch': 0, 'iter': 25970, 'avg_loss': 8.837847378140928, 'avg_acc': 50.004331754649414, 'loss': 8.945321083068848}


EP_train:0:  94%|| 25982/27626 [1:01:07<03:53,  7.05it/s]

{'epoch': 0, 'iter': 25980, 'avg_loss': 8.837673200525296, 'avg_acc': 50.00469092798584, 'loss': 7.722350597381592}


EP_train:0:  94%|| 25992/27626 [1:01:09<03:51,  7.05it/s]

{'epoch': 0, 'iter': 25990, 'avg_loss': 8.837470785962683, 'avg_acc': 50.00372725174099, 'loss': 9.602682113647461}


EP_train:0:  94%|| 26002/27626 [1:01:10<03:50,  7.05it/s]

{'epoch': 0, 'iter': 26000, 'avg_loss': 8.837495743368713, 'avg_acc': 50.004927695088654, 'loss': 7.292577266693115}


EP_train:0:  94%|| 26012/27626 [1:01:12<03:47,  7.10it/s]

{'epoch': 0, 'iter': 26010, 'avg_loss': 8.837480640206056, 'avg_acc': 50.00612721540887, 'loss': 8.711803436279297}


EP_train:0:  94%|| 26022/27626 [1:01:13<03:47,  7.06it/s]

{'epoch': 0, 'iter': 26020, 'avg_loss': 8.837545686944415, 'avg_acc': 50.00540428884362, 'loss': 9.765901565551758}


EP_train:0:  94%|| 26032/27626 [1:01:14<03:49,  6.95it/s]

{'epoch': 0, 'iter': 26030, 'avg_loss': 8.837554883189384, 'avg_acc': 50.00528216357421, 'loss': 9.63464641571045}


EP_train:0:  94%|| 26042/27626 [1:01:16<03:45,  7.04it/s]

{'epoch': 0, 'iter': 26040, 'avg_loss': 8.837457598448212, 'avg_acc': 50.005760147459775, 'loss': 8.87712574005127}


EP_train:0:  94%|| 26052/27626 [1:01:17<03:42,  7.08it/s]

{'epoch': 0, 'iter': 26050, 'avg_loss': 8.837352189435267, 'avg_acc': 50.005997850370434, 'loss': 8.866382598876953}


EP_train:0:  94%|| 26062/27626 [1:01:19<03:41,  7.07it/s]

{'epoch': 0, 'iter': 26060, 'avg_loss': 8.837268879662352, 'avg_acc': 50.006355281838765, 'loss': 8.361748695373535}


EP_train:0:  94%|| 26072/27626 [1:01:20<03:40,  7.06it/s]

{'epoch': 0, 'iter': 26070, 'avg_loss': 8.837191278058116, 'avg_acc': 50.005393924283695, 'loss': 9.046780586242676}


EP_train:0:  94%|| 26082/27626 [1:01:21<03:40,  7.01it/s]

{'epoch': 0, 'iter': 26080, 'avg_loss': 8.837145242811342, 'avg_acc': 50.00527203711515, 'loss': 8.781878471374512}


EP_train:0:  94%|| 26092/27626 [1:01:23<03:37,  7.04it/s]

{'epoch': 0, 'iter': 26090, 'avg_loss': 8.837093910303402, 'avg_acc': 50.004910697175276, 'loss': 8.473740577697754}


EP_train:0:  94%|| 26102/27626 [1:01:24<03:35,  7.06it/s]

{'epoch': 0, 'iter': 26100, 'avg_loss': 8.83690898503651, 'avg_acc': 50.00478908854067, 'loss': 9.124263763427734}


EP_train:0:  95%|| 26112/27626 [1:01:26<03:35,  7.03it/s]

{'epoch': 0, 'iter': 26110, 'avg_loss': 8.83685115482061, 'avg_acc': 50.003829803531076, 'loss': 9.211090087890625}


EP_train:0:  95%|| 26122/27626 [1:01:27<03:34,  7.00it/s]

{'epoch': 0, 'iter': 26120, 'avg_loss': 8.836826415372467, 'avg_acc': 50.003469430726234, 'loss': 8.908439636230469}


EP_train:0:  95%|| 26132/27626 [1:01:29<03:31,  7.07it/s]

{'epoch': 0, 'iter': 26130, 'avg_loss': 8.836854334243617, 'avg_acc': 50.003109333741534, 'loss': 8.708830833435059}


EP_train:0:  95%|| 26142/27626 [1:01:30<03:29,  7.09it/s]

{'epoch': 0, 'iter': 26140, 'avg_loss': 8.836884269653796, 'avg_acc': 50.0051403924869, 'loss': 8.312454223632812}


EP_train:0:  95%|| 26152/27626 [1:01:31<03:27,  7.09it/s]

{'epoch': 0, 'iter': 26150, 'avg_loss': 8.836704089716239, 'avg_acc': 50.00430193874039, 'loss': 9.051197052001953}


EP_train:0:  95%|| 26162/27626 [1:01:33<03:27,  7.04it/s]

{'epoch': 0, 'iter': 26160, 'avg_loss': 8.836521281647777, 'avg_acc': 50.00298631550781, 'loss': 8.695023536682129}


EP_train:0:  95%|| 26172/27626 [1:01:34<03:28,  6.98it/s]

{'epoch': 0, 'iter': 26170, 'avg_loss': 8.836292590178866, 'avg_acc': 50.00298517442972, 'loss': 7.743841648101807}


EP_train:0:  95%|| 26182/27626 [1:01:36<03:25,  7.02it/s]

{'epoch': 0, 'iter': 26180, 'avg_loss': 8.836171458792064, 'avg_acc': 50.004058286543675, 'loss': 8.843875885009766}


EP_train:0:  95%|| 26192/27626 [1:01:37<03:24,  7.01it/s]

{'epoch': 0, 'iter': 26190, 'avg_loss': 8.836148385443387, 'avg_acc': 50.0051305792066, 'loss': 8.408814430236816}


EP_train:0:  95%|| 26202/27626 [1:01:38<03:21,  7.07it/s]

{'epoch': 0, 'iter': 26200, 'avg_loss': 8.835990876843194, 'avg_acc': 50.00357810770581, 'loss': 8.933602333068848}


EP_train:0:  95%|| 26212/27626 [1:01:40<03:20,  7.06it/s]

{'epoch': 0, 'iter': 26210, 'avg_loss': 8.83582987804805, 'avg_acc': 50.004053641600855, 'loss': 7.158727169036865}


EP_train:0:  95%|| 26222/27626 [1:01:41<03:18,  7.06it/s]

{'epoch': 0, 'iter': 26220, 'avg_loss': 8.835749281875275, 'avg_acc': 50.00452881278365, 'loss': 8.196131706237793}


EP_train:0:  95%|| 26232/27626 [1:01:43<03:18,  7.03it/s]

{'epoch': 0, 'iter': 26230, 'avg_loss': 8.835633549332679, 'avg_acc': 50.00464622012123, 'loss': 8.922442436218262}


EP_train:0:  95%|| 26242/27626 [1:01:44<03:14,  7.10it/s]

{'epoch': 0, 'iter': 26240, 'avg_loss': 8.835629215482522, 'avg_acc': 50.00535898022179, 'loss': 8.288418769836426}


EP_train:0:  95%|| 26252/27626 [1:01:46<03:14,  7.08it/s]

{'epoch': 0, 'iter': 26250, 'avg_loss': 8.835497267718251, 'avg_acc': 50.0052378956992, 'loss': 8.549869537353516}


EP_train:0:  95%|| 26262/27626 [1:01:47<03:12,  7.10it/s]

{'epoch': 0, 'iter': 26260, 'avg_loss': 8.83536443709964, 'avg_acc': 50.00630688092609, 'loss': 8.817841529846191}


EP_train:0:  95%|| 26272/27626 [1:01:48<03:12,  7.02it/s]

{'epoch': 0, 'iter': 26270, 'avg_loss': 8.835271731378064, 'avg_acc': 50.005352860568685, 'loss': 9.330869674682617}


EP_train:0:  95%|| 26282/27626 [1:01:50<03:10,  7.07it/s]

{'epoch': 0, 'iter': 26280, 'avg_loss': 8.835103119305519, 'avg_acc': 50.00416175183593, 'loss': 8.865283012390137}


EP_train:0:  95%|| 26292/27626 [1:01:51<03:10,  7.02it/s]

{'epoch': 0, 'iter': 26290, 'avg_loss': 8.83494842293227, 'avg_acc': 50.00356585903921, 'loss': 8.387063026428223}


EP_train:0:  95%|| 26302/27626 [1:01:53<03:08,  7.04it/s]

{'epoch': 0, 'iter': 26300, 'avg_loss': 8.834864599641922, 'avg_acc': 50.00451503745105, 'loss': 8.687265396118164}


EP_train:0:  95%|| 26312/27626 [1:01:54<03:05,  7.09it/s]

{'epoch': 0, 'iter': 26310, 'avg_loss': 8.83479049406856, 'avg_acc': 50.005344722739544, 'loss': 8.32398796081543}


EP_train:0:  95%|| 26322/27626 [1:01:55<03:05,  7.01it/s]

{'epoch': 0, 'iter': 26320, 'avg_loss': 8.83476106696866, 'avg_acc': 50.00474905968618, 'loss': 8.905501365661621}


EP_train:0:  95%|| 26332/27626 [1:01:57<03:04,  7.01it/s]

{'epoch': 0, 'iter': 26330, 'avg_loss': 8.834617027025418, 'avg_acc': 50.00640879571607, 'loss': 9.773839950561523}


EP_train:0:  95%|| 26342/27626 [1:01:58<03:03,  7.00it/s]

{'epoch': 0, 'iter': 26340, 'avg_loss': 8.834465847784374, 'avg_acc': 50.0065249990509, 'loss': 8.177410125732422}


EP_train:0:  95%|| 26352/27626 [1:02:00<03:00,  7.05it/s]

{'epoch': 0, 'iter': 26350, 'avg_loss': 8.834428178766892, 'avg_acc': 50.00770843611249, 'loss': 9.370437622070312}


EP_train:0:  95%|| 26362/27626 [1:02:01<02:59,  7.05it/s]

{'epoch': 0, 'iter': 26360, 'avg_loss': 8.834433804811537, 'avg_acc': 50.007468419255716, 'loss': 8.671732902526855}


EP_train:0:  95%|| 26372/27626 [1:02:02<02:56,  7.10it/s]

{'epoch': 0, 'iter': 26370, 'avg_loss': 8.834289465868759, 'avg_acc': 50.00722858442986, 'loss': 8.546575546264648}


EP_train:0:  95%|| 26382/27626 [1:02:04<02:56,  7.04it/s]

{'epoch': 0, 'iter': 26380, 'avg_loss': 8.83419302801227, 'avg_acc': 50.0071073878928, 'loss': 8.251167297363281}


EP_train:0:  96%|| 26392/27626 [1:02:05<02:54,  7.06it/s]

{'epoch': 0, 'iter': 26390, 'avg_loss': 8.83408785865503, 'avg_acc': 50.00947292637642, 'loss': 8.260773658752441}


EP_train:0:  96%|| 26402/27626 [1:02:07<02:53,  7.06it/s]

{'epoch': 0, 'iter': 26400, 'avg_loss': 8.833989953891802, 'avg_acc': 50.0104162721109, 'loss': 8.142410278320312}


EP_train:0:  96%|| 26412/27626 [1:02:08<02:51,  7.08it/s]

{'epoch': 0, 'iter': 26410, 'avg_loss': 8.833914358944195, 'avg_acc': 50.00993904055129, 'loss': 7.948624134063721}


EP_train:0:  96%|| 26422/27626 [1:02:10<02:50,  7.08it/s]

{'epoch': 0, 'iter': 26420, 'avg_loss': 8.833937336397696, 'avg_acc': 50.009580447371405, 'loss': 8.601792335510254}


EP_train:0:  96%|| 26432/27626 [1:02:11<02:49,  7.03it/s]

{'epoch': 0, 'iter': 26430, 'avg_loss': 8.83382453902581, 'avg_acc': 50.00839449888389, 'loss': 8.11750316619873}


EP_train:0:  96%|| 26442/27626 [1:02:12<02:48,  7.04it/s]

{'epoch': 0, 'iter': 26440, 'avg_loss': 8.833642589554954, 'avg_acc': 50.007327635112134, 'loss': 8.544697761535645}


EP_train:0:  96%|| 26452/27626 [1:02:14<02:45,  7.10it/s]

{'epoch': 0, 'iter': 26450, 'avg_loss': 8.833459797736932, 'avg_acc': 50.008624437639405, 'loss': 9.306130409240723}


EP_train:0:  96%|| 26462/27626 [1:02:15<02:43,  7.10it/s]

{'epoch': 0, 'iter': 26460, 'avg_loss': 8.83345512536318, 'avg_acc': 50.008148785004344, 'loss': 8.702875137329102}


EP_train:0:  96%|| 26472/27626 [1:02:17<02:43,  7.07it/s]

{'epoch': 0, 'iter': 26470, 'avg_loss': 8.833412371405547, 'avg_acc': 50.007673491745685, 'loss': 8.972453117370605}


EP_train:0:  96%|| 26482/27626 [1:02:18<02:42,  7.05it/s]

{'epoch': 0, 'iter': 26480, 'avg_loss': 8.833430769346185, 'avg_acc': 50.00885068539708, 'loss': 8.648249626159668}


EP_train:0:  96%|| 26492/27626 [1:02:19<02:40,  7.05it/s]

{'epoch': 0, 'iter': 26490, 'avg_loss': 8.833323742056265, 'avg_acc': 50.009437167339854, 'loss': 8.360123634338379}


EP_train:0:  96%|| 26502/27626 [1:02:21<02:37,  7.11it/s]

{'epoch': 0, 'iter': 26500, 'avg_loss': 8.833275261755139, 'avg_acc': 50.009787366514466, 'loss': 8.183792114257812}


EP_train:0:  96%|| 26512/27626 [1:02:22<02:37,  7.06it/s]

{'epoch': 0, 'iter': 26510, 'avg_loss': 8.83302471053258, 'avg_acc': 50.00966579910225, 'loss': 9.048477172851562}


EP_train:0:  96%|| 26522/27626 [1:02:24<02:37,  7.00it/s]

{'epoch': 0, 'iter': 26520, 'avg_loss': 8.832826872423203, 'avg_acc': 50.00966215451906, 'loss': 7.675967216491699}


EP_train:0:  96%|| 26532/27626 [1:02:25<02:34,  7.07it/s]

{'epoch': 0, 'iter': 26530, 'avg_loss': 8.832978850650058, 'avg_acc': 50.00824507180279, 'loss': 9.064050674438477}


EP_train:0:  96%|| 26542/27626 [1:02:26<02:33,  7.05it/s]

{'epoch': 0, 'iter': 26540, 'avg_loss': 8.832923471826584, 'avg_acc': 50.00824196526129, 'loss': 8.318180084228516}


EP_train:0:  96%|| 26552/27626 [1:02:28<02:31,  7.08it/s]

{'epoch': 0, 'iter': 26550, 'avg_loss': 8.832769310212639, 'avg_acc': 50.005649504726755, 'loss': 8.356054306030273}


EP_train:0:  96%|| 26562/27626 [1:02:29<02:31,  7.04it/s]

{'epoch': 0, 'iter': 26560, 'avg_loss': 8.83270849032079, 'avg_acc': 50.0042355333007, 'loss': 8.707940101623535}


EP_train:0:  96%|| 26572/27626 [1:02:31<02:29,  7.03it/s]

{'epoch': 0, 'iter': 26570, 'avg_loss': 8.832579260061586, 'avg_acc': 50.00376350156185, 'loss': 8.081541061401367}


EP_train:0:  96%|| 26582/27626 [1:02:32<02:29,  7.01it/s]

{'epoch': 0, 'iter': 26580, 'avg_loss': 8.832598409347387, 'avg_acc': 50.00411478123472, 'loss': 8.51767349243164}


EP_train:0:  96%|| 26592/27626 [1:02:34<02:26,  7.07it/s]

{'epoch': 0, 'iter': 26590, 'avg_loss': 8.832433224423724, 'avg_acc': 50.00317306607499, 'loss': 8.264065742492676}


EP_train:0:  96%|| 26602/27626 [1:02:35<02:25,  7.06it/s]

{'epoch': 0, 'iter': 26600, 'avg_loss': 8.832336777085887, 'avg_acc': 50.002232058945154, 'loss': 7.862547397613525}


EP_train:0:  96%|| 26612/27626 [1:02:36<02:22,  7.09it/s]

{'epoch': 0, 'iter': 26610, 'avg_loss': 8.832320973236214, 'avg_acc': 50.003170681297206, 'loss': 9.338300704956055}


EP_train:0:  96%|| 26622/27626 [1:02:38<02:22,  7.04it/s]

{'epoch': 0, 'iter': 26620, 'avg_loss': 8.83228850403196, 'avg_acc': 50.0030521017242, 'loss': 9.602940559387207}


EP_train:0:  96%|| 26632/27626 [1:02:39<02:20,  7.07it/s]

{'epoch': 0, 'iter': 26630, 'avg_loss': 8.832232349119723, 'avg_acc': 50.00211220006759, 'loss': 8.132599830627441}


EP_train:0:  96%|| 26642/27626 [1:02:41<02:18,  7.08it/s]

{'epoch': 0, 'iter': 26640, 'avg_loss': 8.832172352220331, 'avg_acc': 50.00093840321309, 'loss': 7.766215801239014}


EP_train:0:  96%|| 26652/27626 [1:02:42<02:18,  7.01it/s]

{'epoch': 0, 'iter': 26650, 'avg_loss': 8.832077346006068, 'avg_acc': 50.001524333045666, 'loss': 8.158002853393555}


EP_train:0:  97%|| 26662/27626 [1:02:43<02:16,  7.06it/s]

{'epoch': 0, 'iter': 26660, 'avg_loss': 8.831970240348037, 'avg_acc': 50.0, 'loss': 8.341317176818848}


EP_train:0:  97%|| 26672/27626 [1:02:45<02:16,  7.00it/s]

{'epoch': 0, 'iter': 26670, 'avg_loss': 8.831926458513307, 'avg_acc': 50.00023433692024, 'loss': 9.469526290893555}


EP_train:0:  97%|| 26682/27626 [1:02:46<02:14,  7.04it/s]

{'epoch': 0, 'iter': 26680, 'avg_loss': 8.832005062702335, 'avg_acc': 50.000351373636676, 'loss': 9.43425464630127}


EP_train:0:  97%|| 26692/27626 [1:02:48<02:12,  7.05it/s]

{'epoch': 0, 'iter': 26690, 'avg_loss': 8.831920668710001, 'avg_acc': 50.00023416132778, 'loss': 8.340965270996094}


EP_train:0:  97%|| 26702/27626 [1:02:49<02:10,  7.09it/s]

{'epoch': 0, 'iter': 26700, 'avg_loss': 8.831855376487074, 'avg_acc': 49.999063705479195, 'loss': 8.32406997680664}


EP_train:0:  97%|| 26712/27626 [1:02:50<02:09,  7.07it/s]

{'epoch': 0, 'iter': 26710, 'avg_loss': 8.831713077241972, 'avg_acc': 49.99859608401033, 'loss': 9.048157691955566}


EP_train:0:  97%|| 26722/27626 [1:02:52<02:07,  7.08it/s]

{'epoch': 0, 'iter': 26720, 'avg_loss': 8.831621495055552, 'avg_acc': 49.997310168032634, 'loss': 7.8422746658325195}


EP_train:0:  97%|| 26732/27626 [1:02:53<02:06,  7.07it/s]

{'epoch': 0, 'iter': 26730, 'avg_loss': 8.831617614591542, 'avg_acc': 49.99731117429202, 'loss': 8.7598295211792}


EP_train:0:  97%|| 26742/27626 [1:02:55<02:05,  7.05it/s]

{'epoch': 0, 'iter': 26740, 'avg_loss': 8.831644380142995, 'avg_acc': 49.99742904154669, 'loss': 9.651885032653809}


EP_train:0:  97%|| 26752/27626 [1:02:56<02:03,  7.06it/s]

{'epoch': 0, 'iter': 26750, 'avg_loss': 8.831535906815038, 'avg_acc': 49.99894863743412, 'loss': 8.294693946838379}


EP_train:0:  97%|| 26762/27626 [1:02:57<02:02,  7.05it/s]

{'epoch': 0, 'iter': 26760, 'avg_loss': 8.831530456536553, 'avg_acc': 49.998715481484254, 'loss': 8.674457550048828}


EP_train:0:  97%|| 26772/27626 [1:02:59<02:01,  7.05it/s]

{'epoch': 0, 'iter': 26770, 'avg_loss': 8.83142994177862, 'avg_acc': 49.99824903813828, 'loss': 8.036111831665039}


EP_train:0:  97%|| 26782/27626 [1:03:00<01:59,  7.08it/s]

{'epoch': 0, 'iter': 26780, 'avg_loss': 8.831423202464936, 'avg_acc': 49.99789963033494, 'loss': 9.137147903442383}


EP_train:0:  97%|| 26792/27626 [1:03:02<01:59,  6.99it/s]

{'epoch': 0, 'iter': 26790, 'avg_loss': 8.831341950945285, 'avg_acc': 49.99790041431824, 'loss': 8.217123031616211}


EP_train:0:  97%|| 26802/27626 [1:03:03<01:56,  7.07it/s]

{'epoch': 0, 'iter': 26800, 'avg_loss': 8.831253607448083, 'avg_acc': 49.99685179657475, 'loss': 9.4484224319458}


EP_train:0:  97%|| 26812/27626 [1:03:05<01:55,  7.06it/s]

{'epoch': 0, 'iter': 26810, 'avg_loss': 8.831193519818655, 'avg_acc': 49.99720264070717, 'loss': 8.809232711791992}


EP_train:0:  97%|| 26822/27626 [1:03:06<01:53,  7.08it/s]

{'epoch': 0, 'iter': 26820, 'avg_loss': 8.831112889154419, 'avg_acc': 49.99545598598114, 'loss': 7.87407922744751}


EP_train:0:  97%|| 26832/27626 [1:03:07<01:52,  7.05it/s]

{'epoch': 0, 'iter': 26830, 'avg_loss': 8.830980404619979, 'avg_acc': 49.9941765122433, 'loss': 9.057470321655273}


EP_train:0:  97%|| 26842/27626 [1:03:09<01:51,  7.04it/s]

{'epoch': 0, 'iter': 26840, 'avg_loss': 8.830993105458198, 'avg_acc': 49.993712976416674, 'loss': 7.82798957824707}


EP_train:0:  97%|| 26852/27626 [1:03:10<01:49,  7.05it/s]

{'epoch': 0, 'iter': 26850, 'avg_loss': 8.830941673435227, 'avg_acc': 49.992085955830326, 'loss': 8.55935001373291}


EP_train:0:  97%|| 26862/27626 [1:03:12<01:48,  7.07it/s]

{'epoch': 0, 'iter': 26860, 'avg_loss': 8.830791881436731, 'avg_acc': 49.99429935594356, 'loss': 8.950984001159668}


EP_train:0:  97%|| 26872/27626 [1:03:13<01:46,  7.05it/s]

{'epoch': 0, 'iter': 26870, 'avg_loss': 8.83059090894732, 'avg_acc': 49.994185181050206, 'loss': 7.30631160736084}


EP_train:0:  97%|| 26882/27626 [1:03:14<01:45,  7.03it/s]

{'epoch': 0, 'iter': 26880, 'avg_loss': 8.830602109735997, 'avg_acc': 49.99302481306499, 'loss': 9.071612358093262}


EP_train:0:  97%|| 26892/27626 [1:03:16<01:43,  7.06it/s]

{'epoch': 0, 'iter': 26890, 'avg_loss': 8.830602842134248, 'avg_acc': 49.99349224647652, 'loss': 8.912690162658691}


EP_train:0:  97%|| 26902/27626 [1:03:17<01:42,  7.04it/s]

{'epoch': 0, 'iter': 26900, 'avg_loss': 8.830427468281679, 'avg_acc': 49.99233299877328, 'loss': 7.617045879364014}


EP_train:0:  97%|| 26912/27626 [1:03:19<01:40,  7.10it/s]

{'epoch': 0, 'iter': 26910, 'avg_loss': 8.830303621967209, 'avg_acc': 49.992800341867635, 'loss': 8.839593887329102}


EP_train:0:  97%|| 26922/27626 [1:03:20<01:39,  7.07it/s]

{'epoch': 0, 'iter': 26920, 'avg_loss': 8.830351270541257, 'avg_acc': 49.99303517699937, 'loss': 8.106551170349121}


EP_train:0:  97%|| 26932/27626 [1:03:22<01:39,  6.99it/s]

{'epoch': 0, 'iter': 26930, 'avg_loss': 8.830353312626759, 'avg_acc': 49.99385002413575, 'loss': 9.720643043518066}


EP_train:0:  98%|| 26942/27626 [1:03:23<01:37,  7.03it/s]

{'epoch': 0, 'iter': 26940, 'avg_loss': 8.830293937681832, 'avg_acc': 49.99350432426413, 'loss': 9.085464477539062}


EP_train:0:  98%|| 26952/27626 [1:03:24<01:35,  7.04it/s]

{'epoch': 0, 'iter': 26950, 'avg_loss': 8.830201649302246, 'avg_acc': 49.99513005083299, 'loss': 8.66374397277832}


EP_train:0:  98%|| 26962/27626 [1:03:26<01:34,  7.01it/s]

{'epoch': 0, 'iter': 26960, 'avg_loss': 8.830187290059074, 'avg_acc': 49.99582730610882, 'loss': 9.06254768371582}


EP_train:0:  98%|| 26972/27626 [1:03:27<01:33,  7.02it/s]

{'epoch': 0, 'iter': 26970, 'avg_loss': 8.830049934732314, 'avg_acc': 49.99513366208149, 'loss': 8.44512939453125}


EP_train:0:  98%|| 26982/27626 [1:03:29<01:32,  6.98it/s]

{'epoch': 0, 'iter': 26980, 'avg_loss': 8.829861093135634, 'avg_acc': 49.9950196434528, 'loss': 8.891827583312988}


EP_train:0:  98%|| 26992/27626 [1:03:30<01:29,  7.06it/s]

{'epoch': 0, 'iter': 26990, 'avg_loss': 8.829756827349307, 'avg_acc': 49.993979474639694, 'loss': 8.641396522521973}


EP_train:0:  98%|| 27002/27626 [1:03:31<01:28,  7.03it/s]

{'epoch': 0, 'iter': 27000, 'avg_loss': 8.829619657860425, 'avg_acc': 49.99085682011778, 'loss': 8.273269653320312}


EP_train:0:  98%|| 27012/27626 [1:03:33<01:27,  6.99it/s]

{'epoch': 0, 'iter': 27010, 'avg_loss': 8.829542161658296, 'avg_acc': 49.98981896264485, 'loss': 8.608806610107422}


EP_train:0:  98%|| 27022/27626 [1:03:34<01:26,  6.96it/s]

{'epoch': 0, 'iter': 27020, 'avg_loss': 8.82944806670945, 'avg_acc': 49.99051663520965, 'loss': 9.024843215942383}


EP_train:0:  98%|| 27032/27626 [1:03:36<01:24,  7.04it/s]

{'epoch': 0, 'iter': 27030, 'avg_loss': 8.82926209695922, 'avg_acc': 49.99040453553328, 'loss': 8.178032875061035}


EP_train:0:  98%|| 27042/27626 [1:03:37<01:22,  7.07it/s]

{'epoch': 0, 'iter': 27040, 'avg_loss': 8.829221921428934, 'avg_acc': 49.99063921452609, 'loss': 7.885423183441162}


EP_train:0:  98%|| 27052/27626 [1:03:39<01:20,  7.09it/s]

{'epoch': 0, 'iter': 27050, 'avg_loss': 8.829060821417567, 'avg_acc': 49.99018058482127, 'loss': 8.247926712036133}


EP_train:0:  98%|| 27062/27626 [1:03:40<01:20,  7.04it/s]

{'epoch': 0, 'iter': 27060, 'avg_loss': 8.829051782751748, 'avg_acc': 49.98926037470899, 'loss': 8.492159843444824}


EP_train:0:  98%|| 27072/27626 [1:03:41<01:17,  7.11it/s]

{'epoch': 0, 'iter': 27070, 'avg_loss': 8.829115018710384, 'avg_acc': 49.98984152783421, 'loss': 9.308958053588867}


EP_train:0:  98%|| 27082/27626 [1:03:43<01:17,  7.06it/s]

{'epoch': 0, 'iter': 27080, 'avg_loss': 8.829015337889553, 'avg_acc': 49.98961448986374, 'loss': 8.775264739990234}


EP_train:0:  98%|| 27092/27626 [1:03:44<01:15,  7.08it/s]

{'epoch': 0, 'iter': 27090, 'avg_loss': 8.82898312779141, 'avg_acc': 49.99031043519988, 'loss': 8.807480812072754}


EP_train:0:  98%|| 27102/27626 [1:03:46<01:14,  7.05it/s]

{'epoch': 0, 'iter': 27100, 'avg_loss': 8.82895120769087, 'avg_acc': 49.990314010553114, 'loss': 8.155546188354492}


EP_train:0:  98%|| 27112/27626 [1:03:47<01:13,  7.00it/s]

{'epoch': 0, 'iter': 27110, 'avg_loss': 8.828941319518297, 'avg_acc': 49.989164914610306, 'loss': 8.899502754211426}


EP_train:0:  98%|| 27122/27626 [1:03:48<01:12,  6.97it/s]

{'epoch': 0, 'iter': 27120, 'avg_loss': 8.828909180657595, 'avg_acc': 49.98836233914679, 'loss': 8.462820053100586}


EP_train:0:  98%|| 27132/27626 [1:03:50<01:09,  7.12it/s]

{'epoch': 0, 'iter': 27130, 'avg_loss': 8.828888688813853, 'avg_acc': 49.98882735616085, 'loss': 8.786298751831055}


EP_train:0:  98%|| 27142/27626 [1:03:51<01:08,  7.09it/s]

{'epoch': 0, 'iter': 27140, 'avg_loss': 8.828905733379854, 'avg_acc': 49.98917689105044, 'loss': 8.440507888793945}


EP_train:0:  98%|| 27152/27626 [1:03:53<01:07,  7.06it/s]

{'epoch': 0, 'iter': 27150, 'avg_loss': 8.828852036939677, 'avg_acc': 49.98906578026592, 'loss': 8.727741241455078}


EP_train:0:  98%|| 27162/27626 [1:03:54<01:05,  7.10it/s]

{'epoch': 0, 'iter': 27160, 'avg_loss': 8.828768655937017, 'avg_acc': 49.98929991531976, 'loss': 7.894748210906982}


EP_train:0:  98%|| 27172/27626 [1:03:55<01:04,  7.08it/s]

{'epoch': 0, 'iter': 27170, 'avg_loss': 8.828824147096702, 'avg_acc': 49.987923705421224, 'loss': 9.39633846282959}


EP_train:0:  98%|| 27182/27626 [1:03:57<01:02,  7.10it/s]

{'epoch': 0, 'iter': 27180, 'avg_loss': 8.828626873836951, 'avg_acc': 49.987813178323094, 'loss': 7.614525318145752}


EP_train:0:  98%|| 27192/27626 [1:03:58<01:01,  7.07it/s]

{'epoch': 0, 'iter': 27190, 'avg_loss': 8.828445628630112, 'avg_acc': 49.987013166121145, 'loss': 8.894384384155273}


EP_train:0:  98%|| 27202/27626 [1:04:00<01:00,  6.99it/s]

{'epoch': 0, 'iter': 27200, 'avg_loss': 8.828440637379192, 'avg_acc': 49.98495000183817, 'loss': 8.67083740234375}


EP_train:0:  99%|| 27212/27626 [1:04:01<00:58,  7.02it/s]

{'epoch': 0, 'iter': 27210, 'avg_loss': 8.828355821603399, 'avg_acc': 49.984496159641324, 'loss': 8.71233081817627}


EP_train:0:  99%|| 27222/27626 [1:04:03<00:57,  7.08it/s]

{'epoch': 0, 'iter': 27220, 'avg_loss': 8.828305048497803, 'avg_acc': 49.98530546269425, 'loss': 7.8141374588012695}


EP_train:0:  99%|| 27232/27626 [1:04:04<00:55,  7.09it/s]

{'epoch': 0, 'iter': 27230, 'avg_loss': 8.828161958898495, 'avg_acc': 49.98508134111857, 'loss': 8.876554489135742}


EP_train:0:  99%|| 27242/27626 [1:04:05<00:54,  7.09it/s]

{'epoch': 0, 'iter': 27240, 'avg_loss': 8.828083224296744, 'avg_acc': 49.98623398553651, 'loss': 8.598676681518555}


EP_train:0:  99%|| 27252/27626 [1:04:07<00:52,  7.10it/s]

{'epoch': 0, 'iter': 27250, 'avg_loss': 8.82806212381242, 'avg_acc': 49.98589501302704, 'loss': 9.157513618469238}


EP_train:0:  99%|| 27262/27626 [1:04:08<00:51,  7.09it/s]

{'epoch': 0, 'iter': 27260, 'avg_loss': 8.82787147846406, 'avg_acc': 49.98658798283262, 'loss': 7.754292964935303}


EP_train:0:  99%|| 27272/27626 [1:04:10<00:49,  7.09it/s]

{'epoch': 0, 'iter': 27270, 'avg_loss': 8.827901069552569, 'avg_acc': 49.984988632613394, 'loss': 8.482125282287598}


EP_train:0:  99%|| 27282/27626 [1:04:11<00:48,  7.08it/s]

{'epoch': 0, 'iter': 27280, 'avg_loss': 8.827682183436524, 'avg_acc': 49.985795975220846, 'loss': 8.265995979309082}


EP_train:0:  99%|| 27292/27626 [1:04:12<00:47,  7.07it/s]

{'epoch': 0, 'iter': 27290, 'avg_loss': 8.827752301868648, 'avg_acc': 49.98408358066762, 'loss': 9.522555351257324}


EP_train:0:  99%|| 27302/27626 [1:04:14<00:46,  7.01it/s]

{'epoch': 0, 'iter': 27300, 'avg_loss': 8.827644255068629, 'avg_acc': 49.98351708728618, 'loss': 7.893100261688232}


EP_train:0:  99%|| 27312/27626 [1:04:15<00:44,  7.06it/s]

{'epoch': 0, 'iter': 27310, 'avg_loss': 8.827661468066857, 'avg_acc': 49.98466735015195, 'loss': 8.587897300720215}


EP_train:0:  99%|| 27322/27626 [1:04:17<00:42,  7.09it/s]

{'epoch': 0, 'iter': 27320, 'avg_loss': 8.827535321137326, 'avg_acc': 49.986045532740384, 'loss': 8.66679573059082}


EP_train:0:  99%|| 27332/27626 [1:04:18<00:41,  7.10it/s]

{'epoch': 0, 'iter': 27330, 'avg_loss': 8.827510707408308, 'avg_acc': 49.98582196041125, 'loss': 9.692071914672852}


EP_train:0:  99%|| 27342/27626 [1:04:19<00:40,  7.08it/s]

{'epoch': 0, 'iter': 27340, 'avg_loss': 8.827410740962405, 'avg_acc': 49.98594144325372, 'loss': 9.273942947387695}


EP_train:0:  99%|| 27352/27626 [1:04:21<00:38,  7.06it/s]

{'epoch': 0, 'iter': 27350, 'avg_loss': 8.82726936487475, 'avg_acc': 49.985375306204524, 'loss': 8.016953468322754}


EP_train:0:  99%|| 27362/27626 [1:04:22<00:37,  7.10it/s]

{'epoch': 0, 'iter': 27360, 'avg_loss': 8.827184970866588, 'avg_acc': 49.98675121523336, 'loss': 8.55899715423584}


EP_train:0:  99%|| 27372/27626 [1:04:24<00:36,  7.00it/s]

{'epoch': 0, 'iter': 27370, 'avg_loss': 8.827096349847821, 'avg_acc': 49.98755525921596, 'loss': 9.177892684936523}


EP_train:0:  99%|| 27382/27626 [1:04:25<00:34,  7.03it/s]

{'epoch': 0, 'iter': 27380, 'avg_loss': 8.82699004135976, 'avg_acc': 49.989043497315656, 'loss': 8.34218978881836}


EP_train:0:  99%|| 27392/27626 [1:04:26<00:33,  7.02it/s]

{'epoch': 0, 'iter': 27390, 'avg_loss': 8.826972361214342, 'avg_acc': 49.98961794019934, 'loss': 8.974149703979492}


EP_train:0:  99%|| 27402/27626 [1:04:28<00:32,  7.00it/s]

{'epoch': 0, 'iter': 27400, 'avg_loss': 8.826867040232722, 'avg_acc': 49.99076219845991, 'loss': 8.356687545776367}


EP_train:0:  99%|| 27412/27626 [1:04:29<00:30,  7.04it/s]

{'epoch': 0, 'iter': 27410, 'avg_loss': 8.826646942567166, 'avg_acc': 49.98916949399876, 'loss': 8.226064682006836}


EP_train:0:  99%|| 27422/27626 [1:04:31<00:28,  7.07it/s]

{'epoch': 0, 'iter': 27420, 'avg_loss': 8.826601374441031, 'avg_acc': 49.98928740746143, 'loss': 8.78837776184082}


EP_train:0:  99%|| 27432/27626 [1:04:32<00:27,  7.07it/s]

{'epoch': 0, 'iter': 27430, 'avg_loss': 8.826563447673653, 'avg_acc': 49.98872170172432, 'loss': 8.223304748535156}


EP_train:0:  99%|| 27442/27626 [1:04:34<00:26,  7.01it/s]

{'epoch': 0, 'iter': 27440, 'avg_loss': 8.826426740594718, 'avg_acc': 49.98940909587843, 'loss': 8.298247337341309}


EP_train:0:  99%|| 27452/27626 [1:04:35<00:24,  7.10it/s]

{'epoch': 0, 'iter': 27450, 'avg_loss': 8.826206152645218, 'avg_acc': 49.989185275581946, 'loss': 7.62721586227417}


EP_train:0:  99%|| 27462/27626 [1:04:36<00:23,  7.01it/s]

{'epoch': 0, 'iter': 27460, 'avg_loss': 8.826065957765456, 'avg_acc': 49.989758202541786, 'loss': 8.742012977600098}


EP_train:0:  99%|| 27472/27626 [1:04:38<00:21,  7.00it/s]

{'epoch': 0, 'iter': 27470, 'avg_loss': 8.825927730070257, 'avg_acc': 49.99101325033672, 'loss': 8.491480827331543}


EP_train:0:  99%|| 27482/27626 [1:04:39<00:20,  7.01it/s]

{'epoch': 0, 'iter': 27480, 'avg_loss': 8.825796622982894, 'avg_acc': 49.992722244459806, 'loss': 8.547420501708984}


EP_train:0: 100%|| 27492/27626 [1:04:41<00:19,  7.05it/s]

{'epoch': 0, 'iter': 27490, 'avg_loss': 8.825813861998313, 'avg_acc': 49.99227019751919, 'loss': 9.073151588439941}


EP_train:0: 100%|| 27502/27626 [1:04:42<00:17,  7.05it/s]

{'epoch': 0, 'iter': 27500, 'avg_loss': 8.825820570767808, 'avg_acc': 49.99363659503291, 'loss': 8.733524322509766}


EP_train:0: 100%|| 27512/27626 [1:04:43<00:16,  7.02it/s]

{'epoch': 0, 'iter': 27510, 'avg_loss': 8.82570612671929, 'avg_acc': 49.99295736250954, 'loss': 8.239291191101074}


EP_train:0: 100%|| 27522/27626 [1:04:45<00:14,  7.09it/s]

{'epoch': 0, 'iter': 27520, 'avg_loss': 8.825573023701168, 'avg_acc': 49.99171087533156, 'loss': 8.15626335144043}


EP_train:0: 100%|| 27532/27626 [1:04:46<00:13,  7.04it/s]

{'epoch': 0, 'iter': 27530, 'avg_loss': 8.825552309712497, 'avg_acc': 49.99103283571247, 'loss': 8.796159744262695}


EP_train:0: 100%|| 27542/27626 [1:04:48<00:11,  7.00it/s]

{'epoch': 0, 'iter': 27540, 'avg_loss': 8.825479537831868, 'avg_acc': 49.991149558839545, 'loss': 8.929678916931152}


EP_train:0: 100%|| 27552/27626 [1:04:49<00:10,  6.96it/s]

{'epoch': 0, 'iter': 27550, 'avg_loss': 8.82554135208861, 'avg_acc': 49.99160647526406, 'loss': 8.263575553894043}


EP_train:0: 100%|| 27562/27626 [1:04:51<00:09,  7.03it/s]

{'epoch': 0, 'iter': 27560, 'avg_loss': 8.825503118662489, 'avg_acc': 49.99251659954283, 'loss': 8.504006385803223}


EP_train:0: 100%|| 27572/27626 [1:04:52<00:07,  7.02it/s]

{'epoch': 0, 'iter': 27570, 'avg_loss': 8.825444341332812, 'avg_acc': 49.99274600123318, 'loss': 8.180861473083496}


EP_train:0: 100%|| 27582/27626 [1:04:53<00:06,  7.08it/s]

{'epoch': 0, 'iter': 27580, 'avg_loss': 8.825437458952967, 'avg_acc': 49.99127569703782, 'loss': 8.621255874633789}


EP_train:0: 100%|| 27592/27626 [1:04:55<00:04,  7.07it/s]

{'epoch': 0, 'iter': 27590, 'avg_loss': 8.82533854397144, 'avg_acc': 49.99173190533145, 'loss': 8.956474304199219}


EP_train:0: 100%|| 27602/27626 [1:04:56<00:03,  7.10it/s]

{'epoch': 0, 'iter': 27600, 'avg_loss': 8.825332908136966, 'avg_acc': 49.991168798231946, 'loss': 8.5133056640625}


EP_train:0: 100%|| 27612/27626 [1:04:58<00:01,  7.07it/s]

{'epoch': 0, 'iter': 27610, 'avg_loss': 8.825206407775857, 'avg_acc': 49.99173789431748, 'loss': 8.383462905883789}


EP_train:0: 100%|| 27622/27626 [1:04:59<00:00,  7.07it/s]

{'epoch': 0, 'iter': 27620, 'avg_loss': 8.825138707608248, 'avg_acc': 49.99174088555809, 'loss': 9.495349884033203}


EP_train:0: 100%|| 27626/27626 [1:05:00<00:00,  7.08it/s]


EP0_train, avg_loss= 8.825114900574077 total_acc= 49.99089379907309
EP:0 Model Saved on: ./wikitext_trained.model.ep0


EP_train:1:   0%|| 2/27626 [00:00<1:18:53,  5.84it/s]

{'epoch': 1, 'iter': 0, 'avg_loss': 9.050117492675781, 'avg_acc': 46.875, 'loss': 9.050117492675781}


EP_train:1:   0%|| 12/27626 [00:01<1:05:55,  6.98it/s]

{'epoch': 1, 'iter': 10, 'avg_loss': 8.42451828176325, 'avg_acc': 50.85227272727273, 'loss': 7.7155609130859375}


EP_train:1:   0%|| 22/27626 [00:03<1:04:57,  7.08it/s]

{'epoch': 1, 'iter': 20, 'avg_loss': 8.471832252684093, 'avg_acc': 50.74404761904761, 'loss': 8.344019889831543}


EP_train:1:   0%|| 32/27626 [00:04<1:05:22,  7.04it/s]

{'epoch': 1, 'iter': 30, 'avg_loss': 8.383585514560822, 'avg_acc': 50.201612903225815, 'loss': 8.165770530700684}


EP_train:1:   0%|| 42/27626 [00:06<1:05:33,  7.01it/s]

{'epoch': 1, 'iter': 40, 'avg_loss': 8.410563794577994, 'avg_acc': 49.542682926829265, 'loss': 8.086234092712402}


EP_train:1:   0%|| 52/27626 [00:07<1:05:34,  7.01it/s]

{'epoch': 1, 'iter': 50, 'avg_loss': 8.496493788326488, 'avg_acc': 48.713235294117645, 'loss': 8.981877326965332}


EP_train:1:   0%|| 62/27626 [00:08<1:05:08,  7.05it/s]

{'epoch': 1, 'iter': 60, 'avg_loss': 8.540375271781546, 'avg_acc': 49.53893442622951, 'loss': 9.612804412841797}


EP_train:1:   0%|| 72/27626 [00:10<1:06:26,  6.91it/s]

{'epoch': 1, 'iter': 70, 'avg_loss': 8.611848105846995, 'avg_acc': 50.220070422535215, 'loss': 8.749978065490723}


EP_train:1:   0%|| 82/27626 [00:11<1:05:57,  6.96it/s]

{'epoch': 1, 'iter': 80, 'avg_loss': 8.575529681311714, 'avg_acc': 50.1929012345679, 'loss': 8.355416297912598}


EP_train:1:   0%|| 92/27626 [00:13<1:05:51,  6.97it/s]

{'epoch': 1, 'iter': 90, 'avg_loss': 8.558607840276027, 'avg_acc': 50.37774725274725, 'loss': 8.09361743927002}


EP_train:1:   0%|| 102/27626 [00:14<1:05:16,  7.03it/s]

{'epoch': 1, 'iter': 100, 'avg_loss': 8.573468505746067, 'avg_acc': 49.87623762376238, 'loss': 8.298909187316895}


EP_train:1:   0%|| 112/27626 [00:15<1:04:55,  7.06it/s]

{'epoch': 1, 'iter': 110, 'avg_loss': 8.5710750098701, 'avg_acc': 50.14076576576577, 'loss': 9.363157272338867}


EP_train:1:   0%|| 122/27626 [00:17<1:05:09,  7.03it/s]

{'epoch': 1, 'iter': 120, 'avg_loss': 8.560524172034146, 'avg_acc': 50.25826446280992, 'loss': 8.701847076416016}


EP_train:1:   0%|| 132/27626 [00:18<1:05:19,  7.01it/s]

{'epoch': 1, 'iter': 130, 'avg_loss': 8.54878619972986, 'avg_acc': 50.262404580152676, 'loss': 8.424430847167969}


EP_train:1:   1%|| 142/27626 [00:20<1:05:09,  7.03it/s]

{'epoch': 1, 'iter': 140, 'avg_loss': 8.536676406860352, 'avg_acc': 50.50975177304965, 'loss': 8.231841087341309}


EP_train:1:   1%|| 152/27626 [00:21<1:05:37,  6.98it/s]

{'epoch': 1, 'iter': 150, 'avg_loss': 8.510606194174052, 'avg_acc': 50.26903973509934, 'loss': 7.965179920196533}


EP_train:1:   1%|| 162/27626 [00:23<1:05:17,  7.01it/s]

{'epoch': 1, 'iter': 160, 'avg_loss': 8.536862308194179, 'avg_acc': 50.640527950310556, 'loss': 9.75743579864502}


EP_train:1:   1%|| 172/27626 [00:24<1:05:13,  7.01it/s]

{'epoch': 1, 'iter': 170, 'avg_loss': 8.526952732376188, 'avg_acc': 50.78581871345029, 'loss': 8.017050743103027}


EP_train:1:   1%|| 182/27626 [00:25<1:05:31,  6.98it/s]

{'epoch': 1, 'iter': 180, 'avg_loss': 8.537127075932961, 'avg_acc': 51.00138121546961, 'loss': 8.629302024841309}


EP_train:1:   1%|| 192/27626 [00:27<1:04:55,  7.04it/s]

{'epoch': 1, 'iter': 190, 'avg_loss': 8.526307565379517, 'avg_acc': 51.24345549738219, 'loss': 8.486817359924316}


EP_train:1:   1%|| 202/27626 [00:28<1:04:25,  7.09it/s]

{'epoch': 1, 'iter': 200, 'avg_loss': 8.541093503657859, 'avg_acc': 51.1660447761194, 'loss': 8.761861801147461}


EP_train:1:   1%|| 212/27626 [00:30<1:04:36,  7.07it/s]

{'epoch': 1, 'iter': 210, 'avg_loss': 8.545183245039665, 'avg_acc': 51.12559241706162, 'loss': 8.965388298034668}


EP_train:1:   1%|| 222/27626 [00:31<1:04:55,  7.03it/s]

{'epoch': 1, 'iter': 220, 'avg_loss': 8.547153408171365, 'avg_acc': 51.03223981900452, 'loss': 8.853507041931152}


EP_train:1:   1%|| 232/27626 [00:32<1:04:47,  7.05it/s]

{'epoch': 1, 'iter': 230, 'avg_loss': 8.529038293021065, 'avg_acc': 51.028138528138534, 'loss': 7.963188171386719}


EP_train:1:   1%|| 242/27626 [00:34<1:04:34,  7.07it/s]

{'epoch': 1, 'iter': 240, 'avg_loss': 8.535275991526877, 'avg_acc': 50.842842323651446, 'loss': 8.33392333984375}


EP_train:1:   1%|| 252/27626 [00:35<1:04:24,  7.08it/s]

{'epoch': 1, 'iter': 250, 'avg_loss': 8.521098169197598, 'avg_acc': 50.79681274900398, 'loss': 7.457027435302734}


EP_train:1:   1%|| 262/27626 [00:37<1:04:43,  7.05it/s]

{'epoch': 1, 'iter': 260, 'avg_loss': 8.541345488522701, 'avg_acc': 50.85009578544061, 'loss': 9.304693222045898}


EP_train:1:   1%|| 272/27626 [00:38<1:04:56,  7.02it/s]

{'epoch': 1, 'iter': 270, 'avg_loss': 8.538368703694362, 'avg_acc': 50.84178966789668, 'loss': 7.959885120391846}


EP_train:1:   1%|| 282/27626 [00:40<1:04:20,  7.08it/s]

{'epoch': 1, 'iter': 280, 'avg_loss': 8.5278708043896, 'avg_acc': 50.76734875444839, 'loss': 8.088726043701172}


EP_train:1:   1%|| 292/27626 [00:41<1:04:26,  7.07it/s]

{'epoch': 1, 'iter': 290, 'avg_loss': 8.531370964246927, 'avg_acc': 50.77319587628865, 'loss': 8.414783477783203}


EP_train:1:   1%|| 302/27626 [00:42<1:04:21,  7.08it/s]

{'epoch': 1, 'iter': 300, 'avg_loss': 8.544035824430345, 'avg_acc': 50.695598006644516, 'loss': 8.985612869262695}


EP_train:1:   1%|| 312/27626 [00:44<1:04:49,  7.02it/s]

{'epoch': 1, 'iter': 310, 'avg_loss': 8.554912556406002, 'avg_acc': 50.703376205787784, 'loss': 9.528470993041992}


EP_train:1:   1%|| 322/27626 [00:45<1:04:53,  7.01it/s]

{'epoch': 1, 'iter': 320, 'avg_loss': 8.562873390233404, 'avg_acc': 50.671728971962615, 'loss': 8.632047653198242}


EP_train:1:   1%|| 332/27626 [00:47<1:04:16,  7.08it/s]

{'epoch': 1, 'iter': 330, 'avg_loss': 8.566360999450223, 'avg_acc': 50.63255287009063, 'loss': 8.878193855285645}


EP_train:1:   1%|| 342/27626 [00:48<1:04:27,  7.05it/s]

{'epoch': 1, 'iter': 340, 'avg_loss': 8.56869562769915, 'avg_acc': 50.595674486803524, 'loss': 7.686997413635254}


EP_train:1:   1%|| 352/27626 [00:49<1:04:08,  7.09it/s]

{'epoch': 1, 'iter': 350, 'avg_loss': 8.576162585505733, 'avg_acc': 50.61431623931624, 'loss': 8.929279327392578}


EP_train:1:   1%|| 362/27626 [00:51<1:04:32,  7.04it/s]

{'epoch': 1, 'iter': 360, 'avg_loss': 8.581493689412886, 'avg_acc': 50.57132963988919, 'loss': 8.620777130126953}


EP_train:1:   1%|| 372/27626 [00:52<1:04:47,  7.01it/s]

{'epoch': 1, 'iter': 370, 'avg_loss': 8.581245840077774, 'avg_acc': 50.57277628032345, 'loss': 8.721113204956055}


EP_train:1:   1%|| 382/27626 [00:54<1:05:11,  6.97it/s]

{'epoch': 1, 'iter': 380, 'avg_loss': 8.58748166767631, 'avg_acc': 50.5249343832021, 'loss': 9.435587882995605}


EP_train:1:   1%|| 392/27626 [00:55<1:04:14,  7.07it/s]

{'epoch': 1, 'iter': 390, 'avg_loss': 8.589711824646386, 'avg_acc': 50.63938618925832, 'loss': 8.668192863464355}


EP_train:1:   1%|| 402/27626 [00:57<1:04:00,  7.09it/s]

{'epoch': 1, 'iter': 400, 'avg_loss': 8.596230534246734, 'avg_acc': 50.6857855361596, 'loss': 8.18346118927002}


EP_train:1:   1%|| 412/27626 [00:58<1:04:28,  7.03it/s]

{'epoch': 1, 'iter': 410, 'avg_loss': 8.603313126993296, 'avg_acc': 50.6234793187348, 'loss': 9.076643943786621}


EP_train:1:   2%|| 422/27626 [00:59<1:04:21,  7.04it/s]

{'epoch': 1, 'iter': 420, 'avg_loss': 8.606942248174526, 'avg_acc': 50.616092636579566, 'loss': 9.027424812316895}


EP_train:1:   2%|| 432/27626 [01:01<1:04:12,  7.06it/s]

{'epoch': 1, 'iter': 430, 'avg_loss': 8.600008795266914, 'avg_acc': 50.580046403712295, 'loss': 8.480048179626465}


EP_train:1:   2%|| 442/27626 [01:02<1:04:35,  7.01it/s]

{'epoch': 1, 'iter': 440, 'avg_loss': 8.603624937485675, 'avg_acc': 50.51020408163265, 'loss': 7.657510757446289}


EP_train:1:   2%|| 452/27626 [01:04<1:04:24,  7.03it/s]

{'epoch': 1, 'iter': 450, 'avg_loss': 8.613168180385344, 'avg_acc': 50.47810421286031, 'loss': 9.623355865478516}


EP_train:1:   2%|| 462/27626 [01:05<1:04:49,  6.98it/s]

{'epoch': 1, 'iter': 460, 'avg_loss': 8.618349690757967, 'avg_acc': 50.39994577006508, 'loss': 8.746074676513672}


EP_train:1:   2%|| 472/27626 [01:06<1:04:28,  7.02it/s]

{'epoch': 1, 'iter': 470, 'avg_loss': 8.624896831066998, 'avg_acc': 50.43789808917197, 'loss': 8.181283950805664}


EP_train:1:   2%|| 482/27626 [01:08<1:04:31,  7.01it/s]

{'epoch': 1, 'iter': 480, 'avg_loss': 8.6304956662184, 'avg_acc': 50.46127858627859, 'loss': 8.523414611816406}


EP_train:1:   2%|| 492/27626 [01:09<1:04:43,  6.99it/s]

{'epoch': 1, 'iter': 490, 'avg_loss': 8.629009397355214, 'avg_acc': 50.47097759674134, 'loss': 8.453627586364746}


EP_train:1:   2%|| 502/27626 [01:11<1:04:19,  7.03it/s]

{'epoch': 1, 'iter': 500, 'avg_loss': 8.63038421200659, 'avg_acc': 50.44910179640718, 'loss': 8.074066162109375}


EP_train:1:   2%|| 512/27626 [01:12<1:04:05,  7.05it/s]

{'epoch': 1, 'iter': 510, 'avg_loss': 8.632117255559873, 'avg_acc': 50.4708904109589, 'loss': 8.754239082336426}


EP_train:1:   2%|| 522/27626 [01:14<1:04:01,  7.06it/s]

{'epoch': 1, 'iter': 520, 'avg_loss': 8.634712842512954, 'avg_acc': 50.44985604606526, 'loss': 7.745795249938965}


EP_train:1:   2%|| 532/27626 [01:15<1:03:50,  7.07it/s]

{'epoch': 1, 'iter': 530, 'avg_loss': 8.631486518234857, 'avg_acc': 50.51789077212806, 'loss': 8.816490173339844}


EP_train:1:   2%|| 542/27626 [01:16<1:04:06,  7.04it/s]

{'epoch': 1, 'iter': 540, 'avg_loss': 8.630772531583437, 'avg_acc': 50.519870609981524, 'loss': 7.610097408294678}


EP_train:1:   2%|| 552/27626 [01:18<1:04:21,  7.01it/s]

{'epoch': 1, 'iter': 550, 'avg_loss': 8.62329119995588, 'avg_acc': 50.544464609800364, 'loss': 9.058866500854492}


EP_train:1:   2%|| 562/27626 [01:19<1:03:46,  7.07it/s]

{'epoch': 1, 'iter': 560, 'avg_loss': 8.619495711437096, 'avg_acc': 50.52361853832442, 'loss': 9.108420372009277}


EP_train:1:   2%|| 572/27626 [01:21<1:03:48,  7.07it/s]

{'epoch': 1, 'iter': 570, 'avg_loss': 8.614424708845737, 'avg_acc': 50.55275831873905, 'loss': 8.934308052062988}


EP_train:1:   2%|| 582/27626 [01:22<1:03:58,  7.05it/s]

{'epoch': 1, 'iter': 580, 'avg_loss': 8.61267869689994, 'avg_acc': 50.47870051635112, 'loss': 7.963716506958008}


EP_train:1:   2%|| 592/27626 [01:23<1:04:09,  7.02it/s]

{'epoch': 1, 'iter': 590, 'avg_loss': 8.614502177424036, 'avg_acc': 50.380710659898476, 'loss': 8.464774131774902}


EP_train:1:   2%|| 602/27626 [01:25<1:04:05,  7.03it/s]

{'epoch': 1, 'iter': 600, 'avg_loss': 8.613052899746252, 'avg_acc': 50.37957570715474, 'loss': 8.700886726379395}


EP_train:1:   2%|| 612/27626 [01:26<1:03:43,  7.07it/s]

{'epoch': 1, 'iter': 610, 'avg_loss': 8.611462986410737, 'avg_acc': 50.286415711947626, 'loss': 8.522876739501953}


EP_train:1:   2%|| 622/27626 [01:28<1:04:19,  7.00it/s]

{'epoch': 1, 'iter': 620, 'avg_loss': 8.611211257664285, 'avg_acc': 50.26167471819646, 'loss': 8.397134780883789}


EP_train:1:   2%|| 632/27626 [01:29<1:04:11,  7.01it/s]

{'epoch': 1, 'iter': 630, 'avg_loss': 8.611695629670011, 'avg_acc': 50.29219492868463, 'loss': 8.221000671386719}


EP_train:1:   2%|| 642/27626 [01:31<1:03:31,  7.08it/s]

{'epoch': 1, 'iter': 640, 'avg_loss': 8.60783960294798, 'avg_acc': 50.31201248049923, 'loss': 8.468684196472168}


EP_train:1:   2%|| 652/27626 [01:32<1:03:59,  7.03it/s]

{'epoch': 1, 'iter': 650, 'avg_loss': 8.610006186636179, 'avg_acc': 50.307219662058365, 'loss': 8.635672569274902}


EP_train:1:   2%|| 662/27626 [01:33<1:03:36,  7.07it/s]

{'epoch': 1, 'iter': 660, 'avg_loss': 8.607260528743357, 'avg_acc': 50.35457639939486, 'loss': 7.786149978637695}


EP_train:1:   2%|| 672/27626 [01:35<1:04:02,  7.01it/s]

{'epoch': 1, 'iter': 670, 'avg_loss': 8.60254426578118, 'avg_acc': 50.386549925484346, 'loss': 8.41661262512207}


EP_train:1:   2%|| 682/27626 [01:36<1:04:27,  6.97it/s]

{'epoch': 1, 'iter': 680, 'avg_loss': 8.600647130901768, 'avg_acc': 50.38087371512482, 'loss': 8.240732192993164}


EP_train:1:   3%|| 692/27626 [01:38<1:03:59,  7.02it/s]

{'epoch': 1, 'iter': 690, 'avg_loss': 8.600124551659901, 'avg_acc': 50.36631693198264, 'loss': 7.762039661407471}


EP_train:1:   3%|| 702/27626 [01:39<1:03:38,  7.05it/s]

{'epoch': 1, 'iter': 700, 'avg_loss': 8.601630108842835, 'avg_acc': 50.40567047075606, 'loss': 9.21619987487793}


EP_train:1:   3%|| 712/27626 [01:40<1:04:00,  7.01it/s]

{'epoch': 1, 'iter': 710, 'avg_loss': 8.602364504555274, 'avg_acc': 50.42633614627285, 'loss': 8.803768157958984}


EP_train:1:   3%|| 722/27626 [01:42<1:04:28,  6.95it/s]

{'epoch': 1, 'iter': 720, 'avg_loss': 8.595632274008658, 'avg_acc': 50.44642857142857, 'loss': 7.94681453704834}


EP_train:1:   3%|| 732/27626 [01:43<1:04:09,  6.99it/s]

{'epoch': 1, 'iter': 730, 'avg_loss': 8.600262628860579, 'avg_acc': 50.35909712722299, 'loss': 9.864774703979492}


EP_train:1:   3%|| 742/27626 [01:45<1:03:33,  7.05it/s]

{'epoch': 1, 'iter': 740, 'avg_loss': 8.60020291306551, 'avg_acc': 50.33316464237517, 'loss': 8.604439735412598}


EP_train:1:   3%|| 752/27626 [01:46<1:04:14,  6.97it/s]

{'epoch': 1, 'iter': 750, 'avg_loss': 8.602583077553902, 'avg_acc': 50.32040612516645, 'loss': 8.609886169433594}


EP_train:1:   3%|| 762/27626 [01:48<1:03:36,  7.04it/s]

{'epoch': 1, 'iter': 760, 'avg_loss': 8.602396063359745, 'avg_acc': 50.29155716162943, 'loss': 8.622489929199219}


EP_train:1:   3%|| 772/27626 [01:49<1:03:40,  7.03it/s]

{'epoch': 1, 'iter': 770, 'avg_loss': 8.60545226603012, 'avg_acc': 50.275616083009076, 'loss': 10.11002254486084}


EP_train:1:   3%|| 782/27626 [01:50<1:02:58,  7.11it/s]

{'epoch': 1, 'iter': 780, 'avg_loss': 8.605762198395674, 'avg_acc': 50.20406530089628, 'loss': 8.585817337036133}


EP_train:1:   3%|| 792/27626 [01:52<1:03:09,  7.08it/s]

{'epoch': 1, 'iter': 790, 'avg_loss': 8.604076647125515, 'avg_acc': 50.13827433628318, 'loss': 7.8436079025268555}


EP_train:1:   3%|| 802/27626 [01:53<1:03:29,  7.04it/s]

{'epoch': 1, 'iter': 800, 'avg_loss': 8.596668322583412, 'avg_acc': 50.120942571785264, 'loss': 8.374236106872559}


EP_train:1:   3%|| 812/27626 [01:55<1:03:51,  7.00it/s]

{'epoch': 1, 'iter': 810, 'avg_loss': 8.598223533936112, 'avg_acc': 50.1348643649815, 'loss': 8.601568222045898}


EP_train:1:   3%|| 822/27626 [01:56<1:03:23,  7.05it/s]

{'epoch': 1, 'iter': 820, 'avg_loss': 8.59743925192179, 'avg_acc': 50.18270401948843, 'loss': 8.746870994567871}


EP_train:1:   3%|| 832/27626 [01:57<1:03:04,  7.08it/s]

{'epoch': 1, 'iter': 830, 'avg_loss': 8.59475535405385, 'avg_acc': 50.169223826714806, 'loss': 9.265826225280762}


EP_train:1:   3%|| 842/27626 [01:59<1:03:10,  7.07it/s]

{'epoch': 1, 'iter': 840, 'avg_loss': 8.589365631448244, 'avg_acc': 50.248959571938165, 'loss': 7.757991313934326}


EP_train:1:   3%|| 852/27626 [02:00<1:03:15,  7.05it/s]

{'epoch': 1, 'iter': 850, 'avg_loss': 8.58768013753566, 'avg_acc': 50.268066980023505, 'loss': 8.524070739746094}


EP_train:1:   3%|| 862/27626 [02:02<1:03:17,  7.05it/s]

{'epoch': 1, 'iter': 860, 'avg_loss': 8.586968909295733, 'avg_acc': 50.243176538908244, 'loss': 8.56502628326416}


EP_train:1:   3%|| 872/27626 [02:03<1:03:01,  7.07it/s]

{'epoch': 1, 'iter': 870, 'avg_loss': 8.590030938421133, 'avg_acc': 50.25114810562572, 'loss': 8.902576446533203}


EP_train:1:   3%|| 882/27626 [02:05<1:03:17,  7.04it/s]

{'epoch': 1, 'iter': 880, 'avg_loss': 8.590792903835197, 'avg_acc': 50.27312712826334, 'loss': 8.85590648651123}


EP_train:1:   3%|| 892/27626 [02:06<1:03:15,  7.04it/s]

{'epoch': 1, 'iter': 890, 'avg_loss': 8.58858426090844, 'avg_acc': 50.27356902356902, 'loss': 8.677431106567383}


EP_train:1:   3%|| 902/27626 [02:07<1:03:37,  7.00it/s]

{'epoch': 1, 'iter': 900, 'avg_loss': 8.589181824344378, 'avg_acc': 50.2427857935627, 'loss': 8.243886947631836}


EP_train:1:   3%|| 912/27626 [02:09<1:03:13,  7.04it/s]

{'epoch': 1, 'iter': 910, 'avg_loss': 8.591502445589436, 'avg_acc': 50.25384193194292, 'loss': 8.338225364685059}


EP_train:1:   3%|| 922/27626 [02:10<1:03:05,  7.06it/s]

{'epoch': 1, 'iter': 920, 'avg_loss': 8.59450238483607, 'avg_acc': 50.22394136807817, 'loss': 9.464472770690918}


EP_train:1:   3%|| 932/27626 [02:12<1:02:46,  7.09it/s]

{'epoch': 1, 'iter': 930, 'avg_loss': 8.593840204683461, 'avg_acc': 50.22489258861439, 'loss': 8.334712982177734}


EP_train:1:   3%|| 942/27626 [02:13<1:03:09,  7.04it/s]

{'epoch': 1, 'iter': 940, 'avg_loss': 8.593167294857986, 'avg_acc': 50.20589798087142, 'loss': 8.343666076660156}


EP_train:1:   3%|| 952/27626 [02:14<1:02:56,  7.06it/s]

{'epoch': 1, 'iter': 950, 'avg_loss': 8.595963810019187, 'avg_acc': 50.17415878023134, 'loss': 8.0281400680542}


EP_train:1:   3%|| 962/27626 [02:16<1:02:49,  7.07it/s]

{'epoch': 1, 'iter': 960, 'avg_loss': 8.593377255748388, 'avg_acc': 50.18210197710719, 'loss': 8.00152587890625}


EP_train:1:   4%|| 972/27626 [02:17<1:03:18,  7.02it/s]

{'epoch': 1, 'iter': 970, 'avg_loss': 8.592196825972302, 'avg_acc': 50.14160659114315, 'loss': 8.552726745605469}


EP_train:1:   4%|| 982/27626 [02:19<1:03:00,  7.05it/s]

{'epoch': 1, 'iter': 980, 'avg_loss': 8.594461081832435, 'avg_acc': 50.12423547400612, 'loss': 8.872507095336914}


EP_train:1:   4%|| 992/27626 [02:20<1:03:25,  7.00it/s]

{'epoch': 1, 'iter': 990, 'avg_loss': 8.596086811466005, 'avg_acc': 50.1324419778002, 'loss': 7.964034080505371}


EP_train:1:   4%|| 1002/27626 [02:21<1:02:37,  7.09it/s]

{'epoch': 1, 'iter': 1000, 'avg_loss': 8.594111315377585, 'avg_acc': 50.11863136863137, 'loss': 8.367227554321289}


EP_train:1:   4%|| 1012/27626 [02:23<1:02:19,  7.12it/s]

{'epoch': 1, 'iter': 1010, 'avg_loss': 8.595158843447027, 'avg_acc': 50.14527695351138, 'loss': 8.636396408081055}


EP_train:1:   4%|| 1022/27626 [02:24<1:02:52,  7.05it/s]

{'epoch': 1, 'iter': 1020, 'avg_loss': 8.595603897569228, 'avg_acc': 50.12242899118511, 'loss': 8.646466255187988}


EP_train:1:   4%|| 1032/27626 [02:26<1:02:37,  7.08it/s]

{'epoch': 1, 'iter': 1030, 'avg_loss': 8.595321014711397, 'avg_acc': 50.08486905916586, 'loss': 8.55469036102295}


EP_train:1:   4%|| 1042/27626 [02:27<1:03:12,  7.01it/s]

{'epoch': 1, 'iter': 1040, 'avg_loss': 8.595751722539193, 'avg_acc': 50.081051873198845, 'loss': 8.276189804077148}


EP_train:1:   4%|| 1052/27626 [02:29<1:02:52,  7.04it/s]

{'epoch': 1, 'iter': 1050, 'avg_loss': 8.593846686333276, 'avg_acc': 50.065413891531875, 'loss': 8.37234878540039}


EP_train:1:   4%|| 1062/27626 [02:30<1:02:48,  7.05it/s]

{'epoch': 1, 'iter': 1060, 'avg_loss': 8.593361751635495, 'avg_acc': 50.11486804901036, 'loss': 9.463228225708008}


EP_train:1:   4%|| 1072/27626 [02:31<1:02:51,  7.04it/s]

{'epoch': 1, 'iter': 1070, 'avg_loss': 8.591478317502945, 'avg_acc': 50.10795985060691, 'loss': 8.52934455871582}


EP_train:1:   4%|| 1082/27626 [02:33<1:02:58,  7.02it/s]

{'epoch': 1, 'iter': 1080, 'avg_loss': 8.588193817121027, 'avg_acc': 50.11852451433858, 'loss': 9.610891342163086}


EP_train:1:   4%|| 1092/27626 [02:34<1:02:50,  7.04it/s]

{'epoch': 1, 'iter': 1090, 'avg_loss': 8.58849888783218, 'avg_acc': 50.10311640696609, 'loss': 8.500280380249023}


EP_train:1:   4%|| 1102/27626 [02:36<1:02:43,  7.05it/s]

{'epoch': 1, 'iter': 1100, 'avg_loss': 8.588853766764434, 'avg_acc': 50.133401453224344, 'loss': 7.778494358062744}


EP_train:1:   4%|| 1112/27626 [02:37<1:02:29,  7.07it/s]

{'epoch': 1, 'iter': 1110, 'avg_loss': 8.589539498278517, 'avg_acc': 50.146264626462646, 'loss': 9.58863639831543}


EP_train:1:   4%|| 1122/27626 [02:38<1:02:43,  7.04it/s]

{'epoch': 1, 'iter': 1120, 'avg_loss': 8.591396530008444, 'avg_acc': 50.10314451382693, 'loss': 8.878386497497559}


EP_train:1:   4%|| 1132/27626 [02:40<1:02:07,  7.11it/s]

{'epoch': 1, 'iter': 1130, 'avg_loss': 8.592240923806484, 'avg_acc': 50.09394341290893, 'loss': 9.388890266418457}


EP_train:1:   4%|| 1142/27626 [02:41<1:02:23,  7.08it/s]

{'epoch': 1, 'iter': 1140, 'avg_loss': 8.587684951885869, 'avg_acc': 50.09859772129711, 'loss': 7.931522846221924}


EP_train:1:   4%|| 1152/27626 [02:43<1:02:23,  7.07it/s]

{'epoch': 1, 'iter': 1150, 'avg_loss': 8.589926446240224, 'avg_acc': 50.070590790616855, 'loss': 9.279065132141113}


EP_train:1:   4%|| 1162/27626 [02:44<1:02:24,  7.07it/s]

{'epoch': 1, 'iter': 1160, 'avg_loss': 8.58992324627031, 'avg_acc': 50.064599483204134, 'loss': 7.84690523147583}


EP_train:1:   4%|| 1172/27626 [02:45<1:03:01,  7.00it/s]

{'epoch': 1, 'iter': 1170, 'avg_loss': 8.591778979557992, 'avg_acc': 50.02134927412468, 'loss': 8.580657005310059}


EP_train:1:   4%|| 1182/27626 [02:47<1:02:20,  7.07it/s]

{'epoch': 1, 'iter': 1180, 'avg_loss': 8.593343463012669, 'avg_acc': 50.02116850127011, 'loss': 8.436692237854004}


EP_train:1:   4%|| 1192/27626 [02:48<1:02:55,  7.00it/s]

{'epoch': 1, 'iter': 1190, 'avg_loss': 8.59039605634739, 'avg_acc': 50.03673383711167, 'loss': 7.685776233673096}


EP_train:1:   4%|| 1202/27626 [02:50<1:02:38,  7.03it/s]

{'epoch': 1, 'iter': 1200, 'avg_loss': 8.591011181560583, 'avg_acc': 50.03122398001665, 'loss': 8.613965034484863}


EP_train:1:   4%|| 1212/27626 [02:51<1:02:38,  7.03it/s]

{'epoch': 1, 'iter': 1210, 'avg_loss': 8.590773169409628, 'avg_acc': 50.00516102394715, 'loss': 8.605722427368164}


EP_train:1:   4%|| 1222/27626 [02:53<1:02:23,  7.05it/s]

{'epoch': 1, 'iter': 1220, 'avg_loss': 8.588506763045853, 'avg_acc': 50.0, 'loss': 8.727275848388672}


EP_train:1:   4%|| 1232/27626 [02:54<1:01:55,  7.10it/s]

{'epoch': 1, 'iter': 1230, 'avg_loss': 8.588410262457245, 'avg_acc': 49.959382615759544, 'loss': 8.122441291809082}


EP_train:1:   4%|| 1242/27626 [02:55<1:01:51,  7.11it/s]

{'epoch': 1, 'iter': 1240, 'avg_loss': 8.588311403245335, 'avg_acc': 49.96978243352135, 'loss': 8.788002967834473}


EP_train:1:   5%|| 1252/27626 [02:57<1:02:39,  7.02it/s]

{'epoch': 1, 'iter': 1250, 'avg_loss': 8.586589516686212, 'avg_acc': 49.967525979216624, 'loss': 8.963200569152832}


EP_train:1:   5%|| 1262/27626 [02:58<1:02:51,  6.99it/s]

{'epoch': 1, 'iter': 1260, 'avg_loss': 8.58604945879338, 'avg_acc': 49.960348929421095, 'loss': 8.932449340820312}


EP_train:1:   5%|| 1272/27626 [03:00<1:01:53,  7.10it/s]

{'epoch': 1, 'iter': 1270, 'avg_loss': 8.58429444971092, 'avg_acc': 49.987706530291106, 'loss': 8.840608596801758}


EP_train:1:   5%|| 1282/27626 [03:01<1:02:09,  7.06it/s]

{'epoch': 1, 'iter': 1280, 'avg_loss': 8.585315453457143, 'avg_acc': 49.98536299765808, 'loss': 8.75851058959961}


EP_train:1:   5%|| 1292/27626 [03:02<1:02:15,  7.05it/s]

{'epoch': 1, 'iter': 1290, 'avg_loss': 8.584450139268068, 'avg_acc': 49.97095274980635, 'loss': 8.99007797241211}


EP_train:1:   5%|| 1302/27626 [03:04<1:01:54,  7.09it/s]

{'epoch': 1, 'iter': 1300, 'avg_loss': 8.581301679985053, 'avg_acc': 49.95676402767103, 'loss': 8.863663673400879}


EP_train:1:   5%|| 1312/27626 [03:05<1:01:56,  7.08it/s]

{'epoch': 1, 'iter': 1310, 'avg_loss': 8.582463597269298, 'avg_acc': 49.97377955758962, 'loss': 9.039820671081543}


EP_train:1:   5%|| 1322/27626 [03:07<1:02:16,  7.04it/s]

{'epoch': 1, 'iter': 1320, 'avg_loss': 8.580885032376585, 'avg_acc': 49.964515518546555, 'loss': 8.150195121765137}


EP_train:1:   5%|| 1332/27626 [03:08<1:01:51,  7.08it/s]

{'epoch': 1, 'iter': 1330, 'avg_loss': 8.582763773977621, 'avg_acc': 49.97417355371901, 'loss': 9.670134544372559}


EP_train:1:   5%|| 1342/27626 [03:09<1:02:00,  7.06it/s]

{'epoch': 1, 'iter': 1340, 'avg_loss': 8.58378605234543, 'avg_acc': 49.97669649515287, 'loss': 9.129979133605957}


EP_train:1:   5%|| 1352/27626 [03:11<1:02:21,  7.02it/s]

{'epoch': 1, 'iter': 1350, 'avg_loss': 8.582553197859307, 'avg_acc': 49.96761658031088, 'loss': 8.811586380004883}


EP_train:1:   5%|| 1362/27626 [03:12<1:02:36,  6.99it/s]

{'epoch': 1, 'iter': 1360, 'avg_loss': 8.58415265542282, 'avg_acc': 49.974742836149886, 'loss': 9.08419132232666}


EP_train:1:   5%|| 1372/27626 [03:14<1:02:04,  7.05it/s]

{'epoch': 1, 'iter': 1370, 'avg_loss': 8.583911758717788, 'avg_acc': 49.9589715536105, 'loss': 8.073260307312012}


EP_train:1:   5%|| 1382/27626 [03:15<1:02:20,  7.02it/s]

{'epoch': 1, 'iter': 1380, 'avg_loss': 8.58109911311631, 'avg_acc': 49.93211440984794, 'loss': 8.981661796569824}


EP_train:1:   5%|| 1392/27626 [03:17<1:02:02,  7.05it/s]

{'epoch': 1, 'iter': 1390, 'avg_loss': 8.579940225476417, 'avg_acc': 49.934849029475195, 'loss': 8.074821472167969}


EP_train:1:   5%|| 1402/27626 [03:18<1:01:48,  7.07it/s]

{'epoch': 1, 'iter': 1400, 'avg_loss': 8.581472840673323, 'avg_acc': 49.94200571020699, 'loss': 9.169692039489746}


EP_train:1:   5%|| 1412/27626 [03:19<1:01:28,  7.11it/s]

{'epoch': 1, 'iter': 1410, 'avg_loss': 8.583028559140665, 'avg_acc': 49.94463146704465, 'loss': 7.976553440093994}


EP_train:1:   5%|| 1422/27626 [03:21<1:01:40,  7.08it/s]

{'epoch': 1, 'iter': 1420, 'avg_loss': 8.583965783048733, 'avg_acc': 49.94941942294159, 'loss': 8.865222930908203}


EP_train:1:   5%|| 1432/27626 [03:22<1:01:57,  7.05it/s]

{'epoch': 1, 'iter': 1430, 'avg_loss': 8.583792769100848, 'avg_acc': 49.960691823899374, 'loss': 9.096426963806152}


EP_train:1:   5%|| 1442/27626 [03:24<1:02:13,  7.01it/s]

{'epoch': 1, 'iter': 1440, 'avg_loss': 8.581682715458973, 'avg_acc': 49.919760582928525, 'loss': 8.166801452636719}


EP_train:1:   5%|| 1452/27626 [03:25<1:01:45,  7.06it/s]

{'epoch': 1, 'iter': 1450, 'avg_loss': 8.582874905397611, 'avg_acc': 49.91600620261888, 'loss': 8.223542213439941}


EP_train:1:   5%|| 1462/27626 [03:26<1:01:50,  7.05it/s]

{'epoch': 1, 'iter': 1460, 'avg_loss': 8.583495294452122, 'avg_acc': 49.91230321697467, 'loss': 8.95113468170166}


EP_train:1:   5%|| 1472/27626 [03:28<1:01:31,  7.09it/s]

{'epoch': 1, 'iter': 1470, 'avg_loss': 8.58590349220727, 'avg_acc': 49.92139700883752, 'loss': 8.942461013793945}


EP_train:1:   5%|| 1482/27626 [03:29<1:01:37,  7.07it/s]

{'epoch': 1, 'iter': 1480, 'avg_loss': 8.585321227414314, 'avg_acc': 49.932478055367994, 'loss': 8.537369728088379}


EP_train:1:   5%|| 1492/27626 [03:31<1:01:29,  7.08it/s]

{'epoch': 1, 'iter': 1490, 'avg_loss': 8.586997423613335, 'avg_acc': 49.941314553990615, 'loss': 8.525787353515625}


EP_train:1:   5%|| 1502/27626 [03:32<1:01:23,  7.09it/s]

{'epoch': 1, 'iter': 1500, 'avg_loss': 8.588665639456712, 'avg_acc': 49.92921385742838, 'loss': 8.835409164428711}


EP_train:1:   5%|| 1512/27626 [03:33<1:01:17,  7.10it/s]

{'epoch': 1, 'iter': 1510, 'avg_loss': 8.591618465320074, 'avg_acc': 49.86970549305096, 'loss': 8.572327613830566}


EP_train:1:   6%|| 1522/27626 [03:35<1:02:06,  7.01it/s]

{'epoch': 1, 'iter': 1520, 'avg_loss': 8.592421816964436, 'avg_acc': 49.866452991452995, 'loss': 8.232111930847168}


EP_train:1:   6%|| 1532/27626 [03:36<1:01:47,  7.04it/s]

{'epoch': 1, 'iter': 1530, 'avg_loss': 8.591353080694695, 'avg_acc': 49.883654474199865, 'loss': 8.096451759338379}


EP_train:1:   6%|| 1542/27626 [03:38<1:01:40,  7.05it/s]

{'epoch': 1, 'iter': 1540, 'avg_loss': 8.590104556718018, 'avg_acc': 49.91482803374432, 'loss': 8.324631690979004}


EP_train:1:   6%|| 1552/27626 [03:39<1:01:36,  7.05it/s]

{'epoch': 1, 'iter': 1550, 'avg_loss': 8.588920696561065, 'avg_acc': 49.9294809800129, 'loss': 8.556144714355469}


EP_train:1:   6%|| 1562/27626 [03:41<1:01:16,  7.09it/s]

{'epoch': 1, 'iter': 1560, 'avg_loss': 8.591297861095578, 'avg_acc': 49.92592889173606, 'loss': 7.9494805335998535}


EP_train:1:   6%|| 1572/27626 [03:42<1:01:31,  7.06it/s]

{'epoch': 1, 'iter': 1570, 'avg_loss': 8.58925554328933, 'avg_acc': 49.94231381285805, 'loss': 8.03654670715332}


EP_train:1:   6%|| 1582/27626 [03:43<1:01:14,  7.09it/s]

{'epoch': 1, 'iter': 1580, 'avg_loss': 8.590545280608552, 'avg_acc': 49.940702087286525, 'loss': 8.577424049377441}


EP_train:1:   6%|| 1592/27626 [03:45<1:01:06,  7.10it/s]

{'epoch': 1, 'iter': 1590, 'avg_loss': 8.58890035420675, 'avg_acc': 49.95678818353237, 'loss': 8.935673713684082}


EP_train:1:   6%|| 1602/27626 [03:46<1:01:22,  7.07it/s]

{'epoch': 1, 'iter': 1600, 'avg_loss': 8.58685955682596, 'avg_acc': 49.937539038101185, 'loss': 7.642853260040283}


EP_train:1:   6%|| 1612/27626 [03:48<1:01:41,  7.03it/s]

{'epoch': 1, 'iter': 1610, 'avg_loss': 8.585583413927397, 'avg_acc': 49.957324643078834, 'loss': 8.035492897033691}


EP_train:1:   6%|| 1622/27626 [03:49<1:01:37,  7.03it/s]

{'epoch': 1, 'iter': 1620, 'avg_loss': 8.584310649575311, 'avg_acc': 49.93445404071561, 'loss': 8.351000785827637}


EP_train:1:   6%|| 1632/27626 [03:50<1:01:31,  7.04it/s]

{'epoch': 1, 'iter': 1630, 'avg_loss': 8.58207449132427, 'avg_acc': 49.946351931330476, 'loss': 8.30743408203125}


EP_train:1:   6%|| 1642/27626 [03:52<1:01:11,  7.08it/s]

{'epoch': 1, 'iter': 1640, 'avg_loss': 8.58292340796434, 'avg_acc': 49.93715722120658, 'loss': 9.109553337097168}


EP_train:1:   6%|| 1652/27626 [03:53<1:01:05,  7.09it/s]

{'epoch': 1, 'iter': 1650, 'avg_loss': 8.581253904923752, 'avg_acc': 49.95457298606905, 'loss': 8.058603286743164}


EP_train:1:   6%|| 1662/27626 [03:55<1:01:02,  7.09it/s]

{'epoch': 1, 'iter': 1660, 'avg_loss': 8.583830274780567, 'avg_acc': 49.95108368452739, 'loss': 8.830734252929688}


EP_train:1:   6%|| 1672/27626 [03:56<1:01:48,  7.00it/s]

{'epoch': 1, 'iter': 1670, 'avg_loss': 8.585644144986219, 'avg_acc': 49.958856971873125, 'loss': 8.886974334716797}


EP_train:1:   6%|| 1682/27626 [03:57<1:00:41,  7.13it/s]

{'epoch': 1, 'iter': 1680, 'avg_loss': 8.585317974782713, 'avg_acc': 49.97211481261154, 'loss': 7.812190055847168}


EP_train:1:   6%|| 1692/27626 [03:59<1:01:21,  7.04it/s]

{'epoch': 1, 'iter': 1690, 'avg_loss': 8.585408772751116, 'avg_acc': 49.951951507983445, 'loss': 8.477482795715332}


EP_train:1:   6%|| 1702/27626 [04:00<1:01:02,  7.08it/s]

{'epoch': 1, 'iter': 1700, 'avg_loss': 8.587549519356667, 'avg_acc': 49.95407113462669, 'loss': 9.779288291931152}


EP_train:1:   6%|| 1712/27626 [04:02<1:01:13,  7.05it/s]

{'epoch': 1, 'iter': 1710, 'avg_loss': 8.589414841525391, 'avg_acc': 49.928769725306836, 'loss': 8.48118782043457}


EP_train:1:   6%|| 1722/27626 [04:03<1:01:33,  7.01it/s]

{'epoch': 1, 'iter': 1720, 'avg_loss': 8.590314757886285, 'avg_acc': 49.941894247530506, 'loss': 8.759515762329102}


EP_train:1:   6%|| 1732/27626 [04:05<1:00:54,  7.08it/s]

{'epoch': 1, 'iter': 1730, 'avg_loss': 8.59010027930063, 'avg_acc': 49.91876083188908, 'loss': 8.742793083190918}


EP_train:1:   6%|| 1742/27626 [04:06<1:00:31,  7.13it/s]

{'epoch': 1, 'iter': 1740, 'avg_loss': 8.591277673558077, 'avg_acc': 49.92820218265365, 'loss': 7.909671306610107}


EP_train:1:   6%|| 1752/27626 [04:07<1:00:32,  7.12it/s]

{'epoch': 1, 'iter': 1750, 'avg_loss': 8.58898475468329, 'avg_acc': 49.93039691604797, 'loss': 8.349640846252441}


EP_train:1:   6%|| 1762/27626 [04:09<1:01:18,  7.03it/s]

{'epoch': 1, 'iter': 1760, 'avg_loss': 8.588869060189259, 'avg_acc': 49.95208688245315, 'loss': 8.743598937988281}


EP_train:1:   6%|| 1772/27626 [04:10<1:00:36,  7.11it/s]

{'epoch': 1, 'iter': 1770, 'avg_loss': 8.588017389910293, 'avg_acc': 49.95059288537549, 'loss': 8.364164352416992}


EP_train:1:   6%|| 1782/27626 [04:12<1:00:49,  7.08it/s]

{'epoch': 1, 'iter': 1780, 'avg_loss': 8.588702215230043, 'avg_acc': 49.96315272318922, 'loss': 8.93575668334961}


EP_train:1:   6%|| 1792/27626 [04:13<1:01:34,  6.99it/s]

{'epoch': 1, 'iter': 1790, 'avg_loss': 8.589998438263791, 'avg_acc': 49.95986878838637, 'loss': 8.756632804870605}


EP_train:1:   7%|| 1802/27626 [04:14<1:01:17,  7.02it/s]

{'epoch': 1, 'iter': 1800, 'avg_loss': 8.591354237471204, 'avg_acc': 49.95662132148806, 'loss': 8.02745246887207}


EP_train:1:   7%|| 1812/27626 [04:16<1:00:53,  7.07it/s]

{'epoch': 1, 'iter': 1810, 'avg_loss': 8.590738507149169, 'avg_acc': 49.95340971838763, 'loss': 8.243595123291016}


EP_train:1:   7%|| 1822/27626 [04:17<1:01:07,  7.04it/s]

{'epoch': 1, 'iter': 1820, 'avg_loss': 8.592205328839222, 'avg_acc': 49.974258649093905, 'loss': 8.416131973266602}


EP_train:1:   7%|| 1832/27626 [04:19<1:00:29,  7.11it/s]

{'epoch': 1, 'iter': 1830, 'avg_loss': 8.591104637137663, 'avg_acc': 49.99146641179683, 'loss': 7.839978218078613}


EP_train:1:   7%|| 1842/27626 [04:20<1:00:30,  7.10it/s]

{'epoch': 1, 'iter': 1840, 'avg_loss': 8.592407386112576, 'avg_acc': 49.98642042368278, 'loss': 8.58536434173584}


EP_train:1:   7%|| 1852/27626 [04:21<1:00:45,  7.07it/s]

{'epoch': 1, 'iter': 1850, 'avg_loss': 8.593713914684706, 'avg_acc': 49.97974068071313, 'loss': 9.211385726928711}


EP_train:1:   7%|| 1862/27626 [04:23<1:00:57,  7.04it/s]

{'epoch': 1, 'iter': 1860, 'avg_loss': 8.5926602935996, 'avg_acc': 49.99496238581408, 'loss': 8.030488967895508}


EP_train:1:   7%|| 1872/27626 [04:24<1:00:30,  7.09it/s]

{'epoch': 1, 'iter': 1870, 'avg_loss': 8.59204587141248, 'avg_acc': 49.984967931587384, 'loss': 9.043283462524414}


EP_train:1:   7%|| 1882/27626 [04:26<1:01:12,  7.01it/s]

{'epoch': 1, 'iter': 1880, 'avg_loss': 8.59281597892887, 'avg_acc': 50.026581605528975, 'loss': 8.069426536560059}


EP_train:1:   7%|| 1892/27626 [04:27<1:01:06,  7.02it/s]

{'epoch': 1, 'iter': 1890, 'avg_loss': 8.594732282276446, 'avg_acc': 50.0165256478054, 'loss': 8.953685760498047}


EP_train:1:   7%|| 1902/27626 [04:28<1:00:20,  7.11it/s]

{'epoch': 1, 'iter': 1900, 'avg_loss': 8.596718769836025, 'avg_acc': 50.00821935823251, 'loss': 8.913500785827637}


EP_train:1:   7%|| 1912/27626 [04:30<1:00:34,  7.07it/s]

{'epoch': 1, 'iter': 1910, 'avg_loss': 8.597081104540063, 'avg_acc': 49.99345892203035, 'loss': 7.352351188659668}


EP_train:1:   7%|| 1922/27626 [04:31<1:00:38,  7.06it/s]

{'epoch': 1, 'iter': 1920, 'avg_loss': 8.5981049371349, 'avg_acc': 49.96258459135866, 'loss': 8.994152069091797}


EP_train:1:   7%|| 1932/27626 [04:33<1:00:43,  7.05it/s]

{'epoch': 1, 'iter': 1930, 'avg_loss': 8.59720969113706, 'avg_acc': 49.978961677887106, 'loss': 8.374578475952148}


EP_train:1:   7%|| 1942/27626 [04:34<1:00:46,  7.04it/s]

{'epoch': 1, 'iter': 1940, 'avg_loss': 8.598402664994284, 'avg_acc': 49.96780010303967, 'loss': 9.292637825012207}


EP_train:1:   7%|| 1952/27626 [04:36<1:00:27,  7.08it/s]

{'epoch': 1, 'iter': 1950, 'avg_loss': 8.59972219574702, 'avg_acc': 49.93112506406971, 'loss': 8.20486831665039}


EP_train:1:   7%|| 1962/27626 [04:37<1:00:26,  7.08it/s]

{'epoch': 1, 'iter': 1960, 'avg_loss': 8.599229158764285, 'avg_acc': 49.936257011728706, 'loss': 8.103964805603027}


EP_train:1:   7%|| 1972/27626 [04:38<1:00:50,  7.03it/s]

{'epoch': 1, 'iter': 1970, 'avg_loss': 8.597375129939698, 'avg_acc': 49.93499492643328, 'loss': 8.596702575683594}


EP_train:1:   7%|| 1982/27626 [04:40<1:00:41,  7.04it/s]

{'epoch': 1, 'iter': 1980, 'avg_loss': 8.597456942658903, 'avg_acc': 49.921125694093895, 'loss': 8.584044456481934}


EP_train:1:   7%|| 1992/27626 [04:41<1:00:13,  7.09it/s]

{'epoch': 1, 'iter': 1990, 'avg_loss': 8.59647472751374, 'avg_acc': 49.92152184831743, 'loss': 8.218117713928223}


EP_train:1:   7%|| 2002/27626 [04:43<1:00:36,  7.05it/s]

{'epoch': 1, 'iter': 2000, 'avg_loss': 8.594169055742363, 'avg_acc': 49.94377811094453, 'loss': 7.882113933563232}


EP_train:1:   7%|| 2012/27626 [04:44<1:00:04,  7.11it/s]

{'epoch': 1, 'iter': 2010, 'avg_loss': 8.592450376650044, 'avg_acc': 49.939395822973644, 'loss': 7.874289035797119}


EP_train:1:   7%|| 2022/27626 [04:45<1:00:42,  7.03it/s]

{'epoch': 1, 'iter': 2020, 'avg_loss': 8.590915528693333, 'avg_acc': 49.958250865907964, 'loss': 8.825230598449707}


EP_train:1:   7%|| 2032/27626 [04:47<1:00:37,  7.04it/s]

{'epoch': 1, 'iter': 2030, 'avg_loss': 8.592059396277033, 'avg_acc': 49.956917774495324, 'loss': 8.578393936157227}


EP_train:1:   7%|| 2042/27626 [04:48<1:00:06,  7.09it/s]

{'epoch': 1, 'iter': 2040, 'avg_loss': 8.591477282195626, 'avg_acc': 49.967846643802055, 'loss': 8.591636657714844}


EP_train:1:   7%|| 2052/27626 [04:50<1:00:26,  7.05it/s]

{'epoch': 1, 'iter': 2050, 'avg_loss': 8.590957003881268, 'avg_acc': 49.96038517796197, 'loss': 8.615095138549805}


EP_train:1:   7%|| 2062/27626 [04:51<1:00:28,  7.05it/s]

{'epoch': 1, 'iter': 2060, 'avg_loss': 8.591577305948775, 'avg_acc': 49.98635371179039, 'loss': 7.980506420135498}


EP_train:1:   8%|| 2072/27626 [04:52<1:00:37,  7.02it/s]

{'epoch': 1, 'iter': 2070, 'avg_loss': 8.591428575971632, 'avg_acc': 50.0, 'loss': 9.268599510192871}


EP_train:1:   8%|| 2082/27626 [04:54<1:00:05,  7.08it/s]

{'epoch': 1, 'iter': 2080, 'avg_loss': 8.591577085607732, 'avg_acc': 50.019521864488226, 'loss': 8.376070976257324}


EP_train:1:   8%|| 2092/27626 [04:55<1:00:06,  7.08it/s]

{'epoch': 1, 'iter': 2090, 'avg_loss': 8.590910776330773, 'avg_acc': 50.008967001434726, 'loss': 8.071258544921875}


EP_train:1:   8%|| 2102/27626 [04:57<1:00:29,  7.03it/s]

{'epoch': 1, 'iter': 2100, 'avg_loss': 8.593887972525334, 'avg_acc': 50.02231080437887, 'loss': 9.342720031738281}


EP_train:1:   8%|| 2112/27626 [04:58<1:00:38,  7.01it/s]

{'epoch': 1, 'iter': 2110, 'avg_loss': 8.594487051103195, 'avg_acc': 50.02368545712932, 'loss': 8.586060523986816}


EP_train:1:   8%|| 2122/27626 [04:59<59:59,  7.09it/s]

{'epoch': 1, 'iter': 2120, 'avg_loss': 8.595165102507696, 'avg_acc': 50.044200848656295, 'loss': 8.65954875946045}


EP_train:1:   8%|| 2132/27626 [05:01<59:46,  7.11it/s]  

{'epoch': 1, 'iter': 2130, 'avg_loss': 8.596366747171876, 'avg_acc': 50.04545987799155, 'loss': 8.784650802612305}


EP_train:1:   8%|| 2142/27626 [05:02<59:51,  7.10it/s]

{'epoch': 1, 'iter': 2140, 'avg_loss': 8.596606236314841, 'avg_acc': 50.04232835123774, 'loss': 8.709543228149414}


EP_train:1:   8%|| 2152/27626 [05:04<1:00:41,  6.99it/s]

{'epoch': 1, 'iter': 2150, 'avg_loss': 8.597153757184897, 'avg_acc': 50.05230125523013, 'loss': 8.343552589416504}


EP_train:1:   8%|| 2162/27626 [05:05<1:00:21,  7.03it/s]

{'epoch': 1, 'iter': 2160, 'avg_loss': 8.59491420487241, 'avg_acc': 50.05350532161037, 'loss': 7.717837333679199}


EP_train:1:   8%|| 2172/27626 [05:07<1:00:03,  7.06it/s]

{'epoch': 1, 'iter': 2170, 'avg_loss': 8.5942321434267, 'avg_acc': 50.047501151543074, 'loss': 8.687211990356445}


EP_train:1:   8%|| 2182/27626 [05:08<59:58,  7.07it/s]

{'epoch': 1, 'iter': 2180, 'avg_loss': 8.593628504690583, 'avg_acc': 50.04155204034847, 'loss': 8.863838195800781}


EP_train:1:   8%|| 2192/27626 [05:09<59:37,  7.11it/s]

{'epoch': 1, 'iter': 2190, 'avg_loss': 8.59329440679685, 'avg_acc': 50.027099497946146, 'loss': 8.842660903930664}


EP_train:1:   8%|| 2202/27626 [05:11<1:00:05,  7.05it/s]

{'epoch': 1, 'iter': 2200, 'avg_loss': 8.592586274474169, 'avg_acc': 50.00851885506587, 'loss': 8.590507507324219}


EP_train:1:   8%|| 2212/27626 [05:12<59:42,  7.09it/s]

{'epoch': 1, 'iter': 2210, 'avg_loss': 8.591324187899433, 'avg_acc': 49.98445273631841, 'loss': 8.464531898498535}


EP_train:1:   8%|| 2222/27626 [05:14<59:54,  7.07it/s]

{'epoch': 1, 'iter': 2220, 'avg_loss': 8.591785842263446, 'avg_acc': 49.96763845114813, 'loss': 9.00020980834961}


EP_train:1:   8%|| 2232/27626 [05:15<1:00:03,  7.05it/s]

{'epoch': 1, 'iter': 2230, 'avg_loss': 8.591890109381511, 'avg_acc': 49.97618780815778, 'loss': 8.6231050491333}


EP_train:1:   8%|| 2242/27626 [05:16<1:00:09,  7.03it/s]

{'epoch': 1, 'iter': 2240, 'avg_loss': 8.591670246541314, 'avg_acc': 49.9776885319054, 'loss': 8.369345664978027}


EP_train:1:   8%|| 2252/27626 [05:18<59:59,  7.05it/s]  

{'epoch': 1, 'iter': 2250, 'avg_loss': 8.591130945747135, 'avg_acc': 49.94724566859174, 'loss': 7.993259429931641}


EP_train:1:   8%|| 2262/27626 [05:19<59:39,  7.08it/s]

{'epoch': 1, 'iter': 2260, 'avg_loss': 8.592208982093943, 'avg_acc': 49.94471472799646, 'loss': 8.821638107299805}


EP_train:1:   8%|| 2272/27626 [05:21<59:51,  7.06it/s]

{'epoch': 1, 'iter': 2270, 'avg_loss': 8.59137099012831, 'avg_acc': 49.9642228093351, 'loss': 9.206679344177246}


EP_train:1:   8%|| 2282/27626 [05:22<59:30,  7.10it/s]

{'epoch': 1, 'iter': 2280, 'avg_loss': 8.591787467239092, 'avg_acc': 49.97259973695748, 'loss': 9.060871124267578}


EP_train:1:   8%|| 2292/27626 [05:23<59:44,  7.07it/s]

{'epoch': 1, 'iter': 2290, 'avg_loss': 8.592719043513563, 'avg_acc': 49.96999127018769, 'loss': 7.790628910064697}


EP_train:1:   8%|| 2302/27626 [05:25<59:32,  7.09it/s]

{'epoch': 1, 'iter': 2300, 'avg_loss': 8.59160377252936, 'avg_acc': 49.98641894828336, 'loss': 7.411312580108643}


EP_train:1:   8%|| 2312/27626 [05:26<59:39,  7.07it/s]

{'epoch': 1, 'iter': 2310, 'avg_loss': 8.591318655612406, 'avg_acc': 49.966194288186934, 'loss': 9.67730712890625}


EP_train:1:   8%|| 2322/27626 [05:28<59:54,  7.04it/s]  

{'epoch': 1, 'iter': 2320, 'avg_loss': 8.591857837634269, 'avg_acc': 49.96903274450668, 'loss': 8.549549102783203}


EP_train:1:   8%|| 2332/27626 [05:29<59:46,  7.05it/s]

{'epoch': 1, 'iter': 2330, 'avg_loss': 8.591680831737346, 'avg_acc': 49.957099957099956, 'loss': 8.609297752380371}


EP_train:1:   8%|| 2342/27626 [05:31<1:00:15,  6.99it/s]

{'epoch': 1, 'iter': 2340, 'avg_loss': 8.592535935320033, 'avg_acc': 49.958618111917986, 'loss': 8.753153800964355}


EP_train:1:   9%|| 2352/27626 [05:32<59:25,  7.09it/s]

{'epoch': 1, 'iter': 2350, 'avg_loss': 8.594082376187226, 'avg_acc': 49.962781794980856, 'loss': 8.574118614196777}


EP_train:1:   9%|| 2362/27626 [05:33<59:20,  7.10it/s]

{'epoch': 1, 'iter': 2360, 'avg_loss': 8.59262990507217, 'avg_acc': 49.960292249047015, 'loss': 9.025466918945312}


EP_train:1:   9%|| 2372/27626 [05:35<59:27,  7.08it/s]

{'epoch': 1, 'iter': 2370, 'avg_loss': 8.592672383820442, 'avg_acc': 49.97495782370308, 'loss': 9.293428421020508}


EP_train:1:   9%|| 2382/27626 [05:36<59:37,  7.06it/s]

{'epoch': 1, 'iter': 2380, 'avg_loss': 8.59404543156085, 'avg_acc': 49.964563208735825, 'loss': 8.778884887695312}


EP_train:1:   9%|| 2392/27626 [05:38<59:23,  7.08it/s]

{'epoch': 1, 'iter': 2390, 'avg_loss': 8.595485929762754, 'avg_acc': 49.967325386867415, 'loss': 9.061946868896484}


EP_train:1:   9%|| 2402/27626 [05:39<59:23,  7.08it/s]

{'epoch': 1, 'iter': 2400, 'avg_loss': 8.595154551156904, 'avg_acc': 49.94923990004165, 'loss': 8.737757682800293}


EP_train:1:   9%|| 2412/27626 [05:40<59:35,  7.05it/s]

{'epoch': 1, 'iter': 2410, 'avg_loss': 8.595046556426103, 'avg_acc': 49.94556200746578, 'loss': 8.219841003417969}


EP_train:1:   9%|| 2422/27626 [05:42<59:48,  7.02it/s]

{'epoch': 1, 'iter': 2420, 'avg_loss': 8.595888824809538, 'avg_acc': 49.96643948781495, 'loss': 8.63615608215332}


EP_train:1:   9%|| 2432/27626 [05:43<59:47,  7.02it/s]  

{'epoch': 1, 'iter': 2430, 'avg_loss': 8.595527976101586, 'avg_acc': 49.976861373920194, 'loss': 8.403726577758789}


EP_train:1:   9%|| 2442/27626 [05:45<59:20,  7.07it/s]

{'epoch': 1, 'iter': 2440, 'avg_loss': 8.594363885744338, 'avg_acc': 49.98591765669808, 'loss': 8.574742317199707}


EP_train:1:   9%|| 2452/27626 [05:46<59:14,  7.08it/s]

{'epoch': 1, 'iter': 2450, 'avg_loss': 8.593899608193393, 'avg_acc': 49.98597511219911, 'loss': 8.501129150390625}


EP_train:1:   9%|| 2462/27626 [05:47<59:17,  7.07it/s]

{'epoch': 1, 'iter': 2460, 'avg_loss': 8.593501628099153, 'avg_acc': 49.99238114587566, 'loss': 8.494787216186523}


EP_train:1:   9%|| 2472/27626 [05:49<59:09,  7.09it/s]

{'epoch': 1, 'iter': 2470, 'avg_loss': 8.593465426825937, 'avg_acc': 50.00379401052205, 'loss': 8.814335823059082}


EP_train:1:   9%|| 2482/27626 [05:50<59:32,  7.04it/s]

{'epoch': 1, 'iter': 2480, 'avg_loss': 8.594105365727035, 'avg_acc': 49.99496170898831, 'loss': 8.548125267028809}


EP_train:1:   9%|| 2492/27626 [05:52<59:34,  7.03it/s]

{'epoch': 1, 'iter': 2490, 'avg_loss': 8.593021596114543, 'avg_acc': 49.98494580489763, 'loss': 8.56689167022705}


EP_train:1:   9%|| 2502/27626 [05:53<59:41,  7.01it/s]

{'epoch': 1, 'iter': 2500, 'avg_loss': 8.593084526367065, 'avg_acc': 49.98375649740104, 'loss': 9.264537811279297}


EP_train:1:   9%|| 2512/27626 [05:55<59:32,  7.03it/s]

{'epoch': 1, 'iter': 2510, 'avg_loss': 8.59234337994702, 'avg_acc': 49.99751095181203, 'loss': 8.872913360595703}


EP_train:1:   9%|| 2522/27626 [05:56<1:00:02,  6.97it/s]

{'epoch': 1, 'iter': 2520, 'avg_loss': 8.592168539206881, 'avg_acc': 50.00371876239588, 'loss': 8.553524017333984}


EP_train:1:   9%|| 2532/27626 [05:57<58:58,  7.09it/s]

{'epoch': 1, 'iter': 2530, 'avg_loss': 8.59164299940413, 'avg_acc': 50.01605096799684, 'loss': 8.896745681762695}


EP_train:1:   9%|| 2542/27626 [05:59<59:28,  7.03it/s]

{'epoch': 1, 'iter': 2540, 'avg_loss': 8.590382779596922, 'avg_acc': 50.02336678473041, 'loss': 8.556281089782715}


EP_train:1:   9%|| 2552/27626 [06:00<59:05,  7.07it/s]

{'epoch': 1, 'iter': 2550, 'avg_loss': 8.589899708177006, 'avg_acc': 50.03185025480204, 'loss': 7.850376129150391}


EP_train:1:   9%|| 2562/27626 [06:02<59:09,  7.06it/s]

{'epoch': 1, 'iter': 2560, 'avg_loss': 8.588847622839745, 'avg_acc': 50.02440452948067, 'loss': 7.832638740539551}


EP_train:1:   9%|| 2572/27626 [06:03<59:29,  7.02it/s]

{'epoch': 1, 'iter': 2570, 'avg_loss': 8.589357850153776, 'avg_acc': 50.03646441073513, 'loss': 8.527318954467773}


EP_train:1:   9%|| 2582/27626 [06:04<59:17,  7.04it/s]

{'epoch': 1, 'iter': 2580, 'avg_loss': 8.589103096079244, 'avg_acc': 50.00968616815188, 'loss': 8.390949249267578}


EP_train:1:   9%|| 2592/27626 [06:06<58:59,  7.07it/s]

{'epoch': 1, 'iter': 2590, 'avg_loss': 8.5897735113897, 'avg_acc': 50.00723658818988, 'loss': 9.998716354370117}


EP_train:1:   9%|| 2602/27626 [06:07<58:47,  7.09it/s]

{'epoch': 1, 'iter': 2600, 'avg_loss': 8.589971537408532, 'avg_acc': 50.0, 'loss': 9.130270957946777}


EP_train:1:   9%|| 2612/27626 [06:09<1:02:14,  6.70it/s]

{'epoch': 1, 'iter': 2610, 'avg_loss': 8.590131430368412, 'avg_acc': 50.0179528916124, 'loss': 8.158734321594238}


EP_train:1:   9%|| 2622/27626 [06:10<58:47,  7.09it/s]

{'epoch': 1, 'iter': 2620, 'avg_loss': 8.588452981747274, 'avg_acc': 50.00596146508967, 'loss': 7.699911594390869}


EP_train:1:  10%|| 2632/27626 [06:12<58:38,  7.10it/s]

{'epoch': 1, 'iter': 2630, 'avg_loss': 8.58717899710359, 'avg_acc': 50.01068985176739, 'loss': 8.638569831848145}


EP_train:1:  10%|| 2642/27626 [06:13<58:34,  7.11it/s]

{'epoch': 1, 'iter': 2640, 'avg_loss': 8.588250214563361, 'avg_acc': 50.024848542218855, 'loss': 9.104767799377441}


EP_train:1:  10%|| 2652/27626 [06:14<59:16,  7.02it/s]

{'epoch': 1, 'iter': 2650, 'avg_loss': 8.59044677345135, 'avg_acc': 50.01296680497925, 'loss': 9.112791061401367}


EP_train:1:  10%|| 2662/27626 [06:16<59:41,  6.97it/s]

{'epoch': 1, 'iter': 2660, 'avg_loss': 8.589642039459928, 'avg_acc': 50.02348741074783, 'loss': 8.019110679626465}


EP_train:1:  10%|| 2672/27626 [06:17<59:12,  7.02it/s]

{'epoch': 1, 'iter': 2670, 'avg_loss': 8.589557571653925, 'avg_acc': 50.022229502059155, 'loss': 7.979210376739502}


EP_train:1:  10%|| 2682/27626 [06:19<58:43,  7.08it/s]

{'epoch': 1, 'iter': 2680, 'avg_loss': 8.589649972342947, 'avg_acc': 50.0209809772473, 'loss': 9.197712898254395}


EP_train:1:  10%|| 2692/27626 [06:20<58:37,  7.09it/s]

{'epoch': 1, 'iter': 2690, 'avg_loss': 8.58984492818293, 'avg_acc': 50.026709401709404, 'loss': 8.829051971435547}


EP_train:1:  10%|| 2702/27626 [06:21<59:08,  7.02it/s]

{'epoch': 1, 'iter': 2700, 'avg_loss': 8.589581205155486, 'avg_acc': 50.02661051462422, 'loss': 6.983428955078125}


EP_train:1:  10%|| 2712/27626 [06:23<58:34,  7.09it/s]

{'epoch': 1, 'iter': 2710, 'avg_loss': 8.589357064421966, 'avg_acc': 50.048413869420884, 'loss': 9.07153606414795}


EP_train:1:  10%|| 2722/27626 [06:24<58:30,  7.09it/s]

{'epoch': 1, 'iter': 2720, 'avg_loss': 8.590100803957052, 'avg_acc': 50.05972069092246, 'loss': 8.465925216674805}


EP_train:1:  10%|| 2732/27626 [06:26<58:18,  7.11it/s]

{'epoch': 1, 'iter': 2730, 'avg_loss': 8.59044700802049, 'avg_acc': 50.07323324789454, 'loss': 7.964441776275635}


EP_train:1:  10%|| 2742/27626 [06:27<59:04,  7.02it/s]

{'epoch': 1, 'iter': 2740, 'avg_loss': 8.592482253175197, 'avg_acc': 50.08208682962423, 'loss': 8.831918716430664}


EP_train:1:  10%|| 2752/27626 [06:28<58:35,  7.08it/s]

{'epoch': 1, 'iter': 2750, 'avg_loss': 8.593520702859871, 'avg_acc': 50.07610868774991, 'loss': 8.023487091064453}


EP_train:1:  10%|| 2762/27626 [06:30<58:41,  7.06it/s]

{'epoch': 1, 'iter': 2760, 'avg_loss': 8.594127707013355, 'avg_acc': 50.093942412169504, 'loss': 9.055899620056152}


EP_train:1:  10%|| 2772/27626 [06:31<58:11,  7.12it/s]

{'epoch': 1, 'iter': 2770, 'avg_loss': 8.594286453332423, 'avg_acc': 50.0778148682786, 'loss': 8.087067604064941}


EP_train:1:  10%|| 2782/27626 [06:33<58:26,  7.09it/s]

{'epoch': 1, 'iter': 2780, 'avg_loss': 8.59361260197052, 'avg_acc': 50.09101941747572, 'loss': 9.701741218566895}


EP_train:1:  10%|| 2792/27626 [06:34<58:37,  7.06it/s]

{'epoch': 1, 'iter': 2790, 'avg_loss': 8.594951238396431, 'avg_acc': 50.094052310999636, 'loss': 9.015375137329102}


EP_train:1:  10%|| 2802/27626 [06:35<58:29,  7.07it/s]

{'epoch': 1, 'iter': 2800, 'avg_loss': 8.596296830332223, 'avg_acc': 50.08479114601928, 'loss': 8.339742660522461}


EP_train:1:  10%|| 2812/27626 [06:37<58:31,  7.07it/s]

{'epoch': 1, 'iter': 2810, 'avg_loss': 8.596948045040525, 'avg_acc': 50.09004802561366, 'loss': 9.138387680053711}


EP_train:1:  10%|| 2822/27626 [06:38<58:22,  7.08it/s]

{'epoch': 1, 'iter': 2820, 'avg_loss': 8.596450657761718, 'avg_acc': 50.0830822403403, 'loss': 9.144439697265625}


EP_train:1:  10%|| 2832/27626 [06:40<59:04,  7.00it/s]

{'epoch': 1, 'iter': 2830, 'avg_loss': 8.596321681665051, 'avg_acc': 50.069542564464854, 'loss': 8.470499038696289}


EP_train:1:  10%|| 2842/27626 [06:41<58:41,  7.04it/s]

{'epoch': 1, 'iter': 2840, 'avg_loss': 8.5956018686882, 'avg_acc': 50.06049806406195, 'loss': 7.704988956451416}


EP_train:1:  10%|| 2852/27626 [06:43<58:10,  7.10it/s]

{'epoch': 1, 'iter': 2850, 'avg_loss': 8.595470080079634, 'avg_acc': 50.05370922483338, 'loss': 8.281233787536621}


EP_train:1:  10%|| 2862/27626 [06:44<58:00,  7.12it/s]

{'epoch': 1, 'iter': 2860, 'avg_loss': 8.595124033688249, 'avg_acc': 50.04369101712688, 'loss': 8.50469970703125}


EP_train:1:  10%|| 2872/27626 [06:45<58:19,  7.07it/s]

{'epoch': 1, 'iter': 2870, 'avg_loss': 8.59512713544255, 'avg_acc': 50.05442354580285, 'loss': 8.83833122253418}


EP_train:1:  10%|| 2882/27626 [06:47<58:27,  7.05it/s]

{'epoch': 1, 'iter': 2880, 'avg_loss': 8.595504919136204, 'avg_acc': 50.05423464074974, 'loss': 9.024191856384277}


EP_train:1:  10%|| 2892/27626 [06:48<58:09,  7.09it/s]

{'epoch': 1, 'iter': 2890, 'avg_loss': 8.595409678819063, 'avg_acc': 50.038913870632996, 'loss': 8.391778945922852}


EP_train:1:  11%|| 2902/27626 [06:50<58:06,  7.09it/s]

{'epoch': 1, 'iter': 2900, 'avg_loss': 8.595264964414358, 'avg_acc': 50.03877973112719, 'loss': 8.523184776306152}


EP_train:1:  11%|| 2912/27626 [06:51<58:31,  7.04it/s]

{'epoch': 1, 'iter': 2910, 'avg_loss': 8.59594246842369, 'avg_acc': 50.03542597045689, 'loss': 8.528404235839844}


EP_train:1:  11%|| 2922/27626 [06:52<58:25,  7.05it/s]

{'epoch': 1, 'iter': 2920, 'avg_loss': 8.595865585427706, 'avg_acc': 50.02460629921261, 'loss': 8.828502655029297}


EP_train:1:  11%|| 2932/27626 [06:54<58:23,  7.05it/s]

{'epoch': 1, 'iter': 2930, 'avg_loss': 8.595625816047171, 'avg_acc': 50.01386045718185, 'loss': 8.432581901550293}


EP_train:1:  11%|| 2942/27626 [06:55<58:02,  7.09it/s]

{'epoch': 1, 'iter': 2940, 'avg_loss': 8.595858445989563, 'avg_acc': 50.02656409384563, 'loss': 8.89939022064209}


EP_train:1:  11%|| 2952/27626 [06:57<58:01,  7.09it/s]

{'epoch': 1, 'iter': 2950, 'avg_loss': 8.594976709721978, 'avg_acc': 50.01164859369705, 'loss': 8.621787071228027}


EP_train:1:  11%|| 2962/27626 [06:58<58:36,  7.01it/s]

{'epoch': 1, 'iter': 2960, 'avg_loss': 8.594926254687298, 'avg_acc': 50.02005234718001, 'loss': 10.271808624267578}


EP_train:1:  11%|| 2972/27626 [06:59<58:32,  7.02it/s]

{'epoch': 1, 'iter': 2970, 'avg_loss': 8.59493633072531, 'avg_acc': 50.01682935038707, 'loss': 8.889132499694824}


EP_train:1:  11%|| 2982/27626 [07:01<58:36,  7.01it/s]

{'epoch': 1, 'iter': 2980, 'avg_loss': 8.595049582484263, 'avg_acc': 50.01467628312647, 'loss': 7.831116199493408}


EP_train:1:  11%|| 2992/27626 [07:02<58:09,  7.06it/s]

{'epoch': 1, 'iter': 2990, 'avg_loss': 8.596439256830703, 'avg_acc': 50.009403209628886, 'loss': 9.415552139282227}


EP_train:1:  11%|| 3002/27626 [07:04<58:31,  7.01it/s]

{'epoch': 1, 'iter': 3000, 'avg_loss': 8.597536439778048, 'avg_acc': 50.01145451516161, 'loss': 7.874088764190674}


EP_train:1:  11%|| 3012/27626 [07:05<57:57,  7.08it/s]

{'epoch': 1, 'iter': 3010, 'avg_loss': 8.598325551367804, 'avg_acc': 50.00622716705414, 'loss': 9.475714683532715}


EP_train:1:  11%|| 3022/27626 [07:07<58:16,  7.04it/s]

{'epoch': 1, 'iter': 3020, 'avg_loss': 8.59863421933535, 'avg_acc': 50.012413108242306, 'loss': 8.15693187713623}


EP_train:1:  11%|| 3032/27626 [07:08<58:10,  7.05it/s]

{'epoch': 1, 'iter': 3030, 'avg_loss': 8.598448649997643, 'avg_acc': 50.02268228307489, 'loss': 8.784172058105469}


EP_train:1:  11%|| 3042/27626 [07:09<58:25,  7.01it/s]

{'epoch': 1, 'iter': 3040, 'avg_loss': 8.597189707569447, 'avg_acc': 50.01952482735942, 'loss': 8.396279335021973}


EP_train:1:  11%|| 3052/27626 [07:11<58:13,  7.03it/s]

{'epoch': 1, 'iter': 3050, 'avg_loss': 8.596140042714158, 'avg_acc': 50.020485086856766, 'loss': 8.4782133102417}


EP_train:1:  11%|| 3062/27626 [07:12<59:03,  6.93it/s]

{'epoch': 1, 'iter': 3060, 'avg_loss': 8.595126333364432, 'avg_acc': 50.02245998039856, 'loss': 9.387937545776367}


EP_train:1:  11%|| 3072/27626 [07:14<58:21,  7.01it/s]

{'epoch': 1, 'iter': 3070, 'avg_loss': 8.594417459139487, 'avg_acc': 50.03866818625855, 'loss': 8.050305366516113}


EP_train:1:  11%|| 3082/27626 [07:15<58:10,  7.03it/s]

{'epoch': 1, 'iter': 3080, 'avg_loss': 8.594886689246456, 'avg_acc': 50.0182570593963, 'loss': 9.010604858398438}


EP_train:1:  11%|| 3092/27626 [07:16<58:23,  7.00it/s]

{'epoch': 1, 'iter': 3090, 'avg_loss': 8.59517696813178, 'avg_acc': 50.02426399223552, 'loss': 7.874675750732422}


EP_train:1:  11%|| 3102/27626 [07:18<58:16,  7.01it/s]

{'epoch': 1, 'iter': 3100, 'avg_loss': 8.595974208539012, 'avg_acc': 50.02720896485005, 'loss': 8.657857894897461}


EP_train:1:  11%|| 3112/27626 [07:19<58:03,  7.04it/s]

{'epoch': 1, 'iter': 3110, 'avg_loss': 8.59709510554918, 'avg_acc': 50.02913050466088, 'loss': 8.192585945129395}


EP_train:1:  11%|| 3122/27626 [07:21<58:00,  7.04it/s]

{'epoch': 1, 'iter': 3120, 'avg_loss': 8.598160338378877, 'avg_acc': 50.032041012495995, 'loss': 9.175712585449219}


EP_train:1:  11%|| 3132/27626 [07:22<57:56,  7.05it/s]

{'epoch': 1, 'iter': 3130, 'avg_loss': 8.59946728681688, 'avg_acc': 50.03593101245608, 'loss': 8.991219520568848}


EP_train:1:  11%|| 3142/27626 [07:24<58:09,  7.02it/s]

{'epoch': 1, 'iter': 3140, 'avg_loss': 8.59931541620186, 'avg_acc': 50.03681152499204, 'loss': 9.385187149047852}


EP_train:1:  11%|| 3152/27626 [07:25<58:13,  7.01it/s]

{'epoch': 1, 'iter': 3150, 'avg_loss': 8.59991678454618, 'avg_acc': 50.03966994604887, 'loss': 8.838871002197266}


EP_train:1:  11%|| 3162/27626 [07:26<58:00,  7.03it/s]

{'epoch': 1, 'iter': 3160, 'avg_loss': 8.600289553111581, 'avg_acc': 50.04152167035748, 'loss': 8.055545806884766}


EP_train:1:  11%|| 3172/27626 [07:28<57:46,  7.05it/s]

{'epoch': 1, 'iter': 3170, 'avg_loss': 8.599466318890185, 'avg_acc': 50.047303689687794, 'loss': 7.857842445373535}


EP_train:1:  12%|| 3182/27626 [07:29<57:53,  7.04it/s]

{'epoch': 1, 'iter': 3180, 'avg_loss': 8.600111141852409, 'avg_acc': 50.05796133291418, 'loss': 9.940186500549316}


EP_train:1:  12%|| 3192/27626 [07:31<58:01,  7.02it/s]

{'epoch': 1, 'iter': 3190, 'avg_loss': 8.601677219010714, 'avg_acc': 50.06659354434346, 'loss': 8.999199867248535}


EP_train:1:  12%|| 3202/27626 [07:32<57:15,  7.11it/s]

{'epoch': 1, 'iter': 3200, 'avg_loss': 8.60186914412985, 'avg_acc': 50.05857544517338, 'loss': 8.602411270141602}


EP_train:1:  12%|| 3212/27626 [07:33<57:27,  7.08it/s]

{'epoch': 1, 'iter': 3210, 'avg_loss': 8.602337355481543, 'avg_acc': 50.06812519464341, 'loss': 8.454758644104004}


EP_train:1:  12%|| 3222/27626 [07:35<57:34,  7.06it/s]

{'epoch': 1, 'iter': 3220, 'avg_loss': 8.602429750283214, 'avg_acc': 50.059181931077305, 'loss': 8.648306846618652}


EP_train:1:  12%|| 3232/27626 [07:36<58:14,  6.98it/s]

{'epoch': 1, 'iter': 3230, 'avg_loss': 8.60258945759071, 'avg_acc': 50.064801918910554, 'loss': 8.64408016204834}


EP_train:1:  12%|| 3242/27626 [07:38<57:48,  7.03it/s]

{'epoch': 1, 'iter': 3240, 'avg_loss': 8.602065826411307, 'avg_acc': 50.06653039185437, 'loss': 8.433874130249023}


EP_train:1:  12%|| 3252/27626 [07:39<57:43,  7.04it/s]

{'epoch': 1, 'iter': 3250, 'avg_loss': 8.601998470116307, 'avg_acc': 50.07401568748078, 'loss': 7.848227024078369}


EP_train:1:  12%|| 3262/27626 [07:41<57:15,  7.09it/s]

{'epoch': 1, 'iter': 3260, 'avg_loss': 8.601248902288985, 'avg_acc': 50.07283042011653, 'loss': 8.932377815246582}


EP_train:1:  12%|| 3272/27626 [07:42<57:25,  7.07it/s]

{'epoch': 1, 'iter': 3270, 'avg_loss': 8.601549585599559, 'avg_acc': 50.07069703454601, 'loss': 8.362435340881348}


EP_train:1:  12%|| 3282/27626 [07:43<57:37,  7.04it/s]

{'epoch': 1, 'iter': 3280, 'avg_loss': 8.601648193494244, 'avg_acc': 50.07714873514173, 'loss': 8.630372047424316}


EP_train:1:  12%|| 3292/27626 [07:45<57:08,  7.10it/s]

{'epoch': 1, 'iter': 3290, 'avg_loss': 8.60163154315166, 'avg_acc': 50.0835612275904, 'loss': 8.21894359588623}


EP_train:1:  12%|| 3302/27626 [07:46<57:01,  7.11it/s]

{'epoch': 1, 'iter': 3300, 'avg_loss': 8.600742606024639, 'avg_acc': 50.09088155104514, 'loss': 8.797348976135254}


EP_train:1:  12%|| 3312/27626 [07:48<57:38,  7.03it/s]

{'epoch': 1, 'iter': 3310, 'avg_loss': 8.600719090856048, 'avg_acc': 50.08305647840532, 'loss': 8.481809616088867}


EP_train:1:  12%|| 3322/27626 [07:49<57:10,  7.09it/s]

{'epoch': 1, 'iter': 3320, 'avg_loss': 8.600661060530822, 'avg_acc': 50.083747365251426, 'loss': 8.924100875854492}


EP_train:1:  12%|| 3332/27626 [07:50<57:52,  7.00it/s]

{'epoch': 1, 'iter': 3330, 'avg_loss': 8.600133700186124, 'avg_acc': 50.08443410387271, 'loss': 9.08382797241211}


EP_train:1:  12%|| 3342/27626 [07:52<56:56,  7.11it/s]

{'epoch': 1, 'iter': 3340, 'avg_loss': 8.600287669506375, 'avg_acc': 50.0692158036516, 'loss': 9.246973991394043}


EP_train:1:  12%|| 3352/27626 [07:53<57:09,  7.08it/s]

{'epoch': 1, 'iter': 3350, 'avg_loss': 8.600151554074653, 'avg_acc': 50.08299761265293, 'loss': 8.792859077453613}


EP_train:1:  12%|| 3362/27626 [07:55<57:10,  7.07it/s]

{'epoch': 1, 'iter': 3360, 'avg_loss': 8.600000533558221, 'avg_acc': 50.079961321035405, 'loss': 8.059698104858398}


EP_train:1:  12%|| 3372/27626 [07:56<57:49,  6.99it/s]

{'epoch': 1, 'iter': 3370, 'avg_loss': 8.59975212372152, 'avg_acc': 50.0889943636903, 'loss': 8.112366676330566}


EP_train:1:  12%|| 3382/27626 [07:57<57:17,  7.05it/s]

{'epoch': 1, 'iter': 3380, 'avg_loss': 8.599784563364132, 'avg_acc': 50.08873114463177, 'loss': 8.089930534362793}


EP_train:1:  12%|| 3392/27626 [07:59<57:25,  7.03it/s]

{'epoch': 1, 'iter': 3390, 'avg_loss': 8.599853200077202, 'avg_acc': 50.090312592155705, 'loss': 9.104174613952637}


EP_train:1:  12%|| 3402/27626 [08:00<57:36,  7.01it/s]

{'epoch': 1, 'iter': 3400, 'avg_loss': 8.601252111397361, 'avg_acc': 50.10382975595413, 'loss': 8.291473388671875}


EP_train:1:  12%|| 3412/27626 [08:02<57:25,  7.03it/s]

{'epoch': 1, 'iter': 3410, 'avg_loss': 8.600532744085289, 'avg_acc': 50.114519202579885, 'loss': 8.80322265625}


EP_train:1:  12%|| 3422/27626 [08:03<57:18,  7.04it/s]

{'epoch': 1, 'iter': 3420, 'avg_loss': 8.59988080796635, 'avg_acc': 50.11235749780766, 'loss': 8.397856712341309}


EP_train:1:  12%|| 3432/27626 [08:05<57:24,  7.02it/s]

{'epoch': 1, 'iter': 3430, 'avg_loss': 8.599075509034938, 'avg_acc': 50.115673273098224, 'loss': 8.062131881713867}


EP_train:1:  12%|| 3442/27626 [08:06<57:14,  7.04it/s]

{'epoch': 1, 'iter': 3440, 'avg_loss': 8.59872184316629, 'avg_acc': 50.11715344376635, 'loss': 8.049691200256348}


EP_train:1:  12%|| 3452/27626 [08:07<57:05,  7.06it/s]

{'epoch': 1, 'iter': 3450, 'avg_loss': 8.599005722094535, 'avg_acc': 50.12134164010432, 'loss': 8.555133819580078}


EP_train:1:  13%|| 3462/27626 [08:09<57:10,  7.04it/s]

{'epoch': 1, 'iter': 3460, 'avg_loss': 8.599142207261696, 'avg_acc': 50.12640855244149, 'loss': 8.49905014038086}


EP_train:1:  13%|| 3472/27626 [08:10<57:02,  7.06it/s]

{'epoch': 1, 'iter': 3470, 'avg_loss': 8.598503918786518, 'avg_acc': 50.13234658599826, 'loss': 8.991358757019043}


EP_train:1:  13%|| 3482/27626 [08:12<57:04,  7.05it/s]

{'epoch': 1, 'iter': 3480, 'avg_loss': 8.59825724741742, 'avg_acc': 50.13465958058029, 'loss': 7.708103179931641}


EP_train:1:  13%|| 3492/27626 [08:13<57:29,  7.00it/s]

{'epoch': 1, 'iter': 3490, 'avg_loss': 8.59846490012032, 'avg_acc': 50.128007734173586, 'loss': 8.994538307189941}


EP_train:1:  13%|| 3502/27626 [08:14<57:14,  7.02it/s]

{'epoch': 1, 'iter': 3500, 'avg_loss': 8.600595047527706, 'avg_acc': 50.128534704370175, 'loss': 10.012685775756836}


EP_train:1:  13%|| 3512/27626 [08:16<57:03,  7.04it/s]

{'epoch': 1, 'iter': 3510, 'avg_loss': 8.600904559654511, 'avg_acc': 50.12905867274281, 'loss': 8.696802139282227}


EP_train:1:  13%|| 3522/27626 [08:17<56:52,  7.06it/s]

{'epoch': 1, 'iter': 3520, 'avg_loss': 8.601328308492247, 'avg_acc': 50.13046719681908, 'loss': 7.961030960083008}


EP_train:1:  13%|| 3532/27626 [08:19<57:05,  7.03it/s]

{'epoch': 1, 'iter': 3530, 'avg_loss': 8.60189499324322, 'avg_acc': 50.12744265080714, 'loss': 8.848685264587402}


EP_train:1:  13%|| 3542/27626 [08:20<56:53,  7.06it/s]

{'epoch': 1, 'iter': 3540, 'avg_loss': 8.60191869048943, 'avg_acc': 50.113844959051114, 'loss': 8.192241668701172}


EP_train:1:  13%|| 3552/27626 [08:22<57:07,  7.02it/s]

{'epoch': 1, 'iter': 3550, 'avg_loss': 8.602685976820709, 'avg_acc': 50.123204731061676, 'loss': 8.880813598632812}


EP_train:1:  13%|| 3562/27626 [08:23<57:10,  7.02it/s]

{'epoch': 1, 'iter': 3560, 'avg_loss': 8.602669846573026, 'avg_acc': 50.12636899747262, 'loss': 8.064776420593262}


EP_train:1:  13%|| 3572/27626 [08:24<56:40,  7.07it/s]

{'epoch': 1, 'iter': 3570, 'avg_loss': 8.602390724882897, 'avg_acc': 50.13126575189023, 'loss': 8.709806442260742}


EP_train:1:  13%|| 3582/27626 [08:26<56:44,  7.06it/s]

{'epoch': 1, 'iter': 3580, 'avg_loss': 8.602521477448677, 'avg_acc': 50.12740854509914, 'loss': 8.524251937866211}


EP_train:1:  13%|| 3592/27626 [08:27<56:51,  7.05it/s]

{'epoch': 1, 'iter': 3590, 'avg_loss': 8.602620591031114, 'avg_acc': 50.13053467000835, 'loss': 8.548319816589355}


EP_train:1:  13%|| 3602/27626 [08:29<56:53,  7.04it/s]

{'epoch': 1, 'iter': 3600, 'avg_loss': 8.60284733765657, 'avg_acc': 50.12756873090808, 'loss': 8.82854175567627}


EP_train:1:  13%|| 3612/27626 [08:30<56:54,  7.03it/s]

{'epoch': 1, 'iter': 3610, 'avg_loss': 8.602976303554842, 'avg_acc': 50.12894627527, 'loss': 8.287210464477539}


EP_train:1:  13%|| 3622/27626 [08:31<57:03,  7.01it/s]

{'epoch': 1, 'iter': 3620, 'avg_loss': 8.603450433551458, 'avg_acc': 50.15016570008285, 'loss': 8.946619987487793}


EP_train:1:  13%|| 3632/27626 [08:33<57:05,  7.00it/s]

{'epoch': 1, 'iter': 3630, 'avg_loss': 8.604320705774171, 'avg_acc': 50.151473423299365, 'loss': 9.341181755065918}


EP_train:1:  13%|| 3642/27626 [08:34<56:49,  7.04it/s]

{'epoch': 1, 'iter': 3640, 'avg_loss': 8.604852484530454, 'avg_acc': 50.14419115627575, 'loss': 8.788710594177246}


EP_train:1:  13%|| 3652/27626 [08:36<56:20,  7.09it/s]

{'epoch': 1, 'iter': 3650, 'avg_loss': 8.60423724026394, 'avg_acc': 50.156635168447, 'loss': 8.013514518737793}


EP_train:1:  13%|| 3662/27626 [08:37<56:47,  7.03it/s]

{'epoch': 1, 'iter': 3660, 'avg_loss': 8.603621988992032, 'avg_acc': 50.17669352635892, 'loss': 7.841681003570557}


EP_train:1:  13%|| 3672/27626 [08:39<56:34,  7.06it/s]

{'epoch': 1, 'iter': 3670, 'avg_loss': 8.603851286665487, 'avg_acc': 50.17195587033506, 'loss': 9.435832977294922}


EP_train:1:  13%|| 3682/27626 [08:40<57:03,  6.99it/s]

{'epoch': 1, 'iter': 3680, 'avg_loss': 8.603610076583037, 'avg_acc': 50.182525129041025, 'loss': 9.220659255981445}


EP_train:1:  13%|| 3692/27626 [08:41<57:24,  6.95it/s]

{'epoch': 1, 'iter': 3690, 'avg_loss': 8.603355074406833, 'avg_acc': 50.16679084259008, 'loss': 9.060123443603516}


EP_train:1:  13%|| 3702/27626 [08:43<56:44,  7.03it/s]

{'epoch': 1, 'iter': 3700, 'avg_loss': 8.60414864752171, 'avg_acc': 50.159585247230474, 'loss': 8.867781639099121}


EP_train:1:  13%|| 3712/27626 [08:44<56:24,  7.06it/s]

{'epoch': 1, 'iter': 3710, 'avg_loss': 8.604674356512483, 'avg_acc': 50.15578684990568, 'loss': 8.426054000854492}


EP_train:1:  13%|| 3722/27626 [08:46<56:17,  7.08it/s]

{'epoch': 1, 'iter': 3720, 'avg_loss': 8.604750402702244, 'avg_acc': 50.15704783660306, 'loss': 8.975248336791992}


EP_train:1:  14%|| 3732/27626 [08:47<56:20,  7.07it/s]

{'epoch': 1, 'iter': 3730, 'avg_loss': 8.604254599264243, 'avg_acc': 50.15411417850443, 'loss': 7.969954490661621}


EP_train:1:  14%|| 3742/27626 [08:48<56:20,  7.07it/s]

{'epoch': 1, 'iter': 3740, 'avg_loss': 8.604271624503177, 'avg_acc': 50.16539695268645, 'loss': 8.59609603881836}


EP_train:1:  14%|| 3752/27626 [08:50<56:20,  7.06it/s]

{'epoch': 1, 'iter': 3750, 'avg_loss': 8.604421445891813, 'avg_acc': 50.166622234070914, 'loss': 8.412508010864258}


EP_train:1:  14%|| 3762/27626 [08:51<56:28,  7.04it/s]

{'epoch': 1, 'iter': 3760, 'avg_loss': 8.603799964100464, 'avg_acc': 50.15288487104493, 'loss': 8.746719360351562}


EP_train:1:  14%|| 3772/27626 [08:53<56:44,  7.01it/s]

{'epoch': 1, 'iter': 3770, 'avg_loss': 8.603313366508837, 'avg_acc': 50.14667859984089, 'loss': 7.917550086975098}


EP_train:1:  14%|| 3782/27626 [08:54<56:55,  6.98it/s]

{'epoch': 1, 'iter': 3780, 'avg_loss': 8.604983785927406, 'avg_acc': 50.14463766199418, 'loss': 9.821663856506348}


EP_train:1:  14%|| 3792/27626 [08:55<56:26,  7.04it/s]

{'epoch': 1, 'iter': 3790, 'avg_loss': 8.60548683924714, 'avg_acc': 50.132715642310735, 'loss': 8.891596794128418}


EP_train:1:  14%|| 3802/27626 [08:57<56:25,  7.04it/s]

{'epoch': 1, 'iter': 3800, 'avg_loss': 8.605746878162556, 'avg_acc': 50.13072217837411, 'loss': 8.893922805786133}


EP_train:1:  14%|| 3812/27626 [08:58<56:23,  7.04it/s]

{'epoch': 1, 'iter': 3810, 'avg_loss': 8.605709118844329, 'avg_acc': 50.13857911309367, 'loss': 8.391088485717773}


EP_train:1:  14%|| 3822/27626 [09:00<56:26,  7.03it/s]

{'epoch': 1, 'iter': 3820, 'avg_loss': 8.605438510712077, 'avg_acc': 50.14394137660298, 'loss': 7.794990539550781}


EP_train:1:  14%|| 3832/27626 [09:01<56:03,  7.07it/s]

{'epoch': 1, 'iter': 3830, 'avg_loss': 8.605300643306524, 'avg_acc': 50.14356564865571, 'loss': 8.657034873962402}


EP_train:1:  14%|| 3842/27626 [09:03<56:26,  7.02it/s]

{'epoch': 1, 'iter': 3840, 'avg_loss': 8.604907287839985, 'avg_acc': 50.14563264774799, 'loss': 9.189956665039062}


EP_train:1:  14%|| 3852/27626 [09:04<56:05,  7.06it/s]

{'epoch': 1, 'iter': 3850, 'avg_loss': 8.604500866902088, 'avg_acc': 50.13551674889639, 'loss': 8.58267879486084}


EP_train:1:  14%|| 3862/27626 [09:05<56:14,  7.04it/s]

{'epoch': 1, 'iter': 3860, 'avg_loss': 8.60495337733516, 'avg_acc': 50.12626262626263, 'loss': 8.97336483001709}


EP_train:1:  14%|| 3872/27626 [09:07<56:09,  7.05it/s]

{'epoch': 1, 'iter': 3870, 'avg_loss': 8.605104449121134, 'avg_acc': 50.12916559028675, 'loss': 7.800031661987305}


EP_train:1:  14%|| 3882/27626 [09:08<56:28,  7.01it/s]

{'epoch': 1, 'iter': 3880, 'avg_loss': 8.603974396652305, 'avg_acc': 50.14654728162845, 'loss': 8.843785285949707}


EP_train:1:  14%|| 3892/27626 [09:10<56:02,  7.06it/s]

{'epoch': 1, 'iter': 3890, 'avg_loss': 8.60413206502914, 'avg_acc': 50.15179259830378, 'loss': 9.386000633239746}


EP_train:1:  14%|| 3902/27626 [09:11<56:30,  7.00it/s]

{'epoch': 1, 'iter': 3900, 'avg_loss': 8.603878367775314, 'avg_acc': 50.143392719815424, 'loss': 8.686887741088867}


EP_train:1:  14%|| 3912/27626 [09:12<56:38,  6.98it/s]

{'epoch': 1, 'iter': 3910, 'avg_loss': 8.603882588318504, 'avg_acc': 50.14222705190489, 'loss': 8.310461044311523}


EP_train:1:  14%|| 3922/27626 [09:14<56:08,  7.04it/s]

{'epoch': 1, 'iter': 3920, 'avg_loss': 8.60434959713703, 'avg_acc': 50.14345830145371, 'loss': 8.637919425964355}


EP_train:1:  14%|| 3932/27626 [09:15<56:23,  7.00it/s]

{'epoch': 1, 'iter': 3930, 'avg_loss': 8.604246568570638, 'avg_acc': 50.1446832866955, 'loss': 8.323410034179688}


EP_train:1:  14%|| 3942/27626 [09:17<56:12,  7.02it/s]

{'epoch': 1, 'iter': 3940, 'avg_loss': 8.604428473174709, 'avg_acc': 50.15462446079675, 'loss': 9.284459114074707}


EP_train:1:  14%|| 3952/27626 [09:18<56:14,  7.01it/s]

{'epoch': 1, 'iter': 3950, 'avg_loss': 8.605296863178397, 'avg_acc': 50.155024044545684, 'loss': 8.866888046264648}


EP_train:1:  14%|| 3962/27626 [09:20<56:20,  7.00it/s]

{'epoch': 1, 'iter': 3960, 'avg_loss': 8.606063237728359, 'avg_acc': 50.15857737944963, 'loss': 8.481861114501953}


EP_train:1:  14%|| 3972/27626 [09:21<55:46,  7.07it/s]

{'epoch': 1, 'iter': 3970, 'avg_loss': 8.605635694279474, 'avg_acc': 50.14716066481994, 'loss': 8.460186004638672}


EP_train:1:  14%|| 3982/27626 [09:22<55:54,  7.05it/s]

{'epoch': 1, 'iter': 3980, 'avg_loss': 8.606308912039102, 'avg_acc': 50.14208113539311, 'loss': 8.555781364440918}


EP_train:1:  14%|| 3992/27626 [09:24<56:15,  7.00it/s]

{'epoch': 1, 'iter': 3990, 'avg_loss': 8.605617851399742, 'avg_acc': 50.14642320220496, 'loss': 7.7117509841918945}


EP_train:1:  14%|| 4002/27626 [09:25<55:51,  7.05it/s]

{'epoch': 1, 'iter': 4000, 'avg_loss': 8.604701116185758, 'avg_acc': 50.14371407148212, 'loss': 7.868401050567627}


EP_train:1:  15%|| 4012/27626 [09:27<55:33,  7.08it/s]

{'epoch': 1, 'iter': 4010, 'avg_loss': 8.605130986304273, 'avg_acc': 50.14101844926452, 'loss': 8.254111289978027}


EP_train:1:  15%|| 4022/27626 [09:28<55:35,  7.08it/s]

{'epoch': 1, 'iter': 4020, 'avg_loss': 8.605096429238062, 'avg_acc': 50.142222084058695, 'loss': 7.952121257781982}


EP_train:1:  15%|| 4032/27626 [09:29<55:59,  7.02it/s]

{'epoch': 1, 'iter': 4030, 'avg_loss': 8.604815987492577, 'avg_acc': 50.1356673282064, 'loss': 8.722185134887695}


EP_train:1:  15%|| 4042/27626 [09:31<55:41,  7.06it/s]

{'epoch': 1, 'iter': 4040, 'avg_loss': 8.604898296055065, 'avg_acc': 50.13610492452363, 'loss': 8.181431770324707}


EP_train:1:  15%|| 4052/27626 [09:32<56:37,  6.94it/s]

{'epoch': 1, 'iter': 4050, 'avg_loss': 8.604632496980937, 'avg_acc': 50.13345470254258, 'loss': 8.305497169494629}


EP_train:1:  15%|| 4062/27626 [09:34<55:38,  7.06it/s]

{'epoch': 1, 'iter': 4060, 'avg_loss': 8.605788267060943, 'avg_acc': 50.113118689977846, 'loss': 9.997431755065918}


EP_train:1:  15%|| 4072/27626 [09:35<55:28,  7.08it/s]

{'epoch': 1, 'iter': 4070, 'avg_loss': 8.605179568234254, 'avg_acc': 50.11514369933677, 'loss': 8.209578514099121}


EP_train:1:  15%|| 4082/27626 [09:37<55:35,  7.06it/s]

{'epoch': 1, 'iter': 4080, 'avg_loss': 8.605382471918855, 'avg_acc': 50.112564322469986, 'loss': 8.635642051696777}


EP_train:1:  15%|| 4092/27626 [09:38<55:38,  7.05it/s]

{'epoch': 1, 'iter': 4090, 'avg_loss': 8.604574867170209, 'avg_acc': 50.119164018577365, 'loss': 8.722043991088867}


EP_train:1:  15%|| 4102/27626 [09:39<55:11,  7.10it/s]

{'epoch': 1, 'iter': 4100, 'avg_loss': 8.60391948967845, 'avg_acc': 50.125731528895386, 'loss': 8.834525108337402}


EP_train:1:  15%|| 4112/27626 [09:41<55:25,  7.07it/s]

{'epoch': 1, 'iter': 4110, 'avg_loss': 8.6038347977091, 'avg_acc': 50.12314522014108, 'loss': 8.792132377624512}


EP_train:1:  15%|| 4122/27626 [09:42<55:36,  7.05it/s]

{'epoch': 1, 'iter': 4120, 'avg_loss': 8.604101560440379, 'avg_acc': 50.11450497452075, 'loss': 8.401556015014648}


EP_train:1:  15%|| 4132/27626 [09:44<55:35,  7.04it/s]

{'epoch': 1, 'iter': 4130, 'avg_loss': 8.603678680568594, 'avg_acc': 50.107419511014285, 'loss': 9.297402381896973}


EP_train:1:  15%|| 4142/27626 [09:45<55:54,  7.00it/s]

{'epoch': 1, 'iter': 4140, 'avg_loss': 8.603414007097994, 'avg_acc': 50.11093334943251, 'loss': 8.75661563873291}


EP_train:1:  15%|| 4152/27626 [09:46<55:30,  7.05it/s]

{'epoch': 1, 'iter': 4150, 'avg_loss': 8.603293313545883, 'avg_acc': 50.101632136834496, 'loss': 8.047784805297852}


EP_train:1:  15%|| 4162/27626 [09:48<55:26,  7.05it/s]

{'epoch': 1, 'iter': 4160, 'avg_loss': 8.603513016433734, 'avg_acc': 50.09613073780341, 'loss': 8.73012638092041}


EP_train:1:  15%|| 4172/27626 [09:49<55:22,  7.06it/s]

{'epoch': 1, 'iter': 4170, 'avg_loss': 8.604068827783424, 'avg_acc': 50.083912730760005, 'loss': 8.151765823364258}


EP_train:1:  15%|| 4182/27626 [09:51<55:23,  7.05it/s]

{'epoch': 1, 'iter': 4180, 'avg_loss': 8.605039451601858, 'avg_acc': 50.08670174599378, 'loss': 9.169782638549805}


EP_train:1:  15%|| 4192/27626 [09:52<55:14,  7.07it/s]

{'epoch': 1, 'iter': 4190, 'avg_loss': 8.604644724873875, 'avg_acc': 50.087986160820805, 'loss': 8.067575454711914}


EP_train:1:  15%|| 4202/27626 [09:53<55:22,  7.05it/s]

{'epoch': 1, 'iter': 4200, 'avg_loss': 8.604582643769975, 'avg_acc': 50.07141156867413, 'loss': 8.605216026306152}


EP_train:1:  15%|| 4212/27626 [09:55<55:17,  7.06it/s]

{'epoch': 1, 'iter': 4210, 'avg_loss': 8.603979055458563, 'avg_acc': 50.056399905010686, 'loss': 8.427750587463379}


EP_train:1:  15%|| 4222/27626 [09:56<55:49,  6.99it/s]

{'epoch': 1, 'iter': 4220, 'avg_loss': 8.604308750244769, 'avg_acc': 50.05404524994077, 'loss': 9.008391380310059}


EP_train:1:  15%|| 4232/27626 [09:58<55:38,  7.01it/s]

{'epoch': 1, 'iter': 4230, 'avg_loss': 8.603630534415267, 'avg_acc': 50.056133301819905, 'loss': 8.522379875183105}


EP_train:1:  15%|| 4242/27626 [09:59<54:33,  7.14it/s]

{'epoch': 1, 'iter': 4240, 'avg_loss': 8.603995153853928, 'avg_acc': 50.050106107050226, 'loss': 9.110102653503418}


EP_train:1:  15%|| 4252/27626 [10:00<54:50,  7.10it/s]

{'epoch': 1, 'iter': 4250, 'avg_loss': 8.604161108614164, 'avg_acc': 50.05439896494942, 'loss': 8.557333946228027}


EP_train:1:  15%|| 4262/27626 [10:02<55:16,  7.04it/s]

{'epoch': 1, 'iter': 4260, 'avg_loss': 8.604746899003855, 'avg_acc': 50.05573808965031, 'loss': 9.275240898132324}


EP_train:1:  15%|| 4272/27626 [10:03<54:56,  7.08it/s]

{'epoch': 1, 'iter': 4270, 'avg_loss': 8.605350475851642, 'avg_acc': 50.0592659798642, 'loss': 8.611010551452637}


EP_train:1:  15%|| 4282/27626 [10:05<55:01,  7.07it/s]

{'epoch': 1, 'iter': 4280, 'avg_loss': 8.605322359098894, 'avg_acc': 50.06350735809391, 'loss': 9.407259941101074}


EP_train:1:  16%|| 4292/27626 [10:06<55:02,  7.07it/s]

{'epoch': 1, 'iter': 4290, 'avg_loss': 8.605888842314002, 'avg_acc': 50.06772896760662, 'loss': 8.874669075012207}


EP_train:1:  16%|| 4302/27626 [10:08<55:21,  7.02it/s]

{'epoch': 1, 'iter': 4300, 'avg_loss': 8.605119653314636, 'avg_acc': 50.06321204371077, 'loss': 8.36664867401123}


EP_train:1:  16%|| 4312/27626 [10:09<55:14,  7.03it/s]

{'epoch': 1, 'iter': 4310, 'avg_loss': 8.604574100624461, 'avg_acc': 50.05581651588958, 'loss': 8.698251724243164}


EP_train:1:  16%|| 4322/27626 [10:10<55:19,  7.02it/s]

{'epoch': 1, 'iter': 4320, 'avg_loss': 8.605458879548072, 'avg_acc': 50.054964128673916, 'loss': 9.59168815612793}


EP_train:1:  16%|| 4332/27626 [10:12<54:53,  7.07it/s]

{'epoch': 1, 'iter': 4330, 'avg_loss': 8.605720529157717, 'avg_acc': 50.05772338951743, 'loss': 8.252447128295898}


EP_train:1:  16%|| 4342/27626 [10:13<55:01,  7.05it/s]

{'epoch': 1, 'iter': 4340, 'avg_loss': 8.606371396808848, 'avg_acc': 50.05543077631882, 'loss': 8.450176239013672}


EP_train:1:  16%|| 4352/27626 [10:15<55:02,  7.05it/s]

{'epoch': 1, 'iter': 4350, 'avg_loss': 8.606432732043116, 'avg_acc': 50.05889450700989, 'loss': 8.119535446166992}


EP_train:1:  16%|| 4362/27626 [10:16<54:36,  7.10it/s]

{'epoch': 1, 'iter': 4360, 'avg_loss': 8.60599825005552, 'avg_acc': 50.057326301307036, 'loss': 8.472160339355469}


EP_train:1:  16%|| 4372/27626 [10:17<54:46,  7.08it/s]

{'epoch': 1, 'iter': 4370, 'avg_loss': 8.606576381954769, 'avg_acc': 50.052190574239305, 'loss': 8.193663597106934}


EP_train:1:  16%|| 4382/27626 [10:19<54:51,  7.06it/s]

{'epoch': 1, 'iter': 4380, 'avg_loss': 8.60610742880258, 'avg_acc': 50.04279844784296, 'loss': 8.856378555297852}


EP_train:1:  16%|| 4392/27626 [10:20<55:00,  7.04it/s]

{'epoch': 1, 'iter': 4390, 'avg_loss': 8.60618571175228, 'avg_acc': 50.0476827601913, 'loss': 9.102322578430176}


EP_train:1:  16%|| 4402/27626 [10:22<55:23,  6.99it/s]

{'epoch': 1, 'iter': 4400, 'avg_loss': 8.606598947785491, 'avg_acc': 50.046154283117474, 'loss': 8.555720329284668}


EP_train:1:  16%|| 4412/27626 [10:23<55:18,  7.00it/s]

{'epoch': 1, 'iter': 4410, 'avg_loss': 8.607334391110285, 'avg_acc': 50.04392428020857, 'loss': 8.318083763122559}


EP_train:1:  16%|| 4422/27626 [10:25<54:34,  7.09it/s]

{'epoch': 1, 'iter': 4420, 'avg_loss': 8.607752567555295, 'avg_acc': 50.045238633793254, 'loss': 8.899270057678223}


EP_train:1:  16%|| 4432/27626 [10:26<54:35,  7.08it/s]

{'epoch': 1, 'iter': 4430, 'avg_loss': 8.60834324857608, 'avg_acc': 50.045136538027535, 'loss': 10.113703727722168}


EP_train:1:  16%|| 4442/27626 [10:27<54:44,  7.06it/s]

{'epoch': 1, 'iter': 4440, 'avg_loss': 8.608418799230467, 'avg_acc': 50.0457385723936, 'loss': 8.275100708007812}


EP_train:1:  16%|| 4452/27626 [10:29<54:34,  7.08it/s]

{'epoch': 1, 'iter': 4450, 'avg_loss': 8.608393826395762, 'avg_acc': 50.04844416984947, 'loss': 8.48183822631836}


EP_train:1:  16%|| 4462/27626 [10:30<54:47,  7.05it/s]

{'epoch': 1, 'iter': 4460, 'avg_loss': 8.608100172597393, 'avg_acc': 50.059543824254646, 'loss': 9.637533187866211}


EP_train:1:  16%|| 4472/27626 [10:32<54:44,  7.05it/s]

{'epoch': 1, 'iter': 4470, 'avg_loss': 8.60824436758541, 'avg_acc': 50.065002236636104, 'loss': 8.669550895690918}


EP_train:1:  16%|| 4482/27626 [10:33<54:18,  7.10it/s]

{'epoch': 1, 'iter': 4480, 'avg_loss': 8.608179080276132, 'avg_acc': 50.06415978576211, 'loss': 8.704781532287598}


EP_train:1:  16%|| 4492/27626 [10:34<54:54,  7.02it/s]

{'epoch': 1, 'iter': 4490, 'avg_loss': 8.607505964187014, 'avg_acc': 50.063321086617684, 'loss': 8.015188217163086}


EP_train:1:  16%|| 4502/27626 [10:36<54:21,  7.09it/s]

{'epoch': 1, 'iter': 4500, 'avg_loss': 8.607119537358601, 'avg_acc': 50.0715118862475, 'loss': 9.514729499816895}


EP_train:1:  16%|| 4512/27626 [10:37<54:09,  7.11it/s]

{'epoch': 1, 'iter': 4510, 'avg_loss': 8.607593115669374, 'avg_acc': 50.0713533584571, 'loss': 8.506269454956055}


EP_train:1:  16%|| 4522/27626 [10:39<54:44,  7.03it/s]

{'epoch': 1, 'iter': 4520, 'avg_loss': 8.606973427108262, 'avg_acc': 50.064283344392834, 'loss': 8.582945823669434}


EP_train:1:  16%|| 4532/27626 [10:40<54:06,  7.11it/s]

{'epoch': 1, 'iter': 4530, 'avg_loss': 8.606551281797046, 'avg_acc': 50.058623924078574, 'loss': 8.747703552246094}


EP_train:1:  16%|| 4542/27626 [10:41<54:55,  7.00it/s]

{'epoch': 1, 'iter': 4540, 'avg_loss': 8.606508710000568, 'avg_acc': 50.04473133670998, 'loss': 8.562860488891602}


EP_train:1:  16%|| 4552/27626 [10:43<54:30,  7.05it/s]

{'epoch': 1, 'iter': 4550, 'avg_loss': 8.60641219363949, 'avg_acc': 50.030899802241265, 'loss': 7.9217424392700195}


EP_train:1:  17%|| 4562/27626 [10:44<54:02,  7.11it/s]

{'epoch': 1, 'iter': 4560, 'avg_loss': 8.605703506729002, 'avg_acc': 50.023295329971496, 'loss': 8.672472953796387}


EP_train:1:  17%|| 4572/27626 [10:46<54:23,  7.06it/s]

{'epoch': 1, 'iter': 4570, 'avg_loss': 8.60593189522412, 'avg_acc': 50.02187705097353, 'loss': 8.582243919372559}


EP_train:1:  17%|| 4582/27626 [10:47<54:17,  7.07it/s]

{'epoch': 1, 'iter': 4580, 'avg_loss': 8.606034079953277, 'avg_acc': 50.023193625845884, 'loss': 8.525588035583496}


EP_train:1:  17%|| 4592/27626 [10:48<55:09,  6.96it/s]

{'epoch': 1, 'iter': 4590, 'avg_loss': 8.60572275806943, 'avg_acc': 50.02314310607711, 'loss': 8.016946792602539}


EP_train:1:  17%|| 4602/27626 [10:50<54:21,  7.06it/s]

{'epoch': 1, 'iter': 4600, 'avg_loss': 8.606090729611253, 'avg_acc': 50.02445120625951, 'loss': 8.381624221801758}


EP_train:1:  17%|| 4612/27626 [10:51<54:22,  7.05it/s]

{'epoch': 1, 'iter': 4610, 'avg_loss': 8.606271337051243, 'avg_acc': 50.03253090435914, 'loss': 8.97717571258545}


EP_train:1:  17%|| 4622/27626 [10:53<54:45,  7.00it/s]

{'epoch': 1, 'iter': 4620, 'avg_loss': 8.605495405001085, 'avg_acc': 50.03719433023155, 'loss': 8.380352973937988}


EP_train:1:  17%|| 4632/27626 [10:54<54:19,  7.05it/s]

{'epoch': 1, 'iter': 4630, 'avg_loss': 8.60552305826176, 'avg_acc': 50.04521161736126, 'loss': 8.37890625}


EP_train:1:  17%|| 4642/27626 [10:56<54:07,  7.08it/s]

{'epoch': 1, 'iter': 4640, 'avg_loss': 8.605757853769584, 'avg_acc': 50.04309416074122, 'loss': 8.101166725158691}


EP_train:1:  17%|| 4652/27626 [10:57<54:39,  7.01it/s]

{'epoch': 1, 'iter': 4650, 'avg_loss': 8.605414448284739, 'avg_acc': 50.04501720060202, 'loss': 9.188910484313965}


EP_train:1:  17%|| 4662/27626 [10:58<54:17,  7.05it/s]

{'epoch': 1, 'iter': 4660, 'avg_loss': 8.605879227610213, 'avg_acc': 50.04693198884359, 'loss': 8.743163108825684}


EP_train:1:  17%|| 4672/27626 [11:00<54:12,  7.06it/s]

{'epoch': 1, 'iter': 4670, 'avg_loss': 8.605490448183433, 'avg_acc': 50.0535217298223, 'loss': 8.037345886230469}


EP_train:1:  17%|| 4682/27626 [11:01<54:52,  6.97it/s]

{'epoch': 1, 'iter': 4680, 'avg_loss': 8.605623626016293, 'avg_acc': 50.051404614398635, 'loss': 7.963576793670654}


EP_train:1:  17%|| 4692/27626 [11:03<54:19,  7.04it/s]

{'epoch': 1, 'iter': 4690, 'avg_loss': 8.606300349023087, 'avg_acc': 50.051295033041995, 'loss': 8.346497535705566}


EP_train:1:  17%|| 4702/27626 [11:04<53:42,  7.11it/s]

{'epoch': 1, 'iter': 4700, 'avg_loss': 8.606932828639877, 'avg_acc': 50.04653265262709, 'loss': 9.071600914001465}


EP_train:1:  17%|| 4712/27626 [11:05<54:04,  7.06it/s]

{'epoch': 1, 'iter': 4710, 'avg_loss': 8.607308101714784, 'avg_acc': 50.05505731267247, 'loss': 8.220705032348633}


EP_train:1:  17%|| 4722/27626 [11:07<54:01,  7.06it/s]

{'epoch': 1, 'iter': 4720, 'avg_loss': 8.606768395279051, 'avg_acc': 50.06023617877568, 'loss': 8.927123069763184}


EP_train:1:  17%|| 4732/27626 [11:08<53:48,  7.09it/s]

{'epoch': 1, 'iter': 4730, 'avg_loss': 8.607350274035403, 'avg_acc': 50.06209046713168, 'loss': 8.957215309143066}


EP_train:1:  17%|| 4742/27626 [11:10<54:18,  7.02it/s]

{'epoch': 1, 'iter': 4740, 'avg_loss': 8.607960801968417, 'avg_acc': 50.06195950221473, 'loss': 8.316908836364746}


EP_train:1:  17%|| 4752/27626 [11:11<53:55,  7.07it/s]

{'epoch': 1, 'iter': 4750, 'avg_loss': 8.607347899601349, 'avg_acc': 50.06906440749316, 'loss': 7.32435941696167}


EP_train:1:  17%|| 4762/27626 [11:12<54:02,  7.05it/s]

{'epoch': 1, 'iter': 4760, 'avg_loss': 8.607449941088287, 'avg_acc': 50.07351396765385, 'loss': 8.226831436157227}


EP_train:1:  17%|| 4772/27626 [11:14<53:50,  7.07it/s]

{'epoch': 1, 'iter': 4770, 'avg_loss': 8.607798227247415, 'avg_acc': 50.07466988052819, 'loss': 8.71630859375}


EP_train:1:  17%|| 4782/27626 [11:15<53:27,  7.12it/s]

{'epoch': 1, 'iter': 4780, 'avg_loss': 8.607873201021164, 'avg_acc': 50.080396360594015, 'loss': 9.09241771697998}


EP_train:1:  17%|| 4792/27626 [11:17<54:09,  7.03it/s]

{'epoch': 1, 'iter': 4790, 'avg_loss': 8.60743357995387, 'avg_acc': 50.08283761218952, 'loss': 7.964288711547852}


EP_train:1:  17%|| 4802/27626 [11:18<54:30,  6.98it/s]

{'epoch': 1, 'iter': 4800, 'avg_loss': 8.607600160351248, 'avg_acc': 50.08982503645074, 'loss': 9.407196044921875}


EP_train:1:  17%|| 4812/27626 [11:20<53:32,  7.10it/s]

{'epoch': 1, 'iter': 4810, 'avg_loss': 8.60792384814681, 'avg_acc': 50.07924547911037, 'loss': 8.39270305633545}


EP_train:1:  17%|| 4822/27626 [11:21<53:41,  7.08it/s]

{'epoch': 1, 'iter': 4820, 'avg_loss': 8.607758065967287, 'avg_acc': 50.07972930927194, 'loss': 8.542499542236328}


EP_train:1:  17%|| 4832/27626 [11:22<53:42,  7.07it/s]

{'epoch': 1, 'iter': 4830, 'avg_loss': 8.60765674093993, 'avg_acc': 50.08021113641068, 'loss': 8.516278266906738}


EP_train:1:  18%|| 4842/27626 [11:24<53:34,  7.09it/s]

{'epoch': 1, 'iter': 4840, 'avg_loss': 8.607627177873788, 'avg_acc': 50.07036252840322, 'loss': 7.901295185089111}


EP_train:1:  18%|| 4852/27626 [11:25<53:36,  7.08it/s]

{'epoch': 1, 'iter': 4850, 'avg_loss': 8.60750685276825, 'avg_acc': 50.06699649556793, 'loss': 8.445577621459961}


EP_train:1:  18%|| 4862/27626 [11:27<53:38,  7.07it/s]

{'epoch': 1, 'iter': 4860, 'avg_loss': 8.60687202515609, 'avg_acc': 50.0610728245217, 'loss': 7.6045098304748535}


EP_train:1:  18%|| 4872/27626 [11:28<54:05,  7.01it/s]

{'epoch': 1, 'iter': 4870, 'avg_loss': 8.607202131090848, 'avg_acc': 50.058381235885854, 'loss': 8.622468948364258}


EP_train:1:  18%|| 4882/27626 [11:29<53:47,  7.05it/s]

{'epoch': 1, 'iter': 4880, 'avg_loss': 8.606686930020455, 'avg_acc': 50.06146281499693, 'loss': 8.145561218261719}


EP_train:1:  18%|| 4892/27626 [11:31<53:26,  7.09it/s]

{'epoch': 1, 'iter': 4890, 'avg_loss': 8.607290264337104, 'avg_acc': 50.06325393580045, 'loss': 8.287353515625}


EP_train:1:  18%|| 4902/27626 [11:32<53:26,  7.09it/s]

{'epoch': 1, 'iter': 4900, 'avg_loss': 8.606909919821275, 'avg_acc': 50.05228524790859, 'loss': 8.571069717407227}


EP_train:1:  18%|| 4912/27626 [11:34<53:42,  7.05it/s]

{'epoch': 1, 'iter': 4910, 'avg_loss': 8.60707719368256, 'avg_acc': 50.05408776216657, 'loss': 8.403451919555664}


EP_train:1:  18%|| 4922/27626 [11:35<53:35,  7.06it/s]

{'epoch': 1, 'iter': 4920, 'avg_loss': 8.607090035387957, 'avg_acc': 50.04381731355415, 'loss': 8.591835021972656}


EP_train:1:  18%|| 4932/27626 [11:36<53:14,  7.10it/s]

{'epoch': 1, 'iter': 4930, 'avg_loss': 8.607599220805806, 'avg_acc': 50.044362198337055, 'loss': 8.36263656616211}


EP_train:1:  18%|| 4942/27626 [11:38<53:41,  7.04it/s]

{'epoch': 1, 'iter': 4940, 'avg_loss': 8.60781134094118, 'avg_acc': 50.05502428658166, 'loss': 8.763178825378418}


EP_train:1:  18%|| 4952/27626 [11:39<53:46,  7.03it/s]

{'epoch': 1, 'iter': 4950, 'avg_loss': 8.608242685783754, 'avg_acc': 50.05301959200161, 'loss': 9.697863578796387}


EP_train:1:  18%|| 4962/27626 [11:41<53:34,  7.05it/s]

{'epoch': 1, 'iter': 4960, 'avg_loss': 8.608729763872992, 'avg_acc': 50.05606228582947, 'loss': 8.733957290649414}


EP_train:1:  18%|| 4972/27626 [11:42<53:44,  7.03it/s]

{'epoch': 1, 'iter': 4970, 'avg_loss': 8.608085068196411, 'avg_acc': 50.05594950714142, 'loss': 7.764002323150635}


EP_train:1:  18%|| 4982/27626 [11:44<52:58,  7.12it/s]

{'epoch': 1, 'iter': 4980, 'avg_loss': 8.60840286861053, 'avg_acc': 50.05897410158603, 'loss': 8.144164085388184}


EP_train:1:  18%|| 4992/27626 [11:45<53:04,  7.11it/s]

{'epoch': 1, 'iter': 4990, 'avg_loss': 8.608163110384886, 'avg_acc': 50.06261270286516, 'loss': 8.021600723266602}


EP_train:1:  18%|| 5002/27626 [11:46<53:25,  7.06it/s]

{'epoch': 1, 'iter': 5000, 'avg_loss': 8.60842439532876, 'avg_acc': 50.06123775244951, 'loss': 8.671296119689941}


EP_train:1:  18%|| 5012/27626 [11:48<53:33,  7.04it/s]

{'epoch': 1, 'iter': 5010, 'avg_loss': 8.608361911069512, 'avg_acc': 50.06485731390939, 'loss': 8.748102188110352}


EP_train:1:  18%|| 5022/27626 [11:49<53:10,  7.08it/s]

{'epoch': 1, 'iter': 5020, 'avg_loss': 8.608683999423794, 'avg_acc': 50.06659529974109, 'loss': 8.503803253173828}


EP_train:1:  18%|| 5032/27626 [11:51<53:36,  7.02it/s]

{'epoch': 1, 'iter': 5030, 'avg_loss': 8.608434516756606, 'avg_acc': 50.080128205128204, 'loss': 8.56432819366455}


EP_train:1:  18%|| 5042/27626 [11:52<54:01,  6.97it/s]

{'epoch': 1, 'iter': 5040, 'avg_loss': 8.607842808984334, 'avg_acc': 50.08120908549891, 'loss': 8.417886734008789}


EP_train:1:  18%|| 5052/27626 [11:53<53:15,  7.06it/s]

{'epoch': 1, 'iter': 5050, 'avg_loss': 8.608029671410149, 'avg_acc': 50.07424272421302, 'loss': 8.844072341918945}


EP_train:1:  18%|| 5062/27626 [11:55<53:21,  7.05it/s]

{'epoch': 1, 'iter': 5060, 'avg_loss': 8.608084593194201, 'avg_acc': 50.07533096226042, 'loss': 8.767487525939941}


EP_train:1:  18%|| 5072/27626 [11:56<53:22,  7.04it/s]

{'epoch': 1, 'iter': 5070, 'avg_loss': 8.608159058194168, 'avg_acc': 50.081344902386114, 'loss': 8.864949226379395}


EP_train:1:  18%|| 5082/27626 [11:58<52:52,  7.11it/s]

{'epoch': 1, 'iter': 5080, 'avg_loss': 8.607756027952936, 'avg_acc': 50.07995473332021, 'loss': 8.691977500915527}


EP_train:1:  18%|| 5092/27626 [11:59<53:06,  7.07it/s]

{'epoch': 1, 'iter': 5090, 'avg_loss': 8.607621990657934, 'avg_acc': 50.07795619721076, 'loss': 8.203441619873047}


EP_train:1:  18%|| 5102/27626 [12:00<53:02,  7.08it/s]

{'epoch': 1, 'iter': 5100, 'avg_loss': 8.607524007611405, 'avg_acc': 50.081479121740834, 'loss': 8.457330703735352}


EP_train:1:  19%|| 5112/27626 [12:02<52:58,  7.08it/s]

{'epoch': 1, 'iter': 5110, 'avg_loss': 8.6073731893757, 'avg_acc': 50.08559968694971, 'loss': 8.263160705566406}


EP_train:1:  19%|| 5122/27626 [12:03<53:15,  7.04it/s]

{'epoch': 1, 'iter': 5120, 'avg_loss': 8.606884079039085, 'avg_acc': 50.09336555360281, 'loss': 8.028176307678223}


EP_train:1:  19%|| 5132/27626 [12:05<53:09,  7.05it/s]

{'epoch': 1, 'iter': 5130, 'avg_loss': 8.606391099618671, 'avg_acc': 50.09196550380043, 'loss': 8.500847816467285}


EP_train:1:  19%|| 5142/27626 [12:06<52:46,  7.10it/s]

{'epoch': 1, 'iter': 5140, 'avg_loss': 8.605845742502975, 'avg_acc': 50.09057090060299, 'loss': 8.124993324279785}


EP_train:1:  19%|| 5152/27626 [12:08<52:46,  7.10it/s]

{'epoch': 1, 'iter': 5150, 'avg_loss': 8.60532006863783, 'avg_acc': 50.08857503397398, 'loss': 8.326414108276367}


EP_train:1:  19%|| 5162/27626 [12:09<52:49,  7.09it/s]

{'epoch': 1, 'iter': 5160, 'avg_loss': 8.605561270175158, 'avg_acc': 50.08537589614416, 'loss': 7.829347133636475}


EP_train:1:  19%|| 5172/27626 [12:10<52:52,  7.08it/s]

{'epoch': 1, 'iter': 5170, 'avg_loss': 8.605870154765698, 'avg_acc': 50.08823245020305, 'loss': 8.746049880981445}


EP_train:1:  19%|| 5182/27626 [12:12<53:06,  7.04it/s]

{'epoch': 1, 'iter': 5180, 'avg_loss': 8.606674057846938, 'avg_acc': 50.098919127581546, 'loss': 8.411079406738281}


EP_train:1:  19%|| 5192/27626 [12:13<52:54,  7.07it/s]

{'epoch': 1, 'iter': 5190, 'avg_loss': 8.606589559828057, 'avg_acc': 50.10173858601426, 'loss': 8.859335899353027}


EP_train:1:  19%|| 5202/27626 [12:15<52:39,  7.10it/s]

{'epoch': 1, 'iter': 5200, 'avg_loss': 8.606970017012713, 'avg_acc': 50.108152278408, 'loss': 8.188594818115234}


EP_train:1:  19%|| 5212/27626 [12:16<52:53,  7.06it/s]

{'epoch': 1, 'iter': 5210, 'avg_loss': 8.607231441465593, 'avg_acc': 50.099549030896185, 'loss': 8.639967918395996}


EP_train:1:  19%|| 5222/27626 [12:17<53:05,  7.03it/s]

{'epoch': 1, 'iter': 5220, 'avg_loss': 8.607678273381275, 'avg_acc': 50.095168550086186, 'loss': 8.795124053955078}


EP_train:1:  19%|| 5232/27626 [12:19<52:53,  7.06it/s]

{'epoch': 1, 'iter': 5230, 'avg_loss': 8.608007213258533, 'avg_acc': 50.09976581915504, 'loss': 8.945545196533203}


EP_train:1:  19%|| 5242/27626 [12:20<52:35,  7.09it/s]

{'epoch': 1, 'iter': 5240, 'avg_loss': 8.607762434081799, 'avg_acc': 50.09361286014119, 'loss': 9.332788467407227}


EP_train:1:  19%|| 5252/27626 [12:22<52:35,  7.09it/s]

{'epoch': 1, 'iter': 5250, 'avg_loss': 8.607661840144258, 'avg_acc': 50.09938583127024, 'loss': 8.70258903503418}


EP_train:1:  19%|| 5262/27626 [12:23<52:53,  7.05it/s]

{'epoch': 1, 'iter': 5260, 'avg_loss': 8.60692453130618, 'avg_acc': 50.097414940125454, 'loss': 9.1740083694458}


EP_train:1:  19%|| 5272/27626 [12:24<52:41,  7.07it/s]

{'epoch': 1, 'iter': 5270, 'avg_loss': 8.605728274888003, 'avg_acc': 50.09189432745209, 'loss': 8.060694694519043}


EP_train:1:  19%|| 5282/27626 [12:26<52:28,  7.10it/s]

{'epoch': 1, 'iter': 5280, 'avg_loss': 8.605277469947785, 'avg_acc': 50.0875781102064, 'loss': 8.851066589355469}


EP_train:1:  19%|| 5292/27626 [12:27<52:51,  7.04it/s]

{'epoch': 1, 'iter': 5290, 'avg_loss': 8.604539088954143, 'avg_acc': 50.088593838593845, 'loss': 7.47442102432251}


EP_train:1:  19%|| 5302/27626 [12:29<52:57,  7.03it/s]

{'epoch': 1, 'iter': 5300, 'avg_loss': 8.60454357940956, 'avg_acc': 50.087247689115266, 'loss': 8.148329734802246}


EP_train:1:  19%|| 5312/27626 [12:30<52:37,  7.07it/s]

{'epoch': 1, 'iter': 5310, 'avg_loss': 8.604620909650455, 'avg_acc': 50.08531820749388, 'loss': 8.538330078125}


EP_train:1:  19%|| 5322/27626 [12:31<52:31,  7.08it/s]

{'epoch': 1, 'iter': 5320, 'avg_loss': 8.605127170421513, 'avg_acc': 50.087507047547454, 'loss': 8.852185249328613}


EP_train:1:  19%|| 5332/27626 [12:33<52:32,  7.07it/s]

{'epoch': 1, 'iter': 5330, 'avg_loss': 8.605926341040902, 'avg_acc': 50.084411930219474, 'loss': 8.955649375915527}


EP_train:1:  19%|| 5342/27626 [12:34<52:14,  7.11it/s]

{'epoch': 1, 'iter': 5340, 'avg_loss': 8.606088245122582, 'avg_acc': 50.08542407788804, 'loss': 8.598475456237793}


EP_train:1:  19%|| 5352/27626 [12:36<52:56,  7.01it/s]

{'epoch': 1, 'iter': 5350, 'avg_loss': 8.605886066367617, 'avg_acc': 50.086432442534104, 'loss': 8.64808177947998}


EP_train:1:  19%|| 5362/27626 [12:37<52:25,  7.08it/s]

{'epoch': 1, 'iter': 5360, 'avg_loss': 8.60633866534796, 'avg_acc': 50.08976869986943, 'loss': 8.202343940734863}


EP_train:1:  19%|| 5372/27626 [12:39<52:40,  7.04it/s]

{'epoch': 1, 'iter': 5370, 'avg_loss': 8.606409801759437, 'avg_acc': 50.091928877304035, 'loss': 8.538437843322754}


EP_train:1:  19%|| 5382/27626 [12:40<52:15,  7.09it/s]

{'epoch': 1, 'iter': 5380, 'avg_loss': 8.606588292932715, 'avg_acc': 50.10104999070805, 'loss': 8.435486793518066}


EP_train:1:  20%|| 5392/27626 [12:41<52:21,  7.08it/s]

{'epoch': 1, 'iter': 5390, 'avg_loss': 8.606441232716302, 'avg_acc': 50.107818586533114, 'loss': 8.676468849182129}


EP_train:1:  20%|| 5402/27626 [12:43<52:24,  7.07it/s]

{'epoch': 1, 'iter': 5400, 'avg_loss': 8.607193067060138, 'avg_acc': 50.10704036289576, 'loss': 9.254079818725586}


EP_train:1:  20%|| 5412/27626 [12:44<52:19,  7.08it/s]

{'epoch': 1, 'iter': 5410, 'avg_loss': 8.607437115371216, 'avg_acc': 50.10857512474589, 'loss': 8.271981239318848}


EP_train:1:  20%|| 5422/27626 [12:46<52:03,  7.11it/s]

{'epoch': 1, 'iter': 5420, 'avg_loss': 8.60738314581278, 'avg_acc': 50.10433960523889, 'loss': 8.320318222045898}


EP_train:1:  20%|| 5432/27626 [12:47<52:51,  7.00it/s]

{'epoch': 1, 'iter': 5430, 'avg_loss': 8.60705904880735, 'avg_acc': 50.108750690480576, 'loss': 8.050335884094238}


EP_train:1:  20%|| 5442/27626 [12:48<52:55,  6.99it/s]

{'epoch': 1, 'iter': 5440, 'avg_loss': 8.607689471526129, 'avg_acc': 50.11888899099431, 'loss': 9.620753288269043}


EP_train:1:  20%|| 5452/27626 [12:50<52:51,  6.99it/s]

{'epoch': 1, 'iter': 5450, 'avg_loss': 8.607620874515828, 'avg_acc': 50.12096404329481, 'loss': 8.348889350891113}


EP_train:1:  20%|| 5462/27626 [12:51<52:56,  6.98it/s]

{'epoch': 1, 'iter': 5460, 'avg_loss': 8.607425047831317, 'avg_acc': 50.12532045412929, 'loss': 8.147575378417969}


EP_train:1:  20%|| 5472/27626 [12:53<52:40,  7.01it/s]

{'epoch': 1, 'iter': 5470, 'avg_loss': 8.607581147239404, 'avg_acc': 50.12851855236703, 'loss': 8.609855651855469}


EP_train:1:  20%|| 5482/27626 [12:54<52:06,  7.08it/s]

{'epoch': 1, 'iter': 5480, 'avg_loss': 8.607678667773863, 'avg_acc': 50.1311348294107, 'loss': 8.4694185256958}


EP_train:1:  20%|| 5492/27626 [12:56<52:33,  7.02it/s]

{'epoch': 1, 'iter': 5490, 'avg_loss': 8.608024161927059, 'avg_acc': 50.126912219996356, 'loss': 8.036487579345703}


EP_train:1:  20%|| 5502/27626 [12:57<52:17,  7.05it/s]

{'epoch': 1, 'iter': 5500, 'avg_loss': 8.60798947314006, 'avg_acc': 50.126681512452286, 'loss': 8.029096603393555}


EP_train:1:  20%|| 5512/27626 [12:58<52:22,  7.04it/s]

{'epoch': 1, 'iter': 5510, 'avg_loss': 8.607706073753372, 'avg_acc': 50.127585737615675, 'loss': 8.423277854919434}


EP_train:1:  20%|| 5522/27626 [13:00<52:19,  7.04it/s]

{'epoch': 1, 'iter': 5520, 'avg_loss': 8.607848301894249, 'avg_acc': 50.13018474913965, 'loss': 8.685402870178223}


EP_train:1:  20%|| 5532/27626 [13:01<52:43,  6.98it/s]

{'epoch': 1, 'iter': 5530, 'avg_loss': 8.608035000080529, 'avg_acc': 50.12938437895498, 'loss': 8.90817642211914}


EP_train:1:  20%|| 5542/27626 [13:03<52:19,  7.04it/s]

{'epoch': 1, 'iter': 5540, 'avg_loss': 8.607401083660694, 'avg_acc': 50.12745894242916, 'loss': 9.047324180603027}


EP_train:1:  20%|| 5552/27626 [13:04<51:58,  7.08it/s]

{'epoch': 1, 'iter': 5550, 'avg_loss': 8.607353373919064, 'avg_acc': 50.124414519906324, 'loss': 7.372485637664795}


EP_train:1:  20%|| 5562/27626 [13:05<51:50,  7.09it/s]

{'epoch': 1, 'iter': 5560, 'avg_loss': 8.60783256675845, 'avg_acc': 50.12306689444345, 'loss': 8.297444343566895}


EP_train:1:  20%|| 5572/27626 [13:07<51:53,  7.08it/s]

{'epoch': 1, 'iter': 5570, 'avg_loss': 8.608180879840429, 'avg_acc': 50.12508975049362, 'loss': 8.784899711608887}


EP_train:1:  20%|| 5582/27626 [13:08<52:17,  7.03it/s]

{'epoch': 1, 'iter': 5580, 'avg_loss': 8.608090422910038, 'avg_acc': 50.123745744490236, 'loss': 9.365139961242676}


EP_train:1:  20%|| 5592/27626 [13:10<52:33,  6.99it/s]

{'epoch': 1, 'iter': 5590, 'avg_loss': 8.60820553337306, 'avg_acc': 50.12464228223932, 'loss': 7.663724899291992}


EP_train:1:  20%|| 5602/27626 [13:11<52:12,  7.03it/s]

{'epoch': 1, 'iter': 5600, 'avg_loss': 8.607275488801863, 'avg_acc': 50.12163006605963, 'loss': 8.284905433654785}


EP_train:1:  20%|| 5612/27626 [13:13<52:10,  7.03it/s]

{'epoch': 1, 'iter': 5610, 'avg_loss': 8.606636774389104, 'avg_acc': 50.11751470326145, 'loss': 7.992613792419434}


EP_train:1:  20%|| 5622/27626 [13:14<52:01,  7.05it/s]

{'epoch': 1, 'iter': 5620, 'avg_loss': 8.606404569347946, 'avg_acc': 50.10952232698808, 'loss': 8.744481086730957}


EP_train:1:  20%|| 5632/27626 [13:15<51:39,  7.10it/s]

{'epoch': 1, 'iter': 5630, 'avg_loss': 8.606337467363643, 'avg_acc': 50.10655301012253, 'loss': 8.349257469177246}


EP_train:1:  20%|| 5642/27626 [13:17<51:36,  7.10it/s]

{'epoch': 1, 'iter': 5640, 'avg_loss': 8.60597745139344, 'avg_acc': 50.10913401879099, 'loss': 8.431159019470215}


EP_train:1:  20%|| 5652/27626 [13:18<51:42,  7.08it/s]

{'epoch': 1, 'iter': 5650, 'avg_loss': 8.606318594480781, 'avg_acc': 50.1161298885153, 'loss': 7.12666654586792}


EP_train:1:  20%|| 5662/27626 [13:20<51:43,  7.08it/s]

{'epoch': 1, 'iter': 5660, 'avg_loss': 8.606253480094205, 'avg_acc': 50.11923688394276, 'loss': 9.018632888793945}


EP_train:1:  21%|| 5672/27626 [13:21<51:55,  7.05it/s]

{'epoch': 1, 'iter': 5670, 'avg_loss': 8.605589860331927, 'avg_acc': 50.12233292188326, 'loss': 8.55526065826416}


EP_train:1:  21%|| 5682/27626 [13:22<51:42,  7.07it/s]

{'epoch': 1, 'iter': 5680, 'avg_loss': 8.605780692907144, 'avg_acc': 50.12321774335504, 'loss': 8.418379783630371}


EP_train:1:  21%|| 5692/27626 [13:24<51:50,  7.05it/s]

{'epoch': 1, 'iter': 5690, 'avg_loss': 8.605863557593347, 'avg_acc': 50.12464856791426, 'loss': 8.113897323608398}


EP_train:1:  21%|| 5702/27626 [13:25<51:38,  7.08it/s]

{'epoch': 1, 'iter': 5700, 'avg_loss': 8.606177754913206, 'avg_acc': 50.12771882125943, 'loss': 8.987825393676758}


EP_train:1:  21%|| 5712/27626 [13:27<51:37,  7.08it/s]

{'epoch': 1, 'iter': 5710, 'avg_loss': 8.60638363560493, 'avg_acc': 50.11874015058658, 'loss': 8.866711616516113}


EP_train:1:  21%|| 5722/27626 [13:28<51:48,  7.05it/s]

{'epoch': 1, 'iter': 5720, 'avg_loss': 8.605646429476966, 'avg_acc': 50.11197780108373, 'loss': 8.048698425292969}


EP_train:1:  21%|| 5732/27626 [13:29<51:36,  7.07it/s]

{'epoch': 1, 'iter': 5730, 'avg_loss': 8.605315782869493, 'avg_acc': 50.11396353166987, 'loss': 7.486515045166016}


EP_train:1:  21%|| 5742/27626 [13:31<51:17,  7.11it/s]

{'epoch': 1, 'iter': 5740, 'avg_loss': 8.60522911727605, 'avg_acc': 50.109410381466645, 'loss': 8.291228294372559}


EP_train:1:  21%|| 5752/27626 [13:32<51:26,  7.09it/s]

{'epoch': 1, 'iter': 5750, 'avg_loss': 8.605355738847281, 'avg_acc': 50.110850286906626, 'loss': 8.977302551269531}


EP_train:1:  21%|| 5762/27626 [13:34<51:45,  7.04it/s]

{'epoch': 1, 'iter': 5760, 'avg_loss': 8.604950872731486, 'avg_acc': 50.10740322860614, 'loss': 8.195316314697266}


EP_train:1:  21%|| 5772/27626 [13:35<51:35,  7.06it/s]

{'epoch': 1, 'iter': 5770, 'avg_loss': 8.604666720310833, 'avg_acc': 50.10667561947669, 'loss': 8.420437812805176}


EP_train:1:  21%|| 5782/27626 [13:36<51:51,  7.02it/s]

{'epoch': 1, 'iter': 5780, 'avg_loss': 8.60401783029094, 'avg_acc': 50.11946462549732, 'loss': 8.35092544555664}


EP_train:1:  21%|| 5792/27626 [13:38<51:47,  7.03it/s]

{'epoch': 1, 'iter': 5790, 'avg_loss': 8.603944537018355, 'avg_acc': 50.119797962355385, 'loss': 8.612358093261719}


EP_train:1:  21%|| 5802/27626 [13:39<51:18,  7.09it/s]

{'epoch': 1, 'iter': 5800, 'avg_loss': 8.60422979266741, 'avg_acc': 50.12013014997414, 'loss': 7.8623247146606445}


EP_train:1:  21%|| 5812/27626 [13:41<51:22,  7.08it/s]

{'epoch': 1, 'iter': 5810, 'avg_loss': 8.60427196676976, 'avg_acc': 50.12691447255205, 'loss': 8.261252403259277}


EP_train:1:  21%|| 5822/27626 [13:42<51:38,  7.04it/s]

{'epoch': 1, 'iter': 5820, 'avg_loss': 8.604065938004922, 'avg_acc': 50.12777014258718, 'loss': 8.284713745117188}


EP_train:1:  21%|| 5832/27626 [13:44<51:19,  7.08it/s]

{'epoch': 1, 'iter': 5830, 'avg_loss': 8.604218119394103, 'avg_acc': 50.13183844966558, 'loss': 8.770346641540527}


EP_train:1:  21%|| 5842/27626 [13:45<51:28,  7.05it/s]

{'epoch': 1, 'iter': 5840, 'avg_loss': 8.604674393827429, 'avg_acc': 50.13321777092964, 'loss': 8.164132118225098}


EP_train:1:  21%|| 5852/27626 [13:46<51:19,  7.07it/s]

{'epoch': 1, 'iter': 5850, 'avg_loss': 8.604629939161923, 'avg_acc': 50.134058280635784, 'loss': 8.890097618103027}


EP_train:1:  21%|| 5862/27626 [13:48<51:26,  7.05it/s]

{'epoch': 1, 'iter': 5860, 'avg_loss': 8.604247316934208, 'avg_acc': 50.13063043849173, 'loss': 7.458211898803711}


EP_train:1:  21%|| 5872/27626 [13:49<51:34,  7.03it/s]

{'epoch': 1, 'iter': 5870, 'avg_loss': 8.604446763786013, 'avg_acc': 50.13253704649975, 'loss': 8.810210227966309}


EP_train:1:  21%|| 5882/27626 [13:51<51:07,  7.09it/s]

{'epoch': 1, 'iter': 5880, 'avg_loss': 8.604429608643247, 'avg_acc': 50.129654820608735, 'loss': 7.5583319664001465}


EP_train:1:  21%|| 5892/27626 [13:52<51:01,  7.10it/s]

{'epoch': 1, 'iter': 5890, 'avg_loss': 8.60423789188668, 'avg_acc': 50.13580037345102, 'loss': 8.631318092346191}


EP_train:1:  21%|| 5902/27626 [13:53<50:50,  7.12it/s]

{'epoch': 1, 'iter': 5900, 'avg_loss': 8.60438618015948, 'avg_acc': 50.128685815963394, 'loss': 8.750625610351562}


EP_train:1:  21%|| 5912/27626 [13:55<50:51,  7.12it/s]

{'epoch': 1, 'iter': 5910, 'avg_loss': 8.60465840820612, 'avg_acc': 50.12582473354762, 'loss': 8.673539161682129}


EP_train:1:  21%|| 5922/27626 [13:56<51:12,  7.06it/s]

{'epoch': 1, 'iter': 5920, 'avg_loss': 8.60418209331379, 'avg_acc': 50.12244553284918, 'loss': 7.908384799957275}


EP_train:1:  21%|| 5932/27626 [13:58<51:24,  7.03it/s]

{'epoch': 1, 'iter': 5930, 'avg_loss': 8.60422623698174, 'avg_acc': 50.12434665317822, 'loss': 9.045479774475098}


EP_train:1:  22%|| 5942/27626 [13:59<51:52,  6.97it/s]

{'epoch': 1, 'iter': 5940, 'avg_loss': 8.604192037731686, 'avg_acc': 50.127819390674965, 'loss': 8.23814582824707}


EP_train:1:  22%|| 5952/27626 [14:00<51:42,  6.98it/s]

{'epoch': 1, 'iter': 5950, 'avg_loss': 8.604002666521465, 'avg_acc': 50.13285582255084, 'loss': 8.649042129516602}


EP_train:1:  22%|| 5962/27626 [14:02<51:07,  7.06it/s]

{'epoch': 1, 'iter': 5960, 'avg_loss': 8.603912332237536, 'avg_acc': 50.1357783928871, 'loss': 8.593997955322266}


EP_train:1:  22%|| 5972/27626 [14:03<51:04,  7.07it/s]

{'epoch': 1, 'iter': 5970, 'avg_loss': 8.604493062912649, 'avg_acc': 50.13921453692849, 'loss': 9.019732475280762}


EP_train:1:  22%|| 5982/27626 [14:05<51:06,  7.06it/s]

{'epoch': 1, 'iter': 5980, 'avg_loss': 8.604639549953765, 'avg_acc': 50.1389817756228, 'loss': 7.986027240753174}


EP_train:1:  22%|| 5992/27626 [14:06<50:39,  7.12it/s]

{'epoch': 1, 'iter': 5990, 'avg_loss': 8.604301073459249, 'avg_acc': 50.13353363378401, 'loss': 7.600915431976318}


EP_train:1:  22%|| 6002/27626 [14:08<50:46,  7.10it/s]

{'epoch': 1, 'iter': 6000, 'avg_loss': 8.603800190228101, 'avg_acc': 50.125499916680546, 'loss': 9.122323989868164}


EP_train:1:  22%|| 6012/27626 [14:09<51:08,  7.04it/s]

{'epoch': 1, 'iter': 6010, 'avg_loss': 8.603998360469642, 'avg_acc': 50.132049575777735, 'loss': 7.560617446899414}


EP_train:1:  22%|| 6022/27626 [14:10<50:49,  7.08it/s]

{'epoch': 1, 'iter': 6020, 'avg_loss': 8.603500013790184, 'avg_acc': 50.13338731107789, 'loss': 8.634661674499512}


EP_train:1:  22%|| 6032/27626 [14:12<52:00,  6.92it/s]

{'epoch': 1, 'iter': 6030, 'avg_loss': 8.603563579827512, 'avg_acc': 50.127466423478694, 'loss': 8.816596031188965}


EP_train:1:  22%|| 6042/27626 [14:13<50:56,  7.06it/s]

{'epoch': 1, 'iter': 6040, 'avg_loss': 8.603554219604426, 'avg_acc': 50.12104783976164, 'loss': 8.643977165222168}


EP_train:1:  22%|| 6052/27626 [14:15<50:50,  7.07it/s]

{'epoch': 1, 'iter': 6050, 'avg_loss': 8.604182374475416, 'avg_acc': 50.12291356800529, 'loss': 9.566767692565918}


EP_train:1:  22%|| 6062/27626 [14:16<50:30,  7.12it/s]

{'epoch': 1, 'iter': 6060, 'avg_loss': 8.603946905603426, 'avg_acc': 50.125288731232466, 'loss': 7.653633117675781}


EP_train:1:  22%|| 6072/27626 [14:17<50:49,  7.07it/s]

{'epoch': 1, 'iter': 6070, 'avg_loss': 8.60336582681489, 'avg_acc': 50.13022978092572, 'loss': 7.625419616699219}


EP_train:1:  22%|| 6082/27626 [14:19<50:45,  7.07it/s]

{'epoch': 1, 'iter': 6080, 'avg_loss': 8.60318227952061, 'avg_acc': 50.13258510113469, 'loss': 9.489874839782715}


EP_train:1:  22%|| 6092/27626 [14:20<50:47,  7.07it/s]

{'epoch': 1, 'iter': 6090, 'avg_loss': 8.602938759994318, 'avg_acc': 50.13236742735183, 'loss': 9.458694458007812}


EP_train:1:  22%|| 6102/27626 [14:22<50:42,  7.08it/s]

{'epoch': 1, 'iter': 6100, 'avg_loss': 8.602853992301467, 'avg_acc': 50.13624815604, 'loss': 7.9767279624938965}


EP_train:1:  22%|| 6112/27626 [14:23<50:56,  7.04it/s]

{'epoch': 1, 'iter': 6110, 'avg_loss': 8.60286512204774, 'avg_acc': 50.130911471117656, 'loss': 8.755541801452637}


EP_train:1:  22%|| 6122/27626 [14:24<51:18,  6.99it/s]

{'epoch': 1, 'iter': 6120, 'avg_loss': 8.602164377556228, 'avg_acc': 50.1312081359255, 'loss': 8.402103424072266}


EP_train:1:  22%|| 6132/27626 [14:26<51:17,  6.98it/s]

{'epoch': 1, 'iter': 6130, 'avg_loss': 8.602213615034206, 'avg_acc': 50.136600880769855, 'loss': 9.085725784301758}


EP_train:1:  22%|| 6142/27626 [14:27<50:36,  7.08it/s]

{'epoch': 1, 'iter': 6140, 'avg_loss': 8.601940595760075, 'avg_acc': 50.12976306790426, 'loss': 9.828128814697266}


EP_train:1:  22%|| 6152/27626 [14:29<50:13,  7.13it/s]

{'epoch': 1, 'iter': 6150, 'avg_loss': 8.602044514380244, 'avg_acc': 50.130568200292636, 'loss': 9.207942008972168}


EP_train:1:  22%|| 6162/27626 [14:30<50:27,  7.09it/s]

{'epoch': 1, 'iter': 6160, 'avg_loss': 8.602553324582534, 'avg_acc': 50.130356273332254, 'loss': 8.832088470458984}


EP_train:1:  22%|| 6172/27626 [14:32<50:53,  7.03it/s]

{'epoch': 1, 'iter': 6170, 'avg_loss': 8.603490839667499, 'avg_acc': 50.133183438664716, 'loss': 8.554230690002441}


EP_train:1:  22%|| 6182/27626 [14:33<50:21,  7.10it/s]

{'epoch': 1, 'iter': 6180, 'avg_loss': 8.603270900459703, 'avg_acc': 50.1314512214852, 'loss': 8.187078475952148}


EP_train:1:  22%|| 6192/27626 [14:34<50:20,  7.10it/s]

{'epoch': 1, 'iter': 6190, 'avg_loss': 8.603628941334001, 'avg_acc': 50.12821030528186, 'loss': 8.446439743041992}


EP_train:1:  22%|| 6202/27626 [14:36<51:03,  6.99it/s]

{'epoch': 1, 'iter': 6200, 'avg_loss': 8.603908673772272, 'avg_acc': 50.12800354781487, 'loss': 8.61072063446045}


EP_train:1:  22%|| 6212/27626 [14:37<51:26,  6.94it/s]

{'epoch': 1, 'iter': 6210, 'avg_loss': 8.603719071247136, 'avg_acc': 50.12930687489937, 'loss': 8.350654602050781}


EP_train:1:  23%|| 6222/27626 [14:39<50:46,  7.02it/s]

{'epoch': 1, 'iter': 6220, 'avg_loss': 8.603301418343074, 'avg_acc': 50.13110834271017, 'loss': 7.762509346008301}


EP_train:1:  23%|| 6232/27626 [14:40<50:41,  7.03it/s]

{'epoch': 1, 'iter': 6230, 'avg_loss': 8.603571252940727, 'avg_acc': 50.131900978976084, 'loss': 8.278512954711914}


EP_train:1:  23%|| 6242/27626 [14:41<50:14,  7.09it/s]

{'epoch': 1, 'iter': 6240, 'avg_loss': 8.603731748519504, 'avg_acc': 50.13068819099503, 'loss': 9.228188514709473}


EP_train:1:  23%|| 6252/27626 [14:43<50:21,  7.07it/s]

{'epoch': 1, 'iter': 6250, 'avg_loss': 8.60331315930186, 'avg_acc': 50.136978083506634, 'loss': 8.719269752502441}


EP_train:1:  23%|| 6262/27626 [14:44<50:44,  7.02it/s]

{'epoch': 1, 'iter': 6260, 'avg_loss': 8.603195287609726, 'avg_acc': 50.13775754671778, 'loss': 8.493618965148926}


EP_train:1:  23%|| 6272/27626 [14:46<50:32,  7.04it/s]

{'epoch': 1, 'iter': 6270, 'avg_loss': 8.603338866613258, 'avg_acc': 50.133551267740394, 'loss': 7.814449310302734}


EP_train:1:  23%|| 6282/27626 [14:47<50:58,  6.98it/s]

{'epoch': 1, 'iter': 6280, 'avg_loss': 8.6038119968687, 'avg_acc': 50.12587565674256, 'loss': 9.219222068786621}


EP_train:1:  23%|| 6292/27626 [14:49<50:42,  7.01it/s]

{'epoch': 1, 'iter': 6290, 'avg_loss': 8.60351820398336, 'avg_acc': 50.12865601653155, 'loss': 8.533944129943848}


EP_train:1:  23%|| 6302/27626 [14:50<50:58,  6.97it/s]

{'epoch': 1, 'iter': 6300, 'avg_loss': 8.603148092525078, 'avg_acc': 50.126468020949055, 'loss': 8.659740447998047}


EP_train:1:  23%|| 6312/27626 [14:51<50:33,  7.03it/s]

{'epoch': 1, 'iter': 6310, 'avg_loss': 8.601869904698406, 'avg_acc': 50.123296624940586, 'loss': 7.732178688049316}


EP_train:1:  23%|| 6322/27626 [14:53<50:24,  7.04it/s]

{'epoch': 1, 'iter': 6320, 'avg_loss': 8.602105222987781, 'avg_acc': 50.117663344407525, 'loss': 8.683432579040527}


EP_train:1:  23%|| 6332/27626 [14:54<50:00,  7.10it/s]

{'epoch': 1, 'iter': 6330, 'avg_loss': 8.602314077005852, 'avg_acc': 50.12290712367714, 'loss': 8.7595796585083}


EP_train:1:  23%|| 6342/27626 [14:56<50:01,  7.09it/s]

{'epoch': 1, 'iter': 6340, 'avg_loss': 8.602505008385885, 'avg_acc': 50.12813436366503, 'loss': 8.355472564697266}


EP_train:1:  23%|| 6352/27626 [14:57<49:54,  7.10it/s]

{'epoch': 1, 'iter': 6350, 'avg_loss': 8.602604018367382, 'avg_acc': 50.11956778460085, 'loss': 8.5440034866333}


EP_train:1:  23%|| 6362/27626 [14:58<50:43,  6.99it/s]

{'epoch': 1, 'iter': 6360, 'avg_loss': 8.601891423486123, 'avg_acc': 50.11888853953781, 'loss': 8.267393112182617}


EP_train:1:  23%|| 6372/27626 [15:00<50:26,  7.02it/s]

{'epoch': 1, 'iter': 6370, 'avg_loss': 8.602485216051232, 'avg_acc': 50.11575890754983, 'loss': 8.59011173248291}


EP_train:1:  23%|| 6382/27626 [15:01<50:24,  7.02it/s]

{'epoch': 1, 'iter': 6380, 'avg_loss': 8.602048635744335, 'avg_acc': 50.11361855508542, 'loss': 8.858677864074707}


EP_train:1:  23%|| 6392/27626 [15:03<50:44,  6.97it/s]

{'epoch': 1, 'iter': 6390, 'avg_loss': 8.601527835035377, 'avg_acc': 50.1144187138163, 'loss': 8.324529647827148}


EP_train:1:  23%|| 6402/27626 [15:04<50:08,  7.05it/s]

{'epoch': 1, 'iter': 6400, 'avg_loss': 8.601467282525116, 'avg_acc': 50.11033432276207, 'loss': 8.670039176940918}


EP_train:1:  23%|| 6412/27626 [15:05<49:54,  7.08it/s]

{'epoch': 1, 'iter': 6410, 'avg_loss': 8.601963694243253, 'avg_acc': 50.110649664638906, 'loss': 8.614859580993652}


EP_train:1:  23%|| 6422/27626 [15:07<50:22,  7.02it/s]

{'epoch': 1, 'iter': 6420, 'avg_loss': 8.602881239314927, 'avg_acc': 50.11096402429528, 'loss': 9.196001052856445}


EP_train:1:  23%|| 6432/27626 [15:08<50:13,  7.03it/s]

{'epoch': 1, 'iter': 6430, 'avg_loss': 8.60312714478106, 'avg_acc': 50.10933369615923, 'loss': 9.361276626586914}


EP_train:1:  23%|| 6442/27626 [15:10<50:02,  7.06it/s]

{'epoch': 1, 'iter': 6440, 'avg_loss': 8.603577397732497, 'avg_acc': 50.10819360347772, 'loss': 9.046950340270996}


EP_train:1:  23%|| 6452/27626 [15:11<50:05,  7.04it/s]

{'epoch': 1, 'iter': 6450, 'avg_loss': 8.60338931430349, 'avg_acc': 50.1089947294993, 'loss': 8.941976547241211}


EP_train:1:  23%|| 6462/27626 [15:13<50:19,  7.01it/s]

{'epoch': 1, 'iter': 6460, 'avg_loss': 8.603208956713278, 'avg_acc': 50.113662745705, 'loss': 8.062485694885254}


EP_train:1:  23%|| 6472/27626 [15:14<50:38,  6.96it/s]

{'epoch': 1, 'iter': 6470, 'avg_loss': 8.6032411787245, 'avg_acc': 50.11879925822902, 'loss': 9.363323211669922}


EP_train:1:  23%|| 6482/27626 [15:15<50:34,  6.97it/s]

{'epoch': 1, 'iter': 6480, 'avg_loss': 8.603279031513098, 'avg_acc': 50.125848634469996, 'loss': 8.814811706542969}


EP_train:1:  23%|| 6492/27626 [15:17<50:02,  7.04it/s]

{'epoch': 1, 'iter': 6490, 'avg_loss': 8.60407713847615, 'avg_acc': 50.13191341857958, 'loss': 8.795077323913574}


EP_train:1:  24%|| 6502/27626 [15:18<50:07,  7.02it/s]

{'epoch': 1, 'iter': 6500, 'avg_loss': 8.604213923275461, 'avg_acc': 50.129307029687745, 'loss': 8.352458000183105}


EP_train:1:  24%|| 6512/27626 [15:20<50:00,  7.04it/s]

{'epoch': 1, 'iter': 6510, 'avg_loss': 8.604400626050586, 'avg_acc': 50.13054830287206, 'loss': 8.490752220153809}


EP_train:1:  24%|| 6522/27626 [15:21<49:49,  7.06it/s]

{'epoch': 1, 'iter': 6520, 'avg_loss': 8.604563064104282, 'avg_acc': 50.12699355927005, 'loss': 9.625094413757324}


EP_train:1:  24%|| 6532/27626 [15:23<49:57,  7.04it/s]

{'epoch': 1, 'iter': 6530, 'avg_loss': 8.60433593692175, 'avg_acc': 50.123449701423986, 'loss': 8.492670059204102}


EP_train:1:  24%|| 6542/27626 [15:24<50:03,  7.02it/s]

{'epoch': 1, 'iter': 6540, 'avg_loss': 8.603395595515545, 'avg_acc': 50.12803852621923, 'loss': 8.064839363098145}


EP_train:1:  24%|| 6552/27626 [15:25<50:03,  7.02it/s]

{'epoch': 1, 'iter': 6550, 'avg_loss': 8.60271340482127, 'avg_acc': 50.12784307739276, 'loss': 7.923076629638672}


EP_train:1:  24%|| 6562/27626 [15:27<50:02,  7.01it/s]

{'epoch': 1, 'iter': 6560, 'avg_loss': 8.602489158340823, 'avg_acc': 50.13098231976832, 'loss': 7.785730361938477}


EP_train:1:  24%|| 6572/27626 [15:28<50:02,  7.01it/s]

{'epoch': 1, 'iter': 6570, 'avg_loss': 8.602813178338048, 'avg_acc': 50.128405113376964, 'loss': 8.897162437438965}


EP_train:1:  24%|| 6582/27626 [15:30<49:27,  7.09it/s]

{'epoch': 1, 'iter': 6580, 'avg_loss': 8.602643809733719, 'avg_acc': 50.12583573924936, 'loss': 8.610221862792969}


EP_train:1:  24%|| 6592/27626 [15:31<49:37,  7.06it/s]

{'epoch': 1, 'iter': 6590, 'avg_loss': 8.602769208160595, 'avg_acc': 50.1232741617357, 'loss': 8.737231254577637}


EP_train:1:  24%|| 6602/27626 [15:32<49:38,  7.06it/s]

{'epoch': 1, 'iter': 6600, 'avg_loss': 8.602822246776894, 'avg_acc': 50.129715194667476, 'loss': 8.204782485961914}


EP_train:1:  24%|| 6612/27626 [15:34<49:28,  7.08it/s]

{'epoch': 1, 'iter': 6610, 'avg_loss': 8.603186470534325, 'avg_acc': 50.129518983512334, 'loss': 8.9112548828125}


EP_train:1:  24%|| 6622/27626 [15:35<49:40,  7.05it/s]

{'epoch': 1, 'iter': 6620, 'avg_loss': 8.603430530344715, 'avg_acc': 50.132155263555354, 'loss': 9.101933479309082}


EP_train:1:  24%|| 6632/27626 [15:37<49:17,  7.10it/s]

{'epoch': 1, 'iter': 6630, 'avg_loss': 8.603937948402677, 'avg_acc': 50.12441562358618, 'loss': 9.041078567504883}


EP_train:1:  24%|| 6642/27626 [15:38<49:23,  7.08it/s]

{'epoch': 1, 'iter': 6640, 'avg_loss': 8.60385783812562, 'avg_acc': 50.12846333383526, 'loss': 8.377790451049805}


EP_train:1:  24%|| 6652/27626 [15:39<49:43,  7.03it/s]

{'epoch': 1, 'iter': 6650, 'avg_loss': 8.6037715485429, 'avg_acc': 50.12639076830552, 'loss': 8.92500114440918}


EP_train:1:  24%|| 6662/27626 [15:41<49:41,  7.03it/s]

{'epoch': 1, 'iter': 6660, 'avg_loss': 8.603904829496308, 'avg_acc': 50.1290159135265, 'loss': 8.757658004760742}


EP_train:1:  24%|| 6672/27626 [15:42<49:31,  7.05it/s]

{'epoch': 1, 'iter': 6670, 'avg_loss': 8.604066614803749, 'avg_acc': 50.12507495128167, 'loss': 8.543094635009766}


EP_train:1:  24%|| 6682/27626 [15:44<49:14,  7.09it/s]

{'epoch': 1, 'iter': 6680, 'avg_loss': 8.604115370165319, 'avg_acc': 50.12208127525819, 'loss': 8.867212295532227}


EP_train:1:  24%|| 6692/27626 [15:45<49:27,  7.05it/s]

{'epoch': 1, 'iter': 6690, 'avg_loss': 8.604171082250492, 'avg_acc': 50.12329995516366, 'loss': 8.377516746520996}


EP_train:1:  24%|| 6702/27626 [15:47<49:16,  7.08it/s]

{'epoch': 1, 'iter': 6700, 'avg_loss': 8.603963001506255, 'avg_acc': 50.1301111774362, 'loss': 8.220672607421875}


EP_train:1:  24%|| 6712/27626 [15:48<49:33,  7.03it/s]

{'epoch': 1, 'iter': 6710, 'avg_loss': 8.603940990034753, 'avg_acc': 50.131314260169866, 'loss': 8.224663734436035}


EP_train:1:  24%|| 6722/27626 [15:49<49:16,  7.07it/s]

{'epoch': 1, 'iter': 6720, 'avg_loss': 8.603782831744555, 'avg_acc': 50.13809328968903, 'loss': 9.607502937316895}


EP_train:1:  24%|| 6732/27626 [15:51<49:22,  7.05it/s]

{'epoch': 1, 'iter': 6730, 'avg_loss': 8.603603978653645, 'avg_acc': 50.14438790670034, 'loss': 7.744596481323242}


EP_train:1:  24%|| 6742/27626 [15:52<49:29,  7.03it/s]

{'epoch': 1, 'iter': 6740, 'avg_loss': 8.603584449274607, 'avg_acc': 50.14324655095683, 'loss': 8.958370208740234}


EP_train:1:  24%|| 6752/27626 [15:54<49:39,  7.01it/s]

{'epoch': 1, 'iter': 6750, 'avg_loss': 8.603543860385054, 'avg_acc': 50.14673752036736, 'loss': 8.289992332458496}


EP_train:1:  24%|| 6762/27626 [15:55<49:22,  7.04it/s]

{'epoch': 1, 'iter': 6760, 'avg_loss': 8.603235097630385, 'avg_acc': 50.142360597544744, 'loss': 8.480036735534668}


EP_train:1:  25%|| 6772/27626 [15:56<49:33,  7.01it/s]

{'epoch': 1, 'iter': 6770, 'avg_loss': 8.603426846526888, 'avg_acc': 50.135227440555305, 'loss': 8.325252532958984}


EP_train:1:  25%|| 6782/27626 [15:58<49:18,  7.05it/s]

{'epoch': 1, 'iter': 6780, 'avg_loss': 8.603522644723663, 'avg_acc': 50.13733225188025, 'loss': 8.741425514221191}


EP_train:1:  25%|| 6792/27626 [15:59<49:19,  7.04it/s]

{'epoch': 1, 'iter': 6790, 'avg_loss': 8.603426409617315, 'avg_acc': 50.13114784273303, 'loss': 8.611798286437988}


EP_train:1:  25%|| 6802/27626 [16:01<48:50,  7.11it/s]

{'epoch': 1, 'iter': 6800, 'avg_loss': 8.603812085058983, 'avg_acc': 50.13279297162182, 'loss': 9.007914543151855}


EP_train:1:  25%|| 6812/27626 [16:02<48:42,  7.12it/s]

{'epoch': 1, 'iter': 6810, 'avg_loss': 8.603601132312932, 'avg_acc': 50.12755102040817, 'loss': 8.483819961547852}


EP_train:1:  25%|| 6822/27626 [16:03<49:11,  7.05it/s]

{'epoch': 1, 'iter': 6820, 'avg_loss': 8.60386766078028, 'avg_acc': 50.121408151297466, 'loss': 8.280123710632324}


EP_train:1:  25%|| 6832/27626 [16:05<49:22,  7.02it/s]

{'epoch': 1, 'iter': 6830, 'avg_loss': 8.603376359650582, 'avg_acc': 50.11345337432294, 'loss': 7.804797649383545}


EP_train:1:  25%|| 6842/27626 [16:06<49:17,  7.03it/s]

{'epoch': 1, 'iter': 6840, 'avg_loss': 8.602719901789875, 'avg_acc': 50.113744335623444, 'loss': 8.619826316833496}


EP_train:1:  25%|| 6852/27626 [16:08<48:58,  7.07it/s]

{'epoch': 1, 'iter': 6850, 'avg_loss': 8.602674898379494, 'avg_acc': 50.10992920741497, 'loss': 9.430455207824707}


EP_train:1:  25%|| 6862/27626 [16:09<48:58,  7.07it/s]

{'epoch': 1, 'iter': 6860, 'avg_loss': 8.602914090859196, 'avg_acc': 50.09792668707186, 'loss': 8.145164489746094}


EP_train:1:  25%|| 6872/27626 [16:11<49:04,  7.05it/s]

{'epoch': 1, 'iter': 6870, 'avg_loss': 8.602923252648132, 'avg_acc': 50.097329355261245, 'loss': 8.646453857421875}


EP_train:1:  25%|| 6882/27626 [16:12<48:34,  7.12it/s]

{'epoch': 1, 'iter': 6880, 'avg_loss': 8.602921343979283, 'avg_acc': 50.10082110158407, 'loss': 8.264960289001465}


EP_train:1:  25%|| 6892/27626 [16:13<48:39,  7.10it/s]

{'epoch': 1, 'iter': 6890, 'avg_loss': 8.603295944854908, 'avg_acc': 50.09659338267305, 'loss': 8.041570663452148}


EP_train:1:  25%|| 6902/27626 [16:15<48:54,  7.06it/s]

{'epoch': 1, 'iter': 6900, 'avg_loss': 8.602780707486037, 'avg_acc': 50.10098174177655, 'loss': 8.71066665649414}


EP_train:1:  25%|| 6912/27626 [16:16<48:48,  7.07it/s]

{'epoch': 1, 'iter': 6910, 'avg_loss': 8.602802137375706, 'avg_acc': 50.10490522355665, 'loss': 8.27258586883545}


EP_train:1:  25%|| 6922/27626 [16:18<48:53,  7.06it/s]

{'epoch': 1, 'iter': 6920, 'avg_loss': 8.60314465699005, 'avg_acc': 50.10023840485479, 'loss': 8.738616943359375}


EP_train:1:  25%|| 6932/27626 [16:19<49:14,  7.00it/s]

{'epoch': 1, 'iter': 6930, 'avg_loss': 8.603779618314567, 'avg_acc': 50.10144640023084, 'loss': 8.413926124572754}


EP_train:1:  25%|| 6942/27626 [16:20<48:59,  7.04it/s]

{'epoch': 1, 'iter': 6940, 'avg_loss': 8.603635017302688, 'avg_acc': 50.099949574989196, 'loss': 9.5883207321167}


EP_train:1:  25%|| 6952/27626 [16:22<48:31,  7.10it/s]

{'epoch': 1, 'iter': 6950, 'avg_loss': 8.603563008385928, 'avg_acc': 50.09845705653863, 'loss': 8.43333911895752}


EP_train:1:  25%|| 6962/27626 [16:23<49:06,  7.01it/s]

{'epoch': 1, 'iter': 6960, 'avg_loss': 8.603484854652285, 'avg_acc': 50.100111334578365, 'loss': 8.075278282165527}


EP_train:1:  25%|| 6972/27626 [16:25<48:37,  7.08it/s]

{'epoch': 1, 'iter': 6970, 'avg_loss': 8.603092172261787, 'avg_acc': 50.099071151915076, 'loss': 7.886419773101807}


EP_train:1:  25%|| 6982/27626 [16:26<48:45,  7.06it/s]

{'epoch': 1, 'iter': 6980, 'avg_loss': 8.603104521083791, 'avg_acc': 50.094005156854315, 'loss': 8.563596725463867}


EP_train:1:  25%|| 6992/27626 [16:27<48:53,  7.03it/s]

{'epoch': 1, 'iter': 6990, 'avg_loss': 8.603094434895036, 'avg_acc': 50.09074166785867, 'loss': 8.573636054992676}


EP_train:1:  25%|| 7002/27626 [16:29<48:46,  7.05it/s]

{'epoch': 1, 'iter': 7000, 'avg_loss': 8.602952004023884, 'avg_acc': 50.08838023139551, 'loss': 8.465540885925293}


EP_train:1:  25%|| 7012/27626 [16:30<48:50,  7.03it/s]

{'epoch': 1, 'iter': 7010, 'avg_loss': 8.602393587787384, 'avg_acc': 50.08335116245899, 'loss': 7.933479309082031}


EP_train:1:  25%|| 7022/27626 [16:32<48:47,  7.04it/s]

{'epoch': 1, 'iter': 7020, 'avg_loss': 8.60230423517503, 'avg_acc': 50.09257940464321, 'loss': 7.713569641113281}


EP_train:1:  25%|| 7032/27626 [16:33<48:16,  7.11it/s]

{'epoch': 1, 'iter': 7030, 'avg_loss': 8.602022705029295, 'avg_acc': 50.092003271227426, 'loss': 8.303808212280273}


EP_train:1:  25%|| 7042/27626 [16:35<48:18,  7.10it/s]

{'epoch': 1, 'iter': 7040, 'avg_loss': 8.601698815492451, 'avg_acc': 50.086546655304645, 'loss': 8.26638126373291}


EP_train:1:  26%|| 7052/27626 [16:36<48:10,  7.12it/s]

{'epoch': 1, 'iter': 7050, 'avg_loss': 8.601531761281965, 'avg_acc': 50.07977591830945, 'loss': 8.282094955444336}


EP_train:1:  26%|| 7062/27626 [16:37<48:38,  7.05it/s]

{'epoch': 1, 'iter': 7060, 'avg_loss': 8.601370635206209, 'avg_acc': 50.082760940376716, 'loss': 8.813199043273926}


EP_train:1:  26%|| 7072/27626 [16:39<48:43,  7.03it/s]

{'epoch': 1, 'iter': 7070, 'avg_loss': 8.601621711495202, 'avg_acc': 50.08441168151605, 'loss': 9.250836372375488}


EP_train:1:  26%|| 7082/27626 [16:40<48:42,  7.03it/s]

{'epoch': 1, 'iter': 7080, 'avg_loss': 8.6019417692453, 'avg_acc': 50.08738172574495, 'loss': 9.150785446166992}


EP_train:1:  26%|| 7092/27626 [16:42<48:21,  7.08it/s]

{'epoch': 1, 'iter': 7090, 'avg_loss': 8.60253089130925, 'avg_acc': 50.084614299816664, 'loss': 8.999190330505371}


EP_train:1:  26%|| 7102/27626 [16:43<48:23,  7.07it/s]

{'epoch': 1, 'iter': 7100, 'avg_loss': 8.602592362230755, 'avg_acc': 50.08493522039149, 'loss': 7.724380970001221}


EP_train:1:  26%|| 7112/27626 [16:44<49:04,  6.97it/s]

{'epoch': 1, 'iter': 7110, 'avg_loss': 8.602307414726909, 'avg_acc': 50.08261847841372, 'loss': 8.484999656677246}


EP_train:1:  26%|| 7122/27626 [16:46<48:35,  7.03it/s]

{'epoch': 1, 'iter': 7120, 'avg_loss': 8.602678566396847, 'avg_acc': 50.07986940036512, 'loss': 8.663987159729004}


EP_train:1:  26%|| 7132/27626 [16:47<48:39,  7.02it/s]

{'epoch': 1, 'iter': 7130, 'avg_loss': 8.602636912614296, 'avg_acc': 50.080195624737065, 'loss': 8.46384334564209}


EP_train:1:  26%|| 7142/27626 [16:49<48:13,  7.08it/s]

{'epoch': 1, 'iter': 7140, 'avg_loss': 8.60249165416248, 'avg_acc': 50.08402184567987, 'loss': 7.448821067810059}


EP_train:1:  26%|| 7152/27626 [16:50<48:02,  7.10it/s]

{'epoch': 1, 'iter': 7150, 'avg_loss': 8.602269115656043, 'avg_acc': 50.090459376311, 'loss': 9.153999328613281}


EP_train:1:  26%|| 7162/27626 [16:51<48:36,  7.02it/s]

{'epoch': 1, 'iter': 7160, 'avg_loss': 8.602340020642004, 'avg_acc': 50.08946027091188, 'loss': 8.64657211303711}


EP_train:1:  26%|| 7172/27626 [16:53<48:20,  7.05it/s]

{'epoch': 1, 'iter': 7170, 'avg_loss': 8.602363270374044, 'avg_acc': 50.08802816901409, 'loss': 9.239837646484375}


EP_train:1:  26%|| 7182/27626 [16:54<48:26,  7.03it/s]

{'epoch': 1, 'iter': 7180, 'avg_loss': 8.602448957276168, 'avg_acc': 50.089211112658404, 'loss': 9.735481262207031}


EP_train:1:  26%|| 7192/27626 [16:56<48:22,  7.04it/s]

{'epoch': 1, 'iter': 7190, 'avg_loss': 8.60172350153673, 'avg_acc': 50.091694479210126, 'loss': 7.898446559906006}


EP_train:1:  26%|| 7202/27626 [16:57<48:48,  6.98it/s]

{'epoch': 1, 'iter': 7200, 'avg_loss': 8.601014824480401, 'avg_acc': 50.09200111095681, 'loss': 7.8834991455078125}


EP_train:1:  26%|| 7212/27626 [16:59<48:21,  7.04it/s]

{'epoch': 1, 'iter': 7210, 'avg_loss': 8.601031908990937, 'avg_acc': 50.095340452087086, 'loss': 9.061034202575684}


EP_train:1:  26%|| 7222/27626 [17:00<48:05,  7.07it/s]

{'epoch': 1, 'iter': 7220, 'avg_loss': 8.601279574374283, 'avg_acc': 50.101267137515585, 'loss': 8.640693664550781}


EP_train:1:  26%|| 7232/27626 [17:01<48:10,  7.05it/s]

{'epoch': 1, 'iter': 7230, 'avg_loss': 8.601200078965025, 'avg_acc': 50.09334808463559, 'loss': 9.055188179016113}


EP_train:1:  26%|| 7242/27626 [17:03<48:10,  7.05it/s]

{'epoch': 1, 'iter': 7240, 'avg_loss': 8.6014971271468, 'avg_acc': 50.0919244579478, 'loss': 8.72130298614502}


EP_train:1:  26%|| 7252/27626 [17:04<48:14,  7.04it/s]

{'epoch': 1, 'iter': 7250, 'avg_loss': 8.601147792204843, 'avg_acc': 50.10041718383671, 'loss': 8.089045524597168}


EP_train:1:  26%|| 7262/27626 [17:06<48:04,  7.06it/s]

{'epoch': 1, 'iter': 7260, 'avg_loss': 8.600939458895118, 'avg_acc': 50.097696598264704, 'loss': 8.599656105041504}


EP_train:1:  26%|| 7272/27626 [17:07<48:06,  7.05it/s]

{'epoch': 1, 'iter': 7270, 'avg_loss': 8.601055787495318, 'avg_acc': 50.09799202310548, 'loss': 7.657448768615723}


EP_train:1:  26%|| 7282/27626 [17:08<47:58,  7.07it/s]

{'epoch': 1, 'iter': 7280, 'avg_loss': 8.601100901001317, 'avg_acc': 50.09957423430848, 'loss': 7.9862871170043945}


EP_train:1:  26%|| 7292/27626 [17:10<48:04,  7.05it/s]

{'epoch': 1, 'iter': 7290, 'avg_loss': 8.600729159016518, 'avg_acc': 50.101152105335345, 'loss': 8.614108085632324}


EP_train:1:  26%|| 7302/27626 [17:11<48:06,  7.04it/s]

{'epoch': 1, 'iter': 7300, 'avg_loss': 8.600982976962305, 'avg_acc': 50.10186960690316, 'loss': 9.361574172973633}


EP_train:1:  26%|| 7312/27626 [17:13<47:52,  7.07it/s]

{'epoch': 1, 'iter': 7310, 'avg_loss': 8.600882644415913, 'avg_acc': 50.09831076460129, 'loss': 8.881620407104492}


EP_train:1:  27%|| 7322/27626 [17:14<48:18,  7.01it/s]

{'epoch': 1, 'iter': 7320, 'avg_loss': 8.600747383710644, 'avg_acc': 50.10329872968173, 'loss': 7.9962639808654785}


EP_train:1:  27%|| 7332/27626 [17:16<47:40,  7.10it/s]

{'epoch': 1, 'iter': 7330, 'avg_loss': 8.600897558485725, 'avg_acc': 50.103157822943665, 'loss': 8.84777545928955}


EP_train:1:  27%|| 7342/27626 [17:17<47:41,  7.09it/s]

{'epoch': 1, 'iter': 7340, 'avg_loss': 8.601093198830547, 'avg_acc': 50.1000374608364, 'loss': 9.546692848205566}


EP_train:1:  27%|| 7352/27626 [17:18<48:28,  6.97it/s]

{'epoch': 1, 'iter': 7350, 'avg_loss': 8.601207343822692, 'avg_acc': 50.101601822881236, 'loss': 9.223398208618164}


EP_train:1:  27%|| 7362/27626 [17:20<47:44,  7.08it/s]

{'epoch': 1, 'iter': 7360, 'avg_loss': 8.60120721550646, 'avg_acc': 50.10019019155006, 'loss': 8.49166488647461}


EP_train:1:  27%|| 7372/27626 [17:21<47:43,  7.07it/s]

{'epoch': 1, 'iter': 7370, 'avg_loss': 8.601086234580611, 'avg_acc': 50.1072615655949, 'loss': 8.36354923248291}


EP_train:1:  27%|| 7382/27626 [17:23<47:50,  7.05it/s]

{'epoch': 1, 'iter': 7380, 'avg_loss': 8.601092629923018, 'avg_acc': 50.10499932258502, 'loss': 9.105194091796875}


EP_train:1:  27%|| 7392/27626 [17:24<47:40,  7.07it/s]

{'epoch': 1, 'iter': 7390, 'avg_loss': 8.601162528527185, 'avg_acc': 50.111199431741305, 'loss': 9.112369537353516}


EP_train:1:  27%|| 7402/27626 [17:25<47:29,  7.10it/s]

{'epoch': 1, 'iter': 7400, 'avg_loss': 8.601702196149306, 'avg_acc': 50.11273814349412, 'loss': 8.72093677520752}


EP_train:1:  27%|| 7412/27626 [17:27<47:42,  7.06it/s]

{'epoch': 1, 'iter': 7410, 'avg_loss': 8.601720901430578, 'avg_acc': 50.1117426798003, 'loss': 8.511043548583984}


EP_train:1:  27%|| 7422/27626 [17:28<47:56,  7.02it/s]

{'epoch': 1, 'iter': 7420, 'avg_loss': 8.601861278285654, 'avg_acc': 50.10485446705296, 'loss': 8.933795928955078}


EP_train:1:  27%|| 7432/27626 [17:30<47:52,  7.03it/s]

{'epoch': 1, 'iter': 7430, 'avg_loss': 8.601725387855584, 'avg_acc': 50.10345175615664, 'loss': 8.993900299072266}


EP_train:1:  27%|| 7442/27626 [17:31<47:29,  7.08it/s]

{'epoch': 1, 'iter': 7440, 'avg_loss': 8.60161521367177, 'avg_acc': 50.10373269721812, 'loss': 8.350260734558105}


EP_train:1:  27%|| 7452/27626 [17:32<47:36,  7.06it/s]

{'epoch': 1, 'iter': 7450, 'avg_loss': 8.60178415331484, 'avg_acc': 50.09814118910213, 'loss': 8.185330390930176}


EP_train:1:  27%|| 7462/27626 [17:34<47:41,  7.05it/s]

{'epoch': 1, 'iter': 7460, 'avg_loss': 8.60135397082151, 'avg_acc': 50.09465889290979, 'loss': 9.075421333312988}


EP_train:1:  27%|| 7472/27626 [17:35<47:44,  7.03it/s]

{'epoch': 1, 'iter': 7470, 'avg_loss': 8.601214085536666, 'avg_acc': 50.09536875920225, 'loss': 7.915896892547607}


EP_train:1:  27%|| 7482/27626 [17:37<47:23,  7.08it/s]

{'epoch': 1, 'iter': 7480, 'avg_loss': 8.601653762017321, 'avg_acc': 50.089810854163886, 'loss': 8.914092063903809}


EP_train:1:  27%|| 7492/27626 [17:38<47:30,  7.06it/s]

{'epoch': 1, 'iter': 7490, 'avg_loss': 8.602161872213474, 'avg_acc': 50.085936457081836, 'loss': 8.980220794677734}


EP_train:1:  27%|| 7502/27626 [17:40<47:29,  7.06it/s]

{'epoch': 1, 'iter': 7500, 'avg_loss': 8.602194953133242, 'avg_acc': 50.08207239034795, 'loss': 9.16107177734375}


EP_train:1:  27%|| 7512/27626 [17:41<47:44,  7.02it/s]

{'epoch': 1, 'iter': 7510, 'avg_loss': 8.602443404306252, 'avg_acc': 50.09111636266809, 'loss': 9.106322288513184}


EP_train:1:  27%|| 7522/27626 [17:42<47:24,  7.07it/s]

{'epoch': 1, 'iter': 7520, 'avg_loss': 8.602607326294542, 'avg_acc': 50.08767118734211, 'loss': 8.347858428955078}


EP_train:1:  27%|| 7532/27626 [17:44<47:12,  7.09it/s]

{'epoch': 1, 'iter': 7530, 'avg_loss': 8.602202308777873, 'avg_acc': 50.078840791395564, 'loss': 8.200008392333984}


EP_train:1:  27%|| 7542/27626 [17:45<47:00,  7.12it/s]

{'epoch': 1, 'iter': 7540, 'avg_loss': 8.602443148675324, 'avg_acc': 50.08329465588118, 'loss': 8.281112670898438}


EP_train:1:  27%|| 7552/27626 [17:47<47:51,  6.99it/s]

{'epoch': 1, 'iter': 7550, 'avg_loss': 8.602891455263732, 'avg_acc': 50.07945967421533, 'loss': 9.867310523986816}


EP_train:1:  27%|| 7562/27626 [17:48<47:18,  7.07it/s]

{'epoch': 1, 'iter': 7560, 'avg_loss': 8.602709769635048, 'avg_acc': 50.07728805713529, 'loss': 8.723753929138184}


EP_train:1:  27%|| 7572/27626 [17:49<47:20,  7.06it/s]

{'epoch': 1, 'iter': 7570, 'avg_loss': 8.602826749864366, 'avg_acc': 50.0804880464932, 'loss': 8.136286735534668}


EP_train:1:  27%|| 7582/27626 [17:51<47:22,  7.05it/s]

{'epoch': 1, 'iter': 7580, 'avg_loss': 8.603296647551758, 'avg_acc': 50.08326737897375, 'loss': 8.3759126663208}


EP_train:1:  27%|| 7592/27626 [17:52<47:42,  7.00it/s]

{'epoch': 1, 'iter': 7590, 'avg_loss': 8.603650988019858, 'avg_acc': 50.07904096956922, 'loss': 9.176467895507812}


EP_train:1:  28%|| 7602/27626 [17:54<47:47,  6.98it/s]

{'epoch': 1, 'iter': 7600, 'avg_loss': 8.603528233676062, 'avg_acc': 50.08181489277727, 'loss': 8.550943374633789}


EP_train:1:  28%|| 7612/27626 [17:55<47:04,  7.08it/s]

{'epoch': 1, 'iter': 7610, 'avg_loss': 8.60364230342269, 'avg_acc': 50.084992116673234, 'loss': 7.804734230041504}


EP_train:1:  28%|| 7622/27626 [17:56<47:15,  7.05it/s]

{'epoch': 1, 'iter': 7620, 'avg_loss': 8.603737577586768, 'avg_acc': 50.083240388400476, 'loss': 8.762686729431152}


EP_train:1:  28%|| 7632/27626 [17:58<47:15,  7.05it/s]

{'epoch': 1, 'iter': 7630, 'avg_loss': 8.603704380029901, 'avg_acc': 50.08927401389071, 'loss': 8.861204147338867}


EP_train:1:  28%|| 7642/27626 [17:59<47:33,  7.00it/s]

{'epoch': 1, 'iter': 7640, 'avg_loss': 8.60372467500317, 'avg_acc': 50.08302251014265, 'loss': 8.584918975830078}


EP_train:1:  28%|| 7652/27626 [18:01<47:23,  7.03it/s]

{'epoch': 1, 'iter': 7650, 'avg_loss': 8.603888344132125, 'avg_acc': 50.0820971114887, 'loss': 9.255345344543457}


EP_train:1:  28%|| 7662/27626 [18:02<47:03,  7.07it/s]

{'epoch': 1, 'iter': 7660, 'avg_loss': 8.604000330936067, 'avg_acc': 50.08402950006526, 'loss': 8.278302192687988}


EP_train:1:  28%|| 7672/27626 [18:04<47:11,  7.05it/s]

{'epoch': 1, 'iter': 7670, 'avg_loss': 8.603480218022794, 'avg_acc': 50.08269782296962, 'loss': 7.793188095092773}


EP_train:1:  28%|| 7682/27626 [18:05<47:12,  7.04it/s]

{'epoch': 1, 'iter': 7680, 'avg_loss': 8.602929968151663, 'avg_acc': 50.0764874365317, 'loss': 8.295580863952637}


EP_train:1:  28%|| 7692/27626 [18:06<47:14,  7.03it/s]

{'epoch': 1, 'iter': 7690, 'avg_loss': 8.602487498181583, 'avg_acc': 50.07598166688337, 'loss': 8.377087593078613}


EP_train:1:  28%|| 7702/27626 [18:08<47:00,  7.06it/s]

{'epoch': 1, 'iter': 7700, 'avg_loss': 8.602402003550434, 'avg_acc': 50.07710037657447, 'loss': 8.916415214538574}


EP_train:1:  28%|| 7712/27626 [18:09<47:05,  7.05it/s]

{'epoch': 1, 'iter': 7710, 'avg_loss': 8.602345395252067, 'avg_acc': 50.0737582674102, 'loss': 9.43188190460205}


EP_train:1:  28%|| 7722/27626 [18:11<47:21,  7.01it/s]

{'epoch': 1, 'iter': 7720, 'avg_loss': 8.602015218004755, 'avg_acc': 50.07204377671286, 'loss': 9.06350040435791}


EP_train:1:  28%|| 7732/27626 [18:12<46:43,  7.09it/s]

{'epoch': 1, 'iter': 7730, 'avg_loss': 8.60205815924261, 'avg_acc': 50.07235480532919, 'loss': 8.540605545043945}


EP_train:1:  28%|| 7742/27626 [18:13<47:00,  7.05it/s]

{'epoch': 1, 'iter': 7740, 'avg_loss': 8.601915052426506, 'avg_acc': 50.067013305774445, 'loss': 8.361141204833984}


EP_train:1:  28%|| 7752/27626 [18:15<46:51,  7.07it/s]

{'epoch': 1, 'iter': 7750, 'avg_loss': 8.602271385215017, 'avg_acc': 50.064104631660435, 'loss': 8.70925521850586}


EP_train:1:  28%|| 7762/27626 [18:16<47:08,  7.02it/s]

{'epoch': 1, 'iter': 7760, 'avg_loss': 8.601603407786689, 'avg_acc': 50.06522999613452, 'loss': 7.95350980758667}


EP_train:1:  28%|| 7772/27626 [18:18<47:06,  7.02it/s]

{'epoch': 1, 'iter': 7770, 'avg_loss': 8.601903632653295, 'avg_acc': 50.06675460043753, 'loss': 9.133676528930664}


EP_train:1:  28%|| 7782/27626 [18:19<46:58,  7.04it/s]

{'epoch': 1, 'iter': 7780, 'avg_loss': 8.601783329471342, 'avg_acc': 50.07429957588999, 'loss': 8.882172584533691}


EP_train:1:  28%|| 7792/27626 [18:20<47:09,  7.01it/s]

{'epoch': 1, 'iter': 7790, 'avg_loss': 8.601417259844991, 'avg_acc': 50.07901745603902, 'loss': 8.317540168762207}


EP_train:1:  28%|| 7802/27626 [18:22<46:46,  7.06it/s]

{'epoch': 1, 'iter': 7800, 'avg_loss': 8.601357859658089, 'avg_acc': 50.0785155749263, 'loss': 8.193408966064453}


EP_train:1:  28%|| 7812/27626 [18:23<46:43,  7.07it/s]

{'epoch': 1, 'iter': 7810, 'avg_loss': 8.60110418202248, 'avg_acc': 50.07321405709896, 'loss': 8.30935001373291}


EP_train:1:  28%|| 7822/27626 [18:25<46:59,  7.02it/s]

{'epoch': 1, 'iter': 7820, 'avg_loss': 8.601270483241576, 'avg_acc': 50.06752653113412, 'loss': 8.295527458190918}


EP_train:1:  28%|| 7832/27626 [18:26<47:29,  6.95it/s]

{'epoch': 1, 'iter': 7830, 'avg_loss': 8.600996475356123, 'avg_acc': 50.063050695951986, 'loss': 8.862780570983887}


EP_train:1:  28%|| 7842/27626 [18:28<46:43,  7.06it/s]

{'epoch': 1, 'iter': 7840, 'avg_loss': 8.601052924361616, 'avg_acc': 50.06815138375207, 'loss': 8.15141487121582}


EP_train:1:  28%|| 7852/27626 [18:29<46:49,  7.04it/s]

{'epoch': 1, 'iter': 7850, 'avg_loss': 8.600732528272061, 'avg_acc': 50.07085084702586, 'loss': 8.612602233886719}


EP_train:1:  28%|| 7862/27626 [18:30<46:31,  7.08it/s]

{'epoch': 1, 'iter': 7860, 'avg_loss': 8.600623296942212, 'avg_acc': 50.067580460501205, 'loss': 8.816534996032715}


EP_train:1:  28%|| 7872/27626 [18:32<46:57,  7.01it/s]

{'epoch': 1, 'iter': 7870, 'avg_loss': 8.60048547098225, 'avg_acc': 50.0670975733706, 'loss': 9.05544376373291}


EP_train:1:  29%|| 7882/27626 [18:33<46:56,  7.01it/s]

{'epoch': 1, 'iter': 7880, 'avg_loss': 8.600526183771763, 'avg_acc': 50.069788097957115, 'loss': 8.8303861618042}


EP_train:1:  29%|| 7892/27626 [18:35<46:19,  7.10it/s]

{'epoch': 1, 'iter': 7890, 'avg_loss': 8.600615368885258, 'avg_acc': 50.07484792801926, 'loss': 9.359495162963867}


EP_train:1:  29%|| 7902/27626 [18:36<46:19,  7.10it/s]

{'epoch': 1, 'iter': 7900, 'avg_loss': 8.60096212399638, 'avg_acc': 50.069611441589664, 'loss': 8.782136917114258}


EP_train:1:  29%|| 7912/27626 [18:37<46:32,  7.06it/s]

{'epoch': 1, 'iter': 7910, 'avg_loss': 8.601301806718356, 'avg_acc': 50.07070850714196, 'loss': 8.167644500732422}


EP_train:1:  29%|| 7922/27626 [18:39<46:28,  7.07it/s]

{'epoch': 1, 'iter': 7920, 'avg_loss': 8.601122811652994, 'avg_acc': 50.070619239994954, 'loss': 9.415135383605957}


EP_train:1:  29%|| 7932/27626 [18:40<46:18,  7.09it/s]

{'epoch': 1, 'iter': 7930, 'avg_loss': 8.600929133213569, 'avg_acc': 50.068560080696, 'loss': 9.4652738571167}


EP_train:1:  29%|| 7942/27626 [18:42<46:28,  7.06it/s]

{'epoch': 1, 'iter': 7940, 'avg_loss': 8.601007385467675, 'avg_acc': 50.063751416698146, 'loss': 8.38036823272705}


EP_train:1:  29%|| 7952/27626 [18:43<46:34,  7.04it/s]

{'epoch': 1, 'iter': 7950, 'avg_loss': 8.601011369801395, 'avg_acc': 50.059347880769714, 'loss': 8.67976188659668}


EP_train:1:  29%|| 7962/27626 [18:45<46:26,  7.06it/s]

{'epoch': 1, 'iter': 7960, 'avg_loss': 8.601176166187024, 'avg_acc': 50.06123602562492, 'loss': 8.886712074279785}


EP_train:1:  29%|| 7972/27626 [18:46<46:05,  7.11it/s]

{'epoch': 1, 'iter': 7970, 'avg_loss': 8.601071587142494, 'avg_acc': 50.061551248274995, 'loss': 8.595917701721191}


EP_train:1:  29%|| 7982/27626 [18:47<46:06,  7.10it/s]

{'epoch': 1, 'iter': 7980, 'avg_loss': 8.601041255626384, 'avg_acc': 50.05912479639143, 'loss': 7.852015972137451}


EP_train:1:  29%|| 7992/27626 [18:49<46:27,  7.04it/s]

{'epoch': 1, 'iter': 7990, 'avg_loss': 8.600969391432443, 'avg_acc': 50.05279376798899, 'loss': 8.607499122619629}


EP_train:1:  29%|| 8002/27626 [18:50<46:58,  6.96it/s]

{'epoch': 1, 'iter': 8000, 'avg_loss': 8.600850786124001, 'avg_acc': 50.051556055493066, 'loss': 8.161060333251953}


EP_train:1:  29%|| 8012/27626 [18:52<46:51,  6.98it/s]

{'epoch': 1, 'iter': 8010, 'avg_loss': 8.600816049731755, 'avg_acc': 50.044470103607544, 'loss': 8.933265686035156}


EP_train:1:  29%|| 8022/27626 [18:53<46:07,  7.08it/s]

{'epoch': 1, 'iter': 8020, 'avg_loss': 8.60116345625895, 'avg_acc': 50.042466650043636, 'loss': 8.833403587341309}


EP_train:1:  29%|| 8032/27626 [18:54<46:22,  7.04it/s]

{'epoch': 1, 'iter': 8030, 'avg_loss': 8.600855352988116, 'avg_acc': 50.04085730295107, 'loss': 8.185514450073242}


EP_train:1:  29%|| 8042/27626 [18:56<46:25,  7.03it/s]

{'epoch': 1, 'iter': 8040, 'avg_loss': 8.600862510642251, 'avg_acc': 50.0392519587116, 'loss': 8.675949096679688}


EP_train:1:  29%|| 8052/27626 [18:57<46:25,  7.03it/s]

{'epoch': 1, 'iter': 8050, 'avg_loss': 8.601328986784699, 'avg_acc': 50.04153210781269, 'loss': 8.763910293579102}


EP_train:1:  29%|| 8062/27626 [18:59<46:13,  7.05it/s]

{'epoch': 1, 'iter': 8060, 'avg_loss': 8.601015781146572, 'avg_acc': 50.042643592606375, 'loss': 8.286931037902832}


EP_train:1:  29%|| 8072/27626 [19:00<46:30,  7.01it/s]

{'epoch': 1, 'iter': 8070, 'avg_loss': 8.600810369111809, 'avg_acc': 50.04220356833107, 'loss': 8.889778137207031}


EP_train:1:  29%|| 8082/27626 [19:01<46:33,  7.00it/s]

{'epoch': 1, 'iter': 8080, 'avg_loss': 8.600688469526482, 'avg_acc': 50.03828424699913, 'loss': 8.97864055633545}


EP_train:1:  29%|| 8092/27626 [19:03<46:22,  7.02it/s]

{'epoch': 1, 'iter': 8090, 'avg_loss': 8.600904062259566, 'avg_acc': 50.03282968730688, 'loss': 8.828359603881836}


EP_train:1:  29%|| 8102/27626 [19:04<46:07,  7.05it/s]

{'epoch': 1, 'iter': 8100, 'avg_loss': 8.600827995192459, 'avg_acc': 50.03124614245154, 'loss': 8.158693313598633}


EP_train:1:  29%|| 8112/27626 [19:06<45:57,  7.08it/s]

{'epoch': 1, 'iter': 8110, 'avg_loss': 8.600204121212588, 'avg_acc': 50.038527925040064, 'loss': 8.58061408996582}


EP_train:1:  29%|| 8122/27626 [19:07<46:07,  7.05it/s]

{'epoch': 1, 'iter': 8120, 'avg_loss': 8.600234584023896, 'avg_acc': 50.035786848910234, 'loss': 9.168888092041016}


EP_train:1:  29%|| 8132/27626 [19:09<45:47,  7.10it/s]

{'epoch': 1, 'iter': 8130, 'avg_loss': 8.600572772431294, 'avg_acc': 50.0307465256426, 'loss': 7.941527366638184}


EP_train:1:  29%|| 8142/27626 [19:10<46:05,  7.05it/s]

{'epoch': 1, 'iter': 8140, 'avg_loss': 8.600638464091379, 'avg_acc': 50.028021741800764, 'loss': 9.005762100219727}


EP_train:1:  30%|| 8152/27626 [19:11<46:14,  7.02it/s]

{'epoch': 1, 'iter': 8150, 'avg_loss': 8.600348958850946, 'avg_acc': 50.03143786038523, 'loss': 7.661004066467285}


EP_train:1:  30%|| 8162/27626 [19:13<45:43,  7.10it/s]

{'epoch': 1, 'iter': 8160, 'avg_loss': 8.600730687054828, 'avg_acc': 50.037526038475676, 'loss': 9.201088905334473}


EP_train:1:  30%|| 8172/27626 [19:14<45:56,  7.06it/s]

{'epoch': 1, 'iter': 8170, 'avg_loss': 8.600739833410215, 'avg_acc': 50.04092216374985, 'loss': 8.351415634155273}


EP_train:1:  30%|| 8182/27626 [19:16<45:59,  7.05it/s]

{'epoch': 1, 'iter': 8180, 'avg_loss': 8.60063614546269, 'avg_acc': 50.04278205598338, 'loss': 8.658336639404297}


EP_train:1:  30%|| 8192/27626 [19:17<45:49,  7.07it/s]

{'epoch': 1, 'iter': 8190, 'avg_loss': 8.600054952050723, 'avg_acc': 50.03967769503114, 'loss': 8.000903129577637}


EP_train:1:  30%|| 8202/27626 [19:18<46:00,  7.04it/s]

{'epoch': 1, 'iter': 8200, 'avg_loss': 8.599426540074269, 'avg_acc': 50.04763138641629, 'loss': 8.406745910644531}


EP_train:1:  30%|| 8212/27626 [19:20<45:35,  7.10it/s]

{'epoch': 1, 'iter': 8210, 'avg_loss': 8.599831908473055, 'avg_acc': 50.044148094020215, 'loss': 9.29480266571045}


EP_train:1:  30%|| 8222/27626 [19:21<45:30,  7.11it/s]

{'epoch': 1, 'iter': 8220, 'avg_loss': 8.600076355300178, 'avg_acc': 50.04333414426468, 'loss': 8.864734649658203}


EP_train:1:  30%|| 8232/27626 [19:23<45:46,  7.06it/s]

{'epoch': 1, 'iter': 8230, 'avg_loss': 8.59997683993059, 'avg_acc': 50.041383185518164, 'loss': 8.151226997375488}


EP_train:1:  30%|| 8242/27626 [19:24<45:21,  7.12it/s]

{'epoch': 1, 'iter': 8240, 'avg_loss': 8.60013937976172, 'avg_acc': 50.0356449460017, 'loss': 8.394830703735352}


EP_train:1:  30%|| 8252/27626 [19:25<45:37,  7.08it/s]

{'epoch': 1, 'iter': 8250, 'avg_loss': 8.599842499016473, 'avg_acc': 50.03446551933099, 'loss': 9.425472259521484}


EP_train:1:  30%|| 8262/27626 [19:27<46:03,  7.01it/s]

{'epoch': 1, 'iter': 8260, 'avg_loss': 8.59959229250073, 'avg_acc': 50.02723641205665, 'loss': 8.100462913513184}


EP_train:1:  30%|| 8272/27626 [19:28<45:56,  7.02it/s]

{'epoch': 1, 'iter': 8270, 'avg_loss': 8.598476187203616, 'avg_acc': 50.029848265022366, 'loss': 7.904069423675537}


EP_train:1:  30%|| 8282/27626 [19:30<46:04,  7.00it/s]

{'epoch': 1, 'iter': 8280, 'avg_loss': 8.598208495260883, 'avg_acc': 50.02792537133196, 'loss': 8.348710060119629}


EP_train:1:  30%|| 8292/27626 [19:31<45:58,  7.01it/s]

{'epoch': 1, 'iter': 8290, 'avg_loss': 8.598459700884742, 'avg_acc': 50.03316849595948, 'loss': 8.264327049255371}


EP_train:1:  30%|| 8302/27626 [19:33<45:35,  7.06it/s]

{'epoch': 1, 'iter': 8300, 'avg_loss': 8.598719819111071, 'avg_acc': 50.028611010721605, 'loss': 9.638662338256836}


EP_train:1:  30%|| 8312/27626 [19:34<45:39,  7.05it/s]

{'epoch': 1, 'iter': 8310, 'avg_loss': 8.599072651776464, 'avg_acc': 50.02970460835038, 'loss': 7.751584529876709}


EP_train:1:  30%|| 8322/27626 [19:35<45:46,  7.03it/s]

{'epoch': 1, 'iter': 8320, 'avg_loss': 8.598739908314887, 'avg_acc': 50.02779113087369, 'loss': 8.111786842346191}


EP_train:1:  30%|| 8332/27626 [19:37<45:46,  7.03it/s]

{'epoch': 1, 'iter': 8330, 'avg_loss': 8.59848981246472, 'avg_acc': 50.0270075621174, 'loss': 8.180356979370117}


EP_train:1:  30%|| 8342/27626 [19:38<45:24,  7.08it/s]

{'epoch': 1, 'iter': 8340, 'avg_loss': 8.598668045683768, 'avg_acc': 50.031845701954204, 'loss': 9.002191543579102}


EP_train:1:  30%|| 8352/27626 [19:40<45:24,  7.07it/s]

{'epoch': 1, 'iter': 8350, 'avg_loss': 8.598380123522592, 'avg_acc': 50.029936534546756, 'loss': 8.307536125183105}


EP_train:1:  30%|| 8362/27626 [19:41<45:32,  7.05it/s]

{'epoch': 1, 'iter': 8360, 'avg_loss': 8.597610081772016, 'avg_acc': 50.0332645616553, 'loss': 8.571239471435547}


EP_train:1:  30%|| 8372/27626 [19:42<45:34,  7.04it/s]

{'epoch': 1, 'iter': 8370, 'avg_loss': 8.597854767317356, 'avg_acc': 50.03471807430414, 'loss': 8.580488204956055}


EP_train:1:  30%|| 8382/27626 [19:44<45:31,  7.04it/s]

{'epoch': 1, 'iter': 8380, 'avg_loss': 8.59757291779941, 'avg_acc': 50.03728671996181, 'loss': 8.099875450134277}


EP_train:1:  30%|| 8392/27626 [19:45<45:17,  7.08it/s]

{'epoch': 1, 'iter': 8390, 'avg_loss': 8.5974274743173, 'avg_acc': 50.03761470623287, 'loss': 8.953177452087402}


EP_train:1:  30%|| 8402/27626 [19:47<45:23,  7.06it/s]

{'epoch': 1, 'iter': 8400, 'avg_loss': 8.597215102011498, 'avg_acc': 50.03533805499345, 'loss': 8.670552253723145}


EP_train:1:  30%|| 8412/27626 [19:48<45:24,  7.05it/s]

{'epoch': 1, 'iter': 8410, 'avg_loss': 8.59760459216876, 'avg_acc': 50.030837593627396, 'loss': 9.09499740600586}


EP_train:1:  30%|| 8422/27626 [19:50<45:31,  7.03it/s]

{'epoch': 1, 'iter': 8420, 'avg_loss': 8.597783555725156, 'avg_acc': 50.02931658947869, 'loss': 8.434075355529785}


EP_train:1:  31%|| 8432/27626 [19:51<45:11,  7.08it/s]

{'epoch': 1, 'iter': 8430, 'avg_loss': 8.597763339899963, 'avg_acc': 50.028540505278144, 'loss': 9.043105125427246}


EP_train:1:  31%|| 8442/27626 [19:52<45:14,  7.07it/s]

{'epoch': 1, 'iter': 8440, 'avg_loss': 8.597850600713633, 'avg_acc': 50.0296173439166, 'loss': 8.512511253356934}


EP_train:1:  31%|| 8452/27626 [19:54<45:07,  7.08it/s]

{'epoch': 1, 'iter': 8450, 'avg_loss': 8.597592912698099, 'avg_acc': 50.027363625606434, 'loss': 8.625967979431152}


EP_train:1:  31%|| 8462/27626 [19:55<45:25,  7.03it/s]

{'epoch': 1, 'iter': 8460, 'avg_loss': 8.597738542535348, 'avg_acc': 50.02548457629123, 'loss': 9.025887489318848}


EP_train:1:  31%|| 8472/27626 [19:57<45:34,  7.01it/s]

{'epoch': 1, 'iter': 8470, 'avg_loss': 8.598113605509175, 'avg_acc': 50.023978869082754, 'loss': 8.782127380371094}


EP_train:1:  31%|| 8482/27626 [19:58<45:29,  7.01it/s]

{'epoch': 1, 'iter': 8480, 'avg_loss': 8.598265086939671, 'avg_acc': 50.021371300554186, 'loss': 8.881421089172363}


EP_train:1:  31%|| 8492/27626 [19:59<44:47,  7.12it/s]

{'epoch': 1, 'iter': 8490, 'avg_loss': 8.597858087346264, 'avg_acc': 50.02318631492169, 'loss': 9.057011604309082}


EP_train:1:  31%|| 8502/27626 [20:01<45:01,  7.08it/s]

{'epoch': 1, 'iter': 8500, 'avg_loss': 8.597413843119233, 'avg_acc': 50.02573226679215, 'loss': 8.856311798095703}


EP_train:1:  31%|| 8512/27626 [20:02<45:00,  7.08it/s]

{'epoch': 1, 'iter': 8510, 'avg_loss': 8.5975034431038, 'avg_acc': 50.024233345082834, 'loss': 8.224669456481934}


EP_train:1:  31%|| 8522/27626 [20:04<44:56,  7.08it/s]

{'epoch': 1, 'iter': 8520, 'avg_loss': 8.597643811326956, 'avg_acc': 50.02897253843446, 'loss': 9.208548545837402}


EP_train:1:  31%|| 8532/27626 [20:05<45:01,  7.07it/s]

{'epoch': 1, 'iter': 8530, 'avg_loss': 8.597774294389398, 'avg_acc': 50.02747333255188, 'loss': 8.77959156036377}


EP_train:1:  31%|| 8542/27626 [20:06<45:15,  7.03it/s]

{'epoch': 1, 'iter': 8540, 'avg_loss': 8.59794554790224, 'avg_acc': 50.025977637279006, 'loss': 8.07655143737793}


EP_train:1:  31%|| 8552/27626 [20:08<45:48,  6.94it/s]

{'epoch': 1, 'iter': 8550, 'avg_loss': 8.597773837173296, 'avg_acc': 50.02923634662613, 'loss': 9.043380737304688}


EP_train:1:  31%|| 8562/27626 [20:09<44:56,  7.07it/s]

{'epoch': 1, 'iter': 8560, 'avg_loss': 8.598131737632738, 'avg_acc': 50.02920219600514, 'loss': 9.342547416687012}


EP_train:1:  31%|| 8572/27626 [20:11<44:43,  7.10it/s]

{'epoch': 1, 'iter': 8570, 'avg_loss': 8.59786140808855, 'avg_acc': 50.02442830474857, 'loss': 8.817580223083496}


EP_train:1:  31%|| 8582/27626 [20:12<44:46,  7.09it/s]

{'epoch': 1, 'iter': 8580, 'avg_loss': 8.597828044798002, 'avg_acc': 50.02148642349377, 'loss': 9.053521156311035}


EP_train:1:  31%|| 8592/27626 [20:13<44:43,  7.09it/s]

{'epoch': 1, 'iter': 8590, 'avg_loss': 8.597401585099917, 'avg_acc': 50.03019147945524, 'loss': 7.8679609298706055}


EP_train:1:  31%|| 8602/27626 [20:15<44:54,  7.06it/s]

{'epoch': 1, 'iter': 8600, 'avg_loss': 8.597496031162532, 'avg_acc': 50.03269968608301, 'loss': 8.433812141418457}


EP_train:1:  31%|| 8612/27626 [20:16<44:41,  7.09it/s]

{'epoch': 1, 'iter': 8610, 'avg_loss': 8.597612127694584, 'avg_acc': 50.037016606665894, 'loss': 8.989538192749023}


EP_train:1:  31%|| 8622/27626 [20:18<44:37,  7.10it/s]

{'epoch': 1, 'iter': 8620, 'avg_loss': 8.597510251654866, 'avg_acc': 50.03661118199745, 'loss': 8.853503227233887}


EP_train:1:  31%|| 8632/27626 [20:19<44:52,  7.05it/s]

{'epoch': 1, 'iter': 8630, 'avg_loss': 8.59760898511195, 'avg_acc': 50.03367222801529, 'loss': 8.288530349731445}


EP_train:1:  31%|| 8642/27626 [20:21<45:05,  7.02it/s]

{'epoch': 1, 'iter': 8640, 'avg_loss': 8.598034467676174, 'avg_acc': 50.03290996412453, 'loss': 9.001669883728027}


EP_train:1:  31%|| 8652/27626 [20:22<44:36,  7.09it/s]

{'epoch': 1, 'iter': 8650, 'avg_loss': 8.598159088259997, 'avg_acc': 50.03214946248988, 'loss': 8.258268356323242}


EP_train:1:  31%|| 8662/27626 [20:23<44:41,  7.07it/s]

{'epoch': 1, 'iter': 8660, 'avg_loss': 8.598291951505324, 'avg_acc': 50.03499884539892, 'loss': 8.878564834594727}


EP_train:1:  31%|| 8672/27626 [20:25<44:41,  7.07it/s]

{'epoch': 1, 'iter': 8670, 'avg_loss': 8.598089784944007, 'avg_acc': 50.03928324299389, 'loss': 8.770757675170898}


EP_train:1:  31%|| 8682/27626 [20:26<44:37,  7.08it/s]

{'epoch': 1, 'iter': 8680, 'avg_loss': 8.598082626295755, 'avg_acc': 50.035638175325424, 'loss': 8.335142135620117}


EP_train:1:  31%|| 8692/27626 [20:28<44:41,  7.06it/s]

{'epoch': 1, 'iter': 8690, 'avg_loss': 8.598231421174258, 'avg_acc': 50.03739500632839, 'loss': 8.671698570251465}


EP_train:1:  31%|| 8702/27626 [20:29<44:52,  7.03it/s]

{'epoch': 1, 'iter': 8700, 'avg_loss': 8.598269360981485, 'avg_acc': 50.034119641420524, 'loss': 8.232320785522461}


EP_train:1:  32%|| 8712/27626 [20:30<45:14,  6.97it/s]

{'epoch': 1, 'iter': 8710, 'avg_loss': 8.59803817280671, 'avg_acc': 50.03228676386179, 'loss': 8.341769218444824}


EP_train:1:  32%|| 8722/27626 [20:32<45:02,  6.99it/s]

{'epoch': 1, 'iter': 8720, 'avg_loss': 8.597907659206733, 'avg_acc': 50.03619137713565, 'loss': 8.206981658935547}


EP_train:1:  32%|| 8732/27626 [20:33<45:01,  6.99it/s]

{'epoch': 1, 'iter': 8730, 'avg_loss': 8.59802369308996, 'avg_acc': 50.03078112472799, 'loss': 8.981169700622559}


EP_train:1:  32%|| 8742/27626 [20:35<44:30,  7.07it/s]

{'epoch': 1, 'iter': 8740, 'avg_loss': 8.597915710348406, 'avg_acc': 50.02967337833199, 'loss': 8.734956741333008}


EP_train:1:  32%|| 8752/27626 [20:36<44:55,  7.00it/s]

{'epoch': 1, 'iter': 8750, 'avg_loss': 8.597618576649625, 'avg_acc': 50.02356873500171, 'loss': 7.807860851287842}


EP_train:1:  32%|| 8762/27626 [20:38<44:51,  7.01it/s]

{'epoch': 1, 'iter': 8760, 'avg_loss': 8.597516865507679, 'avg_acc': 50.02924894418446, 'loss': 8.487364768981934}


EP_train:1:  32%|| 8772/27626 [20:39<44:14,  7.10it/s]

{'epoch': 1, 'iter': 8770, 'avg_loss': 8.597432530465875, 'avg_acc': 50.02565271918823, 'loss': 8.992390632629395}


EP_train:1:  32%|| 8782/27626 [20:40<44:11,  7.11it/s]

{'epoch': 1, 'iter': 8780, 'avg_loss': 8.597249513823112, 'avg_acc': 50.02953820749345, 'loss': 7.866947174072266}


EP_train:1:  32%|| 8792/27626 [20:42<44:38,  7.03it/s]

{'epoch': 1, 'iter': 8790, 'avg_loss': 8.596023622815919, 'avg_acc': 50.02737174382892, 'loss': 7.345650672912598}


EP_train:1:  32%|| 8802/27626 [20:43<44:19,  7.08it/s]

{'epoch': 1, 'iter': 8800, 'avg_loss': 8.595150835310081, 'avg_acc': 50.02378991023747, 'loss': 8.161526679992676}


EP_train:1:  32%|| 8812/27626 [20:45<44:49,  7.00it/s]

{'epoch': 1, 'iter': 8810, 'avg_loss': 8.59537788590702, 'avg_acc': 50.022344228804904, 'loss': 8.948227882385254}


EP_train:1:  32%|| 8822/27626 [20:46<44:36,  7.03it/s]

{'epoch': 1, 'iter': 8820, 'avg_loss': 8.595529050300511, 'avg_acc': 50.022673166307676, 'loss': 8.552331924438477}


EP_train:1:  32%|| 8832/27626 [20:47<44:35,  7.03it/s]

{'epoch': 1, 'iter': 8830, 'avg_loss': 8.595436607099932, 'avg_acc': 50.026186162382515, 'loss': 9.762832641601562}


EP_train:1:  32%|| 8842/27626 [20:49<44:11,  7.08it/s]

{'epoch': 1, 'iter': 8840, 'avg_loss': 8.595762941554423, 'avg_acc': 50.02438920936546, 'loss': 9.090514183044434}


EP_train:1:  32%|| 8852/27626 [20:50<44:15,  7.07it/s]

{'epoch': 1, 'iter': 8850, 'avg_loss': 8.595936756782674, 'avg_acc': 50.01977177720032, 'loss': 8.438072204589844}


EP_train:1:  32%|| 8862/27626 [20:52<44:12,  7.07it/s]

{'epoch': 1, 'iter': 8860, 'avg_loss': 8.595853110708296, 'avg_acc': 50.019396794944136, 'loss': 8.687098503112793}


EP_train:1:  32%|| 8872/27626 [20:53<44:48,  6.98it/s]

{'epoch': 1, 'iter': 8870, 'avg_loss': 8.595763167511475, 'avg_acc': 50.021840829669706, 'loss': 8.65109920501709}


EP_train:1:  32%|| 8882/27626 [20:54<44:04,  7.09it/s]

{'epoch': 1, 'iter': 8880, 'avg_loss': 8.595772292804321, 'avg_acc': 50.020408737754764, 'loss': 8.787562370300293}


EP_train:1:  32%|| 8892/27626 [20:56<44:16,  7.05it/s]

{'epoch': 1, 'iter': 8890, 'avg_loss': 8.595689089812305, 'avg_acc': 50.02214317849511, 'loss': 8.277470588684082}


EP_train:1:  32%|| 8902/27626 [20:57<44:18,  7.04it/s]

{'epoch': 1, 'iter': 8900, 'avg_loss': 8.59554307957122, 'avg_acc': 50.019660712279524, 'loss': 8.440953254699707}


EP_train:1:  32%|| 8912/27626 [20:59<44:40,  6.98it/s]

{'epoch': 1, 'iter': 8910, 'avg_loss': 8.595349908637594, 'avg_acc': 50.02139209965212, 'loss': 8.069561958312988}


EP_train:1:  32%|| 8922/27626 [21:00<44:05,  7.07it/s]

{'epoch': 1, 'iter': 8920, 'avg_loss': 8.595309976050304, 'avg_acc': 50.02206871426969, 'loss': 8.388996124267578}


EP_train:1:  32%|| 8932/27626 [21:02<44:02,  7.08it/s]

{'epoch': 1, 'iter': 8930, 'avg_loss': 8.5950673280805, 'avg_acc': 50.021344194379125, 'loss': 9.09057903289795}


EP_train:1:  32%|| 8942/27626 [21:03<44:13,  7.04it/s]

{'epoch': 1, 'iter': 8940, 'avg_loss': 8.59512614489215, 'avg_acc': 50.01922324124818, 'loss': 8.95879077911377}


EP_train:1:  32%|| 8952/27626 [21:04<44:17,  7.03it/s]

{'epoch': 1, 'iter': 8950, 'avg_loss': 8.595395029703209, 'avg_acc': 50.021645626187016, 'loss': 8.265687942504883}


EP_train:1:  32%|| 8962/27626 [21:06<44:00,  7.07it/s]

{'epoch': 1, 'iter': 8960, 'avg_loss': 8.595643564681232, 'avg_acc': 50.02022653721683, 'loss': 7.8932085037231445}


EP_train:1:  32%|| 8972/27626 [21:07<43:50,  7.09it/s]

{'epoch': 1, 'iter': 8970, 'avg_loss': 8.596026434519395, 'avg_acc': 50.02682253929328, 'loss': 8.727890014648438}


EP_train:1:  33%|| 8982/27626 [21:09<44:12,  7.03it/s]

{'epoch': 1, 'iter': 8980, 'avg_loss': 8.59576745724017, 'avg_acc': 50.02714063021935, 'loss': 9.084400177001953}


EP_train:1:  33%|| 8992/27626 [21:10<44:23,  7.00it/s]

{'epoch': 1, 'iter': 8990, 'avg_loss': 8.595831251637634, 'avg_acc': 50.03267156044934, 'loss': 8.725027084350586}


EP_train:1:  33%|| 9002/27626 [21:11<44:16,  7.01it/s]

{'epoch': 1, 'iter': 9000, 'avg_loss': 8.595882979913124, 'avg_acc': 50.03124652816354, 'loss': 8.823025703430176}


EP_train:1:  33%|| 9012/27626 [21:13<43:50,  7.08it/s]

{'epoch': 1, 'iter': 9010, 'avg_loss': 8.596380227785001, 'avg_acc': 50.030171457107976, 'loss': 9.4649019241333}


EP_train:1:  33%|| 9022/27626 [21:14<43:47,  7.08it/s]

{'epoch': 1, 'iter': 9020, 'avg_loss': 8.596672977686008, 'avg_acc': 50.026673872076266, 'loss': 8.051451683044434}


EP_train:1:  33%|| 9032/27626 [21:16<44:14,  7.00it/s]

{'epoch': 1, 'iter': 9030, 'avg_loss': 8.596967493916202, 'avg_acc': 50.0301046395748, 'loss': 8.8331298828125}


EP_train:1:  33%|| 9042/27626 [21:17<43:36,  7.10it/s]

{'epoch': 1, 'iter': 9040, 'avg_loss': 8.59704096630313, 'avg_acc': 50.03214522729786, 'loss': 9.200831413269043}


EP_train:1:  33%|| 9052/27626 [21:19<44:15,  7.00it/s]

{'epoch': 1, 'iter': 9050, 'avg_loss': 8.59677267011365, 'avg_acc': 50.034181305933046, 'loss': 8.362104415893555}


EP_train:1:  33%|| 9062/27626 [21:20<43:59,  7.03it/s]

{'epoch': 1, 'iter': 9060, 'avg_loss': 8.596665451219048, 'avg_acc': 50.03276404370378, 'loss': 7.987916469573975}


EP_train:1:  33%|| 9072/27626 [21:21<43:47,  7.06it/s]

{'epoch': 1, 'iter': 9070, 'avg_loss': 8.59647195424099, 'avg_acc': 50.03617296880167, 'loss': 8.134739875793457}


EP_train:1:  33%|| 9082/27626 [21:23<43:58,  7.03it/s]

{'epoch': 1, 'iter': 9080, 'avg_loss': 8.596872839629945, 'avg_acc': 50.0412950115626, 'loss': 9.407781600952148}


EP_train:1:  33%|| 9092/27626 [21:24<43:59,  7.02it/s]

{'epoch': 1, 'iter': 9090, 'avg_loss': 8.596951248962853, 'avg_acc': 50.04159333406666, 'loss': 9.295262336730957}


EP_train:1:  33%|| 9102/27626 [21:26<43:44,  7.06it/s]

{'epoch': 1, 'iter': 9100, 'avg_loss': 8.597130082699115, 'avg_acc': 50.039487418964946, 'loss': 8.558964729309082}


EP_train:1:  33%|| 9112/27626 [21:27<44:07,  6.99it/s]

{'epoch': 1, 'iter': 9110, 'avg_loss': 8.597141955977227, 'avg_acc': 50.04047305454945, 'loss': 8.50656795501709}


EP_train:1:  33%|| 9122/27626 [21:28<43:29,  7.09it/s]

{'epoch': 1, 'iter': 9120, 'avg_loss': 8.596726690281276, 'avg_acc': 50.04282699265431, 'loss': 8.670266151428223}


EP_train:1:  33%|| 9132/27626 [21:30<43:35,  7.07it/s]

{'epoch': 1, 'iter': 9130, 'avg_loss': 8.59662207986867, 'avg_acc': 50.04278008980396, 'loss': 8.996529579162598}


EP_train:1:  33%|| 9142/27626 [21:31<43:37,  7.06it/s]

{'epoch': 1, 'iter': 9140, 'avg_loss': 8.596531150195233, 'avg_acc': 50.04854501695657, 'loss': 9.079992294311523}


EP_train:1:  33%|| 9152/27626 [21:33<43:44,  7.04it/s]

{'epoch': 1, 'iter': 9150, 'avg_loss': 8.596553311479793, 'avg_acc': 50.045077040760575, 'loss': 8.845712661743164}


EP_train:1:  33%|| 9162/27626 [21:34<43:24,  7.09it/s]

{'epoch': 1, 'iter': 9160, 'avg_loss': 8.596894298887008, 'avg_acc': 50.04536895535422, 'loss': 8.310043334960938}


EP_train:1:  33%|| 9172/27626 [21:35<43:23,  7.09it/s]

{'epoch': 1, 'iter': 9170, 'avg_loss': 8.59712356731268, 'avg_acc': 50.05213444553483, 'loss': 8.893198013305664}


EP_train:1:  33%|| 9182/27626 [21:37<43:57,  6.99it/s]

{'epoch': 1, 'iter': 9180, 'avg_loss': 8.597013801131785, 'avg_acc': 50.052758414116106, 'loss': 8.11226749420166}


EP_train:1:  33%|| 9192/27626 [21:38<43:35,  7.05it/s]

{'epoch': 1, 'iter': 9190, 'avg_loss': 8.596884603015798, 'avg_acc': 50.05780110978131, 'loss': 8.786593437194824}


EP_train:1:  33%|| 9202/27626 [21:40<43:27,  7.06it/s]

{'epoch': 1, 'iter': 9200, 'avg_loss': 8.596898049119373, 'avg_acc': 50.06249320726008, 'loss': 8.991795539855957}


EP_train:1:  33%|| 9212/27626 [21:41<43:13,  7.10it/s]

{'epoch': 1, 'iter': 9210, 'avg_loss': 8.597173490038706, 'avg_acc': 50.0580148735208, 'loss': 9.402897834777832}


EP_train:1:  33%|| 9222/27626 [21:43<43:25,  7.06it/s]

{'epoch': 1, 'iter': 9220, 'avg_loss': 8.597245016657547, 'avg_acc': 50.05083505042837, 'loss': 8.312186241149902}


EP_train:1:  33%|| 9232/27626 [21:44<43:10,  7.10it/s]

{'epoch': 1, 'iter': 9230, 'avg_loss': 8.5973037384582, 'avg_acc': 50.04976438089048, 'loss': 8.711633682250977}


EP_train:1:  33%|| 9242/27626 [21:45<43:51,  6.99it/s]

{'epoch': 1, 'iter': 9240, 'avg_loss': 8.597137219074316, 'avg_acc': 50.04734336110811, 'loss': 8.089034080505371}


EP_train:1:  33%|| 9252/27626 [21:47<43:42,  7.01it/s]

{'epoch': 1, 'iter': 9250, 'avg_loss': 8.59705074309478, 'avg_acc': 50.05067019781645, 'loss': 8.998592376708984}


EP_train:1:  34%|| 9262/27626 [21:48<43:39,  7.01it/s]

{'epoch': 1, 'iter': 9260, 'avg_loss': 8.597386317638763, 'avg_acc': 50.05230266709858, 'loss': 8.464447975158691}


EP_train:1:  34%|| 9272/27626 [21:50<43:27,  7.04it/s]

{'epoch': 1, 'iter': 9270, 'avg_loss': 8.597429478416652, 'avg_acc': 50.05190917916082, 'loss': 8.587830543518066}


EP_train:1:  34%|| 9282/27626 [21:51<42:56,  7.12it/s]

{'epoch': 1, 'iter': 9280, 'avg_loss': 8.597556834667358, 'avg_acc': 50.05185324857236, 'loss': 8.831395149230957}


EP_train:1:  34%|| 9292/27626 [21:52<43:16,  7.06it/s]

{'epoch': 1, 'iter': 9290, 'avg_loss': 8.597319080657433, 'avg_acc': 50.050115703368846, 'loss': 8.430648803710938}


EP_train:1:  34%|| 9302/27626 [21:54<42:57,  7.11it/s]

{'epoch': 1, 'iter': 9300, 'avg_loss': 8.59734936296754, 'avg_acc': 50.046701967530375, 'loss': 8.732865333557129}


EP_train:1:  34%|| 9312/27626 [21:55<43:02,  7.09it/s]

{'epoch': 1, 'iter': 9310, 'avg_loss': 8.597715719598101, 'avg_acc': 50.04564493609709, 'loss': 10.335208892822266}


EP_train:1:  34%|| 9322/27626 [21:57<43:06,  7.08it/s]

{'epoch': 1, 'iter': 9320, 'avg_loss': 8.598012350107158, 'avg_acc': 50.04391964381504, 'loss': 9.100918769836426}


EP_train:1:  34%|| 9332/27626 [21:58<43:15,  7.05it/s]

{'epoch': 1, 'iter': 9330, 'avg_loss': 8.598168672210674, 'avg_acc': 50.03851409280892, 'loss': 8.541720390319824}


EP_train:1:  34%|| 9342/27626 [21:59<43:12,  7.05it/s]

{'epoch': 1, 'iter': 9340, 'avg_loss': 8.598224007870218, 'avg_acc': 50.03312011561931, 'loss': 8.624628067016602}


EP_train:1:  34%|| 9352/27626 [22:01<43:03,  7.07it/s]

{'epoch': 1, 'iter': 9350, 'avg_loss': 8.598350539180432, 'avg_acc': 50.034421452251095, 'loss': 8.82345199584961}


EP_train:1:  34%|| 9362/27626 [22:02<43:25,  7.01it/s]

{'epoch': 1, 'iter': 9360, 'avg_loss': 8.598407312229574, 'avg_acc': 50.036053840401664, 'loss': 8.018060684204102}


EP_train:1:  34%|| 9372/27626 [22:04<43:08,  7.05it/s]

{'epoch': 1, 'iter': 9370, 'avg_loss': 8.598415879681173, 'avg_acc': 50.0386831714865, 'loss': 8.595959663391113}


EP_train:1:  34%|| 9382/27626 [22:05<43:06,  7.05it/s]

{'epoch': 1, 'iter': 9380, 'avg_loss': 8.5982623526237, 'avg_acc': 50.03664321500906, 'loss': 8.400460243225098}


EP_train:1:  34%|| 9392/27626 [22:06<42:53,  7.09it/s]

{'epoch': 1, 'iter': 9390, 'avg_loss': 8.598279288294673, 'avg_acc': 50.03627143009264, 'loss': 8.59234619140625}


EP_train:1:  34%|| 9402/27626 [22:08<43:04,  7.05it/s]

{'epoch': 1, 'iter': 9400, 'avg_loss': 8.598610896098462, 'avg_acc': 50.036565259015, 'loss': 9.13001823425293}


EP_train:1:  34%|| 9412/27626 [22:09<43:08,  7.04it/s]

{'epoch': 1, 'iter': 9410, 'avg_loss': 8.5986462196753, 'avg_acc': 50.03586228881096, 'loss': 8.867369651794434}


EP_train:1:  34%|| 9422/27626 [22:11<43:07,  7.03it/s]

{'epoch': 1, 'iter': 9420, 'avg_loss': 8.59879267305569, 'avg_acc': 50.03350228213566, 'loss': 8.661149024963379}


EP_train:1:  34%|| 9432/27626 [22:12<43:02,  7.05it/s]

{'epoch': 1, 'iter': 9430, 'avg_loss': 8.59903554650433, 'avg_acc': 50.03678029901389, 'loss': 8.867816925048828}


EP_train:1:  34%|| 9442/27626 [22:14<43:11,  7.02it/s]

{'epoch': 1, 'iter': 9440, 'avg_loss': 8.598633608087217, 'avg_acc': 50.039720368605025, 'loss': 8.982574462890625}


EP_train:1:  34%|| 9452/27626 [22:15<42:52,  7.06it/s]

{'epoch': 1, 'iter': 9450, 'avg_loss': 8.598459250081117, 'avg_acc': 50.038686382393394, 'loss': 8.215524673461914}


EP_train:1:  34%|| 9462/27626 [22:16<42:42,  7.09it/s]

{'epoch': 1, 'iter': 9460, 'avg_loss': 8.59848121084228, 'avg_acc': 50.03402124511152, 'loss': 8.347864151000977}


EP_train:1:  34%|| 9472/27626 [22:18<42:55,  7.05it/s]

{'epoch': 1, 'iter': 9470, 'avg_loss': 8.598523508896987, 'avg_acc': 50.033655369021226, 'loss': 9.176273345947266}


EP_train:1:  34%|| 9482/27626 [22:19<42:37,  7.09it/s]

{'epoch': 1, 'iter': 9480, 'avg_loss': 8.59835389122884, 'avg_acc': 50.034938297647926, 'loss': 8.700654983520508}


EP_train:1:  34%|| 9492/27626 [22:21<42:40,  7.08it/s]

{'epoch': 1, 'iter': 9490, 'avg_loss': 8.59829355720574, 'avg_acc': 50.02930407754715, 'loss': 8.109289169311523}


EP_train:1:  34%|| 9502/27626 [22:22<43:07,  7.00it/s]

{'epoch': 1, 'iter': 9500, 'avg_loss': 8.598053174951856, 'avg_acc': 50.02894432165035, 'loss': 7.5980916023254395}


EP_train:1:  34%|| 9512/27626 [22:23<42:56,  7.03it/s]

{'epoch': 1, 'iter': 9510, 'avg_loss': 8.598053516768568, 'avg_acc': 50.032199558406056, 'loss': 8.303610801696777}


EP_train:1:  34%|| 9522/27626 [22:25<42:40,  7.07it/s]

{'epoch': 1, 'iter': 9520, 'avg_loss': 8.598173658883617, 'avg_acc': 50.03216573889298, 'loss': 8.214615821838379}


EP_train:1:  35%|| 9532/27626 [22:26<42:43,  7.06it/s]

{'epoch': 1, 'iter': 9530, 'avg_loss': 8.597977443527368, 'avg_acc': 50.0337713776099, 'loss': 7.448512554168701}


EP_train:1:  35%|| 9542/27626 [22:28<43:08,  6.99it/s]

{'epoch': 1, 'iter': 9540, 'avg_loss': 8.597963911271822, 'avg_acc': 50.03602871816371, 'loss': 9.179558753967285}


EP_train:1:  35%|| 9552/27626 [22:29<42:45,  7.05it/s]

{'epoch': 1, 'iter': 9550, 'avg_loss': 8.598012861088675, 'avg_acc': 50.0431891948487, 'loss': 8.745810508728027}


EP_train:1:  35%|| 9562/27626 [22:31<43:07,  6.98it/s]

{'epoch': 1, 'iter': 9560, 'avg_loss': 8.598031154275876, 'avg_acc': 50.043144022591775, 'loss': 8.05721664428711}


EP_train:1:  35%|| 9572/27626 [22:32<43:03,  6.99it/s]

{'epoch': 1, 'iter': 9570, 'avg_loss': 8.597790558352365, 'avg_acc': 50.04538449482813, 'loss': 8.083980560302734}


EP_train:1:  35%|| 9582/27626 [22:33<42:41,  7.04it/s]

{'epoch': 1, 'iter': 9580, 'avg_loss': 8.598079221928721, 'avg_acc': 50.04272779459347, 'loss': 8.984309196472168}


EP_train:1:  35%|| 9592/27626 [22:35<43:01,  6.99it/s]

{'epoch': 1, 'iter': 9590, 'avg_loss': 8.597987519179602, 'avg_acc': 50.04528985507246, 'loss': 9.213071823120117}


EP_train:1:  35%|| 9602/27626 [22:36<42:40,  7.04it/s]

{'epoch': 1, 'iter': 9600, 'avg_loss': 8.597855271479572, 'avg_acc': 50.04036037912717, 'loss': 8.272796630859375}


EP_train:1:  35%|| 9612/27626 [22:38<42:18,  7.10it/s]

{'epoch': 1, 'iter': 9610, 'avg_loss': 8.597996145489933, 'avg_acc': 50.040968681718866, 'loss': 8.515405654907227}


EP_train:1:  35%|| 9622/27626 [22:39<42:34,  7.05it/s]

{'epoch': 1, 'iter': 9620, 'avg_loss': 8.598084047967959, 'avg_acc': 50.03702837542875, 'loss': 8.678364753723145}


EP_train:1:  35%|| 9632/27626 [22:40<42:46,  7.01it/s]

{'epoch': 1, 'iter': 9630, 'avg_loss': 8.598186346021796, 'avg_acc': 50.04088360502544, 'loss': 9.26091480255127}


EP_train:1:  35%|| 9642/27626 [22:42<42:19,  7.08it/s]

{'epoch': 1, 'iter': 9640, 'avg_loss': 8.598386404907762, 'avg_acc': 50.044082564049376, 'loss': 9.505516052246094}


EP_train:1:  35%|| 9652/27626 [22:43<42:24,  7.06it/s]

{'epoch': 1, 'iter': 9650, 'avg_loss': 8.598673045629047, 'avg_acc': 50.04144648222982, 'loss': 9.330808639526367}


EP_train:1:  35%|| 9662/27626 [22:45<42:17,  7.08it/s]

{'epoch': 1, 'iter': 9660, 'avg_loss': 8.598553292833492, 'avg_acc': 50.04075665045027, 'loss': 8.53776741027832}


EP_train:1:  35%|| 9672/27626 [22:46<42:35,  7.03it/s]

{'epoch': 1, 'iter': 9670, 'avg_loss': 8.598381415824587, 'avg_acc': 50.04426894840244, 'loss': 7.84959077835083}


EP_train:1:  35%|| 9682/27626 [22:48<42:23,  7.05it/s]

{'epoch': 1, 'iter': 9680, 'avg_loss': 8.598473066289174, 'avg_acc': 50.04551440966842, 'loss': 8.43496322631836}


EP_train:1:  35%|| 9692/27626 [22:49<42:25,  7.05it/s]

{'epoch': 1, 'iter': 9690, 'avg_loss': 8.598344966804584, 'avg_acc': 50.044177587452275, 'loss': 9.079537391662598}


EP_train:1:  35%|| 9702/27626 [22:50<42:20,  7.05it/s]

{'epoch': 1, 'iter': 9700, 'avg_loss': 8.598314494671815, 'avg_acc': 50.04509844345944, 'loss': 8.517539024353027}


EP_train:1:  35%|| 9712/27626 [22:52<42:15,  7.07it/s]

{'epoch': 1, 'iter': 9710, 'avg_loss': 8.59822454934704, 'avg_acc': 50.04440840284213, 'loss': 8.96888542175293}


EP_train:1:  35%|| 9722/27626 [22:53<42:29,  7.02it/s]

{'epoch': 1, 'iter': 9720, 'avg_loss': 8.598200099726492, 'avg_acc': 50.04950622363954, 'loss': 8.160144805908203}


EP_train:1:  35%|| 9732/27626 [22:55<42:17,  7.05it/s]

{'epoch': 1, 'iter': 9730, 'avg_loss': 8.598768509643417, 'avg_acc': 50.04560168533553, 'loss': 9.566081047058105}


EP_train:1:  35%|| 9742/27626 [22:56<42:11,  7.06it/s]

{'epoch': 1, 'iter': 9740, 'avg_loss': 8.59904386522148, 'avg_acc': 50.04876296068166, 'loss': 9.823604583740234}


EP_train:1:  35%|| 9752/27626 [22:57<42:13,  7.06it/s]

{'epoch': 1, 'iter': 9750, 'avg_loss': 8.599107244395926, 'avg_acc': 50.04775151266537, 'loss': 8.577451705932617}


EP_train:1:  35%|| 9762/27626 [22:59<42:20,  7.03it/s]

{'epoch': 1, 'iter': 9760, 'avg_loss': 8.599534784009645, 'avg_acc': 50.044180924085644, 'loss': 8.817312240600586}


EP_train:1:  35%|| 9772/27626 [23:00<42:11,  7.05it/s]

{'epoch': 1, 'iter': 9770, 'avg_loss': 8.599712460205263, 'avg_acc': 50.042856411830925, 'loss': 7.981883525848389}


EP_train:1:  35%|| 9782/27626 [23:02<42:10,  7.05it/s]

{'epoch': 1, 'iter': 9780, 'avg_loss': 8.599651765584483, 'avg_acc': 50.04281259584909, 'loss': 8.840938568115234}


EP_train:1:  35%|| 9792/27626 [23:03<41:42,  7.13it/s]

{'epoch': 1, 'iter': 9790, 'avg_loss': 8.59938073866872, 'avg_acc': 50.03989633336737, 'loss': 9.010090827941895}


EP_train:1:  35%|| 9802/27626 [23:04<41:57,  7.08it/s]

{'epoch': 1, 'iter': 9800, 'avg_loss': 8.599243812154306, 'avg_acc': 50.036667176818696, 'loss': 8.733630180358887}


EP_train:1:  36%|| 9812/27626 [23:06<42:02,  7.06it/s]

{'epoch': 1, 'iter': 9810, 'avg_loss': 8.599319986995237, 'avg_acc': 50.03344460299664, 'loss': 8.60058879852295}


EP_train:1:  36%|| 9822/27626 [23:07<41:47,  7.10it/s]

{'epoch': 1, 'iter': 9820, 'avg_loss': 8.599222960995021, 'avg_acc': 50.030864983199265, 'loss': 8.55424690246582}


EP_train:1:  36%|| 9832/27626 [23:09<41:56,  7.07it/s]

{'epoch': 1, 'iter': 9830, 'avg_loss': 8.598983938436204, 'avg_acc': 50.02987997151867, 'loss': 8.370631217956543}


EP_train:1:  36%|| 9842/27626 [23:10<42:13,  7.02it/s]

{'epoch': 1, 'iter': 9840, 'avg_loss': 8.598967636446803, 'avg_acc': 50.031437353927444, 'loss': 8.937385559082031}


EP_train:1:  36%|| 9852/27626 [23:12<42:28,  6.97it/s]

{'epoch': 1, 'iter': 9850, 'avg_loss': 8.599112922365249, 'avg_acc': 50.0298193076845, 'loss': 9.11684799194336}


EP_train:1:  36%|| 9862/27626 [23:13<42:18,  7.00it/s]

{'epoch': 1, 'iter': 9860, 'avg_loss': 8.599188645100476, 'avg_acc': 50.0316904979211, 'loss': 8.578824996948242}


EP_train:1:  36%|| 9872/27626 [23:14<41:46,  7.08it/s]

{'epoch': 1, 'iter': 9870, 'avg_loss': 8.59917696871761, 'avg_acc': 50.0294423057441, 'loss': 8.401427268981934}


EP_train:1:  36%|| 9882/27626 [23:16<41:47,  7.08it/s]

{'epoch': 1, 'iter': 9880, 'avg_loss': 8.59918288028309, 'avg_acc': 50.02846371824714, 'loss': 7.6600871086120605}


EP_train:1:  36%|| 9892/27626 [23:17<41:44,  7.08it/s]

{'epoch': 1, 'iter': 9890, 'avg_loss': 8.599007251252782, 'avg_acc': 50.03317409766454, 'loss': 8.203340530395508}


EP_train:1:  36%|| 9902/27626 [23:19<42:05,  7.02it/s]

{'epoch': 1, 'iter': 9900, 'avg_loss': 8.598549450759803, 'avg_acc': 50.03219371780629, 'loss': 8.209065437316895}


EP_train:1:  36%|| 9912/27626 [23:20<42:01,  7.03it/s]

{'epoch': 1, 'iter': 9910, 'avg_loss': 8.598680832133189, 'avg_acc': 50.031215316315205, 'loss': 8.934791564941406}


EP_train:1:  36%|| 9922/27626 [23:21<41:48,  7.06it/s]

{'epoch': 1, 'iter': 9920, 'avg_loss': 8.598590387883247, 'avg_acc': 50.02834895675839, 'loss': 8.568853378295898}


EP_train:1:  36%|| 9932/27626 [23:23<41:47,  7.06it/s]

{'epoch': 1, 'iter': 9930, 'avg_loss': 8.598722023349817, 'avg_acc': 50.0239150135938, 'loss': 8.543392181396484}


EP_train:1:  36%|| 9942/27626 [23:24<41:35,  7.09it/s]

{'epoch': 1, 'iter': 9940, 'avg_loss': 8.598593889233955, 'avg_acc': 50.027034503571066, 'loss': 8.34331226348877}


EP_train:1:  36%|| 9952/27626 [23:26<41:39,  7.07it/s]

{'epoch': 1, 'iter': 9950, 'avg_loss': 8.598569569258506, 'avg_acc': 50.030461762636925, 'loss': 9.36825180053711}


EP_train:1:  36%|| 9962/27626 [23:27<41:42,  7.06it/s]

{'epoch': 1, 'iter': 9960, 'avg_loss': 8.598551571243512, 'avg_acc': 50.02854884047786, 'loss': 8.506930351257324}


EP_train:1:  36%|| 9972/27626 [23:28<41:48,  7.04it/s]

{'epoch': 1, 'iter': 9970, 'avg_loss': 8.598728927587054, 'avg_acc': 50.0300872530338, 'loss': 8.573840141296387}


EP_train:1:  36%|| 9982/27626 [23:30<42:16,  6.96it/s]

{'epoch': 1, 'iter': 9980, 'avg_loss': 8.598900053092896, 'avg_acc': 50.02943091874562, 'loss': 8.719131469726562}


EP_train:1:  36%|| 9992/27626 [23:31<42:24,  6.93it/s]

{'epoch': 1, 'iter': 9990, 'avg_loss': 8.599128336128965, 'avg_acc': 50.028775898308474, 'loss': 8.803000450134277}


EP_train:1:  36%|| 10002/27626 [23:33<42:42,  6.88it/s]

{'epoch': 1, 'iter': 10000, 'avg_loss': 8.598873039017128, 'avg_acc': 50.02374762523748, 'loss': 8.833527565002441}


EP_train:1:  36%|| 10012/27626 [23:34<41:16,  7.11it/s]

{'epoch': 1, 'iter': 10010, 'avg_loss': 8.598836320083157, 'avg_acc': 50.02497253021676, 'loss': 8.255462646484375}


EP_train:1:  36%|| 10022/27626 [23:36<41:28,  7.08it/s]

{'epoch': 1, 'iter': 10020, 'avg_loss': 8.598885677184617, 'avg_acc': 50.023388384392774, 'loss': 8.066783905029297}


EP_train:1:  36%|| 10032/27626 [23:37<41:40,  7.04it/s]

{'epoch': 1, 'iter': 10030, 'avg_loss': 8.598560279678484, 'avg_acc': 50.03177649287209, 'loss': 8.55367660522461}


EP_train:1:  36%|| 10042/27626 [23:38<41:19,  7.09it/s]

{'epoch': 1, 'iter': 10040, 'avg_loss': 8.598577934626434, 'avg_acc': 50.03174484613086, 'loss': 8.14769172668457}


EP_train:1:  36%|| 10052/27626 [23:40<41:30,  7.06it/s]

{'epoch': 1, 'iter': 10050, 'avg_loss': 8.598813790537779, 'avg_acc': 50.03451149139389, 'loss': 9.22642993927002}


EP_train:1:  36%|| 10062/27626 [23:41<41:26,  7.06it/s]

{'epoch': 1, 'iter': 10060, 'avg_loss': 8.599181534009121, 'avg_acc': 50.0316817413776, 'loss': 9.268287658691406}


EP_train:1:  36%|| 10072/27626 [23:43<41:26,  7.06it/s]

{'epoch': 1, 'iter': 10070, 'avg_loss': 8.59903808260802, 'avg_acc': 50.03630473637176, 'loss': 8.350918769836426}


EP_train:1:  36%|| 10082/27626 [23:44<41:25,  7.06it/s]

{'epoch': 1, 'iter': 10080, 'avg_loss': 8.598914821875638, 'avg_acc': 50.03285884336871, 'loss': 7.676274299621582}


EP_train:1:  37%|| 10092/27626 [23:45<41:04,  7.11it/s]

{'epoch': 1, 'iter': 10090, 'avg_loss': 8.598916542091782, 'avg_acc': 50.034684372212865, 'loss': 8.812213897705078}


EP_train:1:  37%|| 10102/27626 [23:47<41:28,  7.04it/s]

{'epoch': 1, 'iter': 10100, 'avg_loss': 8.598951925181975, 'avg_acc': 50.035268785268784, 'loss': 9.118896484375}


EP_train:1:  37%|| 10112/27626 [23:48<41:32,  7.03it/s]

{'epoch': 1, 'iter': 10110, 'avg_loss': 8.599066897063944, 'avg_acc': 50.03276134902581, 'loss': 8.597580909729004}


EP_train:1:  37%|| 10122/27626 [23:50<41:34,  7.02it/s]

{'epoch': 1, 'iter': 10120, 'avg_loss': 8.598648525040536, 'avg_acc': 50.03334650726213, 'loss': 8.172142028808594}


EP_train:1:  37%|| 10132/27626 [23:51<41:18,  7.06it/s]

{'epoch': 1, 'iter': 10130, 'avg_loss': 8.598227096750028, 'avg_acc': 50.0336220511302, 'loss': 8.005763053894043}


EP_train:1:  37%|| 10142/27626 [23:53<41:21,  7.05it/s]

{'epoch': 1, 'iter': 10140, 'avg_loss': 8.598431581858875, 'avg_acc': 50.03358889655852, 'loss': 8.914911270141602}


EP_train:1:  37%|| 10152/27626 [23:54<41:06,  7.08it/s]

{'epoch': 1, 'iter': 10150, 'avg_loss': 8.598819579623413, 'avg_acc': 50.039097133287356, 'loss': 9.14925765991211}


EP_train:1:  37%|| 10162/27626 [23:55<41:25,  7.03it/s]

{'epoch': 1, 'iter': 10160, 'avg_loss': 8.598913884376426, 'avg_acc': 50.039673752583404, 'loss': 8.25553035736084}


EP_train:1:  37%|| 10172/27626 [23:57<41:15,  7.05it/s]

{'epoch': 1, 'iter': 10170, 'avg_loss': 8.598688590117535, 'avg_acc': 50.0445506833153, 'loss': 8.402115821838379}


EP_train:1:  37%|| 10182/27626 [23:58<40:57,  7.10it/s]

{'epoch': 1, 'iter': 10180, 'avg_loss': 8.598647173162224, 'avg_acc': 50.04450692466359, 'loss': 8.234260559082031}


EP_train:1:  37%|| 10192/27626 [24:00<41:03,  7.08it/s]

{'epoch': 1, 'iter': 10190, 'avg_loss': 8.598251500689777, 'avg_acc': 50.04875625551958, 'loss': 7.594630718231201}


EP_train:1:  37%|| 10202/27626 [24:01<41:12,  7.05it/s]

{'epoch': 1, 'iter': 10200, 'avg_loss': 8.598285028483632, 'avg_acc': 50.05085285756299, 'loss': 8.52009105682373}


EP_train:1:  37%|| 10212/27626 [24:02<41:05,  7.06it/s]

{'epoch': 1, 'iter': 10210, 'avg_loss': 8.59798961465126, 'avg_acc': 50.04835471550288, 'loss': 8.177387237548828}


EP_train:1:  37%|| 10222/27626 [24:04<41:24,  7.00it/s]

{'epoch': 1, 'iter': 10220, 'avg_loss': 8.597950107511762, 'avg_acc': 50.04769592016437, 'loss': 8.301491737365723}


EP_train:1:  37%|| 10232/27626 [24:05<40:48,  7.10it/s]

{'epoch': 1, 'iter': 10230, 'avg_loss': 8.597645191488676, 'avg_acc': 50.04581663571498, 'loss': 8.60535717010498}


EP_train:1:  37%|| 10242/27626 [24:07<41:04,  7.05it/s]

{'epoch': 1, 'iter': 10240, 'avg_loss': 8.597718286137134, 'avg_acc': 50.04699248120301, 'loss': 8.872991561889648}


EP_train:1:  37%|| 10252/27626 [24:08<41:09,  7.03it/s]

{'epoch': 1, 'iter': 10250, 'avg_loss': 8.598098496066898, 'avg_acc': 50.045727246122325, 'loss': 8.821198463439941}


EP_train:1:  37%|| 10262/27626 [24:09<41:12,  7.02it/s]

{'epoch': 1, 'iter': 10260, 'avg_loss': 8.598158164350114, 'avg_acc': 50.04263716986649, 'loss': 7.766932487487793}


EP_train:1:  37%|| 10272/27626 [24:11<41:02,  7.05it/s]

{'epoch': 1, 'iter': 10270, 'avg_loss': 8.598175059227456, 'avg_acc': 50.04289991237465, 'loss': 8.599764823913574}


EP_train:1:  37%|| 10282/27626 [24:12<41:16,  7.00it/s]

{'epoch': 1, 'iter': 10280, 'avg_loss': 8.598110551002042, 'avg_acc': 50.04589777259022, 'loss': 9.023880004882812}


EP_train:1:  37%|| 10292/27626 [24:14<41:03,  7.04it/s]

{'epoch': 1, 'iter': 10290, 'avg_loss': 8.597970021916668, 'avg_acc': 50.04312020211835, 'loss': 8.339862823486328}


EP_train:1:  37%|| 10302/27626 [24:15<41:25,  6.97it/s]

{'epoch': 1, 'iter': 10300, 'avg_loss': 8.597719351651193, 'avg_acc': 50.04732550237841, 'loss': 7.630936145782471}


EP_train:1:  37%|| 10312/27626 [24:17<40:47,  7.07it/s]

{'epoch': 1, 'iter': 10310, 'avg_loss': 8.597862537126169, 'avg_acc': 50.047582678692656, 'loss': 8.629561424255371}


EP_train:1:  37%|| 10322/27626 [24:18<40:32,  7.11it/s]

{'epoch': 1, 'iter': 10320, 'avg_loss': 8.597924990266367, 'avg_acc': 50.04965604108129, 'loss': 9.469407081604004}


EP_train:1:  37%|| 10332/27626 [24:19<40:47,  7.07it/s]

{'epoch': 1, 'iter': 10330, 'avg_loss': 8.598147528986884, 'avg_acc': 50.05414529087213, 'loss': 8.658394813537598}


EP_train:1:  37%|| 10342/27626 [24:21<40:53,  7.04it/s]

{'epoch': 1, 'iter': 10340, 'avg_loss': 8.597891744706423, 'avg_acc': 50.054395126196695, 'loss': 8.561278343200684}


EP_train:1:  37%|| 10352/27626 [24:22<41:06,  7.00it/s]

{'epoch': 1, 'iter': 10350, 'avg_loss': 8.598119368519534, 'avg_acc': 50.05313496280552, 'loss': 8.196306228637695}


EP_train:1:  38%|| 10362/27626 [24:24<41:01,  7.01it/s]

{'epoch': 1, 'iter': 10360, 'avg_loss': 8.59789365084173, 'avg_acc': 50.05519496187627, 'loss': 9.202245712280273}


EP_train:1:  38%|| 10372/27626 [24:25<41:01,  7.01it/s]

{'epoch': 1, 'iter': 10370, 'avg_loss': 8.597753395168477, 'avg_acc': 50.05333381544692, 'loss': 8.306333541870117}


EP_train:1:  38%|| 10382/27626 [24:26<41:12,  6.97it/s]

{'epoch': 1, 'iter': 10380, 'avg_loss': 8.597683582998588, 'avg_acc': 50.05809893073885, 'loss': 8.823722839355469}


EP_train:1:  38%|| 10392/27626 [24:28<40:36,  7.07it/s]

{'epoch': 1, 'iter': 10390, 'avg_loss': 8.597722769724303, 'avg_acc': 50.060148205177555, 'loss': 8.325183868408203}


EP_train:1:  38%|| 10402/27626 [24:29<40:54,  7.02it/s]

{'epoch': 1, 'iter': 10400, 'avg_loss': 8.598085930113402, 'avg_acc': 50.06009037592539, 'loss': 9.057646751403809}


EP_train:1:  38%|| 10412/27626 [24:31<40:36,  7.06it/s]

{'epoch': 1, 'iter': 10410, 'avg_loss': 8.597801601654307, 'avg_acc': 50.06663625012007, 'loss': 7.777425765991211}


EP_train:1:  38%|| 10422/27626 [24:32<40:25,  7.09it/s]

{'epoch': 1, 'iter': 10420, 'avg_loss': 8.597509299183908, 'avg_acc': 50.06417330390558, 'loss': 8.660832405090332}


EP_train:1:  38%|| 10432/27626 [24:34<40:37,  7.05it/s]

{'epoch': 1, 'iter': 10430, 'avg_loss': 8.597881759050743, 'avg_acc': 50.068306010928964, 'loss': 9.037511825561523}


EP_train:1:  38%|| 10442/27626 [24:35<40:51,  7.01it/s]

{'epoch': 1, 'iter': 10440, 'avg_loss': 8.597942595029286, 'avg_acc': 50.06883919164831, 'loss': 8.626938819885254}


EP_train:1:  38%|| 10452/27626 [24:36<40:20,  7.10it/s]

{'epoch': 1, 'iter': 10450, 'avg_loss': 8.59803419014607, 'avg_acc': 50.07295952540427, 'loss': 8.930497169494629}


EP_train:1:  38%|| 10462/27626 [24:38<40:21,  7.09it/s]

{'epoch': 1, 'iter': 10460, 'avg_loss': 8.598246274743364, 'avg_acc': 50.07169486664754, 'loss': 7.854874134063721}


EP_train:1:  38%|| 10472/27626 [24:39<40:30,  7.06it/s]

{'epoch': 1, 'iter': 10470, 'avg_loss': 8.598276870205845, 'avg_acc': 50.06953729347722, 'loss': 8.384434700012207}


EP_train:1:  38%|| 10482/27626 [24:41<40:15,  7.10it/s]

{'epoch': 1, 'iter': 10480, 'avg_loss': 8.598424512095015, 'avg_acc': 50.06619120312947, 'loss': 7.798511981964111}


EP_train:1:  38%|| 10492/27626 [24:42<40:19,  7.08it/s]

{'epoch': 1, 'iter': 10490, 'avg_loss': 8.598445204781255, 'avg_acc': 50.0610642455438, 'loss': 8.418378829956055}


EP_train:1:  38%|| 10502/27626 [24:43<40:55,  6.97it/s]

{'epoch': 1, 'iter': 10500, 'avg_loss': 8.598436606837367, 'avg_acc': 50.065767545948006, 'loss': 8.848713874816895}


EP_train:1:  38%|| 10512/27626 [24:45<40:11,  7.10it/s]

{'epoch': 1, 'iter': 10510, 'avg_loss': 8.598475950404586, 'avg_acc': 50.0683807439825, 'loss': 8.3342866897583}


EP_train:1:  38%|| 10522/27626 [24:46<40:31,  7.04it/s]

{'epoch': 1, 'iter': 10520, 'avg_loss': 8.598671510683902, 'avg_acc': 50.068909799448726, 'loss': 10.552807807922363}


EP_train:1:  38%|| 10532/27626 [24:48<40:35,  7.02it/s]

{'epoch': 1, 'iter': 10530, 'avg_loss': 8.59866904433597, 'avg_acc': 50.06943785015668, 'loss': 8.165583610534668}


EP_train:1:  38%|| 10542/27626 [24:49<40:47,  6.98it/s]

{'epoch': 1, 'iter': 10540, 'avg_loss': 8.598444814530918, 'avg_acc': 50.06937197609334, 'loss': 7.962636470794678}


EP_train:1:  38%|| 10552/27626 [24:50<40:21,  7.05it/s]

{'epoch': 1, 'iter': 10550, 'avg_loss': 8.597914836517658, 'avg_acc': 50.071083309638894, 'loss': 9.023463249206543}


EP_train:1:  38%|| 10562/27626 [24:52<40:23,  7.04it/s]

{'epoch': 1, 'iter': 10560, 'avg_loss': 8.59787716927396, 'avg_acc': 50.07160780229145, 'loss': 8.378369331359863}


EP_train:1:  38%|| 10572/27626 [24:53<40:16,  7.06it/s]

{'epoch': 1, 'iter': 10570, 'avg_loss': 8.598098009201697, 'avg_acc': 50.07035758206414, 'loss': 9.484559059143066}


EP_train:1:  38%|| 10582/27626 [24:55<40:06,  7.08it/s]

{'epoch': 1, 'iter': 10580, 'avg_loss': 8.597890606652967, 'avg_acc': 50.07029108779888, 'loss': 8.737223625183105}


EP_train:1:  38%|| 10592/27626 [24:56<39:55,  7.11it/s]

{'epoch': 1, 'iter': 10590, 'avg_loss': 8.597716636872024, 'avg_acc': 50.07406052308564, 'loss': 7.381134510040283}


EP_train:1:  38%|| 10602/27626 [24:58<39:55,  7.11it/s]

{'epoch': 1, 'iter': 10600, 'avg_loss': 8.597798699894533, 'avg_acc': 50.071042826148485, 'loss': 8.760865211486816}


EP_train:1:  38%|| 10612/27626 [24:59<40:29,  7.00it/s]

{'epoch': 1, 'iter': 10610, 'avg_loss': 8.59776967311712, 'avg_acc': 50.072448402601076, 'loss': 8.214513778686523}


EP_train:1:  38%|| 10622/27626 [25:00<40:06,  7.07it/s]

{'epoch': 1, 'iter': 10620, 'avg_loss': 8.597647299998652, 'avg_acc': 50.07385133226626, 'loss': 8.526119232177734}


EP_train:1:  38%|| 10632/27626 [25:02<40:04,  7.07it/s]

{'epoch': 1, 'iter': 10630, 'avg_loss': 8.597655212673976, 'avg_acc': 50.076427429216444, 'loss': 8.639910697937012}


EP_train:1:  39%|| 10642/27626 [25:03<40:02,  7.07it/s]

{'epoch': 1, 'iter': 10640, 'avg_loss': 8.597531225770704, 'avg_acc': 50.07488722864393, 'loss': 8.191953659057617}


EP_train:1:  39%|| 10652/27626 [25:05<40:03,  7.06it/s]

{'epoch': 1, 'iter': 10650, 'avg_loss': 8.597723651386719, 'avg_acc': 50.07687071636467, 'loss': 8.583101272583008}


EP_train:1:  39%|| 10662/27626 [25:06<40:18,  7.01it/s]

{'epoch': 1, 'iter': 10660, 'avg_loss': 8.59760735386275, 'avg_acc': 50.07709173623488, 'loss': 8.466326713562012}


EP_train:1:  39%|| 10672/27626 [25:07<40:07,  7.04it/s]

{'epoch': 1, 'iter': 10670, 'avg_loss': 8.597419862726637, 'avg_acc': 50.07174819604535, 'loss': 8.085351943969727}


EP_train:1:  39%|| 10682/27626 [25:09<39:52,  7.08it/s]

{'epoch': 1, 'iter': 10680, 'avg_loss': 8.59759159380996, 'avg_acc': 50.07226617357925, 'loss': 9.312777519226074}


EP_train:1:  39%|| 10692/27626 [25:10<39:41,  7.11it/s]

{'epoch': 1, 'iter': 10690, 'avg_loss': 8.597621322826656, 'avg_acc': 50.07102937049856, 'loss': 8.884427070617676}


EP_train:1:  39%|| 10702/27626 [25:12<40:01,  7.05it/s]

{'epoch': 1, 'iter': 10700, 'avg_loss': 8.597977282209783, 'avg_acc': 50.0689187926362, 'loss': 8.248405456542969}


EP_train:1:  39%|| 10712/27626 [25:13<40:12,  7.01it/s]

{'epoch': 1, 'iter': 10710, 'avg_loss': 8.59813878534221, 'avg_acc': 50.0706049855289, 'loss': 8.616142272949219}


EP_train:1:  39%|| 10722/27626 [25:14<39:54,  7.06it/s]

{'epoch': 1, 'iter': 10720, 'avg_loss': 8.598254675607652, 'avg_acc': 50.07083061281597, 'loss': 8.861947059631348}


EP_train:1:  39%|| 10732/27626 [25:16<40:00,  7.04it/s]

{'epoch': 1, 'iter': 10730, 'avg_loss': 8.597786911201993, 'avg_acc': 50.071638244338835, 'loss': 8.18272876739502}


EP_train:1:  39%|| 10742/27626 [25:17<40:00,  7.03it/s]

{'epoch': 1, 'iter': 10740, 'avg_loss': 8.597567496847123, 'avg_acc': 50.069534959500984, 'loss': 8.333428382873535}


EP_train:1:  39%|| 10752/27626 [25:19<39:57,  7.04it/s]

{'epoch': 1, 'iter': 10750, 'avg_loss': 8.597756666024333, 'avg_acc': 50.07208631755186, 'loss': 9.035326957702637}


EP_train:1:  39%|| 10762/27626 [25:20<39:37,  7.09it/s]

{'epoch': 1, 'iter': 10760, 'avg_loss': 8.59757915964225, 'avg_acc': 50.070276925936255, 'loss': 7.981364727020264}


EP_train:1:  39%|| 10772/27626 [25:22<39:40,  7.08it/s]

{'epoch': 1, 'iter': 10770, 'avg_loss': 8.597709229747052, 'avg_acc': 50.071082072230986, 'loss': 8.41057300567627}


EP_train:1:  39%|| 10782/27626 [25:23<39:41,  7.07it/s]

{'epoch': 1, 'iter': 10780, 'avg_loss': 8.597688141475968, 'avg_acc': 50.07043641591689, 'loss': 8.749815940856934}


EP_train:1:  39%|| 10792/27626 [25:24<40:00,  7.01it/s]

{'epoch': 1, 'iter': 10790, 'avg_loss': 8.597508370605658, 'avg_acc': 50.07471504031137, 'loss': 7.81585693359375}


EP_train:1:  39%|| 10802/27626 [25:26<40:13,  6.97it/s]

{'epoch': 1, 'iter': 10800, 'avg_loss': 8.597360892667117, 'avg_acc': 50.07580316637349, 'loss': 8.370964050292969}


EP_train:1:  39%|| 10812/27626 [25:27<39:48,  7.04it/s]

{'epoch': 1, 'iter': 10810, 'avg_loss': 8.597392029154037, 'avg_acc': 50.0771783368791, 'loss': 8.198931694030762}


EP_train:1:  39%|| 10822/27626 [25:29<39:52,  7.02it/s]

{'epoch': 1, 'iter': 10820, 'avg_loss': 8.597264185155367, 'avg_acc': 50.08086128823584, 'loss': 8.580077171325684}


EP_train:1:  39%|| 10832/27626 [25:30<39:31,  7.08it/s]

{'epoch': 1, 'iter': 10830, 'avg_loss': 8.597315009859127, 'avg_acc': 50.08165220201274, 'loss': 7.942533493041992}


EP_train:1:  39%|| 10842/27626 [25:31<39:43,  7.04it/s]

{'epoch': 1, 'iter': 10840, 'avg_loss': 8.597672974576195, 'avg_acc': 50.07811779356148, 'loss': 9.560659408569336}


EP_train:1:  39%|| 10852/27626 [25:33<39:34,  7.06it/s]

{'epoch': 1, 'iter': 10850, 'avg_loss': 8.597822449743644, 'avg_acc': 50.07919776979081, 'loss': 8.398821830749512}


EP_train:1:  39%|| 10862/27626 [25:34<39:52,  7.01it/s]

{'epoch': 1, 'iter': 10860, 'avg_loss': 8.597783357988854, 'avg_acc': 50.07883712365344, 'loss': 8.720603942871094}


EP_train:1:  39%|| 10872/27626 [25:36<39:40,  7.04it/s]

{'epoch': 1, 'iter': 10870, 'avg_loss': 8.597815874078853, 'avg_acc': 50.075315058412286, 'loss': 7.713367462158203}


EP_train:1:  39%|| 10882/27626 [25:37<39:49,  7.01it/s]

{'epoch': 1, 'iter': 10880, 'avg_loss': 8.59779375049299, 'avg_acc': 50.076394632846245, 'loss': 9.16948127746582}


EP_train:1:  39%|| 10892/27626 [25:39<39:48,  7.01it/s]

{'epoch': 1, 'iter': 10890, 'avg_loss': 8.597986361374266, 'avg_acc': 50.07288127811955, 'loss': 9.077017784118652}


EP_train:1:  39%|| 10902/27626 [25:40<39:44,  7.01it/s]

{'epoch': 1, 'iter': 10900, 'avg_loss': 8.59796892959102, 'avg_acc': 50.07166773690487, 'loss': 9.271754264831543}


EP_train:1:  39%|| 10912/27626 [25:41<39:24,  7.07it/s]

{'epoch': 1, 'iter': 10910, 'avg_loss': 8.598254341939752, 'avg_acc': 50.073320502245444, 'loss': 8.664937973022461}


EP_train:1:  40%|| 10922/27626 [25:43<39:22,  7.07it/s]

{'epoch': 1, 'iter': 10920, 'avg_loss': 8.598358689074049, 'avg_acc': 50.07210878124714, 'loss': 8.877680778503418}


EP_train:1:  40%|| 10932/27626 [25:44<39:25,  7.06it/s]

{'epoch': 1, 'iter': 10930, 'avg_loss': 8.598154447377738, 'avg_acc': 50.07347223492818, 'loss': 8.093108177185059}


EP_train:1:  40%|| 10942/27626 [25:46<39:13,  7.09it/s]

{'epoch': 1, 'iter': 10940, 'avg_loss': 8.598063207611874, 'avg_acc': 50.07654693355269, 'loss': 9.187260627746582}


EP_train:1:  40%|| 10952/27626 [25:47<39:14,  7.08it/s]

{'epoch': 1, 'iter': 10950, 'avg_loss': 8.598157499557368, 'avg_acc': 50.07619167199343, 'loss': 9.240177154541016}


EP_train:1:  40%|| 10962/27626 [25:48<39:14,  7.08it/s]

{'epoch': 1, 'iter': 10960, 'avg_loss': 8.598444225861854, 'avg_acc': 50.074981753489645, 'loss': 9.104781150817871}


EP_train:1:  40%|| 10972/27626 [25:50<39:15,  7.07it/s]

{'epoch': 1, 'iter': 10970, 'avg_loss': 8.598552624449757, 'avg_acc': 50.07405888250843, 'loss': 9.576966285705566}


EP_train:1:  40%|| 10982/27626 [25:51<39:45,  6.98it/s]

{'epoch': 1, 'iter': 10980, 'avg_loss': 8.598645051986226, 'avg_acc': 50.07143019761406, 'loss': 9.178206443786621}


EP_train:1:  40%|| 10992/27626 [25:53<39:21,  7.04it/s]

{'epoch': 1, 'iter': 10990, 'avg_loss': 8.599058797801769, 'avg_acc': 50.07022791374761, 'loss': 8.387201309204102}


EP_train:1:  40%|| 11002/27626 [25:54<39:04,  7.09it/s]

{'epoch': 1, 'iter': 11000, 'avg_loss': 8.599105147597639, 'avg_acc': 50.07044814107808, 'loss': 8.967856407165527}


EP_train:1:  40%|| 11012/27626 [25:55<39:14,  7.05it/s]

{'epoch': 1, 'iter': 11010, 'avg_loss': 8.599169596589864, 'avg_acc': 50.06555944055944, 'loss': 9.341889381408691}


EP_train:1:  40%|| 11022/27626 [25:57<39:00,  7.09it/s]

{'epoch': 1, 'iter': 11020, 'avg_loss': 8.599158905236283, 'avg_acc': 50.06720125215498, 'loss': 9.085587501525879}


EP_train:1:  40%|| 11032/27626 [25:58<39:11,  7.06it/s]

{'epoch': 1, 'iter': 11030, 'avg_loss': 8.599155827906912, 'avg_acc': 50.067706916870634, 'loss': 8.225679397583008}


EP_train:1:  40%|| 11042/27626 [26:00<39:28,  7.00it/s]

{'epoch': 1, 'iter': 11040, 'avg_loss': 8.598849535441875, 'avg_acc': 50.07189113304954, 'loss': 8.205520629882812}


EP_train:1:  40%|| 11052/27626 [26:01<39:02,  7.08it/s]

{'epoch': 1, 'iter': 11050, 'avg_loss': 8.59871782659783, 'avg_acc': 50.067584381503934, 'loss': 7.390883445739746}


EP_train:1:  40%|| 11062/27626 [26:03<39:25,  7.00it/s]

{'epoch': 1, 'iter': 11060, 'avg_loss': 8.59866104939889, 'avg_acc': 50.06837085254497, 'loss': 9.100937843322754}


EP_train:1:  40%|| 11072/27626 [26:04<39:39,  6.96it/s]

{'epoch': 1, 'iter': 11070, 'avg_loss': 8.598761493668045, 'avg_acc': 50.06661548188962, 'loss': 7.827197074890137}


EP_train:1:  40%|| 11082/27626 [26:05<38:58,  7.08it/s]

{'epoch': 1, 'iter': 11080, 'avg_loss': 8.598572261711501, 'avg_acc': 50.06373522245284, 'loss': 8.885589599609375}


EP_train:1:  40%|| 11092/27626 [26:07<38:43,  7.11it/s]

{'epoch': 1, 'iter': 11090, 'avg_loss': 8.598621944860517, 'avg_acc': 50.06508655666757, 'loss': 8.318985939025879}


EP_train:1:  40%|| 11102/27626 [26:08<38:48,  7.10it/s]

{'epoch': 1, 'iter': 11100, 'avg_loss': 8.598631054031431, 'avg_acc': 50.067279974777044, 'loss': 9.041543006896973}


EP_train:1:  40%|| 11112/27626 [26:10<38:57,  7.06it/s]

{'epoch': 1, 'iter': 11110, 'avg_loss': 8.598746954380859, 'avg_acc': 50.06384438844389, 'loss': 8.83574390411377}


EP_train:1:  40%|| 11122/27626 [26:11<38:55,  7.07it/s]

{'epoch': 1, 'iter': 11120, 'avg_loss': 8.598843144122757, 'avg_acc': 50.066034978868814, 'loss': 8.661239624023438}


EP_train:1:  40%|| 11132/27626 [26:12<38:53,  7.07it/s]

{'epoch': 1, 'iter': 11130, 'avg_loss': 8.59855260331508, 'avg_acc': 50.06850238073848, 'loss': 7.685147285461426}


EP_train:1:  40%|| 11142/27626 [26:14<38:57,  7.05it/s]

{'epoch': 1, 'iter': 11140, 'avg_loss': 8.598804793380534, 'avg_acc': 50.06816039852796, 'loss': 8.625041961669922}


EP_train:1:  40%|| 11152/27626 [26:15<39:15,  7.00it/s]

{'epoch': 1, 'iter': 11150, 'avg_loss': 8.598774866227792, 'avg_acc': 50.07090171285087, 'loss': 8.418858528137207}


EP_train:1:  40%|| 11162/27626 [26:17<39:17,  6.98it/s]

{'epoch': 1, 'iter': 11160, 'avg_loss': 8.598615136969828, 'avg_acc': 50.06411835857002, 'loss': 8.329520225524902}


EP_train:1:  40%|| 11172/27626 [26:18<39:09,  7.00it/s]

{'epoch': 1, 'iter': 11170, 'avg_loss': 8.599046736691527, 'avg_acc': 50.061263539521974, 'loss': 8.937458038330078}


EP_train:1:  40%|| 11182/27626 [26:19<38:47,  7.06it/s]

{'epoch': 1, 'iter': 11180, 'avg_loss': 8.599005486348716, 'avg_acc': 50.06484214292103, 'loss': 8.71175479888916}


EP_train:1:  41%|| 11192/27626 [26:21<38:59,  7.02it/s]

{'epoch': 1, 'iter': 11190, 'avg_loss': 8.598986446543211, 'avg_acc': 50.065342686087035, 'loss': 8.333568572998047}


EP_train:1:  41%|| 11202/27626 [26:22<39:03,  7.01it/s]

{'epoch': 1, 'iter': 11200, 'avg_loss': 8.598832262614915, 'avg_acc': 50.063331398982235, 'loss': 8.375571250915527}


EP_train:1:  41%|| 11212/27626 [26:24<38:42,  7.07it/s]

{'epoch': 1, 'iter': 11210, 'avg_loss': 8.598879079220712, 'avg_acc': 50.06411114084381, 'loss': 9.17660140991211}


EP_train:1:  41%|| 11222/27626 [26:25<38:52,  7.03it/s]

{'epoch': 1, 'iter': 11220, 'avg_loss': 8.598706455220514, 'avg_acc': 50.06516798859282, 'loss': 8.627933502197266}


EP_train:1:  41%|| 11232/27626 [26:27<38:37,  7.07it/s]

{'epoch': 1, 'iter': 11230, 'avg_loss': 8.598735062885599, 'avg_acc': 50.06622295432286, 'loss': 8.096539497375488}


EP_train:1:  41%|| 11242/27626 [26:28<39:07,  6.98it/s]

{'epoch': 1, 'iter': 11240, 'avg_loss': 8.598844224093128, 'avg_acc': 50.06699804287875, 'loss': 8.397947311401367}


EP_train:1:  41%|| 11252/27626 [26:29<39:06,  6.98it/s]

{'epoch': 1, 'iter': 11250, 'avg_loss': 8.598852425996446, 'avg_acc': 50.06804950671051, 'loss': 8.638842582702637}


EP_train:1:  41%|| 11262/27626 [26:31<38:49,  7.02it/s]

{'epoch': 1, 'iter': 11260, 'avg_loss': 8.598783542372894, 'avg_acc': 50.0652140129651, 'loss': 8.635392189025879}


EP_train:1:  41%|| 11272/27626 [26:32<38:57,  7.00it/s]

{'epoch': 1, 'iter': 11270, 'avg_loss': 8.598897224605988, 'avg_acc': 50.06238355070535, 'loss': 9.549734115600586}


EP_train:1:  41%|| 11282/27626 [26:34<38:45,  7.03it/s]

{'epoch': 1, 'iter': 11280, 'avg_loss': 8.598706415005793, 'avg_acc': 50.05900407765269, 'loss': 8.474382400512695}


EP_train:1:  41%|| 11292/27626 [26:35<38:30,  7.07it/s]

{'epoch': 1, 'iter': 11290, 'avg_loss': 8.598713797321881, 'avg_acc': 50.06227304933133, 'loss': 8.85528564453125}


EP_train:1:  41%|| 11302/27626 [26:36<38:15,  7.11it/s]

{'epoch': 1, 'iter': 11300, 'avg_loss': 8.598931888639056, 'avg_acc': 50.05696398548801, 'loss': 8.808277130126953}


EP_train:1:  41%|| 11312/27626 [26:38<38:38,  7.04it/s]

{'epoch': 1, 'iter': 11310, 'avg_loss': 8.598812860241514, 'avg_acc': 50.056084784722835, 'loss': 7.861118793487549}


EP_train:1:  41%|| 11322/27626 [26:39<38:40,  7.03it/s]

{'epoch': 1, 'iter': 11320, 'avg_loss': 8.598692999281191, 'avg_acc': 50.059071636781205, 'loss': 8.784708023071289}


EP_train:1:  41%|| 11332/27626 [26:41<38:48,  7.00it/s]

{'epoch': 1, 'iter': 11330, 'avg_loss': 8.598636636371257, 'avg_acc': 50.059571088165214, 'loss': 8.640756607055664}


EP_train:1:  41%|| 11342/27626 [26:42<38:39,  7.02it/s]

{'epoch': 1, 'iter': 11340, 'avg_loss': 8.598574194114745, 'avg_acc': 50.06062075654704, 'loss': 8.63882827758789}


EP_train:1:  41%|| 11352/27626 [26:44<38:18,  7.08it/s]

{'epoch': 1, 'iter': 11350, 'avg_loss': 8.598608253432962, 'avg_acc': 50.06276980001761, 'loss': 8.394807815551758}


EP_train:1:  41%|| 11362/27626 [26:45<38:35,  7.02it/s]

{'epoch': 1, 'iter': 11360, 'avg_loss': 8.598606152975604, 'avg_acc': 50.06326467740516, 'loss': 9.29365062713623}


EP_train:1:  41%|| 11372/27626 [26:46<38:48,  6.98it/s]

{'epoch': 1, 'iter': 11370, 'avg_loss': 8.598296163914732, 'avg_acc': 50.061834930964736, 'loss': 8.039884567260742}


EP_train:1:  41%|| 11382/27626 [26:48<38:30,  7.03it/s]

{'epoch': 1, 'iter': 11380, 'avg_loss': 8.598433711262519, 'avg_acc': 50.06397724277304, 'loss': 8.600024223327637}


EP_train:1:  41%|| 11392/27626 [26:49<38:37,  7.00it/s]

{'epoch': 1, 'iter': 11390, 'avg_loss': 8.59833941578122, 'avg_acc': 50.06721315073304, 'loss': 8.674636840820312}


EP_train:1:  41%|| 11402/27626 [26:51<38:25,  7.04it/s]

{'epoch': 1, 'iter': 11400, 'avg_loss': 8.598247599219055, 'avg_acc': 50.06825059205333, 'loss': 9.021721839904785}


EP_train:1:  41%|| 11412/27626 [26:52<38:35,  7.00it/s]

{'epoch': 1, 'iter': 11410, 'avg_loss': 8.598416414721363, 'avg_acc': 50.071203224958374, 'loss': 8.249390602111816}


EP_train:1:  41%|| 11422/27626 [26:53<38:26,  7.03it/s]

{'epoch': 1, 'iter': 11420, 'avg_loss': 8.598351770123275, 'avg_acc': 50.070593643288674, 'loss': 9.403396606445312}


EP_train:1:  41%|| 11432/27626 [26:55<38:26,  7.02it/s]

{'epoch': 1, 'iter': 11430, 'avg_loss': 8.598152198437766, 'avg_acc': 50.070531886974024, 'loss': 8.05404281616211}


EP_train:1:  41%|| 11442/27626 [26:56<37:58,  7.10it/s]

{'epoch': 1, 'iter': 11440, 'avg_loss': 8.59791673954191, 'avg_acc': 50.069923957696005, 'loss': 8.471758842468262}


EP_train:1:  41%|| 11452/27626 [26:58<38:20,  7.03it/s]

{'epoch': 1, 'iter': 11450, 'avg_loss': 8.59812578789772, 'avg_acc': 50.06958999214043, 'loss': 7.886299133300781}


EP_train:1:  41%|| 11462/27626 [26:59<38:21,  7.02it/s]

{'epoch': 1, 'iter': 11460, 'avg_loss': 8.598010497363218, 'avg_acc': 50.07089259226943, 'loss': 8.6253023147583}


EP_train:1:  42%|| 11472/27626 [27:01<38:12,  7.05it/s]

{'epoch': 1, 'iter': 11470, 'avg_loss': 8.598117107480814, 'avg_acc': 50.06946866009938, 'loss': 8.472064018249512}


EP_train:1:  42%|| 11482/27626 [27:02<37:59,  7.08it/s]

{'epoch': 1, 'iter': 11480, 'avg_loss': 8.598117301235858, 'avg_acc': 50.06940815259995, 'loss': 8.385626792907715}


EP_train:1:  42%|| 11492/27626 [27:03<37:54,  7.09it/s]

{'epoch': 1, 'iter': 11490, 'avg_loss': 8.597961766675624, 'avg_acc': 50.06472456705248, 'loss': 8.29935359954834}


EP_train:1:  42%|| 11502/27626 [27:05<37:54,  7.09it/s]

{'epoch': 1, 'iter': 11500, 'avg_loss': 8.598216432082674, 'avg_acc': 50.06765716024694, 'loss': 8.662155151367188}


EP_train:1:  42%|| 11512/27626 [27:06<38:19,  7.01it/s]

{'epoch': 1, 'iter': 11510, 'avg_loss': 8.598076255923607, 'avg_acc': 50.06786986360873, 'loss': 8.382789611816406}


EP_train:1:  42%|| 11522/27626 [27:08<38:14,  7.02it/s]

{'epoch': 1, 'iter': 11520, 'avg_loss': 8.597991554431436, 'avg_acc': 50.07242209877615, 'loss': 8.283941268920898}


EP_train:1:  42%|| 11532/27626 [27:09<37:59,  7.06it/s]

{'epoch': 1, 'iter': 11530, 'avg_loss': 8.597574906774701, 'avg_acc': 50.076153412540116, 'loss': 7.718240737915039}


EP_train:1:  42%|| 11542/27626 [27:10<37:57,  7.06it/s]

{'epoch': 1, 'iter': 11540, 'avg_loss': 8.597277489167938, 'avg_acc': 50.07392123732779, 'loss': 8.367395401000977}


EP_train:1:  42%|| 11552/27626 [27:12<37:55,  7.06it/s]

{'epoch': 1, 'iter': 11550, 'avg_loss': 8.597377739533874, 'avg_acc': 50.070069690935846, 'loss': 9.129637718200684}


EP_train:1:  42%|| 11562/27626 [27:13<38:07,  7.02it/s]

{'epoch': 1, 'iter': 11560, 'avg_loss': 8.597359051600057, 'avg_acc': 50.07271213562841, 'loss': 8.71571159362793}


EP_train:1:  42%|| 11572/27626 [27:15<37:55,  7.05it/s]

{'epoch': 1, 'iter': 11570, 'avg_loss': 8.597462372115958, 'avg_acc': 50.07426972603923, 'loss': 9.613825798034668}


EP_train:1:  42%|| 11582/27626 [27:16<37:46,  7.08it/s]

{'epoch': 1, 'iter': 11580, 'avg_loss': 8.597745120201115, 'avg_acc': 50.07123737155686, 'loss': 9.773626327514648}


EP_train:1:  42%|| 11592/27626 [27:17<38:06,  7.01it/s]

{'epoch': 1, 'iter': 11590, 'avg_loss': 8.59775440740828, 'avg_acc': 50.070097489431454, 'loss': 8.152268409729004}


EP_train:1:  42%|| 11602/27626 [27:19<38:15,  6.98it/s]

{'epoch': 1, 'iter': 11600, 'avg_loss': 8.597833388242812, 'avg_acc': 50.068690199120766, 'loss': 8.367207527160645}


EP_train:1:  42%|| 11612/27626 [27:20<38:01,  7.02it/s]

{'epoch': 1, 'iter': 11610, 'avg_loss': 8.597809804008456, 'avg_acc': 50.07024588752046, 'loss': 8.440873146057129}


EP_train:1:  42%|| 11622/27626 [27:22<37:37,  7.09it/s]

{'epoch': 1, 'iter': 11620, 'avg_loss': 8.597815430766488, 'avg_acc': 50.07152998881336, 'loss': 8.72215747833252}


EP_train:1:  42%|| 11632/27626 [27:23<37:40,  7.08it/s]

{'epoch': 1, 'iter': 11630, 'avg_loss': 8.597473483085222, 'avg_acc': 50.072005846444846, 'loss': 8.39155101776123}


EP_train:1:  42%|| 11642/27626 [27:25<37:55,  7.03it/s]

{'epoch': 1, 'iter': 11640, 'avg_loss': 8.597255860066436, 'avg_acc': 50.07516536380036, 'loss': 7.713644027709961}


EP_train:1:  42%|| 11652/27626 [27:26<37:43,  7.06it/s]

{'epoch': 1, 'iter': 11650, 'avg_loss': 8.597211377097448, 'avg_acc': 50.0783194575573, 'loss': 8.765338897705078}


EP_train:1:  42%|| 11662/27626 [27:27<37:49,  7.03it/s]

{'epoch': 1, 'iter': 11660, 'avg_loss': 8.59743202571936, 'avg_acc': 50.079324243203835, 'loss': 8.348578453063965}


EP_train:1:  42%|| 11672/27626 [27:29<37:37,  7.07it/s]

{'epoch': 1, 'iter': 11670, 'avg_loss': 8.597545434368186, 'avg_acc': 50.07711421472024, 'loss': 9.385712623596191}


EP_train:1:  42%|| 11682/27626 [27:30<37:40,  7.05it/s]

{'epoch': 1, 'iter': 11680, 'avg_loss': 8.59772673396968, 'avg_acc': 50.07544302713809, 'loss': 9.291064262390137}


EP_train:1:  42%|| 11692/27626 [27:32<38:07,  6.97it/s]

{'epoch': 1, 'iter': 11690, 'avg_loss': 8.597581542126587, 'avg_acc': 50.0745765973826, 'loss': 8.666335105895996}


EP_train:1:  42%|| 11702/27626 [27:33<37:59,  6.99it/s]

{'epoch': 1, 'iter': 11700, 'avg_loss': 8.597273375435616, 'avg_acc': 50.076916502863, 'loss': 8.248929023742676}


EP_train:1:  42%|| 11712/27626 [27:34<37:55,  6.99it/s]

{'epoch': 1, 'iter': 11710, 'avg_loss': 8.596889931114678, 'avg_acc': 50.08085347109555, 'loss': 8.647040367126465}


EP_train:1:  42%|| 11722/27626 [27:36<37:27,  7.08it/s]

{'epoch': 1, 'iter': 11720, 'avg_loss': 8.596648493578751, 'avg_acc': 50.08051787390154, 'loss': 9.872586250305176}


EP_train:1:  42%|| 11732/27626 [27:37<37:42,  7.02it/s]

{'epoch': 1, 'iter': 11730, 'avg_loss': 8.596635814619923, 'avg_acc': 50.077785355042195, 'loss': 8.146920204162598}


EP_train:1:  43%|| 11742/27626 [27:39<37:22,  7.08it/s]

{'epoch': 1, 'iter': 11740, 'avg_loss': 8.596335793930468, 'avg_acc': 50.078251426624654, 'loss': 8.410553932189941}


EP_train:1:  43%|| 11752/27626 [27:40<37:18,  7.09it/s]

{'epoch': 1, 'iter': 11750, 'avg_loss': 8.596225107693082, 'avg_acc': 50.07632329163475, 'loss': 8.592671394348145}


EP_train:1:  43%|| 11762/27626 [27:41<37:44,  7.01it/s]

{'epoch': 1, 'iter': 11760, 'avg_loss': 8.596094734682314, 'avg_acc': 50.075461270300146, 'loss': 7.508663177490234}


EP_train:1:  43%|| 11772/27626 [27:43<37:23,  7.07it/s]

{'epoch': 1, 'iter': 11770, 'avg_loss': 8.596382136819276, 'avg_acc': 50.07778650921757, 'loss': 9.150591850280762}


EP_train:1:  43%|| 11782/27626 [27:44<37:30,  7.04it/s]

{'epoch': 1, 'iter': 11780, 'avg_loss': 8.596458254863446, 'avg_acc': 50.075067905950256, 'loss': 8.844436645507812}


EP_train:1:  43%|| 11792/27626 [27:46<37:41,  7.00it/s]

{'epoch': 1, 'iter': 11790, 'avg_loss': 8.596438245123837, 'avg_acc': 50.07844966499873, 'loss': 9.133993148803711}


EP_train:1:  43%|| 11802/27626 [27:47<37:08,  7.10it/s]

{'epoch': 1, 'iter': 11800, 'avg_loss': 8.596248860501019, 'avg_acc': 50.08156088467079, 'loss': 8.597991943359375}


EP_train:1:  43%|| 11812/27626 [27:49<37:26,  7.04it/s]

{'epoch': 1, 'iter': 11810, 'avg_loss': 8.596136562184954, 'avg_acc': 50.08175641351282, 'loss': 8.345489501953125}


EP_train:1:  43%|| 11822/27626 [27:50<37:12,  7.08it/s]

{'epoch': 1, 'iter': 11820, 'avg_loss': 8.595825440568394, 'avg_acc': 50.08142289146434, 'loss': 7.631384372711182}


EP_train:1:  43%|| 11832/27626 [27:51<37:05,  7.10it/s]

{'epoch': 1, 'iter': 11830, 'avg_loss': 8.59598475983334, 'avg_acc': 50.08214647958752, 'loss': 9.316495895385742}


EP_train:1:  43%|| 11842/27626 [27:53<37:14,  7.06it/s]

{'epoch': 1, 'iter': 11840, 'avg_loss': 8.595780251366637, 'avg_acc': 50.07732666159953, 'loss': 7.891368389129639}


EP_train:1:  43%|| 11852/27626 [27:54<37:27,  7.02it/s]

{'epoch': 1, 'iter': 11850, 'avg_loss': 8.595976972769066, 'avg_acc': 50.07963462998903, 'loss': 9.223657608032227}


EP_train:1:  43%|| 11862/27626 [27:56<36:58,  7.10it/s]

{'epoch': 1, 'iter': 11860, 'avg_loss': 8.596315361960784, 'avg_acc': 50.076405867970664, 'loss': 8.800447463989258}


EP_train:1:  43%|| 11872/27626 [27:57<37:20,  7.03it/s]

{'epoch': 1, 'iter': 11870, 'avg_loss': 8.596118788140354, 'avg_acc': 50.07739449077584, 'loss': 7.951733112335205}


EP_train:1:  43%|| 11882/27626 [27:58<37:18,  7.03it/s]

{'epoch': 1, 'iter': 11880, 'avg_loss': 8.59587585684246, 'avg_acc': 50.07654027438768, 'loss': 7.428721904754639}


EP_train:1:  43%|| 11892/27626 [28:00<37:24,  7.01it/s]

{'epoch': 1, 'iter': 11890, 'avg_loss': 8.595765568131144, 'avg_acc': 50.07778992515348, 'loss': 8.548378944396973}


EP_train:1:  43%|| 11902/27626 [28:01<37:02,  7.07it/s]

{'epoch': 1, 'iter': 11900, 'avg_loss': 8.595928775153334, 'avg_acc': 50.07851230988992, 'loss': 8.912521362304688}


EP_train:1:  43%|| 11912/27626 [28:03<37:10,  7.05it/s]

{'epoch': 1, 'iter': 11910, 'avg_loss': 8.595801926371774, 'avg_acc': 50.07923348165561, 'loss': 9.036341667175293}


EP_train:1:  43%|| 11922/27626 [28:04<37:04,  7.06it/s]

{'epoch': 1, 'iter': 11920, 'avg_loss': 8.595963416208747, 'avg_acc': 50.079953443503065, 'loss': 9.677236557006836}


EP_train:1:  43%|| 11932/27626 [28:05<37:09,  7.04it/s]

{'epoch': 1, 'iter': 11930, 'avg_loss': 8.596078004414194, 'avg_acc': 50.082243734808486, 'loss': 8.243279457092285}


EP_train:1:  43%|| 11942/27626 [28:07<37:06,  7.05it/s]

{'epoch': 1, 'iter': 11940, 'avg_loss': 8.595950647810477, 'avg_acc': 50.082436563101915, 'loss': 8.21261978149414}


EP_train:1:  43%|| 11952/27626 [28:08<37:04,  7.05it/s]

{'epoch': 1, 'iter': 11950, 'avg_loss': 8.595658394434734, 'avg_acc': 50.08262906869718, 'loss': 7.50114631652832}


EP_train:1:  43%|| 11962/27626 [28:10<37:03,  7.04it/s]

{'epoch': 1, 'iter': 11960, 'avg_loss': 8.595408983169197, 'avg_acc': 50.08255998662319, 'loss': 8.284564018249512}


EP_train:1:  43%|| 11972/27626 [28:11<36:57,  7.06it/s]

{'epoch': 1, 'iter': 11970, 'avg_loss': 8.595296680055936, 'avg_acc': 50.0793584495865, 'loss': 8.446394920349121}


EP_train:1:  43%|| 11982/27626 [28:13<36:53,  7.07it/s]

{'epoch': 1, 'iter': 11980, 'avg_loss': 8.595353820309816, 'avg_acc': 50.079292212670055, 'loss': 9.270166397094727}


EP_train:1:  43%|| 11992/27626 [28:14<36:53,  7.06it/s]

{'epoch': 1, 'iter': 11990, 'avg_loss': 8.595597397107, 'avg_acc': 50.07661996497374, 'loss': 9.405801773071289}


EP_train:1:  43%|| 12002/27626 [28:15<36:55,  7.05it/s]

{'epoch': 1, 'iter': 12000, 'avg_loss': 8.595576504689138, 'avg_acc': 50.07785809515873, 'loss': 8.636941909790039}


EP_train:1:  43%|| 12012/27626 [28:17<36:57,  7.04it/s]

{'epoch': 1, 'iter': 12010, 'avg_loss': 8.595554418542404, 'avg_acc': 50.07727291649321, 'loss': 8.378582954406738}


EP_train:1:  44%|| 12022/27626 [28:18<36:46,  7.07it/s]

{'epoch': 1, 'iter': 12020, 'avg_loss': 8.59566840660591, 'avg_acc': 50.07902836702437, 'loss': 8.818673133850098}


EP_train:1:  44%|| 12032/27626 [28:20<37:05,  7.01it/s]

{'epoch': 1, 'iter': 12030, 'avg_loss': 8.595627902323207, 'avg_acc': 50.07636522317347, 'loss': 8.732439041137695}


EP_train:1:  44%|| 12042/27626 [28:21<36:53,  7.04it/s]

{'epoch': 1, 'iter': 12040, 'avg_loss': 8.595677209943942, 'avg_acc': 50.077339921933394, 'loss': 8.77622127532959}


EP_train:1:  44%|| 12052/27626 [28:22<36:44,  7.06it/s]

{'epoch': 1, 'iter': 12050, 'avg_loss': 8.595668575336562, 'avg_acc': 50.078053688490584, 'loss': 8.267601013183594}


EP_train:1:  44%|| 12062/27626 [28:24<37:02,  7.00it/s]

{'epoch': 1, 'iter': 12060, 'avg_loss': 8.595635686680982, 'avg_acc': 50.082652765110694, 'loss': 8.082466125488281}


EP_train:1:  44%|| 12072/27626 [28:25<36:32,  7.09it/s]

{'epoch': 1, 'iter': 12070, 'avg_loss': 8.595593153023106, 'avg_acc': 50.08361983265678, 'loss': 8.066423416137695}


EP_train:1:  44%|| 12082/27626 [28:27<36:30,  7.10it/s]

{'epoch': 1, 'iter': 12080, 'avg_loss': 8.595593887294683, 'avg_acc': 50.08251593411141, 'loss': 9.075510025024414}


EP_train:1:  44%|| 12092/27626 [28:28<36:53,  7.02it/s]

{'epoch': 1, 'iter': 12090, 'avg_loss': 8.595627093392027, 'avg_acc': 50.08063849143991, 'loss': 8.802453994750977}


EP_train:1:  44%|| 12102/27626 [28:30<36:46,  7.04it/s]

{'epoch': 1, 'iter': 12100, 'avg_loss': 8.595499011979854, 'avg_acc': 50.079538881084204, 'loss': 7.845008373260498}


EP_train:1:  44%|| 12112/27626 [28:31<36:38,  7.06it/s]

{'epoch': 1, 'iter': 12110, 'avg_loss': 8.595230531499581, 'avg_acc': 50.080247295846746, 'loss': 8.595938682556152}


EP_train:1:  44%|| 12122/27626 [28:32<36:32,  7.07it/s]

{'epoch': 1, 'iter': 12120, 'avg_loss': 8.595201527833368, 'avg_acc': 50.077602920551115, 'loss': 9.534893989562988}


EP_train:1:  44%|| 12132/27626 [28:34<36:45,  7.03it/s]

{'epoch': 1, 'iter': 12130, 'avg_loss': 8.595607805405374, 'avg_acc': 50.078826972219936, 'loss': 9.293821334838867}


EP_train:1:  44%|| 12142/27626 [28:35<37:18,  6.92it/s]

{'epoch': 1, 'iter': 12140, 'avg_loss': 8.59560573852177, 'avg_acc': 50.077217692117614, 'loss': 8.640677452087402}


EP_train:1:  44%|| 12152/27626 [28:37<36:50,  7.00it/s]

{'epoch': 1, 'iter': 12150, 'avg_loss': 8.595348331783251, 'avg_acc': 50.07509669986009, 'loss': 8.99565315246582}


EP_train:1:  44%|| 12162/27626 [28:38<36:58,  6.97it/s]

{'epoch': 1, 'iter': 12160, 'avg_loss': 8.594988808429179, 'avg_acc': 50.074007071786866, 'loss': 7.888294219970703}


EP_train:1:  44%|| 12172/27626 [28:39<36:33,  7.04it/s]

{'epoch': 1, 'iter': 12170, 'avg_loss': 8.594913248968403, 'avg_acc': 50.075743570783004, 'loss': 9.214076042175293}


EP_train:1:  44%|| 12182/27626 [28:41<36:44,  7.01it/s]

{'epoch': 1, 'iter': 12180, 'avg_loss': 8.595210045823205, 'avg_acc': 50.076964124456126, 'loss': 7.910957336425781}


EP_train:1:  44%|| 12192/27626 [28:42<36:49,  6.99it/s]

{'epoch': 1, 'iter': 12190, 'avg_loss': 8.595187404550586, 'avg_acc': 50.076388319251905, 'loss': 8.208925247192383}


EP_train:1:  44%|| 12202/27626 [28:44<36:37,  7.02it/s]

{'epoch': 1, 'iter': 12200, 'avg_loss': 8.595097660656865, 'avg_acc': 50.07760634374232, 'loss': 8.983633041381836}


EP_train:1:  44%|| 12212/27626 [28:45<36:20,  7.07it/s]

{'epoch': 1, 'iter': 12210, 'avg_loss': 8.594499208721947, 'avg_acc': 50.07498362132503, 'loss': 7.689502716064453}


EP_train:1:  44%|| 12222/27626 [28:47<36:43,  6.99it/s]

{'epoch': 1, 'iter': 12220, 'avg_loss': 8.594271901059429, 'avg_acc': 50.07262089845348, 'loss': 7.525343418121338}


EP_train:1:  44%|| 12232/27626 [28:48<36:37,  7.00it/s]

{'epoch': 1, 'iter': 12230, 'avg_loss': 8.594285834440859, 'avg_acc': 50.07128403237675, 'loss': 8.5122709274292}


EP_train:1:  44%|| 12242/27626 [28:49<36:40,  6.99it/s]

{'epoch': 1, 'iter': 12240, 'avg_loss': 8.594104745381992, 'avg_acc': 50.073778694551095, 'loss': 8.425333976745605}


EP_train:1:  44%|| 12252/27626 [28:51<36:25,  7.04it/s]

{'epoch': 1, 'iter': 12250, 'avg_loss': 8.593865730180399, 'avg_acc': 50.07652436535793, 'loss': 7.766937255859375}


EP_train:1:  44%|| 12262/27626 [28:52<36:45,  6.97it/s]

{'epoch': 1, 'iter': 12260, 'avg_loss': 8.59371556225341, 'avg_acc': 50.07544245983199, 'loss': 8.677138328552246}


EP_train:1:  44%|| 12272/27626 [28:54<36:21,  7.04it/s]

{'epoch': 1, 'iter': 12270, 'avg_loss': 8.593662015442238, 'avg_acc': 50.078182299731075, 'loss': 7.790372848510742}


EP_train:1:  44%|| 12282/27626 [28:55<36:28,  7.01it/s]

{'epoch': 1, 'iter': 12280, 'avg_loss': 8.593679843396592, 'avg_acc': 50.072011644002934, 'loss': 8.401202201843262}


EP_train:1:  44%|| 12292/27626 [28:56<36:31,  7.00it/s]

{'epoch': 1, 'iter': 12290, 'avg_loss': 8.593778212031088, 'avg_acc': 50.07398706370515, 'loss': 8.522574424743652}


EP_train:1:  45%|| 12302/27626 [28:58<36:20,  7.03it/s]

{'epoch': 1, 'iter': 12300, 'avg_loss': 8.593693877107661, 'avg_acc': 50.07646736037721, 'loss': 8.43894100189209}


EP_train:1:  45%|| 12312/27626 [28:59<36:07,  7.07it/s]

{'epoch': 1, 'iter': 12310, 'avg_loss': 8.59382630637848, 'avg_acc': 50.07412070506051, 'loss': 10.402716636657715}


EP_train:1:  45%|| 12322/27626 [29:01<36:05,  7.07it/s]

{'epoch': 1, 'iter': 12320, 'avg_loss': 8.593928232565299, 'avg_acc': 50.071777858940024, 'loss': 8.639894485473633}


EP_train:1:  45%|| 12332/27626 [29:02<36:07,  7.05it/s]

{'epoch': 1, 'iter': 12330, 'avg_loss': 8.593875048852935, 'avg_acc': 50.06943881274836, 'loss': 8.588401794433594}


EP_train:1:  45%|| 12342/27626 [29:04<36:25,  6.99it/s]

{'epoch': 1, 'iter': 12340, 'avg_loss': 8.594337212113041, 'avg_acc': 50.06887610404343, 'loss': 9.117958068847656}


EP_train:1:  45%|| 12352/27626 [29:05<36:06,  7.05it/s]

{'epoch': 1, 'iter': 12350, 'avg_loss': 8.594436575628835, 'avg_acc': 50.068820338434136, 'loss': 7.675185680389404}


EP_train:1:  45%|| 12362/27626 [29:06<36:08,  7.04it/s]

{'epoch': 1, 'iter': 12360, 'avg_loss': 8.594273977030463, 'avg_acc': 50.06699498422458, 'loss': 8.0562105178833}


EP_train:1:  45%|| 12372/27626 [29:08<36:13,  7.02it/s]

{'epoch': 1, 'iter': 12370, 'avg_loss': 8.59409168606654, 'avg_acc': 50.06719343626222, 'loss': 8.964848518371582}


EP_train:1:  45%|| 12382/27626 [29:09<35:49,  7.09it/s]

{'epoch': 1, 'iter': 12380, 'avg_loss': 8.593908558195333, 'avg_acc': 50.06941079072773, 'loss': 8.077569961547852}


EP_train:1:  45%|| 12392/27626 [29:11<36:11,  7.02it/s]

{'epoch': 1, 'iter': 12390, 'avg_loss': 8.593859934881865, 'avg_acc': 50.07086796868695, 'loss': 8.309080123901367}


EP_train:1:  45%|| 12402/27626 [29:12<36:06,  7.03it/s]

{'epoch': 1, 'iter': 12400, 'avg_loss': 8.593653018220222, 'avg_acc': 50.07106281751472, 'loss': 8.3196382522583}


EP_train:1:  45%|| 12412/27626 [29:13<36:13,  7.00it/s]

{'epoch': 1, 'iter': 12410, 'avg_loss': 8.59364995187242, 'avg_acc': 50.071760937877684, 'loss': 8.876017570495605}


EP_train:1:  45%|| 12422/27626 [29:15<36:01,  7.03it/s]

{'epoch': 1, 'iter': 12420, 'avg_loss': 8.593625147862285, 'avg_acc': 50.0727095241929, 'loss': 8.885354042053223}


EP_train:1:  45%|| 12432/27626 [29:16<35:59,  7.03it/s]

{'epoch': 1, 'iter': 12430, 'avg_loss': 8.593574146284254, 'avg_acc': 50.070639932427, 'loss': 7.792806625366211}


EP_train:1:  45%|| 12442/27626 [29:18<36:04,  7.01it/s]

{'epoch': 1, 'iter': 12440, 'avg_loss': 8.59335894623367, 'avg_acc': 50.07209026605578, 'loss': 8.563002586364746}


EP_train:1:  45%|| 12452/27626 [29:19<36:03,  7.01it/s]

{'epoch': 1, 'iter': 12450, 'avg_loss': 8.593243446592135, 'avg_acc': 50.07178138302144, 'loss': 9.532708168029785}


EP_train:1:  45%|| 12462/27626 [29:21<36:09,  6.99it/s]

{'epoch': 1, 'iter': 12460, 'avg_loss': 8.593271450689885, 'avg_acc': 50.07347925527647, 'loss': 8.482523918151855}


EP_train:1:  45%|| 12472/27626 [29:22<35:50,  7.05it/s]

{'epoch': 1, 'iter': 12470, 'avg_loss': 8.593200804418498, 'avg_acc': 50.07041335899286, 'loss': 9.182868957519531}


EP_train:1:  45%|| 12482/27626 [29:23<35:48,  7.05it/s]

{'epoch': 1, 'iter': 12480, 'avg_loss': 8.593107868832146, 'avg_acc': 50.069105039660286, 'loss': 8.340714454650879}


EP_train:1:  45%|| 12492/27626 [29:25<35:52,  7.03it/s]

{'epoch': 1, 'iter': 12490, 'avg_loss': 8.59306397058977, 'avg_acc': 50.07005043631415, 'loss': 9.213847160339355}


EP_train:1:  45%|| 12502/27626 [29:26<35:52,  7.02it/s]

{'epoch': 1, 'iter': 12500, 'avg_loss': 8.5929691064702, 'avg_acc': 50.07099432045437, 'loss': 8.483957290649414}


EP_train:1:  45%|| 12512/27626 [29:28<35:45,  7.04it/s]

{'epoch': 1, 'iter': 12510, 'avg_loss': 8.5931957331543, 'avg_acc': 50.06818999280633, 'loss': 8.968428611755371}


EP_train:1:  45%|| 12522/27626 [29:29<35:51,  7.02it/s]

{'epoch': 1, 'iter': 12520, 'avg_loss': 8.59335719559408, 'avg_acc': 50.068135532305725, 'loss': 8.449076652526855}


EP_train:1:  45%|| 12532/27626 [29:30<35:52,  7.01it/s]

{'epoch': 1, 'iter': 12530, 'avg_loss': 8.592590355661915, 'avg_acc': 50.07007621099673, 'loss': 7.053359031677246}


EP_train:1:  45%|| 12542/27626 [29:32<35:51,  7.01it/s]

{'epoch': 1, 'iter': 12540, 'avg_loss': 8.592623651469689, 'avg_acc': 50.06852523722192, 'loss': 8.999842643737793}


EP_train:1:  45%|| 12552/27626 [29:33<35:42,  7.03it/s]

{'epoch': 1, 'iter': 12550, 'avg_loss': 8.59253684409291, 'avg_acc': 50.06573181419807, 'loss': 9.417953491210938}


EP_train:1:  45%|| 12562/27626 [29:35<35:18,  7.11it/s]

{'epoch': 1, 'iter': 12560, 'avg_loss': 8.592813423907772, 'avg_acc': 50.06518191226813, 'loss': 9.026846885681152}


EP_train:1:  46%|| 12572/27626 [29:36<35:41,  7.03it/s]

{'epoch': 1, 'iter': 12570, 'avg_loss': 8.592843497703187, 'avg_acc': 50.06214700501154, 'loss': 8.108927726745605}


EP_train:1:  46%|| 12582/27626 [29:38<35:47,  7.00it/s]

{'epoch': 1, 'iter': 12580, 'avg_loss': 8.592875491908563, 'avg_acc': 50.06011048406327, 'loss': 9.59996509552002}


EP_train:1:  46%|| 12592/27626 [29:39<35:34,  7.04it/s]

{'epoch': 1, 'iter': 12590, 'avg_loss': 8.593036692928884, 'avg_acc': 50.059566356921614, 'loss': 8.703524589538574}


EP_train:1:  46%|| 12602/27626 [29:40<35:42,  7.01it/s]

{'epoch': 1, 'iter': 12600, 'avg_loss': 8.59304386857445, 'avg_acc': 50.06075906674074, 'loss': 8.404584884643555}


EP_train:1:  46%|| 12612/27626 [29:42<35:41,  7.01it/s]

{'epoch': 1, 'iter': 12610, 'avg_loss': 8.59299006150911, 'avg_acc': 50.05897629054, 'loss': 8.280847549438477}


EP_train:1:  46%|| 12622/27626 [29:43<35:32,  7.04it/s]

{'epoch': 1, 'iter': 12620, 'avg_loss': 8.593188914469858, 'avg_acc': 50.05917716504239, 'loss': 9.053971290588379}


EP_train:1:  46%|| 12632/27626 [29:45<35:27,  7.05it/s]

{'epoch': 1, 'iter': 12630, 'avg_loss': 8.592980507292003, 'avg_acc': 50.05888290713324, 'loss': 8.156811714172363}


EP_train:1:  46%|| 12642/27626 [29:46<35:20,  7.07it/s]

{'epoch': 1, 'iter': 12640, 'avg_loss': 8.593222002149071, 'avg_acc': 50.05562257732774, 'loss': 9.055747032165527}


EP_train:1:  46%|| 12652/27626 [29:47<35:05,  7.11it/s]

{'epoch': 1, 'iter': 12650, 'avg_loss': 8.593196371856598, 'avg_acc': 50.05582562643269, 'loss': 8.254831314086914}


EP_train:1:  46%|| 12662/27626 [29:49<35:06,  7.10it/s]

{'epoch': 1, 'iter': 12660, 'avg_loss': 8.593095818700293, 'avg_acc': 50.05356014532817, 'loss': 8.711475372314453}


EP_train:1:  46%|| 12672/27626 [29:50<35:16,  7.06it/s]

{'epoch': 1, 'iter': 12670, 'avg_loss': 8.59307499652917, 'avg_acc': 50.05475100623471, 'loss': 8.35300350189209}


EP_train:1:  46%|| 12682/27626 [29:52<35:14,  7.07it/s]

{'epoch': 1, 'iter': 12680, 'avg_loss': 8.592991828617636, 'avg_acc': 50.05199708224903, 'loss': 7.952194690704346}


EP_train:1:  46%|| 12692/27626 [29:53<35:30,  7.01it/s]

{'epoch': 1, 'iter': 12690, 'avg_loss': 8.592794936652556, 'avg_acc': 50.0507249231739, 'loss': 8.562232971191406}


EP_train:1:  46%|| 12702/27626 [29:54<35:11,  7.07it/s]

{'epoch': 1, 'iter': 12700, 'avg_loss': 8.592648494375961, 'avg_acc': 50.04994685457837, 'loss': 9.04527473449707}


EP_train:1:  46%|| 12712/27626 [29:56<35:17,  7.04it/s]

{'epoch': 1, 'iter': 12710, 'avg_loss': 8.592449517978203, 'avg_acc': 50.051382660687594, 'loss': 7.871180534362793}


EP_train:1:  46%|| 12722/27626 [29:57<35:23,  7.02it/s]

{'epoch': 1, 'iter': 12720, 'avg_loss': 8.592283480516555, 'avg_acc': 50.051587925477556, 'loss': 8.649575233459473}


EP_train:1:  46%|| 12732/27626 [29:59<34:59,  7.09it/s]

{'epoch': 1, 'iter': 12730, 'avg_loss': 8.592128764037092, 'avg_acc': 50.05252925928835, 'loss': 9.00750732421875}


EP_train:1:  46%|| 12742/27626 [30:00<35:16,  7.03it/s]

{'epoch': 1, 'iter': 12740, 'avg_loss': 8.592154145381128, 'avg_acc': 50.055186013656694, 'loss': 8.063886642456055}


EP_train:1:  46%|| 12752/27626 [30:02<35:30,  6.98it/s]

{'epoch': 1, 'iter': 12750, 'avg_loss': 8.592297540628325, 'avg_acc': 50.05563289153792, 'loss': 9.345376968383789}


EP_train:1:  46%|| 12762/27626 [30:03<35:06,  7.06it/s]

{'epoch': 1, 'iter': 12760, 'avg_loss': 8.592392490683805, 'avg_acc': 50.05314042786615, 'loss': 9.096137046813965}


EP_train:1:  46%|| 12772/27626 [30:04<35:17,  7.01it/s]

{'epoch': 1, 'iter': 12770, 'avg_loss': 8.592575434855833, 'avg_acc': 50.0530988176337, 'loss': 9.183953285217285}


EP_train:1:  46%|| 12782/27626 [30:06<35:24,  6.99it/s]

{'epoch': 1, 'iter': 12780, 'avg_loss': 8.593000113334504, 'avg_acc': 50.05476879743369, 'loss': 8.676473617553711}


EP_train:1:  46%|| 12792/27626 [30:07<35:01,  7.06it/s]

{'epoch': 1, 'iter': 12790, 'avg_loss': 8.59282878397516, 'avg_acc': 50.056680478461416, 'loss': 8.03960132598877}


EP_train:1:  46%|| 12802/27626 [30:09<35:12,  7.02it/s]

{'epoch': 1, 'iter': 12800, 'avg_loss': 8.592784594282602, 'avg_acc': 50.05688032184985, 'loss': 8.141636848449707}


EP_train:1:  46%|| 12812/27626 [30:10<35:03,  7.04it/s]

{'epoch': 1, 'iter': 12810, 'avg_loss': 8.592888701260765, 'avg_acc': 50.05586019826711, 'loss': 8.752861976623535}


EP_train:1:  46%|| 12822/27626 [30:11<34:58,  7.05it/s]

{'epoch': 1, 'iter': 12820, 'avg_loss': 8.592937694381751, 'avg_acc': 50.057766554870916, 'loss': 8.456062316894531}


EP_train:1:  46%|| 12832/27626 [30:13<35:10,  7.01it/s]

{'epoch': 1, 'iter': 12830, 'avg_loss': 8.59270688625464, 'avg_acc': 50.05528602603071, 'loss': 8.494298934936523}


EP_train:1:  46%|| 12842/27626 [30:14<35:22,  6.97it/s]

{'epoch': 1, 'iter': 12840, 'avg_loss': 8.592661948063302, 'avg_acc': 50.05353944396854, 'loss': 8.06238079071045}


EP_train:1:  47%|| 12852/27626 [30:16<35:11,  7.00it/s]

{'epoch': 1, 'iter': 12850, 'avg_loss': 8.592727764433306, 'avg_acc': 50.05009337794725, 'loss': 9.829083442687988}


EP_train:1:  47%|| 12862/27626 [30:17<35:02,  7.02it/s]

{'epoch': 1, 'iter': 12860, 'avg_loss': 8.593200224886859, 'avg_acc': 50.053456185366606, 'loss': 8.88489818572998}


EP_train:1:  47%|| 12872/27626 [30:19<34:55,  7.04it/s]

{'epoch': 1, 'iter': 12870, 'avg_loss': 8.593365777503008, 'avg_acc': 50.056328179628615, 'loss': 8.507009506225586}


EP_train:1:  47%|| 12882/27626 [30:20<34:47,  7.06it/s]

{'epoch': 1, 'iter': 12880, 'avg_loss': 8.5934811466905, 'avg_acc': 50.05555663380172, 'loss': 8.04223346710205}


EP_train:1:  47%|| 12892/27626 [30:21<34:51,  7.04it/s]

{'epoch': 1, 'iter': 12890, 'avg_loss': 8.59321679821877, 'avg_acc': 50.056725622527345, 'loss': 8.25557804107666}


EP_train:1:  47%|| 12902/27626 [30:23<34:49,  7.05it/s]

{'epoch': 1, 'iter': 12900, 'avg_loss': 8.59322120429804, 'avg_acc': 50.058377257576936, 'loss': 8.97126293182373}


EP_train:1:  47%|| 12912/27626 [30:24<34:40,  7.07it/s]

{'epoch': 1, 'iter': 12910, 'avg_loss': 8.593376245684059, 'avg_acc': 50.05470141739602, 'loss': 8.450453758239746}


EP_train:1:  47%|| 12922/27626 [30:26<34:58,  7.01it/s]

{'epoch': 1, 'iter': 12920, 'avg_loss': 8.593331340850023, 'avg_acc': 50.057319479916416, 'loss': 9.020044326782227}


EP_train:1:  47%|| 12932/27626 [30:27<34:57,  7.01it/s]

{'epoch': 1, 'iter': 12930, 'avg_loss': 8.593083799489182, 'avg_acc': 50.05727515273374, 'loss': 8.570460319519043}


EP_train:1:  47%|| 12942/27626 [30:28<34:49,  7.03it/s]

{'epoch': 1, 'iter': 12940, 'avg_loss': 8.593083041333847, 'avg_acc': 50.05505756896685, 'loss': 8.949358940124512}


EP_train:1:  47%|| 12952/27626 [30:30<35:03,  6.98it/s]

{'epoch': 1, 'iter': 12950, 'avg_loss': 8.592937126775274, 'avg_acc': 50.04801752760405, 'loss': 8.79778003692627}


EP_train:1:  47%|| 12962/27626 [30:31<34:44,  7.04it/s]

{'epoch': 1, 'iter': 12960, 'avg_loss': 8.592559558025767, 'avg_acc': 50.05207931486768, 'loss': 7.991335391998291}


EP_train:1:  47%|| 12972/27626 [30:33<34:44,  7.03it/s]

{'epoch': 1, 'iter': 12970, 'avg_loss': 8.592373525045875, 'avg_acc': 50.05252100840336, 'loss': 8.425385475158691}


EP_train:1:  47%|| 12982/27626 [30:34<34:49,  7.01it/s]

{'epoch': 1, 'iter': 12980, 'avg_loss': 8.592546565278672, 'avg_acc': 50.052480548493946, 'loss': 8.611359596252441}


EP_train:1:  47%|| 12992/27626 [30:36<34:46,  7.01it/s]

{'epoch': 1, 'iter': 12990, 'avg_loss': 8.592552431901014, 'avg_acc': 50.0512373951197, 'loss': 9.294678688049316}


EP_train:1:  47%|| 13002/27626 [30:37<34:27,  7.07it/s]

{'epoch': 1, 'iter': 13000, 'avg_loss': 8.592452435427084, 'avg_acc': 50.052399815398815, 'loss': 7.945140361785889}


EP_train:1:  47%|| 13012/27626 [30:38<34:50,  6.99it/s]

{'epoch': 1, 'iter': 13010, 'avg_loss': 8.59249770515805, 'avg_acc': 50.056922988240714, 'loss': 8.64673137664795}


EP_train:1:  47%|| 13022/27626 [30:40<34:32,  7.05it/s]

{'epoch': 1, 'iter': 13020, 'avg_loss': 8.592875708029316, 'avg_acc': 50.057119268873365, 'loss': 9.04273509979248}


EP_train:1:  47%|| 13032/27626 [30:41<35:00,  6.95it/s]

{'epoch': 1, 'iter': 13030, 'avg_loss': 8.592951825460775, 'avg_acc': 50.057555061008365, 'loss': 7.76391077041626}


EP_train:1:  47%|| 13042/27626 [30:43<34:54,  6.96it/s]

{'epoch': 1, 'iter': 13040, 'avg_loss': 8.5931390611391, 'avg_acc': 50.06086573115559, 'loss': 8.70362377166748}


EP_train:1:  47%|| 13052/27626 [30:44<34:41,  7.00it/s]

{'epoch': 1, 'iter': 13050, 'avg_loss': 8.593066520667534, 'avg_acc': 50.056748525017234, 'loss': 8.155465126037598}


EP_train:1:  47%|| 13062/27626 [30:45<34:23,  7.06it/s]

{'epoch': 1, 'iter': 13060, 'avg_loss': 8.593001786896377, 'avg_acc': 50.056705076181, 'loss': 8.712821960449219}


EP_train:1:  47%|| 13072/27626 [30:47<34:23,  7.05it/s]

{'epoch': 1, 'iter': 13070, 'avg_loss': 8.592773370923013, 'avg_acc': 50.051641037411066, 'loss': 8.344706535339355}


EP_train:1:  47%|| 13082/27626 [30:48<34:37,  7.00it/s]

{'epoch': 1, 'iter': 13080, 'avg_loss': 8.592681650229254, 'avg_acc': 50.05327383227582, 'loss': 7.9976911544799805}


EP_train:1:  47%|| 13092/27626 [30:50<34:31,  7.02it/s]

{'epoch': 1, 'iter': 13090, 'avg_loss': 8.592400728668197, 'avg_acc': 50.05084600106944, 'loss': 8.328670501708984}


EP_train:1:  47%|| 13102/27626 [30:51<34:23,  7.04it/s]

{'epoch': 1, 'iter': 13100, 'avg_loss': 8.592083433385788, 'avg_acc': 50.04889893901229, 'loss': 7.908802032470703}


EP_train:1:  47%|| 13112/27626 [30:53<34:24,  7.03it/s]

{'epoch': 1, 'iter': 13110, 'avg_loss': 8.592225008830628, 'avg_acc': 50.04886164289528, 'loss': 8.74444580078125}


EP_train:1:  47%|| 13122/27626 [30:54<34:23,  7.03it/s]

{'epoch': 1, 'iter': 13120, 'avg_loss': 8.592615528010748, 'avg_acc': 50.049300739272915, 'loss': 9.65683650970459}


EP_train:1:  48%|| 13132/27626 [30:55<34:19,  7.04it/s]

{'epoch': 1, 'iter': 13130, 'avg_loss': 8.592662208215197, 'avg_acc': 50.04997715330135, 'loss': 8.2882080078125}


EP_train:1:  48%|| 13142/27626 [30:57<34:33,  6.99it/s]

{'epoch': 1, 'iter': 13140, 'avg_loss': 8.592623005518584, 'avg_acc': 50.0501769271745, 'loss': 8.693399429321289}


EP_train:1:  48%|| 13152/27626 [30:58<34:06,  7.07it/s]

{'epoch': 1, 'iter': 13150, 'avg_loss': 8.592572384628994, 'avg_acc': 50.04942589917116, 'loss': 8.768919944763184}


EP_train:1:  48%|| 13162/27626 [31:00<33:56,  7.10it/s]

{'epoch': 1, 'iter': 13160, 'avg_loss': 8.592487344009228, 'avg_acc': 50.04891345642428, 'loss': 8.609014511108398}


EP_train:1:  48%|| 13172/27626 [31:01<34:07,  7.06it/s]

{'epoch': 1, 'iter': 13170, 'avg_loss': 8.592381418887198, 'avg_acc': 50.05196074709589, 'loss': 7.88688850402832}


EP_train:1:  48%|| 13182/27626 [31:02<34:01,  7.07it/s]

{'epoch': 1, 'iter': 13180, 'avg_loss': 8.59238802012901, 'avg_acc': 50.05381799559972, 'loss': 9.090478897094727}


EP_train:1:  48%|| 13192/27626 [31:04<34:05,  7.06it/s]

{'epoch': 1, 'iter': 13190, 'avg_loss': 8.592487713934736, 'avg_acc': 50.05614623607004, 'loss': 8.332869529724121}


EP_train:1:  48%|| 13202/27626 [31:05<34:14,  7.02it/s]

{'epoch': 1, 'iter': 13200, 'avg_loss': 8.592396347588295, 'avg_acc': 50.05752405120825, 'loss': 9.00602912902832}


EP_train:1:  48%|| 13212/27626 [31:07<33:59,  7.07it/s]

{'epoch': 1, 'iter': 13210, 'avg_loss': 8.592254962398503, 'avg_acc': 50.05771705397017, 'loss': 8.013980865478516}


EP_train:1:  48%|| 13222/27626 [31:08<34:01,  7.06it/s]

{'epoch': 1, 'iter': 13220, 'avg_loss': 8.592183928348664, 'avg_acc': 50.058618863928594, 'loss': 8.70246696472168}


EP_train:1:  48%|| 13232/27626 [31:10<34:12,  7.01it/s]

{'epoch': 1, 'iter': 13230, 'avg_loss': 8.592157821483726, 'avg_acc': 50.06093643715517, 'loss': 7.583049297332764}


EP_train:1:  48%|| 13242/27626 [31:11<33:57,  7.06it/s]

{'epoch': 1, 'iter': 13240, 'avg_loss': 8.592090780664252, 'avg_acc': 50.06136243486141, 'loss': 8.725324630737305}


EP_train:1:  48%|| 13252/27626 [31:12<33:45,  7.10it/s]

{'epoch': 1, 'iter': 13250, 'avg_loss': 8.592120582801371, 'avg_acc': 50.059665308278625, 'loss': 8.580138206481934}


EP_train:1:  48%|| 13262/27626 [31:14<33:46,  7.09it/s]

{'epoch': 1, 'iter': 13260, 'avg_loss': 8.59226578578772, 'avg_acc': 50.061034235728826, 'loss': 8.89495849609375}


EP_train:1:  48%|| 13272/27626 [31:15<33:53,  7.06it/s]

{'epoch': 1, 'iter': 13270, 'avg_loss': 8.592226157900923, 'avg_acc': 50.06263657599277, 'loss': 8.369478225708008}


EP_train:1:  48%|| 13282/27626 [31:17<34:08,  7.00it/s]

{'epoch': 1, 'iter': 13280, 'avg_loss': 8.59208874511805, 'avg_acc': 50.063765906181764, 'loss': 8.589274406433105}


EP_train:1:  48%|| 13292/27626 [31:18<33:50,  7.06it/s]

{'epoch': 1, 'iter': 13290, 'avg_loss': 8.591892351819684, 'avg_acc': 50.061131592807165, 'loss': 8.6658296585083}


EP_train:1:  48%|| 13302/27626 [31:19<33:49,  7.06it/s]

{'epoch': 1, 'iter': 13300, 'avg_loss': 8.591929946839473, 'avg_acc': 50.06038079843621, 'loss': 9.064001083374023}


EP_train:1:  48%|| 13312/27626 [31:21<34:06,  7.00it/s]

{'epoch': 1, 'iter': 13310, 'avg_loss': 8.592178995375898, 'avg_acc': 50.061978814514305, 'loss': 8.77379035949707}


EP_train:1:  48%|| 13322/27626 [31:22<34:02,  7.00it/s]

{'epoch': 1, 'iter': 13320, 'avg_loss': 8.592255056875352, 'avg_acc': 50.05958636738984, 'loss': 9.347315788269043}


EP_train:1:  48%|| 13332/27626 [31:24<33:49,  7.04it/s]

{'epoch': 1, 'iter': 13330, 'avg_loss': 8.592271514972987, 'avg_acc': 50.06094816592904, 'loss': 8.115535736083984}


EP_train:1:  48%|| 13342/27626 [31:25<33:43,  7.06it/s]

{'epoch': 1, 'iter': 13340, 'avg_loss': 8.59217382437035, 'avg_acc': 50.06465032606251, 'loss': 8.481310844421387}


EP_train:1:  48%|| 13352/27626 [31:26<33:44,  7.05it/s]

{'epoch': 1, 'iter': 13350, 'avg_loss': 8.592304693144472, 'avg_acc': 50.06483596734327, 'loss': 8.462103843688965}


EP_train:1:  48%|| 13362/27626 [31:28<33:48,  7.03it/s]

{'epoch': 1, 'iter': 13360, 'avg_loss': 8.59241139454253, 'avg_acc': 50.065021330738716, 'loss': 8.732733726501465}


EP_train:1:  48%|| 13372/27626 [31:29<33:36,  7.07it/s]

{'epoch': 1, 'iter': 13370, 'avg_loss': 8.59235135259522, 'avg_acc': 50.06497270211652, 'loss': 8.469473838806152}


EP_train:1:  48%|| 13382/27626 [31:31<33:38,  7.06it/s]

{'epoch': 1, 'iter': 13380, 'avg_loss': 8.59223917882794, 'avg_acc': 50.06445706598909, 'loss': 7.994830131530762}


EP_train:1:  48%|| 13392/27626 [31:32<33:42,  7.04it/s]

{'epoch': 1, 'iter': 13390, 'avg_loss': 8.592427909209546, 'avg_acc': 50.06487566275857, 'loss': 8.817972183227539}


EP_train:1:  49%|| 13402/27626 [31:34<33:46,  7.02it/s]

{'epoch': 1, 'iter': 13400, 'avg_loss': 8.592559933902594, 'avg_acc': 50.06506044325051, 'loss': 8.79032039642334}


EP_train:1:  49%|| 13412/27626 [31:35<33:53,  6.99it/s]

{'epoch': 1, 'iter': 13410, 'avg_loss': 8.592503121249246, 'avg_acc': 50.06850719558571, 'loss': 8.790877342224121}


EP_train:1:  49%|| 13422/27626 [31:36<33:42,  7.02it/s]

{'epoch': 1, 'iter': 13420, 'avg_loss': 8.592423640867164, 'avg_acc': 50.070784591312126, 'loss': 7.966978073120117}


EP_train:1:  49%|| 13432/27626 [31:38<33:28,  7.07it/s]

{'epoch': 1, 'iter': 13430, 'avg_loss': 8.592316222593812, 'avg_acc': 50.073291266473085, 'loss': 8.875654220581055}


EP_train:1:  49%|| 13442/27626 [31:39<33:26,  7.07it/s]

{'epoch': 1, 'iter': 13440, 'avg_loss': 8.591976894703341, 'avg_acc': 50.075096718994125, 'loss': 8.538933753967285}


EP_train:1:  49%|| 13452/27626 [31:41<33:54,  6.97it/s]

{'epoch': 1, 'iter': 13450, 'avg_loss': 8.592044683382259, 'avg_acc': 50.07550553862167, 'loss': 7.992218017578125}


EP_train:1:  49%|| 13462/27626 [31:42<33:35,  7.03it/s]

{'epoch': 1, 'iter': 13460, 'avg_loss': 8.591724363162236, 'avg_acc': 50.073127925117, 'loss': 8.973910331726074}


EP_train:1:  49%|| 13472/27626 [31:43<33:28,  7.05it/s]

{'epoch': 1, 'iter': 13470, 'avg_loss': 8.591665334632868, 'avg_acc': 50.07562541756365, 'loss': 9.134839057922363}


EP_train:1:  49%|| 13482/27626 [31:45<33:29,  7.04it/s]

{'epoch': 1, 'iter': 13480, 'avg_loss': 8.591784374127268, 'avg_acc': 50.07835101253616, 'loss': 7.958560943603516}


EP_train:1:  49%|| 13492/27626 [31:46<33:35,  7.01it/s]

{'epoch': 1, 'iter': 13490, 'avg_loss': 8.591865015104132, 'avg_acc': 50.0829256541398, 'loss': 8.180957794189453}


EP_train:1:  49%|| 13502/27626 [31:48<33:38,  7.00it/s]

{'epoch': 1, 'iter': 13500, 'avg_loss': 8.592068167759148, 'avg_acc': 50.08448448263092, 'loss': 9.804852485656738}


EP_train:1:  49%|| 13512/27626 [31:49<33:25,  7.04it/s]

{'epoch': 1, 'iter': 13510, 'avg_loss': 8.592019903784227, 'avg_acc': 50.08557841758567, 'loss': 8.264606475830078}


EP_train:1:  49%|| 13522/27626 [31:51<33:18,  7.06it/s]

{'epoch': 1, 'iter': 13520, 'avg_loss': 8.591939320693697, 'avg_acc': 50.08389727091191, 'loss': 9.182853698730469}


EP_train:1:  49%|| 13532/27626 [31:52<33:25,  7.03it/s]

{'epoch': 1, 'iter': 13530, 'avg_loss': 8.592054810776661, 'avg_acc': 50.08129480452295, 'loss': 8.211644172668457}


EP_train:1:  49%|| 13542/27626 [31:53<33:12,  7.07it/s]

{'epoch': 1, 'iter': 13540, 'avg_loss': 8.59213857828237, 'avg_acc': 50.07892696255816, 'loss': 8.897652626037598}


EP_train:1:  49%|| 13552/27626 [31:55<33:27,  7.01it/s]

{'epoch': 1, 'iter': 13550, 'avg_loss': 8.59213517739864, 'avg_acc': 50.078176887314584, 'loss': 8.170926094055176}


EP_train:1:  49%|| 13562/27626 [31:56<33:06,  7.08it/s]

{'epoch': 1, 'iter': 13560, 'avg_loss': 8.591989418968199, 'avg_acc': 50.075123515964904, 'loss': 8.851964950561523}


EP_train:1:  49%|| 13572/27626 [31:58<33:23,  7.01it/s]

{'epoch': 1, 'iter': 13570, 'avg_loss': 8.591746447335979, 'avg_acc': 50.07207464446246, 'loss': 8.215034484863281}


EP_train:1:  49%|| 13582/27626 [31:59<33:20,  7.02it/s]

{'epoch': 1, 'iter': 13580, 'avg_loss': 8.591582723468264, 'avg_acc': 50.07432258302039, 'loss': 8.727289199829102}


EP_train:1:  49%|| 13592/27626 [32:00<33:14,  7.04it/s]

{'epoch': 1, 'iter': 13590, 'avg_loss': 8.591449316742054, 'avg_acc': 50.07242844529468, 'loss': 9.30444049835205}


EP_train:1:  49%|| 13602/27626 [32:02<33:09,  7.05it/s]

{'epoch': 1, 'iter': 13600, 'avg_loss': 8.591415169046044, 'avg_acc': 50.07145614293067, 'loss': 8.218052864074707}


EP_train:1:  49%|| 13612/27626 [32:03<33:18,  7.01it/s]

{'epoch': 1, 'iter': 13610, 'avg_loss': 8.591212138957275, 'avg_acc': 50.07278120637719, 'loss': 8.68337345123291}


EP_train:1:  49%|| 13622/27626 [32:05<33:02,  7.06it/s]

{'epoch': 1, 'iter': 13620, 'avg_loss': 8.591121847711289, 'avg_acc': 50.071810072681885, 'loss': 8.464223861694336}


EP_train:1:  49%|| 13632/27626 [32:06<33:13,  7.02it/s]

{'epoch': 1, 'iter': 13630, 'avg_loss': 8.591105884470565, 'avg_acc': 50.072445161763625, 'loss': 8.233840942382812}


EP_train:1:  49%|| 13642/27626 [32:08<33:21,  6.99it/s]

{'epoch': 1, 'iter': 13640, 'avg_loss': 8.591192410336712, 'avg_acc': 50.07491202990983, 'loss': 8.59241771697998}


EP_train:1:  49%|| 13652/27626 [32:09<33:00,  7.06it/s]

{'epoch': 1, 'iter': 13650, 'avg_loss': 8.59115278847479, 'avg_acc': 50.07714636290381, 'loss': 8.975119590759277}


EP_train:1:  49%|| 13662/27626 [32:10<33:20,  6.98it/s]

{'epoch': 1, 'iter': 13660, 'avg_loss': 8.591241865588808, 'avg_acc': 50.078462411243684, 'loss': 8.670683860778809}


EP_train:1:  49%|| 13672/27626 [32:12<33:25,  6.96it/s]

{'epoch': 1, 'iter': 13670, 'avg_loss': 8.591325681269382, 'avg_acc': 50.07794784580499, 'loss': 8.917008399963379}


EP_train:1:  50%|| 13682/27626 [32:13<33:04,  7.03it/s]

{'epoch': 1, 'iter': 13680, 'avg_loss': 8.591413676865084, 'avg_acc': 50.07880454645128, 'loss': 8.28342342376709}


EP_train:1:  50%|| 13692/27626 [32:15<32:54,  7.06it/s]

{'epoch': 1, 'iter': 13690, 'avg_loss': 8.591198476443514, 'avg_acc': 50.0810295084362, 'loss': 7.797616481781006}


EP_train:1:  50%|| 13702/27626 [32:16<32:56,  7.04it/s]

{'epoch': 1, 'iter': 13700, 'avg_loss': 8.591089235223233, 'avg_acc': 50.07618057076125, 'loss': 8.482970237731934}


EP_train:1:  50%|| 13712/27626 [32:17<33:03,  7.01it/s]

{'epoch': 1, 'iter': 13710, 'avg_loss': 8.59092101424087, 'avg_acc': 50.077264605061636, 'loss': 8.000150680541992}


EP_train:1:  50%|| 13722/27626 [32:19<33:13,  6.97it/s]

{'epoch': 1, 'iter': 13720, 'avg_loss': 8.590827125943825, 'avg_acc': 50.0747030099847, 'loss': 8.690924644470215}


EP_train:1:  50%|| 13732/27626 [32:20<32:42,  7.08it/s]

{'epoch': 1, 'iter': 13730, 'avg_loss': 8.590977439841478, 'avg_acc': 50.072372733231376, 'loss': 7.89052677154541}


EP_train:1:  50%|| 13742/27626 [32:22<32:59,  7.02it/s]

{'epoch': 1, 'iter': 13740, 'avg_loss': 8.591095693263586, 'avg_acc': 50.070500691361616, 'loss': 8.544367790222168}


EP_train:1:  50%|| 13752/27626 [32:23<33:06,  6.98it/s]

{'epoch': 1, 'iter': 13750, 'avg_loss': 8.591141980445634, 'avg_acc': 50.072040215257076, 'loss': 9.413553237915039}


EP_train:1:  50%|| 13762/27626 [32:25<33:08,  6.97it/s]

{'epoch': 1, 'iter': 13760, 'avg_loss': 8.59098388767374, 'avg_acc': 50.07221495530848, 'loss': 8.021286010742188}


EP_train:1:  50%|| 13772/27626 [32:26<32:46,  7.05it/s]

{'epoch': 1, 'iter': 13770, 'avg_loss': 8.591133655316515, 'avg_acc': 50.07397792462422, 'loss': 8.76476764678955}


EP_train:1:  50%|| 13782/27626 [32:27<32:28,  7.11it/s]

{'epoch': 1, 'iter': 13780, 'avg_loss': 8.59138863626996, 'avg_acc': 50.07233691314128, 'loss': 10.169450759887695}


EP_train:1:  50%|| 13792/27626 [32:29<32:37,  7.07it/s]

{'epoch': 1, 'iter': 13790, 'avg_loss': 8.591305515520828, 'avg_acc': 50.073190849104485, 'loss': 7.677263259887695}


EP_train:1:  50%|| 13802/27626 [32:30<32:43,  7.04it/s]

{'epoch': 1, 'iter': 13800, 'avg_loss': 8.59126313798074, 'avg_acc': 50.07449641330338, 'loss': 7.492374420166016}


EP_train:1:  50%|| 13812/27626 [32:32<32:45,  7.03it/s]

{'epoch': 1, 'iter': 13810, 'avg_loss': 8.59111616943102, 'avg_acc': 50.0717272463978, 'loss': 8.788430213928223}


EP_train:1:  50%|| 13822/27626 [32:33<32:35,  7.06it/s]

{'epoch': 1, 'iter': 13820, 'avg_loss': 8.591083264093859, 'avg_acc': 50.07144924390421, 'loss': 9.104880332946777}


EP_train:1:  50%|| 13832/27626 [32:34<32:34,  7.06it/s]

{'epoch': 1, 'iter': 13830, 'avg_loss': 8.59115595421437, 'avg_acc': 50.0727532354855, 'loss': 8.751697540283203}


EP_train:1:  50%|| 13842/27626 [32:36<32:49,  7.00it/s]

{'epoch': 1, 'iter': 13840, 'avg_loss': 8.591143778315962, 'avg_acc': 50.073378007369406, 'loss': 8.544737815856934}


EP_train:1:  50%|| 13852/27626 [32:37<32:38,  7.03it/s]

{'epoch': 1, 'iter': 13850, 'avg_loss': 8.590982289858548, 'avg_acc': 50.07558118547397, 'loss': 8.673376083374023}


EP_train:1:  50%|| 13862/27626 [32:39<32:35,  7.04it/s]

{'epoch': 1, 'iter': 13860, 'avg_loss': 8.591077668680084, 'avg_acc': 50.077555731909676, 'loss': 9.768529891967773}


EP_train:1:  50%|| 13872/27626 [32:40<32:30,  7.05it/s]

{'epoch': 1, 'iter': 13870, 'avg_loss': 8.591033157914884, 'avg_acc': 50.08178033306899, 'loss': 7.785269260406494}


EP_train:1:  50%|| 13882/27626 [32:42<32:31,  7.04it/s]

{'epoch': 1, 'iter': 13880, 'avg_loss': 8.590896495489039, 'avg_acc': 50.08329731287371, 'loss': 8.532983779907227}


EP_train:1:  50%|| 13892/27626 [32:43<32:46,  6.98it/s]

{'epoch': 1, 'iter': 13890, 'avg_loss': 8.590773898095792, 'avg_acc': 50.08346231372831, 'loss': 8.950550079345703}


EP_train:1:  50%|| 13902/27626 [32:44<32:30,  7.04it/s]

{'epoch': 1, 'iter': 13900, 'avg_loss': 8.590530939989232, 'avg_acc': 50.08070462556651, 'loss': 7.755860328674316}


EP_train:1:  50%|| 13912/27626 [32:46<32:26,  7.05it/s]

{'epoch': 1, 'iter': 13910, 'avg_loss': 8.590306337093452, 'avg_acc': 50.07884947164115, 'loss': 8.114771842956543}


EP_train:1:  50%|| 13922/27626 [32:47<32:46,  6.97it/s]

{'epoch': 1, 'iter': 13920, 'avg_loss': 8.590647948112787, 'avg_acc': 50.08013971697436, 'loss': 8.434990882873535}


EP_train:1:  50%|| 13932/27626 [32:49<32:30,  7.02it/s]

{'epoch': 1, 'iter': 13930, 'avg_loss': 8.59069522426113, 'avg_acc': 50.08277402914364, 'loss': 9.367752075195312}


EP_train:1:  50%|| 13942/27626 [32:50<32:34,  7.00it/s]

{'epoch': 1, 'iter': 13940, 'avg_loss': 8.590406510026304, 'avg_acc': 50.082938813571474, 'loss': 8.222907066345215}


EP_train:1:  51%|| 13952/27626 [32:51<32:20,  7.05it/s]

{'epoch': 1, 'iter': 13950, 'avg_loss': 8.59039893696175, 'avg_acc': 50.0826553652068, 'loss': 8.772133827209473}


EP_train:1:  51%|| 13962/27626 [32:53<32:20,  7.04it/s]

{'epoch': 1, 'iter': 13960, 'avg_loss': 8.59013576277364, 'avg_acc': 50.0837153499033, 'loss': 7.683091163635254}


EP_train:1:  51%|| 13972/27626 [32:54<32:15,  7.05it/s]

{'epoch': 1, 'iter': 13970, 'avg_loss': 8.590113275181325, 'avg_acc': 50.08320807386729, 'loss': 7.748707294464111}


EP_train:1:  51%|| 13982/27626 [32:56<32:22,  7.02it/s]

{'epoch': 1, 'iter': 13980, 'avg_loss': 8.59024802148534, 'avg_acc': 50.08493669980688, 'loss': 7.734811782836914}


EP_train:1:  51%|| 13992/27626 [32:57<32:13,  7.05it/s]

{'epoch': 1, 'iter': 13990, 'avg_loss': 8.590408995889218, 'avg_acc': 50.08264241297977, 'loss': 10.191783905029297}


EP_train:1:  51%|| 14002/27626 [32:59<32:15,  7.04it/s]

{'epoch': 1, 'iter': 14000, 'avg_loss': 8.590405999962819, 'avg_acc': 50.07968180844225, 'loss': 8.65906047821045}


EP_train:1:  51%|| 14012/27626 [33:00<32:07,  7.06it/s]

{'epoch': 1, 'iter': 14010, 'avg_loss': 8.590425566580096, 'avg_acc': 50.078509742345304, 'loss': 9.008624076843262}


EP_train:1:  51%|| 14022/27626 [33:01<32:05,  7.07it/s]

{'epoch': 1, 'iter': 14020, 'avg_loss': 8.590345504815973, 'avg_acc': 50.077339348120674, 'loss': 8.77609920501709}


EP_train:1:  51%|| 14032/27626 [33:03<32:06,  7.06it/s]

{'epoch': 1, 'iter': 14030, 'avg_loss': 8.59038554198998, 'avg_acc': 50.076838785546286, 'loss': 8.698433876037598}


EP_train:1:  51%|| 14042/27626 [33:04<31:55,  7.09it/s]

{'epoch': 1, 'iter': 14040, 'avg_loss': 8.590286929165398, 'avg_acc': 50.07411331101773, 'loss': 8.52372932434082}


EP_train:1:  51%|| 14052/27626 [33:06<31:59,  7.07it/s]

{'epoch': 1, 'iter': 14050, 'avg_loss': 8.590161491972587, 'avg_acc': 50.07294854458757, 'loss': 8.45478343963623}


EP_train:1:  51%|| 14062/27626 [33:07<32:10,  7.03it/s]

{'epoch': 1, 'iter': 14060, 'avg_loss': 8.58996711293763, 'avg_acc': 50.07089645117701, 'loss': 7.697453022003174}


EP_train:1:  51%|| 14072/27626 [33:08<32:20,  6.98it/s]

{'epoch': 1, 'iter': 14070, 'avg_loss': 8.589762851497392, 'avg_acc': 50.071512330324786, 'loss': 8.437280654907227}


EP_train:1:  51%|| 14082/27626 [33:10<32:13,  7.01it/s]

{'epoch': 1, 'iter': 14080, 'avg_loss': 8.589787053652136, 'avg_acc': 50.06502556636603, 'loss': 9.345593452453613}


EP_train:1:  51%|| 14092/27626 [33:11<32:00,  7.05it/s]

{'epoch': 1, 'iter': 14090, 'avg_loss': 8.590063004638886, 'avg_acc': 50.06675360158967, 'loss': 8.919509887695312}


EP_train:1:  51%|| 14102/27626 [33:13<31:57,  7.05it/s]

{'epoch': 1, 'iter': 14100, 'avg_loss': 8.589981509340493, 'avg_acc': 50.06581980001419, 'loss': 8.936934471130371}


EP_train:1:  51%|| 14112/27626 [33:14<32:07,  7.01it/s]

{'epoch': 1, 'iter': 14110, 'avg_loss': 8.58975430192502, 'avg_acc': 50.063558571327334, 'loss': 7.981850624084473}


EP_train:1:  51%|| 14122/27626 [33:16<32:18,  6.97it/s]

{'epoch': 1, 'iter': 14120, 'avg_loss': 8.589855367220053, 'avg_acc': 50.06462006940018, 'loss': 7.997847557067871}


EP_train:1:  51%|| 14132/27626 [33:17<31:48,  7.07it/s]

{'epoch': 1, 'iter': 14130, 'avg_loss': 8.589981104375855, 'avg_acc': 50.06214174509943, 'loss': 8.786544799804688}


EP_train:1:  51%|| 14142/27626 [33:18<31:58,  7.03it/s]

{'epoch': 1, 'iter': 14140, 'avg_loss': 8.589909994216951, 'avg_acc': 50.062097800721304, 'loss': 8.253875732421875}


EP_train:1:  51%|| 14152/27626 [33:20<31:41,  7.09it/s]

{'epoch': 1, 'iter': 14150, 'avg_loss': 8.590033905993552, 'avg_acc': 50.06404141050103, 'loss': 9.20761489868164}


EP_train:1:  51%|| 14162/27626 [33:21<31:57,  7.02it/s]

{'epoch': 1, 'iter': 14160, 'avg_loss': 8.590064139312611, 'avg_acc': 50.0626721276746, 'loss': 7.82790470123291}


EP_train:1:  51%|| 14172/27626 [33:23<31:54,  7.03it/s]

{'epoch': 1, 'iter': 14170, 'avg_loss': 8.589872145785684, 'avg_acc': 50.064171547526634, 'loss': 8.323824882507324}


EP_train:1:  51%|| 14182/27626 [33:24<31:33,  7.10it/s]

{'epoch': 1, 'iter': 14180, 'avg_loss': 8.589731978072189, 'avg_acc': 50.06588921796771, 'loss': 8.022680282592773}


EP_train:1:  51%|| 14192/27626 [33:25<31:52,  7.02it/s]

{'epoch': 1, 'iter': 14190, 'avg_loss': 8.58939077035795, 'avg_acc': 50.064961947713336, 'loss': 8.198508262634277}


EP_train:1:  51%|| 14202/27626 [33:27<32:00,  6.99it/s]

{'epoch': 1, 'iter': 14200, 'avg_loss': 8.589428280014513, 'avg_acc': 50.06403598338145, 'loss': 7.999461650848389}


EP_train:1:  51%|| 14212/27626 [33:28<31:50,  7.02it/s]

{'epoch': 1, 'iter': 14210, 'avg_loss': 8.58959455586077, 'avg_acc': 50.06355112237, 'loss': 8.654739379882812}


EP_train:1:  51%|| 14222/27626 [33:30<31:51,  7.01it/s]

{'epoch': 1, 'iter': 14220, 'avg_loss': 8.589468178153247, 'avg_acc': 50.06262745235919, 'loss': 8.244495391845703}


EP_train:1:  52%|| 14232/27626 [33:31<31:38,  7.06it/s]

{'epoch': 1, 'iter': 14230, 'avg_loss': 8.589271560287234, 'avg_acc': 50.06214426252548, 'loss': 8.623106002807617}


EP_train:1:  52%|| 14242/27626 [33:32<31:43,  7.03it/s]

{'epoch': 1, 'iter': 14240, 'avg_loss': 8.589361717355912, 'avg_acc': 50.06210062495611, 'loss': 8.286867141723633}


EP_train:1:  52%|| 14252/27626 [33:34<31:24,  7.10it/s]

{'epoch': 1, 'iter': 14250, 'avg_loss': 8.58931240029322, 'avg_acc': 50.06249561434285, 'loss': 9.853376388549805}


EP_train:1:  52%|| 14262/27626 [33:35<31:29,  7.07it/s]

{'epoch': 1, 'iter': 14260, 'avg_loss': 8.589245132623903, 'avg_acc': 50.06047962975948, 'loss': 8.080423355102539}


EP_train:1:  52%|| 14272/27626 [33:37<31:51,  6.99it/s]

{'epoch': 1, 'iter': 14270, 'avg_loss': 8.58911943215143, 'avg_acc': 50.064597785719286, 'loss': 8.495850563049316}


EP_train:1:  52%|| 14282/27626 [33:38<31:47,  7.00it/s]

{'epoch': 1, 'iter': 14280, 'avg_loss': 8.589039442143715, 'avg_acc': 50.06542784118759, 'loss': 7.7409348487854}


EP_train:1:  52%|| 14292/27626 [33:40<31:45,  7.00it/s]

{'epoch': 1, 'iter': 14290, 'avg_loss': 8.589048247116791, 'avg_acc': 50.06581939682317, 'loss': 8.20761489868164}


EP_train:1:  52%|| 14302/27626 [33:41<31:35,  7.03it/s]

{'epoch': 1, 'iter': 14300, 'avg_loss': 8.58916521759252, 'avg_acc': 50.066428921054474, 'loss': 8.178474426269531}


EP_train:1:  52%|| 14312/27626 [33:42<31:17,  7.09it/s]

{'epoch': 1, 'iter': 14310, 'avg_loss': 8.589485298370251, 'avg_acc': 50.064417231500244, 'loss': 8.879325866699219}


EP_train:1:  52%|| 14322/27626 [33:44<31:23,  7.06it/s]

{'epoch': 1, 'iter': 14320, 'avg_loss': 8.589473774775922, 'avg_acc': 50.06568151665387, 'loss': 8.021076202392578}


EP_train:1:  52%|| 14332/27626 [33:45<31:30,  7.03it/s]

{'epoch': 1, 'iter': 14330, 'avg_loss': 8.589499424427371, 'avg_acc': 50.069996859953946, 'loss': 8.936985969543457}


EP_train:1:  52%|| 14342/27626 [33:47<31:27,  7.04it/s]

{'epoch': 1, 'iter': 14340, 'avg_loss': 8.589578177438321, 'avg_acc': 50.06885851753713, 'loss': 8.480582237243652}


EP_train:1:  52%|| 14352/27626 [33:48<31:08,  7.11it/s]

{'epoch': 1, 'iter': 14350, 'avg_loss': 8.589547527034926, 'avg_acc': 50.06510870322626, 'loss': 8.472485542297363}


EP_train:1:  52%|| 14362/27626 [33:49<31:17,  7.07it/s]

{'epoch': 1, 'iter': 14360, 'avg_loss': 8.589504888284987, 'avg_acc': 50.065063366060855, 'loss': 8.52629280090332}


EP_train:1:  52%|| 14372/27626 [33:51<31:16,  7.06it/s]

{'epoch': 1, 'iter': 14370, 'avg_loss': 8.589743351805277, 'avg_acc': 50.06523554380349, 'loss': 8.463961601257324}


EP_train:1:  52%|| 14382/27626 [33:52<31:10,  7.08it/s]

{'epoch': 1, 'iter': 14380, 'avg_loss': 8.589771736415054, 'avg_acc': 50.06475558027954, 'loss': 7.299317359924316}


EP_train:1:  52%|| 14392/27626 [33:54<31:16,  7.05it/s]

{'epoch': 1, 'iter': 14390, 'avg_loss': 8.589755725416595, 'avg_acc': 50.06579633104023, 'loss': 8.150310516357422}


EP_train:1:  52%|| 14402/27626 [33:55<31:11,  7.07it/s]

{'epoch': 1, 'iter': 14400, 'avg_loss': 8.589765731447828, 'avg_acc': 50.06661863759461, 'loss': 8.884035110473633}


EP_train:1:  52%|| 14412/27626 [33:56<31:19,  7.03it/s]

{'epoch': 1, 'iter': 14410, 'avg_loss': 8.589853295544682, 'avg_acc': 50.067222954687395, 'loss': 8.399785995483398}


EP_train:1:  52%|| 14422/27626 [33:58<31:22,  7.01it/s]

{'epoch': 1, 'iter': 14420, 'avg_loss': 8.589917121156063, 'avg_acc': 50.06804313154427, 'loss': 8.057324409484863}


EP_train:1:  52%|| 14432/27626 [33:59<31:15,  7.03it/s]

{'epoch': 1, 'iter': 14430, 'avg_loss': 8.589776971188018, 'avg_acc': 50.068645624003885, 'loss': 8.718802452087402}


EP_train:1:  52%|| 14442/27626 [34:01<31:26,  6.99it/s]

{'epoch': 1, 'iter': 14440, 'avg_loss': 8.589638025425005, 'avg_acc': 50.06838169101863, 'loss': 7.958123683929443}


EP_train:1:  52%|| 14452/27626 [34:02<30:57,  7.09it/s]

{'epoch': 1, 'iter': 14450, 'avg_loss': 8.589445876329467, 'avg_acc': 50.06941561137638, 'loss': 8.366622924804688}


EP_train:1:  52%|| 14462/27626 [34:04<30:57,  7.09it/s]

{'epoch': 1, 'iter': 14460, 'avg_loss': 8.589535287006722, 'avg_acc': 50.069583707904016, 'loss': 9.075044631958008}


EP_train:1:  52%|| 14472/27626 [34:05<31:20,  7.00it/s]

{'epoch': 1, 'iter': 14470, 'avg_loss': 8.589609703356308, 'avg_acc': 50.07039941952871, 'loss': 8.471019744873047}


EP_train:1:  52%|| 14482/27626 [34:06<31:14,  7.01it/s]

{'epoch': 1, 'iter': 14480, 'avg_loss': 8.589640858604568, 'avg_acc': 50.069703404461016, 'loss': 7.862840175628662}


EP_train:1:  52%|| 14492/27626 [34:08<31:03,  7.05it/s]

{'epoch': 1, 'iter': 14490, 'avg_loss': 8.589616917533894, 'avg_acc': 50.07267441860465, 'loss': 8.46687126159668}


EP_train:1:  52%|| 14502/27626 [34:09<30:58,  7.06it/s]

{'epoch': 1, 'iter': 14500, 'avg_loss': 8.589545721350419, 'avg_acc': 50.07090028273912, 'loss': 8.749044418334961}


EP_train:1:  53%|| 14512/27626 [34:11<30:56,  7.06it/s]

{'epoch': 1, 'iter': 14510, 'avg_loss': 8.589362081969748, 'avg_acc': 50.07063606918889, 'loss': 8.388246536254883}


EP_train:1:  53%|| 14522/27626 [34:12<30:53,  7.07it/s]

{'epoch': 1, 'iter': 14520, 'avg_loss': 8.589379305813596, 'avg_acc': 50.06735934164313, 'loss': 8.944101333618164}


EP_train:1:  53%|| 14532/27626 [34:13<30:53,  7.06it/s]

{'epoch': 1, 'iter': 14530, 'avg_loss': 8.589477116764145, 'avg_acc': 50.06559252632303, 'loss': 9.460902214050293}


EP_train:1:  53%|| 14542/27626 [34:15<30:52,  7.06it/s]

{'epoch': 1, 'iter': 14540, 'avg_loss': 8.589452482993087, 'avg_acc': 50.064902688948486, 'loss': 8.306042671203613}


EP_train:1:  53%|| 14552/27626 [34:16<30:48,  7.07it/s]

{'epoch': 1, 'iter': 14550, 'avg_loss': 8.589573972307607, 'avg_acc': 50.066361418459216, 'loss': 8.25175666809082}


EP_train:1:  53%|| 14562/27626 [34:18<31:00,  7.02it/s]

{'epoch': 1, 'iter': 14560, 'avg_loss': 8.589409495929335, 'avg_acc': 50.06631584369205, 'loss': 8.216004371643066}


EP_train:1:  53%|| 14572/27626 [34:19<31:05,  7.00it/s]

{'epoch': 1, 'iter': 14570, 'avg_loss': 8.589430523821914, 'avg_acc': 50.065841397296, 'loss': 9.14143180847168}


EP_train:1:  53%|| 14582/27626 [34:21<30:50,  7.05it/s]

{'epoch': 1, 'iter': 14580, 'avg_loss': 8.589772252027295, 'avg_acc': 50.063867361635005, 'loss': 7.600823879241943}


EP_train:1:  53%|| 14592/27626 [34:22<30:46,  7.06it/s]

{'epoch': 1, 'iter': 14590, 'avg_loss': 8.589716485361869, 'avg_acc': 50.06382358988417, 'loss': 7.818158149719238}


EP_train:1:  53%|| 14602/27626 [34:23<30:48,  7.04it/s]

{'epoch': 1, 'iter': 14600, 'avg_loss': 8.589707772018697, 'avg_acc': 50.06313779878091, 'loss': 8.054332733154297}


EP_train:1:  53%|| 14612/27626 [34:25<30:51,  7.03it/s]

{'epoch': 1, 'iter': 14610, 'avg_loss': 8.589520893745718, 'avg_acc': 50.059672507015264, 'loss': 8.159689903259277}


EP_train:1:  53%|| 14622/27626 [34:26<30:47,  7.04it/s]

{'epoch': 1, 'iter': 14620, 'avg_loss': 8.589402427899023, 'avg_acc': 50.06241023185829, 'loss': 8.564051628112793}


EP_train:1:  53%|| 14632/27626 [34:28<30:41,  7.06it/s]

{'epoch': 1, 'iter': 14630, 'avg_loss': 8.589391600726348, 'avg_acc': 50.061726812931454, 'loss': 8.118891716003418}


EP_train:1:  53%|| 14642/27626 [34:29<30:33,  7.08it/s]

{'epoch': 1, 'iter': 14640, 'avg_loss': 8.589364635318235, 'avg_acc': 50.060190560754044, 'loss': 8.733565330505371}


EP_train:1:  53%|| 14652/27626 [34:30<30:45,  7.03it/s]

{'epoch': 1, 'iter': 14650, 'avg_loss': 8.589179315320315, 'avg_acc': 50.05972288580984, 'loss': 7.989612102508545}


EP_train:1:  53%|| 14662/27626 [34:32<30:35,  7.06it/s]

{'epoch': 1, 'iter': 14660, 'avg_loss': 8.589218110734432, 'avg_acc': 50.06373201009481, 'loss': 7.377723217010498}


EP_train:1:  53%|| 14672/27626 [34:33<30:24,  7.10it/s]

{'epoch': 1, 'iter': 14670, 'avg_loss': 8.589240885960708, 'avg_acc': 50.06368856928635, 'loss': 8.164813995361328}


EP_train:1:  53%|| 14682/27626 [34:35<30:20,  7.11it/s]

{'epoch': 1, 'iter': 14680, 'avg_loss': 8.589198306685011, 'avg_acc': 50.062155166541785, 'loss': 8.109061241149902}


EP_train:1:  53%|| 14692/27626 [34:36<30:24,  7.09it/s]

{'epoch': 1, 'iter': 14690, 'avg_loss': 8.589459737953371, 'avg_acc': 50.06019842080185, 'loss': 9.36681079864502}


EP_train:1:  53%|| 14702/27626 [34:37<30:33,  7.05it/s]

{'epoch': 1, 'iter': 14700, 'avg_loss': 8.589677387350685, 'avg_acc': 50.05824433711993, 'loss': 9.271017074584961}


EP_train:1:  53%|| 14712/27626 [34:39<30:33,  7.05it/s]

{'epoch': 1, 'iter': 14710, 'avg_loss': 8.58975128174217, 'avg_acc': 50.055230779688664, 'loss': 8.94387149810791}


EP_train:1:  53%|| 14722/27626 [34:40<30:25,  7.07it/s]

{'epoch': 1, 'iter': 14720, 'avg_loss': 8.589752494264463, 'avg_acc': 50.05901433326541, 'loss': 9.566225051879883}


EP_train:1:  53%|| 14732/27626 [34:42<30:30,  7.05it/s]

{'epoch': 1, 'iter': 14730, 'avg_loss': 8.589764475620138, 'avg_acc': 50.05918640961238, 'loss': 8.484583854675293}


EP_train:1:  53%|| 14742/27626 [34:43<30:23,  7.07it/s]

{'epoch': 1, 'iter': 14740, 'avg_loss': 8.589911437036225, 'avg_acc': 50.0625381588766, 'loss': 8.132769584655762}


EP_train:1:  53%|| 14752/27626 [34:45<30:29,  7.04it/s]

{'epoch': 1, 'iter': 14750, 'avg_loss': 8.589741092820498, 'avg_acc': 50.0608009626466, 'loss': 8.270171165466309}


EP_train:1:  53%|| 14762/27626 [34:46<30:12,  7.10it/s]

{'epoch': 1, 'iter': 14760, 'avg_loss': 8.58982774844847, 'avg_acc': 50.055467109274446, 'loss': 8.893333435058594}


EP_train:1:  53%|| 14772/27626 [34:47<30:14,  7.08it/s]

{'epoch': 1, 'iter': 14770, 'avg_loss': 8.58988799971997, 'avg_acc': 50.05669893710649, 'loss': 8.837459564208984}


EP_train:1:  54%|| 14782/27626 [34:49<30:43,  6.97it/s]

{'epoch': 1, 'iter': 14780, 'avg_loss': 8.589871181153564, 'avg_acc': 50.05539205737095, 'loss': 8.242256164550781}


EP_train:1:  54%|| 14792/27626 [34:50<30:22,  7.04it/s]

{'epoch': 1, 'iter': 14790, 'avg_loss': 8.59002187474328, 'avg_acc': 50.057044824555476, 'loss': 8.676216125488281}


EP_train:1:  54%|| 14802/27626 [34:52<30:17,  7.06it/s]

{'epoch': 1, 'iter': 14800, 'avg_loss': 8.590126729093688, 'avg_acc': 50.05742855212486, 'loss': 8.721870422363281}


EP_train:1:  54%|| 14812/27626 [34:53<30:08,  7.08it/s]

{'epoch': 1, 'iter': 14810, 'avg_loss': 8.590271130925565, 'avg_acc': 50.05823374518938, 'loss': 9.843684196472168}


EP_train:1:  54%|| 14822/27626 [34:54<30:11,  7.07it/s]

{'epoch': 1, 'iter': 14820, 'avg_loss': 8.590253108635693, 'avg_acc': 50.05967040010796, 'loss': 8.458023071289062}


EP_train:1:  54%|| 14832/27626 [34:56<30:10,  7.07it/s]

{'epoch': 1, 'iter': 14830, 'avg_loss': 8.590141043612547, 'avg_acc': 50.05899804463624, 'loss': 9.32567024230957}


EP_train:1:  54%|| 14842/27626 [34:57<30:17,  7.03it/s]

{'epoch': 1, 'iter': 14840, 'avg_loss': 8.589941183404695, 'avg_acc': 50.061063944478136, 'loss': 8.960826873779297}


EP_train:1:  54%|| 14852/27626 [34:59<30:23,  7.01it/s]

{'epoch': 1, 'iter': 14850, 'avg_loss': 8.590010626405492, 'avg_acc': 50.0629166386102, 'loss': 8.701995849609375}


EP_train:1:  54%|| 14862/27626 [35:00<30:24,  7.00it/s]

{'epoch': 1, 'iter': 14860, 'avg_loss': 8.590051849872971, 'avg_acc': 50.06729022273064, 'loss': 8.894804954528809}


EP_train:1:  54%|| 14872/27626 [35:01<30:12,  7.03it/s]

{'epoch': 1, 'iter': 14870, 'avg_loss': 8.590029913113678, 'avg_acc': 50.06724497343824, 'loss': 8.835223197937012}


EP_train:1:  54%|| 14882/27626 [35:03<30:14,  7.02it/s]

{'epoch': 1, 'iter': 14880, 'avg_loss': 8.589944883665325, 'avg_acc': 50.068879779584705, 'loss': 8.929927825927734}


EP_train:1:  54%|| 14892/27626 [35:04<29:58,  7.08it/s]

{'epoch': 1, 'iter': 14890, 'avg_loss': 8.589936450621126, 'avg_acc': 50.06715465717547, 'loss': 8.624833106994629}


EP_train:1:  54%|| 14902/27626 [35:06<30:18,  7.00it/s]

{'epoch': 1, 'iter': 14900, 'avg_loss': 8.589817126127995, 'avg_acc': 50.068997047178044, 'loss': 8.899740219116211}


EP_train:1:  54%|| 14912/27626 [35:07<29:52,  7.09it/s]

{'epoch': 1, 'iter': 14910, 'avg_loss': 8.58967166854841, 'avg_acc': 50.06853162095097, 'loss': 7.859794616699219}


EP_train:1:  54%|| 14922/27626 [35:09<30:11,  7.01it/s]

{'epoch': 1, 'iter': 14920, 'avg_loss': 8.589551183127757, 'avg_acc': 50.06806681857784, 'loss': 7.776535511016846}


EP_train:1:  54%|| 14932/27626 [35:10<30:00,  7.05it/s]

{'epoch': 1, 'iter': 14930, 'avg_loss': 8.58954224756706, 'avg_acc': 50.06927700756815, 'loss': 7.768381595611572}


EP_train:1:  54%|| 14942/27626 [35:11<29:49,  7.09it/s]

{'epoch': 1, 'iter': 14940, 'avg_loss': 8.58952387770243, 'avg_acc': 50.06839401646476, 'loss': 8.303173065185547}


EP_train:1:  54%|| 14952/27626 [35:13<30:11,  7.00it/s]

{'epoch': 1, 'iter': 14950, 'avg_loss': 8.589774817353897, 'avg_acc': 50.068557287137985, 'loss': 9.756325721740723}


EP_train:1:  54%|| 14962/27626 [35:14<30:10,  7.00it/s]

{'epoch': 1, 'iter': 14960, 'avg_loss': 8.58993626826508, 'avg_acc': 50.06830258672549, 'loss': 8.833564758300781}


EP_train:1:  54%|| 14972/27626 [35:16<29:48,  7.08it/s]

{'epoch': 1, 'iter': 14970, 'avg_loss': 8.590044701332703, 'avg_acc': 50.06867443724534, 'loss': 8.184741973876953}


EP_train:1:  54%|| 14982/27626 [35:17<30:05,  7.00it/s]

{'epoch': 1, 'iter': 14980, 'avg_loss': 8.589912414518952, 'avg_acc': 50.06675121820974, 'loss': 8.137171745300293}


EP_train:1:  54%|| 14992/27626 [35:18<29:54,  7.04it/s]

{'epoch': 1, 'iter': 14990, 'avg_loss': 8.589846097065587, 'avg_acc': 50.06608131545593, 'loss': 8.425019264221191}


EP_train:1:  54%|| 15002/27626 [35:20<30:03,  7.00it/s]

{'epoch': 1, 'iter': 15000, 'avg_loss': 8.58951215351131, 'avg_acc': 50.06770381974535, 'loss': 9.115396499633789}


EP_train:1:  54%|| 15012/27626 [35:21<30:08,  6.98it/s]

{'epoch': 1, 'iter': 15010, 'avg_loss': 8.589511845004003, 'avg_acc': 50.07015688495103, 'loss': 8.859879493713379}


EP_train:1:  54%|| 15022/27626 [35:23<29:54,  7.02it/s]

{'epoch': 1, 'iter': 15020, 'avg_loss': 8.589523671541746, 'avg_acc': 50.06865388456161, 'loss': 8.570878028869629}


EP_train:1:  54%|| 15032/27626 [35:24<29:35,  7.09it/s]

{'epoch': 1, 'iter': 15030, 'avg_loss': 8.58958399474252, 'avg_acc': 50.07338999401237, 'loss': 8.442998886108398}


EP_train:1:  54%|| 15042/27626 [35:26<29:44,  7.05it/s]

{'epoch': 1, 'iter': 15040, 'avg_loss': 8.589696715085893, 'avg_acc': 50.0745877933648, 'loss': 8.058478355407715}


EP_train:1:  54%|| 15052/27626 [35:27<29:51,  7.02it/s]

{'epoch': 1, 'iter': 15050, 'avg_loss': 8.589733984278537, 'avg_acc': 50.07350009966115, 'loss': 7.941378593444824}


EP_train:1:  55%|| 15062/27626 [35:28<29:37,  7.07it/s]

{'epoch': 1, 'iter': 15060, 'avg_loss': 8.589579026637804, 'avg_acc': 50.07365878759711, 'loss': 8.313960075378418}


EP_train:1:  55%|| 15072/27626 [35:30<29:31,  7.08it/s]

{'epoch': 1, 'iter': 15070, 'avg_loss': 8.589659307134482, 'avg_acc': 50.0744393205494, 'loss': 8.987720489501953}


EP_train:1:  55%|| 15082/27626 [35:31<29:37,  7.06it/s]

{'epoch': 1, 'iter': 15080, 'avg_loss': 8.589619702681347, 'avg_acc': 50.07438996087793, 'loss': 7.895744323730469}


EP_train:1:  55%|| 15092/27626 [35:33<29:30,  7.08it/s]

{'epoch': 1, 'iter': 15090, 'avg_loss': 8.589487907039851, 'avg_acc': 50.077032668477905, 'loss': 8.939940452575684}


EP_train:1:  55%|| 15102/27626 [35:34<29:44,  7.02it/s]

{'epoch': 1, 'iter': 15100, 'avg_loss': 8.589397590291886, 'avg_acc': 50.07760247665718, 'loss': 8.475092887878418}


EP_train:1:  55%|| 15112/27626 [35:35<29:36,  7.04it/s]

{'epoch': 1, 'iter': 15110, 'avg_loss': 8.589302605050932, 'avg_acc': 50.07858513665542, 'loss': 8.339670181274414}


EP_train:1:  55%|| 15122/27626 [35:37<29:24,  7.09it/s]

{'epoch': 1, 'iter': 15120, 'avg_loss': 8.58939294002728, 'avg_acc': 50.07873983202169, 'loss': 7.840512752532959}


EP_train:1:  55%|| 15132/27626 [35:38<29:49,  6.98it/s]

{'epoch': 1, 'iter': 15130, 'avg_loss': 8.589535090953405, 'avg_acc': 50.07972044147776, 'loss': 8.726061820983887}


EP_train:1:  55%|| 15142/27626 [35:40<29:50,  6.97it/s]

{'epoch': 1, 'iter': 15140, 'avg_loss': 8.589380724396545, 'avg_acc': 50.07884221649825, 'loss': 8.384658813476562}


EP_train:1:  55%|| 15152/27626 [35:41<29:48,  6.98it/s]

{'epoch': 1, 'iter': 15150, 'avg_loss': 8.589346944408506, 'avg_acc': 50.07734637977691, 'loss': 8.121506690979004}


EP_train:1:  55%|| 15162/27626 [35:42<29:36,  7.02it/s]

{'epoch': 1, 'iter': 15160, 'avg_loss': 8.58936203495587, 'avg_acc': 50.076264758261324, 'loss': 8.020835876464844}


EP_train:1:  55%|| 15172/27626 [35:44<29:20,  7.08it/s]

{'epoch': 1, 'iter': 15170, 'avg_loss': 8.589374786453895, 'avg_acc': 50.07642047327138, 'loss': 7.844754695892334}


EP_train:1:  55%|| 15182/27626 [35:45<29:12,  7.10it/s]

{'epoch': 1, 'iter': 15180, 'avg_loss': 8.589103626989736, 'avg_acc': 50.07410579013241, 'loss': 8.051509857177734}


EP_train:1:  55%|| 15192/27626 [35:47<29:15,  7.08it/s]

{'epoch': 1, 'iter': 15190, 'avg_loss': 8.589138198248857, 'avg_acc': 50.07055987097624, 'loss': 8.695951461791992}


EP_train:1:  55%|| 15202/27626 [35:48<29:37,  6.99it/s]

{'epoch': 1, 'iter': 15200, 'avg_loss': 8.589069366956979, 'avg_acc': 50.07133576738373, 'loss': 8.459964752197266}


EP_train:1:  55%|| 15212/27626 [35:50<29:13,  7.08it/s]

{'epoch': 1, 'iter': 15210, 'avg_loss': 8.589172788049082, 'avg_acc': 50.070467096180394, 'loss': 8.569860458374023}


EP_train:1:  55%|| 15222/27626 [35:51<29:14,  7.07it/s]

{'epoch': 1, 'iter': 15220, 'avg_loss': 8.589182538104648, 'avg_acc': 50.07206326785363, 'loss': 9.18857479095459}


EP_train:1:  55%|| 15232/27626 [35:52<29:14,  7.06it/s]

{'epoch': 1, 'iter': 15230, 'avg_loss': 8.589151130800495, 'avg_acc': 50.0724263016217, 'loss': 8.237316131591797}


EP_train:1:  55%|| 15242/27626 [35:54<29:18,  7.04it/s]

{'epoch': 1, 'iter': 15240, 'avg_loss': 8.588902216400033, 'avg_acc': 50.072993898038185, 'loss': 8.20909595489502}


EP_train:1:  55%|| 15252/27626 [35:55<29:23,  7.02it/s]

{'epoch': 1, 'iter': 15250, 'avg_loss': 8.588761252642819, 'avg_acc': 50.07479017769326, 'loss': 8.356247901916504}


EP_train:1:  55%|| 15262/27626 [35:57<29:14,  7.05it/s]

{'epoch': 1, 'iter': 15260, 'avg_loss': 8.588577376023773, 'avg_acc': 50.074945940632986, 'loss': 8.503382682800293}


EP_train:1:  55%|| 15272/27626 [35:58<29:05,  7.08it/s]

{'epoch': 1, 'iter': 15270, 'avg_loss': 8.588510196465467, 'avg_acc': 50.07469222709712, 'loss': 8.3908109664917}


EP_train:1:  55%|| 15282/27626 [35:59<29:06,  7.07it/s]

{'epoch': 1, 'iter': 15280, 'avg_loss': 8.588586217578063, 'avg_acc': 50.074847850271574, 'loss': 8.388602256774902}


EP_train:1:  55%|| 15292/27626 [36:01<29:32,  6.96it/s]

{'epoch': 1, 'iter': 15290, 'avg_loss': 8.588652999302395, 'avg_acc': 50.07582074422863, 'loss': 7.861724376678467}


EP_train:1:  55%|| 15302/27626 [36:02<28:58,  7.09it/s]

{'epoch': 1, 'iter': 15300, 'avg_loss': 8.588588675208578, 'avg_acc': 50.07699660152931, 'loss': 8.152480125427246}


EP_train:1:  55%|| 15312/27626 [36:04<29:20,  7.00it/s]

{'epoch': 1, 'iter': 15310, 'avg_loss': 8.588555484988737, 'avg_acc': 50.07694631310823, 'loss': 8.90544319152832}


EP_train:1:  55%|| 15322/27626 [36:05<29:14,  7.01it/s]

{'epoch': 1, 'iter': 15320, 'avg_loss': 8.588342135261131, 'avg_acc': 50.07852783760851, 'loss': 7.702414035797119}


EP_train:1:  55%|| 15332/27626 [36:06<28:55,  7.08it/s]

{'epoch': 1, 'iter': 15330, 'avg_loss': 8.588373732391103, 'avg_acc': 50.07868045137304, 'loss': 8.945816040039062}


EP_train:1:  56%|| 15342/27626 [36:08<28:56,  7.07it/s]

{'epoch': 1, 'iter': 15340, 'avg_loss': 8.588710309442583, 'avg_acc': 50.07598103122351, 'loss': 9.328882217407227}


EP_train:1:  56%|| 15352/27626 [36:09<28:47,  7.11it/s]

{'epoch': 1, 'iter': 15350, 'avg_loss': 8.588823403064051, 'avg_acc': 50.07369226760472, 'loss': 8.277253150939941}


EP_train:1:  56%|| 15362/27626 [36:11<28:56,  7.06it/s]

{'epoch': 1, 'iter': 15360, 'avg_loss': 8.588657074813987, 'avg_acc': 50.070592734848, 'loss': 8.728371620178223}


EP_train:1:  56%|| 15372/27626 [36:12<28:49,  7.08it/s]

{'epoch': 1, 'iter': 15370, 'avg_loss': 8.58848536657485, 'avg_acc': 50.067293930128166, 'loss': 8.306205749511719}


EP_train:1:  56%|| 15382/27626 [36:14<29:02,  7.03it/s]

{'epoch': 1, 'iter': 15380, 'avg_loss': 8.58862815115001, 'avg_acc': 50.07070411546714, 'loss': 9.360298156738281}


EP_train:1:  56%|| 15392/27626 [36:15<29:01,  7.02it/s]

{'epoch': 1, 'iter': 15390, 'avg_loss': 8.588926188933842, 'avg_acc': 50.06964297316614, 'loss': 9.552659034729004}


EP_train:1:  56%|| 15402/27626 [36:16<28:57,  7.03it/s]

{'epoch': 1, 'iter': 15400, 'avg_loss': 8.588904722272076, 'avg_acc': 50.0671628465684, 'loss': 8.633299827575684}


EP_train:1:  56%|| 15412/27626 [36:18<29:01,  7.01it/s]

{'epoch': 1, 'iter': 15410, 'avg_loss': 8.588720586667943, 'avg_acc': 50.067524819933816, 'loss': 9.669275283813477}


EP_train:1:  56%|| 15422/27626 [36:19<28:56,  7.03it/s]

{'epoch': 1, 'iter': 15420, 'avg_loss': 8.58853661158261, 'avg_acc': 50.07031807275793, 'loss': 8.176798820495605}


EP_train:1:  56%|| 15432/27626 [36:21<28:50,  7.05it/s]

{'epoch': 1, 'iter': 15430, 'avg_loss': 8.588677320312861, 'avg_acc': 50.072500162011536, 'loss': 9.85534954071045}


EP_train:1:  56%|| 15442/27626 [36:22<28:31,  7.12it/s]

{'epoch': 1, 'iter': 15440, 'avg_loss': 8.588771834304609, 'avg_acc': 50.07326274205038, 'loss': 8.64253044128418}


EP_train:1:  56%|| 15452/27626 [36:23<28:38,  7.09it/s]

{'epoch': 1, 'iter': 15450, 'avg_loss': 8.588665537259839, 'avg_acc': 50.07321532586888, 'loss': 8.68890380859375}


EP_train:1:  56%|| 15462/27626 [36:25<28:56,  7.01it/s]

{'epoch': 1, 'iter': 15460, 'avg_loss': 8.588487359701944, 'avg_acc': 50.07559342862686, 'loss': 8.44420051574707}


EP_train:1:  56%|| 15472/27626 [36:26<28:48,  7.03it/s]

{'epoch': 1, 'iter': 15470, 'avg_loss': 8.588568276775252, 'avg_acc': 50.07594854889794, 'loss': 8.243437767028809}


EP_train:1:  56%|| 15482/27626 [36:28<28:50,  7.02it/s]

{'epoch': 1, 'iter': 15480, 'avg_loss': 8.588602884927235, 'avg_acc': 50.07529390866223, 'loss': 8.623048782348633}


EP_train:1:  56%|| 15492/27626 [36:29<28:48,  7.02it/s]

{'epoch': 1, 'iter': 15490, 'avg_loss': 8.58880958393371, 'avg_acc': 50.072622813246404, 'loss': 10.364099502563477}


EP_train:1:  56%|| 15502/27626 [36:30<28:32,  7.08it/s]

{'epoch': 1, 'iter': 15500, 'avg_loss': 8.589116912112376, 'avg_acc': 50.07499516160247, 'loss': 9.028095245361328}


EP_train:1:  56%|| 15512/27626 [36:32<28:28,  7.09it/s]

{'epoch': 1, 'iter': 15510, 'avg_loss': 8.589017455824246, 'avg_acc': 50.07454387209077, 'loss': 8.72307014465332}


EP_train:1:  56%|| 15522/27626 [36:33<28:30,  7.08it/s]

{'epoch': 1, 'iter': 15520, 'avg_loss': 8.589099340037613, 'avg_acc': 50.07228110302171, 'loss': 8.498554229736328}


EP_train:1:  56%|| 15532/27626 [36:35<28:29,  7.07it/s]

{'epoch': 1, 'iter': 15530, 'avg_loss': 8.589211465801062, 'avg_acc': 50.07022245830919, 'loss': 8.391547203063965}


EP_train:1:  56%|| 15542/27626 [36:36<28:44,  7.01it/s]

{'epoch': 1, 'iter': 15540, 'avg_loss': 8.58930111459136, 'avg_acc': 50.06816646290457, 'loss': 9.77147388458252}


EP_train:1:  56%|| 15552/27626 [36:38<28:31,  7.05it/s]

{'epoch': 1, 'iter': 15550, 'avg_loss': 8.589492107999952, 'avg_acc': 50.067921677062564, 'loss': 8.84355640411377}


EP_train:1:  56%|| 15562/27626 [36:39<28:42,  7.00it/s]

{'epoch': 1, 'iter': 15560, 'avg_loss': 8.589462688623284, 'avg_acc': 50.0662714478504, 'loss': 9.060866355895996}


EP_train:1:  56%|| 15572/27626 [36:40<28:32,  7.04it/s]

{'epoch': 1, 'iter': 15570, 'avg_loss': 8.589267277149526, 'avg_acc': 50.06382056386873, 'loss': 8.421704292297363}


EP_train:1:  56%|| 15582/27626 [36:42<28:35,  7.02it/s]

{'epoch': 1, 'iter': 15580, 'avg_loss': 8.589411528989903, 'avg_acc': 50.06257621462037, 'loss': 8.376242637634277}


EP_train:1:  56%|| 15592/27626 [36:43<28:15,  7.10it/s]

{'epoch': 1, 'iter': 15590, 'avg_loss': 8.589343289354568, 'avg_acc': 50.062335642357766, 'loss': 7.944897174835205}


EP_train:1:  56%|| 15602/27626 [36:45<28:12,  7.10it/s]

{'epoch': 1, 'iter': 15600, 'avg_loss': 8.589388962701593, 'avg_acc': 50.06389814755464, 'loss': 8.48501205444336}


EP_train:1:  57%|| 15612/27626 [36:46<28:18,  7.08it/s]

{'epoch': 1, 'iter': 15610, 'avg_loss': 8.589464250758475, 'avg_acc': 50.065058292229836, 'loss': 8.219335556030273}


EP_train:1:  57%|| 15622/27626 [36:47<28:21,  7.05it/s]

{'epoch': 1, 'iter': 15620, 'avg_loss': 8.589563874060989, 'avg_acc': 50.06361628576915, 'loss': 9.458263397216797}


EP_train:1:  57%|| 15632/27626 [36:49<28:22,  7.05it/s]

{'epoch': 1, 'iter': 15630, 'avg_loss': 8.589518077969437, 'avg_acc': 50.06297581728616, 'loss': 8.419675827026367}


EP_train:1:  57%|| 15642/27626 [36:50<28:30,  7.01it/s]

{'epoch': 1, 'iter': 15640, 'avg_loss': 8.58916030863912, 'avg_acc': 50.062535963173715, 'loss': 7.559192180633545}


EP_train:1:  57%|| 15652/27626 [36:52<28:19,  7.05it/s]

{'epoch': 1, 'iter': 15650, 'avg_loss': 8.589073767731207, 'avg_acc': 50.065491022937834, 'loss': 8.44597053527832}


EP_train:1:  57%|| 15662/27626 [36:53<28:23,  7.03it/s]

{'epoch': 1, 'iter': 15660, 'avg_loss': 8.58906028645377, 'avg_acc': 50.067843688142524, 'loss': 8.779253959655762}


EP_train:1:  57%|| 15672/27626 [36:54<28:25,  7.01it/s]

{'epoch': 1, 'iter': 15670, 'avg_loss': 8.589376424055871, 'avg_acc': 50.07158924127369, 'loss': 9.162345886230469}


EP_train:1:  57%|| 15682/27626 [36:56<28:03,  7.10it/s]

{'epoch': 1, 'iter': 15680, 'avg_loss': 8.589395864833474, 'avg_acc': 50.06975001594286, 'loss': 8.448701858520508}


EP_train:1:  57%|| 15692/27626 [36:57<28:15,  7.04it/s]

{'epoch': 1, 'iter': 15690, 'avg_loss': 8.589388909071568, 'avg_acc': 50.06592154738385, 'loss': 9.26918888092041}


EP_train:1:  57%|| 15702/27626 [36:59<28:06,  7.07it/s]

{'epoch': 1, 'iter': 15700, 'avg_loss': 8.58951188436656, 'avg_acc': 50.06647665753774, 'loss': 8.369107246398926}


EP_train:1:  57%|| 15712/27626 [37:00<28:03,  7.08it/s]

{'epoch': 1, 'iter': 15710, 'avg_loss': 8.589305970983073, 'avg_acc': 50.06703106104003, 'loss': 8.744131088256836}


EP_train:1:  57%|| 15722/27626 [37:02<28:29,  6.96it/s]

{'epoch': 1, 'iter': 15720, 'avg_loss': 8.589356225786997, 'avg_acc': 50.06798231664652, 'loss': 9.561336517333984}


EP_train:1:  57%|| 15732/27626 [37:03<28:21,  6.99it/s]

{'epoch': 1, 'iter': 15730, 'avg_loss': 8.589368471118107, 'avg_acc': 50.06992562456296, 'loss': 7.848865985870361}


EP_train:1:  57%|| 15742/27626 [37:04<28:06,  7.04it/s]

{'epoch': 1, 'iter': 15740, 'avg_loss': 8.589556546868897, 'avg_acc': 50.06730036211168, 'loss': 9.368696212768555}


EP_train:1:  57%|| 15752/27626 [37:06<28:05,  7.05it/s]

{'epoch': 1, 'iter': 15750, 'avg_loss': 8.589552479723917, 'avg_acc': 50.067654434639074, 'loss': 9.319845199584961}


EP_train:1:  57%|| 15762/27626 [37:07<28:11,  7.02it/s]

{'epoch': 1, 'iter': 15760, 'avg_loss': 8.589707224269663, 'avg_acc': 50.07118044540321, 'loss': 8.848541259765625}


EP_train:1:  57%|| 15772/27626 [37:09<27:55,  7.08it/s]

{'epoch': 1, 'iter': 15770, 'avg_loss': 8.589580175849557, 'avg_acc': 50.07252235115085, 'loss': 8.791999816894531}


EP_train:1:  57%|| 15782/27626 [37:10<27:52,  7.08it/s]

{'epoch': 1, 'iter': 15780, 'avg_loss': 8.58949243888664, 'avg_acc': 50.07128825803181, 'loss': 8.11231803894043}


EP_train:1:  57%|| 15792/27626 [37:11<28:01,  7.04it/s]

{'epoch': 1, 'iter': 15790, 'avg_loss': 8.589425410322086, 'avg_acc': 50.07223260084859, 'loss': 7.873032569885254}


EP_train:1:  57%|| 15802/27626 [37:13<28:04,  7.02it/s]

{'epoch': 1, 'iter': 15800, 'avg_loss': 8.590112502375295, 'avg_acc': 50.07159357002722, 'loss': 9.690497398376465}


EP_train:1:  57%|| 15812/27626 [37:14<27:58,  7.04it/s]

{'epoch': 1, 'iter': 15810, 'avg_loss': 8.590563214788038, 'avg_acc': 50.07154828916577, 'loss': 9.061311721801758}


EP_train:1:  57%|| 15822/27626 [37:16<27:55,  7.05it/s]

{'epoch': 1, 'iter': 15820, 'avg_loss': 8.590577771559534, 'avg_acc': 50.07288572150939, 'loss': 9.057106971740723}


EP_train:1:  57%|| 15832/27626 [37:17<27:50,  7.06it/s]

{'epoch': 1, 'iter': 15830, 'avg_loss': 8.5904458971682, 'avg_acc': 50.07086570652517, 'loss': 8.318129539489746}


EP_train:1:  57%|| 15842/27626 [37:19<27:49,  7.06it/s]

{'epoch': 1, 'iter': 15840, 'avg_loss': 8.59036381950087, 'avg_acc': 50.07220188119437, 'loss': 8.31610107421875}


EP_train:1:  57%|| 15852/27626 [37:20<27:45,  7.07it/s]

{'epoch': 1, 'iter': 15850, 'avg_loss': 8.590406804255947, 'avg_acc': 50.07452211216958, 'loss': 8.408454895019531}


EP_train:1:  57%|| 15862/27626 [37:21<27:43,  7.07it/s]

{'epoch': 1, 'iter': 15860, 'avg_loss': 8.590371267242846, 'avg_acc': 50.07565727255533, 'loss': 8.585433006286621}


EP_train:1:  57%|| 15872/27626 [37:23<27:30,  7.12it/s]

{'epoch': 1, 'iter': 15870, 'avg_loss': 8.590171365279435, 'avg_acc': 50.0750189024006, 'loss': 8.356050491333008}


EP_train:1:  57%|| 15882/27626 [37:24<27:51,  7.02it/s]

{'epoch': 1, 'iter': 15880, 'avg_loss': 8.590033616822085, 'avg_acc': 50.0779233045778, 'loss': 8.354310989379883}


EP_train:1:  58%|| 15892/27626 [37:26<27:37,  7.08it/s]

{'epoch': 1, 'iter': 15890, 'avg_loss': 8.589816592396952, 'avg_acc': 50.07708765968159, 'loss': 8.525106430053711}


EP_train:1:  58%|| 15902/27626 [37:27<27:42,  7.05it/s]

{'epoch': 1, 'iter': 15900, 'avg_loss': 8.589561102847755, 'avg_acc': 50.078021822526885, 'loss': 8.47210693359375}


EP_train:1:  58%|| 15912/27626 [37:28<27:43,  7.04it/s]

{'epoch': 1, 'iter': 15910, 'avg_loss': 8.589835829306544, 'avg_acc': 50.077776381119975, 'loss': 9.307899475097656}


EP_train:1:  58%|| 15922/27626 [37:30<27:54,  6.99it/s]

{'epoch': 1, 'iter': 15920, 'avg_loss': 8.589912039454457, 'avg_acc': 50.07753124803719, 'loss': 9.007213592529297}


EP_train:1:  58%|| 15932/27626 [37:31<27:48,  7.01it/s]

{'epoch': 1, 'iter': 15930, 'avg_loss': 8.590068487294626, 'avg_acc': 50.07767873956437, 'loss': 8.179040908813477}


EP_train:1:  58%|| 15942/27626 [37:33<27:50,  6.99it/s]

{'epoch': 1, 'iter': 15940, 'avg_loss': 8.589939193230233, 'avg_acc': 50.076061727620605, 'loss': 8.648574829101562}


EP_train:1:  58%|| 15952/27626 [37:34<27:30,  7.07it/s]

{'epoch': 1, 'iter': 15950, 'avg_loss': 8.589949850408, 'avg_acc': 50.07464265563287, 'loss': 7.375912666320801}


EP_train:1:  58%|| 15962/27626 [37:35<27:31,  7.06it/s]

{'epoch': 1, 'iter': 15960, 'avg_loss': 8.589876664042599, 'avg_acc': 50.075770628406744, 'loss': 8.238825798034668}


EP_train:1:  58%|| 15972/27626 [37:37<27:21,  7.10it/s]

{'epoch': 1, 'iter': 15970, 'avg_loss': 8.589685377842661, 'avg_acc': 50.07591885292092, 'loss': 8.172386169433594}


EP_train:1:  58%|| 15982/27626 [37:38<27:39,  7.02it/s]

{'epoch': 1, 'iter': 15980, 'avg_loss': 8.58978550210777, 'avg_acc': 50.07059164007258, 'loss': 8.857372283935547}


EP_train:1:  58%|| 15992/27626 [37:40<27:50,  6.96it/s]

{'epoch': 1, 'iter': 15990, 'avg_loss': 8.589821006558296, 'avg_acc': 50.07132918516666, 'loss': 8.773244857788086}


EP_train:1:  58%|| 16002/27626 [37:41<27:25,  7.07it/s]

{'epoch': 1, 'iter': 16000, 'avg_loss': 8.589747747460958, 'avg_acc': 50.06796450221861, 'loss': 8.790018081665039}


EP_train:1:  58%|| 16012/27626 [37:43<27:30,  7.04it/s]

{'epoch': 1, 'iter': 16010, 'avg_loss': 8.589891931072671, 'avg_acc': 50.066360627068896, 'loss': 9.468151092529297}


EP_train:1:  58%|| 16022/27626 [37:44<27:36,  7.01it/s]

{'epoch': 1, 'iter': 16020, 'avg_loss': 8.590021081843124, 'avg_acc': 50.06631920604207, 'loss': 9.209484100341797}


EP_train:1:  58%|| 16032/27626 [37:45<27:22,  7.06it/s]

{'epoch': 1, 'iter': 16030, 'avg_loss': 8.589914726784162, 'avg_acc': 50.062184205601646, 'loss': 8.986037254333496}


EP_train:1:  58%|| 16042/27626 [37:47<27:12,  7.10it/s]

{'epoch': 1, 'iter': 16040, 'avg_loss': 8.589840194812016, 'avg_acc': 50.064288386010844, 'loss': 8.750823974609375}


EP_train:1:  58%|| 16052/27626 [37:48<27:14,  7.08it/s]

{'epoch': 1, 'iter': 16050, 'avg_loss': 8.589573235773504, 'avg_acc': 50.06658463647125, 'loss': 7.975963115692139}


EP_train:1:  58%|| 16062/27626 [37:50<27:16,  7.07it/s]

{'epoch': 1, 'iter': 16060, 'avg_loss': 8.589717063680546, 'avg_acc': 50.069656310316915, 'loss': 9.64920711517334}


EP_train:1:  58%|| 16072/27626 [37:51<27:11,  7.08it/s]

{'epoch': 1, 'iter': 16070, 'avg_loss': 8.589904806519124, 'avg_acc': 50.069418517827145, 'loss': 8.46939754486084}


EP_train:1:  58%|| 16082/27626 [37:52<27:11,  7.08it/s]

{'epoch': 1, 'iter': 16080, 'avg_loss': 8.589853060378207, 'avg_acc': 50.0730675952988, 'loss': 7.91009521484375}


EP_train:1:  58%|| 16092/27626 [37:54<27:32,  6.98it/s]

{'epoch': 1, 'iter': 16090, 'avg_loss': 8.589918699638504, 'avg_acc': 50.07399322602697, 'loss': 8.577423095703125}


EP_train:1:  58%|| 16102/27626 [37:55<27:26,  7.00it/s]

{'epoch': 1, 'iter': 16100, 'avg_loss': 8.589870033142144, 'avg_acc': 50.07045369852804, 'loss': 8.325621604919434}


EP_train:1:  58%|| 16112/27626 [37:57<27:05,  7.08it/s]

{'epoch': 1, 'iter': 16110, 'avg_loss': 8.589938924725617, 'avg_acc': 50.07021600148966, 'loss': 7.856114387512207}


EP_train:1:  58%|| 16122/27626 [37:58<27:30,  6.97it/s]

{'epoch': 1, 'iter': 16120, 'avg_loss': 8.589862263148687, 'avg_acc': 50.07211091123379, 'loss': 7.6992316246032715}


EP_train:1:  58%|| 16132/27626 [37:59<27:06,  7.06it/s]

{'epoch': 1, 'iter': 16130, 'avg_loss': 8.589796379682454, 'avg_acc': 50.07264738701879, 'loss': 8.722312927246094}


EP_train:1:  58%|| 16142/27626 [38:01<26:58,  7.10it/s]

{'epoch': 1, 'iter': 16140, 'avg_loss': 8.589938429339757, 'avg_acc': 50.072602379034755, 'loss': 8.389236450195312}


EP_train:1:  58%|| 16152/27626 [38:02<27:01,  7.08it/s]

{'epoch': 1, 'iter': 16150, 'avg_loss': 8.590013382744992, 'avg_acc': 50.07371834561327, 'loss': 9.140486717224121}


EP_train:1:  59%|| 16162/27626 [38:04<26:55,  7.09it/s]

{'epoch': 1, 'iter': 16160, 'avg_loss': 8.589871477338688, 'avg_acc': 50.074639564383396, 'loss': 8.531086921691895}


EP_train:1:  59%|| 16172/27626 [38:05<26:54,  7.09it/s]

{'epoch': 1, 'iter': 16170, 'avg_loss': 8.589623674303413, 'avg_acc': 50.07440016078165, 'loss': 8.661775588989258}


EP_train:1:  59%|| 16182/27626 [38:07<27:01,  7.06it/s]

{'epoch': 1, 'iter': 16180, 'avg_loss': 8.589503679351543, 'avg_acc': 50.07647858599592, 'loss': 8.612255096435547}


EP_train:1:  59%|| 16192/27626 [38:08<27:10,  7.01it/s]

{'epoch': 1, 'iter': 16190, 'avg_loss': 8.589408693124525, 'avg_acc': 50.076624359211905, 'loss': 8.272805213928223}


EP_train:1:  59%|| 16202/27626 [38:09<26:57,  7.06it/s]

{'epoch': 1, 'iter': 16200, 'avg_loss': 8.58935737115396, 'avg_acc': 50.07696284179989, 'loss': 9.279629707336426}


EP_train:1:  59%|| 16212/27626 [38:11<26:50,  7.09it/s]

{'epoch': 1, 'iter': 16210, 'avg_loss': 8.589476728665822, 'avg_acc': 50.0730599592869, 'loss': 7.5833659172058105}


EP_train:1:  59%|| 16222/27626 [38:12<26:53,  7.07it/s]

{'epoch': 1, 'iter': 16220, 'avg_loss': 8.589384977212413, 'avg_acc': 50.073014918932245, 'loss': 8.49919605255127}


EP_train:1:  59%|| 16232/27626 [38:14<26:49,  7.08it/s]

{'epoch': 1, 'iter': 16230, 'avg_loss': 8.589564033345193, 'avg_acc': 50.07200727003881, 'loss': 9.728997230529785}


EP_train:1:  59%|| 16242/27626 [38:15<26:45,  7.09it/s]

{'epoch': 1, 'iter': 16240, 'avg_loss': 8.589643705053419, 'avg_acc': 50.07330983313836, 'loss': 8.089427947998047}


EP_train:1:  59%|| 16252/27626 [38:16<26:39,  7.11it/s]

{'epoch': 1, 'iter': 16250, 'avg_loss': 8.58951392466307, 'avg_acc': 50.075956864192975, 'loss': 8.378727912902832}


EP_train:1:  59%|| 16262/27626 [38:18<26:36,  7.12it/s]

{'epoch': 1, 'iter': 16260, 'avg_loss': 8.58922621904869, 'avg_acc': 50.07821628436135, 'loss': 9.001373291015625}


EP_train:1:  59%|| 16272/27626 [38:19<26:52,  7.04it/s]

{'epoch': 1, 'iter': 16270, 'avg_loss': 8.589421715676899, 'avg_acc': 50.0818173437404, 'loss': 9.34061336517334}


EP_train:1:  59%|| 16282/27626 [38:21<27:09,  6.96it/s]

{'epoch': 1, 'iter': 16280, 'avg_loss': 8.589549000085583, 'avg_acc': 50.08426233032369, 'loss': 8.213644027709961}


EP_train:1:  59%|| 16292/27626 [38:22<26:46,  7.06it/s]

{'epoch': 1, 'iter': 16290, 'avg_loss': 8.589651326434797, 'avg_acc': 50.08248419372659, 'loss': 8.469738006591797}


EP_train:1:  59%|| 16302/27626 [38:23<26:38,  7.08it/s]

{'epoch': 1, 'iter': 16300, 'avg_loss': 8.589256157678195, 'avg_acc': 50.08530918348567, 'loss': 9.035614013671875}


EP_train:1:  59%|| 16312/27626 [38:25<26:33,  7.10it/s]

{'epoch': 1, 'iter': 16310, 'avg_loss': 8.589171630821253, 'avg_acc': 50.08583164735455, 'loss': 8.63739013671875}


EP_train:1:  59%|| 16322/27626 [38:26<26:35,  7.09it/s]

{'epoch': 1, 'iter': 16320, 'avg_loss': 8.58914919710577, 'avg_acc': 50.08673641320998, 'loss': 8.034114837646484}


EP_train:1:  59%|| 16332/27626 [38:28<26:36,  7.08it/s]

{'epoch': 1, 'iter': 16330, 'avg_loss': 8.589208809103548, 'avg_acc': 50.086300593962406, 'loss': 9.120619773864746}


EP_train:1:  59%|| 16342/27626 [38:29<26:34,  7.08it/s]

{'epoch': 1, 'iter': 16340, 'avg_loss': 8.589230095973235, 'avg_acc': 50.08682149195276, 'loss': 8.75985336303711}


EP_train:1:  59%|| 16352/27626 [38:30<26:39,  7.05it/s]

{'epoch': 1, 'iter': 16350, 'avg_loss': 8.58925668282375, 'avg_acc': 50.08523943489694, 'loss': 8.648433685302734}


EP_train:1:  59%|| 16362/27626 [38:32<26:45,  7.02it/s]

{'epoch': 1, 'iter': 16360, 'avg_loss': 8.58932627448898, 'avg_acc': 50.08518733573742, 'loss': 8.81194019317627}


EP_train:1:  59%|| 16372/27626 [38:33<26:38,  7.04it/s]

{'epoch': 1, 'iter': 16370, 'avg_loss': 8.589102681064261, 'avg_acc': 50.08494441390263, 'loss': 7.983134746551514}


EP_train:1:  59%|| 16382/27626 [38:35<26:38,  7.04it/s]

{'epoch': 1, 'iter': 16380, 'avg_loss': 8.588865201964163, 'avg_acc': 50.08393870948049, 'loss': 9.61663818359375}


EP_train:1:  59%|| 16392/27626 [38:36<26:30,  7.06it/s]

{'epoch': 1, 'iter': 16390, 'avg_loss': 8.588962956625709, 'avg_acc': 50.08407815264474, 'loss': 8.163748741149902}


EP_train:1:  59%|| 16402/27626 [38:38<26:21,  7.10it/s]

{'epoch': 1, 'iter': 16400, 'avg_loss': 8.589296822344107, 'avg_acc': 50.0813593683312, 'loss': 8.217484474182129}


EP_train:1:  59%|| 16412/27626 [38:39<26:31,  7.05it/s]

{'epoch': 1, 'iter': 16410, 'avg_loss': 8.589486953480124, 'avg_acc': 50.080167265858265, 'loss': 8.692883491516113}


EP_train:1:  59%|| 16422/27626 [38:40<26:37,  7.02it/s]

{'epoch': 1, 'iter': 16420, 'avg_loss': 8.589431677439, 'avg_acc': 50.080879666280985, 'loss': 8.249154090881348}


EP_train:1:  59%|| 16432/27626 [38:42<26:24,  7.06it/s]

{'epoch': 1, 'iter': 16430, 'avg_loss': 8.589410024742428, 'avg_acc': 50.08387347087822, 'loss': 8.687456130981445}


EP_train:1:  60%|| 16442/27626 [38:43<26:25,  7.06it/s]

{'epoch': 1, 'iter': 16440, 'avg_loss': 8.589281500056526, 'avg_acc': 50.08382245605498, 'loss': 8.589173316955566}


EP_train:1:  60%|| 16452/27626 [38:45<26:35,  7.00it/s]

{'epoch': 1, 'iter': 16450, 'avg_loss': 8.58918609677691, 'avg_acc': 50.08453133548113, 'loss': 8.42473030090332}


EP_train:1:  60%|| 16462/27626 [38:46<26:22,  7.06it/s]

{'epoch': 1, 'iter': 16460, 'avg_loss': 8.58907542377303, 'avg_acc': 50.083340927039664, 'loss': 8.00157642364502}


EP_train:1:  60%|| 16472/27626 [38:47<26:18,  7.07it/s]

{'epoch': 1, 'iter': 16470, 'avg_loss': 8.589062696901236, 'avg_acc': 50.08329032845607, 'loss': 8.719993591308594}


EP_train:1:  60%|| 16482/27626 [38:49<26:12,  7.09it/s]

{'epoch': 1, 'iter': 16480, 'avg_loss': 8.589119178072268, 'avg_acc': 50.08248134215157, 'loss': 8.998786926269531}


EP_train:1:  60%|| 16492/27626 [38:50<26:14,  7.07it/s]

{'epoch': 1, 'iter': 16490, 'avg_loss': 8.589099085372167, 'avg_acc': 50.0820523315748, 'loss': 8.662359237670898}


EP_train:1:  60%|| 16502/27626 [38:52<26:23,  7.02it/s]

{'epoch': 1, 'iter': 16500, 'avg_loss': 8.588986414359212, 'avg_acc': 50.08067692867099, 'loss': 8.541653633117676}


EP_train:1:  60%|| 16512/27626 [38:53<26:17,  7.05it/s]

{'epoch': 1, 'iter': 16510, 'avg_loss': 8.588952875636952, 'avg_acc': 50.08157440494217, 'loss': 8.933466911315918}


EP_train:1:  60%|| 16522/27626 [38:55<27:03,  6.84it/s]

{'epoch': 1, 'iter': 16520, 'avg_loss': 8.589176595907336, 'avg_acc': 50.0796334967617, 'loss': 9.019278526306152}


EP_train:1:  60%|| 16532/27626 [38:56<26:09,  7.07it/s]

{'epoch': 1, 'iter': 16530, 'avg_loss': 8.589142322280678, 'avg_acc': 50.08034147964431, 'loss': 8.654350280761719}


EP_train:1:  60%|| 16542/27626 [38:57<26:27,  6.98it/s]

{'epoch': 1, 'iter': 16540, 'avg_loss': 8.589091537033486, 'avg_acc': 50.08350462487153, 'loss': 7.677260875701904}


EP_train:1:  60%|| 16552/27626 [38:59<26:19,  7.01it/s]

{'epoch': 1, 'iter': 16550, 'avg_loss': 8.588941631861074, 'avg_acc': 50.08590870642258, 'loss': 8.756004333496094}


EP_train:1:  60%|| 16562/27626 [39:00<26:08,  7.06it/s]

{'epoch': 1, 'iter': 16560, 'avg_loss': 8.588756644317662, 'avg_acc': 50.085479439647365, 'loss': 8.861072540283203}


EP_train:1:  60%|| 16572/27626 [39:02<26:02,  7.08it/s]

{'epoch': 1, 'iter': 16570, 'avg_loss': 8.588755648034418, 'avg_acc': 50.08448494357612, 'loss': 8.882457733154297}


EP_train:1:  60%|| 16582/27626 [39:03<25:56,  7.09it/s]

{'epoch': 1, 'iter': 16580, 'avg_loss': 8.58891549517914, 'avg_acc': 50.08368011579519, 'loss': 9.928587913513184}


EP_train:1:  60%|| 16592/27626 [39:04<25:58,  7.08it/s]

{'epoch': 1, 'iter': 16590, 'avg_loss': 8.588960964190695, 'avg_acc': 50.085889940329096, 'loss': 8.655887603759766}


EP_train:1:  60%|| 16602/27626 [39:06<25:53,  7.10it/s]

{'epoch': 1, 'iter': 16600, 'avg_loss': 8.588943806335687, 'avg_acc': 50.08339106077947, 'loss': 8.648163795471191}


EP_train:1:  60%|| 16612/27626 [39:07<26:02,  7.05it/s]

{'epoch': 1, 'iter': 16610, 'avg_loss': 8.588783356220643, 'avg_acc': 50.08446962855939, 'loss': 8.151724815368652}


EP_train:1:  60%|| 16622/27626 [39:09<26:01,  7.05it/s]

{'epoch': 1, 'iter': 16620, 'avg_loss': 8.58910895524261, 'avg_acc': 50.08216262559413, 'loss': 9.187376022338867}


EP_train:1:  60%|| 16632/27626 [39:10<26:12,  6.99it/s]

{'epoch': 1, 'iter': 16630, 'avg_loss': 8.58932053835664, 'avg_acc': 50.08248902651674, 'loss': 8.912525177001953}


EP_train:1:  60%|| 16642/27626 [39:11<25:59,  7.04it/s]

{'epoch': 1, 'iter': 16640, 'avg_loss': 8.589485080172956, 'avg_acc': 50.08093714320053, 'loss': 8.536744117736816}


EP_train:1:  60%|| 16652/27626 [39:13<26:01,  7.03it/s]

{'epoch': 1, 'iter': 16650, 'avg_loss': 8.589427066918901, 'avg_acc': 50.08070085880728, 'loss': 8.824204444885254}


EP_train:1:  60%|| 16662/27626 [39:14<26:03,  7.01it/s]

{'epoch': 1, 'iter': 16660, 'avg_loss': 8.589331523545413, 'avg_acc': 50.083465878398655, 'loss': 8.524569511413574}


EP_train:1:  60%|| 16672/27626 [39:16<25:49,  7.07it/s]

{'epoch': 1, 'iter': 16670, 'avg_loss': 8.58902922632541, 'avg_acc': 50.08472797072761, 'loss': 9.210931777954102}


EP_train:1:  60%|| 16682/27626 [39:17<25:43,  7.09it/s]

{'epoch': 1, 'iter': 16680, 'avg_loss': 8.589132081532563, 'avg_acc': 50.08561387207002, 'loss': 9.246563911437988}


EP_train:1:  60%|| 16692/27626 [39:18<25:40,  7.10it/s]

{'epoch': 1, 'iter': 16690, 'avg_loss': 8.589135655819153, 'avg_acc': 50.08593703193338, 'loss': 8.450884819030762}


EP_train:1:  60%|| 16702/27626 [39:20<25:41,  7.09it/s]

{'epoch': 1, 'iter': 16700, 'avg_loss': 8.589135782696246, 'avg_acc': 50.08551134662596, 'loss': 8.45376205444336}


EP_train:1:  60%|| 16712/27626 [39:21<25:39,  7.09it/s]

{'epoch': 1, 'iter': 16710, 'avg_loss': 8.589076425648129, 'avg_acc': 50.0880782119562, 'loss': 8.836799621582031}


EP_train:1:  61%|| 16722/27626 [39:23<25:55,  7.01it/s]

{'epoch': 1, 'iter': 16720, 'avg_loss': 8.589199058376897, 'avg_acc': 50.08989444411219, 'loss': 8.9715576171875}


EP_train:1:  61%|| 16732/27626 [39:24<25:48,  7.04it/s]

{'epoch': 1, 'iter': 16730, 'avg_loss': 8.589117723517065, 'avg_acc': 50.09096138903831, 'loss': 8.59371280670166}


EP_train:1:  61%|| 16742/27626 [39:26<25:40,  7.07it/s]

{'epoch': 1, 'iter': 16740, 'avg_loss': 8.589214069941132, 'avg_acc': 50.089413714831856, 'loss': 8.381567001342773}


EP_train:1:  61%|| 16752/27626 [39:27<25:31,  7.10it/s]

{'epoch': 1, 'iter': 16750, 'avg_loss': 8.589297603218672, 'avg_acc': 50.090293116828846, 'loss': 8.984987258911133}


EP_train:1:  61%|| 16762/27626 [39:28<25:26,  7.12it/s]

{'epoch': 1, 'iter': 16760, 'avg_loss': 8.589189716927669, 'avg_acc': 50.090612135314124, 'loss': 8.530867576599121}


EP_train:1:  61%|| 16772/27626 [39:30<25:29,  7.09it/s]

{'epoch': 1, 'iter': 16770, 'avg_loss': 8.589189181607308, 'avg_acc': 50.092235108222525, 'loss': 8.363880157470703}


EP_train:1:  61%|| 16782/27626 [39:31<25:54,  6.97it/s]

{'epoch': 1, 'iter': 16780, 'avg_loss': 8.589018541598277, 'avg_acc': 50.094787259400505, 'loss': 7.275543689727783}


EP_train:1:  61%|| 16792/27626 [39:33<25:44,  7.02it/s]

{'epoch': 1, 'iter': 16790, 'avg_loss': 8.588990073925075, 'avg_acc': 50.092869692096954, 'loss': 8.590357780456543}


EP_train:1:  61%|| 16802/27626 [39:34<25:53,  6.97it/s]

{'epoch': 1, 'iter': 16800, 'avg_loss': 8.588823276959348, 'avg_acc': 50.09244241414201, 'loss': 8.049615859985352}


EP_train:1:  61%|| 16812/27626 [39:35<25:42,  7.01it/s]

{'epoch': 1, 'iter': 16810, 'avg_loss': 8.588753228127711, 'avg_acc': 50.09127208375469, 'loss': 8.335182189941406}


EP_train:1:  61%|| 16822/27626 [39:37<25:37,  7.03it/s]

{'epoch': 1, 'iter': 16820, 'avg_loss': 8.588771648819725, 'avg_acc': 50.09251828072053, 'loss': 8.670845985412598}


EP_train:1:  61%|| 16832/27626 [39:38<25:21,  7.09it/s]

{'epoch': 1, 'iter': 16830, 'avg_loss': 8.588794237392612, 'avg_acc': 50.09023528013784, 'loss': 7.817853927612305}


EP_train:1:  61%|| 16842/27626 [39:40<25:22,  7.08it/s]

{'epoch': 1, 'iter': 16840, 'avg_loss': 8.588541473007904, 'avg_acc': 50.09092393563328, 'loss': 8.810247421264648}


EP_train:1:  61%|| 16852/27626 [39:41<25:15,  7.11it/s]

{'epoch': 1, 'iter': 16850, 'avg_loss': 8.588621029817883, 'avg_acc': 50.091982671651536, 'loss': 8.82485294342041}


EP_train:1:  61%|| 16862/27626 [39:43<25:32,  7.02it/s]

{'epoch': 1, 'iter': 16860, 'avg_loss': 8.588471084887939, 'avg_acc': 50.09063074550738, 'loss': 8.972162246704102}


EP_train:1:  61%|| 16872/27626 [39:44<25:17,  7.09it/s]

{'epoch': 1, 'iter': 16870, 'avg_loss': 8.588448889896686, 'avg_acc': 50.091688400213386, 'loss': 7.685303688049316}


EP_train:1:  61%|| 16882/27626 [39:45<25:33,  7.01it/s]

{'epoch': 1, 'iter': 16880, 'avg_loss': 8.588504354928768, 'avg_acc': 50.0890424145489, 'loss': 9.505908966064453}


EP_train:1:  61%|| 16892/27626 [39:47<25:17,  7.07it/s]

{'epoch': 1, 'iter': 16890, 'avg_loss': 8.588579696248532, 'avg_acc': 50.08824965958203, 'loss': 7.690262794494629}


EP_train:1:  61%|| 16902/27626 [39:48<25:15,  7.08it/s]

{'epoch': 1, 'iter': 16900, 'avg_loss': 8.588487857799192, 'avg_acc': 50.0848692385066, 'loss': 8.034427642822266}


EP_train:1:  61%|| 16912/27626 [39:50<25:22,  7.04it/s]

{'epoch': 1, 'iter': 16910, 'avg_loss': 8.588549927952908, 'avg_acc': 50.084079888829756, 'loss': 8.975985527038574}


EP_train:1:  61%|| 16922/27626 [39:51<25:16,  7.06it/s]

{'epoch': 1, 'iter': 16920, 'avg_loss': 8.588482424973469, 'avg_acc': 50.0840301991608, 'loss': 8.398377418518066}


EP_train:1:  61%|| 16932/27626 [39:52<25:06,  7.10it/s]

{'epoch': 1, 'iter': 16930, 'avg_loss': 8.58857610909911, 'avg_acc': 50.084349713543205, 'loss': 8.43816089630127}


EP_train:1:  61%|| 16942/27626 [39:54<25:06,  7.09it/s]

{'epoch': 1, 'iter': 16940, 'avg_loss': 8.588627476526414, 'avg_acc': 50.08171743108435, 'loss': 8.932708740234375}


EP_train:1:  61%|| 16952/27626 [39:55<25:21,  7.01it/s]

{'epoch': 1, 'iter': 16950, 'avg_loss': 8.588546258454814, 'avg_acc': 50.08295970739189, 'loss': 8.17199993133545}


EP_train:1:  61%|| 16962/27626 [39:57<25:04,  7.09it/s]

{'epoch': 1, 'iter': 16960, 'avg_loss': 8.588534386369826, 'avg_acc': 50.083463533989736, 'loss': 8.490546226501465}


EP_train:1:  61%|| 16972/27626 [39:58<25:03,  7.08it/s]

{'epoch': 1, 'iter': 16970, 'avg_loss': 8.588409827163073, 'avg_acc': 50.08470331742384, 'loss': 9.067146301269531}


EP_train:1:  61%|| 16982/27626 [39:59<25:09,  7.05it/s]

{'epoch': 1, 'iter': 16980, 'avg_loss': 8.588454018422048, 'avg_acc': 50.08060479359284, 'loss': 8.531736373901367}


EP_train:1:  62%|| 16992/27626 [40:01<25:21,  6.99it/s]

{'epoch': 1, 'iter': 16990, 'avg_loss': 8.588385967290336, 'avg_acc': 50.07982167029604, 'loss': 7.12797212600708}


EP_train:1:  62%|| 17002/27626 [40:02<25:05,  7.06it/s]

{'epoch': 1, 'iter': 17000, 'avg_loss': 8.588210792136497, 'avg_acc': 50.07940709370037, 'loss': 8.351837158203125}


EP_train:1:  62%|| 17012/27626 [40:04<25:05,  7.05it/s]

{'epoch': 1, 'iter': 17010, 'avg_loss': 8.588066186008472, 'avg_acc': 50.080646346481686, 'loss': 7.644892692565918}


EP_train:1:  62%|| 17022/27626 [40:05<25:16,  6.99it/s]

{'epoch': 1, 'iter': 17020, 'avg_loss': 8.587921460320153, 'avg_acc': 50.08206773985078, 'loss': 8.702107429504395}


EP_train:1:  62%|| 17032/27626 [40:07<25:13,  7.00it/s]

{'epoch': 1, 'iter': 17030, 'avg_loss': 8.588046966915815, 'avg_acc': 50.08367095296812, 'loss': 9.19394588470459}


EP_train:1:  62%|| 17042/27626 [40:08<24:56,  7.07it/s]

{'epoch': 1, 'iter': 17040, 'avg_loss': 8.588094520719949, 'avg_acc': 50.08050437180916, 'loss': 9.189212799072266}


EP_train:1:  62%|| 17052/27626 [40:09<24:54,  7.07it/s]

{'epoch': 1, 'iter': 17050, 'avg_loss': 8.588010072050944, 'avg_acc': 50.081740073895965, 'loss': 8.873495101928711}


EP_train:1:  62%|| 17062/27626 [40:11<24:57,  7.06it/s]

{'epoch': 1, 'iter': 17060, 'avg_loss': 8.58803137237082, 'avg_acc': 50.083706992556124, 'loss': 9.026612281799316}


EP_train:1:  62%|| 17072/27626 [40:12<24:52,  7.07it/s]

{'epoch': 1, 'iter': 17070, 'avg_loss': 8.588144455159403, 'avg_acc': 50.080362896139654, 'loss': 9.814760208129883}


EP_train:1:  62%|| 17082/27626 [40:14<24:58,  7.04it/s]

{'epoch': 1, 'iter': 17080, 'avg_loss': 8.588026535817397, 'avg_acc': 50.08049879983607, 'loss': 8.123421669006348}


EP_train:1:  62%|| 17092/27626 [40:15<24:56,  7.04it/s]

{'epoch': 1, 'iter': 17090, 'avg_loss': 8.58801503315656, 'avg_acc': 50.08282868176232, 'loss': 9.104009628295898}


EP_train:1:  62%|| 17102/27626 [40:16<24:47,  7.08it/s]

{'epoch': 1, 'iter': 17100, 'avg_loss': 8.58808114220403, 'avg_acc': 50.082962984620785, 'loss': 8.296231269836426}


EP_train:1:  62%|| 17112/27626 [40:18<24:55,  7.03it/s]

{'epoch': 1, 'iter': 17110, 'avg_loss': 8.587970782995544, 'avg_acc': 50.08510607211735, 'loss': 8.867117881774902}


EP_train:1:  62%|| 17122/27626 [40:19<24:41,  7.09it/s]

{'epoch': 1, 'iter': 17120, 'avg_loss': 8.587958115380012, 'avg_acc': 50.083231119677585, 'loss': 8.140810012817383}


EP_train:1:  62%|| 17132/27626 [40:21<24:53,  7.03it/s]

{'epoch': 1, 'iter': 17130, 'avg_loss': 8.587842734833815, 'avg_acc': 50.080993520518355, 'loss': 9.377043724060059}


EP_train:1:  62%|| 17142/27626 [40:22<24:57,  7.00it/s]

{'epoch': 1, 'iter': 17140, 'avg_loss': 8.58785577151692, 'avg_acc': 50.07985240067674, 'loss': 9.030447959899902}


EP_train:1:  62%|| 17152/27626 [40:23<24:50,  7.03it/s]

{'epoch': 1, 'iter': 17150, 'avg_loss': 8.587856810264327, 'avg_acc': 50.07780158591336, 'loss': 8.208395004272461}


EP_train:1:  62%|| 17162/27626 [40:25<24:46,  7.04it/s]

{'epoch': 1, 'iter': 17160, 'avg_loss': 8.58759080134838, 'avg_acc': 50.0788488433075, 'loss': 7.652635097503662}


EP_train:1:  62%|| 17172/27626 [40:26<24:49,  7.02it/s]

{'epoch': 1, 'iter': 17170, 'avg_loss': 8.587481631236281, 'avg_acc': 50.078438937743876, 'loss': 8.5730562210083}


EP_train:1:  62%|| 17182/27626 [40:28<24:49,  7.01it/s]

{'epoch': 1, 'iter': 17180, 'avg_loss': 8.587696093840188, 'avg_acc': 50.0800302659915, 'loss': 8.755838394165039}


EP_train:1:  62%|| 17192/27626 [40:29<24:53,  6.99it/s]

{'epoch': 1, 'iter': 17190, 'avg_loss': 8.58768708393438, 'avg_acc': 50.07907480658484, 'loss': 8.76333236694336}


EP_train:1:  62%|| 17202/27626 [40:31<24:32,  7.08it/s]

{'epoch': 1, 'iter': 17200, 'avg_loss': 8.58778123357713, 'avg_acc': 50.07666705424103, 'loss': 8.716252326965332}


EP_train:1:  62%|| 17212/27626 [40:32<24:44,  7.01it/s]

{'epoch': 1, 'iter': 17210, 'avg_loss': 8.587563010416291, 'avg_acc': 50.07825663819651, 'loss': 8.820707321166992}


EP_train:1:  62%|| 17222/27626 [40:33<24:42,  7.02it/s]

{'epoch': 1, 'iter': 17220, 'avg_loss': 8.587660259073823, 'avg_acc': 50.07784826665118, 'loss': 9.381653785705566}


EP_train:1:  62%|| 17232/27626 [40:35<24:25,  7.09it/s]

{'epoch': 1, 'iter': 17230, 'avg_loss': 8.587703959233846, 'avg_acc': 50.07725900992397, 'loss': 8.56528377532959}


EP_train:1:  62%|| 17242/27626 [40:36<24:40,  7.01it/s]

{'epoch': 1, 'iter': 17240, 'avg_loss': 8.587700832803485, 'avg_acc': 50.07812046865031, 'loss': 8.559017181396484}


EP_train:1:  62%|| 17252/27626 [40:38<24:49,  6.96it/s]

{'epoch': 1, 'iter': 17250, 'avg_loss': 8.587728962968187, 'avg_acc': 50.080973566749755, 'loss': 8.779098510742188}


EP_train:1:  62%|| 17262/27626 [40:39<24:32,  7.04it/s]

{'epoch': 1, 'iter': 17260, 'avg_loss': 8.587842603152739, 'avg_acc': 50.080021435606284, 'loss': 8.46940803527832}


EP_train:1:  63%|| 17272/27626 [40:40<24:38,  7.00it/s]

{'epoch': 1, 'iter': 17270, 'avg_loss': 8.587706022978754, 'avg_acc': 50.080698859359615, 'loss': 8.311878204345703}


EP_train:1:  63%|| 17282/27626 [40:42<24:36,  7.01it/s]

{'epoch': 1, 'iter': 17280, 'avg_loss': 8.587718863791045, 'avg_acc': 50.07992882356345, 'loss': 7.875481128692627}


EP_train:1:  63%|| 17292/27626 [40:43<24:31,  7.02it/s]

{'epoch': 1, 'iter': 17290, 'avg_loss': 8.587742249083611, 'avg_acc': 50.079340408304894, 'loss': 8.319343566894531}


EP_train:1:  63%|| 17302/27626 [40:45<24:16,  7.09it/s]

{'epoch': 1, 'iter': 17300, 'avg_loss': 8.587854351530764, 'avg_acc': 50.07965580024276, 'loss': 9.597054481506348}


EP_train:1:  63%|| 17312/27626 [40:46<24:21,  7.06it/s]

{'epoch': 1, 'iter': 17310, 'avg_loss': 8.587908878131396, 'avg_acc': 50.08213708046907, 'loss': 8.33556079864502}


EP_train:1:  63%|| 17322/27626 [40:48<24:17,  7.07it/s]

{'epoch': 1, 'iter': 17320, 'avg_loss': 8.587787050826742, 'avg_acc': 50.080285491599795, 'loss': 7.845627307891846}


EP_train:1:  63%|| 17332/27626 [40:49<24:33,  6.99it/s]

{'epoch': 1, 'iter': 17330, 'avg_loss': 8.587777690338282, 'avg_acc': 50.08186198142057, 'loss': 7.900811195373535}


EP_train:1:  63%|| 17342/27626 [40:50<24:22,  7.03it/s]

{'epoch': 1, 'iter': 17340, 'avg_loss': 8.587799519295306, 'avg_acc': 50.08055331295773, 'loss': 8.731462478637695}


EP_train:1:  63%|| 17352/27626 [40:52<24:27,  7.00it/s]

{'epoch': 1, 'iter': 17350, 'avg_loss': 8.587690704116614, 'avg_acc': 50.08032678231802, 'loss': 8.367691040039062}


EP_train:1:  63%|| 17362/27626 [40:53<24:19,  7.03it/s]

{'epoch': 1, 'iter': 17360, 'avg_loss': 8.587483523511494, 'avg_acc': 50.078660503427216, 'loss': 8.791555404663086}


EP_train:1:  63%|| 17372/27626 [40:55<24:12,  7.06it/s]

{'epoch': 1, 'iter': 17370, 'avg_loss': 8.587713856625987, 'avg_acc': 50.07699614299695, 'loss': 8.360898971557617}


EP_train:1:  63%|| 17382/27626 [40:56<24:10,  7.06it/s]

{'epoch': 1, 'iter': 17380, 'avg_loss': 8.587590949649634, 'avg_acc': 50.07821040216328, 'loss': 8.598397254943848}


EP_train:1:  63%|| 17392/27626 [40:57<24:06,  7.08it/s]

{'epoch': 1, 'iter': 17390, 'avg_loss': 8.587699626643815, 'avg_acc': 50.07708728652751, 'loss': 9.205092430114746}


EP_train:1:  63%|| 17402/27626 [40:59<24:15,  7.02it/s]

{'epoch': 1, 'iter': 17400, 'avg_loss': 8.587631348378151, 'avg_acc': 50.074887937474855, 'loss': 9.556446075439453}


EP_train:1:  63%|| 17412/27626 [41:00<24:13,  7.03it/s]

{'epoch': 1, 'iter': 17410, 'avg_loss': 8.58773194722706, 'avg_acc': 50.07646028372867, 'loss': 8.265308380126953}


EP_train:1:  63%|| 17422/27626 [41:02<24:08,  7.05it/s]

{'epoch': 1, 'iter': 17420, 'avg_loss': 8.587712757048626, 'avg_acc': 50.072290626255665, 'loss': 8.781495094299316}


EP_train:1:  63%|| 17432/27626 [41:03<24:11,  7.02it/s]

{'epoch': 1, 'iter': 17430, 'avg_loss': 8.58782637139126, 'avg_acc': 50.069022144455275, 'loss': 8.15318775177002}


EP_train:1:  63%|| 17442/27626 [41:05<24:30,  6.93it/s]

{'epoch': 1, 'iter': 17440, 'avg_loss': 8.587908923014213, 'avg_acc': 50.0686242187948, 'loss': 8.092935562133789}


EP_train:1:  63%|| 17452/27626 [41:06<24:13,  7.00it/s]

{'epoch': 1, 'iter': 17450, 'avg_loss': 8.587981909463949, 'avg_acc': 50.06912211334594, 'loss': 9.199166297912598}


EP_train:1:  63%|| 17462/27626 [41:07<24:15,  6.98it/s]

{'epoch': 1, 'iter': 17460, 'avg_loss': 8.58814558615001, 'avg_acc': 50.0706932592635, 'loss': 8.784249305725098}


EP_train:1:  63%|| 17472/27626 [41:09<24:13,  6.99it/s]

{'epoch': 1, 'iter': 17470, 'avg_loss': 8.587986124857597, 'avg_acc': 50.069758456871384, 'loss': 8.480971336364746}


EP_train:1:  63%|| 17482/27626 [41:10<23:57,  7.06it/s]

{'epoch': 1, 'iter': 17480, 'avg_loss': 8.587830317715309, 'avg_acc': 50.06900348950288, 'loss': 8.129326820373535}


EP_train:1:  63%|| 17492/27626 [41:12<23:57,  7.05it/s]

{'epoch': 1, 'iter': 17490, 'avg_loss': 8.587840075108739, 'avg_acc': 50.06878537533589, 'loss': 8.514409065246582}


EP_train:1:  63%|| 17502/27626 [41:13<23:51,  7.07it/s]

{'epoch': 1, 'iter': 17500, 'avg_loss': 8.5878433308515, 'avg_acc': 50.0723172961545, 'loss': 9.61598014831543}


EP_train:1:  63%|| 17512/27626 [41:14<24:00,  7.02it/s]

{'epoch': 1, 'iter': 17510, 'avg_loss': 8.587843784129461, 'avg_acc': 50.072275997944146, 'loss': 8.464982032775879}


EP_train:1:  63%|| 17522/27626 [41:16<23:52,  7.05it/s]

{'epoch': 1, 'iter': 17520, 'avg_loss': 8.587804430499006, 'avg_acc': 50.07259146167456, 'loss': 8.720062255859375}


EP_train:1:  63%|| 17532/27626 [41:17<24:01,  7.00it/s]

{'epoch': 1, 'iter': 17530, 'avg_loss': 8.587675942988943, 'avg_acc': 50.07237179852832, 'loss': 8.124999046325684}


EP_train:1:  63%|| 17542/27626 [41:19<24:06,  6.97it/s]

{'epoch': 1, 'iter': 17540, 'avg_loss': 8.587612247651894, 'avg_acc': 50.073399464112654, 'loss': 8.940075874328613}


EP_train:1:  64%|| 17552/27626 [41:20<23:55,  7.02it/s]

{'epoch': 1, 'iter': 17550, 'avg_loss': 8.587572307303029, 'avg_acc': 50.07371374850435, 'loss': 8.149595260620117}


EP_train:1:  64%|| 17562/27626 [41:22<23:50,  7.04it/s]

{'epoch': 1, 'iter': 17560, 'avg_loss': 8.587551552561976, 'avg_acc': 50.07384972381983, 'loss': 9.023377418518066}


EP_train:1:  64%|| 17572/27626 [41:23<23:44,  7.06it/s]

{'epoch': 1, 'iter': 17570, 'avg_loss': 8.587611430588623, 'avg_acc': 50.07238489556656, 'loss': 9.073870658874512}


EP_train:1:  64%|| 17582/27626 [41:24<23:43,  7.05it/s]

{'epoch': 1, 'iter': 17580, 'avg_loss': 8.587550816780864, 'avg_acc': 50.070566236277806, 'loss': 7.799939155578613}


EP_train:1:  64%|| 17592/27626 [41:26<23:38,  7.07it/s]

{'epoch': 1, 'iter': 17590, 'avg_loss': 8.587570470308242, 'avg_acc': 50.06946023534762, 'loss': 8.115432739257812}


EP_train:1:  64%|| 17602/27626 [41:27<23:46,  7.03it/s]

{'epoch': 1, 'iter': 17600, 'avg_loss': 8.587609330846954, 'avg_acc': 50.06959831827737, 'loss': 9.15912914276123}


EP_train:1:  64%|| 17612/27626 [41:29<24:00,  6.95it/s]

{'epoch': 1, 'iter': 17610, 'avg_loss': 8.58763687509497, 'avg_acc': 50.07310771676793, 'loss': 7.735828876495361}


EP_train:1:  64%|| 17622/27626 [41:30<23:40,  7.04it/s]

{'epoch': 1, 'iter': 17620, 'avg_loss': 8.587513581164396, 'avg_acc': 50.07359826343567, 'loss': 7.998055458068848}


EP_train:1:  64%|| 17632/27626 [41:31<23:52,  6.98it/s]

{'epoch': 1, 'iter': 17630, 'avg_loss': 8.587250318214036, 'avg_acc': 50.07355651976631, 'loss': 7.194937229156494}


EP_train:1:  64%|| 17642/27626 [41:33<23:47,  7.00it/s]

{'epoch': 1, 'iter': 17640, 'avg_loss': 8.587297864136232, 'avg_acc': 50.07298339096423, 'loss': 8.888130187988281}


EP_train:1:  64%|| 17652/27626 [41:34<23:41,  7.02it/s]

{'epoch': 1, 'iter': 17650, 'avg_loss': 8.587210524001815, 'avg_acc': 50.07559769984703, 'loss': 8.398313522338867}


EP_train:1:  64%|| 17662/27626 [41:36<23:32,  7.06it/s]

{'epoch': 1, 'iter': 17660, 'avg_loss': 8.587275014945034, 'avg_acc': 50.07325462884321, 'loss': 8.513052940368652}


EP_train:1:  64%|| 17672/27626 [41:37<23:37,  7.02it/s]

{'epoch': 1, 'iter': 17670, 'avg_loss': 8.587457693261989, 'avg_acc': 50.069145775564486, 'loss': 9.829817771911621}


EP_train:1:  64%|| 17682/27626 [41:39<23:30,  7.05it/s]

{'epoch': 1, 'iter': 17680, 'avg_loss': 8.58732643295188, 'avg_acc': 50.06786946439681, 'loss': 8.208548545837402}


EP_train:1:  64%|| 17692/27626 [41:40<23:32,  7.03it/s]

{'epoch': 1, 'iter': 17690, 'avg_loss': 8.587266281766839, 'avg_acc': 50.069244248487934, 'loss': 8.349261283874512}


EP_train:1:  64%|| 17702/27626 [41:41<23:32,  7.03it/s]

{'epoch': 1, 'iter': 17700, 'avg_loss': 8.587388896220189, 'avg_acc': 50.070087848144176, 'loss': 8.984926223754883}


EP_train:1:  64%|| 17712/27626 [41:43<23:43,  6.96it/s]

{'epoch': 1, 'iter': 17710, 'avg_loss': 8.587448507380792, 'avg_acc': 50.069695387047595, 'loss': 8.517224311828613}


EP_train:1:  64%|| 17722/27626 [41:44<23:25,  7.05it/s]

{'epoch': 1, 'iter': 17720, 'avg_loss': 8.58738358325785, 'avg_acc': 50.07018509113481, 'loss': 8.466421127319336}


EP_train:1:  64%|| 17732/27626 [41:46<23:35,  6.99it/s]

{'epoch': 1, 'iter': 17730, 'avg_loss': 8.587331997239579, 'avg_acc': 50.070674242851496, 'loss': 8.787321090698242}


EP_train:1:  64%|| 17742/27626 [41:47<23:19,  7.06it/s]

{'epoch': 1, 'iter': 17740, 'avg_loss': 8.58709616540391, 'avg_acc': 50.067816075756724, 'loss': 8.355472564697266}


EP_train:1:  64%|| 17752/27626 [41:48<23:19,  7.06it/s]

{'epoch': 1, 'iter': 17750, 'avg_loss': 8.58710363394616, 'avg_acc': 50.065841361050076, 'loss': 8.59689998626709}


EP_train:1:  64%|| 17762/27626 [41:50<23:18,  7.06it/s]

{'epoch': 1, 'iter': 17760, 'avg_loss': 8.586964149758607, 'avg_acc': 50.06422076459659, 'loss': 8.526269912719727}


EP_train:1:  64%|| 17772/27626 [41:51<23:15,  7.06it/s]

{'epoch': 1, 'iter': 17770, 'avg_loss': 8.587018325275029, 'avg_acc': 50.06312953688594, 'loss': 8.151315689086914}


EP_train:1:  64%|| 17782/27626 [41:53<23:07,  7.10it/s]

{'epoch': 1, 'iter': 17780, 'avg_loss': 8.58671822101942, 'avg_acc': 50.064324278724484, 'loss': 8.342636108398438}


EP_train:1:  64%|| 17792/27626 [41:54<23:15,  7.05it/s]

{'epoch': 1, 'iter': 17790, 'avg_loss': 8.586718295880349, 'avg_acc': 50.065166376257665, 'loss': 8.844956398010254}


EP_train:1:  64%|| 17802/27626 [41:56<23:14,  7.05it/s]

{'epoch': 1, 'iter': 17800, 'avg_loss': 8.586622822332245, 'avg_acc': 50.06951856637267, 'loss': 8.803425788879395}


EP_train:1:  64%|| 17812/27626 [41:57<23:23,  6.99it/s]

{'epoch': 1, 'iter': 17810, 'avg_loss': 8.586636055219275, 'avg_acc': 50.07053225534782, 'loss': 8.178632736206055}


EP_train:1:  65%|| 17822/27626 [41:58<23:08,  7.06it/s]

{'epoch': 1, 'iter': 17820, 'avg_loss': 8.586521024743044, 'avg_acc': 50.07259693619886, 'loss': 9.210787773132324}


EP_train:1:  65%|| 17832/27626 [42:00<23:11,  7.04it/s]

{'epoch': 1, 'iter': 17830, 'avg_loss': 8.5864872394117, 'avg_acc': 50.0688758342213, 'loss': 8.084335327148438}


EP_train:1:  65%|| 17842/27626 [42:01<23:09,  7.04it/s]

{'epoch': 1, 'iter': 17840, 'avg_loss': 8.58638423430359, 'avg_acc': 50.07216523737459, 'loss': 7.6215105056762695}


EP_train:1:  65%|| 17852/27626 [42:03<23:15,  7.00it/s]

{'epoch': 1, 'iter': 17850, 'avg_loss': 8.586400968782321, 'avg_acc': 50.06949890762422, 'loss': 8.886098861694336}


EP_train:1:  65%|| 17862/27626 [42:04<23:01,  7.07it/s]

{'epoch': 1, 'iter': 17860, 'avg_loss': 8.586290284958727, 'avg_acc': 50.066135714685636, 'loss': 9.043792724609375}


EP_train:1:  65%|| 17872/27626 [42:05<23:08,  7.03it/s]

{'epoch': 1, 'iter': 17870, 'avg_loss': 8.58632709874483, 'avg_acc': 50.06714789323485, 'loss': 8.944856643676758}


EP_train:1:  65%|| 17882/27626 [42:07<22:54,  7.09it/s]

{'epoch': 1, 'iter': 17880, 'avg_loss': 8.586532746296815, 'avg_acc': 50.069557071752136, 'loss': 8.53847599029541}


EP_train:1:  65%|| 17892/27626 [42:08<23:03,  7.04it/s]

{'epoch': 1, 'iter': 17890, 'avg_loss': 8.586861728455847, 'avg_acc': 50.070566206472535, 'loss': 9.580416679382324}


EP_train:1:  65%|| 17902/27626 [42:10<23:00,  7.04it/s]

{'epoch': 1, 'iter': 17900, 'avg_loss': 8.586977209953494, 'avg_acc': 50.071225071225065, 'loss': 9.438193321228027}


EP_train:1:  65%|| 17912/27626 [42:11<22:59,  7.04it/s]

{'epoch': 1, 'iter': 17910, 'avg_loss': 8.586997280447775, 'avg_acc': 50.07153425269387, 'loss': 9.485490798950195}


EP_train:1:  65%|| 17922/27626 [42:13<22:54,  7.06it/s]

{'epoch': 1, 'iter': 17920, 'avg_loss': 8.587009510366814, 'avg_acc': 50.07131995982367, 'loss': 7.982052326202393}


EP_train:1:  65%|| 17932/27626 [42:14<23:02,  7.01it/s]

{'epoch': 1, 'iter': 17930, 'avg_loss': 8.587244697838006, 'avg_acc': 50.07284869778596, 'loss': 9.580507278442383}


EP_train:1:  65%|| 17942/27626 [42:15<22:52,  7.06it/s]

{'epoch': 1, 'iter': 17940, 'avg_loss': 8.587283174859117, 'avg_acc': 50.07263391115322, 'loss': 9.063301086425781}


EP_train:1:  65%|| 17952/27626 [42:17<22:51,  7.05it/s]

{'epoch': 1, 'iter': 17950, 'avg_loss': 8.587366235640697, 'avg_acc': 50.069459918667484, 'loss': 9.151803016662598}


EP_train:1:  65%|| 17962/27626 [42:18<22:49,  7.06it/s]

{'epoch': 1, 'iter': 17960, 'avg_loss': 8.587371400573273, 'avg_acc': 50.067855353265415, 'loss': 7.834137916564941}


EP_train:1:  65%|| 17972/27626 [42:20<22:46,  7.06it/s]

{'epoch': 1, 'iter': 17970, 'avg_loss': 8.587151912541469, 'avg_acc': 50.06433976962885, 'loss': 8.228880882263184}


EP_train:1:  65%|| 17982/27626 [42:21<22:42,  7.08it/s]

{'epoch': 1, 'iter': 17980, 'avg_loss': 8.587106155796262, 'avg_acc': 50.07056059173573, 'loss': 9.054099082946777}


EP_train:1:  65%|| 17992/27626 [42:22<22:46,  7.05it/s]

{'epoch': 1, 'iter': 17990, 'avg_loss': 8.587193366969833, 'avg_acc': 50.069652881996554, 'loss': 9.725485801696777}


EP_train:1:  65%|| 18002/27626 [42:24<22:45,  7.05it/s]

{'epoch': 1, 'iter': 18000, 'avg_loss': 8.587147498937403, 'avg_acc': 50.069614188100665, 'loss': 8.363553047180176}


EP_train:1:  65%|| 18012/27626 [42:25<22:51,  7.01it/s]

{'epoch': 1, 'iter': 18010, 'avg_loss': 8.587037207582433, 'avg_acc': 50.06714646604853, 'loss': 8.1344633102417}


EP_train:1:  65%|| 18022/27626 [42:27<22:37,  7.07it/s]

{'epoch': 1, 'iter': 18020, 'avg_loss': 8.587132139837271, 'avg_acc': 50.06710920592642, 'loss': 8.572750091552734}


EP_train:1:  65%|| 18032/27626 [42:28<22:34,  7.08it/s]

{'epoch': 1, 'iter': 18030, 'avg_loss': 8.587023912415741, 'avg_acc': 50.0698449891853, 'loss': 8.737680435180664}


EP_train:1:  65%|| 18042/27626 [42:29<22:42,  7.03it/s]

{'epoch': 1, 'iter': 18040, 'avg_loss': 8.587058345478988, 'avg_acc': 50.07275095615542, 'loss': 8.908045768737793}


EP_train:1:  65%|| 18052/27626 [42:31<22:46,  7.01it/s]

{'epoch': 1, 'iter': 18050, 'avg_loss': 8.587123852305503, 'avg_acc': 50.07236441194394, 'loss': 8.89000129699707}


EP_train:1:  65%|| 18062/27626 [42:32<22:39,  7.04it/s]

{'epoch': 1, 'iter': 18060, 'avg_loss': 8.58716250688331, 'avg_acc': 50.07301644427219, 'loss': 8.624112129211426}


EP_train:1:  65%|| 18072/27626 [42:34<22:38,  7.04it/s]

{'epoch': 1, 'iter': 18070, 'avg_loss': 8.587007600793696, 'avg_acc': 50.07453239997787, 'loss': 8.463225364685059}


EP_train:1:  65%|| 18082/27626 [42:35<22:36,  7.04it/s]

{'epoch': 1, 'iter': 18080, 'avg_loss': 8.587074597702967, 'avg_acc': 50.07535534539019, 'loss': 9.101057052612305}


EP_train:1:  65%|| 18092/27626 [42:37<22:28,  7.07it/s]

{'epoch': 1, 'iter': 18090, 'avg_loss': 8.587097265782177, 'avg_acc': 50.07583190536731, 'loss': 8.102813720703125}


EP_train:1:  66%|| 18102/27626 [42:38<22:26,  7.07it/s]

{'epoch': 1, 'iter': 18100, 'avg_loss': 8.58694854985745, 'avg_acc': 50.07061073973813, 'loss': 8.59298038482666}


EP_train:1:  66%|| 18112/27626 [42:39<22:18,  7.11it/s]

{'epoch': 1, 'iter': 18110, 'avg_loss': 8.586938931567989, 'avg_acc': 50.06574043399039, 'loss': 7.849512100219727}


EP_train:1:  66%|| 18122/27626 [42:41<22:20,  7.09it/s]

{'epoch': 1, 'iter': 18120, 'avg_loss': 8.586791164557551, 'avg_acc': 50.067946029468565, 'loss': 8.847424507141113}


EP_train:1:  66%|| 18132/27626 [42:42<22:35,  7.01it/s]

{'epoch': 1, 'iter': 18130, 'avg_loss': 8.586752733532824, 'avg_acc': 50.06928740830622, 'loss': 8.289388656616211}


EP_train:1:  66%|| 18142/27626 [42:44<22:21,  7.07it/s]

{'epoch': 1, 'iter': 18140, 'avg_loss': 8.586814646223836, 'avg_acc': 50.0685601675762, 'loss': 10.730961799621582}


EP_train:1:  66%|| 18152/27626 [42:45<22:15,  7.09it/s]

{'epoch': 1, 'iter': 18150, 'avg_loss': 8.586883904881256, 'avg_acc': 50.0685223954603, 'loss': 8.91499137878418}


EP_train:1:  66%|| 18162/27626 [42:46<22:21,  7.05it/s]

{'epoch': 1, 'iter': 18160, 'avg_loss': 8.58693100496192, 'avg_acc': 50.07003331314355, 'loss': 9.042793273925781}


EP_train:1:  66%|| 18172/27626 [42:48<22:27,  7.02it/s]

{'epoch': 1, 'iter': 18170, 'avg_loss': 8.58701660426959, 'avg_acc': 50.068962907930214, 'loss': 9.380748748779297}


EP_train:1:  66%|| 18182/27626 [42:49<22:27,  7.01it/s]

{'epoch': 1, 'iter': 18180, 'avg_loss': 8.587198825082096, 'avg_acc': 50.06806556295034, 'loss': 8.80700969696045}


EP_train:1:  66%|| 18192/27626 [42:51<22:26,  7.01it/s]

{'epoch': 1, 'iter': 18190, 'avg_loss': 8.587182113319695, 'avg_acc': 50.06905887526799, 'loss': 8.42835807800293}


EP_train:1:  66%|| 18202/27626 [42:52<22:02,  7.12it/s]

{'epoch': 1, 'iter': 18200, 'avg_loss': 8.58712075386825, 'avg_acc': 50.069192626778744, 'loss': 7.9657182693481445}


EP_train:1:  66%|| 18212/27626 [42:53<22:11,  7.07it/s]

{'epoch': 1, 'iter': 18210, 'avg_loss': 8.58694144221155, 'avg_acc': 50.07138542639065, 'loss': 8.54006290435791}


EP_train:1:  66%|| 18222/27626 [42:55<22:11,  7.06it/s]

{'epoch': 1, 'iter': 18220, 'avg_loss': 8.58683666466659, 'avg_acc': 50.073232808298116, 'loss': 8.05994987487793}


EP_train:1:  66%|| 18232/27626 [42:56<22:04,  7.09it/s]

{'epoch': 1, 'iter': 18230, 'avg_loss': 8.586897638911092, 'avg_acc': 50.07250699358236, 'loss': 8.580863952636719}


EP_train:1:  66%|| 18242/27626 [42:58<22:00,  7.10it/s]

{'epoch': 1, 'iter': 18240, 'avg_loss': 8.586925728214812, 'avg_acc': 50.07246724412039, 'loss': 9.086714744567871}


EP_train:1:  66%|| 18252/27626 [42:59<22:09,  7.05it/s]

{'epoch': 1, 'iter': 18250, 'avg_loss': 8.586861153144287, 'avg_acc': 50.070201632787246, 'loss': 8.446027755737305}


EP_train:1:  66%|| 18262/27626 [43:01<22:34,  6.91it/s]

{'epoch': 1, 'iter': 18260, 'avg_loss': 8.587051615697531, 'avg_acc': 50.07050544877061, 'loss': 8.85474967956543}


EP_train:1:  66%|| 18272/27626 [43:02<22:00,  7.08it/s]

{'epoch': 1, 'iter': 18270, 'avg_loss': 8.586963255438961, 'avg_acc': 50.06995375184719, 'loss': 7.935751438140869}


EP_train:1:  66%|| 18282/27626 [43:03<22:12,  7.01it/s]

{'epoch': 1, 'iter': 18280, 'avg_loss': 8.586904182468512, 'avg_acc': 50.06871888846344, 'loss': 9.320225715637207}


EP_train:1:  66%|| 18292/27626 [43:05<22:03,  7.05it/s]

{'epoch': 1, 'iter': 18290, 'avg_loss': 8.586897312874648, 'avg_acc': 50.06406839429227, 'loss': 7.80809211730957}


EP_train:1:  66%|| 18302/27626 [43:06<21:53,  7.10it/s]

{'epoch': 1, 'iter': 18300, 'avg_loss': 8.586889505243049, 'avg_acc': 50.065228676028624, 'loss': 8.691789627075195}


EP_train:1:  66%|| 18312/27626 [43:08<22:09,  7.00it/s]

{'epoch': 1, 'iter': 18310, 'avg_loss': 8.58697554371292, 'avg_acc': 50.06621702801595, 'loss': 8.334807395935059}


EP_train:1:  66%|| 18322/27626 [43:09<22:11,  6.99it/s]

{'epoch': 1, 'iter': 18320, 'avg_loss': 8.587155360086575, 'avg_acc': 50.06669259319906, 'loss': 8.429594993591309}


EP_train:1:  66%|| 18332/27626 [43:10<21:56,  7.06it/s]

{'epoch': 1, 'iter': 18330, 'avg_loss': 8.587135611094974, 'avg_acc': 50.06784954448747, 'loss': 9.330256462097168}


EP_train:1:  66%|| 18342/27626 [43:12<21:59,  7.03it/s]

{'epoch': 1, 'iter': 18340, 'avg_loss': 8.587044699190358, 'avg_acc': 50.069175617469064, 'loss': 7.184096336364746}


EP_train:1:  66%|| 18352/27626 [43:13<21:57,  7.04it/s]

{'epoch': 1, 'iter': 18350, 'avg_loss': 8.586904922250662, 'avg_acc': 50.06828646940221, 'loss': 8.82654857635498}


EP_train:1:  66%|| 18362/27626 [43:15<21:54,  7.05it/s]

{'epoch': 1, 'iter': 18360, 'avg_loss': 8.586940896088414, 'avg_acc': 50.0694406622733, 'loss': 8.7538423538208}


EP_train:1:  67%|| 18372/27626 [43:16<21:47,  7.08it/s]

{'epoch': 1, 'iter': 18370, 'avg_loss': 8.586892714258259, 'avg_acc': 50.071784334004676, 'loss': 8.489110946655273}


EP_train:1:  67%|| 18382/27626 [43:18<21:49,  7.06it/s]

{'epoch': 1, 'iter': 18380, 'avg_loss': 8.586953800097234, 'avg_acc': 50.07395544312061, 'loss': 8.548243522644043}


EP_train:1:  67%|| 18392/27626 [43:19<21:38,  7.11it/s]

{'epoch': 1, 'iter': 18390, 'avg_loss': 8.58712135728437, 'avg_acc': 50.073235549997285, 'loss': 8.09115982055664}


EP_train:1:  67%|| 18402/27626 [43:20<21:50,  7.04it/s]

{'epoch': 1, 'iter': 18400, 'avg_loss': 8.587231114897856, 'avg_acc': 50.07166730069018, 'loss': 8.538477897644043}


EP_train:1:  67%|| 18412/27626 [43:22<21:41,  7.08it/s]

{'epoch': 1, 'iter': 18410, 'avg_loss': 8.587225350815109, 'avg_acc': 50.07077969692032, 'loss': 8.189678192138672}


EP_train:1:  67%|| 18422/27626 [43:23<21:40,  7.08it/s]

{'epoch': 1, 'iter': 18420, 'avg_loss': 8.587335118667083, 'avg_acc': 50.07091091688834, 'loss': 9.04638385772705}


EP_train:1:  67%|| 18432/27626 [43:25<21:53,  7.00it/s]

{'epoch': 1, 'iter': 18430, 'avg_loss': 8.587285459090104, 'avg_acc': 50.07104199446585, 'loss': 8.855481147766113}


EP_train:1:  67%|| 18442/27626 [43:26<21:45,  7.04it/s]

{'epoch': 1, 'iter': 18440, 'avg_loss': 8.587363712829388, 'avg_acc': 50.07252860473944, 'loss': 7.829046726226807}


EP_train:1:  67%|| 18452/27626 [43:27<21:36,  7.08it/s]

{'epoch': 1, 'iter': 18450, 'avg_loss': 8.587266869354, 'avg_acc': 50.07384423608477, 'loss': 8.017518043518066}


EP_train:1:  67%|| 18462/27626 [43:29<21:45,  7.02it/s]

{'epoch': 1, 'iter': 18460, 'avg_loss': 8.587301413459254, 'avg_acc': 50.07092654785764, 'loss': 8.882568359375}


EP_train:1:  67%|| 18472/27626 [43:30<21:33,  7.08it/s]

{'epoch': 1, 'iter': 18470, 'avg_loss': 8.58719716447181, 'avg_acc': 50.07021141248443, 'loss': 8.233451843261719}


EP_train:1:  67%|| 18482/27626 [43:32<21:34,  7.06it/s]

{'epoch': 1, 'iter': 18480, 'avg_loss': 8.587108672023188, 'avg_acc': 50.06983523618852, 'loss': 8.756628036499023}


EP_train:1:  67%|| 18492/27626 [43:33<21:43,  7.01it/s]

{'epoch': 1, 'iter': 18490, 'avg_loss': 8.587037375461842, 'avg_acc': 50.07216348493861, 'loss': 8.805828094482422}


EP_train:1:  67%|| 18502/27626 [43:34<21:32,  7.06it/s]

{'epoch': 1, 'iter': 18500, 'avg_loss': 8.5871832976824, 'avg_acc': 50.07432030701043, 'loss': 9.800345420837402}


EP_train:1:  67%|| 18512/27626 [43:36<21:33,  7.05it/s]

{'epoch': 1, 'iter': 18510, 'avg_loss': 8.58721685324539, 'avg_acc': 50.07343606504241, 'loss': 8.780834197998047}


EP_train:1:  67%|| 18522/27626 [43:37<21:31,  7.05it/s]

{'epoch': 1, 'iter': 18520, 'avg_loss': 8.587203812762668, 'avg_acc': 50.073902597052, 'loss': 9.30604362487793}


EP_train:1:  67%|| 18532/27626 [43:39<21:28,  7.06it/s]

{'epoch': 1, 'iter': 18530, 'avg_loss': 8.587220408720782, 'avg_acc': 50.07453726188549, 'loss': 8.839517593383789}


EP_train:1:  67%|| 18542/27626 [43:40<21:27,  7.05it/s]

{'epoch': 1, 'iter': 18540, 'avg_loss': 8.587210647625596, 'avg_acc': 50.074834151340276, 'loss': 8.656269073486328}


EP_train:1:  67%|| 18552/27626 [43:42<21:23,  7.07it/s]

{'epoch': 1, 'iter': 18550, 'avg_loss': 8.587449690356266, 'avg_acc': 50.07513072071587, 'loss': 9.789681434631348}


EP_train:1:  67%|| 18562/27626 [43:43<21:28,  7.04it/s]

{'epoch': 1, 'iter': 18560, 'avg_loss': 8.587347428967332, 'avg_acc': 50.07660551694413, 'loss': 8.251653671264648}


EP_train:1:  67%|| 18572/27626 [43:44<21:22,  7.06it/s]

{'epoch': 1, 'iter': 18570, 'avg_loss': 8.587364264383408, 'avg_acc': 50.07605944752571, 'loss': 8.652087211608887}


EP_train:1:  67%|| 18582/27626 [43:46<21:23,  7.05it/s]

{'epoch': 1, 'iter': 18580, 'avg_loss': 8.587368043475898, 'avg_acc': 50.077532156503956, 'loss': 8.45430850982666}


EP_train:1:  67%|| 18592/27626 [43:47<21:24,  7.03it/s]

{'epoch': 1, 'iter': 18590, 'avg_loss': 8.587170782715578, 'avg_acc': 50.077490452369425, 'loss': 8.537360191345215}


EP_train:1:  67%|| 18602/27626 [43:49<21:18,  7.06it/s]

{'epoch': 1, 'iter': 18600, 'avg_loss': 8.586972494188428, 'avg_acc': 50.07728079135531, 'loss': 8.815957069396973}


EP_train:1:  67%|| 18612/27626 [43:50<21:22,  7.03it/s]

{'epoch': 1, 'iter': 18610, 'avg_loss': 8.586846243066281, 'avg_acc': 50.076231798398794, 'loss': 8.5115327835083}


EP_train:1:  67%|| 18622/27626 [43:51<21:20,  7.03it/s]

{'epoch': 1, 'iter': 18620, 'avg_loss': 8.586958373888026, 'avg_acc': 50.074848289565544, 'loss': 8.633703231811523}


EP_train:1:  67%|| 18632/27626 [43:53<21:13,  7.06it/s]

{'epoch': 1, 'iter': 18630, 'avg_loss': 8.586943830258027, 'avg_acc': 50.07480811550642, 'loss': 7.898746013641357}


EP_train:1:  67%|| 18642/27626 [43:54<21:12,  7.06it/s]

{'epoch': 1, 'iter': 18640, 'avg_loss': 8.587002182165431, 'avg_acc': 50.076612037980794, 'loss': 8.194191932678223}


EP_train:1:  68%|| 18652/27626 [43:56<21:05,  7.09it/s]

{'epoch': 1, 'iter': 18650, 'avg_loss': 8.58683178542568, 'avg_acc': 50.07791137204439, 'loss': 8.269553184509277}


EP_train:1:  68%|| 18662/27626 [43:57<21:07,  7.07it/s]

{'epoch': 1, 'iter': 18660, 'avg_loss': 8.587082268449311, 'avg_acc': 50.07820454423665, 'loss': 8.291447639465332}


EP_train:1:  68%|| 18672/27626 [43:58<21:10,  7.05it/s]

{'epoch': 1, 'iter': 18670, 'avg_loss': 8.58712779503222, 'avg_acc': 50.078497402388734, 'loss': 8.72295093536377}


EP_train:1:  68%|| 18682/27626 [44:00<21:05,  7.07it/s]

{'epoch': 1, 'iter': 18680, 'avg_loss': 8.58722894075985, 'avg_acc': 50.079626358332, 'loss': 9.16298770904541}


EP_train:1:  68%|| 18692/27626 [44:01<21:00,  7.09it/s]

{'epoch': 1, 'iter': 18690, 'avg_loss': 8.5872858357043, 'avg_acc': 50.07891498582205, 'loss': 8.563720703125}


EP_train:1:  68%|| 18702/27626 [44:03<21:07,  7.04it/s]

{'epoch': 1, 'iter': 18700, 'avg_loss': 8.587234030636317, 'avg_acc': 50.07703465055344, 'loss': 8.735116958618164}


EP_train:1:  68%|| 18712/27626 [44:04<21:03,  7.06it/s]

{'epoch': 1, 'iter': 18710, 'avg_loss': 8.587189539718281, 'avg_acc': 50.078830634386186, 'loss': 8.182699203491211}


EP_train:1:  68%|| 18722/27626 [44:06<20:57,  7.08it/s]

{'epoch': 1, 'iter': 18720, 'avg_loss': 8.58738461903964, 'avg_acc': 50.07661850328509, 'loss': 8.195845603942871}


EP_train:1:  68%|| 18732/27626 [44:07<20:53,  7.10it/s]

{'epoch': 1, 'iter': 18730, 'avg_loss': 8.587276096256423, 'avg_acc': 50.07807912017511, 'loss': 7.948588848114014}


EP_train:1:  68%|| 18742/27626 [44:08<20:52,  7.09it/s]

{'epoch': 1, 'iter': 18740, 'avg_loss': 8.587199607072789, 'avg_acc': 50.077703964569665, 'loss': 8.828092575073242}


EP_train:1:  68%|| 18752/27626 [44:10<21:07,  7.00it/s]

{'epoch': 1, 'iter': 18750, 'avg_loss': 8.587380156670372, 'avg_acc': 50.078329155778356, 'loss': 8.74593448638916}


EP_train:1:  68%|| 18762/27626 [44:11<21:07,  6.99it/s]

{'epoch': 1, 'iter': 18760, 'avg_loss': 8.587436741243884, 'avg_acc': 50.08195192153936, 'loss': 8.575236320495605}


EP_train:1:  68%|| 18772/27626 [44:13<20:46,  7.10it/s]

{'epoch': 1, 'iter': 18770, 'avg_loss': 8.587334215733106, 'avg_acc': 50.08274066378989, 'loss': 8.498368263244629}


EP_train:1:  68%|| 18782/27626 [44:14<20:48,  7.09it/s]

{'epoch': 1, 'iter': 18780, 'avg_loss': 8.587175502226659, 'avg_acc': 50.083195782972155, 'loss': 8.260459899902344}


EP_train:1:  68%|| 18792/27626 [44:15<20:52,  7.05it/s]

{'epoch': 1, 'iter': 18790, 'avg_loss': 8.586938029942877, 'avg_acc': 50.08414932680538, 'loss': 8.11273193359375}


EP_train:1:  68%|| 18802/27626 [44:17<20:53,  7.04it/s]

{'epoch': 1, 'iter': 18800, 'avg_loss': 8.586922139818634, 'avg_acc': 50.08510185628424, 'loss': 8.449162483215332}


EP_train:1:  68%|| 18812/27626 [44:18<20:42,  7.09it/s]

{'epoch': 1, 'iter': 18810, 'avg_loss': 8.58707403174669, 'avg_acc': 50.08472436340439, 'loss': 9.50151538848877}


EP_train:1:  68%|| 18822/27626 [44:20<20:51,  7.04it/s]

{'epoch': 1, 'iter': 18820, 'avg_loss': 8.587059604976568, 'avg_acc': 50.08384915785559, 'loss': 8.475211143493652}


EP_train:1:  68%|| 18832/27626 [44:21<20:47,  7.05it/s]

{'epoch': 1, 'iter': 18830, 'avg_loss': 8.587100435763093, 'avg_acc': 50.08513222877171, 'loss': 9.545501708984375}


EP_train:1:  68%|| 18842/27626 [44:22<20:53,  7.01it/s]

{'epoch': 1, 'iter': 18840, 'avg_loss': 8.587073117948554, 'avg_acc': 50.0830967039966, 'loss': 8.424351692199707}


EP_train:1:  68%|| 18852/27626 [44:24<20:47,  7.03it/s]

{'epoch': 1, 'iter': 18850, 'avg_loss': 8.586887160395255, 'avg_acc': 50.08421303909607, 'loss': 7.907155513763428}


EP_train:1:  68%|| 18862/27626 [44:25<20:37,  7.08it/s]

{'epoch': 1, 'iter': 18860, 'avg_loss': 8.5869213606291, 'avg_acc': 50.083671332378984, 'loss': 8.460227012634277}


EP_train:1:  68%|| 18872/27626 [44:27<20:41,  7.05it/s]

{'epoch': 1, 'iter': 18870, 'avg_loss': 8.586914463456601, 'avg_acc': 50.081971013724754, 'loss': 8.742244720458984}


EP_train:1:  68%|| 18882/27626 [44:28<20:42,  7.04it/s]

{'epoch': 1, 'iter': 18880, 'avg_loss': 8.587069200273604, 'avg_acc': 50.08126555796832, 'loss': 8.43974494934082}


EP_train:1:  68%|| 18892/27626 [44:30<20:43,  7.03it/s]

{'epoch': 1, 'iter': 18890, 'avg_loss': 8.587054201207579, 'avg_acc': 50.08089169445768, 'loss': 8.328909873962402}


EP_train:1:  68%|| 18902/27626 [44:31<20:35,  7.06it/s]

{'epoch': 1, 'iter': 18900, 'avg_loss': 8.58713474261572, 'avg_acc': 50.07952621554415, 'loss': 9.037586212158203}


EP_train:1:  68%|| 18912/27626 [44:32<20:33,  7.06it/s]

{'epoch': 1, 'iter': 18910, 'avg_loss': 8.58762687482194, 'avg_acc': 50.08229337422664, 'loss': 9.083778381347656}


EP_train:1:  68%|| 18922/27626 [44:34<20:28,  7.09it/s]

{'epoch': 1, 'iter': 18920, 'avg_loss': 8.587725365529208, 'avg_acc': 50.08538792875641, 'loss': 7.96273946762085}


EP_train:1:  69%|| 18932/27626 [44:35<20:27,  7.08it/s]

{'epoch': 1, 'iter': 18930, 'avg_loss': 8.587858267048574, 'avg_acc': 50.08517775077914, 'loss': 8.747032165527344}


EP_train:1:  69%|| 18942/27626 [44:37<20:30,  7.06it/s]

{'epoch': 1, 'iter': 18940, 'avg_loss': 8.587727761760785, 'avg_acc': 50.08612269679531, 'loss': 8.163602828979492}


EP_train:1:  69%|| 18952/27626 [44:38<20:23,  7.09it/s]

{'epoch': 1, 'iter': 18950, 'avg_loss': 8.587685730128822, 'avg_acc': 50.08723154450953, 'loss': 8.788211822509766}


EP_train:1:  69%|| 18962/27626 [44:39<20:34,  7.02it/s]

{'epoch': 1, 'iter': 18960, 'avg_loss': 8.58766447366727, 'avg_acc': 50.08586704287749, 'loss': 8.074508666992188}


EP_train:1:  69%|| 18972/27626 [44:41<20:30,  7.03it/s]

{'epoch': 1, 'iter': 18970, 'avg_loss': 8.587733667932762, 'avg_acc': 50.08269200358442, 'loss': 8.814860343933105}


EP_train:1:  69%|| 18982/27626 [44:42<20:30,  7.03it/s]

{'epoch': 1, 'iter': 18980, 'avg_loss': 8.587761920342913, 'avg_acc': 50.08248379958906, 'loss': 8.863720893859863}


EP_train:1:  69%|| 18992/27626 [44:44<20:34,  7.00it/s]

{'epoch': 1, 'iter': 18990, 'avg_loss': 8.587689268401332, 'avg_acc': 50.08260491811911, 'loss': 7.951441287994385}


EP_train:1:  69%|| 19002/27626 [44:45<20:30,  7.01it/s]

{'epoch': 1, 'iter': 19000, 'avg_loss': 8.58774462303835, 'avg_acc': 50.08354823430346, 'loss': 9.02333927154541}


EP_train:1:  69%|| 19012/27626 [44:47<20:20,  7.06it/s]

{'epoch': 1, 'iter': 19010, 'avg_loss': 8.587687774595196, 'avg_acc': 50.082846772920945, 'loss': 9.313882827758789}


EP_train:1:  69%|| 19022/27626 [44:48<20:19,  7.06it/s]

{'epoch': 1, 'iter': 19020, 'avg_loss': 8.587592848480002, 'avg_acc': 50.08296750959465, 'loss': 7.923356533050537}


EP_train:1:  69%|| 19032/27626 [44:49<20:15,  7.07it/s]

{'epoch': 1, 'iter': 19030, 'avg_loss': 8.58731518179612, 'avg_acc': 50.083744942462296, 'loss': 7.754740238189697}


EP_train:1:  69%|| 19042/27626 [44:51<20:13,  7.07it/s]

{'epoch': 1, 'iter': 19040, 'avg_loss': 8.587769921465423, 'avg_acc': 50.08320860248936, 'loss': 9.215872764587402}


EP_train:1:  69%|| 19052/27626 [44:52<20:08,  7.10it/s]

{'epoch': 1, 'iter': 19050, 'avg_loss': 8.587724207780548, 'avg_acc': 50.08382105926198, 'loss': 8.251119613647461}


EP_train:1:  69%|| 19062/27626 [44:54<20:21,  7.01it/s]

{'epoch': 1, 'iter': 19060, 'avg_loss': 8.587691851649378, 'avg_acc': 50.08607234667646, 'loss': 8.37430191040039}


EP_train:1:  69%|| 19072/27626 [44:55<20:16,  7.03it/s]

{'epoch': 1, 'iter': 19070, 'avg_loss': 8.587780765954301, 'avg_acc': 50.085699491374335, 'loss': 9.01882553100586}


EP_train:1:  69%|| 19082/27626 [44:56<20:15,  7.03it/s]

{'epoch': 1, 'iter': 19080, 'avg_loss': 8.587767321698214, 'avg_acc': 50.08516325140192, 'loss': 8.900618553161621}


EP_train:1:  69%|| 19092/27626 [44:58<20:13,  7.03it/s]

{'epoch': 1, 'iter': 19090, 'avg_loss': 8.58782024492445, 'avg_acc': 50.08331805562831, 'loss': 10.12753963470459}


EP_train:1:  69%|| 19102/27626 [44:59<20:05,  7.07it/s]

{'epoch': 1, 'iter': 19100, 'avg_loss': 8.587821607208772, 'avg_acc': 50.08229281189467, 'loss': 8.556095123291016}


EP_train:1:  69%|| 19112/27626 [45:01<20:04,  7.07it/s]

{'epoch': 1, 'iter': 19110, 'avg_loss': 8.587610734063597, 'avg_acc': 50.08323086180734, 'loss': 8.912331581115723}


EP_train:1:  69%|| 19122/27626 [45:02<20:08,  7.04it/s]

{'epoch': 1, 'iter': 19120, 'avg_loss': 8.587641332384047, 'avg_acc': 50.084494796297264, 'loss': 8.007006645202637}


EP_train:1:  69%|| 19132/27626 [45:03<20:10,  7.02it/s]

{'epoch': 1, 'iter': 19130, 'avg_loss': 8.587593916412006, 'avg_acc': 50.08379724008154, 'loss': 8.160104751586914}


EP_train:1:  69%|| 19142/27626 [45:05<20:12,  7.00it/s]

{'epoch': 1, 'iter': 19140, 'avg_loss': 8.587552063268689, 'avg_acc': 50.08179431586647, 'loss': 8.887008666992188}


EP_train:1:  69%|| 19152/27626 [45:06<20:07,  7.02it/s]

{'epoch': 1, 'iter': 19150, 'avg_loss': 8.58745681819365, 'avg_acc': 50.08273066680591, 'loss': 8.26496410369873}


EP_train:1:  69%|| 19162/27626 [45:08<20:02,  7.04it/s]

{'epoch': 1, 'iter': 19160, 'avg_loss': 8.587569840012485, 'avg_acc': 50.082524398517826, 'loss': 9.250429153442383}


EP_train:1:  69%|| 19172/27626 [45:09<19:58,  7.06it/s]

{'epoch': 1, 'iter': 19170, 'avg_loss': 8.587555633825687, 'avg_acc': 50.08248135204215, 'loss': 7.860811233520508}


EP_train:1:  69%|| 19182/27626 [45:11<19:53,  7.07it/s]

{'epoch': 1, 'iter': 19180, 'avg_loss': 8.58758043453558, 'avg_acc': 50.080809134038894, 'loss': 9.754951477050781}


EP_train:1:  69%|| 19192/27626 [45:12<19:51,  7.08it/s]

{'epoch': 1, 'iter': 19190, 'avg_loss': 8.587505934417456, 'avg_acc': 50.081255536449376, 'loss': 8.260507583618164}


EP_train:1:  70%|| 19202/27626 [45:13<19:52,  7.06it/s]

{'epoch': 1, 'iter': 19200, 'avg_loss': 8.587397584169645, 'avg_acc': 50.0833289932816, 'loss': 8.614505767822266}


EP_train:1:  70%|| 19212/27626 [45:15<19:43,  7.11it/s]

{'epoch': 1, 'iter': 19210, 'avg_loss': 8.5876171735466, 'avg_acc': 50.084261620946336, 'loss': 9.883369445800781}


EP_train:1:  70%|| 19222/27626 [45:16<19:46,  7.08it/s]

{'epoch': 1, 'iter': 19220, 'avg_loss': 8.58751998134498, 'avg_acc': 50.084380365225535, 'loss': 8.85362720489502}


EP_train:1:  70%|| 19232/27626 [45:18<19:45,  7.08it/s]

{'epoch': 1, 'iter': 19230, 'avg_loss': 8.587485066603762, 'avg_acc': 50.08498648016224, 'loss': 9.269917488098145}


EP_train:1:  70%|| 19242/27626 [45:19<19:53,  7.02it/s]

{'epoch': 1, 'iter': 19240, 'avg_loss': 8.587200405258045, 'avg_acc': 50.08477989709475, 'loss': 7.402807235717773}


EP_train:1:  70%|| 19252/27626 [45:20<19:44,  7.07it/s]

{'epoch': 1, 'iter': 19250, 'avg_loss': 8.587076434878075, 'avg_acc': 50.08473585787752, 'loss': 8.965787887573242}


EP_train:1:  70%|| 19262/27626 [45:22<19:45,  7.06it/s]

{'epoch': 1, 'iter': 19260, 'avg_loss': 8.58704892742715, 'avg_acc': 50.0856653340948, 'loss': 9.240427017211914}


EP_train:1:  70%|| 19272/27626 [45:23<19:44,  7.05it/s]

{'epoch': 1, 'iter': 19270, 'avg_loss': 8.58704381308022, 'avg_acc': 50.0822155051632, 'loss': 7.881059646606445}


EP_train:1:  70%|| 19282/27626 [45:25<19:41,  7.06it/s]

{'epoch': 1, 'iter': 19280, 'avg_loss': 8.586982137391097, 'avg_acc': 50.08233494113375, 'loss': 8.642962455749512}


EP_train:1:  70%|| 19292/27626 [45:26<19:42,  7.05it/s]

{'epoch': 1, 'iter': 19290, 'avg_loss': 8.586909908991531, 'avg_acc': 50.07905240785858, 'loss': 8.401217460632324}


EP_train:1:  70%|| 19302/27626 [45:27<19:38,  7.06it/s]

{'epoch': 1, 'iter': 19300, 'avg_loss': 8.586887260130863, 'avg_acc': 50.0796590850215, 'loss': 8.003853797912598}


EP_train:1:  70%|| 19312/27626 [45:29<19:36,  7.07it/s]

{'epoch': 1, 'iter': 19310, 'avg_loss': 8.586672056938006, 'avg_acc': 50.07977965926156, 'loss': 8.733479499816895}


EP_train:1:  70%|| 19322/27626 [45:30<19:45,  7.01it/s]

{'epoch': 1, 'iter': 19320, 'avg_loss': 8.586473114414865, 'avg_acc': 50.080870555354274, 'loss': 8.077934265136719}


EP_train:1:  70%|| 19332/27626 [45:32<19:35,  7.06it/s]

{'epoch': 1, 'iter': 19330, 'avg_loss': 8.586271810677152, 'avg_acc': 50.08179866535616, 'loss': 7.889377593994141}


EP_train:1:  70%|| 19342/27626 [45:33<19:43,  7.00it/s]

{'epoch': 1, 'iter': 19340, 'avg_loss': 8.586311970395132, 'avg_acc': 50.08159479861434, 'loss': 8.601678848266602}


EP_train:1:  70%|| 19352/27626 [45:35<19:33,  7.05it/s]

{'epoch': 1, 'iter': 19350, 'avg_loss': 8.586342340439495, 'avg_acc': 50.080099219678566, 'loss': 8.090590476989746}


EP_train:1:  70%|| 19362/27626 [45:36<19:27,  7.08it/s]

{'epoch': 1, 'iter': 19360, 'avg_loss': 8.586250646283919, 'avg_acc': 50.07941222044315, 'loss': 9.059069633483887}


EP_train:1:  70%|| 19372/27626 [45:37<19:28,  7.06it/s]

{'epoch': 1, 'iter': 19370, 'avg_loss': 8.586172582688278, 'avg_acc': 50.0793712250271, 'loss': 8.193264961242676}


EP_train:1:  70%|| 19382/27626 [45:39<19:23,  7.08it/s]

{'epoch': 1, 'iter': 19380, 'avg_loss': 8.586097666897535, 'avg_acc': 50.079169031525716, 'loss': 8.479926109313965}


EP_train:1:  70%|| 19392/27626 [45:40<19:21,  7.09it/s]

{'epoch': 1, 'iter': 19390, 'avg_loss': 8.585925691376511, 'avg_acc': 50.08106209066061, 'loss': 8.611482620239258}


EP_train:1:  70%|| 19402/27626 [45:42<19:27,  7.04it/s]

{'epoch': 1, 'iter': 19400, 'avg_loss': 8.585788368214086, 'avg_acc': 50.07860419566001, 'loss': 8.580511093139648}


EP_train:1:  70%|| 19412/27626 [45:43<19:27,  7.03it/s]

{'epoch': 1, 'iter': 19410, 'avg_loss': 8.585468383766658, 'avg_acc': 50.07985163051878, 'loss': 8.544818878173828}


EP_train:1:  70%|| 19422/27626 [45:44<19:21,  7.06it/s]

{'epoch': 1, 'iter': 19420, 'avg_loss': 8.585387010639652, 'avg_acc': 50.07997142268679, 'loss': 8.039721488952637}


EP_train:1:  70%|| 19432/27626 [45:46<19:25,  7.03it/s]

{'epoch': 1, 'iter': 19430, 'avg_loss': 8.58547698247145, 'avg_acc': 50.07896531315939, 'loss': 8.042198181152344}


EP_train:1:  70%|| 19442/27626 [45:47<19:17,  7.07it/s]

{'epoch': 1, 'iter': 19440, 'avg_loss': 8.585425491974645, 'avg_acc': 50.0798891517926, 'loss': 8.037203788757324}


EP_train:1:  70%|| 19452/27626 [45:49<19:15,  7.08it/s]

{'epoch': 1, 'iter': 19450, 'avg_loss': 8.585478087120219, 'avg_acc': 50.081615341113576, 'loss': 7.8606719970703125}


EP_train:1:  70%|| 19462/27626 [45:50<19:15,  7.06it/s]

{'epoch': 1, 'iter': 19460, 'avg_loss': 8.585727668814348, 'avg_acc': 50.081733980782076, 'loss': 8.776276588439941}


EP_train:1:  70%|| 19472/27626 [45:51<19:16,  7.05it/s]

{'epoch': 1, 'iter': 19470, 'avg_loss': 8.585843792522972, 'avg_acc': 50.08217348877818, 'loss': 7.789028167724609}


EP_train:1:  71%|| 19482/27626 [45:53<19:09,  7.08it/s]

{'epoch': 1, 'iter': 19480, 'avg_loss': 8.586083020831543, 'avg_acc': 50.084377085365226, 'loss': 8.75647258758545}


EP_train:1:  71%|| 19492/27626 [45:54<19:14,  7.04it/s]

{'epoch': 1, 'iter': 19490, 'avg_loss': 8.58616610098372, 'avg_acc': 50.08273049099584, 'loss': 9.290823936462402}


EP_train:1:  71%|| 19502/27626 [45:56<19:04,  7.10it/s]

{'epoch': 1, 'iter': 19500, 'avg_loss': 8.58597140712962, 'avg_acc': 50.0810855853546, 'loss': 7.988829612731934}


EP_train:1:  71%|| 19512/27626 [45:57<19:09,  7.06it/s]

{'epoch': 1, 'iter': 19510, 'avg_loss': 8.585811796917138, 'avg_acc': 50.080243196145766, 'loss': 8.199735641479492}


EP_train:1:  71%|| 19522/27626 [45:58<19:17,  7.00it/s]

{'epoch': 1, 'iter': 19520, 'avg_loss': 8.585823009837437, 'avg_acc': 50.07684032580298, 'loss': 8.743310928344727}


EP_train:1:  71%|| 19532/27626 [46:00<19:02,  7.08it/s]

{'epoch': 1, 'iter': 19530, 'avg_loss': 8.585910159171522, 'avg_acc': 50.074880958476264, 'loss': 9.059720039367676}


EP_train:1:  71%|| 19542/27626 [46:01<19:03,  7.07it/s]

{'epoch': 1, 'iter': 19540, 'avg_loss': 8.586213277468607, 'avg_acc': 50.074043037715576, 'loss': 9.588837623596191}


EP_train:1:  71%|| 19552/27626 [46:03<19:02,  7.06it/s]

{'epoch': 1, 'iter': 19550, 'avg_loss': 8.586139653882775, 'avg_acc': 50.075124034576234, 'loss': 7.803508758544922}


EP_train:1:  71%|| 19562/27626 [46:04<19:06,  7.04it/s]

{'epoch': 1, 'iter': 19560, 'avg_loss': 8.586127471109638, 'avg_acc': 50.07189049639589, 'loss': 8.845805168151855}


EP_train:1:  71%|| 19572/27626 [46:06<19:02,  7.05it/s]

{'epoch': 1, 'iter': 19570, 'avg_loss': 8.586213890517362, 'avg_acc': 50.07281181339738, 'loss': 8.537528038024902}


EP_train:1:  71%|| 19582/27626 [46:07<18:59,  7.06it/s]

{'epoch': 1, 'iter': 19580, 'avg_loss': 8.586166797427733, 'avg_acc': 50.07181706756549, 'loss': 8.254648208618164}


EP_train:1:  71%|| 19592/27626 [46:08<18:51,  7.10it/s]

{'epoch': 1, 'iter': 19590, 'avg_loss': 8.586253103402555, 'avg_acc': 50.07034480118422, 'loss': 9.688157081604004}


EP_train:1:  71%|| 19602/27626 [46:10<19:03,  7.02it/s]

{'epoch': 1, 'iter': 19600, 'avg_loss': 8.586198078333787, 'avg_acc': 50.070627774093154, 'loss': 8.851007461547852}


EP_train:1:  71%|| 19612/27626 [46:11<19:01,  7.02it/s]

{'epoch': 1, 'iter': 19610, 'avg_loss': 8.58613424525816, 'avg_acc': 50.07011371169242, 'loss': 8.985048294067383}


EP_train:1:  71%|| 19622/27626 [46:13<18:57,  7.03it/s]

{'epoch': 1, 'iter': 19620, 'avg_loss': 8.586052214903093, 'avg_acc': 50.071352122725656, 'loss': 8.435763359069824}


EP_train:1:  71%|| 19632/27626 [46:14<18:47,  7.09it/s]

{'epoch': 1, 'iter': 19630, 'avg_loss': 8.586004879342138, 'avg_acc': 50.07051984106769, 'loss': 8.336922645568848}


EP_train:1:  71%|| 19642/27626 [46:15<18:47,  7.08it/s]

{'epoch': 1, 'iter': 19640, 'avg_loss': 8.586200049608001, 'avg_acc': 50.069529300952084, 'loss': 8.838258743286133}


EP_train:1:  71%|| 19652/27626 [46:17<19:04,  6.97it/s]

{'epoch': 1, 'iter': 19650, 'avg_loss': 8.586150910337368, 'avg_acc': 50.070448068800566, 'loss': 9.417619705200195}


EP_train:1:  71%|| 19662/27626 [46:18<18:45,  7.08it/s]

{'epoch': 1, 'iter': 19660, 'avg_loss': 8.586198315592046, 'avg_acc': 50.073591119475104, 'loss': 9.241182327270508}


EP_train:1:  71%|| 19672/27626 [46:20<18:42,  7.08it/s]

{'epoch': 1, 'iter': 19670, 'avg_loss': 8.586276888550403, 'avg_acc': 50.07403029840882, 'loss': 8.124900817871094}


EP_train:1:  71%|| 19682/27626 [46:21<18:49,  7.03it/s]

{'epoch': 1, 'iter': 19680, 'avg_loss': 8.58619605750489, 'avg_acc': 50.07605685686703, 'loss': 7.29009485244751}


EP_train:1:  71%|| 19692/27626 [46:23<18:45,  7.05it/s]

{'epoch': 1, 'iter': 19690, 'avg_loss': 8.586292187150864, 'avg_acc': 50.07697044334976, 'loss': 8.184791564941406}


EP_train:1:  71%|| 19702/27626 [46:24<18:45,  7.04it/s]

{'epoch': 1, 'iter': 19700, 'avg_loss': 8.586310874313929, 'avg_acc': 50.07328308207705, 'loss': 9.43045425415039}


EP_train:1:  71%|| 19712/27626 [46:25<18:51,  7.00it/s]

{'epoch': 1, 'iter': 19710, 'avg_loss': 8.586240711582771, 'avg_acc': 50.07403860788392, 'loss': 8.950335502624512}


EP_train:1:  71%|| 19722/27626 [46:27<18:40,  7.06it/s]

{'epoch': 1, 'iter': 19720, 'avg_loss': 8.586185371517306, 'avg_acc': 50.075744130622176, 'loss': 8.595589637756348}


EP_train:1:  71%|| 19732/27626 [46:28<18:51,  6.97it/s]

{'epoch': 1, 'iter': 19730, 'avg_loss': 8.586165573114704, 'avg_acc': 50.07554736201916, 'loss': 8.611902236938477}


EP_train:1:  71%|| 19742/27626 [46:30<18:40,  7.03it/s]

{'epoch': 1, 'iter': 19740, 'avg_loss': 8.586027225675835, 'avg_acc': 50.07614229269034, 'loss': 8.19137191772461}


EP_train:1:  71%|| 19752/27626 [46:31<18:41,  7.02it/s]

{'epoch': 1, 'iter': 19750, 'avg_loss': 8.585967342507464, 'avg_acc': 50.07436332337603, 'loss': 7.801591396331787}


EP_train:1:  72%|| 19762/27626 [46:32<18:42,  7.01it/s]

{'epoch': 1, 'iter': 19760, 'avg_loss': 8.58581071794953, 'avg_acc': 50.074958251100654, 'loss': 8.5420560836792}


EP_train:1:  72%|| 19772/27626 [46:34<18:28,  7.09it/s]

{'epoch': 1, 'iter': 19770, 'avg_loss': 8.585707459076662, 'avg_acc': 50.073655859592336, 'loss': 8.003315925598145}


EP_train:1:  72%|| 19782/27626 [46:35<18:31,  7.06it/s]

{'epoch': 1, 'iter': 19780, 'avg_loss': 8.585521512310942, 'avg_acc': 50.073618623932056, 'loss': 8.36253833770752}


EP_train:1:  72%|| 19792/27626 [46:37<18:37,  7.01it/s]

{'epoch': 1, 'iter': 19790, 'avg_loss': 8.585260326857155, 'avg_acc': 50.0748446263453, 'loss': 8.962369918823242}


EP_train:1:  72%|| 19802/27626 [46:38<18:28,  7.06it/s]

{'epoch': 1, 'iter': 19800, 'avg_loss': 8.585285353218689, 'avg_acc': 50.07685849199536, 'loss': 8.4585542678833}


EP_train:1:  72%|| 19812/27626 [46:39<18:27,  7.06it/s]

{'epoch': 1, 'iter': 19810, 'avg_loss': 8.584976057144365, 'avg_acc': 50.078239361970624, 'loss': 8.84160041809082}


EP_train:1:  72%|| 19822/27626 [46:41<18:32,  7.01it/s]

{'epoch': 1, 'iter': 19820, 'avg_loss': 8.585036988395728, 'avg_acc': 50.07867287220624, 'loss': 8.525699615478516}


EP_train:1:  72%|| 19832/27626 [46:42<18:24,  7.06it/s]

{'epoch': 1, 'iter': 19830, 'avg_loss': 8.585090881384398, 'avg_acc': 50.078948363673035, 'loss': 8.932808876037598}


EP_train:1:  72%|| 19842/27626 [46:44<18:21,  7.07it/s]

{'epoch': 1, 'iter': 19840, 'avg_loss': 8.585085066736756, 'avg_acc': 50.08032609243486, 'loss': 8.583985328674316}


EP_train:1:  72%|| 19852/27626 [46:45<18:18,  7.07it/s]

{'epoch': 1, 'iter': 19850, 'avg_loss': 8.585055866695928, 'avg_acc': 50.07902624552919, 'loss': 8.879383087158203}


EP_train:1:  72%|| 19862/27626 [46:47<18:25,  7.03it/s]

{'epoch': 1, 'iter': 19860, 'avg_loss': 8.584955802371233, 'avg_acc': 50.08071723478174, 'loss': 7.97788143157959}


EP_train:1:  72%|| 19872/27626 [46:48<18:26,  7.01it/s]

{'epoch': 1, 'iter': 19870, 'avg_loss': 8.584791367693857, 'avg_acc': 50.07910397061044, 'loss': 7.804509162902832}


EP_train:1:  72%|| 19882/27626 [46:49<18:22,  7.02it/s]

{'epoch': 1, 'iter': 19880, 'avg_loss': 8.58480426323893, 'avg_acc': 50.07733514410744, 'loss': 9.01673412322998}


EP_train:1:  72%|| 19892/27626 [46:51<18:19,  7.03it/s]

{'epoch': 1, 'iter': 19890, 'avg_loss': 8.584756596416474, 'avg_acc': 50.07776758332915, 'loss': 8.549140930175781}


EP_train:1:  72%|| 19902/27626 [46:52<18:10,  7.08it/s]

{'epoch': 1, 'iter': 19900, 'avg_loss': 8.585018420373967, 'avg_acc': 50.07600120596955, 'loss': 8.747336387634277}


EP_train:1:  72%|| 19912/27626 [46:54<18:22,  7.00it/s]

{'epoch': 1, 'iter': 19910, 'avg_loss': 8.58497939213499, 'avg_acc': 50.07611998392848, 'loss': 8.837400436401367}


EP_train:1:  72%|| 19922/27626 [46:55<18:07,  7.09it/s]

{'epoch': 1, 'iter': 19920, 'avg_loss': 8.584898875974567, 'avg_acc': 50.07561116409819, 'loss': 8.566774368286133}


EP_train:1:  72%|| 19932/27626 [46:56<18:07,  7.07it/s]

{'epoch': 1, 'iter': 19930, 'avg_loss': 8.584803667868036, 'avg_acc': 50.07557322763534, 'loss': 8.867496490478516}


EP_train:1:  72%|| 19942/27626 [46:58<18:03,  7.09it/s]

{'epoch': 1, 'iter': 19940, 'avg_loss': 8.58470171979626, 'avg_acc': 50.07490848001604, 'loss': 9.473969459533691}


EP_train:1:  72%|| 19952/27626 [46:59<18:02,  7.09it/s]

{'epoch': 1, 'iter': 19950, 'avg_loss': 8.584778531620266, 'avg_acc': 50.074870933787786, 'loss': 9.01546859741211}


EP_train:1:  72%|| 19962/27626 [47:01<18:09,  7.04it/s]

{'epoch': 1, 'iter': 19960, 'avg_loss': 8.584887343354552, 'avg_acc': 50.072798206502675, 'loss': 9.505748748779297}


EP_train:1:  72%|| 19972/27626 [47:02<18:05,  7.05it/s]

{'epoch': 1, 'iter': 19970, 'avg_loss': 8.584892457800917, 'avg_acc': 50.07291823143558, 'loss': 8.919066429138184}


EP_train:1:  72%|| 19982/27626 [47:04<18:12,  7.00it/s]

{'epoch': 1, 'iter': 19980, 'avg_loss': 8.58489697602745, 'avg_acc': 50.074445723437265, 'loss': 8.03174114227295}


EP_train:1:  72%|| 19992/27626 [47:05<17:59,  7.07it/s]

{'epoch': 1, 'iter': 19990, 'avg_loss': 8.584754264287323, 'avg_acc': 50.07331424140863, 'loss': 9.106953620910645}


EP_train:1:  72%|| 20002/27626 [47:06<18:00,  7.06it/s]

{'epoch': 1, 'iter': 20000, 'avg_loss': 8.584667251346218, 'avg_acc': 50.07202764861757, 'loss': 8.647974967956543}


EP_train:1:  72%|| 20012/27626 [47:08<17:58,  7.06it/s]

{'epoch': 1, 'iter': 20010, 'avg_loss': 8.584664775778236, 'avg_acc': 50.069961521163364, 'loss': 8.312300682067871}


EP_train:1:  72%|| 20022/27626 [47:09<17:50,  7.10it/s]

{'epoch': 1, 'iter': 20020, 'avg_loss': 8.58465842439497, 'avg_acc': 50.06914614654613, 'loss': 8.454654693603516}


EP_train:1:  73%|| 20032/27626 [47:11<17:46,  7.12it/s]

{'epoch': 1, 'iter': 20030, 'avg_loss': 8.584579761026742, 'avg_acc': 50.067707553292394, 'loss': 8.839850425720215}


EP_train:1:  73%|| 20042/27626 [47:12<17:49,  7.09it/s]

{'epoch': 1, 'iter': 20040, 'avg_loss': 8.584572102315565, 'avg_acc': 50.0687652811736, 'loss': 8.732669830322266}


EP_train:1:  73%|| 20052/27626 [47:13<18:00,  7.01it/s]

{'epoch': 1, 'iter': 20050, 'avg_loss': 8.584613217480385, 'avg_acc': 50.06841928083388, 'loss': 7.620396614074707}


EP_train:1:  73%|| 20062/27626 [47:15<17:57,  7.02it/s]

{'epoch': 1, 'iter': 20060, 'avg_loss': 8.584631747057582, 'avg_acc': 50.06745052589602, 'loss': 7.770523548126221}


EP_train:1:  73%|| 20072/27626 [47:16<17:45,  7.09it/s]

{'epoch': 1, 'iter': 20070, 'avg_loss': 8.584564042301624, 'avg_acc': 50.064925763539435, 'loss': 8.641440391540527}


EP_train:1:  73%|| 20082/27626 [47:18<17:54,  7.02it/s]

{'epoch': 1, 'iter': 20080, 'avg_loss': 8.58476748522769, 'avg_acc': 50.06504905134207, 'loss': 9.10323715209961}


EP_train:1:  73%|| 20092/27626 [47:19<17:50,  7.04it/s]

{'epoch': 1, 'iter': 20090, 'avg_loss': 8.584594999620661, 'avg_acc': 50.068438604350206, 'loss': 7.618082523345947}


EP_train:1:  73%|| 20102/27626 [47:20<17:51,  7.02it/s]

{'epoch': 1, 'iter': 20100, 'avg_loss': 8.584554645269227, 'avg_acc': 50.06731630267151, 'loss': 9.670378684997559}


EP_train:1:  73%|| 20112/27626 [47:22<17:43,  7.07it/s]

{'epoch': 1, 'iter': 20110, 'avg_loss': 8.584489900323339, 'avg_acc': 50.066816667495395, 'loss': 8.107969284057617}


EP_train:1:  73%|| 20122/27626 [47:23<17:35,  7.11it/s]

{'epoch': 1, 'iter': 20120, 'avg_loss': 8.584458866766827, 'avg_acc': 50.06740470155558, 'loss': 8.229463577270508}


EP_train:1:  73%|| 20132/27626 [47:25<17:44,  7.04it/s]

{'epoch': 1, 'iter': 20130, 'avg_loss': 8.584442943546279, 'avg_acc': 50.06876831752024, 'loss': 9.008440017700195}


EP_train:1:  73%|| 20142/27626 [47:26<17:44,  7.03it/s]

{'epoch': 1, 'iter': 20140, 'avg_loss': 8.584472087388313, 'avg_acc': 50.06857901792364, 'loss': 8.416436195373535}


EP_train:1:  73%|| 20152/27626 [47:28<17:49,  6.99it/s]

{'epoch': 1, 'iter': 20150, 'avg_loss': 8.58445532262417, 'avg_acc': 50.06932038112253, 'loss': 7.994668483734131}


EP_train:1:  73%|| 20162/27626 [47:29<17:41,  7.03it/s]

{'epoch': 1, 'iter': 20160, 'avg_loss': 8.584290379008204, 'avg_acc': 50.06680596200586, 'loss': 7.55187463760376}


EP_train:1:  73%|| 20172/27626 [47:30<17:33,  7.08it/s]

{'epoch': 1, 'iter': 20170, 'avg_loss': 8.583971175397608, 'avg_acc': 50.066462991423336, 'loss': 7.761175632476807}


EP_train:1:  73%|| 20182/27626 [47:32<17:38,  7.03it/s]

{'epoch': 1, 'iter': 20180, 'avg_loss': 8.58397172760381, 'avg_acc': 50.06689460383529, 'loss': 9.003618240356445}


EP_train:1:  73%|| 20192/27626 [47:33<17:28,  7.09it/s]

{'epoch': 1, 'iter': 20190, 'avg_loss': 8.583968346673084, 'avg_acc': 50.06763533257392, 'loss': 8.75013542175293}


EP_train:1:  73%|| 20202/27626 [47:35<17:24,  7.11it/s]

{'epoch': 1, 'iter': 20200, 'avg_loss': 8.583854817015176, 'avg_acc': 50.06590020296024, 'loss': 8.678086280822754}


EP_train:1:  73%|| 20212/27626 [47:36<17:39,  6.99it/s]

{'epoch': 1, 'iter': 20210, 'avg_loss': 8.583664287930281, 'avg_acc': 50.063393696501905, 'loss': 7.999512672424316}


EP_train:1:  73%|| 20222/27626 [47:37<17:32,  7.03it/s]

{'epoch': 1, 'iter': 20220, 'avg_loss': 8.583643622016595, 'avg_acc': 50.06351688838336, 'loss': 8.157869338989258}


EP_train:1:  73%|| 20232/27626 [47:39<17:27,  7.06it/s]

{'epoch': 1, 'iter': 20230, 'avg_loss': 8.583623313201434, 'avg_acc': 50.061014037862684, 'loss': 8.71298599243164}


EP_train:1:  73%|| 20242/27626 [47:40<17:23,  7.07it/s]

{'epoch': 1, 'iter': 20240, 'avg_loss': 8.583589162440658, 'avg_acc': 50.06237340052368, 'loss': 9.00950813293457}


EP_train:1:  73%|| 20252/27626 [47:42<17:30,  7.02it/s]

{'epoch': 1, 'iter': 20250, 'avg_loss': 8.583479384411518, 'avg_acc': 50.06218828699818, 'loss': 8.703825950622559}


EP_train:1:  73%|| 20262/27626 [47:43<17:27,  7.03it/s]

{'epoch': 1, 'iter': 20260, 'avg_loss': 8.58331997183573, 'avg_acc': 50.062157593406056, 'loss': 8.440268516540527}


EP_train:1:  73%|| 20272/27626 [47:44<17:20,  7.07it/s]

{'epoch': 1, 'iter': 20270, 'avg_loss': 8.583403122853788, 'avg_acc': 50.06443934685018, 'loss': 10.033614158630371}


EP_train:1:  73%|| 20282/27626 [47:46<17:13,  7.11it/s]

{'epoch': 1, 'iter': 20280, 'avg_loss': 8.583433714204977, 'avg_acc': 50.06409940338248, 'loss': 9.428150177001953}


EP_train:1:  73%|| 20292/27626 [47:47<17:17,  7.07it/s]

{'epoch': 1, 'iter': 20290, 'avg_loss': 8.583529851969006, 'avg_acc': 50.064991868316, 'loss': 9.144043922424316}


EP_train:1:  73%|| 20302/27626 [47:49<17:19,  7.04it/s]

{'epoch': 1, 'iter': 20300, 'avg_loss': 8.58380844203024, 'avg_acc': 50.06311265454904, 'loss': 8.678680419921875}


EP_train:1:  74%|| 20312/27626 [47:50<17:10,  7.10it/s]

{'epoch': 1, 'iter': 20310, 'avg_loss': 8.583776045336245, 'avg_acc': 50.06231229383092, 'loss': 8.469206809997559}


EP_train:1:  74%|| 20322/27626 [47:51<17:16,  7.04it/s]

{'epoch': 1, 'iter': 20320, 'avg_loss': 8.583664569861307, 'avg_acc': 50.06335810245559, 'loss': 7.576458930969238}


EP_train:1:  74%|| 20332/27626 [47:53<17:15,  7.04it/s]

{'epoch': 1, 'iter': 20330, 'avg_loss': 8.583553411541835, 'avg_acc': 50.065939943927994, 'loss': 8.250749588012695}


EP_train:1:  74%|| 20342/27626 [47:54<17:17,  7.02it/s]

{'epoch': 1, 'iter': 20340, 'avg_loss': 8.583317632854738, 'avg_acc': 50.06467848188388, 'loss': 7.998701095581055}


EP_train:1:  74%|| 20352/27626 [47:56<17:14,  7.03it/s]

{'epoch': 1, 'iter': 20350, 'avg_loss': 8.583247165493423, 'avg_acc': 50.06403247997642, 'loss': 8.561745643615723}


EP_train:1:  74%|| 20362/27626 [47:57<17:17,  7.00it/s]

{'epoch': 1, 'iter': 20360, 'avg_loss': 8.583334284103676, 'avg_acc': 50.06154535631845, 'loss': 9.555963516235352}


EP_train:1:  74%|| 20372/27626 [47:59<17:05,  7.07it/s]

{'epoch': 1, 'iter': 20370, 'avg_loss': 8.583396807691145, 'avg_acc': 50.06120833537872, 'loss': 8.974686622619629}


EP_train:1:  74%|| 20382/27626 [48:00<17:11,  7.02it/s]

{'epoch': 1, 'iter': 20380, 'avg_loss': 8.583450051789592, 'avg_acc': 50.06133163240273, 'loss': 9.339095115661621}


EP_train:1:  74%|| 20392/27626 [48:01<17:07,  7.04it/s]

{'epoch': 1, 'iter': 20390, 'avg_loss': 8.583386399222007, 'avg_acc': 50.06099504683439, 'loss': 7.929762840270996}


EP_train:1:  74%|| 20402/27626 [48:03<17:05,  7.04it/s]

{'epoch': 1, 'iter': 20400, 'avg_loss': 8.583344261048266, 'avg_acc': 50.060965148767224, 'loss': 8.434861183166504}


EP_train:1:  74%|| 20412/27626 [48:04<17:12,  6.99it/s]

{'epoch': 1, 'iter': 20410, 'avg_loss': 8.583476299225895, 'avg_acc': 50.06062907255892, 'loss': 9.20409107208252}


EP_train:1:  74%|| 20422/27626 [48:06<17:10,  6.99it/s]

{'epoch': 1, 'iter': 20420, 'avg_loss': 8.583604663133237, 'avg_acc': 50.06151755545761, 'loss': 8.710217475891113}


EP_train:1:  74%|| 20432/27626 [48:07<17:01,  7.04it/s]

{'epoch': 1, 'iter': 20430, 'avg_loss': 8.583682008942787, 'avg_acc': 50.062252214771675, 'loss': 8.716157913208008}


EP_train:1:  74%|| 20442/27626 [48:08<17:02,  7.03it/s]

{'epoch': 1, 'iter': 20440, 'avg_loss': 8.583723491374588, 'avg_acc': 50.06298615527616, 'loss': 8.627110481262207}


EP_train:1:  74%|| 20452/27626 [48:10<16:58,  7.05it/s]

{'epoch': 1, 'iter': 20450, 'avg_loss': 8.583708896490538, 'avg_acc': 50.06402498655322, 'loss': 8.387702941894531}


EP_train:1:  74%|| 20462/27626 [48:11<16:55,  7.06it/s]

{'epoch': 1, 'iter': 20460, 'avg_loss': 8.583673151373356, 'avg_acc': 50.06460461365525, 'loss': 9.107382774353027}


EP_train:1:  74%|| 20472/27626 [48:13<16:53,  7.06it/s]

{'epoch': 1, 'iter': 20470, 'avg_loss': 8.583729945630918, 'avg_acc': 50.064115089639, 'loss': 9.013527870178223}


EP_train:1:  74%|| 20482/27626 [48:14<16:51,  7.06it/s]

{'epoch': 1, 'iter': 20480, 'avg_loss': 8.583823809836282, 'avg_acc': 50.06377862409062, 'loss': 8.674527168273926}


EP_train:1:  74%|| 20492/27626 [48:16<16:57,  7.01it/s]

{'epoch': 1, 'iter': 20490, 'avg_loss': 8.583930254948273, 'avg_acc': 50.06100239129374, 'loss': 9.136372566223145}


EP_train:1:  74%|| 20502/27626 [48:17<16:54,  7.02it/s]

{'epoch': 1, 'iter': 20500, 'avg_loss': 8.584108512289122, 'avg_acc': 50.06036290912639, 'loss': 8.793780326843262}


EP_train:1:  74%|| 20512/27626 [48:18<16:53,  7.02it/s]

{'epoch': 1, 'iter': 20510, 'avg_loss': 8.584240688235447, 'avg_acc': 50.0621617668568, 'loss': 8.51192569732666}


EP_train:1:  74%|| 20522/27626 [48:20<16:55,  7.00it/s]

{'epoch': 1, 'iter': 20520, 'avg_loss': 8.584498962924515, 'avg_acc': 50.06228375810146, 'loss': 8.621336936950684}


EP_train:1:  74%|| 20532/27626 [48:21<16:51,  7.02it/s]

{'epoch': 1, 'iter': 20530, 'avg_loss': 8.584402626009355, 'avg_acc': 50.065145389898206, 'loss': 8.046216011047363}


EP_train:1:  74%|| 20542/27626 [48:23<16:43,  7.06it/s]

{'epoch': 1, 'iter': 20540, 'avg_loss': 8.584285204113764, 'avg_acc': 50.06328805803029, 'loss': 8.092345237731934}


EP_train:1:  74%|| 20552/27626 [48:24<16:46,  7.03it/s]

{'epoch': 1, 'iter': 20550, 'avg_loss': 8.58431395265773, 'avg_acc': 50.06508199114398, 'loss': 8.76162338256836}


EP_train:1:  74%|| 20562/27626 [48:25<16:49,  7.00it/s]

{'epoch': 1, 'iter': 20560, 'avg_loss': 8.584262268256019, 'avg_acc': 50.064898351247514, 'loss': 7.940372943878174}


EP_train:1:  74%|| 20572/27626 [48:27<16:40,  7.05it/s]

{'epoch': 1, 'iter': 20570, 'avg_loss': 8.584119379187202, 'avg_acc': 50.06577828010306, 'loss': 8.902580261230469}


EP_train:1:  75%|| 20582/27626 [48:28<16:39,  7.04it/s]

{'epoch': 1, 'iter': 20580, 'avg_loss': 8.584292900387942, 'avg_acc': 50.06741654924445, 'loss': 8.45893669128418}


EP_train:1:  75%|| 20592/27626 [48:30<16:39,  7.04it/s]

{'epoch': 1, 'iter': 20590, 'avg_loss': 8.584445895521576, 'avg_acc': 50.0669285124569, 'loss': 8.879988670349121}


EP_train:1:  75%|| 20602/27626 [48:31<16:43,  7.00it/s]

{'epoch': 1, 'iter': 20600, 'avg_loss': 8.584427776802373, 'avg_acc': 50.065834182806654, 'loss': 8.232346534729004}


EP_train:1:  75%|| 20612/27626 [48:33<16:44,  6.98it/s]

{'epoch': 1, 'iter': 20610, 'avg_loss': 8.584409358806703, 'avg_acc': 50.065044151181404, 'loss': 8.610525131225586}


EP_train:1:  75%|| 20622/27626 [48:34<16:37,  7.02it/s]

{'epoch': 1, 'iter': 20620, 'avg_loss': 8.584505678697353, 'avg_acc': 50.064709519421946, 'loss': 9.191475868225098}


EP_train:1:  75%|| 20632/27626 [48:35<16:37,  7.01it/s]

{'epoch': 1, 'iter': 20630, 'avg_loss': 8.58448015445711, 'avg_acc': 50.06301197227473, 'loss': 8.384824752807617}


EP_train:1:  75%|| 20642/27626 [48:37<16:34,  7.02it/s]

{'epoch': 1, 'iter': 20640, 'avg_loss': 8.584475575511721, 'avg_acc': 50.063435637808254, 'loss': 8.469968795776367}


EP_train:1:  75%|| 20652/27626 [48:38<16:38,  6.99it/s]

{'epoch': 1, 'iter': 20650, 'avg_loss': 8.584482132104123, 'avg_acc': 50.06325359546753, 'loss': 8.140626907348633}


EP_train:1:  75%|| 20662/27626 [48:40<16:23,  7.08it/s]

{'epoch': 1, 'iter': 20660, 'avg_loss': 8.58429959242575, 'avg_acc': 50.064735491989744, 'loss': 7.434754371643066}


EP_train:1:  75%|| 20672/27626 [48:41<16:22,  7.08it/s]

{'epoch': 1, 'iter': 20670, 'avg_loss': 8.584298775738286, 'avg_acc': 50.06500653088869, 'loss': 7.928297519683838}


EP_train:1:  75%|| 20682/27626 [48:42<16:24,  7.06it/s]

{'epoch': 1, 'iter': 20680, 'avg_loss': 8.584313203942516, 'avg_acc': 50.066939461341335, 'loss': 7.665139198303223}


EP_train:1:  75%|| 20692/27626 [48:44<16:33,  6.98it/s]

{'epoch': 1, 'iter': 20690, 'avg_loss': 8.584708664670885, 'avg_acc': 50.069172587115176, 'loss': 9.557658195495605}


EP_train:1:  75%|| 20702/27626 [48:45<16:24,  7.03it/s]

{'epoch': 1, 'iter': 20700, 'avg_loss': 8.584678581052227, 'avg_acc': 50.07004492536592, 'loss': 8.13505744934082}


EP_train:1:  75%|| 20712/27626 [48:47<16:20,  7.05it/s]

{'epoch': 1, 'iter': 20710, 'avg_loss': 8.584616963023365, 'avg_acc': 50.07182173724109, 'loss': 8.894726753234863}


EP_train:1:  75%|| 20722/27626 [48:48<16:14,  7.08it/s]

{'epoch': 1, 'iter': 20720, 'avg_loss': 8.584766483233246, 'avg_acc': 50.0702789440664, 'loss': 8.707984924316406}


EP_train:1:  75%|| 20732/27626 [48:50<16:12,  7.09it/s]

{'epoch': 1, 'iter': 20730, 'avg_loss': 8.584953829104863, 'avg_acc': 50.06979282234335, 'loss': 8.78598690032959}


EP_train:1:  75%|| 20742/27626 [48:51<16:19,  7.03it/s]

{'epoch': 1, 'iter': 20740, 'avg_loss': 8.584950247137831, 'avg_acc': 50.06840316281761, 'loss': 8.326008796691895}


EP_train:1:  75%|| 20752/27626 [48:52<16:14,  7.05it/s]

{'epoch': 1, 'iter': 20750, 'avg_loss': 8.584881056004804, 'avg_acc': 50.06821960387451, 'loss': 8.214425086975098}


EP_train:1:  75%|| 20762/27626 [48:54<16:14,  7.04it/s]

{'epoch': 1, 'iter': 20760, 'avg_loss': 8.584889783478445, 'avg_acc': 50.068638312220024, 'loss': 8.110922813415527}


EP_train:1:  75%|| 20772/27626 [48:55<16:13,  7.04it/s]

{'epoch': 1, 'iter': 20770, 'avg_loss': 8.58491410275221, 'avg_acc': 50.068906167252415, 'loss': 8.299605369567871}


EP_train:1:  75%|| 20782/27626 [48:57<16:17,  7.00it/s]

{'epoch': 1, 'iter': 20780, 'avg_loss': 8.584926698614112, 'avg_acc': 50.0687226312497, 'loss': 8.75167179107666}


EP_train:1:  75%|| 20792/27626 [48:58<16:23,  6.95it/s]

{'epoch': 1, 'iter': 20790, 'avg_loss': 8.584879175901447, 'avg_acc': 50.069290798903374, 'loss': 9.116107940673828}


EP_train:1:  75%|| 20802/27626 [48:59<16:13,  7.01it/s]

{'epoch': 1, 'iter': 20800, 'avg_loss': 8.584867433632608, 'avg_acc': 50.071661218210664, 'loss': 8.091983795166016}


EP_train:1:  75%|| 20812/27626 [49:01<16:05,  7.06it/s]

{'epoch': 1, 'iter': 20810, 'avg_loss': 8.584850549800983, 'avg_acc': 50.06997501321416, 'loss': 8.14561653137207}


EP_train:1:  75%|| 20822/27626 [49:02<16:04,  7.05it/s]

{'epoch': 1, 'iter': 20820, 'avg_loss': 8.584792864619812, 'avg_acc': 50.06889078334375, 'loss': 7.747006416320801}


EP_train:1:  75%|| 20832/27626 [49:04<16:00,  7.07it/s]

{'epoch': 1, 'iter': 20830, 'avg_loss': 8.584887608593192, 'avg_acc': 50.070357880082575, 'loss': 8.908944129943848}


EP_train:1:  75%|| 20842/27626 [49:05<15:56,  7.09it/s]

{'epoch': 1, 'iter': 20840, 'avg_loss': 8.584726792082872, 'avg_acc': 50.07017417590327, 'loss': 8.63209342956543}


EP_train:1:  75%|| 20852/27626 [49:07<15:53,  7.11it/s]

{'epoch': 1, 'iter': 20850, 'avg_loss': 8.584779573466806, 'avg_acc': 50.06954102920723, 'loss': 9.023155212402344}


EP_train:1:  76%|| 20862/27626 [49:08<15:54,  7.09it/s]

{'epoch': 1, 'iter': 20860, 'avg_loss': 8.584831346916241, 'avg_acc': 50.06711087675567, 'loss': 9.38090991973877}


EP_train:1:  76%|| 20872/27626 [49:09<16:06,  6.99it/s]

{'epoch': 1, 'iter': 20870, 'avg_loss': 8.584803449502703, 'avg_acc': 50.066330075223995, 'loss': 8.115778923034668}


EP_train:1:  76%|| 20882/27626 [49:11<15:58,  7.04it/s]

{'epoch': 1, 'iter': 20880, 'avg_loss': 8.585053187324126, 'avg_acc': 50.06899214597003, 'loss': 8.867382049560547}


EP_train:1:  76%|| 20892/27626 [49:12<15:53,  7.06it/s]

{'epoch': 1, 'iter': 20890, 'avg_loss': 8.584946534511637, 'avg_acc': 50.06865994926045, 'loss': 7.69257926940918}


EP_train:1:  76%|| 20902/27626 [49:14<15:53,  7.05it/s]

{'epoch': 1, 'iter': 20900, 'avg_loss': 8.58485186519215, 'avg_acc': 50.06907564231376, 'loss': 8.64983081817627}


EP_train:1:  76%|| 20912/27626 [49:15<15:47,  7.09it/s]

{'epoch': 1, 'iter': 20910, 'avg_loss': 8.584806838374666, 'avg_acc': 50.06784706613744, 'loss': 8.748143196105957}


EP_train:1:  76%|| 20922/27626 [49:16<15:49,  7.06it/s]

{'epoch': 1, 'iter': 20920, 'avg_loss': 8.584683155182711, 'avg_acc': 50.067964007456624, 'loss': 8.90088176727295}


EP_train:1:  76%|| 20932/27626 [49:18<15:52,  7.03it/s]

{'epoch': 1, 'iter': 20930, 'avg_loss': 8.58476329008123, 'avg_acc': 50.06793153695476, 'loss': 7.977900981903076}


EP_train:1:  76%|| 20942/27626 [49:19<15:47,  7.05it/s]

{'epoch': 1, 'iter': 20940, 'avg_loss': 8.584857715792314, 'avg_acc': 50.067899097464306, 'loss': 8.243054389953613}


EP_train:1:  76%|| 20952/27626 [49:21<15:48,  7.04it/s]

{'epoch': 1, 'iter': 20950, 'avg_loss': 8.584953512532529, 'avg_acc': 50.06652427091785, 'loss': 9.385354995727539}


EP_train:1:  76%|| 20962/27626 [49:22<15:58,  6.95it/s]

{'epoch': 1, 'iter': 20960, 'avg_loss': 8.584942336869794, 'avg_acc': 50.069921520919806, 'loss': 8.128490447998047}


EP_train:1:  76%|| 20972/27626 [49:24<15:46,  7.03it/s]

{'epoch': 1, 'iter': 20970, 'avg_loss': 8.584942062470336, 'avg_acc': 50.07301750035764, 'loss': 7.994849681854248}


EP_train:1:  76%|| 20982/27626 [49:25<15:45,  7.03it/s]

{'epoch': 1, 'iter': 20980, 'avg_loss': 8.584948559642822, 'avg_acc': 50.072982698632096, 'loss': 8.467029571533203}


EP_train:1:  76%|| 20992/27626 [49:26<15:38,  7.07it/s]

{'epoch': 1, 'iter': 20990, 'avg_loss': 8.584932066669493, 'avg_acc': 50.07354342337192, 'loss': 8.469751358032227}


EP_train:1:  76%|| 21002/27626 [49:28<15:37,  7.06it/s]

{'epoch': 1, 'iter': 21000, 'avg_loss': 8.584886377842379, 'avg_acc': 50.07231798485786, 'loss': 8.102126121520996}


EP_train:1:  76%|| 21012/27626 [49:29<15:37,  7.05it/s]

{'epoch': 1, 'iter': 21010, 'avg_loss': 8.584726935934007, 'avg_acc': 50.07124244443387, 'loss': 8.009537696838379}


EP_train:1:  76%|| 21022/27626 [49:31<15:37,  7.04it/s]

{'epoch': 1, 'iter': 21020, 'avg_loss': 8.58476434675612, 'avg_acc': 50.07031658817374, 'loss': 8.400469779968262}


EP_train:1:  76%|| 21032/27626 [49:32<15:36,  7.04it/s]

{'epoch': 1, 'iter': 21030, 'avg_loss': 8.584689729252599, 'avg_acc': 50.06983738291094, 'loss': 7.745779514312744}


EP_train:1:  76%|| 21042/27626 [49:33<15:40,  7.00it/s]

{'epoch': 1, 'iter': 21040, 'avg_loss': 8.584503762662301, 'avg_acc': 50.069952711373034, 'loss': 7.9586286544799805}


EP_train:1:  76%|| 21052/27626 [49:35<15:42,  6.98it/s]

{'epoch': 1, 'iter': 21050, 'avg_loss': 8.584439312758782, 'avg_acc': 50.07066172628378, 'loss': 8.485718727111816}


EP_train:1:  76%|| 21062/27626 [49:36<15:28,  7.07it/s]

{'epoch': 1, 'iter': 21060, 'avg_loss': 8.584410402975294, 'avg_acc': 50.070628175300314, 'loss': 8.328866958618164}


EP_train:1:  76%|| 21072/27626 [49:38<15:30,  7.05it/s]

{'epoch': 1, 'iter': 21070, 'avg_loss': 8.584269419997742, 'avg_acc': 50.07118788856722, 'loss': 8.24765396118164}


EP_train:1:  76%|| 21082/27626 [49:39<15:23,  7.09it/s]

{'epoch': 1, 'iter': 21080, 'avg_loss': 8.584221977746711, 'avg_acc': 50.07204354632133, 'loss': 8.862617492675781}


EP_train:1:  76%|| 21092/27626 [49:40<15:23,  7.08it/s]

{'epoch': 1, 'iter': 21090, 'avg_loss': 8.584353508636603, 'avg_acc': 50.07304656014414, 'loss': 9.807647705078125}


EP_train:1:  76%|| 21102/27626 [49:42<15:23,  7.07it/s]

{'epoch': 1, 'iter': 21100, 'avg_loss': 8.58443325934526, 'avg_acc': 50.0752334012606, 'loss': 8.840558052062988}


EP_train:1:  76%|| 21112/27626 [49:43<15:19,  7.08it/s]

{'epoch': 1, 'iter': 21110, 'avg_loss': 8.584516714232587, 'avg_acc': 50.07682606224243, 'loss': 8.351219177246094}


EP_train:1:  76%|| 21122/27626 [49:45<15:29,  7.00it/s]

{'epoch': 1, 'iter': 21120, 'avg_loss': 8.584419163456074, 'avg_acc': 50.07397850480564, 'loss': 8.780352592468262}


EP_train:1:  76%|| 21132/27626 [49:46<15:23,  7.03it/s]

{'epoch': 1, 'iter': 21130, 'avg_loss': 8.584493635663318, 'avg_acc': 50.07409138232928, 'loss': 8.495360374450684}


EP_train:1:  77%|| 21142/27626 [49:48<15:21,  7.03it/s]

{'epoch': 1, 'iter': 21140, 'avg_loss': 8.58450059066133, 'avg_acc': 50.07376070195355, 'loss': 9.030054092407227}


EP_train:1:  77%|| 21152/27626 [49:49<15:14,  7.08it/s]

{'epoch': 1, 'iter': 21150, 'avg_loss': 8.584346571226085, 'avg_acc': 50.07476005862607, 'loss': 7.543308258056641}


EP_train:1:  77%|| 21162/27626 [49:50<15:17,  7.04it/s]

{'epoch': 1, 'iter': 21160, 'avg_loss': 8.584328910611672, 'avg_acc': 50.07531543877889, 'loss': 8.54531478881836}


EP_train:1:  77%|| 21172/27626 [49:52<15:17,  7.03it/s]

{'epoch': 1, 'iter': 21170, 'avg_loss': 8.584448536546773, 'avg_acc': 50.076608332152475, 'loss': 9.252697944641113}


EP_train:1:  77%|| 21182/27626 [49:53<15:09,  7.09it/s]

{'epoch': 1, 'iter': 21180, 'avg_loss': 8.584419825254793, 'avg_acc': 50.07229356498749, 'loss': 8.92006778717041}


EP_train:1:  77%|| 21192/27626 [49:55<15:13,  7.05it/s]

{'epoch': 1, 'iter': 21190, 'avg_loss': 8.584310646848294, 'avg_acc': 50.07240691803124, 'loss': 8.215875625610352}


EP_train:1:  77%|| 21202/27626 [49:56<15:14,  7.03it/s]

{'epoch': 1, 'iter': 21200, 'avg_loss': 8.58408430228677, 'avg_acc': 50.07237276543559, 'loss': 8.579930305480957}


EP_train:1:  77%|| 21212/27626 [49:57<15:13,  7.02it/s]

{'epoch': 1, 'iter': 21210, 'avg_loss': 8.58410635939144, 'avg_acc': 50.07292796190656, 'loss': 8.99317741394043}


EP_train:1:  77%|| 21222/27626 [49:59<15:16,  6.99it/s]

{'epoch': 1, 'iter': 21220, 'avg_loss': 8.584005958571868, 'avg_acc': 50.07510249281372, 'loss': 8.253748893737793}


EP_train:1:  77%|| 21232/27626 [50:00<15:14,  6.99it/s]

{'epoch': 1, 'iter': 21230, 'avg_loss': 8.584098009802418, 'avg_acc': 50.07447835711931, 'loss': 9.364303588867188}


EP_train:1:  77%|| 21242/27626 [50:02<15:01,  7.08it/s]

{'epoch': 1, 'iter': 21240, 'avg_loss': 8.584148454798585, 'avg_acc': 50.07503177816487, 'loss': 8.099749565124512}


EP_train:1:  77%|| 21252/27626 [50:03<15:02,  7.07it/s]

{'epoch': 1, 'iter': 21250, 'avg_loss': 8.584103900759894, 'avg_acc': 50.075437626464634, 'loss': 7.858026504516602}


EP_train:1:  77%|| 21262/27626 [50:04<14:57,  7.09it/s]

{'epoch': 1, 'iter': 21260, 'avg_loss': 8.58399798545893, 'avg_acc': 50.07657800667889, 'loss': 8.152775764465332}


EP_train:1:  77%|| 21272/27626 [50:06<14:57,  7.08it/s]

{'epoch': 1, 'iter': 21270, 'avg_loss': 8.584021698004504, 'avg_acc': 50.07948027831319, 'loss': 9.414530754089355}


EP_train:1:  77%|| 21282/27626 [50:07<14:56,  7.07it/s]

{'epoch': 1, 'iter': 21280, 'avg_loss': 8.583960137036302, 'avg_acc': 50.077974484281754, 'loss': 8.047699928283691}


EP_train:1:  77%|| 21292/27626 [50:09<14:56,  7.06it/s]

{'epoch': 1, 'iter': 21290, 'avg_loss': 8.583951022468085, 'avg_acc': 50.075442675308814, 'loss': 9.103338241577148}


EP_train:1:  77%|| 21302/27626 [50:10<14:53,  7.08it/s]

{'epoch': 1, 'iter': 21300, 'avg_loss': 8.583923179047481, 'avg_acc': 50.07408689732876, 'loss': 8.115360260009766}


EP_train:1:  77%|| 21312/27626 [50:12<15:02,  7.00it/s]

{'epoch': 1, 'iter': 21310, 'avg_loss': 8.583958009603766, 'avg_acc': 50.07375885692835, 'loss': 8.56452465057373}


EP_train:1:  77%|| 21322/27626 [50:13<15:01,  6.99it/s]

{'epoch': 1, 'iter': 21320, 'avg_loss': 8.583969825349937, 'avg_acc': 50.073284555133434, 'loss': 8.826177597045898}


EP_train:1:  77%|| 21332/27626 [50:14<14:50,  7.07it/s]

{'epoch': 1, 'iter': 21330, 'avg_loss': 8.583924114505738, 'avg_acc': 50.073689700435985, 'loss': 8.973633766174316}


EP_train:1:  77%|| 21342/27626 [50:16<14:48,  7.07it/s]

{'epoch': 1, 'iter': 21340, 'avg_loss': 8.58387794671591, 'avg_acc': 50.07438732955344, 'loss': 8.138700485229492}


EP_train:1:  77%|| 21352/27626 [50:17<14:48,  7.06it/s]

{'epoch': 1, 'iter': 21350, 'avg_loss': 8.583830058047054, 'avg_acc': 50.07479157884877, 'loss': 8.602117538452148}


EP_train:1:  77%|| 21362/27626 [50:19<14:47,  7.06it/s]

{'epoch': 1, 'iter': 21360, 'avg_loss': 8.583936196186977, 'avg_acc': 50.07329361921259, 'loss': 8.595484733581543}


EP_train:1:  77%|| 21372/27626 [50:20<14:49,  7.03it/s]

{'epoch': 1, 'iter': 21370, 'avg_loss': 8.583967906680943, 'avg_acc': 50.07223574002152, 'loss': 8.298197746276855}


EP_train:1:  77%|| 21382/27626 [50:21<14:44,  7.06it/s]

{'epoch': 1, 'iter': 21380, 'avg_loss': 8.584065085883381, 'avg_acc': 50.070448061362896, 'loss': 9.075072288513184}


EP_train:1:  77%|| 21392/27626 [50:23<14:47,  7.03it/s]

{'epoch': 1, 'iter': 21390, 'avg_loss': 8.584082712816615, 'avg_acc': 50.07143775419569, 'loss': 8.705894470214844}


EP_train:1:  77%|| 21402/27626 [50:24<14:44,  7.03it/s]

{'epoch': 1, 'iter': 21400, 'avg_loss': 8.58405728115333, 'avg_acc': 50.07009018270174, 'loss': 8.233880996704102}


EP_train:1:  78%|| 21412/27626 [50:26<14:45,  7.02it/s]

{'epoch': 1, 'iter': 21410, 'avg_loss': 8.583840164530924, 'avg_acc': 50.06888982298818, 'loss': 8.610783576965332}


EP_train:1:  78%|| 21422/27626 [50:27<14:44,  7.01it/s]

{'epoch': 1, 'iter': 21420, 'avg_loss': 8.583742389391352, 'avg_acc': 50.07031651183418, 'loss': 8.063223838806152}


EP_train:1:  78%|| 21432/27626 [50:29<14:36,  7.07it/s]

{'epoch': 1, 'iter': 21430, 'avg_loss': 8.583694846935698, 'avg_acc': 50.06955461714339, 'loss': 8.405503273010254}


EP_train:1:  78%|| 21442/27626 [50:30<14:37,  7.04it/s]

{'epoch': 1, 'iter': 21440, 'avg_loss': 8.583811832182633, 'avg_acc': 50.06966792593629, 'loss': 8.745129585266113}


EP_train:1:  78%|| 21452/27626 [50:31<14:35,  7.05it/s]

{'epoch': 1, 'iter': 21450, 'avg_loss': 8.583772521703416, 'avg_acc': 50.069198405668736, 'loss': 8.347576141357422}


EP_train:1:  78%|| 21462/27626 [50:33<14:34,  7.05it/s]

{'epoch': 1, 'iter': 21460, 'avg_loss': 8.583707470900748, 'avg_acc': 50.068874935930296, 'loss': 8.459775924682617}


EP_train:1:  78%|| 21472/27626 [50:34<14:32,  7.05it/s]

{'epoch': 1, 'iter': 21470, 'avg_loss': 8.583581191049557, 'avg_acc': 50.06811513203856, 'loss': 7.849887847900391}


EP_train:1:  78%|| 21482/27626 [50:36<14:26,  7.09it/s]

{'epoch': 1, 'iter': 21480, 'avg_loss': 8.583421247941445, 'avg_acc': 50.0682288999581, 'loss': 7.98245096206665}


EP_train:1:  78%|| 21492/27626 [50:37<14:40,  6.97it/s]

{'epoch': 1, 'iter': 21490, 'avg_loss': 8.583287199114551, 'avg_acc': 50.06950583965381, 'loss': 8.847389221191406}


EP_train:1:  78%|| 21502/27626 [50:38<14:39,  6.97it/s]

{'epoch': 1, 'iter': 21500, 'avg_loss': 8.583183176555965, 'avg_acc': 50.069182828705635, 'loss': 8.112727165222168}


EP_train:1:  78%|| 21512/27626 [50:40<14:26,  7.06it/s]

{'epoch': 1, 'iter': 21510, 'avg_loss': 8.583091654752478, 'avg_acc': 50.068860118079115, 'loss': 8.36896800994873}


EP_train:1:  78%|| 21522/27626 [50:41<14:30,  7.01it/s]

{'epoch': 1, 'iter': 21520, 'avg_loss': 8.583140826824623, 'avg_acc': 50.068973328376934, 'loss': 9.192620277404785}


EP_train:1:  78%|| 21532/27626 [50:43<14:21,  7.07it/s]

{'epoch': 1, 'iter': 21530, 'avg_loss': 8.583043500276533, 'avg_acc': 50.070682968742744, 'loss': 8.737203598022461}


EP_train:1:  78%|| 21542/27626 [50:44<14:21,  7.07it/s]

{'epoch': 1, 'iter': 21540, 'avg_loss': 8.582888042653959, 'avg_acc': 50.071665660832835, 'loss': 7.599732875823975}


EP_train:1:  78%|| 21552/27626 [50:45<14:20,  7.06it/s]

{'epoch': 1, 'iter': 21550, 'avg_loss': 8.582864719313163, 'avg_acc': 50.068152289916945, 'loss': 9.442787170410156}


EP_train:1:  78%|| 21562/27626 [50:47<14:15,  7.08it/s]

{'epoch': 1, 'iter': 21560, 'avg_loss': 8.582994134542998, 'avg_acc': 50.06783080562126, 'loss': 9.180282592773438}


EP_train:1:  78%|| 21572/27626 [50:48<14:15,  7.07it/s]

{'epoch': 1, 'iter': 21570, 'avg_loss': 8.583017952302573, 'avg_acc': 50.06808910110797, 'loss': 8.250271797180176}


EP_train:1:  78%|| 21582/27626 [50:50<14:23,  7.00it/s]

{'epoch': 1, 'iter': 21580, 'avg_loss': 8.582989815449086, 'avg_acc': 50.06791274732404, 'loss': 8.22610092163086}


EP_train:1:  78%|| 21592/27626 [50:51<14:19,  7.02it/s]

{'epoch': 1, 'iter': 21590, 'avg_loss': 8.583043549071974, 'avg_acc': 50.06759182066601, 'loss': 9.011958122253418}


EP_train:1:  78%|| 21602/27626 [50:53<14:16,  7.03it/s]

{'epoch': 1, 'iter': 21600, 'avg_loss': 8.583041567453868, 'avg_acc': 50.06828387574649, 'loss': 8.281316757202148}


EP_train:1:  78%|| 21612/27626 [50:54<14:11,  7.06it/s]

{'epoch': 1, 'iter': 21610, 'avg_loss': 8.58317666168287, 'avg_acc': 50.06781847207441, 'loss': 9.343143463134766}


EP_train:1:  78%|| 21622/27626 [50:55<14:10,  7.06it/s]

{'epoch': 1, 'iter': 21620, 'avg_loss': 8.583186379160924, 'avg_acc': 50.06749803431849, 'loss': 8.608236312866211}


EP_train:1:  78%|| 21632/27626 [50:57<14:08,  7.07it/s]

{'epoch': 1, 'iter': 21630, 'avg_loss': 8.583078650494432, 'avg_acc': 50.06920045305348, 'loss': 8.412334442138672}


EP_train:1:  78%|| 21642/27626 [50:58<14:08,  7.06it/s]

{'epoch': 1, 'iter': 21640, 'avg_loss': 8.583055547863447, 'avg_acc': 50.070323691141816, 'loss': 7.931804180145264}


EP_train:1:  78%|| 21652/27626 [51:00<14:05,  7.07it/s]

{'epoch': 1, 'iter': 21650, 'avg_loss': 8.583097875919371, 'avg_acc': 50.07115722137545, 'loss': 9.184123039245605}


EP_train:1:  78%|| 21662/27626 [51:01<14:02,  7.08it/s]

{'epoch': 1, 'iter': 21660, 'avg_loss': 8.583089100456563, 'avg_acc': 50.070258759983375, 'loss': 8.891984939575195}


EP_train:1:  78%|| 21672/27626 [51:02<14:09,  7.01it/s]

{'epoch': 1, 'iter': 21670, 'avg_loss': 8.583110012558587, 'avg_acc': 50.067919108485995, 'loss': 8.647960662841797}


EP_train:1:  78%|| 21682/27626 [51:04<14:05,  7.03it/s]

{'epoch': 1, 'iter': 21680, 'avg_loss': 8.583065122163887, 'avg_acc': 50.07091462570915, 'loss': 8.477277755737305}


EP_train:1:  79%|| 21692/27626 [51:05<14:01,  7.06it/s]

{'epoch': 1, 'iter': 21690, 'avg_loss': 8.583213717911859, 'avg_acc': 50.07001751878659, 'loss': 8.997584342956543}


EP_train:1:  79%|| 21702/27626 [51:07<14:05,  7.00it/s]

{'epoch': 1, 'iter': 21700, 'avg_loss': 8.583249030006211, 'avg_acc': 50.07041726187733, 'loss': 8.6831693649292}


EP_train:1:  79%|| 21712/27626 [51:08<13:54,  7.09it/s]

{'epoch': 1, 'iter': 21710, 'avg_loss': 8.583161729257663, 'avg_acc': 50.06894546543227, 'loss': 8.160008430480957}


EP_train:1:  79%|| 21722/27626 [51:10<13:51,  7.10it/s]

{'epoch': 1, 'iter': 21720, 'avg_loss': 8.583194843681415, 'avg_acc': 50.070927903871834, 'loss': 9.238142967224121}


EP_train:1:  79%|| 21732/27626 [51:11<13:50,  7.10it/s]

{'epoch': 1, 'iter': 21730, 'avg_loss': 8.583124867717022, 'avg_acc': 50.06931342322029, 'loss': 8.304811477661133}


EP_train:1:  79%|| 21742/27626 [51:12<13:59,  7.01it/s]

{'epoch': 1, 'iter': 21740, 'avg_loss': 8.58292667914334, 'avg_acc': 50.06885032887172, 'loss': 8.456328392028809}


EP_train:1:  79%|| 21752/27626 [51:14<13:53,  7.05it/s]

{'epoch': 1, 'iter': 21750, 'avg_loss': 8.582880551522038, 'avg_acc': 50.07025539055676, 'loss': 7.899572372436523}


EP_train:1:  79%|| 21762/27626 [51:15<13:59,  6.99it/s]

{'epoch': 1, 'iter': 21760, 'avg_loss': 8.582926170970909, 'avg_acc': 50.07151555535132, 'loss': 8.676159858703613}


EP_train:1:  79%|| 21772/27626 [51:17<14:01,  6.96it/s]

{'epoch': 1, 'iter': 21770, 'avg_loss': 8.583013760074914, 'avg_acc': 50.07004731064259, 'loss': 8.496767044067383}


EP_train:1:  79%|| 21782/27626 [51:18<13:52,  7.02it/s]

{'epoch': 1, 'iter': 21780, 'avg_loss': 8.582920379729744, 'avg_acc': 50.071449887516636, 'loss': 8.032228469848633}


EP_train:1:  79%|| 21792/27626 [51:19<13:44,  7.07it/s]

{'epoch': 1, 'iter': 21790, 'avg_loss': 8.582813584259119, 'avg_acc': 50.07084346748658, 'loss': 8.5072660446167}


EP_train:1:  79%|| 21802/27626 [51:21<13:39,  7.10it/s]

{'epoch': 1, 'iter': 21800, 'avg_loss': 8.582570046745557, 'avg_acc': 50.07066762992524, 'loss': 9.020613670349121}


EP_train:1:  79%|| 21812/27626 [51:22<13:46,  7.04it/s]

{'epoch': 1, 'iter': 21810, 'avg_loss': 8.582699216394301, 'avg_acc': 50.07063522992985, 'loss': 8.875643730163574}


EP_train:1:  79%|| 21822/27626 [51:24<13:44,  7.04it/s]

{'epoch': 1, 'iter': 21820, 'avg_loss': 8.582785850123871, 'avg_acc': 50.07203496631685, 'loss': 8.214523315429688}


EP_train:1:  79%|| 21832/27626 [51:25<13:43,  7.04it/s]

{'epoch': 1, 'iter': 21830, 'avg_loss': 8.582774978841439, 'avg_acc': 50.073003985158714, 'loss': 8.689208984375}


EP_train:1:  79%|| 21842/27626 [51:26<13:43,  7.03it/s]

{'epoch': 1, 'iter': 21840, 'avg_loss': 8.582754200191802, 'avg_acc': 50.073829037132, 'loss': 8.993866920471191}


EP_train:1:  79%|| 21852/27626 [51:28<13:43,  7.01it/s]

{'epoch': 1, 'iter': 21850, 'avg_loss': 8.582586935175229, 'avg_acc': 50.07479634799322, 'loss': 9.262554168701172}


EP_train:1:  79%|| 21862/27626 [51:29<13:45,  6.98it/s]

{'epoch': 1, 'iter': 21860, 'avg_loss': 8.58243324656177, 'avg_acc': 50.07447623621976, 'loss': 7.31723165512085}


EP_train:1:  79%|| 21872/27626 [51:31<13:38,  7.03it/s]

{'epoch': 1, 'iter': 21870, 'avg_loss': 8.582439918935798, 'avg_acc': 50.073727767363174, 'loss': 9.049049377441406}


EP_train:1:  79%|| 21882/27626 [51:32<13:33,  7.06it/s]

{'epoch': 1, 'iter': 21880, 'avg_loss': 8.582385463561183, 'avg_acc': 50.073265618573195, 'loss': 8.045611381530762}


EP_train:1:  79%|| 21892/27626 [51:34<13:31,  7.06it/s]

{'epoch': 1, 'iter': 21890, 'avg_loss': 8.582408517020513, 'avg_acc': 50.073374902928144, 'loss': 7.884063243865967}


EP_train:1:  79%|| 21902/27626 [51:35<13:36,  7.01it/s]

{'epoch': 1, 'iter': 21900, 'avg_loss': 8.582378537201684, 'avg_acc': 50.07305602483905, 'loss': 8.488710403442383}


EP_train:1:  79%|| 21912/27626 [51:36<13:28,  7.07it/s]

{'epoch': 1, 'iter': 21910, 'avg_loss': 8.582452638054269, 'avg_acc': 50.07373579480626, 'loss': 9.51965045928955}


EP_train:1:  79%|| 21922/27626 [51:38<13:30,  7.04it/s]

{'epoch': 1, 'iter': 21920, 'avg_loss': 8.582573286640095, 'avg_acc': 50.07241914146252, 'loss': 8.740753173828125}


EP_train:1:  79%|| 21932/27626 [51:39<13:26,  7.06it/s]

{'epoch': 1, 'iter': 21930, 'avg_loss': 8.582826163720826, 'avg_acc': 50.07153116592951, 'loss': 9.382624626159668}


EP_train:1:  79%|| 21942/27626 [51:41<13:30,  7.01it/s]

{'epoch': 1, 'iter': 21940, 'avg_loss': 8.582694315963584, 'avg_acc': 50.071925846588584, 'loss': 8.270001411437988}


EP_train:1:  79%|| 21952/27626 [51:42<13:28,  7.02it/s]

{'epoch': 1, 'iter': 21950, 'avg_loss': 8.582695150086453, 'avg_acc': 50.07274725525033, 'loss': 8.920921325683594}


EP_train:1:  79%|| 21962/27626 [51:43<13:19,  7.09it/s]

{'epoch': 1, 'iter': 21960, 'avg_loss': 8.582668714991012, 'avg_acc': 50.07043736624015, 'loss': 8.765190124511719}


EP_train:1:  80%|| 21972/27626 [51:45<13:17,  7.09it/s]

{'epoch': 1, 'iter': 21970, 'avg_loss': 8.58268988059229, 'avg_acc': 50.07026307405216, 'loss': 8.700660705566406}


EP_train:1:  80%|| 21982/27626 [51:46<13:14,  7.11it/s]

{'epoch': 1, 'iter': 21980, 'avg_loss': 8.582706577236292, 'avg_acc': 50.069520267503755, 'loss': 9.555107116699219}


EP_train:1:  80%|| 21992/27626 [51:48<13:16,  7.07it/s]

{'epoch': 1, 'iter': 21990, 'avg_loss': 8.582988510992228, 'avg_acc': 50.068778136510396, 'loss': 9.751777648925781}


EP_train:1:  80%|| 22002/27626 [51:49<13:14,  7.08it/s]

{'epoch': 1, 'iter': 22000, 'avg_loss': 8.58293491809738, 'avg_acc': 50.067610563156215, 'loss': 8.23975944519043}


EP_train:1:  80%|| 22012/27626 [51:50<13:14,  7.06it/s]

{'epoch': 1, 'iter': 22010, 'avg_loss': 8.582950958025611, 'avg_acc': 50.06701194857117, 'loss': 8.486714363098145}


EP_train:1:  80%|| 22022/27626 [51:52<13:14,  7.05it/s]

{'epoch': 1, 'iter': 22020, 'avg_loss': 8.582950637362307, 'avg_acc': 50.068826347577314, 'loss': 9.340734481811523}


EP_train:1:  80%|| 22032/27626 [51:53<13:15,  7.03it/s]

{'epoch': 1, 'iter': 22030, 'avg_loss': 8.582832464146541, 'avg_acc': 50.06794403340747, 'loss': 8.963041305541992}


EP_train:1:  80%|| 22042/27626 [51:55<13:16,  7.01it/s]

{'epoch': 1, 'iter': 22040, 'avg_loss': 8.58295014873671, 'avg_acc': 50.069331019463725, 'loss': 8.92206859588623}


EP_train:1:  80%|| 22052/27626 [51:56<13:11,  7.04it/s]

{'epoch': 1, 'iter': 22050, 'avg_loss': 8.582963811940905, 'avg_acc': 50.069299578250416, 'loss': 9.22993278503418}


EP_train:1:  80%|| 22062/27626 [51:58<13:13,  7.01it/s]

{'epoch': 1, 'iter': 22060, 'avg_loss': 8.582911173596267, 'avg_acc': 50.069834776302066, 'loss': 8.637373924255371}


EP_train:1:  80%|| 22072/27626 [51:59<13:10,  7.02it/s]

{'epoch': 1, 'iter': 22070, 'avg_loss': 8.58292901632086, 'avg_acc': 50.07093584341443, 'loss': 8.483002662658691}


EP_train:1:  80%|| 22082/27626 [52:00<13:06,  7.05it/s]

{'epoch': 1, 'iter': 22080, 'avg_loss': 8.582956984112117, 'avg_acc': 50.070762193741224, 'loss': 8.197469711303711}


EP_train:1:  80%|| 22092/27626 [52:02<13:05,  7.04it/s]

{'epoch': 1, 'iter': 22090, 'avg_loss': 8.582965941876166, 'avg_acc': 50.07087162192748, 'loss': 7.86627721786499}


EP_train:1:  80%|| 22102/27626 [52:03<13:01,  7.07it/s]

{'epoch': 1, 'iter': 22100, 'avg_loss': 8.582996995733538, 'avg_acc': 50.07253631057418, 'loss': 9.149420738220215}


EP_train:1:  80%|| 22112/27626 [52:05<13:05,  7.02it/s]

{'epoch': 1, 'iter': 22110, 'avg_loss': 8.583146392967532, 'avg_acc': 50.07250350504274, 'loss': 9.621939659118652}


EP_train:1:  80%|| 22122/27626 [52:06<13:03,  7.03it/s]

{'epoch': 1, 'iter': 22120, 'avg_loss': 8.58310811286065, 'avg_acc': 50.07148184982596, 'loss': 8.763005256652832}


EP_train:1:  80%|| 22132/27626 [52:07<13:05,  6.99it/s]

{'epoch': 1, 'iter': 22130, 'avg_loss': 8.583144505947962, 'avg_acc': 50.07201436898468, 'loss': 9.736921310424805}


EP_train:1:  80%|| 22142/27626 [52:09<12:52,  7.10it/s]

{'epoch': 1, 'iter': 22140, 'avg_loss': 8.583108917246838, 'avg_acc': 50.07141728015898, 'loss': 8.613322257995605}


EP_train:1:  80%|| 22152/27626 [52:10<12:48,  7.12it/s]

{'epoch': 1, 'iter': 22150, 'avg_loss': 8.582934853573276, 'avg_acc': 50.07096180759334, 'loss': 8.235650062561035}


EP_train:1:  80%|| 22162/27626 [52:12<12:56,  7.03it/s]

{'epoch': 1, 'iter': 22160, 'avg_loss': 8.582992386309057, 'avg_acc': 50.07107080005415, 'loss': 8.295174598693848}


EP_train:1:  80%|| 22172/27626 [52:13<12:54,  7.04it/s]

{'epoch': 1, 'iter': 22170, 'avg_loss': 8.58318966775252, 'avg_acc': 50.069206395742185, 'loss': 8.905475616455078}


EP_train:1:  80%|| 22182/27626 [52:15<12:59,  6.99it/s]

{'epoch': 1, 'iter': 22180, 'avg_loss': 8.583155842093102, 'avg_acc': 50.06973874036338, 'loss': 8.798656463623047}


EP_train:1:  80%|| 22192/27626 [52:16<12:52,  7.03it/s]

{'epoch': 1, 'iter': 22190, 'avg_loss': 8.582968691869315, 'avg_acc': 50.06886237663918, 'loss': 8.552974700927734}


EP_train:1:  80%|| 22202/27626 [52:17<12:49,  7.05it/s]

{'epoch': 1, 'iter': 22200, 'avg_loss': 8.582800213525244, 'avg_acc': 50.06643844871853, 'loss': 8.71772289276123}


EP_train:1:  80%|| 22212/27626 [52:19<12:59,  6.95it/s]

{'epoch': 1, 'iter': 22210, 'avg_loss': 8.58280885929645, 'avg_acc': 50.065845752104806, 'loss': 8.794194221496582}


EP_train:1:  80%|| 22222/27626 [52:20<12:51,  7.01it/s]

{'epoch': 1, 'iter': 22220, 'avg_loss': 8.58267505408932, 'avg_acc': 50.06398789433418, 'loss': 8.740726470947266}


EP_train:1:  80%|| 22232/27626 [52:22<12:44,  7.06it/s]

{'epoch': 1, 'iter': 22230, 'avg_loss': 8.582716323088432, 'avg_acc': 50.06044487427466, 'loss': 9.060831069946289}


EP_train:1:  81%|| 22242/27626 [52:23<12:44,  7.04it/s]

{'epoch': 1, 'iter': 22240, 'avg_loss': 8.58251545040209, 'avg_acc': 50.06154174722359, 'loss': 7.954707145690918}


EP_train:1:  81%|| 22252/27626 [52:24<12:40,  7.07it/s]

{'epoch': 1, 'iter': 22250, 'avg_loss': 8.582640412630518, 'avg_acc': 50.0601096579929, 'loss': 7.932770729064941}


EP_train:1:  81%|| 22262/27626 [52:26<12:36,  7.09it/s]

{'epoch': 1, 'iter': 22260, 'avg_loss': 8.582680762357658, 'avg_acc': 50.05994227572885, 'loss': 9.084959983825684}


EP_train:1:  81%|| 22272/27626 [52:27<12:39,  7.05it/s]

{'epoch': 1, 'iter': 22270, 'avg_loss': 8.582689687884667, 'avg_acc': 50.058512190741325, 'loss': 8.97275447845459}


EP_train:1:  81%|| 22282/27626 [52:29<12:39,  7.04it/s]

{'epoch': 1, 'iter': 22280, 'avg_loss': 8.582706062986663, 'avg_acc': 50.05708338943494, 'loss': 9.157390594482422}


EP_train:1:  81%|| 22292/27626 [52:30<12:39,  7.02it/s]

{'epoch': 1, 'iter': 22290, 'avg_loss': 8.582774984965463, 'avg_acc': 50.05705778116729, 'loss': 8.326529502868652}


EP_train:1:  81%|| 22302/27626 [52:31<12:37,  7.03it/s]

{'epoch': 1, 'iter': 22300, 'avg_loss': 8.582826823836715, 'avg_acc': 50.05689206762028, 'loss': 8.959856986999512}


EP_train:1:  81%|| 22312/27626 [52:33<12:35,  7.03it/s]

{'epoch': 1, 'iter': 22310, 'avg_loss': 8.5827727720464, 'avg_acc': 50.0568665680606, 'loss': 8.686591148376465}


EP_train:1:  81%|| 22322/27626 [52:34<12:34,  7.03it/s]

{'epoch': 1, 'iter': 22320, 'avg_loss': 8.582649315083403, 'avg_acc': 50.05474105102819, 'loss': 8.544024467468262}


EP_train:1:  81%|| 22332/27626 [52:36<12:35,  7.01it/s]

{'epoch': 1, 'iter': 22330, 'avg_loss': 8.582558143507761, 'avg_acc': 50.053037257623934, 'loss': 8.763813018798828}


EP_train:1:  81%|| 22342/27626 [52:37<12:33,  7.02it/s]

{'epoch': 1, 'iter': 22340, 'avg_loss': 8.582489263263824, 'avg_acc': 50.053573027169776, 'loss': 8.936951637268066}


EP_train:1:  81%|| 22352/27626 [52:39<12:31,  7.01it/s]

{'epoch': 1, 'iter': 22350, 'avg_loss': 8.582457803526765, 'avg_acc': 50.05173146615364, 'loss': 9.682086944580078}


EP_train:1:  81%|| 22362/27626 [52:40<12:23,  7.08it/s]

{'epoch': 1, 'iter': 22360, 'avg_loss': 8.582318421185516, 'avg_acc': 50.05310585394213, 'loss': 8.832414627075195}


EP_train:1:  81%|| 22372/27626 [52:41<12:18,  7.11it/s]

{'epoch': 1, 'iter': 22370, 'avg_loss': 8.582501201805274, 'avg_acc': 50.052523356130706, 'loss': 8.975639343261719}


EP_train:1:  81%|| 22382/27626 [52:43<12:28,  7.01it/s]

{'epoch': 1, 'iter': 22380, 'avg_loss': 8.58258212434559, 'avg_acc': 50.05166212412314, 'loss': 7.59089994430542}


EP_train:1:  81%|| 22392/27626 [52:44<12:26,  7.01it/s]

{'epoch': 1, 'iter': 22390, 'avg_loss': 8.582460747265165, 'avg_acc': 50.05135992139699, 'loss': 8.51706600189209}


EP_train:1:  81%|| 22402/27626 [52:46<12:27,  6.99it/s]

{'epoch': 1, 'iter': 22400, 'avg_loss': 8.582591405963639, 'avg_acc': 50.05091848578188, 'loss': 9.025967597961426}


EP_train:1:  81%|| 22412/27626 [52:47<12:15,  7.09it/s]

{'epoch': 1, 'iter': 22410, 'avg_loss': 8.582700657475389, 'avg_acc': 50.05047744411226, 'loss': 9.29471492767334}


EP_train:1:  81%|| 22422/27626 [52:48<12:14,  7.09it/s]

{'epoch': 1, 'iter': 22420, 'avg_loss': 8.582712776032148, 'avg_acc': 50.05129120021409, 'loss': 8.43836784362793}


EP_train:1:  81%|| 22432/27626 [52:50<12:19,  7.03it/s]

{'epoch': 1, 'iter': 22430, 'avg_loss': 8.582663643169347, 'avg_acc': 50.04903927600196, 'loss': 8.203375816345215}


EP_train:1:  81%|| 22442/27626 [52:51<12:14,  7.05it/s]

{'epoch': 1, 'iter': 22440, 'avg_loss': 8.58255936596824, 'avg_acc': 50.04804264515842, 'loss': 7.859756946563721}


EP_train:1:  81%|| 22452/27626 [52:53<12:18,  7.00it/s]

{'epoch': 1, 'iter': 22450, 'avg_loss': 8.582627584172709, 'avg_acc': 50.04969155048773, 'loss': 9.034867286682129}


EP_train:1:  81%|| 22462/27626 [52:54<12:10,  7.07it/s]

{'epoch': 1, 'iter': 22460, 'avg_loss': 8.582494811167631, 'avg_acc': 50.04994768710209, 'loss': 8.389474868774414}


EP_train:1:  81%|| 22472/27626 [52:56<12:10,  7.05it/s]

{'epoch': 1, 'iter': 22470, 'avg_loss': 8.582448343267046, 'avg_acc': 50.052289617729514, 'loss': 8.297890663146973}


EP_train:1:  81%|| 22482/27626 [52:57<12:16,  6.99it/s]

{'epoch': 1, 'iter': 22480, 'avg_loss': 8.582386585016023, 'avg_acc': 50.04976424536275, 'loss': 8.952462196350098}


EP_train:1:  81%|| 22492/27626 [52:58<12:14,  6.99it/s]

{'epoch': 1, 'iter': 22490, 'avg_loss': 8.58219182806671, 'avg_acc': 50.05099261926993, 'loss': 8.266607284545898}


EP_train:1:  81%|| 22502/27626 [53:00<12:09,  7.03it/s]

{'epoch': 1, 'iter': 22500, 'avg_loss': 8.582220372017382, 'avg_acc': 50.049303364294914, 'loss': 9.434063911437988}


EP_train:1:  81%|| 22512/27626 [53:01<12:04,  7.06it/s]

{'epoch': 1, 'iter': 22510, 'avg_loss': 8.582177771990718, 'avg_acc': 50.04914264137533, 'loss': 8.542089462280273}


EP_train:1:  82%|| 22522/27626 [53:03<12:03,  7.05it/s]

{'epoch': 1, 'iter': 22520, 'avg_loss': 8.582117100400541, 'avg_acc': 50.04759446738599, 'loss': 8.609471321105957}


EP_train:1:  82%|| 22532/27626 [53:04<12:02,  7.05it/s]

{'epoch': 1, 'iter': 22530, 'avg_loss': 8.582105188880641, 'avg_acc': 50.047989436776, 'loss': 8.992290496826172}


EP_train:1:  82%|| 22542/27626 [53:05<12:01,  7.05it/s]

{'epoch': 1, 'iter': 22540, 'avg_loss': 8.582179029029875, 'avg_acc': 50.04630451177854, 'loss': 8.569343566894531}


EP_train:1:  82%|| 22552/27626 [53:07<12:05,  6.99it/s]

{'epoch': 1, 'iter': 22550, 'avg_loss': 8.582043936181654, 'avg_acc': 50.04919404904439, 'loss': 7.665825843811035}


EP_train:1:  82%|| 22562/27626 [53:08<11:58,  7.05it/s]

{'epoch': 1, 'iter': 22560, 'avg_loss': 8.581936020265895, 'avg_acc': 50.04737157040912, 'loss': 9.16884994506836}


EP_train:1:  82%|| 22572/27626 [53:10<12:01,  7.00it/s]

{'epoch': 1, 'iter': 22570, 'avg_loss': 8.581916876072889, 'avg_acc': 50.047350582606, 'loss': 8.178901672363281}


EP_train:1:  82%|| 22582/27626 [53:11<12:00,  7.00it/s]

{'epoch': 1, 'iter': 22580, 'avg_loss': 8.58184324612277, 'avg_acc': 50.04760639475665, 'loss': 7.900030612945557}


EP_train:1:  82%|| 22592/27626 [53:13<11:54,  7.05it/s]

{'epoch': 1, 'iter': 22590, 'avg_loss': 8.58177643568201, 'avg_acc': 50.04551038023992, 'loss': 8.323468208312988}


EP_train:1:  82%|| 22602/27626 [53:14<11:54,  7.03it/s]

{'epoch': 1, 'iter': 22600, 'avg_loss': 8.581756980612944, 'avg_acc': 50.046319853103846, 'loss': 8.634958267211914}


EP_train:1:  82%|| 22612/27626 [53:15<11:52,  7.03it/s]

{'epoch': 1, 'iter': 22610, 'avg_loss': 8.581644881215027, 'avg_acc': 50.044364468621474, 'loss': 8.332022666931152}


EP_train:1:  82%|| 22622/27626 [53:17<11:45,  7.09it/s]

{'epoch': 1, 'iter': 22620, 'avg_loss': 8.581672333695044, 'avg_acc': 50.044759294460896, 'loss': 8.922475814819336}


EP_train:1:  82%|| 22632/27626 [53:18<11:45,  7.08it/s]

{'epoch': 1, 'iter': 22630, 'avg_loss': 8.58160872373235, 'avg_acc': 50.0452918563033, 'loss': 8.98300552368164}


EP_train:1:  82%|| 22642/27626 [53:20<11:46,  7.05it/s]

{'epoch': 1, 'iter': 22640, 'avg_loss': 8.581481188442233, 'avg_acc': 50.04499580407226, 'loss': 7.8878092765808105}


EP_train:1:  82%|| 22652/27626 [53:21<11:41,  7.09it/s]

{'epoch': 1, 'iter': 22650, 'avg_loss': 8.581569746763606, 'avg_acc': 50.04511390225596, 'loss': 8.253586769104004}


EP_train:1:  82%|| 22662/27626 [53:22<11:48,  7.00it/s]

{'epoch': 1, 'iter': 22660, 'avg_loss': 8.581474499302441, 'avg_acc': 50.04536979833194, 'loss': 7.613985538482666}


EP_train:1:  82%|| 22672/27626 [53:24<11:45,  7.03it/s]

{'epoch': 1, 'iter': 22670, 'avg_loss': 8.581441419787456, 'avg_acc': 50.044798420890125, 'loss': 8.99677848815918}


EP_train:1:  82%|| 22682/27626 [53:25<11:44,  7.02it/s]

{'epoch': 1, 'iter': 22680, 'avg_loss': 8.581507212144084, 'avg_acc': 50.0446408888497, 'loss': 8.650520324707031}


EP_train:1:  82%|| 22692/27626 [53:27<11:36,  7.08it/s]

{'epoch': 1, 'iter': 22690, 'avg_loss': 8.58142113625717, 'avg_acc': 50.04613613326869, 'loss': 8.539233207702637}


EP_train:1:  82%|| 22702/27626 [53:28<11:37,  7.06it/s]

{'epoch': 1, 'iter': 22700, 'avg_loss': 8.581342862411814, 'avg_acc': 50.04611580987621, 'loss': 8.802103042602539}


EP_train:1:  82%|| 22712/27626 [53:30<11:36,  7.06it/s]

{'epoch': 1, 'iter': 22710, 'avg_loss': 8.581462899180732, 'avg_acc': 50.04444432213465, 'loss': 7.989755153656006}


EP_train:1:  82%|| 22722/27626 [53:31<11:37,  7.03it/s]

{'epoch': 1, 'iter': 22720, 'avg_loss': 8.581279389240505, 'avg_acc': 50.045662602878394, 'loss': 8.522139549255371}


EP_train:1:  82%|| 22732/27626 [53:32<11:35,  7.03it/s]

{'epoch': 1, 'iter': 22730, 'avg_loss': 8.581297876149852, 'avg_acc': 50.045092604812815, 'loss': 9.171712875366211}


EP_train:1:  82%|| 22742/27626 [53:34<11:31,  7.06it/s]

{'epoch': 1, 'iter': 22740, 'avg_loss': 8.581348438417265, 'avg_acc': 50.04617211204433, 'loss': 9.590401649475098}


EP_train:1:  82%|| 22752/27626 [53:35<11:33,  7.03it/s]

{'epoch': 1, 'iter': 22750, 'avg_loss': 8.58116230231726, 'avg_acc': 50.0458771043031, 'loss': 7.99412202835083}


EP_train:1:  82%|| 22762/27626 [53:37<11:34,  7.01it/s]

{'epoch': 1, 'iter': 22760, 'avg_loss': 8.581079194386632, 'avg_acc': 50.04475857826984, 'loss': 8.656140327453613}


EP_train:1:  82%|| 22772/27626 [53:38<11:29,  7.04it/s]

{'epoch': 1, 'iter': 22770, 'avg_loss': 8.580952913455093, 'avg_acc': 50.045287866145536, 'loss': 8.826362609863281}


EP_train:1:  82%|| 22782/27626 [53:39<11:38,  6.93it/s]

{'epoch': 1, 'iter': 22780, 'avg_loss': 8.581164112030782, 'avg_acc': 50.0462282164962, 'loss': 9.357848167419434}


EP_train:1:  83%|| 22792/27626 [53:41<11:26,  7.04it/s]

{'epoch': 1, 'iter': 22790, 'avg_loss': 8.581228683895647, 'avg_acc': 50.046345048484056, 'loss': 8.067890167236328}


EP_train:1:  83%|| 22802/27626 [53:42<11:25,  7.04it/s]

{'epoch': 1, 'iter': 22800, 'avg_loss': 8.581178731392582, 'avg_acc': 50.04509122406912, 'loss': 8.535672187805176}


EP_train:1:  83%|| 22812/27626 [53:44<11:20,  7.07it/s]

{'epoch': 1, 'iter': 22810, 'avg_loss': 8.581237066623533, 'avg_acc': 50.0445234755162, 'loss': 8.673408508300781}


EP_train:1:  83%|| 22822/27626 [53:45<11:22,  7.04it/s]

{'epoch': 1, 'iter': 22820, 'avg_loss': 8.58112163791512, 'avg_acc': 50.043682353972216, 'loss': 8.954010963439941}


EP_train:1:  83%|| 22832/27626 [53:47<11:23,  7.01it/s]

{'epoch': 1, 'iter': 22830, 'avg_loss': 8.5809537737114, 'avg_acc': 50.042294468047835, 'loss': 8.463079452514648}


EP_train:1:  83%|| 22842/27626 [53:48<11:24,  6.98it/s]

{'epoch': 1, 'iter': 22840, 'avg_loss': 8.580857279943578, 'avg_acc': 50.04419136640252, 'loss': 7.819570541381836}


EP_train:1:  83%|| 22852/27626 [53:49<11:26,  6.96it/s]

{'epoch': 1, 'iter': 22850, 'avg_loss': 8.58076830049254, 'avg_acc': 50.04677038204017, 'loss': 8.760869026184082}


EP_train:1:  83%|| 22862/27626 [53:51<11:19,  7.02it/s]

{'epoch': 1, 'iter': 22860, 'avg_loss': 8.580619474318985, 'avg_acc': 50.046613227767814, 'loss': 7.659088134765625}


EP_train:1:  83%|| 22872/27626 [53:52<11:20,  6.99it/s]

{'epoch': 1, 'iter': 22870, 'avg_loss': 8.580589857231121, 'avg_acc': 50.0465928468366, 'loss': 9.2481689453125}


EP_train:1:  83%|| 22882/27626 [53:54<11:17,  7.00it/s]

{'epoch': 1, 'iter': 22880, 'avg_loss': 8.580663289324, 'avg_acc': 50.04643590752153, 'loss': 9.030333518981934}


EP_train:1:  83%|| 22892/27626 [53:55<11:12,  7.03it/s]

{'epoch': 1, 'iter': 22890, 'avg_loss': 8.580629418163008, 'avg_acc': 50.04491393997641, 'loss': 8.137979507446289}


EP_train:1:  83%|| 22902/27626 [53:56<11:09,  7.05it/s]

{'epoch': 1, 'iter': 22900, 'avg_loss': 8.580587937214412, 'avg_acc': 50.04544015545173, 'loss': 7.992717266082764}


EP_train:1:  83%|| 22912/27626 [53:58<11:12,  7.01it/s]

{'epoch': 1, 'iter': 22910, 'avg_loss': 8.58053143407084, 'avg_acc': 50.045420322116016, 'loss': 7.988856792449951}


EP_train:1:  83%|| 22922/27626 [53:59<11:06,  7.06it/s]

{'epoch': 1, 'iter': 22920, 'avg_loss': 8.580529815171548, 'avg_acc': 50.04458247894944, 'loss': 8.062533378601074}


EP_train:1:  83%|| 22932/27626 [54:01<11:10,  7.00it/s]

{'epoch': 1, 'iter': 22930, 'avg_loss': 8.580427891325064, 'avg_acc': 50.047016048144435, 'loss': 8.643165588378906}


EP_train:1:  83%|| 22942/27626 [54:02<11:06,  7.03it/s]

{'epoch': 1, 'iter': 22940, 'avg_loss': 8.58035399871074, 'avg_acc': 50.04699555381195, 'loss': 8.081619262695312}


EP_train:1:  83%|| 22952/27626 [54:04<11:02,  7.05it/s]

{'epoch': 1, 'iter': 22950, 'avg_loss': 8.580300932963956, 'avg_acc': 50.04479652302732, 'loss': 8.92478084564209}


EP_train:1:  83%|| 22962/27626 [54:05<11:01,  7.05it/s]

{'epoch': 1, 'iter': 22960, 'avg_loss': 8.580293715911317, 'avg_acc': 50.04409651147598, 'loss': 8.073095321655273}


EP_train:1:  83%|| 22972/27626 [54:06<11:01,  7.04it/s]

{'epoch': 1, 'iter': 22970, 'avg_loss': 8.580306287418177, 'avg_acc': 50.04149253406469, 'loss': 8.561065673828125}


EP_train:1:  83%|| 22982/27626 [54:08<10:55,  7.08it/s]

{'epoch': 1, 'iter': 22980, 'avg_loss': 8.580337349228643, 'avg_acc': 50.04215438840781, 'loss': 8.111157417297363}


EP_train:1:  83%|| 22992/27626 [54:09<10:57,  7.05it/s]

{'epoch': 1, 'iter': 22990, 'avg_loss': 8.580273110373657, 'avg_acc': 50.043223435257275, 'loss': 8.667491912841797}


EP_train:1:  83%|| 23002/27626 [54:11<10:58,  7.02it/s]

{'epoch': 1, 'iter': 23000, 'avg_loss': 8.5804010292099, 'avg_acc': 50.04143841572105, 'loss': 8.36884593963623}


EP_train:1:  83%|| 23012/27626 [54:12<10:55,  7.04it/s]

{'epoch': 1, 'iter': 23010, 'avg_loss': 8.580404955873195, 'avg_acc': 50.04427230455, 'loss': 9.524450302124023}


EP_train:1:  83%|| 23022/27626 [54:13<10:56,  7.01it/s]

{'epoch': 1, 'iter': 23020, 'avg_loss': 8.58041595837869, 'avg_acc': 50.04180965205681, 'loss': 9.390118598937988}


EP_train:1:  83%|| 23032/27626 [54:15<10:50,  7.06it/s]

{'epoch': 1, 'iter': 23030, 'avg_loss': 8.580472469868141, 'avg_acc': 50.04124875168251, 'loss': 8.030862808227539}


EP_train:1:  83%|| 23042/27626 [54:16<10:54,  7.00it/s]

{'epoch': 1, 'iter': 23040, 'avg_loss': 8.580458473938837, 'avg_acc': 50.04177336053123, 'loss': 9.236563682556152}


EP_train:1:  83%|| 23052/27626 [54:18<10:46,  7.08it/s]

{'epoch': 1, 'iter': 23050, 'avg_loss': 8.580535464025951, 'avg_acc': 50.041212962561275, 'loss': 8.67357063293457}


EP_train:1:  83%|| 23062/27626 [54:19<10:44,  7.08it/s]

{'epoch': 1, 'iter': 23060, 'avg_loss': 8.58053217152969, 'avg_acc': 50.04038203026755, 'loss': 8.67668628692627}


EP_train:1:  84%|| 23072/27626 [54:20<10:41,  7.10it/s]

{'epoch': 1, 'iter': 23070, 'avg_loss': 8.580560972443493, 'avg_acc': 50.04063542976031, 'loss': 9.206426620483398}


EP_train:1:  84%|| 23082/27626 [54:22<10:42,  7.07it/s]

{'epoch': 1, 'iter': 23080, 'avg_loss': 8.58065154096819, 'avg_acc': 50.04075321693168, 'loss': 8.33891487121582}


EP_train:1:  84%|| 23092/27626 [54:23<10:44,  7.04it/s]

{'epoch': 1, 'iter': 23090, 'avg_loss': 8.580563766640068, 'avg_acc': 50.041547572647346, 'loss': 7.925070285797119}


EP_train:1:  84%|| 23102/27626 [54:25<10:38,  7.08it/s]

{'epoch': 1, 'iter': 23100, 'avg_loss': 8.580444886381917, 'avg_acc': 50.042476516168136, 'loss': 7.747089385986328}


EP_train:1:  84%|| 23112/27626 [54:26<10:39,  7.05it/s]

{'epoch': 1, 'iter': 23110, 'avg_loss': 8.580406507571542, 'avg_acc': 50.04272857081043, 'loss': 8.368515968322754}


EP_train:1:  84%|| 23122/27626 [54:28<10:43,  7.00it/s]

{'epoch': 1, 'iter': 23120, 'avg_loss': 8.58043860023881, 'avg_acc': 50.0423046148523, 'loss': 8.607053756713867}


EP_train:1:  84%|| 23132/27626 [54:29<10:41,  7.01it/s]

{'epoch': 1, 'iter': 23130, 'avg_loss': 8.580609244124433, 'avg_acc': 50.043637326531496, 'loss': 9.728259086608887}


EP_train:1:  84%|| 23142/27626 [54:30<10:33,  7.07it/s]

{'epoch': 1, 'iter': 23140, 'avg_loss': 8.580602032523915, 'avg_acc': 50.04172788557106, 'loss': 8.795129776000977}


EP_train:1:  84%|| 23152/27626 [54:32<10:31,  7.08it/s]

{'epoch': 1, 'iter': 23150, 'avg_loss': 8.580538200645188, 'avg_acc': 50.04116992786489, 'loss': 8.292847633361816}


EP_train:1:  84%|| 23162/27626 [54:33<10:30,  7.08it/s]

{'epoch': 1, 'iter': 23160, 'avg_loss': 8.580485878004723, 'avg_acc': 50.04047752687708, 'loss': 8.25900650024414}


EP_train:1:  84%|| 23172/27626 [54:35<10:34,  7.01it/s]

{'epoch': 1, 'iter': 23170, 'avg_loss': 8.580421373484116, 'avg_acc': 50.04059492469034, 'loss': 8.297061920166016}


EP_train:1:  84%|| 23182/27626 [54:36<10:34,  7.00it/s]

{'epoch': 1, 'iter': 23180, 'avg_loss': 8.58049668967891, 'avg_acc': 50.04057741253612, 'loss': 8.378927230834961}


EP_train:1:  84%|| 23192/27626 [54:37<10:28,  7.05it/s]

{'epoch': 1, 'iter': 23190, 'avg_loss': 8.580639116915503, 'avg_acc': 50.04029041438489, 'loss': 9.117408752441406}


EP_train:1:  84%|| 23202/27626 [54:39<10:24,  7.08it/s]

{'epoch': 1, 'iter': 23200, 'avg_loss': 8.580616030856035, 'avg_acc': 50.04188935821732, 'loss': 7.385266304016113}


EP_train:1:  84%|| 23212/27626 [54:40<10:28,  7.02it/s]

{'epoch': 1, 'iter': 23210, 'avg_loss': 8.580532610285447, 'avg_acc': 50.04160204213519, 'loss': 9.040919303894043}


EP_train:1:  84%|| 23222/27626 [54:42<10:23,  7.07it/s]

{'epoch': 1, 'iter': 23220, 'avg_loss': 8.580539853149523, 'avg_acc': 50.043333620429785, 'loss': 8.313271522521973}


EP_train:1:  84%|| 23232/27626 [54:43<10:18,  7.10it/s]

{'epoch': 1, 'iter': 23230, 'avg_loss': 8.580490148512373, 'avg_acc': 50.04277689294477, 'loss': 8.139984130859375}


EP_train:1:  84%|| 23242/27626 [54:44<10:18,  7.09it/s]

{'epoch': 1, 'iter': 23240, 'avg_loss': 8.580420870937079, 'avg_acc': 50.0411449593391, 'loss': 8.136946678161621}


EP_train:1:  84%|| 23252/27626 [54:46<10:18,  7.07it/s]

{'epoch': 1, 'iter': 23250, 'avg_loss': 8.580190341552916, 'avg_acc': 50.03938002666551, 'loss': 7.334428787231445}


EP_train:1:  84%|| 23262/27626 [54:47<10:20,  7.04it/s]

{'epoch': 1, 'iter': 23260, 'avg_loss': 8.580170855077933, 'avg_acc': 50.03976613215253, 'loss': 8.949718475341797}


EP_train:1:  84%|| 23272/27626 [54:49<10:20,  7.02it/s]

{'epoch': 1, 'iter': 23270, 'avg_loss': 8.58023973763473, 'avg_acc': 50.04095762966783, 'loss': 8.27939510345459}


EP_train:1:  84%|| 23282/27626 [54:50<10:15,  7.06it/s]

{'epoch': 1, 'iter': 23280, 'avg_loss': 8.580353735587087, 'avg_acc': 50.040000429534814, 'loss': 9.083568572998047}


EP_train:1:  84%|| 23292/27626 [54:52<10:13,  7.06it/s]

{'epoch': 1, 'iter': 23290, 'avg_loss': 8.580289172540692, 'avg_acc': 50.039580739341375, 'loss': 8.667984962463379}


EP_train:1:  84%|| 23302/27626 [54:53<10:19,  6.98it/s]

{'epoch': 1, 'iter': 23300, 'avg_loss': 8.580250416450308, 'avg_acc': 50.039563752628645, 'loss': 8.257499694824219}


EP_train:1:  84%|| 23312/27626 [54:54<10:15,  7.01it/s]

{'epoch': 1, 'iter': 23310, 'avg_loss': 8.580227550497428, 'avg_acc': 50.03766998412766, 'loss': 8.776272773742676}


EP_train:1:  84%|| 23322/27626 [54:56<10:06,  7.09it/s]

{'epoch': 1, 'iter': 23320, 'avg_loss': 8.580256133457588, 'avg_acc': 50.03778783071052, 'loss': 8.598872184753418}


EP_train:1:  84%|| 23332/27626 [54:57<10:05,  7.09it/s]

{'epoch': 1, 'iter': 23330, 'avg_loss': 8.580066241084122, 'avg_acc': 50.03750375037504, 'loss': 8.366028785705566}


EP_train:1:  84%|| 23342/27626 [54:59<10:08,  7.04it/s]

{'epoch': 1, 'iter': 23340, 'avg_loss': 8.579842305010784, 'avg_acc': 50.03909429758794, 'loss': 7.4775543212890625}


EP_train:1:  85%|| 23352/27626 [55:00<10:07,  7.03it/s]

{'epoch': 1, 'iter': 23350, 'avg_loss': 8.57975814839838, 'avg_acc': 50.03840841933964, 'loss': 8.509367942810059}


EP_train:1:  85%|| 23362/27626 [55:01<10:08,  7.01it/s]

{'epoch': 1, 'iter': 23360, 'avg_loss': 8.5797796262249, 'avg_acc': 50.0367867385814, 'loss': 8.427780151367188}


EP_train:1:  85%|| 23372/27626 [55:03<10:01,  7.07it/s]

{'epoch': 1, 'iter': 23370, 'avg_loss': 8.579731183845533, 'avg_acc': 50.03543387103675, 'loss': 8.903264045715332}


EP_train:1:  85%|| 23382/27626 [55:04<10:03,  7.04it/s]

{'epoch': 1, 'iter': 23380, 'avg_loss': 8.579703446710433, 'avg_acc': 50.03421581626106, 'loss': 8.062204360961914}


EP_train:1:  85%|| 23392/27626 [55:06<10:03,  7.02it/s]

{'epoch': 1, 'iter': 23390, 'avg_loss': 8.57975133700418, 'avg_acc': 50.03580436920183, 'loss': 8.52780532836914}


EP_train:1:  85%|| 23402/27626 [55:07<10:01,  7.03it/s]

{'epoch': 1, 'iter': 23400, 'avg_loss': 8.579787790731999, 'avg_acc': 50.03632323404983, 'loss': 8.017186164855957}


EP_train:1:  85%|| 23412/27626 [55:09<10:00,  7.02it/s]

{'epoch': 1, 'iter': 23410, 'avg_loss': 8.579715748720485, 'avg_acc': 50.03590726581522, 'loss': 9.08239459991455}


EP_train:1:  85%|| 23422/27626 [55:10<09:56,  7.04it/s]

{'epoch': 1, 'iter': 23420, 'avg_loss': 8.579557163231522, 'avg_acc': 50.03482451645959, 'loss': 7.858516693115234}


EP_train:1:  85%|| 23432/27626 [55:11<09:51,  7.09it/s]

{'epoch': 1, 'iter': 23430, 'avg_loss': 8.579627120526895, 'avg_acc': 50.037477060304724, 'loss': 9.29786491394043}


EP_train:1:  85%|| 23442/27626 [55:13<09:55,  7.03it/s]

{'epoch': 1, 'iter': 23440, 'avg_loss': 8.579762182516031, 'avg_acc': 50.039060833582184, 'loss': 8.586028099060059}


EP_train:1:  85%|| 23452/27626 [55:14<09:50,  7.07it/s]

{'epoch': 1, 'iter': 23450, 'avg_loss': 8.579836593142533, 'avg_acc': 50.03811138117777, 'loss': 8.323555946350098}


EP_train:1:  85%|| 23462/27626 [55:16<09:47,  7.08it/s]

{'epoch': 1, 'iter': 23460, 'avg_loss': 8.579734847346543, 'avg_acc': 50.04035953284174, 'loss': 9.777420043945312}


EP_train:1:  85%|| 23472/27626 [55:17<09:54,  6.98it/s]

{'epoch': 1, 'iter': 23470, 'avg_loss': 8.57980421932234, 'avg_acc': 50.04287205487623, 'loss': 8.468354225158691}


EP_train:1:  85%|| 23482/27626 [55:18<09:52,  7.00it/s]

{'epoch': 1, 'iter': 23480, 'avg_loss': 8.579695017850256, 'avg_acc': 50.042188365061115, 'loss': 8.374310493469238}


EP_train:1:  85%|| 23492/27626 [55:20<09:45,  7.06it/s]

{'epoch': 1, 'iter': 23490, 'avg_loss': 8.579606989548381, 'avg_acc': 50.04296858371291, 'loss': 8.37169075012207}


EP_train:1:  85%|| 23502/27626 [55:21<09:47,  7.02it/s]

{'epoch': 1, 'iter': 23500, 'avg_loss': 8.579619065035526, 'avg_acc': 50.04587570741671, 'loss': 8.373980522155762}


EP_train:1:  85%|| 23512/27626 [55:23<09:43,  7.05it/s]

{'epoch': 1, 'iter': 23510, 'avg_loss': 8.579664290501459, 'avg_acc': 50.04784994258007, 'loss': 9.203566551208496}


EP_train:1:  85%|| 23522/27626 [55:24<09:42,  7.04it/s]

{'epoch': 1, 'iter': 23520, 'avg_loss': 8.579590939536832, 'avg_acc': 50.047032439096974, 'loss': 8.622469902038574}


EP_train:1:  85%|| 23532/27626 [55:25<09:46,  6.98it/s]

{'epoch': 1, 'iter': 23530, 'avg_loss': 8.579457405456385, 'avg_acc': 50.04595002337342, 'loss': 7.766987323760986}


EP_train:1:  85%|| 23542/27626 [55:27<09:38,  7.06it/s]

{'epoch': 1, 'iter': 23540, 'avg_loss': 8.579440503176935, 'avg_acc': 50.047656216813216, 'loss': 8.564664840698242}


EP_train:1:  85%|| 23552/27626 [55:28<09:38,  7.04it/s]

{'epoch': 1, 'iter': 23550, 'avg_loss': 8.579617611238502, 'avg_acc': 50.04604369241221, 'loss': 9.68828296661377}


EP_train:1:  85%|| 23562/27626 [55:30<09:40,  7.00it/s]

{'epoch': 1, 'iter': 23560, 'avg_loss': 8.579740811980978, 'avg_acc': 50.04681995670812, 'loss': 8.339594841003418}


EP_train:1:  85%|| 23572/27626 [55:31<09:40,  6.98it/s]

{'epoch': 1, 'iter': 23570, 'avg_loss': 8.579749144887161, 'avg_acc': 50.04573946799032, 'loss': 8.122286796569824}


EP_train:1:  85%|| 23582/27626 [55:33<09:39,  6.98it/s]

{'epoch': 1, 'iter': 23580, 'avg_loss': 8.579732234451406, 'avg_acc': 50.04598511513506, 'loss': 8.524837493896484}


EP_train:1:  85%|| 23592/27626 [55:34<09:31,  7.06it/s]

{'epoch': 1, 'iter': 23590, 'avg_loss': 8.579679969328204, 'avg_acc': 50.04636301979568, 'loss': 7.7367072105407715}


EP_train:1:  85%|| 23602/27626 [55:35<09:26,  7.10it/s]

{'epoch': 1, 'iter': 23600, 'avg_loss': 8.579689075197134, 'avg_acc': 50.04740265242999, 'loss': 8.741321563720703}


EP_train:1:  85%|| 23612/27626 [55:37<09:32,  7.01it/s]

{'epoch': 1, 'iter': 23610, 'avg_loss': 8.579654102398951, 'avg_acc': 50.04552962602177, 'loss': 8.3757963180542}


EP_train:1:  86%|| 23622/27626 [55:38<09:30,  7.02it/s]

{'epoch': 1, 'iter': 23620, 'avg_loss': 8.579552054410259, 'avg_acc': 50.04445197070403, 'loss': 8.395861625671387}


EP_train:1:  86%|| 23632/27626 [55:40<09:24,  7.08it/s]

{'epoch': 1, 'iter': 23630, 'avg_loss': 8.579415660469275, 'avg_acc': 50.044697642926664, 'loss': 7.988855361938477}


EP_train:1:  86%|| 23642/27626 [55:41<09:24,  7.05it/s]

{'epoch': 1, 'iter': 23640, 'avg_loss': 8.579438945856914, 'avg_acc': 50.04441436487458, 'loss': 8.349090576171875}


EP_train:1:  86%|| 23652/27626 [55:42<09:23,  7.06it/s]

{'epoch': 1, 'iter': 23650, 'avg_loss': 8.57941169548446, 'avg_acc': 50.044659845249676, 'loss': 8.888050079345703}


EP_train:1:  86%|| 23662/27626 [55:44<09:28,  6.98it/s]

{'epoch': 1, 'iter': 23660, 'avg_loss': 8.579507151992608, 'avg_acc': 50.04582963526478, 'loss': 8.41690444946289}


EP_train:1:  86%|| 23672/27626 [55:45<09:21,  7.05it/s]

{'epoch': 1, 'iter': 23670, 'avg_loss': 8.57935632349375, 'avg_acc': 50.0440940391196, 'loss': 8.544600486755371}


EP_train:1:  86%|| 23682/27626 [55:47<09:16,  7.09it/s]

{'epoch': 1, 'iter': 23680, 'avg_loss': 8.579362085273221, 'avg_acc': 50.043943456779694, 'loss': 8.491620063781738}


EP_train:1:  86%|| 23692/27626 [55:48<09:16,  7.06it/s]

{'epoch': 1, 'iter': 23690, 'avg_loss': 8.579228349476908, 'avg_acc': 50.04458444134904, 'loss': 8.000161170959473}


EP_train:1:  86%|| 23702/27626 [55:50<09:14,  7.08it/s]

{'epoch': 1, 'iter': 23700, 'avg_loss': 8.579429223886407, 'avg_acc': 50.04245601451416, 'loss': 8.995782852172852}


EP_train:1:  86%|| 23712/27626 [55:51<09:11,  7.10it/s]

{'epoch': 1, 'iter': 23710, 'avg_loss': 8.57941528092824, 'avg_acc': 50.04322888111004, 'loss': 8.91526985168457}


EP_train:1:  86%|| 23722/27626 [55:52<09:14,  7.04it/s]

{'epoch': 1, 'iter': 23720, 'avg_loss': 8.579531430102566, 'avg_acc': 50.043737616457996, 'loss': 9.280680656433105}


EP_train:1:  86%|| 23732/27626 [55:54<09:11,  7.06it/s]

{'epoch': 1, 'iter': 23730, 'avg_loss': 8.579661075597988, 'avg_acc': 50.04279739581139, 'loss': 7.793299198150635}


EP_train:1:  86%|| 23742/27626 [55:55<09:16,  6.97it/s]

{'epoch': 1, 'iter': 23740, 'avg_loss': 8.57947748009947, 'avg_acc': 50.04146308074638, 'loss': 8.442483901977539}


EP_train:1:  86%|| 23752/27626 [55:57<09:13,  7.00it/s]

{'epoch': 1, 'iter': 23750, 'avg_loss': 8.579477414377743, 'avg_acc': 50.04157719674961, 'loss': 8.228301048278809}


EP_train:1:  86%|| 23762/27626 [55:58<09:03,  7.11it/s]

{'epoch': 1, 'iter': 23760, 'avg_loss': 8.579402832073633, 'avg_acc': 50.04445309540845, 'loss': 8.732694625854492}


EP_train:1:  86%|| 23772/27626 [55:59<09:03,  7.10it/s]

{'epoch': 1, 'iter': 23770, 'avg_loss': 8.579434361417103, 'avg_acc': 50.04614341003744, 'loss': 8.545732498168945}


EP_train:1:  86%|| 23782/27626 [56:01<09:04,  7.06it/s]

{'epoch': 1, 'iter': 23780, 'avg_loss': 8.579428989097924, 'avg_acc': 50.04441571002061, 'loss': 8.067249298095703}


EP_train:1:  86%|| 23792/27626 [56:02<09:00,  7.09it/s]

{'epoch': 1, 'iter': 23790, 'avg_loss': 8.57940278548941, 'avg_acc': 50.04465974528183, 'loss': 8.277800559997559}


EP_train:1:  86%|| 23802/27626 [56:04<09:01,  7.06it/s]

{'epoch': 1, 'iter': 23800, 'avg_loss': 8.579336919492444, 'avg_acc': 50.04713562455358, 'loss': 7.814246654510498}


EP_train:1:  86%|| 23812/27626 [56:05<09:03,  7.01it/s]

{'epoch': 1, 'iter': 23810, 'avg_loss': 8.579204817680553, 'avg_acc': 50.046459619503594, 'loss': 10.141538619995117}


EP_train:1:  86%|| 23822/27626 [56:06<08:59,  7.04it/s]

{'epoch': 1, 'iter': 23820, 'avg_loss': 8.57926361881402, 'avg_acc': 50.046702489400104, 'loss': 8.9502534866333}


EP_train:1:  86%|| 23832/27626 [56:08<09:00,  7.02it/s]

{'epoch': 1, 'iter': 23830, 'avg_loss': 8.5792479228145, 'avg_acc': 50.045240443120306, 'loss': 8.91589069366455}


EP_train:1:  86%|| 23842/27626 [56:09<09:03,  6.97it/s]

{'epoch': 1, 'iter': 23840, 'avg_loss': 8.579298540027914, 'avg_acc': 50.045352543936914, 'loss': 9.037897109985352}


EP_train:1:  86%|| 23852/27626 [56:11<08:58,  7.01it/s]

{'epoch': 1, 'iter': 23850, 'avg_loss': 8.579264409516794, 'avg_acc': 50.04572659427278, 'loss': 7.953135013580322}


EP_train:1:  86%|| 23862/27626 [56:12<08:50,  7.10it/s]

{'epoch': 1, 'iter': 23860, 'avg_loss': 8.579277376642791, 'avg_acc': 50.04662419848288, 'loss': 8.1239013671875}


EP_train:1:  86%|| 23872/27626 [56:14<08:50,  7.07it/s]

{'epoch': 1, 'iter': 23870, 'avg_loss': 8.579264419438687, 'avg_acc': 50.04555737086842, 'loss': 9.255631446838379}


EP_train:1:  86%|| 23882/27626 [56:15<08:50,  7.06it/s]

{'epoch': 1, 'iter': 23880, 'avg_loss': 8.579271340249377, 'avg_acc': 50.046323437042, 'loss': 8.850271224975586}


EP_train:1:  86%|| 23892/27626 [56:16<08:46,  7.09it/s]

{'epoch': 1, 'iter': 23890, 'avg_loss': 8.579255123105694, 'avg_acc': 50.04617324515508, 'loss': 8.571388244628906}


EP_train:1:  87%|| 23902/27626 [56:18<08:47,  7.06it/s]

{'epoch': 1, 'iter': 23900, 'avg_loss': 8.579139815150512, 'avg_acc': 50.04759215095602, 'loss': 8.485296249389648}


EP_train:1:  87%|| 23912/27626 [56:19<08:48,  7.03it/s]

{'epoch': 1, 'iter': 23910, 'avg_loss': 8.579129404537273, 'avg_acc': 50.04809501902891, 'loss': 8.600412368774414}


EP_train:1:  87%|| 23922/27626 [56:21<08:47,  7.02it/s]

{'epoch': 1, 'iter': 23920, 'avg_loss': 8.57908727839551, 'avg_acc': 50.049511935119774, 'loss': 9.127732276916504}


EP_train:1:  87%|| 23932/27626 [56:22<08:50,  6.96it/s]

{'epoch': 1, 'iter': 23930, 'avg_loss': 8.579171595613873, 'avg_acc': 50.05092766704274, 'loss': 8.383809089660645}


EP_train:1:  87%|| 23942/27626 [56:23<08:43,  7.04it/s]

{'epoch': 1, 'iter': 23940, 'avg_loss': 8.579354599929236, 'avg_acc': 50.050645336452114, 'loss': 8.49344539642334}


EP_train:1:  87%|| 23952/27626 [56:25<08:38,  7.09it/s]

{'epoch': 1, 'iter': 23950, 'avg_loss': 8.579387498993917, 'avg_acc': 50.050102292179865, 'loss': 8.672050476074219}


EP_train:1:  87%|| 23962/27626 [56:26<08:39,  7.06it/s]

{'epoch': 1, 'iter': 23960, 'avg_loss': 8.579265817687029, 'avg_acc': 50.05255936730521, 'loss': 8.801899909973145}


EP_train:1:  87%|| 23972/27626 [56:28<08:36,  7.08it/s]

{'epoch': 1, 'iter': 23970, 'avg_loss': 8.579379538790334, 'avg_acc': 50.0501908556172, 'loss': 9.163630485534668}


EP_train:1:  87%|| 23982/27626 [56:29<08:34,  7.09it/s]

{'epoch': 1, 'iter': 23980, 'avg_loss': 8.579520345484811, 'avg_acc': 50.05069117217798, 'loss': 8.734742164611816}


EP_train:1:  87%|| 23992/27626 [56:31<08:38,  7.01it/s]

{'epoch': 1, 'iter': 23990, 'avg_loss': 8.579477603008884, 'avg_acc': 50.05301467216873, 'loss': 8.348481178283691}


EP_train:1:  87%|| 24002/27626 [56:32<08:35,  7.02it/s]

{'epoch': 1, 'iter': 24000, 'avg_loss': 8.579582127554854, 'avg_acc': 50.05416440981626, 'loss': 9.186842918395996}


EP_train:1:  87%|| 24012/27626 [56:33<08:33,  7.04it/s]

{'epoch': 1, 'iter': 24010, 'avg_loss': 8.579595498910406, 'avg_acc': 50.05362125692391, 'loss': 9.055875778198242}


EP_train:1:  87%|| 24022/27626 [56:35<08:32,  7.04it/s]

{'epoch': 1, 'iter': 24020, 'avg_loss': 8.579646522598518, 'avg_acc': 50.053208650763914, 'loss': 9.443359375}


EP_train:1:  87%|| 24032/27626 [56:36<08:26,  7.10it/s]

{'epoch': 1, 'iter': 24030, 'avg_loss': 8.57962257566139, 'avg_acc': 50.05344658982148, 'loss': 8.321364402770996}


EP_train:1:  87%|| 24042/27626 [56:38<08:24,  7.11it/s]

{'epoch': 1, 'iter': 24040, 'avg_loss': 8.579460083330082, 'avg_acc': 50.054074289755, 'loss': 9.338579177856445}


EP_train:1:  87%|| 24052/27626 [56:39<08:28,  7.03it/s]

{'epoch': 1, 'iter': 24050, 'avg_loss': 8.579427246839051, 'avg_acc': 50.05236268762213, 'loss': 8.920503616333008}


EP_train:1:  87%|| 24062/27626 [56:40<08:24,  7.06it/s]

{'epoch': 1, 'iter': 24060, 'avg_loss': 8.579381434639307, 'avg_acc': 50.05156165579153, 'loss': 9.332864761352539}


EP_train:1:  87%|| 24072/27626 [56:42<08:23,  7.07it/s]

{'epoch': 1, 'iter': 24070, 'avg_loss': 8.579407253608483, 'avg_acc': 50.05218935648706, 'loss': 8.215497016906738}


EP_train:1:  87%|| 24082/27626 [56:43<08:24,  7.03it/s]

{'epoch': 1, 'iter': 24080, 'avg_loss': 8.579473424396426, 'avg_acc': 50.05099975084091, 'loss': 8.524186134338379}


EP_train:1:  87%|| 24092/27626 [56:45<08:22,  7.04it/s]

{'epoch': 1, 'iter': 24090, 'avg_loss': 8.579368904020821, 'avg_acc': 50.05188659665435, 'loss': 7.856861114501953}


EP_train:1:  87%|| 24102/27626 [56:46<08:26,  6.96it/s]

{'epoch': 1, 'iter': 24100, 'avg_loss': 8.579378844464914, 'avg_acc': 50.05186506783951, 'loss': 7.880369186401367}


EP_train:1:  87%|| 24112/27626 [56:47<08:19,  7.04it/s]

{'epoch': 1, 'iter': 24110, 'avg_loss': 8.579261128319319, 'avg_acc': 50.05288042802041, 'loss': 8.116922378540039}


EP_train:1:  87%|| 24122/27626 [56:49<08:13,  7.09it/s]

{'epoch': 1, 'iter': 24120, 'avg_loss': 8.579270552849524, 'avg_acc': 50.052081174080676, 'loss': 8.793879508972168}


EP_train:1:  87%|| 24132/27626 [56:50<08:15,  7.06it/s]

{'epoch': 1, 'iter': 24130, 'avg_loss': 8.579242440502275, 'avg_acc': 50.05167108698355, 'loss': 8.715059280395508}


EP_train:1:  87%|| 24142/27626 [56:52<08:14,  7.05it/s]

{'epoch': 1, 'iter': 24140, 'avg_loss': 8.579299489069458, 'avg_acc': 50.05177913093907, 'loss': 9.023317337036133}


EP_train:1:  87%|| 24152/27626 [56:53<08:16,  7.00it/s]

{'epoch': 1, 'iter': 24150, 'avg_loss': 8.579261053719582, 'avg_acc': 50.0518870854209, 'loss': 8.539937019348145}


EP_train:1:  87%|| 24162/27626 [56:55<08:14,  7.00it/s]

{'epoch': 1, 'iter': 24160, 'avg_loss': 8.579179467361921, 'avg_acc': 50.05173626919416, 'loss': 7.751172065734863}


EP_train:1:  87%|| 24172/27626 [56:56<08:07,  7.09it/s]

{'epoch': 1, 'iter': 24170, 'avg_loss': 8.579214515183374, 'avg_acc': 50.053395598030704, 'loss': 7.856383800506592}


EP_train:1:  88%|| 24182/27626 [56:57<08:09,  7.03it/s]

{'epoch': 1, 'iter': 24180, 'avg_loss': 8.579243657330732, 'avg_acc': 50.05311504900541, 'loss': 8.815345764160156}


EP_train:1:  88%|| 24192/27626 [56:59<08:08,  7.04it/s]

{'epoch': 1, 'iter': 24190, 'avg_loss': 8.579218496725769, 'avg_acc': 50.05193047000951, 'loss': 8.534160614013672}


EP_train:1:  88%|| 24202/27626 [57:00<08:05,  7.05it/s]

{'epoch': 1, 'iter': 24200, 'avg_loss': 8.579114793396249, 'avg_acc': 50.05216726581546, 'loss': 8.38744831085205}


EP_train:1:  88%|| 24212/27626 [57:02<08:05,  7.03it/s]

{'epoch': 1, 'iter': 24210, 'avg_loss': 8.579220657636968, 'avg_acc': 50.051629424641696, 'loss': 7.6687116622924805}


EP_train:1:  88%|| 24222/27626 [57:03<08:03,  7.04it/s]

{'epoch': 1, 'iter': 24220, 'avg_loss': 8.579304471131492, 'avg_acc': 50.052253210024354, 'loss': 9.04269790649414}


EP_train:1:  88%|| 24232/27626 [57:04<08:05,  6.99it/s]

{'epoch': 1, 'iter': 24230, 'avg_loss': 8.579403952549415, 'avg_acc': 50.0533923486443, 'loss': 9.02390193939209}


EP_train:1:  88%|| 24242/27626 [57:06<07:59,  7.05it/s]

{'epoch': 1, 'iter': 24240, 'avg_loss': 8.579407680113812, 'avg_acc': 50.05349923683017, 'loss': 8.937569618225098}


EP_train:1:  88%|| 24252/27626 [57:07<07:59,  7.04it/s]

{'epoch': 1, 'iter': 24250, 'avg_loss': 8.579553508841128, 'avg_acc': 50.0543792008577, 'loss': 8.93796157836914}


EP_train:1:  88%|| 24262/27626 [57:09<08:03,  6.96it/s]

{'epoch': 1, 'iter': 24260, 'avg_loss': 8.579448703233469, 'avg_acc': 50.05409917150983, 'loss': 9.026851654052734}


EP_train:1:  88%|| 24272/27626 [57:10<07:57,  7.03it/s]

{'epoch': 1, 'iter': 24270, 'avg_loss': 8.579367635316006, 'avg_acc': 50.05407688187549, 'loss': 8.026819229125977}


EP_train:1:  88%|| 24282/27626 [57:12<07:55,  7.04it/s]

{'epoch': 1, 'iter': 24280, 'avg_loss': 8.579358238594756, 'avg_acc': 50.05366850623945, 'loss': 9.990927696228027}


EP_train:1:  88%|| 24292/27626 [57:13<07:54,  7.03it/s]

{'epoch': 1, 'iter': 24290, 'avg_loss': 8.579368331404762, 'avg_acc': 50.051973982133305, 'loss': 7.9510908126831055}


EP_train:1:  88%|| 24302/27626 [57:14<07:48,  7.09it/s]

{'epoch': 1, 'iter': 24300, 'avg_loss': 8.579419025672934, 'avg_acc': 50.05169540348133, 'loss': 9.182333946228027}


EP_train:1:  88%|| 24312/27626 [57:16<07:52,  7.02it/s]

{'epoch': 1, 'iter': 24310, 'avg_loss': 8.57957161400124, 'avg_acc': 50.049488914483156, 'loss': 9.02509593963623}


EP_train:1:  88%|| 24322/27626 [57:17<07:49,  7.04it/s]

{'epoch': 1, 'iter': 24320, 'avg_loss': 8.57961440732382, 'avg_acc': 50.04856913778216, 'loss': 9.131791114807129}


EP_train:1:  88%|| 24332/27626 [57:19<07:49,  7.01it/s]

{'epoch': 1, 'iter': 24330, 'avg_loss': 8.579597328119872, 'avg_acc': 50.048549175948374, 'loss': 8.8579683303833}


EP_train:1:  88%|| 24342/27626 [57:20<07:50,  6.98it/s]

{'epoch': 1, 'iter': 24340, 'avg_loss': 8.579655162722263, 'avg_acc': 50.04942791997043, 'loss': 9.220272064208984}


EP_train:1:  88%|| 24352/27626 [57:21<07:50,  6.95it/s]

{'epoch': 1, 'iter': 24350, 'avg_loss': 8.579623188024126, 'avg_acc': 50.0483809699807, 'loss': 7.780788898468018}


EP_train:1:  88%|| 24362/27626 [57:23<07:43,  7.04it/s]

{'epoch': 1, 'iter': 24360, 'avg_loss': 8.57956432531847, 'avg_acc': 50.04797627355199, 'loss': 9.224852561950684}


EP_train:1:  88%|| 24372/27626 [57:24<07:46,  6.98it/s]

{'epoch': 1, 'iter': 24370, 'avg_loss': 8.579581742546711, 'avg_acc': 50.048469492429525, 'loss': 9.045771598815918}


EP_train:1:  88%|| 24382/27626 [57:26<07:45,  6.96it/s]

{'epoch': 1, 'iter': 24380, 'avg_loss': 8.579567245040455, 'avg_acc': 50.04960317460318, 'loss': 8.681199073791504}


EP_train:1:  88%|| 24392/27626 [57:27<07:36,  7.09it/s]

{'epoch': 1, 'iter': 24390, 'avg_loss': 8.579526223417314, 'avg_acc': 50.04958283793203, 'loss': 7.7679548263549805}


EP_train:1:  88%|| 24402/27626 [57:29<07:40,  7.00it/s]

{'epoch': 1, 'iter': 24400, 'avg_loss': 8.579521465133455, 'avg_acc': 50.04917831236425, 'loss': 8.754806518554688}


EP_train:1:  88%|| 24412/27626 [57:30<07:39,  7.00it/s]

{'epoch': 1, 'iter': 24410, 'avg_loss': 8.579462591986758, 'avg_acc': 50.04736594158371, 'loss': 8.786348342895508}


EP_train:1:  88%|| 24422/27626 [57:31<07:37,  7.00it/s]

{'epoch': 1, 'iter': 24420, 'avg_loss': 8.579293007473996, 'avg_acc': 50.04465930961059, 'loss': 8.637410163879395}


EP_train:1:  88%|| 24432/27626 [57:33<07:39,  6.96it/s]

{'epoch': 1, 'iter': 24430, 'avg_loss': 8.579206277782413, 'avg_acc': 50.044129384798, 'loss': 7.9867634773254395}


EP_train:1:  88%|| 24442/27626 [57:34<07:33,  7.02it/s]

{'epoch': 1, 'iter': 24440, 'avg_loss': 8.579138454880361, 'avg_acc': 50.04372775254695, 'loss': 8.068309783935547}


EP_train:1:  89%|| 24452/27626 [57:36<07:35,  6.98it/s]

{'epoch': 1, 'iter': 24450, 'avg_loss': 8.578998337971457, 'avg_acc': 50.04255960901395, 'loss': 8.534276008605957}


EP_train:1:  89%|| 24462/27626 [57:37<07:35,  6.95it/s]

{'epoch': 1, 'iter': 24460, 'avg_loss': 8.579232172215299, 'avg_acc': 50.041647929356934, 'loss': 9.069876670837402}


EP_train:1:  89%|| 24472/27626 [57:39<07:37,  6.90it/s]

{'epoch': 1, 'iter': 24470, 'avg_loss': 8.579362167500879, 'avg_acc': 50.04201401659106, 'loss': 9.341758728027344}


EP_train:1:  89%|| 24482/27626 [57:40<07:31,  6.96it/s]

{'epoch': 1, 'iter': 24480, 'avg_loss': 8.579409075603342, 'avg_acc': 50.04250745476083, 'loss': 9.214421272277832}


EP_train:1:  89%|| 24492/27626 [57:41<07:29,  6.97it/s]

{'epoch': 1, 'iter': 24490, 'avg_loss': 8.579350136536537, 'avg_acc': 50.04236250051039, 'loss': 8.650341987609863}


EP_train:1:  89%|| 24502/27626 [57:43<07:27,  6.98it/s]

{'epoch': 1, 'iter': 24500, 'avg_loss': 8.579446135372596, 'avg_acc': 50.043365576915235, 'loss': 8.286521911621094}


EP_train:1:  89%|| 24512/27626 [57:44<07:22,  7.04it/s]

{'epoch': 1, 'iter': 24510, 'avg_loss': 8.579463329714383, 'avg_acc': 50.04462282240627, 'loss': 8.782630920410156}


EP_train:1:  89%|| 24522/27626 [57:46<07:24,  6.99it/s]

{'epoch': 1, 'iter': 24520, 'avg_loss': 8.57944109136394, 'avg_acc': 50.04447718282289, 'loss': 9.2033052444458}


EP_train:1:  89%|| 24532/27626 [57:47<07:23,  6.98it/s]

{'epoch': 1, 'iter': 24530, 'avg_loss': 8.579516241501947, 'avg_acc': 50.04509600097835, 'loss': 9.478379249572754}


EP_train:1:  89%|| 24542/27626 [57:49<07:19,  7.02it/s]

{'epoch': 1, 'iter': 24540, 'avg_loss': 8.579524106673539, 'avg_acc': 50.04495028727436, 'loss': 8.656377792358398}


EP_train:1:  89%|| 24552/27626 [57:50<07:20,  6.97it/s]

{'epoch': 1, 'iter': 24550, 'avg_loss': 8.57955644467781, 'avg_acc': 50.04556840861879, 'loss': 8.653438568115234}


EP_train:1:  89%|| 24562/27626 [57:51<07:20,  6.96it/s]

{'epoch': 1, 'iter': 24560, 'avg_loss': 8.57947483837767, 'avg_acc': 50.045931558161314, 'loss': 8.114662170410156}


EP_train:1:  89%|| 24572/27626 [57:53<07:16,  6.99it/s]

{'epoch': 1, 'iter': 24570, 'avg_loss': 8.579441569458991, 'avg_acc': 50.04832933132555, 'loss': 9.23867130279541}


EP_train:1:  89%|| 24582/27626 [57:54<07:16,  6.98it/s]

{'epoch': 1, 'iter': 24580, 'avg_loss': 8.579458512969296, 'avg_acc': 50.0495809771775, 'loss': 8.935547828674316}


EP_train:1:  89%|| 24592/27626 [57:56<07:11,  7.03it/s]

{'epoch': 1, 'iter': 24590, 'avg_loss': 8.579516209629542, 'avg_acc': 50.04917957789436, 'loss': 8.653657913208008}


EP_train:1:  89%|| 24602/27626 [57:57<07:16,  6.92it/s]

{'epoch': 1, 'iter': 24600, 'avg_loss': 8.579466604671849, 'avg_acc': 50.049413641721884, 'loss': 8.324378967285156}


EP_train:1:  89%|| 24612/27626 [57:59<07:12,  6.98it/s]

{'epoch': 1, 'iter': 24610, 'avg_loss': 8.579332723838816, 'avg_acc': 50.050282394051436, 'loss': 8.291610717773438}


EP_train:1:  89%|| 24622/27626 [58:00<07:08,  7.00it/s]

{'epoch': 1, 'iter': 24620, 'avg_loss': 8.57943692456591, 'avg_acc': 50.04962735063564, 'loss': 8.953173637390137}


EP_train:1:  89%|| 24632/27626 [58:01<07:05,  7.03it/s]

{'epoch': 1, 'iter': 24630, 'avg_loss': 8.579533584941334, 'avg_acc': 50.05049531078723, 'loss': 8.720460891723633}


EP_train:1:  89%|| 24642/27626 [58:03<07:09,  6.94it/s]

{'epoch': 1, 'iter': 24640, 'avg_loss': 8.579432868767876, 'avg_acc': 50.04806521650907, 'loss': 8.656195640563965}


EP_train:1:  89%|| 24652/27626 [58:04<07:04,  7.00it/s]

{'epoch': 1, 'iter': 24650, 'avg_loss': 8.579394485069276, 'avg_acc': 50.04867956675185, 'loss': 8.080574989318848}


EP_train:1:  89%|| 24662/27626 [58:06<07:00,  7.05it/s]

{'epoch': 1, 'iter': 24660, 'avg_loss': 8.579315948958119, 'avg_acc': 50.04929341875837, 'loss': 8.533263206481934}


EP_train:1:  89%|| 24672/27626 [58:07<07:00,  7.03it/s]

{'epoch': 1, 'iter': 24670, 'avg_loss': 8.57935538687572, 'avg_acc': 50.050413440882004, 'loss': 7.563475608825684}


EP_train:1:  89%|| 24682/27626 [58:08<06:58,  7.04it/s]

{'epoch': 1, 'iter': 24680, 'avg_loss': 8.579263294802352, 'avg_acc': 50.05153255540699, 'loss': 8.309826850891113}


EP_train:1:  89%|| 24692/27626 [58:10<06:55,  7.07it/s]

{'epoch': 1, 'iter': 24690, 'avg_loss': 8.579366757017956, 'avg_acc': 50.05429609979345, 'loss': 8.95838737487793}


EP_train:1:  89%|| 24702/27626 [58:11<06:55,  7.05it/s]

{'epoch': 1, 'iter': 24700, 'avg_loss': 8.579498225979215, 'avg_acc': 50.05313550058702, 'loss': 8.277132987976074}


EP_train:1:  89%|| 24712/27626 [58:13<06:52,  7.07it/s]

{'epoch': 1, 'iter': 24710, 'avg_loss': 8.579480505670793, 'avg_acc': 50.05412569301121, 'loss': 8.68993091583252}


EP_train:1:  89%|| 24722/27626 [58:14<06:49,  7.10it/s]

{'epoch': 1, 'iter': 24720, 'avg_loss': 8.579346891813431, 'avg_acc': 50.05271328020711, 'loss': 8.082051277160645}


EP_train:1:  90%|| 24732/27626 [58:15<06:52,  7.01it/s]

{'epoch': 1, 'iter': 24730, 'avg_loss': 8.579247302896432, 'avg_acc': 50.052944684808544, 'loss': 7.469970226287842}


EP_train:1:  90%|| 24742/27626 [58:17<06:49,  7.04it/s]

{'epoch': 1, 'iter': 24740, 'avg_loss': 8.5791231761305, 'avg_acc': 50.050649731215394, 'loss': 8.140414237976074}


EP_train:1:  90%|| 24752/27626 [58:18<06:48,  7.03it/s]

{'epoch': 1, 'iter': 24750, 'avg_loss': 8.578981308449466, 'avg_acc': 50.05163932770393, 'loss': 8.270113945007324}


EP_train:1:  90%|| 24762/27626 [58:20<06:50,  6.98it/s]

{'epoch': 1, 'iter': 24760, 'avg_loss': 8.578996158706309, 'avg_acc': 50.05224950527038, 'loss': 9.113205909729004}


EP_train:1:  90%|| 24772/27626 [58:21<06:48,  6.99it/s]

{'epoch': 1, 'iter': 24770, 'avg_loss': 8.57902292662224, 'avg_acc': 50.05298534576723, 'loss': 8.512574195861816}


EP_train:1:  90%|| 24782/27626 [58:23<06:43,  7.05it/s]

{'epoch': 1, 'iter': 24780, 'avg_loss': 8.578871391639357, 'avg_acc': 50.053468383035394, 'loss': 7.80593729019165}


EP_train:1:  90%|| 24792/27626 [58:24<06:38,  7.12it/s]

{'epoch': 1, 'iter': 24790, 'avg_loss': 8.57879531910053, 'avg_acc': 50.053068653946994, 'loss': 8.677909851074219}


EP_train:1:  90%|| 24802/27626 [58:25<06:40,  7.06it/s]

{'epoch': 1, 'iter': 24800, 'avg_loss': 8.578653140191296, 'avg_acc': 50.05279525019153, 'loss': 7.750732421875}


EP_train:1:  90%|| 24812/27626 [58:27<06:37,  7.08it/s]

{'epoch': 1, 'iter': 24810, 'avg_loss': 8.578545368345717, 'avg_acc': 50.05227016242796, 'loss': 8.426706314086914}


EP_train:1:  90%|| 24822/27626 [58:28<06:36,  7.07it/s]

{'epoch': 1, 'iter': 24820, 'avg_loss': 8.57844134638472, 'avg_acc': 50.050738286128684, 'loss': 8.843985557556152}


EP_train:1:  90%|| 24832/27626 [58:30<06:40,  6.98it/s]

{'epoch': 1, 'iter': 24830, 'avg_loss': 8.578348617999996, 'avg_acc': 50.05021444967983, 'loss': 9.037850379943848}


EP_train:1:  90%|| 24842/27626 [58:31<06:36,  7.03it/s]

{'epoch': 1, 'iter': 24840, 'avg_loss': 8.578397030348766, 'avg_acc': 50.051452236222374, 'loss': 8.485197067260742}


EP_train:1:  90%|| 24852/27626 [58:32<06:33,  7.04it/s]

{'epoch': 1, 'iter': 24850, 'avg_loss': 8.578458178772983, 'avg_acc': 50.052060279264424, 'loss': 8.739160537719727}


EP_train:1:  90%|| 24862/27626 [58:34<06:31,  7.05it/s]

{'epoch': 1, 'iter': 24860, 'avg_loss': 8.578391689531312, 'avg_acc': 50.0520393387233, 'loss': 8.409628868103027}


EP_train:1:  90%|| 24872/27626 [58:35<06:30,  7.05it/s]

{'epoch': 1, 'iter': 24870, 'avg_loss': 8.57840794262325, 'avg_acc': 50.052144063366974, 'loss': 8.15432357788086}


EP_train:1:  90%|| 24882/27626 [58:37<06:26,  7.10it/s]

{'epoch': 1, 'iter': 24880, 'avg_loss': 8.578418297359622, 'avg_acc': 50.05249989952172, 'loss': 8.683294296264648}


EP_train:1:  90%|| 24892/27626 [58:38<06:27,  7.05it/s]

{'epoch': 1, 'iter': 24890, 'avg_loss': 8.57833224355152, 'avg_acc': 50.05260435498775, 'loss': 8.499855995178223}


EP_train:1:  90%|| 24902/27626 [58:40<06:24,  7.08it/s]

{'epoch': 1, 'iter': 24900, 'avg_loss': 8.578417425916633, 'avg_acc': 50.05245773262118, 'loss': 8.995804786682129}


EP_train:1:  90%|| 24912/27626 [58:41<06:25,  7.03it/s]

{'epoch': 1, 'iter': 24910, 'avg_loss': 8.578508718466177, 'avg_acc': 50.050680422303394, 'loss': 8.47942066192627}


EP_train:1:  90%|| 24922/27626 [58:42<06:25,  7.02it/s]

{'epoch': 1, 'iter': 24920, 'avg_loss': 8.578551647258529, 'avg_acc': 50.04953151960194, 'loss': 7.974862098693848}


EP_train:1:  90%|| 24932/27626 [58:44<06:23,  7.03it/s]

{'epoch': 1, 'iter': 24930, 'avg_loss': 8.578528171469076, 'avg_acc': 50.05013838193414, 'loss': 8.718550682067871}


EP_train:1:  90%|| 24942/27626 [58:45<06:20,  7.05it/s]

{'epoch': 1, 'iter': 24940, 'avg_loss': 8.578426993871613, 'avg_acc': 50.050118279138765, 'loss': 7.365336894989014}


EP_train:1:  90%|| 24952/27626 [58:47<06:18,  7.07it/s]

{'epoch': 1, 'iter': 24950, 'avg_loss': 8.578553026947802, 'avg_acc': 50.046591318985215, 'loss': 9.994860649108887}


EP_train:1:  90%|| 24962/27626 [58:48<06:16,  7.07it/s]

{'epoch': 1, 'iter': 24960, 'avg_loss': 8.578566712688586, 'avg_acc': 50.04707343455791, 'loss': 8.127824783325195}


EP_train:1:  90%|| 24972/27626 [58:49<06:19,  6.99it/s]

{'epoch': 1, 'iter': 24970, 'avg_loss': 8.578462274688192, 'avg_acc': 50.04655400264306, 'loss': 9.177240371704102}


EP_train:1:  90%|| 24982/27626 [58:51<06:15,  7.05it/s]

{'epoch': 1, 'iter': 24980, 'avg_loss': 8.57845331302498, 'avg_acc': 50.044533845722746, 'loss': 8.479460716247559}


EP_train:1:  90%|| 24992/27626 [58:52<06:15,  7.02it/s]

{'epoch': 1, 'iter': 24990, 'avg_loss': 8.578479640781795, 'avg_acc': 50.04689188107718, 'loss': 8.936844825744629}


EP_train:1:  91%|| 25002/27626 [58:54<06:12,  7.04it/s]

{'epoch': 1, 'iter': 25000, 'avg_loss': 8.57850303329862, 'avg_acc': 50.04612315507379, 'loss': 8.631482124328613}


EP_train:1:  91%|| 25012/27626 [58:55<06:12,  7.02it/s]

{'epoch': 1, 'iter': 25010, 'avg_loss': 8.578453618015647, 'avg_acc': 50.04597976890168, 'loss': 8.700616836547852}


EP_train:1:  91%|| 25022/27626 [58:56<06:11,  7.01it/s]

{'epoch': 1, 'iter': 25020, 'avg_loss': 8.578357991352389, 'avg_acc': 50.045836497342236, 'loss': 7.744266033172607}


EP_train:1:  91%|| 25032/27626 [58:58<06:10,  7.01it/s]

{'epoch': 1, 'iter': 25030, 'avg_loss': 8.578292190240408, 'avg_acc': 50.04481942391435, 'loss': 8.527119636535645}


EP_train:1:  91%|| 25042/27626 [58:59<06:06,  7.06it/s]

{'epoch': 1, 'iter': 25040, 'avg_loss': 8.578359072469569, 'avg_acc': 50.045425502176435, 'loss': 8.810344696044922}


EP_train:1:  91%|| 25052/27626 [59:01<06:06,  7.02it/s]

{'epoch': 1, 'iter': 25050, 'avg_loss': 8.57825939867826, 'avg_acc': 50.0447836413716, 'loss': 8.375856399536133}


EP_train:1:  91%|| 25062/27626 [59:02<06:05,  7.02it/s]

{'epoch': 1, 'iter': 25060, 'avg_loss': 8.57832241425572, 'avg_acc': 50.04638681616855, 'loss': 8.289407730102539}


EP_train:1:  91%|| 25072/27626 [59:04<06:02,  7.05it/s]

{'epoch': 1, 'iter': 25070, 'avg_loss': 8.578258524408538, 'avg_acc': 50.046492959993614, 'loss': 7.527839183807373}


EP_train:1:  91%|| 25082/27626 [59:05<06:00,  7.06it/s]

{'epoch': 1, 'iter': 25080, 'avg_loss': 8.57822267615925, 'avg_acc': 50.04734659702563, 'loss': 8.787565231323242}


EP_train:1:  91%|| 25092/27626 [59:06<05:57,  7.09it/s]

{'epoch': 1, 'iter': 25090, 'avg_loss': 8.578207223974387, 'avg_acc': 50.04670499382249, 'loss': 8.553488731384277}


EP_train:1:  91%|| 25102/27626 [59:08<06:01,  6.97it/s]

{'epoch': 1, 'iter': 25100, 'avg_loss': 8.578199937046204, 'avg_acc': 50.046312895900556, 'loss': 8.130945205688477}


EP_train:1:  91%|| 25112/27626 [59:09<05:58,  7.00it/s]

{'epoch': 1, 'iter': 25110, 'avg_loss': 8.578370976232533, 'avg_acc': 50.047414479710085, 'loss': 8.789154052734375}


EP_train:1:  91%|| 25122/27626 [59:11<05:55,  7.05it/s]

{'epoch': 1, 'iter': 25120, 'avg_loss': 8.578349633275892, 'avg_acc': 50.049634767724214, 'loss': 8.698932647705078}


EP_train:1:  91%|| 25132/27626 [59:12<05:55,  7.02it/s]

{'epoch': 1, 'iter': 25130, 'avg_loss': 8.578287699320187, 'avg_acc': 50.04787413950898, 'loss': 8.191829681396484}


EP_train:1:  91%|| 25142/27626 [59:13<05:52,  7.05it/s]

{'epoch': 1, 'iter': 25140, 'avg_loss': 8.57845874443614, 'avg_acc': 50.0486008909749, 'loss': 8.850749969482422}


EP_train:1:  91%|| 25152/27626 [59:15<05:51,  7.03it/s]

{'epoch': 1, 'iter': 25150, 'avg_loss': 8.578524104719689, 'avg_acc': 50.047960319669194, 'loss': 8.964662551879883}


EP_train:1:  91%|| 25162/27626 [59:16<05:50,  7.04it/s]

{'epoch': 1, 'iter': 25160, 'avg_loss': 8.578487359531296, 'avg_acc': 50.04968006041095, 'loss': 8.480281829833984}


EP_train:1:  91%|| 25172/27626 [59:18<05:47,  7.06it/s]

{'epoch': 1, 'iter': 25170, 'avg_loss': 8.578489957892574, 'avg_acc': 50.04804636287792, 'loss': 8.117152214050293}


EP_train:1:  91%|| 25182/27626 [59:19<05:46,  7.06it/s]

{'epoch': 1, 'iter': 25180, 'avg_loss': 8.578462607259997, 'avg_acc': 50.047406774949366, 'loss': 8.063104629516602}


EP_train:1:  91%|| 25192/27626 [59:21<05:47,  7.01it/s]

{'epoch': 1, 'iter': 25190, 'avg_loss': 8.578518255219455, 'avg_acc': 50.046767694811635, 'loss': 8.73192310333252}


EP_train:1:  91%|| 25202/27626 [59:22<05:46,  7.00it/s]

{'epoch': 1, 'iter': 25200, 'avg_loss': 8.57859522455964, 'avg_acc': 50.04724514900203, 'loss': 7.701303482055664}


EP_train:1:  91%|| 25212/27626 [59:23<05:45,  6.99it/s]

{'epoch': 1, 'iter': 25210, 'avg_loss': 8.578591513144804, 'avg_acc': 50.0479701320852, 'loss': 8.48359489440918}


EP_train:1:  91%|| 25222/27626 [59:25<05:40,  7.06it/s]

{'epoch': 1, 'iter': 25220, 'avg_loss': 8.578485071874148, 'avg_acc': 50.04807501685104, 'loss': 8.044137954711914}


EP_train:1:  91%|| 25232/27626 [59:26<05:38,  7.07it/s]

{'epoch': 1, 'iter': 25230, 'avg_loss': 8.578550317710684, 'avg_acc': 50.04743668503032, 'loss': 9.250822067260742}


EP_train:1:  91%|| 25242/27626 [59:28<05:36,  7.09it/s]

{'epoch': 1, 'iter': 25240, 'avg_loss': 8.578747243360958, 'avg_acc': 50.04605601996751, 'loss': 9.429461479187012}


EP_train:1:  91%|| 25252/27626 [59:29<05:35,  7.08it/s]

{'epoch': 1, 'iter': 25250, 'avg_loss': 8.578761403005736, 'avg_acc': 50.0454189933072, 'loss': 8.619311332702637}


EP_train:1:  91%|| 25262/27626 [59:30<05:35,  7.05it/s]

{'epoch': 1, 'iter': 25260, 'avg_loss': 8.578724846591303, 'avg_acc': 50.04589584735363, 'loss': 8.49247932434082}


EP_train:1:  91%|| 25272/27626 [59:32<05:35,  7.01it/s]

{'epoch': 1, 'iter': 25270, 'avg_loss': 8.578591869221864, 'avg_acc': 50.046743302599815, 'loss': 8.782051086425781}


EP_train:1:  92%|| 25282/27626 [59:33<05:32,  7.05it/s]

{'epoch': 1, 'iter': 25280, 'avg_loss': 8.578674033653755, 'avg_acc': 50.04783730865077, 'loss': 9.267546653747559}


EP_train:1:  92%|| 25292/27626 [59:35<05:32,  7.03it/s]

{'epoch': 1, 'iter': 25290, 'avg_loss': 8.57861115804163, 'avg_acc': 50.04843620260172, 'loss': 8.67779541015625}


EP_train:1:  92%|| 25302/27626 [59:36<05:30,  7.02it/s]

{'epoch': 1, 'iter': 25300, 'avg_loss': 8.578535251610647, 'avg_acc': 50.048911110232794, 'loss': 8.184540748596191}


EP_train:1:  92%|| 25312/27626 [59:37<05:27,  7.06it/s]

{'epoch': 1, 'iter': 25310, 'avg_loss': 8.578353593879873, 'avg_acc': 50.04901525028643, 'loss': 7.560726165771484}


EP_train:1:  92%|| 25322/27626 [59:39<05:27,  7.04it/s]

{'epoch': 1, 'iter': 25320, 'avg_loss': 8.578370347943315, 'avg_acc': 50.04924272343114, 'loss': 8.799477577209473}


EP_train:1:  92%|| 25332/27626 [59:40<05:23,  7.09it/s]

{'epoch': 1, 'iter': 25330, 'avg_loss': 8.578266285677225, 'avg_acc': 50.048359717342386, 'loss': 8.061323165893555}


EP_train:1:  92%|| 25342/27626 [59:42<05:22,  7.08it/s]

{'epoch': 1, 'iter': 25340, 'avg_loss': 8.578308756022835, 'avg_acc': 50.04834063375557, 'loss': 8.494770050048828}


EP_train:1:  92%|| 25352/27626 [59:43<05:22,  7.06it/s]

{'epoch': 1, 'iter': 25350, 'avg_loss': 8.578325393453753, 'avg_acc': 50.04758194943, 'loss': 7.885565280914307}


EP_train:1:  92%|| 25362/27626 [59:45<05:21,  7.05it/s]

{'epoch': 1, 'iter': 25360, 'avg_loss': 8.578326532392396, 'avg_acc': 50.04731674618509, 'loss': 8.340620040893555}


EP_train:1:  92%|| 25372/27626 [59:46<05:20,  7.04it/s]

{'epoch': 1, 'iter': 25370, 'avg_loss': 8.578285652229207, 'avg_acc': 50.046189547120726, 'loss': 8.593666076660156}


EP_train:1:  92%|| 25382/27626 [59:47<05:21,  6.99it/s]

{'epoch': 1, 'iter': 25380, 'avg_loss': 8.57811966915934, 'avg_acc': 50.04629447224301, 'loss': 8.451045036315918}


EP_train:1:  92%|| 25392/27626 [59:49<05:18,  7.01it/s]

{'epoch': 1, 'iter': 25390, 'avg_loss': 8.57809159638427, 'avg_acc': 50.04406088771612, 'loss': 8.56277847290039}


EP_train:1:  92%|| 25402/27626 [59:50<05:14,  7.06it/s]

{'epoch': 1, 'iter': 25400, 'avg_loss': 8.577992177545534, 'avg_acc': 50.041829061847956, 'loss': 9.471071243286133}


EP_train:1:  92%|| 25412/27626 [59:52<05:12,  7.08it/s]

{'epoch': 1, 'iter': 25410, 'avg_loss': 8.577916843851456, 'avg_acc': 50.04304238321986, 'loss': 8.58464241027832}


EP_train:1:  92%|| 25422/27626 [59:53<05:12,  7.06it/s]

{'epoch': 1, 'iter': 25420, 'avg_loss': 8.57789682808326, 'avg_acc': 50.043885960426415, 'loss': 8.924347877502441}


EP_train:1:  92%|| 25432/27626 [59:54<05:09,  7.08it/s]

{'epoch': 1, 'iter': 25430, 'avg_loss': 8.577920833445294, 'avg_acc': 50.04485175573119, 'loss': 7.868631839752197}


EP_train:1:  92%|| 25442/27626 [59:56<05:07,  7.10it/s]

{'epoch': 1, 'iter': 25440, 'avg_loss': 8.577920133955734, 'avg_acc': 50.0443427931292, 'loss': 9.070777893066406}


EP_train:1:  92%|| 25452/27626 [59:57<05:08,  7.04it/s]

{'epoch': 1, 'iter': 25450, 'avg_loss': 8.577924411257781, 'avg_acc': 50.04395701544143, 'loss': 8.450621604919434}


EP_train:1:  92%|| 25462/27626 [59:59<05:08,  7.01it/s]

{'epoch': 1, 'iter': 25460, 'avg_loss': 8.577804711584088, 'avg_acc': 50.04332606731865, 'loss': 8.008861541748047}


EP_train:1:  92%|| 25472/27626 [1:00:00<05:05,  7.06it/s]

{'epoch': 1, 'iter': 25470, 'avg_loss': 8.577802773775517, 'avg_acc': 50.04527207412352, 'loss': 8.56525993347168}


EP_train:1:  92%|| 25482/27626 [1:00:02<05:02,  7.08it/s]

{'epoch': 1, 'iter': 25480, 'avg_loss': 8.577640017966853, 'avg_acc': 50.04562222832699, 'loss': 7.621801376342773}


EP_train:1:  92%|| 25492/27626 [1:00:03<05:00,  7.11it/s]

{'epoch': 1, 'iter': 25490, 'avg_loss': 8.57767705005249, 'avg_acc': 50.04744321525244, 'loss': 9.368709564208984}


EP_train:1:  92%|| 25502/27626 [1:00:04<05:00,  7.08it/s]

{'epoch': 1, 'iter': 25500, 'avg_loss': 8.57767847576196, 'avg_acc': 50.047669699227484, 'loss': 8.474469184875488}


EP_train:1:  92%|| 25512/27626 [1:00:06<05:00,  7.04it/s]

{'epoch': 1, 'iter': 25510, 'avg_loss': 8.57771626476174, 'avg_acc': 50.04740602093215, 'loss': 7.81511116027832}


EP_train:1:  92%|| 25522/27626 [1:00:07<04:58,  7.04it/s]

{'epoch': 1, 'iter': 25520, 'avg_loss': 8.577583517859543, 'avg_acc': 50.04457113749461, 'loss': 8.619528770446777}


EP_train:1:  92%|| 25532/27626 [1:00:09<04:59,  6.98it/s]

{'epoch': 1, 'iter': 25530, 'avg_loss': 8.577584301476136, 'avg_acc': 50.04357447808546, 'loss': 8.69177532196045}


EP_train:1:  92%|| 25542/27626 [1:00:10<04:58,  6.99it/s]

{'epoch': 1, 'iter': 25540, 'avg_loss': 8.577503914364652, 'avg_acc': 50.042945656004065, 'loss': 8.782824516296387}


EP_train:1:  92%|| 25552/27626 [1:00:11<04:57,  6.97it/s]

{'epoch': 1, 'iter': 25550, 'avg_loss': 8.577492138274863, 'avg_acc': 50.04231732613205, 'loss': 8.568289756774902}


EP_train:1:  93%|| 25562/27626 [1:00:13<04:52,  7.06it/s]

{'epoch': 1, 'iter': 25560, 'avg_loss': 8.577421039040905, 'avg_acc': 50.0427897969563, 'loss': 8.099331855773926}


EP_train:1:  93%|| 25572/27626 [1:00:14<04:51,  7.04it/s]

{'epoch': 1, 'iter': 25570, 'avg_loss': 8.57742627188875, 'avg_acc': 50.042895271987796, 'loss': 8.521090507507324}


EP_train:1:  93%|| 25582/27626 [1:00:16<04:47,  7.11it/s]

{'epoch': 1, 'iter': 25580, 'avg_loss': 8.577385794924895, 'avg_acc': 50.04226769868262, 'loss': 8.058595657348633}


EP_train:1:  93%|| 25592/27626 [1:00:17<04:46,  7.11it/s]

{'epoch': 1, 'iter': 25590, 'avg_loss': 8.577350321625085, 'avg_acc': 50.043228087999694, 'loss': 9.257728576660156}


EP_train:1:  93%|| 25602/27626 [1:00:18<04:44,  7.11it/s]

{'epoch': 1, 'iter': 25600, 'avg_loss': 8.577234835155513, 'avg_acc': 50.044431858130544, 'loss': 7.763035297393799}


EP_train:1:  93%|| 25612/27626 [1:00:20<04:43,  7.11it/s]

{'epoch': 1, 'iter': 25610, 'avg_loss': 8.577227314215506, 'avg_acc': 50.044048455741674, 'loss': 7.940657138824463}


EP_train:1:  93%|| 25622/27626 [1:00:21<04:44,  7.04it/s]

{'epoch': 1, 'iter': 25620, 'avg_loss': 8.577185337690794, 'avg_acc': 50.04159185824128, 'loss': 8.771127700805664}


EP_train:1:  93%|| 25632/27626 [1:00:23<04:44,  7.01it/s]

{'epoch': 1, 'iter': 25630, 'avg_loss': 8.577203256620376, 'avg_acc': 50.04133178572822, 'loss': 7.933128356933594}


EP_train:1:  93%|| 25642/27626 [1:00:24<04:43,  7.00it/s]

{'epoch': 1, 'iter': 25640, 'avg_loss': 8.577277836889444, 'avg_acc': 50.04168129168129, 'loss': 8.524188995361328}


EP_train:1:  93%|| 25652/27626 [1:00:26<04:40,  7.04it/s]

{'epoch': 1, 'iter': 25650, 'avg_loss': 8.577503540052001, 'avg_acc': 50.040934076644184, 'loss': 9.475672721862793}


EP_train:1:  93%|| 25662/27626 [1:00:27<04:37,  7.08it/s]

{'epoch': 1, 'iter': 25660, 'avg_loss': 8.577552196450952, 'avg_acc': 50.04043100424769, 'loss': 8.856075286865234}


EP_train:1:  93%|| 25672/27626 [1:00:28<04:35,  7.09it/s]

{'epoch': 1, 'iter': 25670, 'avg_loss': 8.577492529237643, 'avg_acc': 50.03858926415021, 'loss': 8.236371040344238}


EP_train:1:  93%|| 25682/27626 [1:00:30<04:38,  6.99it/s]

{'epoch': 1, 'iter': 25680, 'avg_loss': 8.577433331761654, 'avg_acc': 50.03820918188544, 'loss': 8.562765121459961}


EP_train:1:  93%|| 25692/27626 [1:00:31<04:33,  7.06it/s]

{'epoch': 1, 'iter': 25690, 'avg_loss': 8.57769239146955, 'avg_acc': 50.0375861196528, 'loss': 9.93576431274414}


EP_train:1:  93%|| 25702/27626 [1:00:33<04:31,  7.08it/s]

{'epoch': 1, 'iter': 25700, 'avg_loss': 8.577752735117457, 'avg_acc': 50.03659877047586, 'loss': 7.876466751098633}


EP_train:1:  93%|| 25712/27626 [1:00:34<04:32,  7.04it/s]

{'epoch': 1, 'iter': 25710, 'avg_loss': 8.577766183701295, 'avg_acc': 50.035855275951924, 'loss': 8.17011547088623}


EP_train:1:  93%|| 25722/27626 [1:00:35<04:31,  7.01it/s]

{'epoch': 1, 'iter': 25720, 'avg_loss': 8.577778198694832, 'avg_acc': 50.035719839819606, 'loss': 8.895048141479492}


EP_train:1:  93%|| 25732/27626 [1:00:37<04:31,  6.98it/s]

{'epoch': 1, 'iter': 25730, 'avg_loss': 8.577802112473048, 'avg_acc': 50.03522016244997, 'loss': 8.862070083618164}


EP_train:1:  93%|| 25742/27626 [1:00:38<04:27,  7.03it/s]

{'epoch': 1, 'iter': 25740, 'avg_loss': 8.577965094536447, 'avg_acc': 50.03678470144906, 'loss': 7.853586673736572}


EP_train:1:  93%|| 25752/27626 [1:00:40<04:25,  7.07it/s]

{'epoch': 1, 'iter': 25750, 'avg_loss': 8.578108039533165, 'avg_acc': 50.03713448021436, 'loss': 8.943089485168457}


EP_train:1:  93%|| 25762/27626 [1:00:41<04:24,  7.06it/s]

{'epoch': 1, 'iter': 25760, 'avg_loss': 8.577999947868026, 'avg_acc': 50.03578568378557, 'loss': 7.663289546966553}


EP_train:1:  93%|| 25772/27626 [1:00:42<04:22,  7.06it/s]

{'epoch': 1, 'iter': 25770, 'avg_loss': 8.57801604665971, 'avg_acc': 50.0362568390827, 'loss': 7.958731651306152}


EP_train:1:  93%|| 25782/27626 [1:00:44<04:21,  7.05it/s]

{'epoch': 1, 'iter': 25780, 'avg_loss': 8.577951219628334, 'avg_acc': 50.035636709204454, 'loss': 9.541244506835938}


EP_train:1:  93%|| 25792/27626 [1:00:45<04:19,  7.06it/s]

{'epoch': 1, 'iter': 25790, 'avg_loss': 8.577962845525755, 'avg_acc': 50.03477472761816, 'loss': 9.139863014221191}


EP_train:1:  93%|| 25802/27626 [1:00:47<04:20,  6.99it/s]

{'epoch': 1, 'iter': 25800, 'avg_loss': 8.577934849036303, 'avg_acc': 50.03439789155459, 'loss': 9.166672706604004}


EP_train:1:  93%|| 25812/27626 [1:00:48<04:18,  7.03it/s]

{'epoch': 1, 'iter': 25810, 'avg_loss': 8.578025879390125, 'avg_acc': 50.033658130254544, 'loss': 8.860176086425781}


EP_train:1:  93%|| 25822/27626 [1:00:50<04:18,  6.98it/s]

{'epoch': 1, 'iter': 25820, 'avg_loss': 8.578135979793752, 'avg_acc': 50.03182971224972, 'loss': 9.094061851501465}


EP_train:1:  94%|| 25832/27626 [1:00:51<04:13,  7.08it/s]

{'epoch': 1, 'iter': 25830, 'avg_loss': 8.577967158667528, 'avg_acc': 50.03036564592931, 'loss': 8.615670204162598}


EP_train:1:  94%|| 25842/27626 [1:00:52<04:12,  7.06it/s]

{'epoch': 1, 'iter': 25840, 'avg_loss': 8.577936237180644, 'avg_acc': 50.03095855423552, 'loss': 8.266640663146973}


EP_train:1:  94%|| 25852/27626 [1:00:54<04:09,  7.11it/s]

{'epoch': 1, 'iter': 25850, 'avg_loss': 8.577866343519295, 'avg_acc': 50.0322763142625, 'loss': 8.680301666259766}


EP_train:1:  94%|| 25862/27626 [1:00:55<04:10,  7.04it/s]

{'epoch': 1, 'iter': 25860, 'avg_loss': 8.577716107388936, 'avg_acc': 50.03081377363598, 'loss': 8.573285102844238}


EP_train:1:  94%|| 25872/27626 [1:00:57<04:08,  7.05it/s]

{'epoch': 1, 'iter': 25870, 'avg_loss': 8.577595485154811, 'avg_acc': 50.02995632175023, 'loss': 8.780599594116211}


EP_train:1:  94%|| 25882/27626 [1:00:58<04:06,  7.07it/s]

{'epoch': 1, 'iter': 25880, 'avg_loss': 8.577487008445695, 'avg_acc': 50.02885804257949, 'loss': 8.342951774597168}


EP_train:1:  94%|| 25892/27626 [1:00:59<04:06,  7.03it/s]

{'epoch': 1, 'iter': 25890, 'avg_loss': 8.577632169191993, 'avg_acc': 50.02908829322931, 'loss': 9.108348846435547}


EP_train:1:  94%|| 25902/27626 [1:01:01<04:06,  6.99it/s]

{'epoch': 1, 'iter': 25900, 'avg_loss': 8.57761310359271, 'avg_acc': 50.03064553492143, 'loss': 7.563953399658203}


EP_train:1:  94%|| 25912/27626 [1:01:02<04:04,  7.02it/s]

{'epoch': 1, 'iter': 25910, 'avg_loss': 8.577493766536449, 'avg_acc': 50.03232217976921, 'loss': 8.467060089111328}


EP_train:1:  94%|| 25922/27626 [1:01:04<04:02,  7.04it/s]

{'epoch': 1, 'iter': 25920, 'avg_loss': 8.577518164828707, 'avg_acc': 50.031465799930565, 'loss': 8.851533889770508}


EP_train:1:  94%|| 25932/27626 [1:01:05<03:59,  7.07it/s]

{'epoch': 1, 'iter': 25930, 'avg_loss': 8.577563745526534, 'avg_acc': 50.02964598357179, 'loss': 8.913817405700684}


EP_train:1:  94%|| 25942/27626 [1:01:07<03:57,  7.09it/s]

{'epoch': 1, 'iter': 25940, 'avg_loss': 8.577575867134055, 'avg_acc': 50.0291526926487, 'loss': 8.865800857543945}


EP_train:1:  94%|| 25952/27626 [1:01:08<03:57,  7.03it/s]

{'epoch': 1, 'iter': 25950, 'avg_loss': 8.577552696941106, 'avg_acc': 50.02902103965165, 'loss': 8.080975532531738}


EP_train:1:  94%|| 25962/27626 [1:01:09<03:55,  7.06it/s]

{'epoch': 1, 'iter': 25960, 'avg_loss': 8.577677313680205, 'avg_acc': 50.02949135241324, 'loss': 8.62790584564209}


EP_train:1:  94%|| 25972/27626 [1:01:11<03:57,  6.96it/s]

{'epoch': 1, 'iter': 25970, 'avg_loss': 8.577720333139904, 'avg_acc': 50.02791575218512, 'loss': 9.21013355255127}


EP_train:1:  94%|| 25982/27626 [1:01:12<03:52,  7.06it/s]

{'epoch': 1, 'iter': 25980, 'avg_loss': 8.577634089088784, 'avg_acc': 50.02934836996267, 'loss': 7.511213779449463}


EP_train:1:  94%|| 25992/27626 [1:01:14<03:53,  6.99it/s]

{'epoch': 1, 'iter': 25990, 'avg_loss': 8.577525997991115, 'avg_acc': 50.03077988534492, 'loss': 8.715827941894531}


EP_train:1:  94%|| 26002/27626 [1:01:15<03:53,  6.95it/s]

{'epoch': 1, 'iter': 26000, 'avg_loss': 8.577648208671421, 'avg_acc': 50.03016710895734, 'loss': 8.421663284301758}


EP_train:1:  94%|| 26012/27626 [1:01:16<03:50,  7.00it/s]

{'epoch': 1, 'iter': 26010, 'avg_loss': 8.577680508445408, 'avg_acc': 50.0300353696513, 'loss': 8.346250534057617}


EP_train:1:  94%|| 26022/27626 [1:01:18<03:47,  7.05it/s]

{'epoch': 1, 'iter': 26020, 'avg_loss': 8.577868628752029, 'avg_acc': 50.031104684677764, 'loss': 9.83238410949707}


EP_train:1:  94%|| 26032/27626 [1:01:19<03:48,  6.98it/s]

{'epoch': 1, 'iter': 26030, 'avg_loss': 8.577916963701906, 'avg_acc': 50.03121278475664, 'loss': 9.725865364074707}


EP_train:1:  94%|| 26042/27626 [1:01:21<03:45,  7.03it/s]

{'epoch': 1, 'iter': 26040, 'avg_loss': 8.577940918920337, 'avg_acc': 50.03192081717292, 'loss': 8.90075397491455}


EP_train:1:  94%|| 26052/27626 [1:01:22<03:42,  7.08it/s]

{'epoch': 1, 'iter': 26050, 'avg_loss': 8.577950474177252, 'avg_acc': 50.033707919081806, 'loss': 9.11685562133789}


EP_train:1:  94%|| 26062/27626 [1:01:24<03:42,  7.03it/s]

{'epoch': 1, 'iter': 26060, 'avg_loss': 8.577974573804093, 'avg_acc': 50.03249587506235, 'loss': 8.879048347473145}


EP_train:1:  94%|| 26072/27626 [1:01:25<03:42,  6.98it/s]

{'epoch': 1, 'iter': 26070, 'avg_loss': 8.577999927338146, 'avg_acc': 50.03128476084539, 'loss': 8.6691255569458}


EP_train:1:  94%|| 26082/27626 [1:01:26<03:39,  7.03it/s]

{'epoch': 1, 'iter': 26080, 'avg_loss': 8.578080060204947, 'avg_acc': 50.03271059391895, 'loss': 8.478456497192383}


EP_train:1:  94%|| 26092/27626 [1:01:28<03:37,  7.04it/s]

{'epoch': 1, 'iter': 26090, 'avg_loss': 8.578048573133886, 'avg_acc': 50.03281782990303, 'loss': 8.222921371459961}


EP_train:1:  94%|| 26102/27626 [1:01:29<03:37,  7.02it/s]

{'epoch': 1, 'iter': 26100, 'avg_loss': 8.577958808868381, 'avg_acc': 50.03220662043601, 'loss': 8.508027076721191}


EP_train:1:  95%|| 26112/27626 [1:01:31<03:34,  7.04it/s]

{'epoch': 1, 'iter': 26110, 'avg_loss': 8.577931480832122, 'avg_acc': 50.031595879131395, 'loss': 8.17786979675293}


EP_train:1:  95%|| 26122/27626 [1:01:32<03:33,  7.03it/s]

{'epoch': 1, 'iter': 26120, 'avg_loss': 8.577936086960982, 'avg_acc': 50.033856858466365, 'loss': 8.42509937286377}


EP_train:1:  95%|| 26132/27626 [1:01:33<03:32,  7.02it/s]

{'epoch': 1, 'iter': 26130, 'avg_loss': 8.577997127003915, 'avg_acc': 50.034681030194015, 'loss': 9.082520484924316}


EP_train:1:  95%|| 26142/27626 [1:01:35<03:29,  7.08it/s]

{'epoch': 1, 'iter': 26140, 'avg_loss': 8.577984370961303, 'avg_acc': 50.034189587238444, 'loss': 8.662227630615234}


EP_train:1:  95%|| 26152/27626 [1:01:36<03:28,  7.06it/s]

{'epoch': 1, 'iter': 26150, 'avg_loss': 8.578007148869439, 'avg_acc': 50.033459523536386, 'loss': 8.58328914642334}


EP_train:1:  95%|| 26162/27626 [1:01:38<03:27,  7.04it/s]

{'epoch': 1, 'iter': 26160, 'avg_loss': 8.57791947995863, 'avg_acc': 50.033207828446926, 'loss': 8.229033470153809}


EP_train:1:  95%|| 26172/27626 [1:01:39<03:27,  7.00it/s]

{'epoch': 1, 'iter': 26170, 'avg_loss': 8.577748666945288, 'avg_acc': 50.03247869779527, 'loss': 7.721521377563477}


EP_train:1:  95%|| 26182/27626 [1:01:41<03:26,  6.98it/s]

{'epoch': 1, 'iter': 26180, 'avg_loss': 8.5777491600882, 'avg_acc': 50.03210820824262, 'loss': 8.937427520751953}


EP_train:1:  95%|| 26192/27626 [1:01:42<03:23,  7.05it/s]

{'epoch': 1, 'iter': 26190, 'avg_loss': 8.57776028416938, 'avg_acc': 50.03197663319461, 'loss': 8.223206520080566}


EP_train:1:  95%|| 26202/27626 [1:01:43<03:23,  7.01it/s]

{'epoch': 1, 'iter': 26200, 'avg_loss': 8.57773597511343, 'avg_acc': 50.03112953704058, 'loss': 8.19536304473877}


EP_train:1:  95%|| 26212/27626 [1:01:45<03:20,  7.05it/s]

{'epoch': 1, 'iter': 26210, 'avg_loss': 8.577674098555324, 'avg_acc': 50.032309908053875, 'loss': 7.709372520446777}


EP_train:1:  95%|| 26222/27626 [1:01:46<03:18,  7.08it/s]

{'epoch': 1, 'iter': 26220, 'avg_loss': 8.57761414433584, 'avg_acc': 50.03360855802601, 'loss': 7.898441791534424}


EP_train:1:  95%|| 26232/27626 [1:01:48<03:17,  7.07it/s]

{'epoch': 1, 'iter': 26230, 'avg_loss': 8.577604189006687, 'avg_acc': 50.03407228088902, 'loss': 9.021924018859863}


EP_train:1:  95%|| 26242/27626 [1:01:49<03:15,  7.07it/s]

{'epoch': 1, 'iter': 26240, 'avg_loss': 8.577509495267181, 'avg_acc': 50.03358294272322, 'loss': 8.830780029296875}


EP_train:1:  95%|| 26252/27626 [1:01:50<03:16,  6.98it/s]

{'epoch': 1, 'iter': 26250, 'avg_loss': 8.577543804365895, 'avg_acc': 50.03285589120414, 'loss': 7.788572311401367}


EP_train:1:  95%|| 26262/27626 [1:01:52<03:16,  6.95it/s]

{'epoch': 1, 'iter': 26260, 'avg_loss': 8.57755438908597, 'avg_acc': 50.03165340238376, 'loss': 9.078478813171387}


EP_train:1:  95%|| 26272/27626 [1:01:53<03:15,  6.93it/s]

{'epoch': 1, 'iter': 26270, 'avg_loss': 8.577483057807875, 'avg_acc': 50.03223611586921, 'loss': 8.41361141204834}


EP_train:1:  95%|| 26282/27626 [1:01:55<03:11,  7.02it/s]

{'epoch': 1, 'iter': 26280, 'avg_loss': 8.577335948555088, 'avg_acc': 50.03186712834366, 'loss': 8.630985260009766}


EP_train:1:  95%|| 26292/27626 [1:01:56<03:08,  7.08it/s]

{'epoch': 1, 'iter': 26290, 'avg_loss': 8.577261846464806, 'avg_acc': 50.03304362709673, 'loss': 8.369958877563477}


EP_train:1:  95%|| 26302/27626 [1:01:58<03:08,  7.02it/s]

{'epoch': 1, 'iter': 26300, 'avg_loss': 8.577187353601692, 'avg_acc': 50.03350633055778, 'loss': 8.324101448059082}


EP_train:1:  95%|| 26312/27626 [1:01:59<03:05,  7.09it/s]

{'epoch': 1, 'iter': 26310, 'avg_loss': 8.577230978898305, 'avg_acc': 50.03456254038235, 'loss': 8.600972175598145}


EP_train:1:  95%|| 26322/27626 [1:02:00<03:07,  6.95it/s]

{'epoch': 1, 'iter': 26320, 'avg_loss': 8.57724304635095, 'avg_acc': 50.033480870787585, 'loss': 8.823697090148926}


EP_train:1:  95%|| 26332/27626 [1:02:02<03:04,  7.02it/s]

{'epoch': 1, 'iter': 26330, 'avg_loss': 8.57721214436411, 'avg_acc': 50.03406156241692, 'loss': 9.005529403686523}


EP_train:1:  95%|| 26342/27626 [1:02:03<03:03,  6.99it/s]

{'epoch': 1, 'iter': 26340, 'avg_loss': 8.577131044321128, 'avg_acc': 50.03428590410387, 'loss': 7.973840713500977}


EP_train:1:  95%|| 26352/27626 [1:02:05<03:01,  7.04it/s]

{'epoch': 1, 'iter': 26350, 'avg_loss': 8.577164031582436, 'avg_acc': 50.03510303214299, 'loss': 8.740645408630371}


EP_train:1:  95%|| 26362/27626 [1:02:06<03:01,  6.97it/s]

{'epoch': 1, 'iter': 26360, 'avg_loss': 8.577277963717968, 'avg_acc': 50.03378570615682, 'loss': 8.650830268859863}


EP_train:1:  95%|| 26372/27626 [1:02:07<02:57,  7.06it/s]

{'epoch': 1, 'iter': 26370, 'avg_loss': 8.577294320612834, 'avg_acc': 50.035194911076566, 'loss': 8.423157691955566}


EP_train:1:  95%|| 26382/27626 [1:02:09<02:56,  7.05it/s]

{'epoch': 1, 'iter': 26380, 'avg_loss': 8.577277669949634, 'avg_acc': 50.03506311360448, 'loss': 8.630660057067871}


EP_train:1:  96%|| 26392/27626 [1:02:10<02:56,  7.01it/s]

{'epoch': 1, 'iter': 26390, 'avg_loss': 8.577139602723431, 'avg_acc': 50.03587870865067, 'loss': 8.174175262451172}


EP_train:1:  96%|| 26402/27626 [1:02:12<02:52,  7.09it/s]

{'epoch': 1, 'iter': 26400, 'avg_loss': 8.577157935429367, 'avg_acc': 50.035983485474034, 'loss': 8.676641464233398}


EP_train:1:  96%|| 26412/27626 [1:02:13<02:51,  7.09it/s]

{'epoch': 1, 'iter': 26410, 'avg_loss': 8.577052785330984, 'avg_acc': 50.0357332172201, 'loss': 7.921899318695068}


EP_train:1:  96%|| 26422/27626 [1:02:15<02:52,  6.97it/s]

{'epoch': 1, 'iter': 26420, 'avg_loss': 8.577062452233337, 'avg_acc': 50.036429355436965, 'loss': 8.51749038696289}


EP_train:1:  96%|| 26432/27626 [1:02:16<02:49,  7.04it/s]

{'epoch': 1, 'iter': 26430, 'avg_loss': 8.577094892986263, 'avg_acc': 50.03736143165223, 'loss': 8.513236999511719}


EP_train:1:  96%|| 26442/27626 [1:02:17<02:48,  7.04it/s]

{'epoch': 1, 'iter': 26440, 'avg_loss': 8.5770910549153, 'avg_acc': 50.03746548920237, 'loss': 7.754823207855225}


EP_train:1:  96%|| 26452/27626 [1:02:19<02:47,  7.03it/s]

{'epoch': 1, 'iter': 26450, 'avg_loss': 8.577073712082315, 'avg_acc': 50.03650618124078, 'loss': 9.05974292755127}


EP_train:1:  96%|| 26462/27626 [1:02:20<02:45,  7.05it/s]

{'epoch': 1, 'iter': 26460, 'avg_loss': 8.577127280804193, 'avg_acc': 50.036964778353045, 'loss': 9.169842720031738}


EP_train:1:  96%|| 26472/27626 [1:02:22<02:43,  7.06it/s]

{'epoch': 1, 'iter': 26470, 'avg_loss': 8.57711146578656, 'avg_acc': 50.03518000831099, 'loss': 7.832036972045898}


EP_train:1:  96%|| 26482/27626 [1:02:23<02:41,  7.07it/s]

{'epoch': 1, 'iter': 26480, 'avg_loss': 8.577134875650515, 'avg_acc': 50.03422265020203, 'loss': 8.153892517089844}


EP_train:1:  96%|| 26492/27626 [1:02:24<02:42,  6.99it/s]

{'epoch': 1, 'iter': 26490, 'avg_loss': 8.577155561617424, 'avg_acc': 50.033030085689475, 'loss': 9.351239204406738}


EP_train:1:  96%|| 26502/27626 [1:02:26<02:39,  7.07it/s]

{'epoch': 1, 'iter': 26500, 'avg_loss': 8.577174565480892, 'avg_acc': 50.03360722236897, 'loss': 8.371406555175781}


EP_train:1:  96%|| 26512/27626 [1:02:27<02:38,  7.05it/s]

{'epoch': 1, 'iter': 26510, 'avg_loss': 8.577051363375881, 'avg_acc': 50.03335879446268, 'loss': 8.18285846710205}


EP_train:1:  96%|| 26522/27626 [1:02:29<02:36,  7.04it/s]

{'epoch': 1, 'iter': 26520, 'avg_loss': 8.576964750049349, 'avg_acc': 50.032521398137334, 'loss': 8.246506690979004}


EP_train:1:  96%|| 26532/27626 [1:02:30<02:36,  7.00it/s]

{'epoch': 1, 'iter': 26530, 'avg_loss': 8.577178118676526, 'avg_acc': 50.032744713731105, 'loss': 9.158411026000977}


EP_train:1:  96%|| 26542/27626 [1:02:32<02:34,  7.00it/s]

{'epoch': 1, 'iter': 26540, 'avg_loss': 8.577184025674743, 'avg_acc': 50.0328501186843, 'loss': 8.579700469970703}


EP_train:1:  96%|| 26552/27626 [1:02:33<02:31,  7.07it/s]

{'epoch': 1, 'iter': 26550, 'avg_loss': 8.577153613888735, 'avg_acc': 50.03248465217882, 'loss': 8.76364803314209}


EP_train:1:  96%|| 26562/27626 [1:02:34<02:30,  7.08it/s]

{'epoch': 1, 'iter': 26560, 'avg_loss': 8.577058829491426, 'avg_acc': 50.03329599789165, 'loss': 8.126785278320312}


EP_train:1:  96%|| 26572/27626 [1:02:36<02:28,  7.09it/s]

{'epoch': 1, 'iter': 26570, 'avg_loss': 8.577000435221196, 'avg_acc': 50.033636295209064, 'loss': 8.536524772644043}


EP_train:1:  96%|| 26582/27626 [1:02:37<02:28,  7.04it/s]

{'epoch': 1, 'iter': 26580, 'avg_loss': 8.577098044597117, 'avg_acc': 50.03350607576841, 'loss': 8.882892608642578}


EP_train:1:  96%|| 26592/27626 [1:02:39<02:27,  7.03it/s]

{'epoch': 1, 'iter': 26590, 'avg_loss': 8.577062959198685, 'avg_acc': 50.032200744612844, 'loss': 7.874117374420166}


EP_train:1:  96%|| 26602/27626 [1:02:40<02:25,  7.05it/s]

{'epoch': 1, 'iter': 26600, 'avg_loss': 8.577130330074533, 'avg_acc': 50.03218863952483, 'loss': 7.9965715408325195}


EP_train:1:  96%|| 26612/27626 [1:02:41<02:25,  6.98it/s]

{'epoch': 1, 'iter': 26610, 'avg_loss': 8.577141108764996, 'avg_acc': 50.03264627409718, 'loss': 8.29505443572998}


EP_train:1:  96%|| 26622/27626 [1:02:43<02:22,  7.04it/s]

{'epoch': 1, 'iter': 26620, 'avg_loss': 8.577185452985242, 'avg_acc': 50.032399233687684, 'loss': 8.27517032623291}


EP_train:1:  96%|| 26632/27626 [1:02:44<02:21,  7.03it/s]

{'epoch': 1, 'iter': 26630, 'avg_loss': 8.577242453543871, 'avg_acc': 50.032504412151255, 'loss': 7.927955150604248}


EP_train:1:  96%|| 26642/27626 [1:02:46<02:19,  7.05it/s]

{'epoch': 1, 'iter': 26640, 'avg_loss': 8.577176876547082, 'avg_acc': 50.03167110844188, 'loss': 8.038847923278809}


EP_train:1:  96%|| 26652/27626 [1:02:47<02:18,  7.03it/s]

{'epoch': 1, 'iter': 26650, 'avg_loss': 8.577179722891334, 'avg_acc': 50.02990037897265, 'loss': 8.190251350402832}


EP_train:1:  97%|| 26662/27626 [1:02:48<02:16,  7.05it/s]

{'epoch': 1, 'iter': 26660, 'avg_loss': 8.577130337057653, 'avg_acc': 50.02930310190915, 'loss': 8.799321174621582}


EP_train:1:  97%|| 26672/27626 [1:02:50<02:15,  7.06it/s]

{'epoch': 1, 'iter': 26670, 'avg_loss': 8.577194898756312, 'avg_acc': 50.030112294252184, 'loss': 9.143583297729492}


EP_train:1:  97%|| 26682/27626 [1:02:51<02:13,  7.06it/s]

{'epoch': 1, 'iter': 26680, 'avg_loss': 8.577274630547892, 'avg_acc': 50.03056950639031, 'loss': 8.090328216552734}


EP_train:1:  97%|| 26692/27626 [1:02:53<02:13,  6.98it/s]

{'epoch': 1, 'iter': 26690, 'avg_loss': 8.577233760051678, 'avg_acc': 50.03090929526807, 'loss': 8.46509838104248}


EP_train:1:  97%|| 26702/27626 [1:02:54<02:10,  7.08it/s]

{'epoch': 1, 'iter': 26700, 'avg_loss': 8.57723257817658, 'avg_acc': 50.031365866446954, 'loss': 7.984445095062256}


EP_train:1:  97%|| 26712/27626 [1:02:56<02:08,  7.09it/s]

{'epoch': 1, 'iter': 26710, 'avg_loss': 8.57713532193428, 'avg_acc': 50.031003144771816, 'loss': 9.051080703735352}


EP_train:1:  97%|| 26722/27626 [1:02:57<02:07,  7.08it/s]

{'epoch': 1, 'iter': 26720, 'avg_loss': 8.577081966585736, 'avg_acc': 50.02841865948131, 'loss': 8.428613662719727}


EP_train:1:  97%|| 26732/27626 [1:02:58<02:06,  7.09it/s]

{'epoch': 1, 'iter': 26730, 'avg_loss': 8.57709419210459, 'avg_acc': 50.02899255545995, 'loss': 8.579181671142578}


EP_train:1:  97%|| 26742/27626 [1:03:00<02:05,  7.04it/s]

{'epoch': 1, 'iter': 26740, 'avg_loss': 8.577099806475776, 'avg_acc': 50.02828054298642, 'loss': 9.324965476989746}


EP_train:1:  97%|| 26752/27626 [1:03:01<02:03,  7.08it/s]

{'epoch': 1, 'iter': 26750, 'avg_loss': 8.577018580166301, 'avg_acc': 50.02593360995851, 'loss': 9.146249771118164}


EP_train:1:  97%|| 26762/27626 [1:03:03<02:02,  7.04it/s]

{'epoch': 1, 'iter': 26760, 'avg_loss': 8.576986258324208, 'avg_acc': 50.024872949441345, 'loss': 8.272377967834473}


EP_train:1:  97%|| 26772/27626 [1:03:04<02:01,  7.06it/s]

{'epoch': 1, 'iter': 26770, 'avg_loss': 8.576889461772968, 'avg_acc': 50.02626442792574, 'loss': 8.286859512329102}


EP_train:1:  97%|| 26782/27626 [1:03:05<01:59,  7.05it/s]

{'epoch': 1, 'iter': 26780, 'avg_loss': 8.576986291874052, 'avg_acc': 50.02648799522049, 'loss': 9.20772647857666}


EP_train:1:  97%|| 26792/27626 [1:03:07<01:59,  7.01it/s]

{'epoch': 1, 'iter': 26790, 'avg_loss': 8.577021192300272, 'avg_acc': 50.027644544809824, 'loss': 8.533080101013184}


EP_train:1:  97%|| 26802/27626 [1:03:08<01:57,  7.03it/s]

{'epoch': 1, 'iter': 26800, 'avg_loss': 8.577009899963803, 'avg_acc': 50.02705122943174, 'loss': 8.606164932250977}


EP_train:1:  97%|| 26812/27626 [1:03:10<01:55,  7.02it/s]

{'epoch': 1, 'iter': 26810, 'avg_loss': 8.576998242385384, 'avg_acc': 50.02680802655627, 'loss': 8.991658210754395}


EP_train:1:  97%|| 26822/27626 [1:03:11<01:53,  7.07it/s]

{'epoch': 1, 'iter': 26820, 'avg_loss': 8.576912782428773, 'avg_acc': 50.02959434771262, 'loss': 7.292492866516113}


EP_train:1:  97%|| 26832/27626 [1:03:13<01:52,  7.06it/s]

{'epoch': 1, 'iter': 26830, 'avg_loss': 8.576932695992397, 'avg_acc': 50.02806921098729, 'loss': 9.720331192016602}


EP_train:1:  97%|| 26842/27626 [1:03:14<01:50,  7.07it/s]

{'epoch': 1, 'iter': 26840, 'avg_loss': 8.576970044886416, 'avg_acc': 50.02724376886108, 'loss': 7.960022926330566}


EP_train:1:  97%|| 26852/27626 [1:03:15<01:49,  7.07it/s]

{'epoch': 1, 'iter': 26850, 'avg_loss': 8.577019210456847, 'avg_acc': 50.02665170757141, 'loss': 8.233736991882324}


EP_train:1:  97%|| 26862/27626 [1:03:17<01:49,  7.01it/s]

{'epoch': 1, 'iter': 26860, 'avg_loss': 8.57702187787776, 'avg_acc': 50.02652544581363, 'loss': 9.114462852478027}


EP_train:1:  97%|| 26872/27626 [1:03:18<01:46,  7.07it/s]

{'epoch': 1, 'iter': 26870, 'avg_loss': 8.576925424837231, 'avg_acc': 50.0257014997581, 'loss': 8.190793991088867}


EP_train:1:  97%|| 26882/27626 [1:03:20<01:45,  7.04it/s]

{'epoch': 1, 'iter': 26880, 'avg_loss': 8.576998959064467, 'avg_acc': 50.02441315427253, 'loss': 8.345553398132324}


EP_train:1:  97%|| 26892/27626 [1:03:21<01:44,  7.03it/s]

{'epoch': 1, 'iter': 26890, 'avg_loss': 8.577030309007059, 'avg_acc': 50.022893347216545, 'loss': 8.998560905456543}


EP_train:1:  97%|| 26902/27626 [1:03:22<01:43,  6.97it/s]

{'epoch': 1, 'iter': 26900, 'avg_loss': 8.577000355854349, 'avg_acc': 50.02253633693915, 'loss': 7.8923869132995605}


EP_train:1:  97%|| 26912/27626 [1:03:24<01:41,  7.01it/s]

{'epoch': 1, 'iter': 26910, 'avg_loss': 8.576989223024066, 'avg_acc': 50.02276020957973, 'loss': 9.886292457580566}


EP_train:1:  97%|| 26922/27626 [1:03:25<01:39,  7.06it/s]

{'epoch': 1, 'iter': 26920, 'avg_loss': 8.577018887002739, 'avg_acc': 50.02101054938524, 'loss': 8.881939888000488}


EP_train:1:  97%|| 26932/27626 [1:03:27<01:39,  6.99it/s]

{'epoch': 1, 'iter': 26930, 'avg_loss': 8.577049393654047, 'avg_acc': 50.023323493371954, 'loss': 9.140207290649414}


EP_train:1:  98%|| 26942/27626 [1:03:28<01:37,  6.99it/s]

{'epoch': 1, 'iter': 26940, 'avg_loss': 8.577009968350369, 'avg_acc': 50.023082847704245, 'loss': 8.396404266357422}


EP_train:1:  98%|| 26952/27626 [1:03:30<01:36,  6.98it/s]

{'epoch': 1, 'iter': 26950, 'avg_loss': 8.577007649891504, 'avg_acc': 50.02353808764053, 'loss': 8.247063636779785}


EP_train:1:  98%|| 26962/27626 [1:03:31<01:34,  7.02it/s]

{'epoch': 1, 'iter': 26960, 'avg_loss': 8.5770121755473, 'avg_acc': 50.02364526538333, 'loss': 8.77922534942627}


EP_train:1:  98%|| 26972/27626 [1:03:32<01:32,  7.09it/s]

{'epoch': 1, 'iter': 26970, 'avg_loss': 8.577058367131439, 'avg_acc': 50.023404768084234, 'loss': 8.848814964294434}


EP_train:1:  98%|| 26982/27626 [1:03:34<01:31,  7.07it/s]

{'epoch': 1, 'iter': 26980, 'avg_loss': 8.576960751913232, 'avg_acc': 50.022237871094475, 'loss': 7.194291114807129}


EP_train:1:  98%|| 26992/27626 [1:03:35<01:30,  7.02it/s]

{'epoch': 1, 'iter': 26990, 'avg_loss': 8.576891652448873, 'avg_acc': 50.02280852876885, 'loss': 8.44404125213623}


EP_train:1:  98%|| 27002/27626 [1:03:37<01:28,  7.05it/s]

{'epoch': 1, 'iter': 27000, 'avg_loss': 8.57680227818222, 'avg_acc': 50.0236102366579, 'loss': 7.5386762619018555}


EP_train:1:  98%|| 27012/27626 [1:03:38<01:27,  7.04it/s]

{'epoch': 1, 'iter': 27010, 'avg_loss': 8.576752165025777, 'avg_acc': 50.02394857650587, 'loss': 8.970488548278809}


EP_train:1:  98%|| 27022/27626 [1:03:39<01:25,  7.05it/s]

{'epoch': 1, 'iter': 27020, 'avg_loss': 8.576761586348198, 'avg_acc': 50.02474926908701, 'loss': 8.77662467956543}


EP_train:1:  98%|| 27032/27626 [1:03:41<01:24,  7.03it/s]

{'epoch': 1, 'iter': 27030, 'avg_loss': 8.576758808232514, 'avg_acc': 50.02566497724834, 'loss': 8.922697067260742}


EP_train:1:  98%|| 27042/27626 [1:03:42<01:23,  7.02it/s]

{'epoch': 1, 'iter': 27040, 'avg_loss': 8.576743512678938, 'avg_acc': 50.02658000813579, 'loss': 8.336970329284668}


EP_train:1:  98%|| 27052/27626 [1:03:44<01:20,  7.09it/s]

{'epoch': 1, 'iter': 27050, 'avg_loss': 8.57666417835054, 'avg_acc': 50.028418542752576, 'loss': 8.356897354125977}


EP_train:1:  98%|| 27062/27626 [1:03:45<01:20,  7.04it/s]

{'epoch': 1, 'iter': 27060, 'avg_loss': 8.57674753846978, 'avg_acc': 50.02840804109234, 'loss': 9.527806282043457}


EP_train:1:  98%|| 27072/27626 [1:03:46<01:19,  6.95it/s]

{'epoch': 1, 'iter': 27070, 'avg_loss': 8.576881971380743, 'avg_acc': 50.026665989435195, 'loss': 8.978635787963867}


EP_train:1:  98%|| 27082/27626 [1:03:48<01:17,  7.02it/s]

{'epoch': 1, 'iter': 27080, 'avg_loss': 8.576882926731315, 'avg_acc': 50.025040618884084, 'loss': 8.446257591247559}


EP_train:1:  98%|| 27092/27626 [1:03:49<01:15,  7.07it/s]

{'epoch': 1, 'iter': 27090, 'avg_loss': 8.576713782644172, 'avg_acc': 50.02468531984792, 'loss': 9.252750396728516}


EP_train:1:  98%|| 27102/27626 [1:03:51<01:14,  7.06it/s]

{'epoch': 1, 'iter': 27100, 'avg_loss': 8.576787240082679, 'avg_acc': 50.02386904542268, 'loss': 9.006489753723145}


EP_train:1:  98%|| 27112/27626 [1:03:52<01:12,  7.09it/s]

{'epoch': 1, 'iter': 27110, 'avg_loss': 8.576775686852825, 'avg_acc': 50.022361771974474, 'loss': 8.542678833007812}


EP_train:1:  98%|| 27122/27626 [1:03:54<01:11,  7.00it/s]

{'epoch': 1, 'iter': 27120, 'avg_loss': 8.576753807992676, 'avg_acc': 50.02246875115224, 'loss': 8.871159553527832}


EP_train:1:  98%|| 27132/27626 [1:03:55<01:09,  7.09it/s]

{'epoch': 1, 'iter': 27130, 'avg_loss': 8.576842750574484, 'avg_acc': 50.023151560945045, 'loss': 8.452302932739258}


EP_train:1:  98%|| 27142/27626 [1:03:56<01:08,  7.05it/s]

{'epoch': 1, 'iter': 27140, 'avg_loss': 8.576826499032972, 'avg_acc': 50.02360358866659, 'loss': 7.749031066894531}


EP_train:1:  98%|| 27152/27626 [1:03:58<01:07,  7.07it/s]

{'epoch': 1, 'iter': 27150, 'avg_loss': 8.576772473116684, 'avg_acc': 50.02221373061766, 'loss': 8.492695808410645}


EP_train:1:  98%|| 27162/27626 [1:03:59<01:05,  7.09it/s]

{'epoch': 1, 'iter': 27160, 'avg_loss': 8.576818622768041, 'avg_acc': 50.020939950664555, 'loss': 8.028432846069336}


EP_train:1:  98%|| 27172/27626 [1:04:01<01:04,  7.07it/s]

{'epoch': 1, 'iter': 27170, 'avg_loss': 8.576877269451446, 'avg_acc': 50.01920705899673, 'loss': 8.638416290283203}


EP_train:1:  98%|| 27182/27626 [1:04:02<01:02,  7.05it/s]

{'epoch': 1, 'iter': 27180, 'avg_loss': 8.57674502649255, 'avg_acc': 50.01931496265774, 'loss': 7.999409198760986}


EP_train:1:  98%|| 27192/27626 [1:04:03<01:01,  7.04it/s]

{'epoch': 1, 'iter': 27190, 'avg_loss': 8.57677600882762, 'avg_acc': 50.021721341620385, 'loss': 8.764237403869629}


EP_train:1:  98%|| 27202/27626 [1:04:05<01:00,  7.04it/s]

{'epoch': 1, 'iter': 27200, 'avg_loss': 8.576799945740843, 'avg_acc': 50.022287783537365, 'loss': 8.602090835571289}


EP_train:1:  99%|| 27212/27626 [1:04:06<00:58,  7.02it/s]

{'epoch': 1, 'iter': 27210, 'avg_loss': 8.576801398794828, 'avg_acc': 50.022164749549816, 'loss': 8.741604804992676}


EP_train:1:  99%|| 27222/27626 [1:04:08<00:57,  7.08it/s]

{'epoch': 1, 'iter': 27220, 'avg_loss': 8.576831345484418, 'avg_acc': 50.02204180595864, 'loss': 8.504585266113281}


EP_train:1:  99%|| 27232/27626 [1:04:09<00:55,  7.11it/s]

{'epoch': 1, 'iter': 27230, 'avg_loss': 8.576670829112722, 'avg_acc': 50.021230399177405, 'loss': 8.137337684631348}


EP_train:1:  99%|| 27242/27626 [1:04:10<00:54,  7.05it/s]

{'epoch': 1, 'iter': 27240, 'avg_loss': 8.576663426333099, 'avg_acc': 50.02030487133366, 'loss': 8.137518882751465}


EP_train:1:  99%|| 27252/27626 [1:04:12<00:53,  7.03it/s]

{'epoch': 1, 'iter': 27250, 'avg_loss': 8.576722664157822, 'avg_acc': 50.02224689002238, 'loss': 8.410440444946289}


EP_train:1:  99%|| 27262/27626 [1:04:13<00:51,  7.03it/s]

{'epoch': 1, 'iter': 27260, 'avg_loss': 8.57672863633489, 'avg_acc': 50.02155093356811, 'loss': 7.861304759979248}


EP_train:1:  99%|| 27272/27626 [1:04:15<00:50,  7.05it/s]

{'epoch': 1, 'iter': 27270, 'avg_loss': 8.576817844974594, 'avg_acc': 50.02119925928642, 'loss': 8.40334415435791}


EP_train:1:  99%|| 27282/27626 [1:04:16<00:49,  7.02it/s]

{'epoch': 1, 'iter': 27280, 'avg_loss': 8.576660869853997, 'avg_acc': 50.022566071624944, 'loss': 8.223287582397461}


EP_train:1:  99%|| 27292/27626 [1:04:18<00:47,  7.01it/s]

{'epoch': 1, 'iter': 27290, 'avg_loss': 8.576793341702878, 'avg_acc': 50.02347385584991, 'loss': 9.362214088439941}


EP_train:1:  99%|| 27302/27626 [1:04:19<00:46,  7.03it/s]

{'epoch': 1, 'iter': 27300, 'avg_loss': 8.57677415058179, 'avg_acc': 50.02449543972748, 'loss': 8.522786140441895}


EP_train:1:  99%|| 27312/27626 [1:04:20<00:44,  7.02it/s]

{'epoch': 1, 'iter': 27310, 'avg_loss': 8.576748919301055, 'avg_acc': 50.02517300721321, 'loss': 8.843161582946777}


EP_train:1:  99%|| 27322/27626 [1:04:22<00:42,  7.10it/s]

{'epoch': 1, 'iter': 27320, 'avg_loss': 8.576835813173625, 'avg_acc': 50.02585007869405, 'loss': 8.713311195373535}


EP_train:1:  99%|| 27332/27626 [1:04:23<00:41,  7.06it/s]

{'epoch': 1, 'iter': 27330, 'avg_loss': 8.576901680961459, 'avg_acc': 50.02538326442502, 'loss': 8.497230529785156}


EP_train:1:  99%|| 27342/27626 [1:04:25<00:40,  6.96it/s]

{'epoch': 1, 'iter': 27340, 'avg_loss': 8.576893387452628, 'avg_acc': 50.02263084744523, 'loss': 8.736769676208496}


EP_train:1:  99%|| 27352/27626 [1:04:26<00:39,  7.01it/s]

{'epoch': 1, 'iter': 27350, 'avg_loss': 8.576852974754457, 'avg_acc': 50.02136576359183, 'loss': 9.05920124053955}


EP_train:1:  99%|| 27362/27626 [1:04:27<00:37,  7.03it/s]

{'epoch': 1, 'iter': 27360, 'avg_loss': 8.576895501659713, 'avg_acc': 50.024441723621216, 'loss': 8.958279609680176}


EP_train:1:  99%|| 27372/27626 [1:04:29<00:36,  7.02it/s]

{'epoch': 1, 'iter': 27370, 'avg_loss': 8.57688709427462, 'avg_acc': 50.02500365350188, 'loss': 8.233366012573242}


EP_train:1:  99%|| 27382/27626 [1:04:30<00:37,  6.43it/s]

{'epoch': 1, 'iter': 27380, 'avg_loss': 8.576949570700648, 'avg_acc': 50.02351082867682, 'loss': 8.933296203613281}


EP_train:1:  99%|| 27392/27626 [1:04:32<00:33,  7.05it/s]

{'epoch': 1, 'iter': 27390, 'avg_loss': 8.576971259724163, 'avg_acc': 50.023388156693805, 'loss': 9.07655143737793}


EP_train:1:  99%|| 27402/27626 [1:04:33<00:32,  7.00it/s]

{'epoch': 1, 'iter': 27400, 'avg_loss': 8.57693562936245, 'avg_acc': 50.02326557424911, 'loss': 7.7945733070373535}


EP_train:1:  99%|| 27412/27626 [1:04:35<00:30,  7.06it/s]

{'epoch': 1, 'iter': 27410, 'avg_loss': 8.576849564663675, 'avg_acc': 50.02154700667615, 'loss': 8.130770683288574}


EP_train:1:  99%|| 27422/27626 [1:04:36<00:29,  7.02it/s]

{'epoch': 1, 'iter': 27420, 'avg_loss': 8.5767651972453, 'avg_acc': 50.02302067758288, 'loss': 7.951778411865234}


EP_train:1:  99%|| 27432/27626 [1:04:37<00:27,  7.04it/s]

{'epoch': 1, 'iter': 27430, 'avg_loss': 8.576760552748308, 'avg_acc': 50.02255659655135, 'loss': 8.015875816345215}


EP_train:1:  99%|| 27442/27626 [1:04:39<00:26,  7.00it/s]

{'epoch': 1, 'iter': 27440, 'avg_loss': 8.576708910151677, 'avg_acc': 50.02334554134325, 'loss': 8.38532543182373}


EP_train:1:  99%|| 27452/27626 [1:04:40<00:24,  7.04it/s]

{'epoch': 1, 'iter': 27450, 'avg_loss': 8.576641510998215, 'avg_acc': 50.02356471531092, 'loss': 8.307262420654297}


EP_train:1:  99%|| 27462/27626 [1:04:42<00:23,  7.06it/s]

{'epoch': 1, 'iter': 27460, 'avg_loss': 8.576650639805537, 'avg_acc': 50.02560449364554, 'loss': 8.728621482849121}


EP_train:1:  99%|| 27472/27626 [1:04:43<00:21,  7.06it/s]

{'epoch': 1, 'iter': 27470, 'avg_loss': 8.57651072009139, 'avg_acc': 50.026050198391026, 'loss': 8.090438842773438}


EP_train:1:  99%|| 27482/27626 [1:04:45<00:20,  7.08it/s]

{'epoch': 1, 'iter': 27480, 'avg_loss': 8.57651209906717, 'avg_acc': 50.02604071904225, 'loss': 8.532235145568848}


EP_train:1: 100%|| 27492/27626 [1:04:46<00:18,  7.06it/s]

{'epoch': 1, 'iter': 27490, 'avg_loss': 8.576593532415169, 'avg_acc': 50.02534920519442, 'loss': 8.623371124267578}


EP_train:1: 100%|| 27502/27626 [1:04:47<00:17,  7.10it/s]

{'epoch': 1, 'iter': 27500, 'avg_loss': 8.576604412909461, 'avg_acc': 50.025453619868365, 'loss': 8.001134872436523}


EP_train:1: 100%|| 27512/27626 [1:04:49<00:16,  7.04it/s]

{'epoch': 1, 'iter': 27510, 'avg_loss': 8.57656317374936, 'avg_acc': 50.02737541347098, 'loss': 8.752036094665527}


EP_train:1: 100%|| 27522/27626 [1:04:50<00:14,  7.04it/s]

{'epoch': 1, 'iter': 27520, 'avg_loss': 8.576440488766725, 'avg_acc': 50.026797718106174, 'loss': 7.43272066116333}


EP_train:1: 100%|| 27532/27626 [1:04:52<00:13,  7.02it/s]

{'epoch': 1, 'iter': 27530, 'avg_loss': 8.576469433147004, 'avg_acc': 50.02815008535833, 'loss': 9.536141395568848}


EP_train:1: 100%|| 27542/27626 [1:04:53<00:11,  7.04it/s]

{'epoch': 1, 'iter': 27540, 'avg_loss': 8.576462192908684, 'avg_acc': 50.02779946261937, 'loss': 8.31993293762207}


EP_train:1: 100%|| 27552/27626 [1:04:54<00:10,  7.03it/s]

{'epoch': 1, 'iter': 27550, 'avg_loss': 8.57665943207228, 'avg_acc': 50.028016224456465, 'loss': 8.844586372375488}


EP_train:1: 100%|| 27562/27626 [1:04:56<00:09,  7.05it/s]

{'epoch': 1, 'iter': 27560, 'avg_loss': 8.576641012659389, 'avg_acc': 50.02845959870832, 'loss': 9.008160591125488}


EP_train:1: 100%|| 27572/27626 [1:04:57<00:07,  6.98it/s]

{'epoch': 1, 'iter': 27570, 'avg_loss': 8.576597127443662, 'avg_acc': 50.03014943237459, 'loss': 8.75500774383545}


EP_train:1: 100%|| 27582/27626 [1:04:59<00:06,  7.02it/s]

{'epoch': 1, 'iter': 27580, 'avg_loss': 8.576657110342653, 'avg_acc': 50.03036510641383, 'loss': 8.592314720153809}


EP_train:1: 100%|| 27592/27626 [1:05:00<00:04,  7.04it/s]

{'epoch': 1, 'iter': 27590, 'avg_loss': 8.576810644992536, 'avg_acc': 50.03103367039977, 'loss': 8.991012573242188}


EP_train:1: 100%|| 27602/27626 [1:05:01<00:03,  7.00it/s]

{'epoch': 1, 'iter': 27600, 'avg_loss': 8.576837293972403, 'avg_acc': 50.03238107314952, 'loss': 8.694551467895508}


EP_train:1: 100%|| 27612/27626 [1:05:03<00:01,  7.02it/s]

{'epoch': 1, 'iter': 27610, 'avg_loss': 8.576773120183393, 'avg_acc': 50.033048422730076, 'loss': 8.560769081115723}


EP_train:1: 100%|| 27622/27626 [1:05:04<00:00,  7.07it/s]

{'epoch': 1, 'iter': 27620, 'avg_loss': 8.576811175848572, 'avg_acc': 50.03360215053764, 'loss': 9.727530479431152}


EP_train:1: 100%|| 27626/27626 [1:05:05<00:00,  7.07it/s]


EP1_train, avg_loss= 8.57680767448791 total_acc= 50.03387959226844
EP:1 Model Saved on: ./wikitext_trained.model.ep1


EP_train:2:   0%|| 2/27626 [00:00<1:16:39,  6.01it/s]

{'epoch': 2, 'iter': 0, 'avg_loss': 8.645350456237793, 'avg_acc': 59.375, 'loss': 8.645350456237793}


EP_train:2:   0%|| 12/27626 [00:01<1:05:31,  7.02it/s]

{'epoch': 2, 'iter': 10, 'avg_loss': 8.386954827742143, 'avg_acc': 50.85227272727273, 'loss': 7.842360496520996}


EP_train:2:   0%|| 22/27626 [00:03<1:05:51,  6.99it/s]

{'epoch': 2, 'iter': 20, 'avg_loss': 8.42261450631278, 'avg_acc': 52.827380952380956, 'loss': 8.48390007019043}


EP_train:2:   0%|| 32/27626 [00:04<1:05:46,  6.99it/s]

{'epoch': 2, 'iter': 30, 'avg_loss': 8.277901387983754, 'avg_acc': 52.62096774193549, 'loss': 7.9748101234436035}


EP_train:2:   0%|| 42/27626 [00:06<1:05:56,  6.97it/s]

{'epoch': 2, 'iter': 40, 'avg_loss': 8.328894056924959, 'avg_acc': 52.59146341463414, 'loss': 8.507349967956543}


EP_train:2:   0%|| 52/27626 [00:07<1:06:08,  6.95it/s]

{'epoch': 2, 'iter': 50, 'avg_loss': 8.38034184773763, 'avg_acc': 52.083333333333336, 'loss': 8.529937744140625}


EP_train:2:   0%|| 62/27626 [00:08<1:06:14,  6.94it/s]

{'epoch': 2, 'iter': 60, 'avg_loss': 8.383176584712794, 'avg_acc': 51.38319672131148, 'loss': 9.025888442993164}


EP_train:2:   0%|| 72/27626 [00:10<1:06:00,  6.96it/s]

{'epoch': 2, 'iter': 70, 'avg_loss': 8.452929335580745, 'avg_acc': 51.232394366197184, 'loss': 7.97681999206543}


EP_train:2:   0%|| 82/27626 [00:11<1:05:27,  7.01it/s]

{'epoch': 2, 'iter': 80, 'avg_loss': 8.45635254589128, 'avg_acc': 50.964506172839506, 'loss': 8.568809509277344}


EP_train:2:   0%|| 92/27626 [00:13<1:05:01,  7.06it/s]

{'epoch': 2, 'iter': 90, 'avg_loss': 8.45413623537336, 'avg_acc': 50.85851648351648, 'loss': 9.1688871383667}


EP_train:2:   0%|| 102/27626 [00:14<1:05:12,  7.04it/s]

{'epoch': 2, 'iter': 100, 'avg_loss': 8.491478410097631, 'avg_acc': 50.71163366336634, 'loss': 8.967870712280273}


EP_train:2:   0%|| 112/27626 [00:15<1:05:34,  6.99it/s]

{'epoch': 2, 'iter': 110, 'avg_loss': 8.508154718725532, 'avg_acc': 50.56306306306306, 'loss': 8.863505363464355}


EP_train:2:   0%|| 122/27626 [00:17<1:05:14,  7.03it/s]

{'epoch': 2, 'iter': 120, 'avg_loss': 8.514765585749602, 'avg_acc': 50.413223140495866, 'loss': 8.740517616271973}


EP_train:2:   0%|| 132/27626 [00:18<1:05:31,  6.99it/s]

{'epoch': 2, 'iter': 130, 'avg_loss': 8.507110613903016, 'avg_acc': 50.45324427480916, 'loss': 8.03609848022461}


EP_train:2:   1%|| 142/27626 [00:20<1:06:00,  6.94it/s]

{'epoch': 2, 'iter': 140, 'avg_loss': 8.513560264668566, 'avg_acc': 50.820035460992905, 'loss': 8.103473663330078}


EP_train:2:   1%|| 152/27626 [00:21<1:05:31,  6.99it/s]

{'epoch': 2, 'iter': 150, 'avg_loss': 8.502182278412068, 'avg_acc': 50.931291390728475, 'loss': 8.157410621643066}


EP_train:2:   1%|| 162/27626 [00:23<1:06:00,  6.93it/s]

{'epoch': 2, 'iter': 160, 'avg_loss': 8.519678438672369, 'avg_acc': 51.18400621118012, 'loss': 9.809168815612793}


EP_train:2:   1%|| 172/27626 [00:24<1:04:54,  7.05it/s]

{'epoch': 2, 'iter': 170, 'avg_loss': 8.505563853079813, 'avg_acc': 51.005116959064324, 'loss': 8.433378219604492}


EP_train:2:   1%|| 182/27626 [00:25<1:04:36,  7.08it/s]

{'epoch': 2, 'iter': 180, 'avg_loss': 8.50585469093112, 'avg_acc': 51.08770718232044, 'loss': 8.634017944335938}


EP_train:2:   1%|| 192/27626 [00:27<1:04:32,  7.08it/s]

{'epoch': 2, 'iter': 190, 'avg_loss': 8.495514992019894, 'avg_acc': 51.24345549738219, 'loss': 8.407002449035645}


EP_train:2:   1%|| 202/27626 [00:28<1:04:39,  7.07it/s]

{'epoch': 2, 'iter': 200, 'avg_loss': 8.499932315219102, 'avg_acc': 51.15049751243781, 'loss': 8.973310470581055}


EP_train:2:   1%|| 212/27626 [00:30<1:04:22,  7.10it/s]

{'epoch': 2, 'iter': 210, 'avg_loss': 8.512578632029312, 'avg_acc': 51.17002369668246, 'loss': 9.327168464660645}


EP_train:2:   1%|| 222/27626 [00:31<1:05:12,  7.00it/s]

{'epoch': 2, 'iter': 220, 'avg_loss': 8.529987199274123, 'avg_acc': 51.18778280542986, 'loss': 8.587828636169434}


EP_train:2:   1%|| 232/27626 [00:33<1:05:20,  6.99it/s]

{'epoch': 2, 'iter': 230, 'avg_loss': 8.523329441681568, 'avg_acc': 51.0551948051948, 'loss': 7.8831024169921875}


EP_train:2:   1%|| 242/27626 [00:34<1:04:57,  7.03it/s]

{'epoch': 2, 'iter': 240, 'avg_loss': 8.52668688010378, 'avg_acc': 50.86877593360995, 'loss': 7.961368083953857}


EP_train:2:   1%|| 252/27626 [00:35<1:04:51,  7.03it/s]

{'epoch': 2, 'iter': 250, 'avg_loss': 8.51293732540541, 'avg_acc': 50.759462151394416, 'loss': 7.62027645111084}


EP_train:2:   1%|| 262/27626 [00:37<1:04:22,  7.09it/s]

{'epoch': 2, 'iter': 260, 'avg_loss': 8.518098098564879, 'avg_acc': 50.754310344827594, 'loss': 9.08407974243164}


EP_train:2:   1%|| 272/27626 [00:38<1:04:38,  7.05it/s]

{'epoch': 2, 'iter': 270, 'avg_loss': 8.513968824900385, 'avg_acc': 50.749538745387454, 'loss': 9.063820838928223}


EP_train:2:   1%|| 282/27626 [00:40<1:04:45,  7.04it/s]

{'epoch': 2, 'iter': 280, 'avg_loss': 8.50403240312461, 'avg_acc': 50.70062277580071, 'loss': 8.829046249389648}


EP_train:2:   1%|| 292/27626 [00:41<1:04:18,  7.08it/s]

{'epoch': 2, 'iter': 290, 'avg_loss': 8.495894559879893, 'avg_acc': 50.88058419243986, 'loss': 8.083120346069336}


EP_train:2:   1%|| 302/27626 [00:42<1:04:19,  7.08it/s]

{'epoch': 2, 'iter': 300, 'avg_loss': 8.496171873669292, 'avg_acc': 50.79941860465116, 'loss': 8.362222671508789}


EP_train:2:   1%|| 312/27626 [00:44<1:04:56,  7.01it/s]

{'epoch': 2, 'iter': 310, 'avg_loss': 8.50459844123129, 'avg_acc': 51.01487138263665, 'loss': 9.217416763305664}


EP_train:2:   1%|| 322/27626 [00:45<1:04:44,  7.03it/s]

{'epoch': 2, 'iter': 320, 'avg_loss': 8.507588079042524, 'avg_acc': 51.03193146417445, 'loss': 7.914140701293945}


EP_train:2:   1%|| 332/27626 [00:47<1:04:48,  7.02it/s]

{'epoch': 2, 'iter': 330, 'avg_loss': 8.507486343383789, 'avg_acc': 51.06684290030211, 'loss': 8.654961585998535}


EP_train:2:   1%|| 342/27626 [00:48<1:04:15,  7.08it/s]

{'epoch': 2, 'iter': 340, 'avg_loss': 8.51118855042891, 'avg_acc': 51.01722873900293, 'loss': 8.268380165100098}


EP_train:2:   1%|| 352/27626 [00:49<1:04:51,  7.01it/s]

{'epoch': 2, 'iter': 350, 'avg_loss': 8.52215064692701, 'avg_acc': 50.943732193732195, 'loss': 8.228203773498535}


EP_train:2:   1%|| 362/27626 [00:51<1:04:49,  7.01it/s]

{'epoch': 2, 'iter': 360, 'avg_loss': 8.531004716815051, 'avg_acc': 50.91759002770083, 'loss': 8.669342041015625}


EP_train:2:   1%|| 372/27626 [00:52<1:04:28,  7.05it/s]

{'epoch': 2, 'iter': 370, 'avg_loss': 8.538657562430657, 'avg_acc': 50.97708894878706, 'loss': 8.995404243469238}


EP_train:2:   1%|| 382/27626 [00:54<1:04:17,  7.06it/s]

{'epoch': 2, 'iter': 380, 'avg_loss': 8.544979554774567, 'avg_acc': 50.94324146981627, 'loss': 9.439519882202148}


EP_train:2:   1%|| 392/27626 [00:55<1:04:17,  7.06it/s]

{'epoch': 2, 'iter': 390, 'avg_loss': 8.550947539336846, 'avg_acc': 50.879156010230176, 'loss': 8.293145179748535}


EP_train:2:   1%|| 402/27626 [00:57<1:04:39,  7.02it/s]

{'epoch': 2, 'iter': 400, 'avg_loss': 8.55722213445459, 'avg_acc': 50.8182668329177, 'loss': 9.535178184509277}


EP_train:2:   1%|| 412/27626 [00:58<1:04:32,  7.03it/s]

{'epoch': 2, 'iter': 410, 'avg_loss': 8.563970179453383, 'avg_acc': 50.77554744525548, 'loss': 8.655421257019043}


EP_train:2:   2%|| 422/27626 [00:59<1:04:34,  7.02it/s]

{'epoch': 2, 'iter': 420, 'avg_loss': 8.559828117171262, 'avg_acc': 50.697743467933485, 'loss': 8.744794845581055}


EP_train:2:   2%|| 432/27626 [01:01<1:04:50,  6.99it/s]

{'epoch': 2, 'iter': 430, 'avg_loss': 8.556139850837726, 'avg_acc': 50.53654292343387, 'loss': 8.868546485900879}


EP_train:2:   2%|| 442/27626 [01:02<1:04:12,  7.06it/s]

{'epoch': 2, 'iter': 440, 'avg_loss': 8.563007573151534, 'avg_acc': 50.47477324263039, 'loss': 8.425997734069824}


EP_train:2:   2%|| 452/27626 [01:04<1:03:59,  7.08it/s]

{'epoch': 2, 'iter': 450, 'avg_loss': 8.570257666898673, 'avg_acc': 50.47810421286031, 'loss': 9.34781551361084}


EP_train:2:   2%|| 462/27626 [01:05<1:04:04,  7.07it/s]

{'epoch': 2, 'iter': 460, 'avg_loss': 8.578827067724795, 'avg_acc': 50.447396963123644, 'loss': 8.439926147460938}


EP_train:2:   2%|| 472/27626 [01:06<1:03:55,  7.08it/s]

{'epoch': 2, 'iter': 470, 'avg_loss': 8.58629452650714, 'avg_acc': 50.477707006369435, 'loss': 9.372069358825684}


EP_train:2:   2%|| 482/27626 [01:08<1:03:52,  7.08it/s]

{'epoch': 2, 'iter': 480, 'avg_loss': 8.586531367470469, 'avg_acc': 50.48726611226611, 'loss': 8.144015312194824}


EP_train:2:   2%|| 492/27626 [01:09<1:03:48,  7.09it/s]

{'epoch': 2, 'iter': 490, 'avg_loss': 8.583548512818128, 'avg_acc': 50.43279022403259, 'loss': 8.559283256530762}


EP_train:2:   2%|| 502/27626 [01:11<1:04:43,  6.98it/s]

{'epoch': 2, 'iter': 500, 'avg_loss': 8.585092211435892, 'avg_acc': 50.34930139720559, 'loss': 8.697011947631836}


EP_train:2:   2%|| 512/27626 [01:12<1:04:24,  7.02it/s]

{'epoch': 2, 'iter': 510, 'avg_loss': 8.58725254129757, 'avg_acc': 50.32411937377691, 'loss': 9.377779006958008}


EP_train:2:   2%|| 522/27626 [01:14<1:03:51,  7.07it/s]

{'epoch': 2, 'iter': 520, 'avg_loss': 8.590459216807945, 'avg_acc': 50.287907869481764, 'loss': 9.054383277893066}


EP_train:2:   2%|| 532/27626 [01:15<1:04:02,  7.05it/s]

{'epoch': 2, 'iter': 530, 'avg_loss': 8.592617870050635, 'avg_acc': 50.33545197740112, 'loss': 8.833535194396973}


EP_train:2:   2%|| 542/27626 [01:16<1:03:49,  7.07it/s]

{'epoch': 2, 'iter': 540, 'avg_loss': 8.59098914105879, 'avg_acc': 50.31769870609981, 'loss': 8.443169593811035}


EP_train:2:   2%|| 552/27626 [01:18<1:03:52,  7.06it/s]

{'epoch': 2, 'iter': 550, 'avg_loss': 8.586345959921282, 'avg_acc': 50.34029038112523, 'loss': 8.7716703414917}


EP_train:2:   2%|| 562/27626 [01:19<1:03:25,  7.11it/s]

{'epoch': 2, 'iter': 560, 'avg_loss': 8.582145762316046, 'avg_acc': 50.34536541889483, 'loss': 8.477079391479492}


EP_train:2:   2%|| 572/27626 [01:21<1:03:42,  7.08it/s]

{'epoch': 2, 'iter': 570, 'avg_loss': 8.579463703202283, 'avg_acc': 50.3119527145359, 'loss': 8.772161483764648}


EP_train:2:   2%|| 582/27626 [01:22<1:03:57,  7.05it/s]

{'epoch': 2, 'iter': 580, 'avg_loss': 8.578293345676231, 'avg_acc': 50.26893287435457, 'loss': 7.693816661834717}


EP_train:2:   2%|| 592/27626 [01:23<1:04:23,  7.00it/s]

{'epoch': 2, 'iter': 590, 'avg_loss': 8.576624024741339, 'avg_acc': 50.317258883248726, 'loss': 8.28791332244873}


EP_train:2:   2%|| 602/27626 [01:25<1:05:29,  6.88it/s]

{'epoch': 2, 'iter': 600, 'avg_loss': 8.572559784335423, 'avg_acc': 50.239184692179705, 'loss': 8.174467086791992}


EP_train:2:   2%|| 612/27626 [01:26<1:03:42,  7.07it/s]

{'epoch': 2, 'iter': 610, 'avg_loss': 8.57373767203473, 'avg_acc': 50.245499181669395, 'loss': 8.226491928100586}


EP_train:2:   2%|| 622/27626 [01:28<1:04:00,  7.03it/s]

{'epoch': 2, 'iter': 620, 'avg_loss': 8.569420322319928, 'avg_acc': 50.22141706924316, 'loss': 7.967437744140625}


EP_train:2:   2%|| 632/27626 [01:29<1:03:33,  7.08it/s]

{'epoch': 2, 'iter': 630, 'avg_loss': 8.568972349544712, 'avg_acc': 50.16343106180665, 'loss': 8.150530815124512}


EP_train:2:   2%|| 642/27626 [01:31<1:03:29,  7.08it/s]

{'epoch': 2, 'iter': 640, 'avg_loss': 8.563102610583611, 'avg_acc': 50.073127925117, 'loss': 8.600616455078125}


EP_train:2:   2%|| 652/27626 [01:32<1:03:42,  7.06it/s]

{'epoch': 2, 'iter': 650, 'avg_loss': 8.564929302203856, 'avg_acc': 50.0, 'loss': 9.007171630859375}


EP_train:2:   2%|| 662/27626 [01:33<1:03:41,  7.06it/s]

{'epoch': 2, 'iter': 660, 'avg_loss': 8.560032380691274, 'avg_acc': 49.98581694402421, 'loss': 8.213345527648926}


EP_train:2:   2%|| 672/27626 [01:35<1:03:52,  7.03it/s]

{'epoch': 2, 'iter': 670, 'avg_loss': 8.560671011133094, 'avg_acc': 49.916169895678095, 'loss': 8.413223266601562}


EP_train:2:   2%|| 682/27626 [01:36<1:04:03,  7.01it/s]

{'epoch': 2, 'iter': 680, 'avg_loss': 8.55693170712733, 'avg_acc': 49.93116740088105, 'loss': 8.392165184020996}


EP_train:2:   3%|| 692/27626 [01:38<1:04:25,  6.97it/s]

{'epoch': 2, 'iter': 690, 'avg_loss': 8.55577973035931, 'avg_acc': 49.95477568740955, 'loss': 8.301426887512207}


EP_train:2:   3%|| 702/27626 [01:39<1:03:14,  7.09it/s]

{'epoch': 2, 'iter': 700, 'avg_loss': 8.555642244309059, 'avg_acc': 49.879636233951494, 'loss': 9.63273811340332}


EP_train:2:   3%|| 712/27626 [01:40<1:03:32,  7.06it/s]

{'epoch': 2, 'iter': 710, 'avg_loss': 8.555851059120918, 'avg_acc': 49.87253867791843, 'loss': 8.086058616638184}


EP_train:2:   3%|| 722/27626 [01:42<1:03:35,  7.05it/s]

{'epoch': 2, 'iter': 720, 'avg_loss': 8.552769699308314, 'avg_acc': 49.86130374479889, 'loss': 8.48383617401123}


EP_train:2:   3%|| 732/27626 [01:43<1:03:05,  7.10it/s]

{'epoch': 2, 'iter': 730, 'avg_loss': 8.562177915090412, 'avg_acc': 49.81190150478796, 'loss': 10.219192504882812}


EP_train:2:   3%|| 742/27626 [01:45<1:03:21,  7.07it/s]

{'epoch': 2, 'iter': 740, 'avg_loss': 8.560303227299782, 'avg_acc': 49.84817813765182, 'loss': 8.624780654907227}


EP_train:2:   3%|| 752/27626 [01:46<1:03:44,  7.03it/s]

{'epoch': 2, 'iter': 750, 'avg_loss': 8.56258687071413, 'avg_acc': 49.82523302263648, 'loss': 8.544600486755371}


EP_train:2:   3%|| 762/27626 [01:47<1:03:58,  7.00it/s]

{'epoch': 2, 'iter': 760, 'avg_loss': 8.563707284639136, 'avg_acc': 49.85216819973719, 'loss': 8.784708976745605}


EP_train:2:   3%|| 772/27626 [01:49<1:03:49,  7.01it/s]

{'epoch': 2, 'iter': 770, 'avg_loss': 8.566434263409962, 'avg_acc': 49.82571335927367, 'loss': 9.181291580200195}


EP_train:2:   3%|| 782/27626 [01:50<1:03:45,  7.02it/s]

{'epoch': 2, 'iter': 780, 'avg_loss': 8.568335911070644, 'avg_acc': 49.83594750320102, 'loss': 8.798185348510742}


EP_train:2:   3%|| 792/27626 [01:52<1:03:33,  7.04it/s]

{'epoch': 2, 'iter': 790, 'avg_loss': 8.56521048407187, 'avg_acc': 49.75900758533502, 'loss': 7.907129287719727}


EP_train:2:   3%|| 802/27626 [01:53<1:03:05,  7.09it/s]

{'epoch': 2, 'iter': 800, 'avg_loss': 8.560257408651669, 'avg_acc': 49.7230024968789, 'loss': 9.080322265625}


EP_train:2:   3%|| 812/27626 [01:55<1:04:02,  6.98it/s]

{'epoch': 2, 'iter': 810, 'avg_loss': 8.557582465405822, 'avg_acc': 49.707151664611594, 'loss': 8.559263229370117}


EP_train:2:   3%|| 822/27626 [01:56<1:03:22,  7.05it/s]

{'epoch': 2, 'iter': 820, 'avg_loss': 8.550981660417749, 'avg_acc': 49.74878197320341, 'loss': 7.591156005859375}


EP_train:2:   3%|| 832/27626 [01:57<1:03:37,  7.02it/s]

{'epoch': 2, 'iter': 830, 'avg_loss': 8.549249569861898, 'avg_acc': 49.80445246690734, 'loss': 8.485342979431152}


EP_train:2:   3%|| 842/27626 [01:59<1:03:34,  7.02it/s]

{'epoch': 2, 'iter': 840, 'avg_loss': 8.54278836834303, 'avg_acc': 49.82164090368609, 'loss': 7.465599536895752}


EP_train:2:   3%|| 852/27626 [02:00<1:03:32,  7.02it/s]

{'epoch': 2, 'iter': 850, 'avg_loss': 8.543366669768032, 'avg_acc': 49.860458284371326, 'loss': 8.0415678024292}


EP_train:2:   3%|| 862/27626 [02:02<1:03:53,  6.98it/s]

{'epoch': 2, 'iter': 860, 'avg_loss': 8.544905943876083, 'avg_acc': 49.822154471544714, 'loss': 7.722650527954102}


EP_train:2:   3%|| 872/27626 [02:03<1:03:05,  7.07it/s]

{'epoch': 2, 'iter': 870, 'avg_loss': 8.545806661949639, 'avg_acc': 49.824196326062, 'loss': 7.802226543426514}


EP_train:2:   3%|| 882/27626 [02:04<1:02:47,  7.10it/s]

{'epoch': 2, 'iter': 880, 'avg_loss': 8.544629506707597, 'avg_acc': 49.790720771850175, 'loss': 8.202542304992676}


EP_train:2:   3%|| 892/27626 [02:06<1:03:02,  7.07it/s]

{'epoch': 2, 'iter': 890, 'avg_loss': 8.542945019992782, 'avg_acc': 49.79306958473625, 'loss': 8.773204803466797}


EP_train:2:   3%|| 902/27626 [02:07<1:03:18,  7.04it/s]

{'epoch': 2, 'iter': 900, 'avg_loss': 8.53836493857296, 'avg_acc': 49.79536625971143, 'loss': 8.06413459777832}


EP_train:2:   3%|| 912/27626 [02:09<1:02:52,  7.08it/s]

{'epoch': 2, 'iter': 910, 'avg_loss': 8.539506810687637, 'avg_acc': 49.77703073545554, 'loss': 8.296411514282227}


EP_train:2:   3%|| 922/27626 [02:10<1:02:52,  7.08it/s]

{'epoch': 2, 'iter': 920, 'avg_loss': 8.543441183274524, 'avg_acc': 49.738735070575466, 'loss': 8.881720542907715}


EP_train:2:   3%|| 932/27626 [02:11<1:03:08,  7.05it/s]

{'epoch': 2, 'iter': 930, 'avg_loss': 8.543562284217613, 'avg_acc': 49.714688506981744, 'loss': 8.202064514160156}


EP_train:2:   3%|| 942/27626 [02:13<1:03:30,  7.00it/s]

{'epoch': 2, 'iter': 940, 'avg_loss': 8.543377757199133, 'avg_acc': 49.79078108395324, 'loss': 8.681982040405273}


EP_train:2:   3%|| 952/27626 [02:14<1:04:03,  6.94it/s]

{'epoch': 2, 'iter': 950, 'avg_loss': 8.543757692371132, 'avg_acc': 49.78969505783386, 'loss': 8.253864288330078}


EP_train:2:   3%|| 962/27626 [02:16<1:03:11,  7.03it/s]

{'epoch': 2, 'iter': 960, 'avg_loss': 8.541238241959809, 'avg_acc': 49.80489073881373, 'loss': 7.830460071563721}


EP_train:2:   4%|| 972/27626 [02:17<1:02:46,  7.08it/s]

{'epoch': 2, 'iter': 970, 'avg_loss': 8.540690951440656, 'avg_acc': 49.79724510813594, 'loss': 8.290849685668945}


EP_train:2:   4%|| 982/27626 [02:19<1:03:27,  7.00it/s]

{'epoch': 2, 'iter': 980, 'avg_loss': 8.54124534093641, 'avg_acc': 49.856651376146786, 'loss': 8.767548561096191}


EP_train:2:   4%|| 992/27626 [02:20<1:03:17,  7.01it/s]

{'epoch': 2, 'iter': 990, 'avg_loss': 8.540137203863242, 'avg_acc': 49.83602421796166, 'loss': 7.93660306930542}


EP_train:2:   4%|| 1002/27626 [02:21<1:02:55,  7.05it/s]

{'epoch': 2, 'iter': 1000, 'avg_loss': 8.538881110859203, 'avg_acc': 49.84078421578422, 'loss': 7.0589518547058105}


EP_train:2:   4%|| 1012/27626 [02:23<1:03:01,  7.04it/s]

{'epoch': 2, 'iter': 1010, 'avg_loss': 8.540071410076319, 'avg_acc': 49.8114490603363, 'loss': 8.815668106079102}


EP_train:2:   4%|| 1022/27626 [02:24<1:02:47,  7.06it/s]

{'epoch': 2, 'iter': 1020, 'avg_loss': 8.540629308440893, 'avg_acc': 49.856145935357496, 'loss': 8.863152503967285}


EP_train:2:   4%|| 1032/27626 [02:26<1:09:55,  6.34it/s]

{'epoch': 2, 'iter': 1030, 'avg_loss': 8.540069012128532, 'avg_acc': 49.860572259941804, 'loss': 8.910579681396484}


EP_train:2:   4%|| 1042/27626 [02:27<1:03:49,  6.94it/s]

{'epoch': 2, 'iter': 1040, 'avg_loss': 8.537511991378095, 'avg_acc': 49.843900096061475, 'loss': 7.821359157562256}


EP_train:2:   4%|| 1052/27626 [02:29<1:02:56,  7.04it/s]

{'epoch': 2, 'iter': 1050, 'avg_loss': 8.53719387181479, 'avg_acc': 49.86322549952426, 'loss': 8.097037315368652}


EP_train:2:   4%|| 1062/27626 [02:30<1:02:49,  7.05it/s]

{'epoch': 2, 'iter': 1060, 'avg_loss': 8.536642808042306, 'avg_acc': 49.84978793590952, 'loss': 9.047582626342773}


EP_train:2:   4%|| 1072/27626 [02:31<1:02:38,  7.06it/s]

{'epoch': 2, 'iter': 1070, 'avg_loss': 8.537532964451751, 'avg_acc': 49.86577964519141, 'loss': 8.004524230957031}


EP_train:2:   4%|| 1082/27626 [02:33<1:02:46,  7.05it/s]

{'epoch': 2, 'iter': 1080, 'avg_loss': 8.533233609053958, 'avg_acc': 49.84389454209065, 'loss': 8.937438011169434}


EP_train:2:   4%|| 1092/27626 [02:34<1:02:48,  7.04it/s]

{'epoch': 2, 'iter': 1090, 'avg_loss': 8.533730205759666, 'avg_acc': 49.83673235563703, 'loss': 9.447585105895996}


EP_train:2:   4%|| 1102/27626 [02:36<1:02:48,  7.04it/s]

{'epoch': 2, 'iter': 1100, 'avg_loss': 8.535113403517803, 'avg_acc': 49.866598546775656, 'loss': 8.703083038330078}


EP_train:2:   4%|| 1112/27626 [02:37<1:02:30,  7.07it/s]

{'epoch': 2, 'iter': 1110, 'avg_loss': 8.533774578067014, 'avg_acc': 49.915616561656165, 'loss': 8.528011322021484}


EP_train:2:   4%|| 1122/27626 [02:38<1:03:42,  6.93it/s]

{'epoch': 2, 'iter': 1120, 'avg_loss': 8.536706624128902, 'avg_acc': 49.93588314005352, 'loss': 9.384929656982422}


EP_train:2:   4%|| 1132/27626 [02:40<1:03:20,  6.97it/s]

{'epoch': 2, 'iter': 1130, 'avg_loss': 8.535813403698747, 'avg_acc': 49.975132625994696, 'loss': 7.866866111755371}


EP_train:2:   4%|| 1142/27626 [02:41<1:02:28,  7.07it/s]

{'epoch': 2, 'iter': 1140, 'avg_loss': 8.532825467463233, 'avg_acc': 49.980828220858896, 'loss': 7.710484504699707}


EP_train:2:   4%|| 1152/27626 [02:43<1:02:45,  7.03it/s]

{'epoch': 2, 'iter': 1150, 'avg_loss': 8.535352502254483, 'avg_acc': 49.98913987836664, 'loss': 9.677122116088867}


EP_train:2:   4%|| 1162/27626 [02:44<1:02:08,  7.10it/s]

{'epoch': 2, 'iter': 1160, 'avg_loss': 8.53431315385094, 'avg_acc': 49.973083548664945, 'loss': 7.773627758026123}


EP_train:2:   4%|| 1172/27626 [02:46<1:03:04,  6.99it/s]

{'epoch': 2, 'iter': 1170, 'avg_loss': 8.534760942222928, 'avg_acc': 49.97598206660974, 'loss': 8.59677505493164}


EP_train:2:   4%|| 1182/27626 [02:47<1:02:24,  7.06it/s]

{'epoch': 2, 'iter': 1180, 'avg_loss': 8.536383786714248, 'avg_acc': 49.97618543607113, 'loss': 9.128718376159668}


EP_train:2:   4%|| 1192/27626 [02:48<1:02:12,  7.08it/s]

{'epoch': 2, 'iter': 1190, 'avg_loss': 8.535447000156022, 'avg_acc': 49.93702770780856, 'loss': 8.78699016571045}


EP_train:2:   4%|| 1202/27626 [02:50<1:02:27,  7.05it/s]

{'epoch': 2, 'iter': 1200, 'avg_loss': 8.536046875803596, 'avg_acc': 49.91673605328892, 'loss': 8.491554260253906}


EP_train:2:   4%|| 1212/27626 [02:51<1:02:30,  7.04it/s]

{'epoch': 2, 'iter': 1210, 'avg_loss': 8.53495700136873, 'avg_acc': 49.92774566473989, 'loss': 8.558082580566406}


EP_train:2:   4%|| 1222/27626 [02:53<1:02:54,  6.99it/s]

{'epoch': 2, 'iter': 1220, 'avg_loss': 8.534475952557713, 'avg_acc': 49.91042178542179, 'loss': 8.946344375610352}


EP_train:2:   4%|| 1232/27626 [02:54<1:02:42,  7.02it/s]

{'epoch': 2, 'iter': 1230, 'avg_loss': 8.536199376410725, 'avg_acc': 49.93399675060926, 'loss': 8.799918174743652}


EP_train:2:   4%|| 1242/27626 [02:55<1:02:53,  6.99it/s]

{'epoch': 2, 'iter': 1240, 'avg_loss': 8.537553995871717, 'avg_acc': 49.95215551974214, 'loss': 8.827000617980957}


EP_train:2:   5%|| 1252/27626 [02:57<1:02:27,  7.04it/s]

{'epoch': 2, 'iter': 1250, 'avg_loss': 8.536111038842266, 'avg_acc': 49.947541966426854, 'loss': 8.635376930236816}


EP_train:2:   5%|| 1262/27626 [02:58<1:02:03,  7.08it/s]

{'epoch': 2, 'iter': 1260, 'avg_loss': 8.536148170362287, 'avg_acc': 49.93061062648692, 'loss': 9.02534008026123}


EP_train:2:   5%|| 1272/27626 [03:00<1:02:10,  7.06it/s]

{'epoch': 2, 'iter': 1270, 'avg_loss': 8.536596785934773, 'avg_acc': 49.94099134539732, 'loss': 8.634320259094238}


EP_train:2:   5%|| 1282/27626 [03:01<1:01:54,  7.09it/s]

{'epoch': 2, 'iter': 1280, 'avg_loss': 8.538959037876054, 'avg_acc': 49.92437548790008, 'loss': 8.344701766967773}


EP_train:2:   5%|| 1292/27626 [03:02<1:02:36,  7.01it/s]

{'epoch': 2, 'iter': 1290, 'avg_loss': 8.53665039541184, 'avg_acc': 49.92496127033307, 'loss': 7.693288803100586}


EP_train:2:   5%|| 1302/27626 [03:04<1:02:28,  7.02it/s]

{'epoch': 2, 'iter': 1300, 'avg_loss': 8.535376741920958, 'avg_acc': 49.92073405073021, 'loss': 8.56026554107666}


EP_train:2:   5%|| 1312/27626 [03:05<1:02:36,  7.01it/s]

{'epoch': 2, 'iter': 1310, 'avg_loss': 8.53526640884027, 'avg_acc': 49.93325705568268, 'loss': 7.917901039123535}


EP_train:2:   5%|| 1322/27626 [03:07<1:01:56,  7.08it/s]

{'epoch': 2, 'iter': 1320, 'avg_loss': 8.534585917745009, 'avg_acc': 49.903009084027246, 'loss': 8.212324142456055}


EP_train:2:   5%|| 1332/27626 [03:08<1:02:06,  7.06it/s]

{'epoch': 2, 'iter': 1330, 'avg_loss': 8.537537143787524, 'avg_acc': 49.92252066115703, 'loss': 10.254639625549316}


EP_train:2:   5%|| 1342/27626 [03:10<1:02:01,  7.06it/s]

{'epoch': 2, 'iter': 1340, 'avg_loss': 8.537098327383543, 'avg_acc': 49.91144668158091, 'loss': 8.174713134765625}


EP_train:2:   5%|| 1352/27626 [03:11<1:01:41,  7.10it/s]

{'epoch': 2, 'iter': 1350, 'avg_loss': 8.535436473538661, 'avg_acc': 49.925980754996296, 'loss': 8.3915433883667}


EP_train:2:   5%|| 1362/27626 [03:12<1:01:55,  7.07it/s]

{'epoch': 2, 'iter': 1360, 'avg_loss': 8.536887606130987, 'avg_acc': 49.94030124908156, 'loss': 8.781521797180176}


EP_train:2:   5%|| 1372/27626 [03:14<1:02:17,  7.02it/s]

{'epoch': 2, 'iter': 1370, 'avg_loss': 8.536203852193708, 'avg_acc': 49.93845733041575, 'loss': 8.086552619934082}


EP_train:2:   5%|| 1382/27626 [03:15<1:02:28,  7.00it/s]

{'epoch': 2, 'iter': 1380, 'avg_loss': 8.533121340003763, 'avg_acc': 49.923062997827664, 'loss': 9.646082878112793}


EP_train:2:   5%|| 1392/27626 [03:17<1:02:17,  7.02it/s]

{'epoch': 2, 'iter': 1390, 'avg_loss': 8.533594883576646, 'avg_acc': 49.93934219985622, 'loss': 8.44300651550293}


EP_train:2:   5%|| 1402/27626 [03:18<1:02:10,  7.03it/s]

{'epoch': 2, 'iter': 1400, 'avg_loss': 8.537342004823651, 'avg_acc': 49.94869735902927, 'loss': 8.761164665222168}


EP_train:2:   5%|| 1412/27626 [03:19<1:01:30,  7.10it/s]

{'epoch': 2, 'iter': 1410, 'avg_loss': 8.53784128996094, 'avg_acc': 49.924698795180724, 'loss': 8.479761123657227}


EP_train:2:   5%|| 1422/27626 [03:21<1:01:32,  7.10it/s]

{'epoch': 2, 'iter': 1420, 'avg_loss': 8.538240016639275, 'avg_acc': 49.91203377902885, 'loss': 8.855520248413086}


EP_train:2:   5%|| 1432/27626 [03:22<1:01:08,  7.14it/s]

{'epoch': 2, 'iter': 1430, 'avg_loss': 8.537862937655339, 'avg_acc': 49.9148322851153, 'loss': 8.406078338623047}


EP_train:2:   5%|| 1442/27626 [03:24<1:01:28,  7.10it/s]

{'epoch': 2, 'iter': 1440, 'avg_loss': 8.536797584385445, 'avg_acc': 49.939278278972935, 'loss': 8.00748062133789}


EP_train:2:   5%|| 1452/27626 [03:25<1:01:50,  7.05it/s]

{'epoch': 2, 'iter': 1450, 'avg_loss': 8.536630685045175, 'avg_acc': 49.95907994486561, 'loss': 8.673372268676758}


EP_train:2:   5%|| 1462/27626 [03:26<1:01:47,  7.06it/s]

{'epoch': 2, 'iter': 1460, 'avg_loss': 8.535846011431392, 'avg_acc': 49.93583162217659, 'loss': 8.285888671875}


EP_train:2:   5%|| 1472/27626 [03:28<1:02:09,  7.01it/s]

{'epoch': 2, 'iter': 1470, 'avg_loss': 8.538659198522407, 'avg_acc': 49.91077498300476, 'loss': 8.05870532989502}


EP_train:2:   5%|| 1482/27626 [03:29<1:02:29,  6.97it/s]

{'epoch': 2, 'iter': 1480, 'avg_loss': 8.53790057405759, 'avg_acc': 49.90504726536124, 'loss': 8.425466537475586}


EP_train:2:   5%|| 1492/27626 [03:31<1:02:06,  7.01it/s]

{'epoch': 2, 'iter': 1490, 'avg_loss': 8.539439821467154, 'avg_acc': 49.94550637156271, 'loss': 8.349137306213379}


EP_train:2:   5%|| 1502/27626 [03:32<1:01:34,  7.07it/s]

{'epoch': 2, 'iter': 1500, 'avg_loss': 8.541016038936586, 'avg_acc': 49.92713191205863, 'loss': 8.56357479095459}


EP_train:2:   5%|| 1512/27626 [03:34<1:01:59,  7.02it/s]

{'epoch': 2, 'iter': 1510, 'avg_loss': 8.544282609938312, 'avg_acc': 49.898659827928526, 'loss': 9.36196231842041}


EP_train:2:   6%|| 1522/27626 [03:35<1:01:54,  7.03it/s]

{'epoch': 2, 'iter': 1520, 'avg_loss': 8.545665148133748, 'avg_acc': 49.91576265614727, 'loss': 7.852625846862793}


EP_train:2:   6%|| 1532/27626 [03:36<1:01:23,  7.08it/s]

{'epoch': 2, 'iter': 1530, 'avg_loss': 8.543912882278512, 'avg_acc': 49.92243631613324, 'loss': 8.940335273742676}


EP_train:2:   6%|| 1542/27626 [03:38<1:01:38,  7.05it/s]

{'epoch': 2, 'iter': 1540, 'avg_loss': 8.543548262173443, 'avg_acc': 49.94930240103829, 'loss': 8.481207847595215}


EP_train:2:   6%|| 1552/27626 [03:39<1:02:03,  7.00it/s]

{'epoch': 2, 'iter': 1550, 'avg_loss': 8.542807128335644, 'avg_acc': 49.90530303030303, 'loss': 8.369946479797363}


EP_train:2:   6%|| 1562/27626 [03:41<1:01:46,  7.03it/s]

{'epoch': 2, 'iter': 1560, 'avg_loss': 8.543800396464102, 'avg_acc': 49.937940422805895, 'loss': 8.113720893859863}


EP_train:2:   6%|| 1572/27626 [03:42<1:01:30,  7.06it/s]

{'epoch': 2, 'iter': 1570, 'avg_loss': 8.542885911457104, 'avg_acc': 49.928389560789306, 'loss': 8.322151184082031}


EP_train:2:   6%|| 1582/27626 [03:43<1:01:40,  7.04it/s]

{'epoch': 2, 'iter': 1580, 'avg_loss': 8.544153113971399, 'avg_acc': 49.893263757115754, 'loss': 8.854507446289062}


EP_train:2:   6%|| 1592/27626 [03:45<1:01:11,  7.09it/s]

{'epoch': 2, 'iter': 1590, 'avg_loss': 8.542370371815545, 'avg_acc': 49.92143306096794, 'loss': 8.804553031921387}


EP_train:2:   6%|| 1602/27626 [03:46<1:01:11,  7.09it/s]

{'epoch': 2, 'iter': 1600, 'avg_loss': 8.542451881156126, 'avg_acc': 49.918019987507805, 'loss': 8.1830472946167}


EP_train:2:   6%|| 1612/27626 [03:48<1:01:27,  7.06it/s]

{'epoch': 2, 'iter': 1610, 'avg_loss': 8.541612370868117, 'avg_acc': 49.893311607697086, 'loss': 8.248982429504395}


EP_train:2:   6%|| 1622/27626 [03:49<1:00:59,  7.11it/s]

{'epoch': 2, 'iter': 1620, 'avg_loss': 8.540963938617177, 'avg_acc': 49.87661937075879, 'loss': 7.8955488204956055}


EP_train:2:   6%|| 1632/27626 [03:50<1:01:17,  7.07it/s]

{'epoch': 2, 'iter': 1630, 'avg_loss': 8.539233580460072, 'avg_acc': 49.89078786020846, 'loss': 8.037565231323242}


EP_train:2:   6%|| 1642/27626 [03:52<1:01:16,  7.07it/s]

{'epoch': 2, 'iter': 1640, 'avg_loss': 8.53889494157288, 'avg_acc': 49.92001828153565, 'loss': 7.8687214851379395}


EP_train:2:   6%|| 1652/27626 [03:53<1:01:20,  7.06it/s]

{'epoch': 2, 'iter': 1650, 'avg_loss': 8.537732812579945, 'avg_acc': 49.92996668685645, 'loss': 8.091662406921387}


EP_train:2:   6%|| 1662/27626 [03:55<1:01:47,  7.00it/s]

{'epoch': 2, 'iter': 1660, 'avg_loss': 8.54172962017622, 'avg_acc': 49.9228627332932, 'loss': 9.842514038085938}


EP_train:2:   6%|| 1672/27626 [03:56<1:01:32,  7.03it/s]

{'epoch': 2, 'iter': 1670, 'avg_loss': 8.541567375530104, 'avg_acc': 49.91210353081987, 'loss': 8.522592544555664}


EP_train:2:   6%|| 1682/27626 [03:58<1:01:19,  7.05it/s]

{'epoch': 2, 'iter': 1680, 'avg_loss': 8.54046429090029, 'avg_acc': 49.91634443783462, 'loss': 8.374628067016602}


EP_train:2:   6%|| 1692/27626 [03:59<1:01:10,  7.06it/s]

{'epoch': 2, 'iter': 1690, 'avg_loss': 8.541802329329046, 'avg_acc': 49.89835895919575, 'loss': 9.274810791015625}


EP_train:2:   6%|| 1702/27626 [04:00<1:00:44,  7.11it/s]

{'epoch': 2, 'iter': 1700, 'avg_loss': 8.544727395241013, 'avg_acc': 49.90079365079365, 'loss': 10.087339401245117}


EP_train:2:   6%|| 1712/27626 [04:02<1:01:21,  7.04it/s]

{'epoch': 2, 'iter': 1710, 'avg_loss': 8.544970516849164, 'avg_acc': 49.89224137931034, 'loss': 8.64211368560791}


EP_train:2:   6%|| 1722/27626 [04:03<1:01:09,  7.06it/s]

{'epoch': 2, 'iter': 1720, 'avg_loss': 8.546979126716893, 'avg_acc': 49.88560429982568, 'loss': 8.446554183959961}


EP_train:2:   6%|| 1732/27626 [04:05<1:01:43,  6.99it/s]

{'epoch': 2, 'iter': 1730, 'avg_loss': 8.548479785952246, 'avg_acc': 49.87362796071635, 'loss': 8.807915687561035}


EP_train:2:   6%|| 1742/27626 [04:06<1:01:19,  7.04it/s]

{'epoch': 2, 'iter': 1740, 'avg_loss': 8.549464542382342, 'avg_acc': 49.85460941987363, 'loss': 8.080942153930664}


EP_train:2:   6%|| 1752/27626 [04:07<1:01:07,  7.06it/s]

{'epoch': 2, 'iter': 1750, 'avg_loss': 8.547789032019457, 'avg_acc': 49.82509994288978, 'loss': 8.422947883605957}


EP_train:2:   6%|| 1762/27626 [04:09<1:01:27,  7.01it/s]

{'epoch': 2, 'iter': 1760, 'avg_loss': 8.5471952469223, 'avg_acc': 49.84206416808632, 'loss': 8.653905868530273}


EP_train:2:   6%|| 1772/27626 [04:10<1:01:06,  7.05it/s]

{'epoch': 2, 'iter': 1770, 'avg_loss': 8.547383456765965, 'avg_acc': 49.841191417278374, 'loss': 7.870728969573975}


EP_train:2:   6%|| 1782/27626 [04:12<1:00:46,  7.09it/s]

{'epoch': 2, 'iter': 1780, 'avg_loss': 8.547613478590437, 'avg_acc': 49.84559236384054, 'loss': 9.026623725891113}


EP_train:2:   6%|| 1792/27626 [04:13<1:00:42,  7.09it/s]

{'epoch': 2, 'iter': 1790, 'avg_loss': 8.548877072826835, 'avg_acc': 49.84121998883305, 'loss': 8.534401893615723}


EP_train:2:   7%|| 1802/27626 [04:15<1:01:00,  7.05it/s]

{'epoch': 2, 'iter': 1800, 'avg_loss': 8.550297729178709, 'avg_acc': 49.821279844530814, 'loss': 9.967933654785156}


EP_train:2:   7%|| 1812/27626 [04:16<1:00:46,  7.08it/s]

{'epoch': 2, 'iter': 1810, 'avg_loss': 8.549303940025764, 'avg_acc': 49.82226670347874, 'loss': 7.715089797973633}


EP_train:2:   7%|| 1822/27626 [04:17<1:00:53,  7.06it/s]

{'epoch': 2, 'iter': 1820, 'avg_loss': 8.550854647310143, 'avg_acc': 49.83182317408018, 'loss': 8.757487297058105}


EP_train:2:   7%|| 1832/27626 [04:19<1:00:54,  7.06it/s]

{'epoch': 2, 'iter': 1830, 'avg_loss': 8.549339476730704, 'avg_acc': 49.87711632987439, 'loss': 8.297590255737305}


EP_train:2:   7%|| 1842/27626 [04:20<1:01:27,  6.99it/s]

{'epoch': 2, 'iter': 1840, 'avg_loss': 8.549660785764148, 'avg_acc': 49.90833785985877, 'loss': 8.04085636138916}


EP_train:2:   7%|| 1852/27626 [04:22<1:01:40,  6.96it/s]

{'epoch': 2, 'iter': 1850, 'avg_loss': 8.550928282261152, 'avg_acc': 49.89870340356564, 'loss': 8.095261573791504}


EP_train:2:   7%|| 1862/27626 [04:23<1:00:57,  7.04it/s]

{'epoch': 2, 'iter': 1860, 'avg_loss': 8.548938359450679, 'avg_acc': 49.890851692638364, 'loss': 8.587830543518066}


EP_train:2:   7%|| 1872/27626 [04:24<1:00:44,  7.07it/s]

{'epoch': 2, 'iter': 1870, 'avg_loss': 8.54757632143927, 'avg_acc': 49.894775521111704, 'loss': 8.77479362487793}


EP_train:2:   7%|| 1882/27626 [04:26<1:00:46,  7.06it/s]

{'epoch': 2, 'iter': 1880, 'avg_loss': 8.547572442662647, 'avg_acc': 49.89865762892079, 'loss': 8.045647621154785}


EP_train:2:   7%|| 1892/27626 [04:27<1:00:38,  7.07it/s]

{'epoch': 2, 'iter': 1890, 'avg_loss': 8.54903736094329, 'avg_acc': 49.909108937070336, 'loss': 8.585478782653809}


EP_train:2:   7%|| 1902/27626 [04:29<1:01:14,  7.00it/s]

{'epoch': 2, 'iter': 1900, 'avg_loss': 8.551015996857734, 'avg_acc': 49.94082062072594, 'loss': 10.18377685546875}


EP_train:2:   7%|| 1912/27626 [04:30<1:00:36,  7.07it/s]

{'epoch': 2, 'iter': 1910, 'avg_loss': 8.551830497354441, 'avg_acc': 49.967294610151754, 'loss': 8.3832368850708}


EP_train:2:   7%|| 1922/27626 [04:31<1:00:27,  7.09it/s]

{'epoch': 2, 'iter': 1920, 'avg_loss': 8.55380545675227, 'avg_acc': 49.96258459135866, 'loss': 8.88178539276123}


EP_train:2:   7%|| 1932/27626 [04:33<1:00:46,  7.05it/s]

{'epoch': 2, 'iter': 1930, 'avg_loss': 8.55415354214574, 'avg_acc': 49.96277835318488, 'loss': 9.198241233825684}


EP_train:2:   7%|| 1942/27626 [04:34<1:00:56,  7.02it/s]

{'epoch': 2, 'iter': 1940, 'avg_loss': 8.554985249552267, 'avg_acc': 49.94043019062339, 'loss': 9.405882835388184}


EP_train:2:   7%|| 1952/27626 [04:36<1:00:25,  7.08it/s]

{'epoch': 2, 'iter': 1950, 'avg_loss': 8.556843208692307, 'avg_acc': 49.959956432598666, 'loss': 8.40194320678711}


EP_train:2:   7%|| 1962/27626 [04:37<1:00:11,  7.11it/s]

{'epoch': 2, 'iter': 1960, 'avg_loss': 8.555420023759124, 'avg_acc': 49.934663437021925, 'loss': 7.502535343170166}


EP_train:2:   7%|| 1972/27626 [04:39<1:00:25,  7.08it/s]

{'epoch': 2, 'iter': 1970, 'avg_loss': 8.555112542868992, 'avg_acc': 49.92231100963978, 'loss': 8.837602615356445}


EP_train:2:   7%|| 1982/27626 [04:40<1:00:16,  7.09it/s]

{'epoch': 2, 'iter': 1980, 'avg_loss': 8.555083832314736, 'avg_acc': 49.90535083291267, 'loss': 7.713237285614014}


EP_train:2:   7%|| 1992/27626 [04:41<1:01:04,  6.99it/s]

{'epoch': 2, 'iter': 1990, 'avg_loss': 8.5537875894923, 'avg_acc': 49.92152184831743, 'loss': 8.652637481689453}


EP_train:2:   7%|| 2002/27626 [04:43<1:00:24,  7.07it/s]

{'epoch': 2, 'iter': 2000, 'avg_loss': 8.55183596160637, 'avg_acc': 49.90317341329336, 'loss': 8.253466606140137}


EP_train:2:   7%|| 2012/27626 [04:44<1:00:22,  7.07it/s]

{'epoch': 2, 'iter': 2010, 'avg_loss': 8.549451680754856, 'avg_acc': 49.9114246643461, 'loss': 8.063167572021484}


EP_train:2:   7%|| 2022/27626 [04:46<1:00:41,  7.03it/s]

{'epoch': 2, 'iter': 2020, 'avg_loss': 8.547506297242222, 'avg_acc': 49.916501731815934, 'loss': 8.639378547668457}


EP_train:2:   7%|| 2032/27626 [04:47<1:01:04,  6.98it/s]

{'epoch': 2, 'iter': 2030, 'avg_loss': 8.54728341090861, 'avg_acc': 49.91537419990152, 'loss': 8.199307441711426}


EP_train:2:   7%|| 2042/27626 [04:48<1:00:06,  7.09it/s]

{'epoch': 2, 'iter': 2040, 'avg_loss': 8.547594522974293, 'avg_acc': 49.9265066144047, 'loss': 8.573966026306152}


EP_train:2:   7%|| 2052/27626 [04:50<1:00:02,  7.10it/s]

{'epoch': 2, 'iter': 2050, 'avg_loss': 8.547331965998403, 'avg_acc': 49.91924670892248, 'loss': 8.725818634033203}


EP_train:2:   7%|| 2062/27626 [04:51<1:00:53,  7.00it/s]

{'epoch': 2, 'iter': 2060, 'avg_loss': 8.546793421977124, 'avg_acc': 49.9484473556526, 'loss': 8.423654556274414}


EP_train:2:   8%|| 2072/27626 [04:53<1:00:33,  7.03it/s]

{'epoch': 2, 'iter': 2070, 'avg_loss': 8.54683244268656, 'avg_acc': 49.95322308063737, 'loss': 8.670376777648926}


EP_train:2:   8%|| 2082/27626 [04:54<1:01:10,  6.96it/s]

{'epoch': 2, 'iter': 2080, 'avg_loss': 8.5456767645895, 'avg_acc': 49.94443777030274, 'loss': 8.507020950317383}


EP_train:2:   8%|| 2092/27626 [04:55<1:00:10,  7.07it/s]

{'epoch': 2, 'iter': 2090, 'avg_loss': 8.545071627180409, 'avg_acc': 49.93872549019608, 'loss': 8.32953929901123}


EP_train:2:   8%|| 2102/27626 [04:57<1:00:13,  7.06it/s]

{'epoch': 2, 'iter': 2100, 'avg_loss': 8.547144809261724, 'avg_acc': 49.93158019990481, 'loss': 8.524466514587402}


EP_train:2:   8%|| 2112/27626 [04:58<1:00:25,  7.04it/s]

{'epoch': 2, 'iter': 2110, 'avg_loss': 8.547636769383736, 'avg_acc': 49.92746328754145, 'loss': 8.837411880493164}


EP_train:2:   8%|| 2122/27626 [05:00<1:00:31,  7.02it/s]

{'epoch': 2, 'iter': 2120, 'avg_loss': 8.54879205578737, 'avg_acc': 49.94106553512494, 'loss': 8.265981674194336}


EP_train:2:   8%|| 2132/27626 [05:01<1:00:04,  7.07it/s]

{'epoch': 2, 'iter': 2130, 'avg_loss': 8.549265263394982, 'avg_acc': 49.954540122008446, 'loss': 9.59652042388916}


EP_train:2:   8%|| 2142/27626 [05:03<59:52,  7.09it/s]  

{'epoch': 2, 'iter': 2140, 'avg_loss': 8.549926282311644, 'avg_acc': 49.95621205044372, 'loss': 9.412038803100586}


EP_train:2:   8%|| 2152/27626 [05:04<1:00:04,  7.07it/s]

{'epoch': 2, 'iter': 2150, 'avg_loss': 8.550675005316457, 'avg_acc': 49.95496280799628, 'loss': 8.276102066040039}


EP_train:2:   8%|| 2162/27626 [05:05<1:00:21,  7.03it/s]

{'epoch': 2, 'iter': 2160, 'avg_loss': 8.549050094793374, 'avg_acc': 49.93347987043036, 'loss': 7.834259033203125}


EP_train:2:   8%|| 2172/27626 [05:07<1:00:37,  7.00it/s]

{'epoch': 2, 'iter': 2170, 'avg_loss': 8.548584879844872, 'avg_acc': 49.933786273606636, 'loss': 8.383142471313477}


EP_train:2:   8%|| 2182/27626 [05:08<59:50,  7.09it/s]

{'epoch': 2, 'iter': 2180, 'avg_loss': 8.54652406075087, 'avg_acc': 49.93122420907841, 'loss': 8.028833389282227}


EP_train:2:   8%|| 2192/27626 [05:10<1:00:21,  7.02it/s]

{'epoch': 2, 'iter': 2190, 'avg_loss': 8.545464398163412, 'avg_acc': 49.934390689183026, 'loss': 8.511484146118164}


EP_train:2:   8%|| 2202/27626 [05:11<1:00:39,  6.98it/s]

{'epoch': 2, 'iter': 2200, 'avg_loss': 8.545209636584243, 'avg_acc': 49.933268968650616, 'loss': 9.811942100524902}


EP_train:2:   8%|| 2212/27626 [05:12<1:00:13,  7.03it/s]

{'epoch': 2, 'iter': 2210, 'avg_loss': 8.545652375723753, 'avg_acc': 49.93357078245138, 'loss': 8.577630043029785}


EP_train:2:   8%|| 2222/27626 [05:14<59:58,  7.06it/s]  

{'epoch': 2, 'iter': 2220, 'avg_loss': 8.547520221004289, 'avg_acc': 49.94653309320126, 'loss': 8.638668060302734}


EP_train:2:   8%|| 2232/27626 [05:15<1:00:39,  6.98it/s]

{'epoch': 2, 'iter': 2230, 'avg_loss': 8.546419660054424, 'avg_acc': 49.93976916181085, 'loss': 8.166332244873047}


EP_train:2:   8%|| 2242/27626 [05:17<1:00:30,  6.99it/s]

{'epoch': 2, 'iter': 2240, 'avg_loss': 8.546358645999607, 'avg_acc': 49.93864346273985, 'loss': 8.511357307434082}


EP_train:2:   8%|| 2252/27626 [05:18<1:00:17,  7.01it/s]

{'epoch': 2, 'iter': 2250, 'avg_loss': 8.547511223420626, 'avg_acc': 49.944469124833404, 'loss': 8.956809997558594}


EP_train:2:   8%|| 2262/27626 [05:20<59:56,  7.05it/s]

{'epoch': 2, 'iter': 2260, 'avg_loss': 8.547664413300303, 'avg_acc': 49.94471472799646, 'loss': 8.070320129394531}


EP_train:2:   8%|| 2272/27626 [05:21<1:00:00,  7.04it/s]

{'epoch': 2, 'iter': 2270, 'avg_loss': 8.548031881694381, 'avg_acc': 49.949086305592246, 'loss': 9.133054733276367}


EP_train:2:   8%|| 2282/27626 [05:22<59:38,  7.08it/s]

{'epoch': 2, 'iter': 2280, 'avg_loss': 8.548889989029258, 'avg_acc': 49.93834940815432, 'loss': 8.62881088256836}


EP_train:2:   8%|| 2292/27626 [05:24<1:00:00,  7.04it/s]

{'epoch': 2, 'iter': 2290, 'avg_loss': 8.548968033726066, 'avg_acc': 49.934526407682235, 'loss': 7.94041633605957}


EP_train:2:   8%|| 2302/27626 [05:25<1:00:04,  7.03it/s]

{'epoch': 2, 'iter': 2300, 'avg_loss': 8.548418190519689, 'avg_acc': 49.925304215558455, 'loss': 7.521924018859863}


EP_train:2:   8%|| 2312/27626 [05:27<59:43,  7.06it/s]

{'epoch': 2, 'iter': 2310, 'avg_loss': 8.54805205467716, 'avg_acc': 49.916161834703594, 'loss': 9.697876930236816}


EP_train:2:   8%|| 2322/27626 [05:28<59:56,  7.04it/s]  

{'epoch': 2, 'iter': 2320, 'avg_loss': 8.548926584990772, 'avg_acc': 49.89767341663076, 'loss': 8.329581260681152}


EP_train:2:   8%|| 2332/27626 [05:29<59:54,  7.04it/s]  

{'epoch': 2, 'iter': 2330, 'avg_loss': 8.548604696413726, 'avg_acc': 49.91285928785929, 'loss': 8.839515686035156}


EP_train:2:   8%|| 2342/27626 [05:31<1:00:14,  6.99it/s]

{'epoch': 2, 'iter': 2340, 'avg_loss': 8.548893150841257, 'avg_acc': 49.91723622383597, 'loss': 8.986862182617188}


EP_train:2:   9%|| 2352/27626 [05:32<59:48,  7.04it/s]  

{'epoch': 2, 'iter': 2350, 'avg_loss': 8.54937212480377, 'avg_acc': 49.926892811569545, 'loss': 8.704595565795898}


EP_train:2:   9%|| 2362/27626 [05:34<59:53,  7.03it/s]

{'epoch': 2, 'iter': 2360, 'avg_loss': 8.549519017610143, 'avg_acc': 49.915290131300296, 'loss': 7.965681076049805}


EP_train:2:   9%|| 2372/27626 [05:35<59:10,  7.11it/s]

{'epoch': 2, 'iter': 2370, 'avg_loss': 8.54887616709786, 'avg_acc': 49.91828342471531, 'loss': 8.74867057800293}


EP_train:2:   9%|| 2382/27626 [05:36<59:35,  7.06it/s]

{'epoch': 2, 'iter': 2380, 'avg_loss': 8.548310220967517, 'avg_acc': 49.893689626207475, 'loss': 9.068609237670898}


EP_train:2:   9%|| 2392/27626 [05:38<59:50,  7.03it/s]

{'epoch': 2, 'iter': 2390, 'avg_loss': 8.55038508706789, 'avg_acc': 49.898055207026346, 'loss': 9.318638801574707}


EP_train:2:   9%|| 2402/27626 [05:39<59:38,  7.05it/s]

{'epoch': 2, 'iter': 2400, 'avg_loss': 8.55109315949249, 'avg_acc': 49.90889212827989, 'loss': 8.422574043273926}


EP_train:2:   9%|| 2412/27626 [05:41<59:43,  7.04it/s]

{'epoch': 2, 'iter': 2410, 'avg_loss': 8.550717990065156, 'avg_acc': 49.897604728328496, 'loss': 8.119096755981445}


EP_train:2:   9%|| 2422/27626 [05:42<59:29,  7.06it/s]

{'epoch': 2, 'iter': 2420, 'avg_loss': 8.552085203850678, 'avg_acc': 49.894155307724084, 'loss': 8.694576263427734}


EP_train:2:   9%|| 2432/27626 [05:44<59:29,  7.06it/s]

{'epoch': 2, 'iter': 2430, 'avg_loss': 8.551610227770787, 'avg_acc': 49.91644385026738, 'loss': 8.296854019165039}


EP_train:2:   9%|| 2442/27626 [05:45<59:39,  7.04it/s]  

{'epoch': 2, 'iter': 2440, 'avg_loss': 8.551500214362232, 'avg_acc': 49.920626792298236, 'loss': 8.608685493469238}


EP_train:2:   9%|| 2452/27626 [05:46<59:59,  6.99it/s]

{'epoch': 2, 'iter': 2450, 'avg_loss': 8.551288759012895, 'avg_acc': 49.91075071399429, 'loss': 8.805228233337402}


EP_train:2:   9%|| 2462/27626 [05:48<59:53,  7.00it/s]

{'epoch': 2, 'iter': 2460, 'avg_loss': 8.550496090335217, 'avg_acc': 49.90857375050793, 'loss': 8.103582382202148}


EP_train:2:   9%|| 2472/27626 [05:49<59:26,  7.05it/s]

{'epoch': 2, 'iter': 2470, 'avg_loss': 8.549723497820692, 'avg_acc': 49.91400242816673, 'loss': 7.5749897956848145}


EP_train:2:   9%|| 2482/27626 [05:51<59:54,  7.00it/s]  

{'epoch': 2, 'iter': 2480, 'avg_loss': 8.550131902344907, 'avg_acc': 49.90805118903668, 'loss': 8.607131958007812}


EP_train:2:   9%|| 2492/27626 [05:52<59:07,  7.08it/s]

{'epoch': 2, 'iter': 2490, 'avg_loss': 8.550486013717988, 'avg_acc': 49.89336611802489, 'loss': 8.273889541625977}


EP_train:2:   9%|| 2502/27626 [05:53<59:17,  7.06it/s]

{'epoch': 2, 'iter': 2500, 'avg_loss': 8.55052120482526, 'avg_acc': 49.891293482606955, 'loss': 9.69001579284668}


EP_train:2:   9%|| 2512/27626 [05:55<59:11,  7.07it/s]

{'epoch': 2, 'iter': 2510, 'avg_loss': 8.550230544860122, 'avg_acc': 49.911638789326965, 'loss': 8.969874382019043}


EP_train:2:   9%|| 2522/27626 [05:56<59:09,  7.07it/s]

{'epoch': 2, 'iter': 2520, 'avg_loss': 8.550558224503646, 'avg_acc': 49.90579135263784, 'loss': 8.763168334960938}


EP_train:2:   9%|| 2532/27626 [05:58<59:22,  7.04it/s]

{'epoch': 2, 'iter': 2530, 'avg_loss': 8.55028454638526, 'avg_acc': 49.90122481232714, 'loss': 8.759034156799316}


EP_train:2:   9%|| 2542/27626 [05:59<59:19,  7.05it/s]

{'epoch': 2, 'iter': 2540, 'avg_loss': 8.54981042118065, 'avg_acc': 49.90038370720189, 'loss': 8.623427391052246}


EP_train:2:   9%|| 2552/27626 [06:01<59:24,  7.03it/s]

{'epoch': 2, 'iter': 2550, 'avg_loss': 8.550155638526908, 'avg_acc': 49.91424931399451, 'loss': 8.314860343933105}


EP_train:2:   9%|| 2562/27626 [06:02<59:11,  7.06it/s]

{'epoch': 2, 'iter': 2560, 'avg_loss': 8.550222286252666, 'avg_acc': 49.91214369386958, 'loss': 8.798859596252441}


EP_train:2:   9%|| 2572/27626 [06:03<59:08,  7.06it/s]

{'epoch': 2, 'iter': 2570, 'avg_loss': 8.55064338563899, 'avg_acc': 49.92707117852975, 'loss': 8.946587562561035}


EP_train:2:   9%|| 2582/27626 [06:05<59:11,  7.05it/s]

{'epoch': 2, 'iter': 2580, 'avg_loss': 8.550472029139486, 'avg_acc': 49.91645679969004, 'loss': 9.44045352935791}


EP_train:2:   9%|| 2592/27626 [06:06<59:26,  7.02it/s]

{'epoch': 2, 'iter': 2590, 'avg_loss': 8.551802126885566, 'avg_acc': 49.9421072944809, 'loss': 9.825725555419922}


EP_train:2:   9%|| 2602/27626 [06:08<59:00,  7.07it/s]

{'epoch': 2, 'iter': 2600, 'avg_loss': 8.55277566715462, 'avg_acc': 49.942329873125715, 'loss': 8.956255912780762}


EP_train:2:   9%|| 2612/27626 [06:09<59:17,  7.03it/s]

{'epoch': 2, 'iter': 2610, 'avg_loss': 8.551687120615135, 'avg_acc': 49.95451934124856, 'loss': 7.934773921966553}


EP_train:2:   9%|| 2622/27626 [06:10<59:24,  7.02it/s]

{'epoch': 2, 'iter': 2620, 'avg_loss': 8.550915614014952, 'avg_acc': 49.95946203739031, 'loss': 8.2601957321167}


EP_train:2:  10%|| 2632/27626 [06:12<58:58,  7.06it/s]

{'epoch': 2, 'iter': 2630, 'avg_loss': 8.551011816624316, 'avg_acc': 49.948926263778034, 'loss': 8.411589622497559}


EP_train:2:  10%|| 2642/27626 [06:13<58:45,  7.09it/s]

{'epoch': 2, 'iter': 2640, 'avg_loss': 8.551717505767371, 'avg_acc': 49.93965354032564, 'loss': 8.986218452453613}


EP_train:2:  10%|| 2652/27626 [06:15<59:29,  7.00it/s]

{'epoch': 2, 'iter': 2650, 'avg_loss': 8.554138033581607, 'avg_acc': 49.948132780082986, 'loss': 8.951434135437012}


EP_train:2:  10%|| 2662/27626 [06:16<59:42,  6.97it/s]

{'epoch': 2, 'iter': 2660, 'avg_loss': 8.555202790912226, 'avg_acc': 49.965943254415635, 'loss': 8.775124549865723}


EP_train:2:  10%|| 2672/27626 [06:18<58:55,  7.06it/s]

{'epoch': 2, 'iter': 2670, 'avg_loss': 8.555187975498944, 'avg_acc': 49.977770497940845, 'loss': 8.71292781829834}


EP_train:2:  10%|| 2682/27626 [06:19<59:34,  6.98it/s]

{'epoch': 2, 'iter': 2680, 'avg_loss': 8.55575964407261, 'avg_acc': 49.9708597538232, 'loss': 8.478334426879883}


EP_train:2:  10%|| 2692/27626 [06:20<58:45,  7.07it/s]

{'epoch': 2, 'iter': 2690, 'avg_loss': 8.556027943828388, 'avg_acc': 49.97561315496098, 'loss': 8.625393867492676}


EP_train:2:  10%|| 2702/27626 [06:22<58:52,  7.06it/s]

{'epoch': 2, 'iter': 2700, 'avg_loss': 8.556682407657378, 'avg_acc': 49.97917437985931, 'loss': 7.907379627227783}


EP_train:2:  10%|| 2712/27626 [06:23<58:33,  7.09it/s]

{'epoch': 2, 'iter': 2710, 'avg_loss': 8.55553590253199, 'avg_acc': 49.988472888233126, 'loss': 8.173050880432129}


EP_train:2:  10%|| 2722/27626 [06:25<58:39,  7.08it/s]

{'epoch': 2, 'iter': 2720, 'avg_loss': 8.556158272758296, 'avg_acc': 49.99770305034914, 'loss': 8.214336395263672}


EP_train:2:  10%|| 2732/27626 [06:26<58:44,  7.06it/s]

{'epoch': 2, 'iter': 2730, 'avg_loss': 8.556733756308065, 'avg_acc': 49.98283595752472, 'loss': 8.385469436645508}


EP_train:2:  10%|| 2742/27626 [06:27<58:42,  7.06it/s]

{'epoch': 2, 'iter': 2740, 'avg_loss': 8.558246869494644, 'avg_acc': 49.98061838744984, 'loss': 8.865317344665527}


EP_train:2:  10%|| 2752/27626 [06:29<59:03,  7.02it/s]

{'epoch': 2, 'iter': 2750, 'avg_loss': 8.559377020465291, 'avg_acc': 49.99772809887313, 'loss': 8.184516906738281}


EP_train:2:  10%|| 2762/27626 [06:30<58:46,  7.05it/s]

{'epoch': 2, 'iter': 2760, 'avg_loss': 8.560321523416997, 'avg_acc': 50.0011318362912, 'loss': 9.088356971740723}


EP_train:2:  10%|| 2772/27626 [06:32<58:57,  7.03it/s]

{'epoch': 2, 'iter': 2770, 'avg_loss': 8.560898910399938, 'avg_acc': 50.0, 'loss': 8.359444618225098}


EP_train:2:  10%|| 2782/27626 [06:33<58:26,  7.08it/s]

{'epoch': 2, 'iter': 2780, 'avg_loss': 8.56099180041865, 'avg_acc': 50.00112369651205, 'loss': 9.015478134155273}


EP_train:2:  10%|| 2792/27626 [06:34<58:25,  7.09it/s]

{'epoch': 2, 'iter': 2790, 'avg_loss': 8.561075213475707, 'avg_acc': 50.00447868147617, 'loss': 7.976204872131348}


EP_train:2:  10%|| 2802/27626 [06:36<58:32,  7.07it/s]

{'epoch': 2, 'iter': 2800, 'avg_loss': 8.562385090756782, 'avg_acc': 49.98326490539093, 'loss': 8.137503623962402}


EP_train:2:  10%|| 2812/27626 [06:37<58:38,  7.05it/s]

{'epoch': 2, 'iter': 2810, 'avg_loss': 8.562954327255243, 'avg_acc': 50.00111170401992, 'loss': 8.034464836120605}


EP_train:2:  10%|| 2822/27626 [06:39<58:22,  7.08it/s]

{'epoch': 2, 'iter': 2820, 'avg_loss': 8.562542036719798, 'avg_acc': 50.01107763204538, 'loss': 8.160612106323242}


EP_train:2:  10%|| 2832/27626 [06:40<59:04,  6.99it/s]

{'epoch': 2, 'iter': 2830, 'avg_loss': 8.562222736793876, 'avg_acc': 50.0022077004592, 'loss': 8.24170207977295}


EP_train:2:  10%|| 2842/27626 [06:42<59:00,  7.00it/s]

{'epoch': 2, 'iter': 2840, 'avg_loss': 8.562490479705284, 'avg_acc': 50.004399859204504, 'loss': 8.007291793823242}


EP_train:2:  10%|| 2852/27626 [06:43<59:25,  6.95it/s]

{'epoch': 2, 'iter': 2850, 'avg_loss': 8.56110035064387, 'avg_acc': 49.99232725359523, 'loss': 7.411057949066162}


EP_train:2:  10%|| 2862/27626 [06:44<59:08,  6.98it/s]

{'epoch': 2, 'iter': 2860, 'avg_loss': 8.559676101848572, 'avg_acc': 49.986892694861936, 'loss': 8.116350173950195}


EP_train:2:  10%|| 2872/27626 [06:46<58:45,  7.02it/s]

{'epoch': 2, 'iter': 2870, 'avg_loss': 8.560195130116016, 'avg_acc': 49.98584987809126, 'loss': 8.301271438598633}


EP_train:2:  10%|| 2882/27626 [06:47<58:39,  7.03it/s]

{'epoch': 2, 'iter': 2880, 'avg_loss': 8.560053493528885, 'avg_acc': 49.993491843110036, 'loss': 8.251547813415527}


EP_train:2:  10%|| 2892/27626 [06:49<58:38,  7.03it/s]

{'epoch': 2, 'iter': 2890, 'avg_loss': 8.559907542261357, 'avg_acc': 49.971895537876165, 'loss': 8.217527389526367}


EP_train:2:  11%|| 2902/27626 [06:50<58:37,  7.03it/s]

{'epoch': 2, 'iter': 2900, 'avg_loss': 8.559352410410815, 'avg_acc': 49.97953291968287, 'loss': 8.22173023223877}


EP_train:2:  11%|| 2912/27626 [06:51<58:21,  7.06it/s]

{'epoch': 2, 'iter': 2910, 'avg_loss': 8.5591740224911, 'avg_acc': 49.988191343181036, 'loss': 8.210762977600098}


EP_train:2:  11%|| 2922/27626 [06:53<58:28,  7.04it/s]

{'epoch': 2, 'iter': 2920, 'avg_loss': 8.5588622809844, 'avg_acc': 49.9978603218076, 'loss': 8.355690956115723}


EP_train:2:  11%|| 2932/27626 [06:54<58:44,  7.01it/s]

{'epoch': 2, 'iter': 2930, 'avg_loss': 8.558131464916217, 'avg_acc': 49.99466905493006, 'loss': 8.108613014221191}


EP_train:2:  11%|| 2942/27626 [06:56<58:04,  7.08it/s]

{'epoch': 2, 'iter': 2940, 'avg_loss': 8.557807162925931, 'avg_acc': 49.99468718123087, 'loss': 8.660930633544922}


EP_train:2:  11%|| 2952/27626 [06:57<58:11,  7.07it/s]

{'epoch': 2, 'iter': 2950, 'avg_loss': 8.557178508140645, 'avg_acc': 50.00847170450695, 'loss': 8.863164901733398}


EP_train:2:  11%|| 2962/27626 [06:59<58:47,  6.99it/s]

{'epoch': 2, 'iter': 2960, 'avg_loss': 8.557753073851107, 'avg_acc': 50.02849544072948, 'loss': 9.27419376373291}


EP_train:2:  11%|| 2972/27626 [07:00<57:51,  7.10it/s]

{'epoch': 2, 'iter': 2970, 'avg_loss': 8.557453302212013, 'avg_acc': 50.021036687983845, 'loss': 8.546378135681152}


EP_train:2:  11%|| 2982/27626 [07:01<58:04,  7.07it/s]

{'epoch': 2, 'iter': 2980, 'avg_loss': 8.557768388387942, 'avg_acc': 50.00943475343844, 'loss': 8.21577262878418}


EP_train:2:  11%|| 2992/27626 [07:03<57:57,  7.08it/s]

{'epoch': 2, 'iter': 2990, 'avg_loss': 8.558284060925555, 'avg_acc': 50.0, 'loss': 8.4158353805542}


EP_train:2:  11%|| 3002/27626 [07:04<58:04,  7.07it/s]

{'epoch': 2, 'iter': 3000, 'avg_loss': 8.559433902434133, 'avg_acc': 50.01770243252249, 'loss': 8.379730224609375}


EP_train:2:  11%|| 3012/27626 [07:06<58:15,  7.04it/s]

{'epoch': 2, 'iter': 3010, 'avg_loss': 8.55959954827143, 'avg_acc': 50.01868150116241, 'loss': 8.784002304077148}


EP_train:2:  11%|| 3022/27626 [07:07<58:31,  7.01it/s]

{'epoch': 2, 'iter': 3020, 'avg_loss': 8.559460868822821, 'avg_acc': 50.02172293942403, 'loss': 8.472676277160645}


EP_train:2:  11%|| 3032/27626 [07:08<58:06,  7.05it/s]

{'epoch': 2, 'iter': 3030, 'avg_loss': 8.5597222506587, 'avg_acc': 50.013403167271534, 'loss': 8.87891674041748}


EP_train:2:  11%|| 3042/27626 [07:10<58:09,  7.05it/s]

{'epoch': 2, 'iter': 3040, 'avg_loss': 8.55903400931065, 'avg_acc': 50.00719335744821, 'loss': 8.689534187316895}


EP_train:2:  11%|| 3052/27626 [07:11<58:06,  7.05it/s]

{'epoch': 2, 'iter': 3050, 'avg_loss': 8.55915013144189, 'avg_acc': 50.00204850868568, 'loss': 8.321118354797363}


EP_train:2:  11%|| 3062/27626 [07:13<58:22,  7.01it/s]

{'epoch': 2, 'iter': 3060, 'avg_loss': 8.55865766985749, 'avg_acc': 50.0030627245998, 'loss': 9.301639556884766}


EP_train:2:  11%|| 3072/27626 [07:14<58:04,  7.05it/s]

{'epoch': 2, 'iter': 3070, 'avg_loss': 8.558265923444473, 'avg_acc': 50.009158254640184, 'loss': 8.954056739807129}


EP_train:2:  11%|| 3082/27626 [07:15<57:28,  7.12it/s]

{'epoch': 2, 'iter': 3080, 'avg_loss': 8.558543982036854, 'avg_acc': 50.02738558909445, 'loss': 9.014963150024414}


EP_train:2:  11%|| 3092/27626 [07:17<57:28,  7.11it/s]

{'epoch': 2, 'iter': 3090, 'avg_loss': 8.558653695573316, 'avg_acc': 50.029318990617924, 'loss': 7.998672962188721}


EP_train:2:  11%|| 3102/27626 [07:18<57:58,  7.05it/s]

{'epoch': 2, 'iter': 3100, 'avg_loss': 8.560080501964807, 'avg_acc': 50.039301838116735, 'loss': 8.825033187866211}


EP_train:2:  11%|| 3112/27626 [07:20<58:36,  6.97it/s]

{'epoch': 2, 'iter': 3110, 'avg_loss': 8.560418280402551, 'avg_acc': 50.03917550626809, 'loss': 8.346553802490234}


EP_train:2:  11%|| 3122/27626 [07:21<58:19,  7.00it/s]

{'epoch': 2, 'iter': 3120, 'avg_loss': 8.561799519033777, 'avg_acc': 50.05306792694649, 'loss': 9.085265159606934}


EP_train:2:  11%|| 3132/27626 [07:23<57:51,  7.06it/s]

{'epoch': 2, 'iter': 3130, 'avg_loss': 8.562463424564436, 'avg_acc': 50.065873522836156, 'loss': 8.753719329833984}


EP_train:2:  11%|| 3142/27626 [07:24<57:51,  7.05it/s]

{'epoch': 2, 'iter': 3140, 'avg_loss': 8.561972218992906, 'avg_acc': 50.073623049984086, 'loss': 8.547662734985352}


EP_train:2:  11%|| 3152/27626 [07:25<57:27,  7.10it/s]

{'epoch': 2, 'iter': 3150, 'avg_loss': 8.562079441581442, 'avg_acc': 50.07140590288797, 'loss': 9.365107536315918}


EP_train:2:  11%|| 3162/27626 [07:27<57:35,  7.08it/s]

{'epoch': 2, 'iter': 3160, 'avg_loss': 8.56229529445664, 'avg_acc': 50.08205472951598, 'loss': 8.226203918457031}


EP_train:2:  11%|| 3172/27626 [07:28<57:49,  7.05it/s]

{'epoch': 2, 'iter': 3170, 'avg_loss': 8.561754262473674, 'avg_acc': 50.0916508987701, 'loss': 7.712878227233887}


EP_train:2:  12%|| 3182/27626 [07:30<57:43,  7.06it/s]

{'epoch': 2, 'iter': 3180, 'avg_loss': 8.561391104170827, 'avg_acc': 50.09038038352719, 'loss': 8.448139190673828}


EP_train:2:  12%|| 3192/27626 [07:31<58:01,  7.02it/s]

{'epoch': 2, 'iter': 3190, 'avg_loss': 8.562550807589462, 'avg_acc': 50.09205578188656, 'loss': 9.410122871398926}


EP_train:2:  12%|| 3202/27626 [07:32<58:07,  7.00it/s]

{'epoch': 2, 'iter': 3200, 'avg_loss': 8.56304941353147, 'avg_acc': 50.08786316776007, 'loss': 8.878311157226562}


EP_train:2:  12%|| 3212/27626 [07:34<58:03,  7.01it/s]

{'epoch': 2, 'iter': 3210, 'avg_loss': 8.56367651928403, 'avg_acc': 50.08077701650576, 'loss': 8.988377571105957}


EP_train:2:  12%|| 3222/27626 [07:35<58:15,  6.98it/s]

{'epoch': 2, 'iter': 3220, 'avg_loss': 8.564133842625777, 'avg_acc': 50.0776156473145, 'loss': 8.718626022338867}


EP_train:2:  12%|| 3232/27626 [07:37<57:26,  7.08it/s]

{'epoch': 2, 'iter': 3230, 'avg_loss': 8.564562146119826, 'avg_acc': 50.07157226864748, 'loss': 8.013127326965332}


EP_train:2:  12%|| 3242/27626 [07:38<57:18,  7.09it/s]

{'epoch': 2, 'iter': 3240, 'avg_loss': 8.564321934018228, 'avg_acc': 50.06653039185437, 'loss': 8.21696662902832}


EP_train:2:  12%|| 3252/27626 [07:40<57:36,  7.05it/s]

{'epoch': 2, 'iter': 3250, 'avg_loss': 8.564175416711146, 'avg_acc': 50.07593817286988, 'loss': 8.666921615600586}


EP_train:2:  12%|| 3262/27626 [07:41<57:17,  7.09it/s]

{'epoch': 2, 'iter': 3260, 'avg_loss': 8.563443528549396, 'avg_acc': 50.07283042011653, 'loss': 8.504980087280273}


EP_train:2:  12%|| 3272/27626 [07:42<57:40,  7.04it/s]

{'epoch': 2, 'iter': 3270, 'avg_loss': 8.563174525738782, 'avg_acc': 50.08693824518495, 'loss': 8.647563934326172}


EP_train:2:  12%|| 3282/27626 [07:44<57:44,  7.03it/s]

{'epoch': 2, 'iter': 3280, 'avg_loss': 8.564049304094812, 'avg_acc': 50.08762572386467, 'loss': 8.947358131408691}


EP_train:2:  12%|| 3292/27626 [07:45<57:54,  7.00it/s]

{'epoch': 2, 'iter': 3290, 'avg_loss': 8.563534742428951, 'avg_acc': 50.09020814342146, 'loss': 8.112997055053711}


EP_train:2:  12%|| 3302/27626 [07:47<58:10,  6.97it/s]

{'epoch': 2, 'iter': 3300, 'avg_loss': 8.56277654778412, 'avg_acc': 50.10508179339595, 'loss': 8.968753814697266}


EP_train:2:  12%|| 3312/27626 [07:48<57:22,  7.06it/s]

{'epoch': 2, 'iter': 3310, 'avg_loss': 8.562721674840496, 'avg_acc': 50.102876774388406, 'loss': 8.319547653198242}


EP_train:2:  12%|| 3322/27626 [07:49<57:22,  7.06it/s]

{'epoch': 2, 'iter': 3320, 'avg_loss': 8.5629151186933, 'avg_acc': 50.095039144835894, 'loss': 8.086660385131836}


EP_train:2:  12%|| 3332/27626 [07:51<56:53,  7.12it/s]

{'epoch': 2, 'iter': 3330, 'avg_loss': 8.562510273686332, 'avg_acc': 50.09193935755029, 'loss': 8.085570335388184}


EP_train:2:  12%|| 3342/27626 [07:52<57:08,  7.08it/s]

{'epoch': 2, 'iter': 3340, 'avg_loss': 8.563399317734829, 'avg_acc': 50.101017659383416, 'loss': 8.610014915466309}


EP_train:2:  12%|| 3352/27626 [07:54<57:23,  7.05it/s]

{'epoch': 2, 'iter': 3350, 'avg_loss': 8.563867799775418, 'avg_acc': 50.094188301999395, 'loss': 9.029888153076172}


EP_train:2:  12%|| 3362/27626 [07:55<56:57,  7.10it/s]

{'epoch': 2, 'iter': 3360, 'avg_loss': 8.563550495810539, 'avg_acc': 50.10320589110384, 'loss': 7.510289669036865}


EP_train:2:  12%|| 3372/27626 [07:56<57:33,  7.02it/s]

{'epoch': 2, 'iter': 3370, 'avg_loss': 8.563507949588066, 'avg_acc': 50.108461880747555, 'loss': 8.435083389282227}


EP_train:2:  12%|| 3382/27626 [07:58<57:31,  7.02it/s]

{'epoch': 2, 'iter': 3380, 'avg_loss': 8.563092969783273, 'avg_acc': 50.09889825495416, 'loss': 8.154862403869629}


EP_train:2:  12%|| 3392/27626 [07:59<57:33,  7.02it/s]

{'epoch': 2, 'iter': 3390, 'avg_loss': 8.562635842623946, 'avg_acc': 50.094920377469776, 'loss': 8.723124504089355}


EP_train:2:  12%|| 3402/27626 [08:01<57:09,  7.06it/s]

{'epoch': 2, 'iter': 3400, 'avg_loss': 8.564491841625514, 'avg_acc': 50.10291090855631, 'loss': 8.248126983642578}


EP_train:2:  12%|| 3412/27626 [08:02<57:01,  7.08it/s]

{'epoch': 2, 'iter': 3410, 'avg_loss': 8.564157195050708, 'avg_acc': 50.11085458809733, 'loss': 8.953337669372559}


EP_train:2:  12%|| 3422/27626 [08:04<56:46,  7.11it/s]

{'epoch': 2, 'iter': 3420, 'avg_loss': 8.563513429079164, 'avg_acc': 50.10596316866413, 'loss': 7.897840976715088}


EP_train:2:  12%|| 3432/27626 [08:05<56:51,  7.09it/s]

{'epoch': 2, 'iter': 3430, 'avg_loss': 8.563480250170404, 'avg_acc': 50.103832701836204, 'loss': 8.167616844177246}


EP_train:2:  12%|| 3442/27626 [08:06<56:46,  7.10it/s]

{'epoch': 2, 'iter': 3440, 'avg_loss': 8.562765308952166, 'avg_acc': 50.101714617843655, 'loss': 8.440688133239746}


EP_train:2:  12%|| 3452/27626 [08:08<56:39,  7.11it/s]

{'epoch': 2, 'iter': 3450, 'avg_loss': 8.563882023173807, 'avg_acc': 50.10594755143437, 'loss': 8.857357025146484}


EP_train:2:  13%|| 3462/27626 [08:09<57:25,  7.01it/s]

{'epoch': 2, 'iter': 3460, 'avg_loss': 8.563898416517512, 'avg_acc': 50.09932100548974, 'loss': 8.653799057006836}


EP_train:2:  13%|| 3472/27626 [08:11<58:01,  6.94it/s]

{'epoch': 2, 'iter': 3470, 'avg_loss': 8.562358342821609, 'avg_acc': 50.0954335926246, 'loss': 8.272882461547852}


EP_train:2:  13%|| 3482/27626 [08:12<57:06,  7.05it/s]

{'epoch': 2, 'iter': 3480, 'avg_loss': 8.562206444607423, 'avg_acc': 50.08797759264579, 'loss': 8.31808853149414}


EP_train:2:  13%|| 3492/27626 [08:13<56:41,  7.10it/s]

{'epoch': 2, 'iter': 3490, 'avg_loss': 8.562158255821585, 'avg_acc': 50.09399169292467, 'loss': 8.605281829833984}


EP_train:2:  13%|| 3502/27626 [08:15<56:43,  7.09it/s]

{'epoch': 2, 'iter': 3500, 'avg_loss': 8.564927083565419, 'avg_acc': 50.09997143673236, 'loss': 9.879106521606445}


EP_train:2:  13%|| 3512/27626 [08:16<56:26,  7.12it/s]

{'epoch': 2, 'iter': 3510, 'avg_loss': 8.564954250023868, 'avg_acc': 50.091676160638, 'loss': 8.182926177978516}


EP_train:2:  13%|| 3522/27626 [08:18<56:59,  7.05it/s]

{'epoch': 2, 'iter': 3520, 'avg_loss': 8.56550829507383, 'avg_acc': 50.10029111047998, 'loss': 9.227595329284668}


EP_train:2:  13%|| 3532/27626 [08:19<57:25,  6.99it/s]

{'epoch': 2, 'iter': 3530, 'avg_loss': 8.565998402576344, 'avg_acc': 50.0982370433305, 'loss': 8.675504684448242}


EP_train:2:  13%|| 3542/27626 [08:20<56:37,  7.09it/s]

{'epoch': 2, 'iter': 3540, 'avg_loss': 8.56634499359454, 'avg_acc': 50.09442953967805, 'loss': 8.66030216217041}


EP_train:2:  13%|| 3552/27626 [08:22<57:16,  7.01it/s]

{'epoch': 2, 'iter': 3550, 'avg_loss': 8.566742469343929, 'avg_acc': 50.086243311743175, 'loss': 8.233128547668457}


EP_train:2:  13%|| 3562/27626 [08:23<56:56,  7.04it/s]

{'epoch': 2, 'iter': 3560, 'avg_loss': 8.566420856401914, 'avg_acc': 50.08073574838529, 'loss': 8.796521186828613}


EP_train:2:  13%|| 3572/27626 [08:25<57:03,  7.03it/s]

{'epoch': 2, 'iter': 3570, 'avg_loss': 8.565083579167837, 'avg_acc': 50.06738308597032, 'loss': 8.605673789978027}


EP_train:2:  13%|| 3582/27626 [08:26<56:41,  7.07it/s]

{'epoch': 2, 'iter': 3580, 'avg_loss': 8.564614713142053, 'avg_acc': 50.062831611281766, 'loss': 7.98148775100708}


EP_train:2:  13%|| 3592/27626 [08:28<56:39,  7.07it/s]

{'epoch': 2, 'iter': 3590, 'avg_loss': 8.565705370418344, 'avg_acc': 50.061786410470624, 'loss': 8.830229759216309}


EP_train:2:  13%|| 3602/27626 [08:29<56:21,  7.10it/s]

{'epoch': 2, 'iter': 3600, 'avg_loss': 8.567100770272868, 'avg_acc': 50.04078728131075, 'loss': 8.677672386169434}


EP_train:2:  13%|| 3612/27626 [08:30<56:41,  7.06it/s]

{'epoch': 2, 'iter': 3610, 'avg_loss': 8.566382295634403, 'avg_acc': 50.05105926336195, 'loss': 7.621788024902344}


EP_train:2:  13%|| 3622/27626 [08:32<56:29,  7.08it/s]

{'epoch': 2, 'iter': 3620, 'avg_loss': 8.567375803744389, 'avg_acc': 50.0491922120961, 'loss': 9.282814979553223}


EP_train:2:  13%|| 3632/27626 [08:33<56:51,  7.03it/s]

{'epoch': 2, 'iter': 3630, 'avg_loss': 8.567796361902731, 'avg_acc': 50.05077802258331, 'loss': 8.02975082397461}


EP_train:2:  13%|| 3642/27626 [08:35<57:10,  6.99it/s]

{'epoch': 2, 'iter': 3640, 'avg_loss': 8.56813758403775, 'avg_acc': 50.05407168360341, 'loss': 8.014117240905762}


EP_train:2:  13%|| 3652/27626 [08:36<57:42,  6.92it/s]

{'epoch': 2, 'iter': 3650, 'avg_loss': 8.567878942168337, 'avg_acc': 50.051355792933435, 'loss': 9.666857719421387}


EP_train:2:  13%|| 3662/27626 [08:37<56:52,  7.02it/s]

{'epoch': 2, 'iter': 3660, 'avg_loss': 8.56704684366655, 'avg_acc': 50.05975143403442, 'loss': 8.343302726745605}


EP_train:2:  13%|| 3672/27626 [08:39<56:25,  7.07it/s]

{'epoch': 2, 'iter': 3670, 'avg_loss': 8.566837640459418, 'avg_acc': 50.053629801144105, 'loss': 8.697823524475098}


EP_train:2:  13%|| 3682/27626 [08:40<56:31,  7.06it/s]

{'epoch': 2, 'iter': 3680, 'avg_loss': 8.566322895211714, 'avg_acc': 50.04839038304808, 'loss': 9.310602188110352}


EP_train:2:  13%|| 3692/27626 [08:42<56:38,  7.04it/s]

{'epoch': 2, 'iter': 3690, 'avg_loss': 8.56560407011724, 'avg_acc': 50.051645895421295, 'loss': 8.381667137145996}


EP_train:2:  13%|| 3702/27626 [08:43<56:16,  7.09it/s]

{'epoch': 2, 'iter': 3700, 'avg_loss': 8.566400609896396, 'avg_acc': 50.05741691434748, 'loss': 8.918365478515625}


EP_train:2:  13%|| 3712/27626 [08:44<56:55,  7.00it/s]

{'epoch': 2, 'iter': 3710, 'avg_loss': 8.565851759313091, 'avg_acc': 50.059788466720555, 'loss': 8.18408489227295}


EP_train:2:  13%|| 3722/27626 [08:46<56:59,  6.99it/s]

{'epoch': 2, 'iter': 3720, 'avg_loss': 8.566626416390637, 'avg_acc': 50.05206933619994, 'loss': 9.26386547088623}


EP_train:2:  14%|| 3732/27626 [08:47<56:47,  7.01it/s]

{'epoch': 2, 'iter': 3730, 'avg_loss': 8.566589900808914, 'avg_acc': 50.04774189225408, 'loss': 7.680042266845703}


EP_train:2:  14%|| 3742/27626 [08:49<56:39,  7.03it/s]

{'epoch': 2, 'iter': 3740, 'avg_loss': 8.567085002138473, 'avg_acc': 50.0618150227212, 'loss': 8.988374710083008}


EP_train:2:  14%|| 3752/27626 [08:50<56:23,  7.06it/s]

{'epoch': 2, 'iter': 3750, 'avg_loss': 8.56769679139945, 'avg_acc': 50.06498267128766, 'loss': 9.219551086425781}


EP_train:2:  14%|| 3762/27626 [08:52<56:18,  7.06it/s]

{'epoch': 2, 'iter': 3760, 'avg_loss': 8.566865965775497, 'avg_acc': 50.07561153948418, 'loss': 8.522245407104492}


EP_train:2:  14%|| 3772/27626 [08:53<56:51,  6.99it/s]

{'epoch': 2, 'iter': 3770, 'avg_loss': 8.566114312993598, 'avg_acc': 50.070438875629804, 'loss': 8.1845064163208}


EP_train:2:  14%|| 3782/27626 [08:54<56:36,  7.02it/s]

{'epoch': 2, 'iter': 3780, 'avg_loss': 8.567366824897942, 'avg_acc': 50.07603808516266, 'loss': 8.832131385803223}


EP_train:2:  14%|| 3792/27626 [08:56<56:34,  7.02it/s]

{'epoch': 2, 'iter': 3790, 'avg_loss': 8.567791990729443, 'avg_acc': 50.075837509891855, 'loss': 8.130210876464844}


EP_train:2:  14%|| 3802/27626 [08:57<56:22,  7.04it/s]

{'epoch': 2, 'iter': 3800, 'avg_loss': 8.568789523891699, 'avg_acc': 50.07810444619837, 'loss': 9.898364067077637}


EP_train:2:  14%|| 3812/27626 [08:59<56:05,  7.08it/s]

{'epoch': 2, 'iter': 3810, 'avg_loss': 8.568528050983462, 'avg_acc': 50.07051954867489, 'loss': 8.410693168640137}


EP_train:2:  14%|| 3822/27626 [09:00<56:34,  7.01it/s]

{'epoch': 2, 'iter': 3820, 'avg_loss': 8.568085424003412, 'avg_acc': 50.07851347814708, 'loss': 7.999253749847412}


EP_train:2:  14%|| 3832/27626 [09:01<56:46,  6.98it/s]

{'epoch': 2, 'iter': 3830, 'avg_loss': 8.567732840099648, 'avg_acc': 50.07015139650222, 'loss': 9.006786346435547}


EP_train:2:  14%|| 3842/27626 [09:03<56:16,  7.04it/s]

{'epoch': 2, 'iter': 3840, 'avg_loss': 8.567406570312412, 'avg_acc': 50.07078234834679, 'loss': 9.00617504119873}


EP_train:2:  14%|| 3852/27626 [09:04<56:08,  7.06it/s]

{'epoch': 2, 'iter': 3850, 'avg_loss': 8.566606359400089, 'avg_acc': 50.06491820306414, 'loss': 8.716620445251465}


EP_train:2:  14%|| 3862/27626 [09:06<56:15,  7.04it/s]

{'epoch': 2, 'iter': 3860, 'avg_loss': 8.566690895601246, 'avg_acc': 50.05908443408443, 'loss': 8.591888427734375}


EP_train:2:  14%|| 3872/27626 [09:07<56:27,  7.01it/s]

{'epoch': 2, 'iter': 3870, 'avg_loss': 8.567064227506007, 'avg_acc': 50.05893180056833, 'loss': 7.858846664428711}


EP_train:2:  14%|| 3882/27626 [09:09<56:26,  7.01it/s]

{'epoch': 2, 'iter': 3880, 'avg_loss': 8.566295162423067, 'avg_acc': 50.06039036330843, 'loss': 7.9753522872924805}


EP_train:2:  14%|| 3892/27626 [09:10<55:52,  7.08it/s]

{'epoch': 2, 'iter': 3890, 'avg_loss': 8.56576620794084, 'avg_acc': 50.07388846054999, 'loss': 9.085577964782715}


EP_train:2:  14%|| 3902/27626 [09:11<56:04,  7.05it/s]

{'epoch': 2, 'iter': 3900, 'avg_loss': 8.565601852361253, 'avg_acc': 50.069693668290185, 'loss': 7.75562858581543}


EP_train:2:  14%|| 3912/27626 [09:13<56:36,  6.98it/s]

{'epoch': 2, 'iter': 3910, 'avg_loss': 8.565940101382617, 'avg_acc': 50.06552032728202, 'loss': 8.905464172363281}


EP_train:2:  14%|| 3922/27626 [09:14<56:32,  6.99it/s]

{'epoch': 2, 'iter': 3920, 'avg_loss': 8.566508306650688, 'avg_acc': 50.07172915072685, 'loss': 9.226747512817383}


EP_train:2:  14%|| 3932/27626 [09:16<56:21,  7.01it/s]

{'epoch': 2, 'iter': 3930, 'avg_loss': 8.56646060907213, 'avg_acc': 50.074726532688885, 'loss': 7.8538818359375}


EP_train:2:  14%|| 3942/27626 [09:17<56:26,  6.99it/s]

{'epoch': 2, 'iter': 3940, 'avg_loss': 8.566826318853611, 'avg_acc': 50.068986297893936, 'loss': 8.059940338134766}


EP_train:2:  14%|| 3952/27626 [09:18<55:49,  7.07it/s]

{'epoch': 2, 'iter': 3950, 'avg_loss': 8.56693861859263, 'avg_acc': 50.06011136421159, 'loss': 8.52051067352295}


EP_train:2:  14%|| 3962/27626 [09:20<55:56,  7.05it/s]

{'epoch': 2, 'iter': 3960, 'avg_loss': 8.567268128167557, 'avg_acc': 50.0568038374148, 'loss': 7.395143508911133}


EP_train:2:  14%|| 3972/27626 [09:21<55:38,  7.09it/s]

{'epoch': 2, 'iter': 3970, 'avg_loss': 8.565978649190319, 'avg_acc': 50.05429992445228, 'loss': 8.079998016357422}


EP_train:2:  14%|| 3982/27626 [09:23<56:02,  7.03it/s]

{'epoch': 2, 'iter': 3980, 'avg_loss': 8.566311200699355, 'avg_acc': 50.04474378296911, 'loss': 8.12686538696289}


EP_train:2:  14%|| 3992/27626 [09:24<55:50,  7.05it/s]

{'epoch': 2, 'iter': 3990, 'avg_loss': 8.566189672020377, 'avg_acc': 50.055593836131294, 'loss': 8.052083969116211}


EP_train:2:  14%|| 4002/27626 [09:26<56:36,  6.95it/s]

{'epoch': 2, 'iter': 4000, 'avg_loss': 8.56582535853597, 'avg_acc': 50.055454886278426, 'loss': 8.469375610351562}


EP_train:2:  15%|| 4012/27626 [09:27<55:56,  7.03it/s]

{'epoch': 2, 'iter': 4010, 'avg_loss': 8.565427675297123, 'avg_acc': 50.049862877088, 'loss': 8.601103782653809}


EP_train:2:  15%|| 4022/27626 [09:28<55:40,  7.07it/s]

{'epoch': 2, 'iter': 4020, 'avg_loss': 8.566227362186154, 'avg_acc': 50.05051604078587, 'loss': 8.424507141113281}


EP_train:2:  15%|| 4032/27626 [09:30<55:58,  7.03it/s]

{'epoch': 2, 'iter': 4030, 'avg_loss': 8.565339155038632, 'avg_acc': 50.04651451252791, 'loss': 8.318357467651367}


EP_train:2:  15%|| 4042/27626 [09:31<55:35,  7.07it/s]

{'epoch': 2, 'iter': 4040, 'avg_loss': 8.565432144373663, 'avg_acc': 50.036346201435286, 'loss': 8.724241256713867}


EP_train:2:  15%|| 4052/27626 [09:33<55:55,  7.03it/s]

{'epoch': 2, 'iter': 4050, 'avg_loss': 8.565751261135526, 'avg_acc': 50.04165638114046, 'loss': 8.497177124023438}


EP_train:2:  15%|| 4062/27626 [09:34<55:36,  7.06it/s]

{'epoch': 2, 'iter': 4060, 'avg_loss': 8.566645111130013, 'avg_acc': 50.0461708938685, 'loss': 8.91020679473877}


EP_train:2:  15%|| 4072/27626 [09:35<55:51,  7.03it/s]

{'epoch': 2, 'iter': 4070, 'avg_loss': 8.567040615836106, 'avg_acc': 50.041451731761235, 'loss': 8.577932357788086}


EP_train:2:  15%|| 4082/27626 [09:37<56:14,  6.98it/s]

{'epoch': 2, 'iter': 4080, 'avg_loss': 8.566591202292598, 'avg_acc': 50.02986400392061, 'loss': 8.133224487304688}


EP_train:2:  15%|| 4092/27626 [09:38<56:08,  6.99it/s]

{'epoch': 2, 'iter': 4090, 'avg_loss': 8.566429226068028, 'avg_acc': 50.0305548765583, 'loss': 8.190516471862793}


EP_train:2:  15%|| 4102/27626 [09:40<55:55,  7.01it/s]

{'epoch': 2, 'iter': 4100, 'avg_loss': 8.565952457645642, 'avg_acc': 50.0373384540356, 'loss': 8.475587844848633}


EP_train:2:  15%|| 4112/27626 [09:41<55:44,  7.03it/s]

{'epoch': 2, 'iter': 4110, 'avg_loss': 8.56667554630789, 'avg_acc': 50.040288251033815, 'loss': 8.98780345916748}


EP_train:2:  15%|| 4122/27626 [09:43<55:43,  7.03it/s]

{'epoch': 2, 'iter': 4120, 'avg_loss': 8.567535989928437, 'avg_acc': 50.03867386556661, 'loss': 8.61724853515625}


EP_train:2:  15%|| 4132/27626 [09:44<55:26,  7.06it/s]

{'epoch': 2, 'iter': 4130, 'avg_loss': 8.567149139322112, 'avg_acc': 50.04009319777294, 'loss': 8.928025245666504}


EP_train:2:  15%|| 4142/27626 [09:45<55:05,  7.10it/s]

{'epoch': 2, 'iter': 4140, 'avg_loss': 8.566389527881643, 'avg_acc': 50.02943129678822, 'loss': 7.709502696990967}


EP_train:2:  15%|| 4152/27626 [09:47<55:44,  7.02it/s]

{'epoch': 2, 'iter': 4150, 'avg_loss': 8.56582977604619, 'avg_acc': 50.015809443507585, 'loss': 7.0700883865356445}


EP_train:2:  15%|| 4162/27626 [09:48<55:33,  7.04it/s]

{'epoch': 2, 'iter': 4160, 'avg_loss': 8.565431460999607, 'avg_acc': 50.023281663061766, 'loss': 8.573216438293457}


EP_train:2:  15%|| 4172/27626 [09:50<55:31,  7.04it/s]

{'epoch': 2, 'iter': 4170, 'avg_loss': 8.565313390563986, 'avg_acc': 50.02547350755214, 'loss': 8.241537094116211}


EP_train:2:  15%|| 4182/27626 [09:51<55:56,  6.98it/s]

{'epoch': 2, 'iter': 4180, 'avg_loss': 8.565539671284991, 'avg_acc': 50.01420114805071, 'loss': 8.949007034301758}


EP_train:2:  15%|| 4192/27626 [09:52<56:08,  6.96it/s]

{'epoch': 2, 'iter': 4190, 'avg_loss': 8.565351448204716, 'avg_acc': 50.00149129086137, 'loss': 8.487040519714355}


EP_train:2:  15%|| 4202/27626 [09:54<55:54,  6.98it/s]

{'epoch': 2, 'iter': 4200, 'avg_loss': 8.565274846409082, 'avg_acc': 49.99925612949298, 'loss': 8.269570350646973}


EP_train:2:  15%|| 4212/27626 [09:55<55:33,  7.02it/s]

{'epoch': 2, 'iter': 4210, 'avg_loss': 8.564781109020167, 'avg_acc': 50.00148420802659, 'loss': 8.67148208618164}


EP_train:2:  15%|| 4222/27626 [09:57<55:42,  7.00it/s]

{'epoch': 2, 'iter': 4220, 'avg_loss': 8.56452277361344, 'avg_acc': 50.005182421227204, 'loss': 8.235000610351562}


EP_train:2:  15%|| 4232/27626 [09:58<55:52,  6.98it/s]

{'epoch': 2, 'iter': 4230, 'avg_loss': 8.56352451230245, 'avg_acc': 50.00517017253604, 'loss': 7.893167972564697}


EP_train:2:  15%|| 4242/27626 [10:00<55:31,  7.02it/s]

{'epoch': 2, 'iter': 4240, 'avg_loss': 8.562906275715251, 'avg_acc': 50.00589483612355, 'loss': 8.271123886108398}


EP_train:2:  15%|| 4252/27626 [10:01<55:23,  7.03it/s]

{'epoch': 2, 'iter': 4250, 'avg_loss': 8.562850562019142, 'avg_acc': 50.00147024229593, 'loss': 8.401546478271484}


EP_train:2:  15%|| 4262/27626 [10:02<55:05,  7.07it/s]

{'epoch': 2, 'iter': 4260, 'avg_loss': 8.562359399623196, 'avg_acc': 49.999266604083545, 'loss': 8.041716575622559}


EP_train:2:  15%|| 4272/27626 [10:04<55:25,  7.02it/s]

{'epoch': 2, 'iter': 4270, 'avg_loss': 8.562694911737962, 'avg_acc': 50.00439007258254, 'loss': 8.274795532226562}


EP_train:2:  15%|| 4282/27626 [10:05<55:32,  7.00it/s]

{'epoch': 2, 'iter': 4280, 'avg_loss': 8.563001037228751, 'avg_acc': 50.00291987853305, 'loss': 9.01930046081543}


EP_train:2:  16%|| 4292/27626 [10:07<55:20,  7.03it/s]

{'epoch': 2, 'iter': 4290, 'avg_loss': 8.563716519937403, 'avg_acc': 50.00072826846888, 'loss': 8.999387741088867}


EP_train:2:  16%|| 4302/27626 [10:08<55:19,  7.03it/s]

{'epoch': 2, 'iter': 4300, 'avg_loss': 8.563475104826214, 'avg_acc': 50.0, 'loss': 9.078665733337402}


EP_train:2:  16%|| 4312/27626 [10:09<54:51,  7.08it/s]

{'epoch': 2, 'iter': 4310, 'avg_loss': 8.562853758862166, 'avg_acc': 49.994200881466014, 'loss': 8.710134506225586}


EP_train:2:  16%|| 4322/27626 [10:11<54:39,  7.11it/s]

{'epoch': 2, 'iter': 4320, 'avg_loss': 8.563382358303834, 'avg_acc': 49.99710715112243, 'loss': 8.913681030273438}


EP_train:2:  16%|| 4332/27626 [10:12<54:54,  7.07it/s]

{'epoch': 2, 'iter': 4330, 'avg_loss': 8.563946148012064, 'avg_acc': 49.99711383052413, 'loss': 8.18117618560791}


EP_train:2:  16%|| 4342/27626 [10:14<55:06,  7.04it/s]

{'epoch': 2, 'iter': 4340, 'avg_loss': 8.563502335652753, 'avg_acc': 49.99208131766874, 'loss': 7.967236518859863}


EP_train:2:  16%|| 4352/27626 [10:15<54:54,  7.07it/s]

{'epoch': 2, 'iter': 4350, 'avg_loss': 8.563597013473073, 'avg_acc': 49.99640887152379, 'loss': 8.115939140319824}


EP_train:2:  16%|| 4362/27626 [10:16<54:52,  7.06it/s]

{'epoch': 2, 'iter': 4360, 'avg_loss': 8.563780476045947, 'avg_acc': 50.00071657876634, 'loss': 8.43869686126709}


EP_train:2:  16%|| 4372/27626 [10:18<55:21,  7.00it/s]

{'epoch': 2, 'iter': 4370, 'avg_loss': 8.563780964498143, 'avg_acc': 49.998570121253714, 'loss': 9.531379699707031}


EP_train:2:  16%|| 4382/27626 [10:19<55:14,  7.01it/s]

{'epoch': 2, 'iter': 4380, 'avg_loss': 8.563212087235476, 'avg_acc': 49.99572015521571, 'loss': 8.427447319030762}


EP_train:2:  16%|| 4392/27626 [10:21<55:00,  7.04it/s]

{'epoch': 2, 'iter': 4390, 'avg_loss': 8.562884600171737, 'avg_acc': 49.999288317012066, 'loss': 8.308267593383789}


EP_train:2:  16%|| 4402/27626 [10:22<54:47,  7.06it/s]

{'epoch': 2, 'iter': 4400, 'avg_loss': 8.563385113670837, 'avg_acc': 50.00568052715292, 'loss': 8.127443313598633}


EP_train:2:  16%|| 4412/27626 [10:24<54:37,  7.08it/s]

{'epoch': 2, 'iter': 4410, 'avg_loss': 8.564194193991153, 'avg_acc': 50.00566764905917, 'loss': 8.476358413696289}


EP_train:2:  16%|| 4422/27626 [10:25<54:51,  7.05it/s]

{'epoch': 2, 'iter': 4420, 'avg_loss': 8.563918728710865, 'avg_acc': 50.01484392671342, 'loss': 7.687427520751953}


EP_train:2:  16%|| 4432/27626 [10:26<54:52,  7.04it/s]

{'epoch': 2, 'iter': 4430, 'avg_loss': 8.563990205116074, 'avg_acc': 50.00423155044008, 'loss': 9.261453628540039}


EP_train:2:  16%|| 4442/27626 [10:28<54:53,  7.04it/s]

{'epoch': 2, 'iter': 4440, 'avg_loss': 8.564283326734591, 'avg_acc': 50.00492569241162, 'loss': 8.607941627502441}


EP_train:2:  16%|| 4452/27626 [10:29<55:00,  7.02it/s]

{'epoch': 2, 'iter': 4450, 'avg_loss': 8.565349435302762, 'avg_acc': 49.99859582116378, 'loss': 9.363675117492676}


EP_train:2:  16%|| 4462/27626 [10:31<55:22,  6.97it/s]

{'epoch': 2, 'iter': 4460, 'avg_loss': 8.56495178603613, 'avg_acc': 50.00070051557947, 'loss': 8.671613693237305}


EP_train:2:  16%|| 4472/27626 [10:32<54:48,  7.04it/s]

{'epoch': 2, 'iter': 4470, 'avg_loss': 8.565235222087187, 'avg_acc': 50.002795795124136, 'loss': 8.882026672363281}


EP_train:2:  16%|| 4482/27626 [10:33<54:39,  7.06it/s]

{'epoch': 2, 'iter': 4480, 'avg_loss': 8.56540349497643, 'avg_acc': 50.00418433385405, 'loss': 8.709920883178711}


EP_train:2:  16%|| 4492/27626 [10:35<54:40,  7.05it/s]

{'epoch': 2, 'iter': 4490, 'avg_loss': 8.565205584228556, 'avg_acc': 50.00208750835004, 'loss': 8.522650718688965}


EP_train:2:  16%|| 4502/27626 [10:36<54:55,  7.02it/s]

{'epoch': 2, 'iter': 4500, 'avg_loss': 8.565160793401061, 'avg_acc': 50.00416574094646, 'loss': 7.758641719818115}


EP_train:2:  16%|| 4512/27626 [10:38<54:58,  7.01it/s]

{'epoch': 2, 'iter': 4510, 'avg_loss': 8.565181319609472, 'avg_acc': 50.00346375526491, 'loss': 9.053346633911133}


EP_train:2:  16%|| 4522/27626 [10:39<55:00,  7.00it/s]

{'epoch': 2, 'iter': 4520, 'avg_loss': 8.56487758073678, 'avg_acc': 49.99308781243088, 'loss': 8.09968090057373}


EP_train:2:  16%|| 4532/27626 [10:41<54:38,  7.04it/s]

{'epoch': 2, 'iter': 4530, 'avg_loss': 8.564273549662708, 'avg_acc': 49.995172147428825, 'loss': 8.232416152954102}


EP_train:2:  16%|| 4542/27626 [10:42<54:40,  7.04it/s]

{'epoch': 2, 'iter': 4540, 'avg_loss': 8.564701969522234, 'avg_acc': 49.98967738383616, 'loss': 8.870631217956543}


EP_train:2:  16%|| 4552/27626 [10:43<54:51,  7.01it/s]

{'epoch': 2, 'iter': 4550, 'avg_loss': 8.564988972825864, 'avg_acc': 49.994506701823774, 'loss': 9.381189346313477}


EP_train:2:  17%|| 4562/27626 [10:45<54:29,  7.06it/s]

{'epoch': 2, 'iter': 4560, 'avg_loss': 8.564995919578667, 'avg_acc': 50.00548125411094, 'loss': 9.713337898254395}


EP_train:2:  17%|| 4572/27626 [10:46<54:25,  7.06it/s]

{'epoch': 2, 'iter': 4570, 'avg_loss': 8.565062382789232, 'avg_acc': 49.996581710785385, 'loss': 9.21756362915039}


EP_train:2:  17%|| 4582/27626 [10:48<54:25,  7.06it/s]

{'epoch': 2, 'iter': 4580, 'avg_loss': 8.564887893587656, 'avg_acc': 49.99113184894128, 'loss': 8.288422584533691}


EP_train:2:  17%|| 4592/27626 [10:49<54:09,  7.09it/s]

{'epoch': 2, 'iter': 4590, 'avg_loss': 8.56446853968812, 'avg_acc': 49.99115116532346, 'loss': 8.564866065979004}


EP_train:2:  17%|| 4602/27626 [10:50<54:41,  7.02it/s]

{'epoch': 2, 'iter': 4600, 'avg_loss': 8.564762239118318, 'avg_acc': 49.98981199739187, 'loss': 9.107332229614258}


EP_train:2:  17%|| 4612/27626 [10:52<54:46,  7.00it/s]

{'epoch': 2, 'iter': 4610, 'avg_loss': 8.564839879542467, 'avg_acc': 49.99186727391021, 'loss': 8.236330032348633}


EP_train:2:  17%|| 4622/27626 [10:53<54:17,  7.06it/s]

{'epoch': 2, 'iter': 4620, 'avg_loss': 8.564160648813187, 'avg_acc': 49.98647478900671, 'loss': 7.911558151245117}


EP_train:2:  17%|| 4632/27626 [10:55<54:27,  7.04it/s]

{'epoch': 2, 'iter': 4630, 'avg_loss': 8.564057659366634, 'avg_acc': 49.97773159144893, 'loss': 8.486021041870117}


EP_train:2:  17%|| 4642/27626 [10:56<54:51,  6.98it/s]

{'epoch': 2, 'iter': 4640, 'avg_loss': 8.564678449102631, 'avg_acc': 49.99124649859944, 'loss': 7.983481407165527}


EP_train:2:  17%|| 4652/27626 [10:58<54:13,  7.06it/s]

{'epoch': 2, 'iter': 4650, 'avg_loss': 8.564414554297398, 'avg_acc': 49.98924962373683, 'loss': 8.342002868652344}


EP_train:2:  17%|| 4662/27626 [10:59<54:03,  7.08it/s]

{'epoch': 2, 'iter': 4660, 'avg_loss': 8.564700274374372, 'avg_acc': 49.99463634413216, 'loss': 8.30821418762207}


EP_train:2:  17%|| 4672/27626 [11:00<54:23,  7.03it/s]

{'epoch': 2, 'iter': 4670, 'avg_loss': 8.564043214810631, 'avg_acc': 49.98795761078998, 'loss': 8.268905639648438}


EP_train:2:  17%|| 4682/27626 [11:02<54:14,  7.05it/s]

{'epoch': 2, 'iter': 4680, 'avg_loss': 8.564284191506461, 'avg_acc': 49.9893185216834, 'loss': 8.1475191116333}


EP_train:2:  17%|| 4692/27626 [11:03<54:19,  7.04it/s]

{'epoch': 2, 'iter': 4690, 'avg_loss': 8.564803488090384, 'avg_acc': 49.987342784054576, 'loss': 8.458834648132324}


EP_train:2:  17%|| 4702/27626 [11:05<54:32,  7.00it/s]

{'epoch': 2, 'iter': 4700, 'avg_loss': 8.565056351899239, 'avg_acc': 49.994681982556905, 'loss': 9.560098648071289}


EP_train:2:  17%|| 4712/27626 [11:06<53:59,  7.07it/s]

{'epoch': 2, 'iter': 4710, 'avg_loss': 8.565837052389204, 'avg_acc': 49.994029929951175, 'loss': 8.1620512008667}


EP_train:2:  17%|| 4722/27626 [11:07<54:14,  7.04it/s]

{'epoch': 2, 'iter': 4720, 'avg_loss': 8.565675666993313, 'avg_acc': 49.99404257572549, 'loss': 9.104092597961426}


EP_train:2:  17%|| 4732/27626 [11:09<54:39,  6.98it/s]

{'epoch': 2, 'iter': 4730, 'avg_loss': 8.566062977237898, 'avg_acc': 49.98480765165927, 'loss': 8.942852020263672}


EP_train:2:  17%|| 4742/27626 [11:10<53:42,  7.10it/s]

{'epoch': 2, 'iter': 4740, 'avg_loss': 8.566106154078348, 'avg_acc': 49.99406770723476, 'loss': 7.904613971710205}


EP_train:2:  17%|| 4752/27626 [11:12<54:03,  7.05it/s]

{'epoch': 2, 'iter': 4750, 'avg_loss': 8.565965625367097, 'avg_acc': 50.003288781309195, 'loss': 7.774966716766357}


EP_train:2:  17%|| 4762/27626 [11:13<53:45,  7.09it/s]

{'epoch': 2, 'iter': 4760, 'avg_loss': 8.565983832370732, 'avg_acc': 50.00262549884478, 'loss': 7.796738624572754}


EP_train:2:  17%|| 4772/27626 [11:14<53:55,  7.06it/s]

{'epoch': 2, 'iter': 4770, 'avg_loss': 8.566184604375325, 'avg_acc': 49.986245022007964, 'loss': 8.647686958312988}


EP_train:2:  17%|| 4782/27626 [11:16<54:28,  6.99it/s]

{'epoch': 2, 'iter': 4780, 'avg_loss': 8.566322254551128, 'avg_acc': 49.98235201840619, 'loss': 8.956954956054688}


EP_train:2:  17%|| 4792/27626 [11:17<54:09,  7.03it/s]

{'epoch': 2, 'iter': 4790, 'avg_loss': 8.565986783189482, 'avg_acc': 49.9797797954498, 'loss': 8.016825675964355}


EP_train:2:  17%|| 4802/27626 [11:19<53:43,  7.08it/s]

{'epoch': 2, 'iter': 4800, 'avg_loss': 8.565963693073307, 'avg_acc': 49.986330972714015, 'loss': 8.61846923828125}


EP_train:2:  17%|| 4812/27626 [11:20<53:41,  7.08it/s]

{'epoch': 2, 'iter': 4810, 'avg_loss': 8.56588435267193, 'avg_acc': 49.981162959883605, 'loss': 8.83364486694336}


EP_train:2:  17%|| 4822/27626 [11:22<53:59,  7.04it/s]

{'epoch': 2, 'iter': 4820, 'avg_loss': 8.565709940552885, 'avg_acc': 49.97277535780958, 'loss': 9.04660415649414}


EP_train:2:  17%|| 4832/27626 [11:23<54:07,  7.02it/s]

{'epoch': 2, 'iter': 4830, 'avg_loss': 8.565687297911063, 'avg_acc': 49.984475263920515, 'loss': 8.857004165649414}


EP_train:2:  18%|| 4842/27626 [11:24<53:40,  7.08it/s]

{'epoch': 2, 'iter': 4840, 'avg_loss': 8.56563211083289, 'avg_acc': 49.971596777525306, 'loss': 8.311320304870605}


EP_train:2:  18%|| 4852/27626 [11:26<53:49,  7.05it/s]

{'epoch': 2, 'iter': 4850, 'avg_loss': 8.565400568192041, 'avg_acc': 49.97487631416203, 'loss': 8.062248229980469}


EP_train:2:  18%|| 4862/27626 [11:27<54:20,  6.98it/s]

{'epoch': 2, 'iter': 4860, 'avg_loss': 8.565157464001311, 'avg_acc': 49.97492799835425, 'loss': 8.211962699890137}


EP_train:2:  18%|| 4872/27626 [11:29<53:57,  7.03it/s]

{'epoch': 2, 'iter': 4870, 'avg_loss': 8.565150673348809, 'avg_acc': 49.97433791829194, 'loss': 8.626646995544434}


EP_train:2:  18%|| 4882/27626 [11:30<53:38,  7.07it/s]

{'epoch': 2, 'iter': 4880, 'avg_loss': 8.565135450154097, 'avg_acc': 49.976311206719934, 'loss': 8.53931999206543}


EP_train:2:  18%|| 4892/27626 [11:31<53:25,  7.09it/s]

{'epoch': 2, 'iter': 4890, 'avg_loss': 8.564867945542195, 'avg_acc': 49.977637497444285, 'loss': 8.508881568908691}


EP_train:2:  18%|| 4902/27626 [11:33<53:58,  7.02it/s]

{'epoch': 2, 'iter': 4900, 'avg_loss': 8.564595521262557, 'avg_acc': 49.97959600081616, 'loss': 8.288551330566406}


EP_train:2:  18%|| 4912/27626 [11:34<53:34,  7.07it/s]

{'epoch': 2, 'iter': 4910, 'avg_loss': 8.564568902732058, 'avg_acc': 49.98600081449807, 'loss': 8.35939884185791}


EP_train:2:  18%|| 4922/27626 [11:36<53:43,  7.04it/s]

{'epoch': 2, 'iter': 4920, 'avg_loss': 8.564469444439242, 'avg_acc': 49.986664295874824, 'loss': 8.462599754333496}


EP_train:2:  18%|| 4932/27626 [11:37<53:17,  7.10it/s]

{'epoch': 2, 'iter': 4930, 'avg_loss': 8.565194296271146, 'avg_acc': 49.998098762928414, 'loss': 8.470627784729004}


EP_train:2:  18%|| 4942/27626 [11:38<53:36,  7.05it/s]

{'epoch': 2, 'iter': 4940, 'avg_loss': 8.565364561638738, 'avg_acc': 49.9955727585509, 'loss': 8.958707809448242}


EP_train:2:  18%|| 4952/27626 [11:40<54:25,  6.94it/s]

{'epoch': 2, 'iter': 4950, 'avg_loss': 8.565705767642896, 'avg_acc': 50.005049484952536, 'loss': 8.877914428710938}


EP_train:2:  18%|| 4962/27626 [11:41<53:45,  7.03it/s]

{'epoch': 2, 'iter': 4960, 'avg_loss': 8.56654925405006, 'avg_acc': 49.99685043338037, 'loss': 8.772136688232422}


EP_train:2:  18%|| 4972/27626 [11:43<53:29,  7.06it/s]

{'epoch': 2, 'iter': 4970, 'avg_loss': 8.566402291896669, 'avg_acc': 50.00628646147657, 'loss': 7.196700572967529}


EP_train:2:  18%|| 4982/27626 [11:44<53:02,  7.12it/s]

{'epoch': 2, 'iter': 4980, 'avg_loss': 8.566309819281807, 'avg_acc': 49.99686307970287, 'loss': 8.883536338806152}


EP_train:2:  18%|| 4992/27626 [11:46<53:38,  7.03it/s]

{'epoch': 2, 'iter': 4990, 'avg_loss': 8.566188145868527, 'avg_acc': 50.00187838108595, 'loss': 7.33098840713501}


EP_train:2:  18%|| 5002/27626 [11:47<53:51,  7.00it/s]

{'epoch': 2, 'iter': 5000, 'avg_loss': 8.566524834090341, 'avg_acc': 49.99562587482503, 'loss': 9.472441673278809}


EP_train:2:  18%|| 5012/27626 [11:48<53:19,  7.07it/s]

{'epoch': 2, 'iter': 5010, 'avg_loss': 8.566608732985156, 'avg_acc': 49.99937637198164, 'loss': 9.011478424072266}


EP_train:2:  18%|| 5022/27626 [11:50<53:40,  7.02it/s]

{'epoch': 2, 'iter': 5020, 'avg_loss': 8.5668549887143, 'avg_acc': 49.998132842063335, 'loss': 8.862388610839844}


EP_train:2:  18%|| 5032/27626 [11:51<53:44,  7.01it/s]

{'epoch': 2, 'iter': 5030, 'avg_loss': 8.566844479840073, 'avg_acc': 50.00434804213874, 'loss': 8.287535667419434}


EP_train:2:  18%|| 5042/27626 [11:53<53:15,  7.07it/s]

{'epoch': 2, 'iter': 5040, 'avg_loss': 8.565947339497395, 'avg_acc': 50.00495933346558, 'loss': 8.244098663330078}


EP_train:2:  18%|| 5052/27626 [11:54<53:16,  7.06it/s]

{'epoch': 2, 'iter': 5050, 'avg_loss': 8.565738883780337, 'avg_acc': 50.00247475747377, 'loss': 8.108726501464844}


EP_train:2:  18%|| 5062/27626 [11:56<52:57,  7.10it/s]

{'epoch': 2, 'iter': 5060, 'avg_loss': 8.566239975064528, 'avg_acc': 50.0, 'loss': 9.35952091217041}


EP_train:2:  18%|| 5072/27626 [11:57<53:04,  7.08it/s]

{'epoch': 2, 'iter': 5070, 'avg_loss': 8.566523436845536, 'avg_acc': 50.00554624334451, 'loss': 8.743407249450684}


EP_train:2:  18%|| 5082/27626 [11:58<53:08,  7.07it/s]

{'epoch': 2, 'iter': 5080, 'avg_loss': 8.565697556334062, 'avg_acc': 50.02275634717576, 'loss': 8.810933113098145}


EP_train:2:  18%|| 5092/27626 [12:00<54:02,  6.95it/s]

{'epoch': 2, 'iter': 5090, 'avg_loss': 8.565651211469774, 'avg_acc': 50.02455313297977, 'loss': 7.501070976257324}


EP_train:2:  18%|| 5102/27626 [12:01<53:25,  7.03it/s]

{'epoch': 2, 'iter': 5100, 'avg_loss': 8.56530033628325, 'avg_acc': 50.01899137424034, 'loss': 8.412081718444824}


EP_train:2:  19%|| 5112/27626 [12:03<53:11,  7.06it/s]

{'epoch': 2, 'iter': 5110, 'avg_loss': 8.565047720597695, 'avg_acc': 50.01528565838388, 'loss': 7.914871692657471}


EP_train:2:  19%|| 5122/27626 [12:04<53:06,  7.06it/s]

{'epoch': 2, 'iter': 5120, 'avg_loss': 8.564568938326262, 'avg_acc': 50.0164762741652, 'loss': 8.259041786193848}


EP_train:2:  19%|| 5132/27626 [12:05<53:05,  7.06it/s]

{'epoch': 2, 'iter': 5130, 'avg_loss': 8.56458183581126, 'avg_acc': 50.01644416293121, 'loss': 8.840331077575684}


EP_train:2:  19%|| 5142/27626 [12:07<53:03,  7.06it/s]

{'epoch': 2, 'iter': 5140, 'avg_loss': 8.564255935815122, 'avg_acc': 50.0066864423264, 'loss': 8.488691329956055}


EP_train:2:  19%|| 5152/27626 [12:08<53:19,  7.02it/s]

{'epoch': 2, 'iter': 5150, 'avg_loss': 8.563913619627051, 'avg_acc': 50.01395360124248, 'loss': 8.792516708374023}


EP_train:2:  19%|| 5162/27626 [12:10<52:37,  7.11it/s]

{'epoch': 2, 'iter': 5160, 'avg_loss': 8.563841674429945, 'avg_acc': 50.00605502809533, 'loss': 7.826206207275391}


EP_train:2:  19%|| 5172/27626 [12:11<53:06,  7.05it/s]

{'epoch': 2, 'iter': 5170, 'avg_loss': 8.563944883352091, 'avg_acc': 50.0, 'loss': 8.81346607208252}


EP_train:2:  19%|| 5182/27626 [12:12<53:09,  7.04it/s]

{'epoch': 2, 'iter': 5180, 'avg_loss': 8.564119883012504, 'avg_acc': 50.007841150357066, 'loss': 7.8395609855651855}


EP_train:2:  19%|| 5192/27626 [12:14<53:02,  7.05it/s]

{'epoch': 2, 'iter': 5190, 'avg_loss': 8.564041164517977, 'avg_acc': 50.00782604507802, 'loss': 8.206670761108398}


EP_train:2:  19%|| 5202/27626 [12:15<52:46,  7.08it/s]

{'epoch': 2, 'iter': 5200, 'avg_loss': 8.563958123655784, 'avg_acc': 50.006609305902714, 'loss': 8.112310409545898}


EP_train:2:  19%|| 5212/27626 [12:17<53:40,  6.96it/s]

{'epoch': 2, 'iter': 5210, 'avg_loss': 8.563705986599802, 'avg_acc': 50.00779600844367, 'loss': 8.913446426391602}


EP_train:2:  19%|| 5222/27626 [12:18<52:57,  7.05it/s]

{'epoch': 2, 'iter': 5220, 'avg_loss': 8.56405827451588, 'avg_acc': 50.00718253208197, 'loss': 8.845773696899414}


EP_train:2:  19%|| 5232/27626 [12:20<52:44,  7.08it/s]

{'epoch': 2, 'iter': 5230, 'avg_loss': 8.564408872305743, 'avg_acc': 50.01015580194992, 'loss': 8.458192825317383}


EP_train:2:  19%|| 5242/27626 [12:21<52:45,  7.07it/s]

{'epoch': 2, 'iter': 5240, 'avg_loss': 8.56430775490091, 'avg_acc': 50.01311772562488, 'loss': 8.551350593566895}


EP_train:2:  19%|| 5252/27626 [12:22<52:50,  7.06it/s]

{'epoch': 2, 'iter': 5250, 'avg_loss': 8.563655160854168, 'avg_acc': 50.0065463721196, 'loss': 7.656112194061279}


EP_train:2:  19%|| 5262/27626 [12:24<53:11,  7.01it/s]

{'epoch': 2, 'iter': 5260, 'avg_loss': 8.56287252510983, 'avg_acc': 49.99703003231325, 'loss': 9.07819938659668}


EP_train:2:  19%|| 5272/27626 [12:25<53:09,  7.01it/s]

{'epoch': 2, 'iter': 5270, 'avg_loss': 8.561906236418057, 'avg_acc': 49.99822140011383, 'loss': 8.830672264099121}


EP_train:2:  19%|| 5282/27626 [12:27<52:58,  7.03it/s]

{'epoch': 2, 'iter': 5280, 'avg_loss': 8.561636012333222, 'avg_acc': 49.99349081613331, 'loss': 8.38230037689209}


EP_train:2:  19%|| 5292/27626 [12:28<52:43,  7.06it/s]

{'epoch': 2, 'iter': 5290, 'avg_loss': 8.560748065673794, 'avg_acc': 49.992912492912495, 'loss': 7.561671733856201}


EP_train:2:  19%|| 5302/27626 [12:29<52:56,  7.03it/s]

{'epoch': 2, 'iter': 5300, 'avg_loss': 8.560782798392168, 'avg_acc': 49.98997830598, 'loss': 7.946608066558838}


EP_train:2:  19%|| 5312/27626 [12:31<52:45,  7.05it/s]

{'epoch': 2, 'iter': 5310, 'avg_loss': 8.561035251060778, 'avg_acc': 49.9847015627942, 'loss': 8.54856014251709}


EP_train:2:  19%|| 5322/27626 [12:32<52:42,  7.05it/s]

{'epoch': 2, 'iter': 5320, 'avg_loss': 8.561533848340071, 'avg_acc': 49.98355572260853, 'loss': 9.583484649658203}


EP_train:2:  19%|| 5332/27626 [12:34<53:01,  7.01it/s]

{'epoch': 2, 'iter': 5330, 'avg_loss': 8.562483680201302, 'avg_acc': 49.99296567248171, 'loss': 9.078484535217285}


EP_train:2:  19%|| 5342/27626 [12:35<52:29,  7.08it/s]

{'epoch': 2, 'iter': 5340, 'avg_loss': 8.562242325002659, 'avg_acc': 50.00877644635836, 'loss': 8.624750137329102}


EP_train:2:  19%|| 5352/27626 [12:36<52:58,  7.01it/s]

{'epoch': 2, 'iter': 5350, 'avg_loss': 8.56211390252025, 'avg_acc': 50.00759203887124, 'loss': 8.5972318649292}


EP_train:2:  19%|| 5362/27626 [12:38<52:49,  7.02it/s]

{'epoch': 2, 'iter': 5360, 'avg_loss': 8.562866771650324, 'avg_acc': 50.00641204999068, 'loss': 8.849630355834961}


EP_train:2:  19%|| 5372/27626 [12:39<52:13,  7.10it/s]

{'epoch': 2, 'iter': 5370, 'avg_loss': 8.563181375529464, 'avg_acc': 50.002909141686835, 'loss': 8.610272407531738}


EP_train:2:  19%|| 5382/27626 [12:41<52:32,  7.06it/s]

{'epoch': 2, 'iter': 5380, 'avg_loss': 8.563135012754431, 'avg_acc': 49.998257758780895, 'loss': 8.311121940612793}


EP_train:2:  20%|| 5392/27626 [12:42<52:09,  7.11it/s]

{'epoch': 2, 'iter': 5390, 'avg_loss': 8.563304828947361, 'avg_acc': 49.99826099053979, 'loss': 9.042328834533691}


EP_train:2:  20%|| 5402/27626 [12:44<52:23,  7.07it/s]

{'epoch': 2, 'iter': 5400, 'avg_loss': 8.563847817617132, 'avg_acc': 50.01099333456768, 'loss': 8.90064811706543}


EP_train:2:  20%|| 5412/27626 [12:45<52:39,  7.03it/s]

{'epoch': 2, 'iter': 5410, 'avg_loss': 8.563814486709987, 'avg_acc': 50.01905839955646, 'loss': 9.704127311706543}


EP_train:2:  20%|| 5422/27626 [12:46<51:56,  7.12it/s]

{'epoch': 2, 'iter': 5420, 'avg_loss': 8.563587914487792, 'avg_acc': 50.026517247740266, 'loss': 8.45106315612793}


EP_train:2:  20%|| 5432/27626 [12:48<52:23,  7.06it/s]

{'epoch': 2, 'iter': 5430, 'avg_loss': 8.563216825761621, 'avg_acc': 50.019563616276926, 'loss': 8.195767402648926}


EP_train:2:  20%|| 5442/27626 [12:49<52:53,  6.99it/s]

{'epoch': 2, 'iter': 5440, 'avg_loss': 8.563682554399588, 'avg_acc': 50.01435857379158, 'loss': 8.824821472167969}


EP_train:2:  20%|| 5452/27626 [12:51<52:35,  7.03it/s]

{'epoch': 2, 'iter': 5450, 'avg_loss': 8.563621864605771, 'avg_acc': 50.00745276096129, 'loss': 8.268558502197266}


EP_train:2:  20%|| 5462/27626 [12:52<52:22,  7.05it/s]

{'epoch': 2, 'iter': 5460, 'avg_loss': 8.563989883070791, 'avg_acc': 50.00629463468229, 'loss': 8.856352806091309}


EP_train:2:  20%|| 5472/27626 [12:53<52:26,  7.04it/s]

{'epoch': 2, 'iter': 5470, 'avg_loss': 8.56379576754339, 'avg_acc': 50.00342716139645, 'loss': 7.439258575439453}


EP_train:2:  20%|| 5482/27626 [12:55<52:13,  7.07it/s]

{'epoch': 2, 'iter': 5480, 'avg_loss': 8.564471355734723, 'avg_acc': 50.00741196861887, 'loss': 8.474682807922363}


EP_train:2:  20%|| 5492/27626 [12:56<52:17,  7.05it/s]

{'epoch': 2, 'iter': 5490, 'avg_loss': 8.5648874355956, 'avg_acc': 50.006829357129845, 'loss': 8.716814041137695}


EP_train:2:  20%|| 5502/27626 [12:58<52:02,  7.09it/s]

{'epoch': 2, 'iter': 5500, 'avg_loss': 8.56489424184547, 'avg_acc': 50.00511270678059, 'loss': 7.713691711425781}


EP_train:2:  20%|| 5512/27626 [12:59<52:20,  7.04it/s]

{'epoch': 2, 'iter': 5510, 'avg_loss': 8.564380333311565, 'avg_acc': 50.0062375249501, 'loss': 9.045162200927734}


EP_train:2:  20%|| 5522/27626 [13:00<52:15,  7.05it/s]

{'epoch': 2, 'iter': 5520, 'avg_loss': 8.564351276435016, 'avg_acc': 50.00679224778119, 'loss': 8.44107723236084}


EP_train:2:  20%|| 5532/27626 [13:02<52:21,  7.03it/s]

{'epoch': 2, 'iter': 5530, 'avg_loss': 8.564280571934974, 'avg_acc': 50.00960495389622, 'loss': 8.678704261779785}


EP_train:2:  20%|| 5542/27626 [13:03<53:06,  6.93it/s]

{'epoch': 2, 'iter': 5540, 'avg_loss': 8.564088292940282, 'avg_acc': 50.00394784334957, 'loss': 8.345284461975098}


EP_train:2:  20%|| 5552/27626 [13:05<52:11,  7.05it/s]

{'epoch': 2, 'iter': 5550, 'avg_loss': 8.563805411085742, 'avg_acc': 49.99718519185733, 'loss': 7.810436248779297}


EP_train:2:  20%|| 5562/27626 [13:06<52:29,  7.00it/s]

{'epoch': 2, 'iter': 5560, 'avg_loss': 8.56477049652824, 'avg_acc': 49.99550440568243, 'loss': 8.876337051391602}


EP_train:2:  20%|| 5572/27626 [13:08<52:08,  7.05it/s]

{'epoch': 2, 'iter': 5570, 'avg_loss': 8.56472145944026, 'avg_acc': 49.99439059414827, 'loss': 8.465926170349121}


EP_train:2:  20%|| 5582/27626 [13:09<52:03,  7.06it/s]

{'epoch': 2, 'iter': 5580, 'avg_loss': 8.56453610235507, 'avg_acc': 49.993280774054824, 'loss': 9.099806785583496}


EP_train:2:  20%|| 5592/27626 [13:10<52:10,  7.04it/s]

{'epoch': 2, 'iter': 5590, 'avg_loss': 8.565339416622626, 'avg_acc': 49.9938517259882, 'loss': 8.202746391296387}


EP_train:2:  20%|| 5602/27626 [13:12<51:49,  7.08it/s]

{'epoch': 2, 'iter': 5600, 'avg_loss': 8.565327518604798, 'avg_acc': 49.992188894840204, 'loss': 8.596132278442383}


EP_train:2:  20%|| 5612/27626 [13:13<51:48,  7.08it/s]

{'epoch': 2, 'iter': 5610, 'avg_loss': 8.565577250626651, 'avg_acc': 49.99275975761896, 'loss': 8.43861198425293}


EP_train:2:  20%|| 5622/27626 [13:15<52:11,  7.03it/s]

{'epoch': 2, 'iter': 5620, 'avg_loss': 8.565221266313033, 'avg_acc': 49.993328589219, 'loss': 8.939708709716797}


EP_train:2:  20%|| 5632/27626 [13:16<52:28,  6.99it/s]

{'epoch': 2, 'iter': 5630, 'avg_loss': 8.565451496343083, 'avg_acc': 49.99389540046173, 'loss': 8.859687805175781}


EP_train:2:  20%|| 5642/27626 [13:17<52:13,  7.02it/s]

{'epoch': 2, 'iter': 5640, 'avg_loss': 8.56507089141835, 'avg_acc': 49.9966761212551, 'loss': 8.180768966674805}


EP_train:2:  20%|| 5652/27626 [13:19<52:03,  7.04it/s]

{'epoch': 2, 'iter': 5650, 'avg_loss': 8.565455323804734, 'avg_acc': 50.00110599893824, 'loss': 8.149737358093262}


EP_train:2:  20%|| 5662/27626 [13:20<51:45,  7.07it/s]

{'epoch': 2, 'iter': 5660, 'avg_loss': 8.564993512649416, 'avg_acc': 50.003864158275924, 'loss': 7.862187385559082}


EP_train:2:  21%|| 5672/27626 [13:22<51:29,  7.11it/s]

{'epoch': 2, 'iter': 5670, 'avg_loss': 8.564714986292946, 'avg_acc': 50.00881678716276, 'loss': 8.721606254577637}


EP_train:2:  21%|| 5682/27626 [13:23<52:19,  6.99it/s]

{'epoch': 2, 'iter': 5680, 'avg_loss': 8.564671845361394, 'avg_acc': 50.01595229713078, 'loss': 9.02552318572998}


EP_train:2:  21%|| 5692/27626 [13:25<51:37,  7.08it/s]

{'epoch': 2, 'iter': 5690, 'avg_loss': 8.564219433485027, 'avg_acc': 50.013727815849585, 'loss': 8.089079856872559}


EP_train:2:  21%|| 5702/27626 [13:26<51:48,  7.05it/s]

{'epoch': 2, 'iter': 5700, 'avg_loss': 8.564276525627582, 'avg_acc': 50.0126074372917, 'loss': 8.475191116333008}


EP_train:2:  21%|| 5712/27626 [13:27<51:25,  7.10it/s]

{'epoch': 2, 'iter': 5710, 'avg_loss': 8.564707611596974, 'avg_acc': 50.01039660304675, 'loss': 9.048283576965332}


EP_train:2:  21%|| 5722/27626 [13:29<51:52,  7.04it/s]

{'epoch': 2, 'iter': 5720, 'avg_loss': 8.56384683876391, 'avg_acc': 50.00928596399231, 'loss': 8.128141403198242}


EP_train:2:  21%|| 5732/27626 [13:30<51:38,  7.07it/s]

{'epoch': 2, 'iter': 5730, 'avg_loss': 8.563630686403673, 'avg_acc': 50.00926976094922, 'loss': 7.583274841308594}


EP_train:2:  21%|| 5742/27626 [13:32<51:34,  7.07it/s]

{'epoch': 2, 'iter': 5740, 'avg_loss': 8.562977872478736, 'avg_acc': 50.00272165128027, 'loss': 7.552192211151123}


EP_train:2:  21%|| 5752/27626 [13:33<51:23,  7.09it/s]

{'epoch': 2, 'iter': 5750, 'avg_loss': 8.562711645719011, 'avg_acc': 50.001086767518686, 'loss': 8.274170875549316}


EP_train:2:  21%|| 5762/27626 [13:34<51:52,  7.03it/s]

{'epoch': 2, 'iter': 5760, 'avg_loss': 8.562578016551619, 'avg_acc': 49.99132095122374, 'loss': 9.223766326904297}


EP_train:2:  21%|| 5772/27626 [13:36<51:25,  7.08it/s]

{'epoch': 2, 'iter': 5770, 'avg_loss': 8.562752563976948, 'avg_acc': 49.9886284872639, 'loss': 8.63835334777832}


EP_train:2:  21%|| 5782/27626 [13:37<51:30,  7.07it/s]

{'epoch': 2, 'iter': 5780, 'avg_loss': 8.562610571204203, 'avg_acc': 49.98432364642795, 'loss': 9.535840034484863}


EP_train:2:  21%|| 5792/27626 [13:39<51:31,  7.06it/s]

{'epoch': 2, 'iter': 5790, 'avg_loss': 8.562395472370925, 'avg_acc': 49.98003367294077, 'loss': 8.927746772766113}


EP_train:2:  21%|| 5802/27626 [13:40<51:20,  7.09it/s]

{'epoch': 2, 'iter': 5800, 'avg_loss': 8.562733719747984, 'avg_acc': 49.97468108946733, 'loss': 7.817513942718506}


EP_train:2:  21%|| 5812/27626 [13:41<51:35,  7.05it/s]

{'epoch': 2, 'iter': 5810, 'avg_loss': 8.562649813684697, 'avg_acc': 49.97472466012734, 'loss': 8.328108787536621}


EP_train:2:  21%|| 5822/27626 [13:43<51:35,  7.04it/s]

{'epoch': 2, 'iter': 5820, 'avg_loss': 8.562433411710105, 'avg_acc': 49.98657876653496, 'loss': 8.128503799438477}


EP_train:2:  21%|| 5832/27626 [13:44<51:38,  7.03it/s]

{'epoch': 2, 'iter': 5830, 'avg_loss': 8.562584651148232, 'avg_acc': 49.9882095695421, 'loss': 9.408515930175781}


EP_train:2:  21%|| 5842/27626 [13:46<51:11,  7.09it/s]

{'epoch': 2, 'iter': 5840, 'avg_loss': 8.562756836526587, 'avg_acc': 49.989834788563606, 'loss': 8.195101737976074}


EP_train:2:  21%|| 5852/27626 [13:47<51:28,  7.05it/s]

{'epoch': 2, 'iter': 5850, 'avg_loss': 8.562577320966653, 'avg_acc': 49.99359083917279, 'loss': 9.145237922668457}


EP_train:2:  21%|| 5862/27626 [13:48<50:56,  7.12it/s]

{'epoch': 2, 'iter': 5860, 'avg_loss': 8.56206215495767, 'avg_acc': 49.99573451629415, 'loss': 8.22242546081543}


EP_train:2:  21%|| 5872/27626 [13:50<51:18,  7.07it/s]

{'epoch': 2, 'iter': 5870, 'avg_loss': 8.56252200581962, 'avg_acc': 49.99893544540964, 'loss': 9.186307907104492}


EP_train:2:  21%|| 5882/27626 [13:51<51:02,  7.10it/s]

{'epoch': 2, 'iter': 5880, 'avg_loss': 8.562503436130237, 'avg_acc': 50.00425097772487, 'loss': 7.815572738647461}


EP_train:2:  21%|| 5892/27626 [13:53<51:08,  7.08it/s]

{'epoch': 2, 'iter': 5890, 'avg_loss': 8.562068639071116, 'avg_acc': 50.01007893396707, 'loss': 8.434989929199219}


EP_train:2:  21%|| 5902/27626 [13:54<51:23,  7.04it/s]

{'epoch': 2, 'iter': 5900, 'avg_loss': 8.56221107853972, 'avg_acc': 50.01059142518217, 'loss': 9.191579818725586}


EP_train:2:  21%|| 5912/27626 [13:56<51:11,  7.07it/s]

{'epoch': 2, 'iter': 5910, 'avg_loss': 8.5622801259576, 'avg_acc': 50.01268820842497, 'loss': 8.719708442687988}


EP_train:2:  21%|| 5922/27626 [13:57<51:43,  6.99it/s]

{'epoch': 2, 'iter': 5920, 'avg_loss': 8.561494733932513, 'avg_acc': 50.01900016889039, 'loss': 7.67204475402832}


EP_train:2:  21%|| 5932/27626 [13:58<51:09,  7.07it/s]

{'epoch': 2, 'iter': 5930, 'avg_loss': 8.560929271708932, 'avg_acc': 50.02634462991063, 'loss': 8.978632926940918}


EP_train:2:  22%|| 5942/27626 [14:00<50:54,  7.10it/s]

{'epoch': 2, 'iter': 5940, 'avg_loss': 8.561154521408556, 'avg_acc': 50.02998232620771, 'loss': 8.191694259643555}


EP_train:2:  22%|| 5952/27626 [14:01<50:43,  7.12it/s]

{'epoch': 2, 'iter': 5950, 'avg_loss': 8.561335684090897, 'avg_acc': 50.02573096958495, 'loss': 9.176594734191895}


EP_train:2:  22%|| 5962/27626 [14:03<50:43,  7.12it/s]

{'epoch': 2, 'iter': 5960, 'avg_loss': 8.561671118391498, 'avg_acc': 50.01992115416877, 'loss': 8.926141738891602}


EP_train:2:  22%|| 5972/27626 [14:04<50:55,  7.09it/s]

{'epoch': 2, 'iter': 5970, 'avg_loss': 8.562099464134123, 'avg_acc': 50.023551331435264, 'loss': 9.097530364990234}


EP_train:2:  22%|| 5982/27626 [14:05<51:05,  7.06it/s]

{'epoch': 2, 'iter': 5980, 'avg_loss': 8.561946025801191, 'avg_acc': 50.02194449088782, 'loss': 8.412968635559082}


EP_train:2:  22%|| 5992/27626 [14:07<51:19,  7.03it/s]

{'epoch': 2, 'iter': 5990, 'avg_loss': 8.561198458849061, 'avg_acc': 50.01199716241028, 'loss': 8.155647277832031}


EP_train:2:  22%|| 6002/27626 [14:08<51:34,  6.99it/s]

{'epoch': 2, 'iter': 6000, 'avg_loss': 8.561283424484236, 'avg_acc': 50.01562239626729, 'loss': 9.052127838134766}


EP_train:2:  22%|| 6012/27626 [14:10<51:27,  7.00it/s]

{'epoch': 2, 'iter': 6010, 'avg_loss': 8.560963664390426, 'avg_acc': 50.01403676592913, 'loss': 7.605960369110107}


EP_train:2:  22%|| 6022/27626 [14:11<51:10,  7.04it/s]

{'epoch': 2, 'iter': 6020, 'avg_loss': 8.56124357336919, 'avg_acc': 50.01764657033715, 'loss': 8.179144859313965}


EP_train:2:  22%|| 6032/27626 [14:13<51:22,  7.00it/s]

{'epoch': 2, 'iter': 6030, 'avg_loss': 8.560945543438775, 'avg_acc': 50.01658099817609, 'loss': 8.544639587402344}


EP_train:2:  22%|| 6042/27626 [14:14<51:05,  7.04it/s]

{'epoch': 2, 'iter': 6040, 'avg_loss': 8.561131516664831, 'avg_acc': 50.02638222148651, 'loss': 8.226807594299316}


EP_train:2:  22%|| 6052/27626 [14:15<51:16,  7.01it/s]

{'epoch': 2, 'iter': 6050, 'avg_loss': 8.561201074383039, 'avg_acc': 50.02375640390018, 'loss': 8.780695915222168}


EP_train:2:  22%|| 6062/27626 [14:17<51:10,  7.02it/s]

{'epoch': 2, 'iter': 6060, 'avg_loss': 8.561089686013117, 'avg_acc': 50.02062365946214, 'loss': 7.594066619873047}


EP_train:2:  22%|| 6072/27626 [14:18<51:14,  7.01it/s]

{'epoch': 2, 'iter': 6070, 'avg_loss': 8.5607490471218, 'avg_acc': 50.022133915335196, 'loss': 7.696695327758789}


EP_train:2:  22%|| 6082/27626 [14:20<51:32,  6.97it/s]

{'epoch': 2, 'iter': 6080, 'avg_loss': 8.560817633112723, 'avg_acc': 50.01644466370663, 'loss': 9.063204765319824}


EP_train:2:  22%|| 6092/27626 [14:21<51:11,  7.01it/s]

{'epoch': 2, 'iter': 6090, 'avg_loss': 8.560196515316447, 'avg_acc': 50.01539156131998, 'loss': 7.758094310760498}


EP_train:2:  22%|| 6102/27626 [14:22<50:28,  7.11it/s]

{'epoch': 2, 'iter': 6100, 'avg_loss': 8.560423164949166, 'avg_acc': 50.0158785445009, 'loss': 8.730369567871094}


EP_train:2:  22%|| 6112/27626 [14:24<51:00,  7.03it/s]

{'epoch': 2, 'iter': 6110, 'avg_loss': 8.560443675375632, 'avg_acc': 50.018920798559975, 'loss': 8.27613639831543}


EP_train:2:  22%|| 6122/27626 [14:25<50:35,  7.08it/s]

{'epoch': 2, 'iter': 6120, 'avg_loss': 8.5606474248515, 'avg_acc': 50.01582666231008, 'loss': 8.776674270629883}


EP_train:2:  22%|| 6132/27626 [14:27<50:47,  7.05it/s]

{'epoch': 2, 'iter': 6130, 'avg_loss': 8.560709785392362, 'avg_acc': 50.01682025770674, 'loss': 8.274070739746094}


EP_train:2:  22%|| 6142/27626 [14:28<50:19,  7.11it/s]

{'epoch': 2, 'iter': 6140, 'avg_loss': 8.560648066584829, 'avg_acc': 50.01577511805895, 'loss': 8.42560863494873}


EP_train:2:  22%|| 6152/27626 [14:29<50:30,  7.09it/s]

{'epoch': 2, 'iter': 6150, 'avg_loss': 8.560488588382434, 'avg_acc': 50.017273614046495, 'loss': 8.654547691345215}


EP_train:2:  22%|| 6162/27626 [14:31<51:02,  7.01it/s]

{'epoch': 2, 'iter': 6160, 'avg_loss': 8.560825668242705, 'avg_acc': 50.01927446843045, 'loss': 8.73896598815918}


EP_train:2:  22%|| 6172/27626 [14:32<51:08,  6.99it/s]

{'epoch': 2, 'iter': 6170, 'avg_loss': 8.56221440266875, 'avg_acc': 50.01519202722411, 'loss': 9.721084594726562}


EP_train:2:  22%|| 6182/27626 [14:34<50:30,  7.08it/s]

{'epoch': 2, 'iter': 6180, 'avg_loss': 8.56195178291902, 'avg_acc': 50.01365070376962, 'loss': 7.887972831726074}


EP_train:2:  22%|| 6192/27626 [14:35<50:59,  7.01it/s]

{'epoch': 2, 'iter': 6190, 'avg_loss': 8.562265769285025, 'avg_acc': 50.006057179777095, 'loss': 8.866433143615723}


EP_train:2:  22%|| 6202/27626 [14:37<50:38,  7.05it/s]

{'epoch': 2, 'iter': 6200, 'avg_loss': 8.562518077211175, 'avg_acc': 49.99697629414611, 'loss': 8.065983772277832}


EP_train:2:  22%|| 6212/27626 [14:38<50:55,  7.01it/s]

{'epoch': 2, 'iter': 6210, 'avg_loss': 8.56202133295713, 'avg_acc': 49.995974883271614, 'loss': 8.201886177062988}


EP_train:2:  23%|| 6222/27626 [14:39<50:28,  7.07it/s]

{'epoch': 2, 'iter': 6220, 'avg_loss': 8.56177816604077, 'avg_acc': 49.989451052885386, 'loss': 7.761257648468018}


EP_train:2:  23%|| 6232/27626 [14:41<50:35,  7.05it/s]

{'epoch': 2, 'iter': 6230, 'avg_loss': 8.561927535219763, 'avg_acc': 49.9899695073022, 'loss': 7.94501256942749}


EP_train:2:  23%|| 6242/27626 [14:42<50:50,  7.01it/s]

{'epoch': 2, 'iter': 6240, 'avg_loss': 8.562098029561005, 'avg_acc': 49.9889841371575, 'loss': 8.305992126464844}


EP_train:2:  23%|| 6252/27626 [14:44<50:52,  7.00it/s]

{'epoch': 2, 'iter': 6250, 'avg_loss': 8.562057875457944, 'avg_acc': 49.99000159974404, 'loss': 8.40435791015625}


EP_train:2:  23%|| 6262/27626 [14:45<50:54,  6.99it/s]

{'epoch': 2, 'iter': 6260, 'avg_loss': 8.562031823289107, 'avg_acc': 49.99151493371666, 'loss': 8.81173038482666}


EP_train:2:  23%|| 6272/27626 [14:46<50:41,  7.02it/s]

{'epoch': 2, 'iter': 6270, 'avg_loss': 8.562217285370375, 'avg_acc': 49.99701004624462, 'loss': 9.12136459350586}


EP_train:2:  23%|| 6282/27626 [14:48<50:11,  7.09it/s]

{'epoch': 2, 'iter': 6280, 'avg_loss': 8.562336733644507, 'avg_acc': 49.99502467759911, 'loss': 8.751285552978516}


EP_train:2:  23%|| 6292/27626 [14:49<50:12,  7.08it/s]

{'epoch': 2, 'iter': 6290, 'avg_loss': 8.562230676142557, 'avg_acc': 49.99751629311715, 'loss': 8.873427391052246}


EP_train:2:  23%|| 6302/27626 [14:51<50:05,  7.09it/s]

{'epoch': 2, 'iter': 6300, 'avg_loss': 8.561779217689004, 'avg_acc': 49.99256070465006, 'loss': 7.939789772033691}


EP_train:2:  23%|| 6312/27626 [14:52<50:02,  7.10it/s]

{'epoch': 2, 'iter': 6310, 'avg_loss': 8.561324429878473, 'avg_acc': 49.99158215813659, 'loss': 8.421152114868164}


EP_train:2:  23%|| 6322/27626 [14:54<50:10,  7.08it/s]

{'epoch': 2, 'iter': 6320, 'avg_loss': 8.561693451484157, 'avg_acc': 49.992089859199496, 'loss': 8.915246963500977}


EP_train:2:  23%|| 6332/27626 [14:55<50:23,  7.04it/s]

{'epoch': 2, 'iter': 6330, 'avg_loss': 8.561829216577038, 'avg_acc': 49.99160875059233, 'loss': 9.078958511352539}


EP_train:2:  23%|| 6342/27626 [14:56<50:30,  7.02it/s]

{'epoch': 2, 'iter': 6340, 'avg_loss': 8.56173633871122, 'avg_acc': 49.98620091468223, 'loss': 7.661860466003418}


EP_train:2:  23%|| 6352/27626 [14:58<50:39,  7.00it/s]

{'epoch': 2, 'iter': 6350, 'avg_loss': 8.561584069698444, 'avg_acc': 49.986222642103606, 'loss': 7.858389854431152}


EP_train:2:  23%|| 6362/27626 [14:59<50:19,  7.04it/s]

{'epoch': 2, 'iter': 6360, 'avg_loss': 8.561438047741591, 'avg_acc': 49.983787926426665, 'loss': 8.782122611999512}


EP_train:2:  23%|| 6372/27626 [15:01<50:32,  7.01it/s]

{'epoch': 2, 'iter': 6370, 'avg_loss': 8.562414915964716, 'avg_acc': 49.98871841155235, 'loss': 8.971762657165527}


EP_train:2:  23%|| 6382/27626 [15:02<50:01,  7.08it/s]

{'epoch': 2, 'iter': 6380, 'avg_loss': 8.56243878908431, 'avg_acc': 49.99755132424385, 'loss': 8.333338737487793}


EP_train:2:  23%|| 6392/27626 [15:03<49:56,  7.09it/s]

{'epoch': 2, 'iter': 6390, 'avg_loss': 8.561758170906058, 'avg_acc': 50.00097793772492, 'loss': 7.848985195159912}


EP_train:2:  23%|| 6402/27626 [15:05<49:55,  7.08it/s]

{'epoch': 2, 'iter': 6400, 'avg_loss': 8.561535943357148, 'avg_acc': 50.00585845961568, 'loss': 9.655049324035645}


EP_train:2:  23%|| 6412/27626 [15:06<49:50,  7.09it/s]

{'epoch': 2, 'iter': 6410, 'avg_loss': 8.561729752390791, 'avg_acc': 50.00926142567462, 'loss': 8.601385116577148}


EP_train:2:  23%|| 6422/27626 [15:08<50:25,  7.01it/s]

{'epoch': 2, 'iter': 6420, 'avg_loss': 8.561958898767989, 'avg_acc': 50.01314047656128, 'loss': 7.91880464553833}


EP_train:2:  23%|| 6432/27626 [15:09<49:59,  7.07it/s]

{'epoch': 2, 'iter': 6430, 'avg_loss': 8.562028829425994, 'avg_acc': 50.0068029855388, 'loss': 8.573528289794922}


EP_train:2:  23%|| 6442/27626 [15:10<50:24,  7.00it/s]

{'epoch': 2, 'iter': 6440, 'avg_loss': 8.562289235088844, 'avg_acc': 50.01843657817109, 'loss': 9.528770446777344}


EP_train:2:  23%|| 6452/27626 [15:12<49:50,  7.08it/s]

{'epoch': 2, 'iter': 6450, 'avg_loss': 8.5620322526841, 'avg_acc': 50.02422105099984, 'loss': 9.010693550109863}


EP_train:2:  23%|| 6462/27626 [15:13<49:54,  7.07it/s]

{'epoch': 2, 'iter': 6460, 'avg_loss': 8.562036031688484, 'avg_acc': 50.02902027549915, 'loss': 7.935562610626221}


EP_train:2:  23%|| 6472/27626 [15:15<49:54,  7.06it/s]

{'epoch': 2, 'iter': 6470, 'avg_loss': 8.561714763785787, 'avg_acc': 50.031390047906044, 'loss': 8.70102596282959}


EP_train:2:  23%|| 6482/27626 [15:16<49:39,  7.10it/s]

{'epoch': 2, 'iter': 6480, 'avg_loss': 8.561464883059452, 'avg_acc': 50.02893072056781, 'loss': 8.090997695922852}


EP_train:2:  23%|| 6492/27626 [15:17<49:23,  7.13it/s]

{'epoch': 2, 'iter': 6490, 'avg_loss': 8.561993177931402, 'avg_acc': 50.03370050839624, 'loss': 8.051401138305664}


EP_train:2:  24%|| 6502/27626 [15:19<50:07,  7.02it/s]

{'epoch': 2, 'iter': 6500, 'avg_loss': 8.561908702119792, 'avg_acc': 50.02739963082603, 'loss': 8.367144584655762}


EP_train:2:  24%|| 6512/27626 [15:20<49:52,  7.05it/s]

{'epoch': 2, 'iter': 6510, 'avg_loss': 8.561703333710401, 'avg_acc': 50.022557978805104, 'loss': 8.379404067993164}


EP_train:2:  24%|| 6522/27626 [15:22<50:14,  7.00it/s]

{'epoch': 2, 'iter': 6520, 'avg_loss': 8.56197984444183, 'avg_acc': 50.02587793283239, 'loss': 8.690546035766602}


EP_train:2:  24%|| 6532/27626 [15:23<50:15,  7.00it/s]

{'epoch': 2, 'iter': 6530, 'avg_loss': 8.561708283267404, 'avg_acc': 50.03158015617822, 'loss': 8.94260025024414}


EP_train:2:  24%|| 6542/27626 [15:25<49:59,  7.03it/s]

{'epoch': 2, 'iter': 6540, 'avg_loss': 8.56076860260187, 'avg_acc': 50.031531875859955, 'loss': 7.401930332183838}


EP_train:2:  24%|| 6552/27626 [15:26<49:38,  7.07it/s]

{'epoch': 2, 'iter': 6550, 'avg_loss': 8.560362097070957, 'avg_acc': 50.03816211265456, 'loss': 7.853358268737793}


EP_train:2:  24%|| 6562/27626 [15:27<49:29,  7.09it/s]

{'epoch': 2, 'iter': 6560, 'avg_loss': 8.560130157988167, 'avg_acc': 50.03381725346746, 'loss': 8.158020973205566}


EP_train:2:  24%|| 6572/27626 [15:29<49:46,  7.05it/s]

{'epoch': 2, 'iter': 6570, 'avg_loss': 8.560464102861598, 'avg_acc': 50.03281464008522, 'loss': 8.178664207458496}


EP_train:2:  24%|| 6582/27626 [15:30<49:46,  7.05it/s]

{'epoch': 2, 'iter': 6580, 'avg_loss': 8.55996687523483, 'avg_acc': 50.032289925543225, 'loss': 8.234301567077637}


EP_train:2:  24%|| 6592/27626 [15:32<49:36,  7.07it/s]

{'epoch': 2, 'iter': 6590, 'avg_loss': 8.56003191242846, 'avg_acc': 50.02844788347747, 'loss': 8.828741073608398}


EP_train:2:  24%|| 6602/27626 [15:33<49:55,  7.02it/s]

{'epoch': 2, 'iter': 6600, 'avg_loss': 8.560059496063731, 'avg_acc': 50.03929328889563, 'loss': 8.370426177978516}


EP_train:2:  24%|| 6612/27626 [15:34<49:42,  7.04it/s]

{'epoch': 2, 'iter': 6610, 'avg_loss': 8.560087133654847, 'avg_acc': 50.03876115564968, 'loss': 8.6686429977417}


EP_train:2:  24%|| 6622/27626 [15:36<49:42,  7.04it/s]

{'epoch': 2, 'iter': 6620, 'avg_loss': 8.560094764091838, 'avg_acc': 50.03445476514121, 'loss': 9.228529930114746}


EP_train:2:  24%|| 6632/27626 [15:37<49:41,  7.04it/s]

{'epoch': 2, 'iter': 6630, 'avg_loss': 8.559966153283048, 'avg_acc': 50.03016136329362, 'loss': 8.61898422241211}


EP_train:2:  24%|| 6642/27626 [15:39<49:23,  7.08it/s]

{'epoch': 2, 'iter': 6640, 'avg_loss': 8.55998659019028, 'avg_acc': 50.02964538473121, 'loss': 8.819412231445312}


EP_train:2:  24%|| 6652/27626 [15:40<49:35,  7.05it/s]

{'epoch': 2, 'iter': 6650, 'avg_loss': 8.559684601354878, 'avg_acc': 50.03101037437979, 'loss': 8.733222961425781}


EP_train:2:  24%|| 6662/27626 [15:42<49:56,  7.00it/s]

{'epoch': 2, 'iter': 6660, 'avg_loss': 8.559849545075503, 'avg_acc': 50.0375319021168, 'loss': 7.926816940307617}


EP_train:2:  24%|| 6672/27626 [15:43<49:57,  6.99it/s]

{'epoch': 2, 'iter': 6670, 'avg_loss': 8.560203472189489, 'avg_acc': 50.03841253185429, 'loss': 8.182807922363281}


EP_train:2:  24%|| 6682/27626 [15:44<49:18,  7.08it/s]

{'epoch': 2, 'iter': 6680, 'avg_loss': 8.560235244538003, 'avg_acc': 50.03461308187397, 'loss': 8.066360473632812}


EP_train:2:  24%|| 6692/27626 [15:46<49:33,  7.04it/s]

{'epoch': 2, 'iter': 6690, 'avg_loss': 8.559864018391359, 'avg_acc': 50.03829771334629, 'loss': 7.929112434387207}


EP_train:2:  24%|| 6702/27626 [15:47<49:40,  7.02it/s]

{'epoch': 2, 'iter': 6700, 'avg_loss': 8.55975476145264, 'avg_acc': 50.03824056111028, 'loss': 8.245577812194824}


EP_train:2:  24%|| 6712/27626 [15:49<49:53,  6.99it/s]

{'epoch': 2, 'iter': 6710, 'avg_loss': 8.559713693475816, 'avg_acc': 50.0419088064372, 'loss': 9.152876853942871}


EP_train:2:  24%|| 6722/27626 [15:50<49:11,  7.08it/s]

{'epoch': 2, 'iter': 6720, 'avg_loss': 8.559437187454776, 'avg_acc': 50.048820859991075, 'loss': 8.601945877075195}


EP_train:2:  24%|| 6732/27626 [15:51<48:57,  7.11it/s]

{'epoch': 2, 'iter': 6730, 'avg_loss': 8.559302729069788, 'avg_acc': 50.04549844005348, 'loss': 8.096354484558105}


EP_train:2:  24%|| 6742/27626 [15:53<49:25,  7.04it/s]

{'epoch': 2, 'iter': 6740, 'avg_loss': 8.559683934497366, 'avg_acc': 50.0417222963952, 'loss': 8.654854774475098}


EP_train:2:  24%|| 6752/27626 [15:54<49:38,  7.01it/s]

{'epoch': 2, 'iter': 6750, 'avg_loss': 8.559692986081078, 'avg_acc': 50.0439749666716, 'loss': 8.622906684875488}


EP_train:2:  24%|| 6762/27626 [15:56<49:07,  7.08it/s]

{'epoch': 2, 'iter': 6760, 'avg_loss': 8.559421883269785, 'avg_acc': 50.03605235911848, 'loss': 8.64474105834961}


EP_train:2:  25%|| 6772/27626 [15:57<49:20,  7.04it/s]

{'epoch': 2, 'iter': 6770, 'avg_loss': 8.559646801377413, 'avg_acc': 50.03507605966623, 'loss': 7.870559215545654}


EP_train:2:  25%|| 6782/27626 [15:58<49:13,  7.06it/s]

{'epoch': 2, 'iter': 6780, 'avg_loss': 8.559413946618937, 'avg_acc': 50.04147618345377, 'loss': 8.047142028808594}


EP_train:2:  25%|| 6792/27626 [16:00<49:15,  7.05it/s]

{'epoch': 2, 'iter': 6790, 'avg_loss': 8.559155917838227, 'avg_acc': 50.038193933146815, 'loss': 8.122119903564453}


EP_train:2:  25%|| 6802/27626 [16:01<49:27,  7.02it/s]

{'epoch': 2, 'iter': 6800, 'avg_loss': 8.559770791149546, 'avg_acc': 50.03905675635936, 'loss': 8.804133415222168}


EP_train:2:  25%|| 6812/27626 [16:03<49:00,  7.08it/s]

{'epoch': 2, 'iter': 6810, 'avg_loss': 8.559746862315446, 'avg_acc': 50.03991704595507, 'loss': 8.653393745422363}


EP_train:2:  25%|| 6822/27626 [16:04<49:24,  7.02it/s]

{'epoch': 2, 'iter': 6820, 'avg_loss': 8.55984066134236, 'avg_acc': 50.04077481307726, 'loss': 7.63004732131958}


EP_train:2:  25%|| 6832/27626 [16:06<49:20,  7.02it/s]

{'epoch': 2, 'iter': 6830, 'avg_loss': 8.559335374601797, 'avg_acc': 50.03614038940126, 'loss': 8.877235412597656}


EP_train:2:  25%|| 6842/27626 [16:07<48:54,  7.08it/s]

{'epoch': 2, 'iter': 6840, 'avg_loss': 8.558648537550905, 'avg_acc': 50.027408273644205, 'loss': 8.392026901245117}


EP_train:2:  25%|| 6852/27626 [16:08<48:36,  7.12it/s]

{'epoch': 2, 'iter': 6850, 'avg_loss': 8.558702778109772, 'avg_acc': 50.033298058677566, 'loss': 8.455810546875}


EP_train:2:  25%|| 6862/27626 [16:10<48:56,  7.07it/s]

{'epoch': 2, 'iter': 6860, 'avg_loss': 8.558684285050873, 'avg_acc': 50.03780425593936, 'loss': 7.818206787109375}


EP_train:2:  25%|| 6872/27626 [16:11<48:58,  7.06it/s]

{'epoch': 2, 'iter': 6870, 'avg_loss': 8.558798327020572, 'avg_acc': 50.04457138698879, 'loss': 9.051963806152344}


EP_train:2:  25%|| 6882/27626 [16:13<48:54,  7.07it/s]

{'epoch': 2, 'iter': 6880, 'avg_loss': 8.558954944826805, 'avg_acc': 50.04904810347334, 'loss': 8.363055229187012}


EP_train:2:  25%|| 6892/27626 [16:14<49:16,  7.01it/s]

{'epoch': 2, 'iter': 6890, 'avg_loss': 8.55914152324191, 'avg_acc': 50.04580249600929, 'loss': 8.54555892944336}


EP_train:2:  25%|| 6902/27626 [16:15<48:47,  7.08it/s]

{'epoch': 2, 'iter': 6900, 'avg_loss': 8.558982628494945, 'avg_acc': 50.0412077959716, 'loss': 8.758842468261719}


EP_train:2:  25%|| 6912/27626 [16:17<48:48,  7.07it/s]

{'epoch': 2, 'iter': 6910, 'avg_loss': 8.558740175161912, 'avg_acc': 50.035269859644046, 'loss': 9.069747924804688}


EP_train:2:  25%|| 6922/27626 [16:18<48:57,  7.05it/s]

{'epoch': 2, 'iter': 6920, 'avg_loss': 8.559286698563357, 'avg_acc': 50.03070365554111, 'loss': 8.494921684265137}


EP_train:2:  25%|| 6932/27626 [16:20<49:00,  7.04it/s]

{'epoch': 2, 'iter': 6930, 'avg_loss': 8.559818706545474, 'avg_acc': 50.028855864954544, 'loss': 8.662153244018555}


EP_train:2:  25%|| 6942/27626 [16:21<48:29,  7.11it/s]

{'epoch': 2, 'iter': 6940, 'avg_loss': 8.559789143452262, 'avg_acc': 50.028364068578014, 'loss': 9.185832977294922}


EP_train:2:  25%|| 6952/27626 [16:22<48:49,  7.06it/s]

{'epoch': 2, 'iter': 6950, 'avg_loss': 8.559840164807456, 'avg_acc': 50.03057114084304, 'loss': 8.714595794677734}


EP_train:2:  25%|| 6962/27626 [16:24<48:34,  7.09it/s]

{'epoch': 2, 'iter': 6960, 'avg_loss': 8.559316959348223, 'avg_acc': 50.03366973136044, 'loss': 8.456988334655762}


EP_train:2:  25%|| 6972/27626 [16:25<48:59,  7.03it/s]

{'epoch': 2, 'iter': 6970, 'avg_loss': 8.559434440929088, 'avg_acc': 50.04079400372974, 'loss': 8.624505043029785}


EP_train:2:  25%|| 6982/27626 [16:27<48:56,  7.03it/s]

{'epoch': 2, 'iter': 6980, 'avg_loss': 8.559737363699735, 'avg_acc': 50.047002578427154, 'loss': 8.927908897399902}


EP_train:2:  25%|| 6992/27626 [16:28<48:50,  7.04it/s]

{'epoch': 2, 'iter': 6990, 'avg_loss': 8.559964925293723, 'avg_acc': 50.041124302674866, 'loss': 10.071647644042969}


EP_train:2:  25%|| 7002/27626 [16:30<48:57,  7.02it/s]

{'epoch': 2, 'iter': 7000, 'avg_loss': 8.559766243941578, 'avg_acc': 50.035709184402236, 'loss': 7.861020088195801}


EP_train:2:  25%|| 7012/27626 [16:31<48:53,  7.03it/s]

{'epoch': 2, 'iter': 7010, 'avg_loss': 8.559289189144032, 'avg_acc': 50.02852660105549, 'loss': 8.45312786102295}


EP_train:2:  25%|| 7022/27626 [16:32<48:36,  7.07it/s]

{'epoch': 2, 'iter': 7020, 'avg_loss': 8.559179594740536, 'avg_acc': 50.02581541091013, 'loss': 8.042607307434082}


EP_train:2:  25%|| 7032/27626 [16:34<48:50,  7.03it/s]

{'epoch': 2, 'iter': 7030, 'avg_loss': 8.559250958222547, 'avg_acc': 50.02311193286872, 'loss': 8.348771095275879}


EP_train:2:  25%|| 7042/27626 [16:35<48:53,  7.02it/s]

{'epoch': 2, 'iter': 7040, 'avg_loss': 8.55900521773436, 'avg_acc': 50.02219145007811, 'loss': 8.705644607543945}


EP_train:2:  26%|| 7052/27626 [16:37<48:54,  7.01it/s]

{'epoch': 2, 'iter': 7050, 'avg_loss': 8.559349886053353, 'avg_acc': 50.01905758048504, 'loss': 8.760799407958984}


EP_train:2:  26%|| 7062/27626 [16:38<48:49,  7.02it/s]

{'epoch': 2, 'iter': 7060, 'avg_loss': 8.559497572446812, 'avg_acc': 50.02212859368361, 'loss': 8.370807647705078}


EP_train:2:  26%|| 7072/27626 [16:39<49:02,  6.98it/s]

{'epoch': 2, 'iter': 7070, 'avg_loss': 8.56002226200894, 'avg_acc': 50.019445622967055, 'loss': 9.056946754455566}


EP_train:2:  26%|| 7082/27626 [16:41<48:19,  7.09it/s]

{'epoch': 2, 'iter': 7080, 'avg_loss': 8.559826175736566, 'avg_acc': 50.02118344866544, 'loss': 7.999352931976318}


EP_train:2:  26%|| 7092/27626 [16:42<48:15,  7.09it/s]

{'epoch': 2, 'iter': 7090, 'avg_loss': 8.559944410516419, 'avg_acc': 50.02776406712734, 'loss': 8.73350715637207}


EP_train:2:  26%|| 7102/27626 [16:44<48:33,  7.04it/s]

{'epoch': 2, 'iter': 7100, 'avg_loss': 8.560334543798394, 'avg_acc': 50.02552457400367, 'loss': 8.526786804199219}


EP_train:2:  26%|| 7112/27626 [16:45<48:12,  7.09it/s]

{'epoch': 2, 'iter': 7110, 'avg_loss': 8.560402524301965, 'avg_acc': 50.02373083954437, 'loss': 8.585846900939941}


EP_train:2:  26%|| 7122/27626 [16:47<48:45,  7.01it/s]

{'epoch': 2, 'iter': 7120, 'avg_loss': 8.560306602240178, 'avg_acc': 50.02194214295746, 'loss': 8.070444107055664}


EP_train:2:  26%|| 7132/27626 [16:48<48:38,  7.02it/s]

{'epoch': 2, 'iter': 7130, 'avg_loss': 8.560313261569993, 'avg_acc': 50.025417192539614, 'loss': 8.943178176879883}


EP_train:2:  26%|| 7142/27626 [16:49<48:41,  7.01it/s]

{'epoch': 2, 'iter': 7140, 'avg_loss': 8.560443909691422, 'avg_acc': 50.031070578350366, 'loss': 7.935361385345459}


EP_train:2:  26%|| 7152/27626 [16:51<48:51,  6.98it/s]

{'epoch': 2, 'iter': 7150, 'avg_loss': 8.559948659479959, 'avg_acc': 50.03670815270591, 'loss': 8.665132522583008}


EP_train:2:  26%|| 7162/27626 [16:52<48:36,  7.02it/s]

{'epoch': 2, 'iter': 7160, 'avg_loss': 8.559807619924003, 'avg_acc': 50.041457198715264, 'loss': 7.557572364807129}


EP_train:2:  26%|| 7172/27626 [16:54<48:29,  7.03it/s]

{'epoch': 2, 'iter': 7170, 'avg_loss': 8.560106275138487, 'avg_acc': 50.038784688327986, 'loss': 8.529746055603027}


EP_train:2:  26%|| 7182/27626 [16:55<48:10,  7.07it/s]

{'epoch': 2, 'iter': 7180, 'avg_loss': 8.560302591675786, 'avg_acc': 50.03046233115165, 'loss': 8.889250755310059}


EP_train:2:  26%|| 7192/27626 [16:56<48:06,  7.08it/s]

{'epoch': 2, 'iter': 7190, 'avg_loss': 8.56016947632621, 'avg_acc': 50.029550827423165, 'loss': 8.170502662658691}


EP_train:2:  26%|| 7202/27626 [16:58<48:31,  7.01it/s]

{'epoch': 2, 'iter': 7200, 'avg_loss': 8.560252924790532, 'avg_acc': 50.02907582280239, 'loss': 8.309967994689941}


EP_train:2:  26%|| 7212/27626 [16:59<48:17,  7.05it/s]

{'epoch': 2, 'iter': 7210, 'avg_loss': 8.55997758651636, 'avg_acc': 50.027302038552214, 'loss': 9.112119674682617}


EP_train:2:  26%|| 7222/27626 [17:01<48:02,  7.08it/s]

{'epoch': 2, 'iter': 7220, 'avg_loss': 8.560201502460437, 'avg_acc': 50.02856252596594, 'loss': 8.947690963745117}


EP_train:2:  26%|| 7232/27626 [17:02<48:12,  7.05it/s]

{'epoch': 2, 'iter': 7230, 'avg_loss': 8.559946005117876, 'avg_acc': 50.02506568939289, 'loss': 8.283945083618164}


EP_train:2:  26%|| 7242/27626 [17:04<48:41,  6.98it/s]

{'epoch': 2, 'iter': 7240, 'avg_loss': 8.560479381502276, 'avg_acc': 50.02373636238089, 'loss': 8.396985054016113}


EP_train:2:  26%|| 7252/27626 [17:05<48:16,  7.03it/s]

{'epoch': 2, 'iter': 7250, 'avg_loss': 8.55997086156567, 'avg_acc': 50.02241070197214, 'loss': 8.239990234375}


EP_train:2:  26%|| 7262/27626 [17:06<47:49,  7.10it/s]

{'epoch': 2, 'iter': 7260, 'avg_loss': 8.559824923512824, 'avg_acc': 50.01893678556672, 'loss': 8.411832809448242}


EP_train:2:  26%|| 7272/27626 [17:08<47:45,  7.10it/s]

{'epoch': 2, 'iter': 7270, 'avg_loss': 8.559728592269408, 'avg_acc': 50.01633200385092, 'loss': 8.46416187286377}


EP_train:2:  26%|| 7282/27626 [17:09<47:48,  7.09it/s]

{'epoch': 2, 'iter': 7280, 'avg_loss': 8.559161698813295, 'avg_acc': 50.021459964290614, 'loss': 7.122503280639648}


EP_train:2:  26%|| 7292/27626 [17:11<48:15,  7.02it/s]

{'epoch': 2, 'iter': 7290, 'avg_loss': 8.558568324723568, 'avg_acc': 50.0201446989439, 'loss': 8.084024429321289}


EP_train:2:  26%|| 7302/27626 [17:12<48:08,  7.04it/s]

{'epoch': 2, 'iter': 7300, 'avg_loss': 8.558641912381692, 'avg_acc': 50.01669291877825, 'loss': 9.481904029846191}


EP_train:2:  26%|| 7312/27626 [17:13<47:48,  7.08it/s]

{'epoch': 2, 'iter': 7310, 'avg_loss': 8.55876741186243, 'avg_acc': 50.00983107646013, 'loss': 10.002874374389648}


EP_train:2:  27%|| 7322/27626 [17:15<48:04,  7.04it/s]

{'epoch': 2, 'iter': 7320, 'avg_loss': 8.558857574470707, 'avg_acc': 50.00853708509766, 'loss': 8.30842399597168}


EP_train:2:  27%|| 7332/27626 [17:16<48:03,  7.04it/s]

{'epoch': 2, 'iter': 7330, 'avg_loss': 8.558920122887328, 'avg_acc': 50.0098042558996, 'loss': 9.096389770507812}


EP_train:2:  27%|| 7342/27626 [17:18<48:07,  7.02it/s]

{'epoch': 2, 'iter': 7340, 'avg_loss': 8.559062273654021, 'avg_acc': 50.01447350497208, 'loss': 8.692718505859375}


EP_train:2:  27%|| 7352/27626 [17:19<47:38,  7.09it/s]

{'epoch': 2, 'iter': 7350, 'avg_loss': 8.55949948398682, 'avg_acc': 50.01062780574072, 'loss': 9.668262481689453}


EP_train:2:  27%|| 7362/27626 [17:20<47:53,  7.05it/s]

{'epoch': 2, 'iter': 7360, 'avg_loss': 8.559499359830472, 'avg_acc': 50.005518951229455, 'loss': 8.154918670654297}


EP_train:2:  27%|| 7372/27626 [17:22<47:50,  7.06it/s]

{'epoch': 2, 'iter': 7370, 'avg_loss': 8.559322079976917, 'avg_acc': 50.00254375254375, 'loss': 8.20906925201416}


EP_train:2:  27%|| 7382/27626 [17:23<47:45,  7.06it/s]

{'epoch': 2, 'iter': 7380, 'avg_loss': 8.559454134850785, 'avg_acc': 49.994919387616854, 'loss': 8.947729110717773}


EP_train:2:  27%|| 7392/27626 [17:25<47:39,  7.08it/s]

{'epoch': 2, 'iter': 7390, 'avg_loss': 8.559090130320422, 'avg_acc': 49.9923893925044, 'loss': 8.998984336853027}


EP_train:2:  27%|| 7402/27626 [17:26<48:07,  7.00it/s]

{'epoch': 2, 'iter': 7400, 'avg_loss': 8.559327976910783, 'avg_acc': 49.986488312390215, 'loss': 8.551553726196289}


EP_train:2:  27%|| 7412/27626 [17:28<48:02,  7.01it/s]

{'epoch': 2, 'iter': 7410, 'avg_loss': 8.559244847043518, 'avg_acc': 49.98524153285657, 'loss': 9.049447059631348}


EP_train:2:  27%|| 7422/27626 [17:29<47:51,  7.04it/s]

{'epoch': 2, 'iter': 7420, 'avg_loss': 8.559060802261811, 'avg_acc': 49.98189260207519, 'loss': 8.21656608581543}


EP_train:2:  27%|| 7432/27626 [17:30<47:46,  7.04it/s]

{'epoch': 2, 'iter': 7430, 'avg_loss': 8.559343343852042, 'avg_acc': 49.98612232539362, 'loss': 8.46198844909668}


EP_train:2:  27%|| 7442/27626 [17:32<47:45,  7.04it/s]

{'epoch': 2, 'iter': 7440, 'avg_loss': 8.559427322166739, 'avg_acc': 49.98824082784572, 'loss': 8.203310012817383}


EP_train:2:  27%|| 7452/27626 [17:33<47:22,  7.10it/s]

{'epoch': 2, 'iter': 7450, 'avg_loss': 8.559206412792397, 'avg_acc': 49.98909542343309, 'loss': 9.046218872070312}


EP_train:2:  27%|| 7462/27626 [17:35<47:44,  7.04it/s]

{'epoch': 2, 'iter': 7460, 'avg_loss': 8.559037701819317, 'avg_acc': 49.98911003886878, 'loss': 8.331584930419922}


EP_train:2:  27%|| 7472/27626 [17:36<47:53,  7.01it/s]

{'epoch': 2, 'iter': 7470, 'avg_loss': 8.558901734659111, 'avg_acc': 49.99205260339981, 'loss': 8.236738204956055}


EP_train:2:  27%|| 7482/27626 [17:37<47:37,  7.05it/s]

{'epoch': 2, 'iter': 7480, 'avg_loss': 8.559246709362387, 'avg_acc': 49.989974602325894, 'loss': 8.58047103881836}


EP_train:2:  27%|| 7492/27626 [17:39<47:41,  7.04it/s]

{'epoch': 2, 'iter': 7490, 'avg_loss': 8.55962485220607, 'avg_acc': 49.997496996395675, 'loss': 8.338530540466309}


EP_train:2:  27%|| 7502/27626 [17:40<47:31,  7.06it/s]

{'epoch': 2, 'iter': 7500, 'avg_loss': 8.559489654550678, 'avg_acc': 49.99958338888148, 'loss': 8.944042205810547}


EP_train:2:  27%|| 7512/27626 [17:42<47:20,  7.08it/s]

{'epoch': 2, 'iter': 7510, 'avg_loss': 8.559982682556814, 'avg_acc': 50.00249633870324, 'loss': 9.464858055114746}


EP_train:2:  27%|| 7522/27626 [17:43<47:43,  7.02it/s]

{'epoch': 2, 'iter': 7520, 'avg_loss': 8.560184284943626, 'avg_acc': 49.998337986969815, 'loss': 8.428628921508789}


EP_train:2:  27%|| 7532/27626 [17:44<47:33,  7.04it/s]

{'epoch': 2, 'iter': 7530, 'avg_loss': 8.559738273972645, 'avg_acc': 49.99917009693268, 'loss': 7.629955291748047}


EP_train:2:  27%|| 7542/27626 [17:46<47:13,  7.09it/s]

{'epoch': 2, 'iter': 7540, 'avg_loss': 8.559806884686711, 'avg_acc': 49.9979279936348, 'loss': 8.012986183166504}


EP_train:2:  27%|| 7552/27626 [17:47<50:53,  6.57it/s]

{'epoch': 2, 'iter': 7550, 'avg_loss': 8.559741268061973, 'avg_acc': 50.00372467222884, 'loss': 8.802709579467773}


EP_train:2:  27%|| 7562/27626 [17:49<47:26,  7.05it/s]

{'epoch': 2, 'iter': 7560, 'avg_loss': 8.559562203241173, 'avg_acc': 50.007439492130665, 'loss': 9.292329788208008}


EP_train:2:  27%|| 7572/27626 [17:50<47:34,  7.03it/s]

{'epoch': 2, 'iter': 7570, 'avg_loss': 8.559905777234507, 'avg_acc': 50.00908070268129, 'loss': 8.63224983215332}


EP_train:2:  27%|| 7582/27626 [17:52<47:31,  7.03it/s]

{'epoch': 2, 'iter': 7580, 'avg_loss': 8.560405908324697, 'avg_acc': 50.00041221474739, 'loss': 8.967796325683594}


EP_train:2:  27%|| 7592/27626 [17:53<47:18,  7.06it/s]

{'epoch': 2, 'iter': 7590, 'avg_loss': 8.560619158136124, 'avg_acc': 49.99917665656699, 'loss': 9.196435928344727}


EP_train:2:  28%|| 7602/27626 [17:54<47:17,  7.06it/s]

{'epoch': 2, 'iter': 7600, 'avg_loss': 8.561033486473045, 'avg_acc': 49.99835547954217, 'loss': 8.909598350524902}


EP_train:2:  28%|| 7612/27626 [17:56<47:42,  6.99it/s]

{'epoch': 2, 'iter': 7610, 'avg_loss': 8.561020422476755, 'avg_acc': 49.99384115096571, 'loss': 8.202834129333496}


EP_train:2:  28%|| 7622/27626 [17:57<47:27,  7.03it/s]

{'epoch': 2, 'iter': 7620, 'avg_loss': 8.560854246947665, 'avg_acc': 49.99425928355859, 'loss': 8.906852722167969}


EP_train:2:  28%|| 7632/27626 [17:59<46:58,  7.09it/s]

{'epoch': 2, 'iter': 7630, 'avg_loss': 8.560806882973502, 'avg_acc': 49.994266806447385, 'loss': 7.949197292327881}


EP_train:2:  28%|| 7642/27626 [18:00<47:01,  7.08it/s]

{'epoch': 2, 'iter': 7640, 'avg_loss': 8.560699478679108, 'avg_acc': 49.99222942023295, 'loss': 8.182765007019043}


EP_train:2:  28%|| 7652/27626 [18:01<47:36,  6.99it/s]

{'epoch': 2, 'iter': 7650, 'avg_loss': 8.560659580644542, 'avg_acc': 49.993056463207424, 'loss': 7.676219463348389}


EP_train:2:  28%|| 7662/27626 [18:03<47:11,  7.05it/s]

{'epoch': 2, 'iter': 7660, 'avg_loss': 8.56047350998383, 'avg_acc': 49.988170604359745, 'loss': 7.6775994300842285}


EP_train:2:  28%|| 7672/27626 [18:04<47:27,  7.01it/s]

{'epoch': 2, 'iter': 7670, 'avg_loss': 8.559974918494774, 'avg_acc': 49.988186025290055, 'loss': 8.238563537597656}


EP_train:2:  28%|| 7682/27626 [18:06<47:10,  7.05it/s]

{'epoch': 2, 'iter': 7680, 'avg_loss': 8.55927000795704, 'avg_acc': 49.99064249446686, 'loss': 8.27193832397461}


EP_train:2:  28%|| 7692/27626 [18:07<47:06,  7.05it/s]

{'epoch': 2, 'iter': 7690, 'avg_loss': 8.558592628741385, 'avg_acc': 49.99106098036666, 'loss': 8.889023780822754}


EP_train:2:  28%|| 7702/27626 [18:09<47:30,  6.99it/s]

{'epoch': 2, 'iter': 7700, 'avg_loss': 8.558269327050013, 'avg_acc': 49.99472471107648, 'loss': 8.61695384979248}


EP_train:2:  28%|| 7712/27626 [18:10<46:55,  7.07it/s]

{'epoch': 2, 'iter': 7710, 'avg_loss': 8.558081044496506, 'avg_acc': 49.995947347944494, 'loss': 9.405364990234375}


EP_train:2:  28%|| 7722/27626 [18:11<46:58,  7.06it/s]

{'epoch': 2, 'iter': 7720, 'avg_loss': 8.557716582325082, 'avg_acc': 49.995952596813886, 'loss': 8.508298873901367}


EP_train:2:  28%|| 7732/27626 [18:13<47:14,  7.02it/s]

{'epoch': 2, 'iter': 7730, 'avg_loss': 8.557848262379787, 'avg_acc': 49.993128314577675, 'loss': 8.392638206481934}


EP_train:2:  28%|| 7742/27626 [18:14<47:17,  7.01it/s]

{'epoch': 2, 'iter': 7740, 'avg_loss': 8.557902896054692, 'avg_acc': 49.990715023898716, 'loss': 8.5448637008667}


EP_train:2:  28%|| 7752/27626 [18:16<47:07,  7.03it/s]

{'epoch': 2, 'iter': 7750, 'avg_loss': 8.557731861146, 'avg_acc': 49.99395239323958, 'loss': 7.581913471221924}


EP_train:2:  28%|| 7762/27626 [18:17<46:51,  7.07it/s]

{'epoch': 2, 'iter': 7760, 'avg_loss': 8.557280489102327, 'avg_acc': 49.99798672851436, 'loss': 8.21046257019043}


EP_train:2:  28%|| 7772/27626 [18:18<46:45,  7.08it/s]

{'epoch': 2, 'iter': 7770, 'avg_loss': 8.55726782405197, 'avg_acc': 49.99678291082229, 'loss': 9.641742706298828}


EP_train:2:  28%|| 7782/27626 [18:20<47:10,  7.01it/s]

{'epoch': 2, 'iter': 7780, 'avg_loss': 8.55695973605764, 'avg_acc': 50.00200809664568, 'loss': 8.292430877685547}


EP_train:2:  28%|| 7792/27626 [18:21<46:53,  7.05it/s]

{'epoch': 2, 'iter': 7790, 'avg_loss': 8.556579661574355, 'avg_acc': 50.00080220767552, 'loss': 7.0905351638793945}


EP_train:2:  28%|| 7802/27626 [18:23<47:03,  7.02it/s]

{'epoch': 2, 'iter': 7800, 'avg_loss': 8.556350438384854, 'avg_acc': 49.99959941033201, 'loss': 8.005719184875488}


EP_train:2:  28%|| 7812/27626 [18:24<46:50,  7.05it/s]

{'epoch': 2, 'iter': 7810, 'avg_loss': 8.556329688530179, 'avg_acc': 50.003200614517986, 'loss': 8.519129753112793}


EP_train:2:  28%|| 7822/27626 [18:26<46:44,  7.06it/s]

{'epoch': 2, 'iter': 7820, 'avg_loss': 8.556551689779099, 'avg_acc': 50.00399565272983, 'loss': 8.481254577636719}


EP_train:2:  28%|| 7832/27626 [18:27<46:25,  7.11it/s]

{'epoch': 2, 'iter': 7830, 'avg_loss': 8.55637011481129, 'avg_acc': 50.00478866045205, 'loss': 8.726616859436035}


EP_train:2:  28%|| 7842/27626 [18:28<46:45,  7.05it/s]

{'epoch': 2, 'iter': 7840, 'avg_loss': 8.556156689651523, 'avg_acc': 50.00239127662288, 'loss': 7.231186389923096}


EP_train:2:  28%|| 7852/27626 [18:30<46:35,  7.07it/s]

{'epoch': 2, 'iter': 7850, 'avg_loss': 8.555949728772255, 'avg_acc': 50.0, 'loss': 9.027324676513672}


EP_train:2:  28%|| 7862/27626 [18:31<46:25,  7.09it/s]

{'epoch': 2, 'iter': 7860, 'avg_loss': 8.555939295443244, 'avg_acc': 49.99721727515583, 'loss': 8.432940483093262}


EP_train:2:  28%|| 7872/27626 [18:33<46:45,  7.04it/s]

{'epoch': 2, 'iter': 7870, 'avg_loss': 8.555916789162342, 'avg_acc': 49.99722081057045, 'loss': 8.616930961608887}


EP_train:2:  29%|| 7882/27626 [18:34<46:52,  7.02it/s]

{'epoch': 2, 'iter': 7880, 'avg_loss': 8.555740969538945, 'avg_acc': 50.00158609313539, 'loss': 8.48878002166748}


EP_train:2:  29%|| 7892/27626 [18:35<46:46,  7.03it/s]

{'epoch': 2, 'iter': 7890, 'avg_loss': 8.555898540752073, 'avg_acc': 49.99683183373463, 'loss': 8.884480476379395}


EP_train:2:  29%|| 7902/27626 [18:37<46:56,  7.00it/s]

{'epoch': 2, 'iter': 7900, 'avg_loss': 8.555810456214985, 'avg_acc': 49.99644032400962, 'loss': 8.191119194030762}


EP_train:2:  29%|| 7912/27626 [18:38<46:23,  7.08it/s]

{'epoch': 2, 'iter': 7910, 'avg_loss': 8.555846693301499, 'avg_acc': 49.99091454936165, 'loss': 7.524104595184326}


EP_train:2:  29%|| 7922/27626 [18:40<46:15,  7.10it/s]

{'epoch': 2, 'iter': 7920, 'avg_loss': 8.555504542982739, 'avg_acc': 49.990926019441986, 'loss': 8.886555671691895}


EP_train:2:  29%|| 7932/27626 [18:41<46:44,  7.02it/s]

{'epoch': 2, 'iter': 7930, 'avg_loss': 8.555182684897295, 'avg_acc': 49.99133148404993, 'loss': 8.235153198242188}


EP_train:2:  29%|| 7942/27626 [18:42<46:17,  7.09it/s]

{'epoch': 2, 'iter': 7940, 'avg_loss': 8.555320874913004, 'avg_acc': 49.989768291147215, 'loss': 7.907072067260742}


EP_train:2:  29%|| 7952/27626 [18:44<46:19,  7.08it/s]

{'epoch': 2, 'iter': 7950, 'avg_loss': 8.55556083052372, 'avg_acc': 49.98860206263363, 'loss': 8.675832748413086}


EP_train:2:  29%|| 7962/27626 [18:45<46:49,  7.00it/s]

{'epoch': 2, 'iter': 7960, 'avg_loss': 8.555574562004232, 'avg_acc': 49.98547607084537, 'loss': 9.010666847229004}


EP_train:2:  29%|| 7972/27626 [18:47<46:34,  7.03it/s]

{'epoch': 2, 'iter': 7970, 'avg_loss': 8.555570235531746, 'avg_acc': 49.98667043030987, 'loss': 8.310546875}


EP_train:2:  29%|| 7982/27626 [18:48<46:12,  7.09it/s]

{'epoch': 2, 'iter': 7980, 'avg_loss': 8.555120182013455, 'avg_acc': 49.99060268136825, 'loss': 8.21329402923584}


EP_train:2:  29%|| 7992/27626 [18:49<46:18,  7.07it/s]

{'epoch': 2, 'iter': 7990, 'avg_loss': 8.554927560089974, 'avg_acc': 49.994525090727066, 'loss': 8.406578063964844}


EP_train:2:  29%|| 8002/27626 [18:51<46:34,  7.02it/s]

{'epoch': 2, 'iter': 8000, 'avg_loss': 8.555022537492242, 'avg_acc': 49.99375078115236, 'loss': 8.545244216918945}


EP_train:2:  29%|| 8012/27626 [18:52<46:40,  7.00it/s]

{'epoch': 2, 'iter': 8010, 'avg_loss': 8.555155585818397, 'avg_acc': 49.991418050181004, 'loss': 8.84295654296875}


EP_train:2:  29%|| 8022/27626 [18:54<46:40,  7.00it/s]

{'epoch': 2, 'iter': 8020, 'avg_loss': 8.555371344527941, 'avg_acc': 49.99337676100237, 'loss': 8.457745552062988}


EP_train:2:  29%|| 8032/27626 [18:55<45:58,  7.10it/s]

{'epoch': 2, 'iter': 8030, 'avg_loss': 8.554996523372026, 'avg_acc': 49.98832648487112, 'loss': 8.217302322387695}


EP_train:2:  29%|| 8042/27626 [18:57<46:01,  7.09it/s]

{'epoch': 2, 'iter': 8040, 'avg_loss': 8.554901182940611, 'avg_acc': 49.98989553538117, 'loss': 9.018031120300293}


EP_train:2:  29%|| 8052/27626 [18:58<46:21,  7.04it/s]

{'epoch': 2, 'iter': 8050, 'avg_loss': 8.555067953804594, 'avg_acc': 49.996894795677555, 'loss': 8.607050895690918}


EP_train:2:  29%|| 8062/27626 [18:59<46:27,  7.02it/s]

{'epoch': 2, 'iter': 8060, 'avg_loss': 8.554824056851333, 'avg_acc': 49.99767398585783, 'loss': 8.882960319519043}


EP_train:2:  29%|| 8072/27626 [19:01<45:51,  7.11it/s]

{'epoch': 2, 'iter': 8070, 'avg_loss': 8.554251007424634, 'avg_acc': 49.994192169495726, 'loss': 7.86976957321167}


EP_train:2:  29%|| 8082/27626 [19:02<46:14,  7.04it/s]

{'epoch': 2, 'iter': 8080, 'avg_loss': 8.554079705809414, 'avg_acc': 49.99342593738399, 'loss': 8.780055046081543}


EP_train:2:  29%|| 8092/27626 [19:04<46:04,  7.06it/s]

{'epoch': 2, 'iter': 8090, 'avg_loss': 8.554100444033685, 'avg_acc': 49.994978989000124, 'loss': 8.575510025024414}


EP_train:2:  29%|| 8102/27626 [19:05<46:04,  7.06it/s]

{'epoch': 2, 'iter': 8100, 'avg_loss': 8.553743689108183, 'avg_acc': 49.99768547092952, 'loss': 8.328145980834961}


EP_train:2:  29%|| 8112/27626 [19:06<46:18,  7.02it/s]

{'epoch': 2, 'iter': 8110, 'avg_loss': 8.553133938142336, 'avg_acc': 49.99499136974479, 'loss': 7.988964557647705}


EP_train:2:  29%|| 8122/27626 [19:08<46:12,  7.04it/s]

{'epoch': 2, 'iter': 8120, 'avg_loss': 8.553422186794782, 'avg_acc': 49.99769117103805, 'loss': 9.306635856628418}


EP_train:2:  29%|| 8132/27626 [19:09<45:44,  7.10it/s]

{'epoch': 2, 'iter': 8130, 'avg_loss': 8.55370541308554, 'avg_acc': 50.00653363669906, 'loss': 9.234044075012207}


EP_train:2:  29%|| 8142/27626 [19:11<45:49,  7.09it/s]

{'epoch': 2, 'iter': 8140, 'avg_loss': 8.553497810150567, 'avg_acc': 50.00345473529051, 'loss': 8.919855117797852}


EP_train:2:  30%|| 8152/27626 [19:12<46:06,  7.04it/s]

{'epoch': 2, 'iter': 8150, 'avg_loss': 8.55312294826056, 'avg_acc': 50.00306710833027, 'loss': 9.114559173583984}


EP_train:2:  30%|| 8162/27626 [19:14<46:27,  6.98it/s]

{'epoch': 2, 'iter': 8160, 'avg_loss': 8.553055064962448, 'avg_acc': 50.0, 'loss': 9.086238861083984}


EP_train:2:  30%|| 8172/27626 [19:15<46:13,  7.02it/s]

{'epoch': 2, 'iter': 8170, 'avg_loss': 8.553050445672888, 'avg_acc': 50.00382450128503, 'loss': 9.336606979370117}


EP_train:2:  30%|| 8182/27626 [19:16<45:49,  7.07it/s]

{'epoch': 2, 'iter': 8180, 'avg_loss': 8.552891192292252, 'avg_acc': 50.00305586114167, 'loss': 8.15369701385498}


EP_train:2:  30%|| 8192/27626 [19:18<45:53,  7.06it/s]

{'epoch': 2, 'iter': 8190, 'avg_loss': 8.552545780402635, 'avg_acc': 50.00267061408863, 'loss': 8.551509857177734}


EP_train:2:  30%|| 8202/27626 [19:19<45:45,  7.07it/s]

{'epoch': 2, 'iter': 8200, 'avg_loss': 8.552563516704968, 'avg_acc': 49.99733264236069, 'loss': 9.439884185791016}


EP_train:2:  30%|| 8212/27626 [19:21<45:35,  7.10it/s]

{'epoch': 2, 'iter': 8210, 'avg_loss': 8.552757036234427, 'avg_acc': 49.99695530386067, 'loss': 8.633134841918945}


EP_train:2:  30%|| 8222/27626 [19:22<45:48,  7.06it/s]

{'epoch': 2, 'iter': 8220, 'avg_loss': 8.552755347134612, 'avg_acc': 49.99733913149252, 'loss': 8.366194725036621}


EP_train:2:  30%|| 8232/27626 [19:23<45:38,  7.08it/s]

{'epoch': 2, 'iter': 8230, 'avg_loss': 8.552249160199333, 'avg_acc': 49.9924067549508, 'loss': 8.713740348815918}


EP_train:2:  30%|| 8242/27626 [19:25<46:05,  7.01it/s]

{'epoch': 2, 'iter': 8240, 'avg_loss': 8.552296862875918, 'avg_acc': 49.99620798446791, 'loss': 8.104889869689941}


EP_train:2:  30%|| 8252/27626 [19:26<45:53,  7.04it/s]

{'epoch': 2, 'iter': 8250, 'avg_loss': 8.552326627333025, 'avg_acc': 49.99507635438128, 'loss': 9.048074722290039}


EP_train:2:  30%|| 8262/27626 [19:28<45:53,  7.03it/s]

{'epoch': 2, 'iter': 8260, 'avg_loss': 8.552276134260081, 'avg_acc': 49.990921195981116, 'loss': 8.260721206665039}


EP_train:2:  30%|| 8272/27626 [19:29<45:55,  7.02it/s]

{'epoch': 2, 'iter': 8270, 'avg_loss': 8.55137450062887, 'avg_acc': 49.99093217265143, 'loss': 8.150111198425293}


EP_train:2:  30%|| 8282/27626 [19:30<45:38,  7.06it/s]

{'epoch': 2, 'iter': 8280, 'avg_loss': 8.551562317499373, 'avg_acc': 49.99282997222558, 'loss': 8.0327730178833}


EP_train:2:  30%|| 8292/27626 [19:32<45:59,  7.01it/s]

{'epoch': 2, 'iter': 8290, 'avg_loss': 8.551935492457936, 'avg_acc': 49.99547702327825, 'loss': 8.50600528717041}


EP_train:2:  30%|| 8302/27626 [19:33<45:35,  7.06it/s]

{'epoch': 2, 'iter': 8300, 'avg_loss': 8.55241350127536, 'avg_acc': 49.99774123599566, 'loss': 9.460278511047363}


EP_train:2:  30%|| 8312/27626 [19:35<45:29,  7.08it/s]

{'epoch': 2, 'iter': 8310, 'avg_loss': 8.552822235358951, 'avg_acc': 49.995863915292986, 'loss': 8.35099983215332}


EP_train:2:  30%|| 8322/27626 [19:36<45:38,  7.05it/s]

{'epoch': 2, 'iter': 8320, 'avg_loss': 8.55276945636989, 'avg_acc': 49.99661999759644, 'loss': 8.714646339416504}


EP_train:2:  30%|| 8332/27626 [19:38<45:48,  7.02it/s]

{'epoch': 2, 'iter': 8330, 'avg_loss': 8.552682545073191, 'avg_acc': 49.99587384467651, 'loss': 8.29641342163086}


EP_train:2:  30%|| 8342/27626 [19:39<45:40,  7.04it/s]

{'epoch': 2, 'iter': 8340, 'avg_loss': 8.552758216171895, 'avg_acc': 49.996628102146026, 'loss': 8.76272201538086}


EP_train:2:  30%|| 8352/27626 [19:40<45:46,  7.02it/s]

{'epoch': 2, 'iter': 8350, 'avg_loss': 8.552643550625676, 'avg_acc': 50.000748413363674, 'loss': 8.365108489990234}


EP_train:2:  30%|| 8362/27626 [19:42<45:38,  7.03it/s]

{'epoch': 2, 'iter': 8360, 'avg_loss': 8.552024600409162, 'avg_acc': 50.0, 'loss': 9.379918098449707}


EP_train:2:  30%|| 8372/27626 [19:43<45:54,  6.99it/s]

{'epoch': 2, 'iter': 8370, 'avg_loss': 8.552734700828282, 'avg_acc': 49.99626687373074, 'loss': 8.829938888549805}


EP_train:2:  30%|| 8382/27626 [19:45<45:46,  7.01it/s]

{'epoch': 2, 'iter': 8380, 'avg_loss': 8.552512153750202, 'avg_acc': 49.99701706240305, 'loss': 7.711169719696045}


EP_train:2:  30%|| 8392/27626 [19:46<45:52,  6.99it/s]

{'epoch': 2, 'iter': 8390, 'avg_loss': 8.55208814097252, 'avg_acc': 49.99366881182219, 'loss': 8.03046703338623}


EP_train:2:  30%|| 8402/27626 [19:47<45:28,  7.05it/s]

{'epoch': 2, 'iter': 8400, 'avg_loss': 8.55186951675524, 'avg_acc': 49.99553624568504, 'loss': 8.835965156555176}


EP_train:2:  30%|| 8412/27626 [19:49<45:30,  7.04it/s]

{'epoch': 2, 'iter': 8410, 'avg_loss': 8.551786693094064, 'avg_acc': 49.99294079182023, 'loss': 8.68285083770752}


EP_train:2:  30%|| 8422/27626 [19:50<49:20,  6.49it/s]

{'epoch': 2, 'iter': 8420, 'avg_loss': 8.552021203903472, 'avg_acc': 49.99257807861299, 'loss': 8.904853820800781}


EP_train:2:  31%|| 8432/27626 [19:52<45:54,  6.97it/s]

{'epoch': 2, 'iter': 8430, 'avg_loss': 8.552023047357956, 'avg_acc': 49.99295753765864, 'loss': 8.650049209594727}


EP_train:2:  31%|| 8442/27626 [19:53<45:27,  7.03it/s]

{'epoch': 2, 'iter': 8440, 'avg_loss': 8.552017112999021, 'avg_acc': 49.994816964814596, 'loss': 8.49885368347168}


EP_train:2:  31%|| 8452/27626 [19:55<45:05,  7.09it/s]

{'epoch': 2, 'iter': 8450, 'avg_loss': 8.551647107963124, 'avg_acc': 49.99149508933854, 'loss': 7.702643871307373}


EP_train:2:  31%|| 8462/27626 [19:56<45:26,  7.03it/s]

{'epoch': 2, 'iter': 8460, 'avg_loss': 8.55173630235945, 'avg_acc': 49.99409053303392, 'loss': 8.850403785705566}


EP_train:2:  31%|| 8472/27626 [19:57<45:25,  7.03it/s]

{'epoch': 2, 'iter': 8470, 'avg_loss': 8.551926129768596, 'avg_acc': 49.98745720694133, 'loss': 7.606428146362305}


EP_train:2:  31%|| 8482/27626 [19:59<45:05,  7.08it/s]

{'epoch': 2, 'iter': 8480, 'avg_loss': 8.552128431074829, 'avg_acc': 49.983418818535554, 'loss': 8.893588066101074}


EP_train:2:  31%|| 8492/27626 [20:00<45:09,  7.06it/s]

{'epoch': 2, 'iter': 8490, 'avg_loss': 8.552105975563823, 'avg_acc': 49.98859086091155, 'loss': 9.928937911987305}


EP_train:2:  31%|| 8502/27626 [20:02<45:32,  7.00it/s]

{'epoch': 2, 'iter': 8500, 'avg_loss': 8.551637558386812, 'avg_acc': 49.99154511233972, 'loss': 8.88429069519043}


EP_train:2:  31%|| 8512/27626 [20:03<45:26,  7.01it/s]

{'epoch': 2, 'iter': 8510, 'avg_loss': 8.552047660916381, 'avg_acc': 49.99045353072494, 'loss': 8.548620223999023}


EP_train:2:  31%|| 8522/27626 [20:05<45:05,  7.06it/s]

{'epoch': 2, 'iter': 8520, 'avg_loss': 8.552086051732298, 'avg_acc': 49.99229843915033, 'loss': 8.751714706420898}


EP_train:2:  31%|| 8532/27626 [20:06<44:56,  7.08it/s]

{'epoch': 2, 'iter': 8530, 'avg_loss': 8.552181751643378, 'avg_acc': 49.98754542257649, 'loss': 9.266961097717285}


EP_train:2:  31%|| 8542/27626 [20:07<45:03,  7.06it/s]

{'epoch': 2, 'iter': 8540, 'avg_loss': 8.552035107683514, 'avg_acc': 49.99341412012645, 'loss': 7.977696895599365}


EP_train:2:  31%|| 8552/27626 [20:09<44:54,  7.08it/s]

{'epoch': 2, 'iter': 8550, 'avg_loss': 8.552205780516896, 'avg_acc': 49.99342182200912, 'loss': 8.940828323364258}


EP_train:2:  31%|| 8562/27626 [20:10<44:55,  7.07it/s]

{'epoch': 2, 'iter': 8560, 'avg_loss': 8.552572487810062, 'avg_acc': 49.99744480784955, 'loss': 9.266216278076172}


EP_train:2:  31%|| 8572/27626 [20:12<45:20,  7.00it/s]

{'epoch': 2, 'iter': 8570, 'avg_loss': 8.55219414959904, 'avg_acc': 49.99562478123906, 'loss': 9.084933280944824}


EP_train:2:  31%|| 8582/27626 [20:13<44:44,  7.09it/s]

{'epoch': 2, 'iter': 8580, 'avg_loss': 8.551960964800802, 'avg_acc': 49.995994056636754, 'loss': 8.587173461914062}


EP_train:2:  31%|| 8592/27626 [20:14<44:47,  7.08it/s]

{'epoch': 2, 'iter': 8590, 'avg_loss': 8.551447287606022, 'avg_acc': 49.98654114771272, 'loss': 8.022488594055176}


EP_train:2:  31%|| 8602/27626 [20:16<45:29,  6.97it/s]

{'epoch': 2, 'iter': 8600, 'avg_loss': 8.551206738765039, 'avg_acc': 49.98546680618532, 'loss': 8.484347343444824}


EP_train:2:  31%|| 8612/27626 [20:17<45:14,  7.00it/s]

{'epoch': 2, 'iter': 8610, 'avg_loss': 8.551242184796797, 'avg_acc': 49.982217512484034, 'loss': 8.898018836975098}


EP_train:2:  31%|| 8622/27626 [20:19<45:01,  7.03it/s]

{'epoch': 2, 'iter': 8620, 'avg_loss': 8.551331949516095, 'avg_acc': 49.97825078297181, 'loss': 8.450115203857422}


EP_train:2:  31%|| 8632/27626 [20:20<45:04,  7.02it/s]

{'epoch': 2, 'iter': 8630, 'avg_loss': 8.551555708625642, 'avg_acc': 49.98117251766887, 'loss': 8.357222557067871}


EP_train:2:  31%|| 8642/27626 [20:21<44:50,  7.06it/s]

{'epoch': 2, 'iter': 8640, 'avg_loss': 8.552130982412562, 'avg_acc': 49.986980673533154, 'loss': 7.712807655334473}


EP_train:2:  31%|| 8652/27626 [20:23<44:51,  7.05it/s]

{'epoch': 2, 'iter': 8650, 'avg_loss': 8.55174781071521, 'avg_acc': 49.98771818286903, 'loss': 7.815392017364502}


EP_train:2:  31%|| 8662/27626 [20:24<44:35,  7.09it/s]

{'epoch': 2, 'iter': 8660, 'avg_loss': 8.551736904668857, 'avg_acc': 49.98881480198592, 'loss': 8.531408309936523}


EP_train:2:  31%|| 8672/27626 [20:26<44:34,  7.09it/s]

{'epoch': 2, 'iter': 8670, 'avg_loss': 8.551462992460392, 'avg_acc': 49.986304924460846, 'loss': 8.461662292480469}


EP_train:2:  31%|| 8682/27626 [20:27<44:37,  7.08it/s]

{'epoch': 2, 'iter': 8680, 'avg_loss': 8.55152529536435, 'avg_acc': 49.98776062665591, 'loss': 9.244046211242676}


EP_train:2:  31%|| 8692/27626 [20:29<44:52,  7.03it/s]

{'epoch': 2, 'iter': 8690, 'avg_loss': 8.551650740748897, 'avg_acc': 49.98777470946957, 'loss': 8.948921203613281}


EP_train:2:  31%|| 8702/27626 [20:30<44:43,  7.05it/s]

{'epoch': 2, 'iter': 8700, 'avg_loss': 8.55152166872076, 'avg_acc': 49.982042293989196, 'loss': 7.903231620788574}


EP_train:2:  32%|| 8712/27626 [20:31<44:41,  7.05it/s]

{'epoch': 2, 'iter': 8710, 'avg_loss': 8.551339706232156, 'avg_acc': 49.984215359889795, 'loss': 7.82173490524292}


EP_train:2:  32%|| 8722/27626 [20:33<44:59,  7.00it/s]

{'epoch': 2, 'iter': 8720, 'avg_loss': 8.551051370677373, 'avg_acc': 49.984950120399034, 'loss': 7.841697692871094}


EP_train:2:  32%|| 8732/27626 [20:34<44:48,  7.03it/s]

{'epoch': 2, 'iter': 8730, 'avg_loss': 8.551419227206, 'avg_acc': 49.98890447829572, 'loss': 8.191910743713379}


EP_train:2:  32%|| 8742/27626 [20:36<44:36,  7.06it/s]

{'epoch': 2, 'iter': 8740, 'avg_loss': 8.551418657884378, 'avg_acc': 49.998927468253065, 'loss': 7.756834983825684}


EP_train:2:  32%|| 8752/27626 [20:37<44:37,  7.05it/s]

{'epoch': 2, 'iter': 8750, 'avg_loss': 8.551389232797222, 'avg_acc': 49.996071877499716, 'loss': 8.58890438079834}


EP_train:2:  32%|| 8762/27626 [20:38<44:29,  7.07it/s]

{'epoch': 2, 'iter': 8760, 'avg_loss': 8.551187321769907, 'avg_acc': 49.99072594452688, 'loss': 8.587984085083008}


EP_train:2:  32%|| 8772/27626 [20:40<44:27,  7.07it/s]

{'epoch': 2, 'iter': 8770, 'avg_loss': 8.551141978642129, 'avg_acc': 49.987173640405885, 'loss': 7.756484508514404}


EP_train:2:  32%|| 8782/27626 [20:41<44:38,  7.04it/s]

{'epoch': 2, 'iter': 8780, 'avg_loss': 8.550862902919437, 'avg_acc': 49.99145883156816, 'loss': 7.869760990142822}


EP_train:2:  32%|| 8792/27626 [20:43<44:59,  6.98it/s]

{'epoch': 2, 'iter': 8790, 'avg_loss': 8.54982461501898, 'avg_acc': 49.9893356842225, 'loss': 7.111666202545166}


EP_train:2:  32%|| 8802/27626 [20:44<44:26,  7.06it/s]

{'epoch': 2, 'iter': 8800, 'avg_loss': 8.549286794396995, 'avg_acc': 49.983666628792186, 'loss': 8.387189865112305}


EP_train:2:  32%|| 8812/27626 [20:46<44:47,  7.00it/s]

{'epoch': 2, 'iter': 8810, 'avg_loss': 8.54951802015494, 'avg_acc': 49.985813188060376, 'loss': 8.981743812561035}


EP_train:2:  32%|| 8822/27626 [20:47<44:34,  7.03it/s]

{'epoch': 2, 'iter': 8820, 'avg_loss': 8.549466387185769, 'avg_acc': 49.99185183085818, 'loss': 8.477895736694336}


EP_train:2:  32%|| 8832/27626 [20:48<44:10,  7.09it/s]

{'epoch': 2, 'iter': 8830, 'avg_loss': 8.549500469280765, 'avg_acc': 49.99681519646699, 'loss': 9.356842041015625}


EP_train:2:  32%|| 8842/27626 [20:50<44:32,  7.03it/s]

{'epoch': 2, 'iter': 8840, 'avg_loss': 8.54955672656852, 'avg_acc': 49.99116332994005, 'loss': 8.560880661010742}


EP_train:2:  32%|| 8852/27626 [20:51<44:10,  7.08it/s]

{'epoch': 2, 'iter': 8850, 'avg_loss': 8.549705856412888, 'avg_acc': 49.99046717884985, 'loss': 8.6202392578125}


EP_train:2:  32%|| 8862/27626 [20:53<44:48,  6.98it/s]

{'epoch': 2, 'iter': 8860, 'avg_loss': 8.549553160822938, 'avg_acc': 49.98624590903961, 'loss': 8.489900588989258}


EP_train:2:  32%|| 8872/27626 [20:54<44:33,  7.02it/s]

{'epoch': 2, 'iter': 8870, 'avg_loss': 8.54925164320654, 'avg_acc': 49.98661368504115, 'loss': 7.736985683441162}


EP_train:2:  32%|| 8882/27626 [20:55<44:21,  7.04it/s]

{'epoch': 2, 'iter': 8880, 'avg_loss': 8.549375953444516, 'avg_acc': 49.97959126224524, 'loss': 9.207447052001953}


EP_train:2:  32%|| 8892/27626 [20:57<44:30,  7.02it/s]

{'epoch': 2, 'iter': 8890, 'avg_loss': 8.549224455409425, 'avg_acc': 49.98418344393207, 'loss': 8.692658424377441}


EP_train:2:  32%|| 8902/27626 [20:58<44:01,  7.09it/s]

{'epoch': 2, 'iter': 8900, 'avg_loss': 8.549169272174327, 'avg_acc': 49.98736097067745, 'loss': 8.306896209716797}


EP_train:2:  32%|| 8912/27626 [21:00<44:07,  7.07it/s]

{'epoch': 2, 'iter': 8910, 'avg_loss': 8.548957368097058, 'avg_acc': 49.99123274604422, 'loss': 8.553145408630371}


EP_train:2:  32%|| 8922/27626 [21:01<44:21,  7.03it/s]

{'epoch': 2, 'iter': 8920, 'avg_loss': 8.548896043320138, 'avg_acc': 49.9940449501177, 'loss': 8.176820755004883}


EP_train:2:  32%|| 8932/27626 [21:03<43:58,  7.09it/s]

{'epoch': 2, 'iter': 8930, 'avg_loss': 8.549036256704522, 'avg_acc': 49.99615104691524, 'loss': 8.911749839782715}


EP_train:2:  32%|| 8942/27626 [21:04<44:06,  7.06it/s]

{'epoch': 2, 'iter': 8940, 'avg_loss': 8.548815555346383, 'avg_acc': 49.99266021697797, 'loss': 7.676457405090332}


EP_train:2:  32%|| 8952/27626 [21:05<44:16,  7.03it/s]

{'epoch': 2, 'iter': 8950, 'avg_loss': 8.549095365564565, 'avg_acc': 49.99266841693665, 'loss': 9.519126892089844}


EP_train:2:  32%|| 8962/27626 [21:07<44:56,  6.92it/s]

{'epoch': 2, 'iter': 8960, 'avg_loss': 8.549479904617533, 'avg_acc': 49.99616393259681, 'loss': 7.9518256187438965}


EP_train:2:  32%|| 8972/27626 [21:08<44:14,  7.03it/s]

{'epoch': 2, 'iter': 8970, 'avg_loss': 8.549323173978365, 'avg_acc': 50.00139337866458, 'loss': 9.042287826538086}


EP_train:2:  33%|| 8982/27626 [21:10<43:57,  7.07it/s]

{'epoch': 2, 'iter': 8980, 'avg_loss': 8.549529246425937, 'avg_acc': 49.997564302416215, 'loss': 8.462188720703125}


EP_train:2:  33%|| 8992/27626 [21:11<44:05,  7.04it/s]

{'epoch': 2, 'iter': 8990, 'avg_loss': 8.549333674904236, 'avg_acc': 50.0, 'loss': 7.686972141265869}


EP_train:2:  33%|| 9002/27626 [21:12<44:12,  7.02it/s]

{'epoch': 2, 'iter': 9000, 'avg_loss': 8.549405090809664, 'avg_acc': 50.00069436729252, 'loss': 8.962162971496582}


EP_train:2:  33%|| 9012/27626 [21:14<44:07,  7.03it/s]

{'epoch': 2, 'iter': 9010, 'avg_loss': 8.549636102677873, 'avg_acc': 49.998612806569746, 'loss': 8.110142707824707}


EP_train:2:  33%|| 9022/27626 [21:15<43:47,  7.08it/s]

{'epoch': 2, 'iter': 9020, 'avg_loss': 8.550016883101197, 'avg_acc': 49.99722868861545, 'loss': 8.719481468200684}


EP_train:2:  33%|| 9032/27626 [21:17<44:02,  7.04it/s]

{'epoch': 2, 'iter': 9030, 'avg_loss': 8.550142999054941, 'avg_acc': 49.99723175728048, 'loss': 8.457462310791016}


EP_train:2:  33%|| 9042/27626 [21:18<43:52,  7.06it/s]

{'epoch': 2, 'iter': 9040, 'avg_loss': 8.550071196750707, 'avg_acc': 49.99446963831435, 'loss': 8.610997200012207}


EP_train:2:  33%|| 9052/27626 [21:19<43:55,  7.05it/s]

{'epoch': 2, 'iter': 9050, 'avg_loss': 8.549617310674556, 'avg_acc': 50.002416860015465, 'loss': 8.33926773071289}


EP_train:2:  33%|| 9062/27626 [21:21<44:05,  7.02it/s]

{'epoch': 2, 'iter': 9060, 'avg_loss': 8.549642577601274, 'avg_acc': 50.00172442335283, 'loss': 7.9434285163879395}


EP_train:2:  33%|| 9072/27626 [21:22<43:54,  7.04it/s]

{'epoch': 2, 'iter': 9070, 'avg_loss': 8.549462227847108, 'avg_acc': 49.99827747767611, 'loss': 8.003373146057129}


EP_train:2:  33%|| 9082/27626 [21:24<43:48,  7.06it/s]

{'epoch': 2, 'iter': 9080, 'avg_loss': 8.549867159468842, 'avg_acc': 49.99655874903645, 'loss': 9.095000267028809}


EP_train:2:  33%|| 9092/27626 [21:25<43:38,  7.08it/s]

{'epoch': 2, 'iter': 9090, 'avg_loss': 8.54987965971838, 'avg_acc': 49.997250027499724, 'loss': 8.15720272064209}


EP_train:2:  33%|| 9102/27626 [21:27<43:27,  7.10it/s]

{'epoch': 2, 'iter': 9100, 'avg_loss': 8.549950344669153, 'avg_acc': 49.996909680254916, 'loss': 8.606669425964355}


EP_train:2:  33%|| 9112/27626 [21:28<43:39,  7.07it/s]

{'epoch': 2, 'iter': 9110, 'avg_loss': 8.549893030075141, 'avg_acc': 49.99588409614751, 'loss': 8.269610404968262}


EP_train:2:  33%|| 9122/27626 [21:29<43:51,  7.03it/s]

{'epoch': 2, 'iter': 9120, 'avg_loss': 8.549362084175748, 'avg_acc': 49.99314768117531, 'loss': 8.506272315979004}


EP_train:2:  33%|| 9132/27626 [21:31<43:39,  7.06it/s]

{'epoch': 2, 'iter': 9130, 'avg_loss': 8.549275056253895, 'avg_acc': 49.995208629941956, 'loss': 8.3272066116333}


EP_train:2:  33%|| 9142/27626 [21:32<43:50,  7.03it/s]

{'epoch': 2, 'iter': 9140, 'avg_loss': 8.548964391768259, 'avg_acc': 49.99487200525107, 'loss': 9.318970680236816}


EP_train:2:  33%|| 9152/27626 [21:34<43:24,  7.09it/s]

{'epoch': 2, 'iter': 9150, 'avg_loss': 8.54903299729559, 'avg_acc': 49.99453611627144, 'loss': 8.989577293395996}


EP_train:2:  33%|| 9162/27626 [21:35<43:45,  7.03it/s]

{'epoch': 2, 'iter': 9160, 'avg_loss': 8.549297598118528, 'avg_acc': 49.99420096059382, 'loss': 8.744958877563477}


EP_train:2:  33%|| 9172/27626 [21:36<43:55,  7.00it/s]

{'epoch': 2, 'iter': 9170, 'avg_loss': 8.549229802514432, 'avg_acc': 49.99182204775924, 'loss': 8.09887981414795}


EP_train:2:  33%|| 9182/27626 [21:38<43:34,  7.05it/s]

{'epoch': 2, 'iter': 9180, 'avg_loss': 8.54902133275813, 'avg_acc': 49.9911502015031, 'loss': 7.145942687988281}


EP_train:2:  33%|| 9192/27626 [21:39<43:30,  7.06it/s]

{'epoch': 2, 'iter': 9190, 'avg_loss': 8.548887231536456, 'avg_acc': 49.98639973887499, 'loss': 8.63671588897705}


EP_train:2:  33%|| 9202/27626 [21:41<43:23,  7.08it/s]

{'epoch': 2, 'iter': 9200, 'avg_loss': 8.548815718164601, 'avg_acc': 49.98709379415281, 'loss': 8.589556694030762}


EP_train:2:  33%|| 9212/27626 [21:42<43:47,  7.01it/s]

{'epoch': 2, 'iter': 9210, 'avg_loss': 8.549003950923911, 'avg_acc': 49.98880414721529, 'loss': 8.968707084655762}


EP_train:2:  33%|| 9222/27626 [21:44<43:33,  7.04it/s]

{'epoch': 2, 'iter': 9220, 'avg_loss': 8.548927569360902, 'avg_acc': 49.990171890250515, 'loss': 8.234304428100586}


EP_train:2:  33%|| 9232/27626 [21:45<43:19,  7.08it/s]

{'epoch': 2, 'iter': 9230, 'avg_loss': 8.54896996259147, 'avg_acc': 49.98679720506987, 'loss': 8.23300552368164}


EP_train:2:  33%|| 9242/27626 [21:46<43:11,  7.09it/s]

{'epoch': 2, 'iter': 9240, 'avg_loss': 8.54888706694381, 'avg_acc': 49.98816415972298, 'loss': 8.901151657104492}


EP_train:2:  33%|| 9252/27626 [21:48<43:19,  7.07it/s]

{'epoch': 2, 'iter': 9250, 'avg_loss': 8.548834933756288, 'avg_acc': 49.982772132742404, 'loss': 9.076654434204102}


EP_train:2:  34%|| 9262/27626 [21:49<43:15,  7.08it/s]

{'epoch': 2, 'iter': 9260, 'avg_loss': 8.548872347488707, 'avg_acc': 49.98042867940827, 'loss': 8.389453887939453}


EP_train:2:  34%|| 9272/27626 [21:51<43:08,  7.09it/s]

{'epoch': 2, 'iter': 9270, 'avg_loss': 8.548769626940432, 'avg_acc': 49.9804497896667, 'loss': 8.270445823669434}


EP_train:2:  34%|| 9282/27626 [21:52<43:15,  7.07it/s]

{'epoch': 2, 'iter': 9280, 'avg_loss': 8.548769950301544, 'avg_acc': 49.97407337571382, 'loss': 7.971313953399658}


EP_train:2:  34%|| 9292/27626 [21:53<43:24,  7.04it/s]

{'epoch': 2, 'iter': 9290, 'avg_loss': 8.548675757282812, 'avg_acc': 49.97611936282424, 'loss': 8.83178424835205}


EP_train:2:  34%|| 9302/27626 [21:55<43:08,  7.08it/s]

{'epoch': 2, 'iter': 9300, 'avg_loss': 8.548812394346708, 'avg_acc': 49.98185678959251, 'loss': 9.112507820129395}


EP_train:2:  34%|| 9312/27626 [21:56<43:09,  7.07it/s]

{'epoch': 2, 'iter': 9310, 'avg_loss': 8.549355013620685, 'avg_acc': 49.988253141445604, 'loss': 9.76212215423584}


EP_train:2:  34%|| 9322/27626 [21:58<43:26,  7.02it/s]

{'epoch': 2, 'iter': 9320, 'avg_loss': 8.549752554736441, 'avg_acc': 49.98960680184529, 'loss': 8.054055213928223}


EP_train:2:  34%|| 9332/27626 [21:59<43:01,  7.09it/s]

{'epoch': 2, 'iter': 9330, 'avg_loss': 8.54974599620969, 'avg_acc': 49.991627371128494, 'loss': 8.43242359161377}


EP_train:2:  34%|| 9342/27626 [22:00<43:03,  7.08it/s]

{'epoch': 2, 'iter': 9340, 'avg_loss': 8.549918258666175, 'avg_acc': 49.98996360132748, 'loss': 8.824240684509277}


EP_train:2:  34%|| 9352/27626 [22:02<43:00,  7.08it/s]

{'epoch': 2, 'iter': 9350, 'avg_loss': 8.549753411825092, 'avg_acc': 49.989305956582186, 'loss': 8.482605934143066}


EP_train:2:  34%|| 9362/27626 [22:03<43:01,  7.07it/s]

{'epoch': 2, 'iter': 9360, 'avg_loss': 8.549595595806942, 'avg_acc': 49.987648221343875, 'loss': 8.13204288482666}


EP_train:2:  34%|| 9372/27626 [22:05<43:09,  7.05it/s]

{'epoch': 2, 'iter': 9370, 'avg_loss': 8.549560160306662, 'avg_acc': 49.98632749973322, 'loss': 8.065945625305176}


EP_train:2:  34%|| 9382/27626 [22:06<43:05,  7.06it/s]

{'epoch': 2, 'iter': 9380, 'avg_loss': 8.549504208470722, 'avg_acc': 49.99400383754397, 'loss': 7.682146072387695}


EP_train:2:  34%|| 9392/27626 [22:07<42:53,  7.09it/s]

{'epoch': 2, 'iter': 9390, 'avg_loss': 8.549582810412579, 'avg_acc': 49.996339580449366, 'loss': 8.310946464538574}


EP_train:2:  34%|| 9402/27626 [22:09<42:57,  7.07it/s]

{'epoch': 2, 'iter': 9400, 'avg_loss': 8.549806333275479, 'avg_acc': 49.99966758855441, 'loss': 8.789972305297852}


EP_train:2:  34%|| 9412/27626 [22:10<43:21,  7.00it/s]

{'epoch': 2, 'iter': 9410, 'avg_loss': 8.549658276958503, 'avg_acc': 49.99402295186484, 'loss': 8.689533233642578}


EP_train:2:  34%|| 9422/27626 [22:12<43:11,  7.03it/s]

{'epoch': 2, 'iter': 9420, 'avg_loss': 8.549932200062644, 'avg_acc': 49.99402929625305, 'loss': 9.032201766967773}


EP_train:2:  34%|| 9432/27626 [22:13<43:22,  6.99it/s]

{'epoch': 2, 'iter': 9430, 'avg_loss': 8.550213111899284, 'avg_acc': 49.99569239741279, 'loss': 9.375146865844727}


EP_train:2:  34%|| 9442/27626 [22:15<42:56,  7.06it/s]

{'epoch': 2, 'iter': 9440, 'avg_loss': 8.55018748475414, 'avg_acc': 49.996358966211204, 'loss': 9.591015815734863}


EP_train:2:  34%|| 9452/27626 [22:16<42:55,  7.06it/s]

{'epoch': 2, 'iter': 9450, 'avg_loss': 8.5502664137487, 'avg_acc': 49.994048248862555, 'loss': 8.83854866027832}


EP_train:2:  34%|| 9462/27626 [22:17<42:52,  7.06it/s]

{'epoch': 2, 'iter': 9460, 'avg_loss': 8.550362053689613, 'avg_acc': 49.991412112884476, 'loss': 8.228469848632812}


EP_train:2:  34%|| 9472/27626 [22:19<42:46,  7.07it/s]

{'epoch': 2, 'iter': 9470, 'avg_loss': 8.550524074466638, 'avg_acc': 49.9881216344631, 'loss': 8.82298469543457}


EP_train:2:  34%|| 9482/27626 [22:20<42:42,  7.08it/s]

{'epoch': 2, 'iter': 9480, 'avg_loss': 8.550247806170459, 'avg_acc': 49.987804556481386, 'loss': 8.402556419372559}


EP_train:2:  34%|| 9492/27626 [22:22<42:43,  7.07it/s]

{'epoch': 2, 'iter': 9490, 'avg_loss': 8.550271757308932, 'avg_acc': 49.9868296280687, 'loss': 8.426929473876953}


EP_train:2:  34%|| 9502/27626 [22:23<43:05,  7.01it/s]

{'epoch': 2, 'iter': 9500, 'avg_loss': 8.550206260321806, 'avg_acc': 49.98947479212714, 'loss': 8.661598205566406}


EP_train:2:  34%|| 9512/27626 [22:24<42:58,  7.03it/s]

{'epoch': 2, 'iter': 9510, 'avg_loss': 8.550077235908015, 'avg_acc': 49.992114393859744, 'loss': 8.088713645935059}


EP_train:2:  34%|| 9522/27626 [22:26<42:46,  7.05it/s]

{'epoch': 2, 'iter': 9520, 'avg_loss': 8.550163884559057, 'avg_acc': 49.99606133809474, 'loss': 8.671523094177246}


EP_train:2:  35%|| 9532/27626 [22:27<43:02,  7.01it/s]

{'epoch': 2, 'iter': 9530, 'avg_loss': 8.550048103088372, 'avg_acc': 49.998360612737386, 'loss': 7.871263027191162}


EP_train:2:  35%|| 9542/27626 [22:29<42:36,  7.07it/s]

{'epoch': 2, 'iter': 9540, 'avg_loss': 8.55020222510448, 'avg_acc': 49.99213918876428, 'loss': 9.276284217834473}


EP_train:2:  35%|| 9552/27626 [22:30<42:50,  7.03it/s]

{'epoch': 2, 'iter': 9550, 'avg_loss': 8.55021866822465, 'avg_acc': 49.99509213694901, 'loss': 8.347214698791504}


EP_train:2:  35%|| 9562/27626 [22:32<42:52,  7.02it/s]

{'epoch': 2, 'iter': 9560, 'avg_loss': 8.550252824357807, 'avg_acc': 49.994116724192025, 'loss': 8.192482948303223}


EP_train:2:  35%|| 9572/27626 [22:33<42:43,  7.04it/s]

{'epoch': 2, 'iter': 9570, 'avg_loss': 8.550128683078851, 'avg_acc': 49.99151081391704, 'loss': 7.778437614440918}


EP_train:2:  35%|| 9582/27626 [22:34<42:49,  7.02it/s]

{'epoch': 2, 'iter': 9580, 'avg_loss': 8.550155553946153, 'avg_acc': 49.99315050621021, 'loss': 7.862834930419922}


EP_train:2:  35%|| 9592/27626 [22:36<43:18,  6.94it/s]

{'epoch': 2, 'iter': 9590, 'avg_loss': 8.550534138775458, 'avg_acc': 49.98989938483995, 'loss': 9.619144439697266}


EP_train:2:  35%|| 9602/27626 [22:37<42:52,  7.01it/s]

{'epoch': 2, 'iter': 9600, 'avg_loss': 8.5502870942811, 'avg_acc': 49.99316477450266, 'loss': 8.112531661987305}


EP_train:2:  35%|| 9612/27626 [22:39<42:37,  7.04it/s]

{'epoch': 2, 'iter': 9610, 'avg_loss': 8.55029910204376, 'avg_acc': 49.992196441577356, 'loss': 7.990499019622803}


EP_train:2:  35%|| 9622/27626 [22:40<42:27,  7.07it/s]

{'epoch': 2, 'iter': 9620, 'avg_loss': 8.550146771498275, 'avg_acc': 49.99382860409521, 'loss': 7.725714206695557}


EP_train:2:  35%|| 9632/27626 [22:41<42:11,  7.11it/s]

{'epoch': 2, 'iter': 9630, 'avg_loss': 8.550244677785946, 'avg_acc': 49.993835011940604, 'loss': 8.200626373291016}


EP_train:2:  35%|| 9642/27626 [22:43<42:19,  7.08it/s]

{'epoch': 2, 'iter': 9640, 'avg_loss': 8.550503633289637, 'avg_acc': 49.997731044497456, 'loss': 8.85777473449707}


EP_train:2:  35%|| 9652/27626 [22:44<42:26,  7.06it/s]

{'epoch': 2, 'iter': 9650, 'avg_loss': 8.550423543325497, 'avg_acc': 49.99935239871516, 'loss': 8.250631332397461}


EP_train:2:  35%|| 9662/27626 [22:46<42:13,  7.09it/s]

{'epoch': 2, 'iter': 9660, 'avg_loss': 8.550582131272382, 'avg_acc': 49.99967653452024, 'loss': 8.320834159851074}


EP_train:2:  35%|| 9672/27626 [22:47<42:16,  7.08it/s]

{'epoch': 2, 'iter': 9670, 'avg_loss': 8.55054920529052, 'avg_acc': 50.00032313101024, 'loss': 8.336604118347168}


EP_train:2:  35%|| 9682/27626 [22:48<42:43,  7.00it/s]

{'epoch': 2, 'iter': 9680, 'avg_loss': 8.550656119280518, 'avg_acc': 50.001291188926764, 'loss': 8.363178253173828}


EP_train:2:  35%|| 9692/27626 [22:50<42:21,  7.06it/s]

{'epoch': 2, 'iter': 9690, 'avg_loss': 8.550571313080017, 'avg_acc': 49.99967753585801, 'loss': 8.892001152038574}


EP_train:2:  35%|| 9702/27626 [22:51<42:31,  7.02it/s]

{'epoch': 2, 'iter': 9700, 'avg_loss': 8.55079157196877, 'avg_acc': 49.995168023915056, 'loss': 8.695740699768066}


EP_train:2:  35%|| 9712/27626 [22:53<42:22,  7.05it/s]

{'epoch': 2, 'iter': 9710, 'avg_loss': 8.55076426863609, 'avg_acc': 49.991954999485124, 'loss': 9.606169700622559}


EP_train:2:  35%|| 9722/27626 [22:54<42:23,  7.04it/s]

{'epoch': 2, 'iter': 9720, 'avg_loss': 8.550920186937008, 'avg_acc': 49.99549943421459, 'loss': 8.5411958694458}


EP_train:2:  35%|| 9732/27626 [22:56<42:40,  6.99it/s]

{'epoch': 2, 'iter': 9730, 'avg_loss': 8.551300953206564, 'avg_acc': 49.99646747507964, 'loss': 9.0797700881958}


EP_train:2:  35%|| 9742/27626 [22:57<42:27,  7.02it/s]

{'epoch': 2, 'iter': 9740, 'avg_loss': 8.551504838663675, 'avg_acc': 50.00224566266297, 'loss': 8.463016510009766}


EP_train:2:  35%|| 9752/27626 [22:58<41:56,  7.10it/s]

{'epoch': 2, 'iter': 9750, 'avg_loss': 8.55151902212386, 'avg_acc': 50.0, 'loss': 8.964821815490723}


EP_train:2:  35%|| 9762/27626 [23:00<42:12,  7.05it/s]

{'epoch': 2, 'iter': 9760, 'avg_loss': 8.552051427679196, 'avg_acc': 50.00256121299047, 'loss': 8.748848915100098}


EP_train:2:  35%|| 9772/27626 [23:01<42:22,  7.02it/s]

{'epoch': 2, 'iter': 9770, 'avg_loss': 8.551844900845872, 'avg_acc': 50.00095947190666, 'loss': 7.486805438995361}


EP_train:2:  35%|| 9782/27626 [23:03<42:11,  7.05it/s]

{'epoch': 2, 'iter': 9780, 'avg_loss': 8.551683628416514, 'avg_acc': 50.00223647888764, 'loss': 9.031177520751953}


EP_train:2:  35%|| 9792/27626 [23:04<42:09,  7.05it/s]

{'epoch': 2, 'iter': 9790, 'avg_loss': 8.551408845671423, 'avg_acc': 49.99968082933306, 'loss': 7.98122501373291}


EP_train:2:  35%|| 9802/27626 [23:05<41:55,  7.09it/s]

{'epoch': 2, 'iter': 9800, 'avg_loss': 8.551343910548896, 'avg_acc': 49.99330425466789, 'loss': 8.348761558532715}


EP_train:2:  36%|| 9812/27626 [23:07<41:53,  7.09it/s]

{'epoch': 2, 'iter': 9810, 'avg_loss': 8.551417768505805, 'avg_acc': 49.99522219957191, 'loss': 7.895534038543701}


EP_train:2:  36%|| 9822/27626 [23:08<42:21,  7.01it/s]

{'epoch': 2, 'iter': 9820, 'avg_loss': 8.551526307112912, 'avg_acc': 49.99522706445372, 'loss': 9.002819061279297}


EP_train:2:  36%|| 9832/27626 [23:10<42:20,  7.00it/s]

{'epoch': 2, 'iter': 9830, 'avg_loss': 8.551319735203714, 'avg_acc': 49.99459617536365, 'loss': 8.886133193969727}


EP_train:2:  36%|| 9842/27626 [23:11<42:00,  7.05it/s]

{'epoch': 2, 'iter': 9840, 'avg_loss': 8.55138195347173, 'avg_acc': 49.998094705822574, 'loss': 8.132801055908203}


EP_train:2:  36%|| 9852/27626 [23:13<41:55,  7.07it/s]

{'epoch': 2, 'iter': 9850, 'avg_loss': 8.5515137677168, 'avg_acc': 49.991117653030145, 'loss': 9.408182144165039}


EP_train:2:  36%|| 9862/27626 [23:14<42:17,  7.00it/s]

{'epoch': 2, 'iter': 9860, 'avg_loss': 8.551414533943351, 'avg_acc': 49.9857392759355, 'loss': 8.86500358581543}


EP_train:2:  36%|| 9872/27626 [23:15<42:06,  7.03it/s]

{'epoch': 2, 'iter': 9870, 'avg_loss': 8.551345229136787, 'avg_acc': 49.987336642690714, 'loss': 7.2313947677612305}


EP_train:2:  36%|| 9882/27626 [23:17<42:01,  7.04it/s]

{'epoch': 2, 'iter': 9880, 'avg_loss': 8.55133571123838, 'avg_acc': 49.98481935026819, 'loss': 8.409416198730469}


EP_train:2:  36%|| 9892/27626 [23:18<41:42,  7.09it/s]

{'epoch': 2, 'iter': 9890, 'avg_loss': 8.551106015105976, 'avg_acc': 49.98167526033768, 'loss': 8.160553932189941}


EP_train:2:  36%|| 9902/27626 [23:20<41:37,  7.10it/s]

{'epoch': 2, 'iter': 9900, 'avg_loss': 8.550773416260446, 'avg_acc': 49.98169376830623, 'loss': 8.119799613952637}


EP_train:2:  36%|| 9912/27626 [23:21<41:53,  7.05it/s]

{'epoch': 2, 'iter': 9910, 'avg_loss': 8.550457272131716, 'avg_acc': 49.97729795177076, 'loss': 8.544846534729004}


EP_train:2:  36%|| 9922/27626 [23:22<41:46,  7.06it/s]

{'epoch': 2, 'iter': 9920, 'avg_loss': 8.550173976466777, 'avg_acc': 49.9703910896079, 'loss': 9.3018159866333}


EP_train:2:  36%|| 9932/27626 [23:24<41:52,  7.04it/s]

{'epoch': 2, 'iter': 9930, 'avg_loss': 8.550389914675405, 'avg_acc': 49.97073557547075, 'loss': 8.376580238342285}


EP_train:2:  36%|| 9942/27626 [23:25<42:02,  7.01it/s]

{'epoch': 2, 'iter': 9940, 'avg_loss': 8.550168964515304, 'avg_acc': 49.968878885424, 'loss': 8.924925804138184}


EP_train:2:  36%|| 9952/27626 [23:27<41:52,  7.03it/s]

{'epoch': 2, 'iter': 9950, 'avg_loss': 8.550182793730048, 'avg_acc': 49.97173650889358, 'loss': 9.10045337677002}


EP_train:2:  36%|| 9962/27626 [23:28<41:38,  7.07it/s]

{'epoch': 2, 'iter': 9960, 'avg_loss': 8.550066421314627, 'avg_acc': 49.97772562995683, 'loss': 8.491302490234375}


EP_train:2:  36%|| 9972/27626 [23:29<41:30,  7.09it/s]

{'epoch': 2, 'iter': 9970, 'avg_loss': 8.550005145279052, 'avg_acc': 49.97680774245312, 'loss': 8.221759796142578}


EP_train:2:  36%|| 9982/27626 [23:31<41:44,  7.05it/s]

{'epoch': 2, 'iter': 9980, 'avg_loss': 8.550136875501915, 'avg_acc': 49.97933573790201, 'loss': 7.983181953430176}


EP_train:2:  36%|| 9992/27626 [23:32<41:49,  7.03it/s]

{'epoch': 2, 'iter': 9990, 'avg_loss': 8.550033232194743, 'avg_acc': 49.98248423581223, 'loss': 8.85545539855957}


EP_train:2:  36%|| 10002/27626 [23:34<41:50,  7.02it/s]

{'epoch': 2, 'iter': 10000, 'avg_loss': 8.550086782856138, 'avg_acc': 49.98093940605939, 'loss': 8.953010559082031}


EP_train:2:  36%|| 10012/27626 [23:35<41:29,  7.08it/s]

{'epoch': 2, 'iter': 10010, 'avg_loss': 8.549974868971798, 'avg_acc': 49.97783687943262, 'loss': 8.268187522888184}


EP_train:2:  36%|| 10022/27626 [23:37<41:44,  7.03it/s]

{'epoch': 2, 'iter': 10020, 'avg_loss': 8.550126291870203, 'avg_acc': 49.97817084123341, 'loss': 7.514392375946045}


EP_train:2:  36%|| 10032/27626 [23:38<41:41,  7.03it/s]

{'epoch': 2, 'iter': 10030, 'avg_loss': 8.549850269966402, 'avg_acc': 49.97850413717476, 'loss': 7.895968914031982}


EP_train:2:  36%|| 10042/27626 [23:39<41:36,  7.04it/s]

{'epoch': 2, 'iter': 10040, 'avg_loss': 8.549887016396303, 'avg_acc': 49.982882681007865, 'loss': 8.322829246520996}


EP_train:2:  36%|| 10052/27626 [23:41<41:33,  7.05it/s]

{'epoch': 2, 'iter': 10050, 'avg_loss': 8.550265197622966, 'avg_acc': 49.98694159785096, 'loss': 9.667389869689941}


EP_train:2:  36%|| 10062/27626 [23:42<41:23,  7.07it/s]

{'epoch': 2, 'iter': 10060, 'avg_loss': 8.550279698212167, 'avg_acc': 49.98788639300268, 'loss': 8.523174285888672}


EP_train:2:  36%|| 10072/27626 [23:44<41:23,  7.07it/s]

{'epoch': 2, 'iter': 10070, 'avg_loss': 8.550042369445947, 'avg_acc': 49.98448515539668, 'loss': 7.693882942199707}


EP_train:2:  36%|| 10082/27626 [23:45<41:25,  7.06it/s]

{'epoch': 2, 'iter': 10080, 'avg_loss': 8.549931020730073, 'avg_acc': 49.985430512845944, 'loss': 7.770420074462891}


EP_train:2:  37%|| 10092/27626 [23:46<41:21,  7.06it/s]

{'epoch': 2, 'iter': 10090, 'avg_loss': 8.54993130461145, 'avg_acc': 49.98265781389357, 'loss': 8.830487251281738}


EP_train:2:  37%|| 10102/27626 [23:48<41:16,  7.08it/s]

{'epoch': 2, 'iter': 10100, 'avg_loss': 8.549732737724833, 'avg_acc': 49.98236560736561, 'loss': 8.152697563171387}


EP_train:2:  37%|| 10112/27626 [23:49<41:13,  7.08it/s]

{'epoch': 2, 'iter': 10110, 'avg_loss': 8.550217412626598, 'avg_acc': 49.97712886954802, 'loss': 9.249526977539062}


EP_train:2:  37%|| 10122/27626 [23:51<41:13,  7.08it/s]

{'epoch': 2, 'iter': 10120, 'avg_loss': 8.549901583439489, 'avg_acc': 49.97684270329019, 'loss': 7.646761417388916}


EP_train:2:  37%|| 10132/27626 [23:52<41:39,  7.00it/s]

{'epoch': 2, 'iter': 10130, 'avg_loss': 8.54948653733315, 'avg_acc': 49.9713132958247, 'loss': 7.756186485290527}


EP_train:2:  37%|| 10142/27626 [23:53<41:11,  7.07it/s]

{'epoch': 2, 'iter': 10140, 'avg_loss': 8.549485998668093, 'avg_acc': 49.97658021891333, 'loss': 8.71374225616455}


EP_train:2:  37%|| 10152/27626 [23:55<41:18,  7.05it/s]

{'epoch': 2, 'iter': 10150, 'avg_loss': 8.549905358106793, 'avg_acc': 49.974448330213775, 'loss': 9.262206077575684}


EP_train:2:  37%|| 10162/27626 [23:56<41:17,  7.05it/s]

{'epoch': 2, 'iter': 10160, 'avg_loss': 8.549942695933783, 'avg_acc': 49.97877915559492, 'loss': 8.540517807006836}


EP_train:2:  37%|| 10172/27626 [23:58<41:12,  7.06it/s]

{'epoch': 2, 'iter': 10170, 'avg_loss': 8.550048005064028, 'avg_acc': 49.98217972667388, 'loss': 8.605026245117188}


EP_train:2:  37%|| 10182/27626 [23:59<41:24,  7.02it/s]

{'epoch': 2, 'iter': 10180, 'avg_loss': 8.549977848471505, 'avg_acc': 49.98465278459876, 'loss': 8.139677047729492}


EP_train:2:  37%|| 10192/27626 [24:01<41:17,  7.04it/s]

{'epoch': 2, 'iter': 10190, 'avg_loss': 8.549542552600629, 'avg_acc': 49.985281130409184, 'loss': 8.031396865844727}


EP_train:2:  37%|| 10202/27626 [24:02<40:59,  7.08it/s]

{'epoch': 2, 'iter': 10200, 'avg_loss': 8.549355325950335, 'avg_acc': 49.984682874228014, 'loss': 8.391858100891113}


EP_train:2:  37%|| 10212/27626 [24:03<40:52,  7.10it/s]

{'epoch': 2, 'iter': 10210, 'avg_loss': 8.548619556165491, 'avg_acc': 49.984697874840855, 'loss': 7.44145393371582}


EP_train:2:  37%|| 10222/27626 [24:05<41:28,  6.99it/s]

{'epoch': 2, 'iter': 10220, 'avg_loss': 8.548558359731143, 'avg_acc': 49.98501858917914, 'loss': 9.059653282165527}


EP_train:2:  37%|| 10232/27626 [24:06<41:24,  7.00it/s]

{'epoch': 2, 'iter': 10230, 'avg_loss': 8.548250097413296, 'avg_acc': 49.985949565047406, 'loss': 8.780413627624512}


EP_train:2:  37%|| 10242/27626 [24:08<41:09,  7.04it/s]

{'epoch': 2, 'iter': 10240, 'avg_loss': 8.548615118335343, 'avg_acc': 49.986573576799145, 'loss': 9.024603843688965}


EP_train:2:  37%|| 10252/27626 [24:09<41:00,  7.06it/s]

{'epoch': 2, 'iter': 10250, 'avg_loss': 8.548780949519745, 'avg_acc': 49.98445273631841, 'loss': 9.3041353225708}


EP_train:2:  37%|| 10262/27626 [24:10<41:12,  7.02it/s]

{'epoch': 2, 'iter': 10260, 'avg_loss': 8.54908259306929, 'avg_acc': 49.98324968326674, 'loss': 8.583693504333496}


EP_train:2:  37%|| 10272/27626 [24:12<40:59,  7.06it/s]

{'epoch': 2, 'iter': 10270, 'avg_loss': 8.549099387682444, 'avg_acc': 49.983265991626915, 'loss': 8.126800537109375}


EP_train:2:  37%|| 10282/27626 [24:13<40:57,  7.06it/s]

{'epoch': 2, 'iter': 10280, 'avg_loss': 8.54914500214527, 'avg_acc': 49.98540997957397, 'loss': 8.690679550170898}


EP_train:2:  37%|| 10292/27626 [24:15<40:50,  7.07it/s]

{'epoch': 2, 'iter': 10290, 'avg_loss': 8.549065123863764, 'avg_acc': 49.98299484986882, 'loss': 8.12878131866455}


EP_train:2:  37%|| 10302/27626 [24:16<41:12,  7.01it/s]

{'epoch': 2, 'iter': 10300, 'avg_loss': 8.549098545134084, 'avg_acc': 49.98634841277546, 'loss': 8.082794189453125}


EP_train:2:  37%|| 10312/27626 [24:17<41:32,  6.95it/s]

{'epoch': 2, 'iter': 10310, 'avg_loss': 8.549340645264104, 'avg_acc': 49.98727087576375, 'loss': 8.554421424865723}


EP_train:2:  37%|| 10322/27626 [24:19<40:43,  7.08it/s]

{'epoch': 2, 'iter': 10320, 'avg_loss': 8.54955535585965, 'avg_acc': 49.98849433194458, 'loss': 8.4411039352417}


EP_train:2:  37%|| 10332/27626 [24:20<40:44,  7.07it/s]

{'epoch': 2, 'iter': 10330, 'avg_loss': 8.549644628160518, 'avg_acc': 49.98578308005033, 'loss': 8.820910453796387}


EP_train:2:  37%|| 10342/27626 [24:22<40:54,  7.04it/s]

{'epoch': 2, 'iter': 10340, 'avg_loss': 8.549542957204832, 'avg_acc': 49.98579682815975, 'loss': 8.327902793884277}


EP_train:2:  37%|| 10352/27626 [24:23<40:56,  7.03it/s]

{'epoch': 2, 'iter': 10350, 'avg_loss': 8.549563741743846, 'avg_acc': 49.99003719447396, 'loss': 8.593586921691895}


EP_train:2:  38%|| 10362/27626 [24:25<40:48,  7.05it/s]

{'epoch': 2, 'iter': 10360, 'avg_loss': 8.549312158206659, 'avg_acc': 49.99034842196699, 'loss': 9.938957214355469}


EP_train:2:  38%|| 10372/27626 [24:26<40:41,  7.07it/s]

{'epoch': 2, 'iter': 10370, 'avg_loss': 8.549167713334114, 'avg_acc': 49.99096037026323, 'loss': 8.611207962036133}


EP_train:2:  38%|| 10382/27626 [24:27<40:35,  7.08it/s]

{'epoch': 2, 'iter': 10380, 'avg_loss': 8.549066708159348, 'avg_acc': 49.99036701666506, 'loss': 8.077536582946777}


EP_train:2:  38%|| 10392/27626 [24:29<40:52,  7.03it/s]

{'epoch': 2, 'iter': 10390, 'avg_loss': 8.549148380922727, 'avg_acc': 49.9876696179386, 'loss': 8.669937133789062}


EP_train:2:  38%|| 10402/27626 [24:30<41:14,  6.96it/s]

{'epoch': 2, 'iter': 10400, 'avg_loss': 8.5496000179063, 'avg_acc': 49.98918373233343, 'loss': 9.070479393005371}


EP_train:2:  38%|| 10412/27626 [24:32<40:54,  7.01it/s]

{'epoch': 2, 'iter': 10410, 'avg_loss': 8.549603380097537, 'avg_acc': 49.98859379502449, 'loss': 7.552492141723633}


EP_train:2:  38%|| 10422/27626 [24:33<40:38,  7.06it/s]

{'epoch': 2, 'iter': 10420, 'avg_loss': 8.549589905511416, 'avg_acc': 49.98710536416851, 'loss': 8.297062873840332}


EP_train:2:  38%|| 10432/27626 [24:34<40:51,  7.01it/s]

{'epoch': 2, 'iter': 10430, 'avg_loss': 8.549649631539374, 'avg_acc': 49.98082638289713, 'loss': 8.318215370178223}


EP_train:2:  38%|| 10442/27626 [24:36<40:43,  7.03it/s]

{'epoch': 2, 'iter': 10440, 'avg_loss': 8.549941482448952, 'avg_acc': 49.98144334833828, 'loss': 8.594517707824707}


EP_train:2:  38%|| 10452/27626 [24:37<40:53,  7.00it/s]

{'epoch': 2, 'iter': 10450, 'avg_loss': 8.549779511629572, 'avg_acc': 49.983853219787584, 'loss': 8.239264488220215}


EP_train:2:  38%|| 10462/27626 [24:39<40:43,  7.02it/s]

{'epoch': 2, 'iter': 10460, 'avg_loss': 8.550274667246667, 'avg_acc': 49.984466112226364, 'loss': 8.346835136413574}


EP_train:2:  38%|| 10472/27626 [24:40<40:45,  7.01it/s]

{'epoch': 2, 'iter': 10470, 'avg_loss': 8.550275523823547, 'avg_acc': 49.985077834017765, 'loss': 8.147051811218262}


EP_train:2:  38%|| 10482/27626 [24:42<40:46,  7.01it/s]

{'epoch': 2, 'iter': 10480, 'avg_loss': 8.550251526409928, 'avg_acc': 49.98360127850396, 'loss': 8.23857593536377}


EP_train:2:  38%|| 10492/27626 [24:43<41:16,  6.92it/s]

{'epoch': 2, 'iter': 10490, 'avg_loss': 8.550190836057663, 'avg_acc': 49.983021160995136, 'loss': 8.036932945251465}


EP_train:2:  38%|| 10502/27626 [24:44<40:59,  6.96it/s]

{'epoch': 2, 'iter': 10500, 'avg_loss': 8.550267281844246, 'avg_acc': 49.984525283306354, 'loss': 9.123641967773438}


EP_train:2:  38%|| 10512/27626 [24:46<40:34,  7.03it/s]

{'epoch': 2, 'iter': 10510, 'avg_loss': 8.550599658651409, 'avg_acc': 49.98483731329084, 'loss': 9.122735023498535}


EP_train:2:  38%|| 10522/27626 [24:47<40:37,  7.02it/s]

{'epoch': 2, 'iter': 10520, 'avg_loss': 8.550643467991517, 'avg_acc': 49.98455470012356, 'loss': 9.811643600463867}


EP_train:2:  38%|| 10532/27626 [24:49<40:32,  7.03it/s]

{'epoch': 2, 'iter': 10530, 'avg_loss': 8.550868804429221, 'avg_acc': 49.98516285253062, 'loss': 8.340245246887207}


EP_train:2:  38%|| 10542/27626 [24:50<40:19,  7.06it/s]

{'epoch': 2, 'iter': 10540, 'avg_loss': 8.550936599619622, 'avg_acc': 49.98399108243999, 'loss': 8.13028621673584}


EP_train:2:  38%|| 10552/27626 [24:51<40:45,  6.98it/s]

{'epoch': 2, 'iter': 10550, 'avg_loss': 8.55042592781594, 'avg_acc': 49.98459861624491, 'loss': 8.670523643493652}


EP_train:2:  38%|| 10562/27626 [24:53<40:19,  7.05it/s]

{'epoch': 2, 'iter': 10560, 'avg_loss': 8.550626325557634, 'avg_acc': 49.982245999431875, 'loss': 9.200222969055176}


EP_train:2:  38%|| 10572/27626 [24:54<40:29,  7.02it/s]

{'epoch': 2, 'iter': 10570, 'avg_loss': 8.550777982914417, 'avg_acc': 49.98078469397408, 'loss': 8.844058990478516}


EP_train:2:  38%|| 10582/27626 [24:56<40:59,  6.93it/s]

{'epoch': 2, 'iter': 10580, 'avg_loss': 8.550360898779578, 'avg_acc': 49.98611898686324, 'loss': 8.135260581970215}


EP_train:2:  38%|| 10592/27626 [24:57<40:15,  7.05it/s]

{'epoch': 2, 'iter': 10590, 'avg_loss': 8.550394105186387, 'avg_acc': 49.985246907751865, 'loss': 8.25062084197998}


EP_train:2:  38%|| 10602/27626 [24:59<40:34,  6.99it/s]

{'epoch': 2, 'iter': 10600, 'avg_loss': 8.550658263931656, 'avg_acc': 49.986439958494486, 'loss': 8.158730506896973}


EP_train:2:  38%|| 10612/27626 [25:00<40:12,  7.05it/s]

{'epoch': 2, 'iter': 10610, 'avg_loss': 8.550871511036394, 'avg_acc': 49.991459334652724, 'loss': 8.166396141052246}


EP_train:2:  38%|| 10622/27626 [25:01<39:54,  7.10it/s]

{'epoch': 2, 'iter': 10620, 'avg_loss': 8.550874416016217, 'avg_acc': 49.98970200546088, 'loss': 9.159192085266113}


EP_train:2:  38%|| 10632/27626 [25:03<40:21,  7.02it/s]

{'epoch': 2, 'iter': 10630, 'avg_loss': 8.551118654877792, 'avg_acc': 49.991181450475025, 'loss': 8.300070762634277}


EP_train:2:  39%|| 10642/27626 [25:04<40:03,  7.07it/s]

{'epoch': 2, 'iter': 10640, 'avg_loss': 8.550845510002231, 'avg_acc': 49.99383281646462, 'loss': 8.406652450561523}


EP_train:2:  39%|| 10652/27626 [25:06<39:56,  7.08it/s]

{'epoch': 2, 'iter': 10650, 'avg_loss': 8.550935946566524, 'avg_acc': 49.99765280255375, 'loss': 8.107044219970703}


EP_train:2:  39%|| 10662/27626 [25:07<40:21,  7.01it/s]

{'epoch': 2, 'iter': 10660, 'avg_loss': 8.5511440943625, 'avg_acc': 49.9926718881906, 'loss': 7.665963172912598}


EP_train:2:  39%|| 10672/27626 [25:08<40:26,  6.99it/s]

{'epoch': 2, 'iter': 10670, 'avg_loss': 8.551176317070365, 'avg_acc': 49.9923859057258, 'loss': 8.529534339904785}


EP_train:2:  39%|| 10682/27626 [25:10<39:53,  7.08it/s]

{'epoch': 2, 'iter': 10680, 'avg_loss': 8.551040746067226, 'avg_acc': 49.99853712199232, 'loss': 8.5416259765625}


EP_train:2:  39%|| 10692/27626 [25:11<40:05,  7.04it/s]

{'epoch': 2, 'iter': 10690, 'avg_loss': 8.550913806805028, 'avg_acc': 49.999123094191376, 'loss': 8.631082534790039}


EP_train:2:  39%|| 10702/27626 [25:13<40:16,  7.00it/s]

{'epoch': 2, 'iter': 10700, 'avg_loss': 8.551417823462115, 'avg_acc': 49.998539856088215, 'loss': 8.785780906677246}


EP_train:2:  39%|| 10712/27626 [25:14<39:50,  7.08it/s]

{'epoch': 2, 'iter': 10710, 'avg_loss': 8.551406667141473, 'avg_acc': 49.99854121930725, 'loss': 7.913373947143555}


EP_train:2:  39%|| 10722/27626 [25:16<39:53,  7.06it/s]

{'epoch': 2, 'iter': 10720, 'avg_loss': 8.551410033958371, 'avg_acc': 50.00262335603022, 'loss': 8.130627632141113}


EP_train:2:  39%|| 10732/27626 [25:17<40:06,  7.02it/s]

{'epoch': 2, 'iter': 10730, 'avg_loss': 8.551059383857663, 'avg_acc': 50.00553303513187, 'loss': 7.676844596862793}


EP_train:2:  39%|| 10742/27626 [25:18<39:52,  7.06it/s]

{'epoch': 2, 'iter': 10740, 'avg_loss': 8.550899840923963, 'avg_acc': 50.00436411879713, 'loss': 7.803048133850098}


EP_train:2:  39%|| 10752/27626 [25:20<39:58,  7.04it/s]

{'epoch': 2, 'iter': 10750, 'avg_loss': 8.551099101462128, 'avg_acc': 50.00552274207051, 'loss': 8.094185829162598}


EP_train:2:  39%|| 10762/27626 [25:21<40:02,  7.02it/s]

{'epoch': 2, 'iter': 10760, 'avg_loss': 8.551104418153091, 'avg_acc': 50.00493680884676, 'loss': 8.225309371948242}


EP_train:2:  39%|| 10772/27626 [25:23<39:49,  7.05it/s]

{'epoch': 2, 'iter': 10770, 'avg_loss': 8.551335236851568, 'avg_acc': 50.00377170179184, 'loss': 8.417123794555664}


EP_train:2:  39%|| 10782/27626 [25:24<39:51,  7.04it/s]

{'epoch': 2, 'iter': 10780, 'avg_loss': 8.551431111596797, 'avg_acc': 50.003768203320654, 'loss': 8.891693115234375}


EP_train:2:  39%|| 10792/27626 [25:25<39:40,  7.07it/s]

{'epoch': 2, 'iter': 10790, 'avg_loss': 8.551261495631394, 'avg_acc': 50.00810860902604, 'loss': 7.768098831176758}


EP_train:2:  39%|| 10802/27626 [25:27<39:55,  7.02it/s]

{'epoch': 2, 'iter': 10800, 'avg_loss': 8.551121388262217, 'avg_acc': 50.00896907693731, 'loss': 8.089577674865723}


EP_train:2:  39%|| 10812/27626 [25:28<39:52,  7.03it/s]

{'epoch': 2, 'iter': 10810, 'avg_loss': 8.55136249650033, 'avg_acc': 50.00693737859587, 'loss': 8.592339515686035}


EP_train:2:  39%|| 10822/27626 [25:30<39:38,  7.06it/s]

{'epoch': 2, 'iter': 10820, 'avg_loss': 8.55103639714236, 'avg_acc': 50.00808612882358, 'loss': 8.199498176574707}


EP_train:2:  39%|| 10832/27626 [25:31<40:08,  6.97it/s]

{'epoch': 2, 'iter': 10830, 'avg_loss': 8.551064197227529, 'avg_acc': 50.006924568368575, 'loss': 8.092840194702148}


EP_train:2:  39%|| 10842/27626 [25:33<39:47,  7.03it/s]

{'epoch': 2, 'iter': 10840, 'avg_loss': 8.551336758515676, 'avg_acc': 50.00835946868371, 'loss': 9.079385757446289}


EP_train:2:  39%|| 10852/27626 [25:34<39:47,  7.03it/s]

{'epoch': 2, 'iter': 10850, 'avg_loss': 8.551568131195918, 'avg_acc': 50.01065569993549, 'loss': 9.18642807006836}


EP_train:2:  39%|| 10862/27626 [25:35<39:53,  7.00it/s]

{'epoch': 2, 'iter': 10860, 'avg_loss': 8.551719502337608, 'avg_acc': 50.00517908111593, 'loss': 8.789895057678223}


EP_train:2:  39%|| 10872/27626 [25:37<39:47,  7.02it/s]

{'epoch': 2, 'iter': 10870, 'avg_loss': 8.551856389711121, 'avg_acc': 50.00488685493515, 'loss': 8.48133659362793}


EP_train:2:  39%|| 10882/27626 [25:38<39:51,  7.00it/s]

{'epoch': 2, 'iter': 10880, 'avg_loss': 8.551890759266055, 'avg_acc': 50.00832873816745, 'loss': 9.425869941711426}


EP_train:2:  39%|| 10892/27626 [25:40<39:29,  7.06it/s]

{'epoch': 2, 'iter': 10890, 'avg_loss': 8.55212726296171, 'avg_acc': 50.008608024974755, 'loss': 9.05422306060791}


EP_train:2:  39%|| 10902/27626 [25:41<39:41,  7.02it/s]

{'epoch': 2, 'iter': 10900, 'avg_loss': 8.552061643286393, 'avg_acc': 50.00716677369049, 'loss': 8.855353355407715}


EP_train:2:  39%|| 10912/27626 [25:42<39:19,  7.08it/s]

{'epoch': 2, 'iter': 10910, 'avg_loss': 8.552159771802565, 'avg_acc': 50.00801942993309, 'loss': 8.910087585449219}


EP_train:2:  40%|| 10922/27626 [25:44<39:28,  7.05it/s]

{'epoch': 2, 'iter': 10920, 'avg_loss': 8.55238669216169, 'avg_acc': 50.00715364893325, 'loss': 8.291112899780273}


EP_train:2:  40%|| 10932/27626 [25:45<39:48,  6.99it/s]

{'epoch': 2, 'iter': 10930, 'avg_loss': 8.552336979104293, 'avg_acc': 50.0077188729302, 'loss': 8.543380737304688}


EP_train:2:  40%|| 10942/27626 [25:47<39:52,  6.97it/s]

{'epoch': 2, 'iter': 10940, 'avg_loss': 8.552302695597648, 'avg_acc': 50.00828306370533, 'loss': 8.6740083694458}


EP_train:2:  40%|| 10952/27626 [25:48<39:24,  7.05it/s]

{'epoch': 2, 'iter': 10950, 'avg_loss': 8.552493854261797, 'avg_acc': 50.01512418957172, 'loss': 9.402390480041504}


EP_train:2:  40%|| 10962/27626 [25:50<39:23,  7.05it/s]

{'epoch': 2, 'iter': 10960, 'avg_loss': 8.552750116194614, 'avg_acc': 50.01824651035489, 'loss': 8.643087387084961}


EP_train:2:  40%|| 10972/27626 [25:51<39:24,  7.04it/s]

{'epoch': 2, 'iter': 10970, 'avg_loss': 8.552807955958764, 'avg_acc': 50.015951143924894, 'loss': 8.48979377746582}


EP_train:2:  40%|| 10982/27626 [25:52<39:20,  7.05it/s]

{'epoch': 2, 'iter': 10980, 'avg_loss': 8.552950887512438, 'avg_acc': 50.01422912303069, 'loss': 8.794488906860352}


EP_train:2:  40%|| 10992/27626 [25:54<39:11,  7.07it/s]

{'epoch': 2, 'iter': 10990, 'avg_loss': 8.55355070715616, 'avg_acc': 50.01620644163406, 'loss': 8.926685333251953}


EP_train:2:  40%|| 11002/27626 [25:55<39:29,  7.02it/s]

{'epoch': 2, 'iter': 11000, 'avg_loss': 8.553732637340032, 'avg_acc': 50.01732797018453, 'loss': 8.97325611114502}


EP_train:2:  40%|| 11012/27626 [25:57<39:28,  7.02it/s]

{'epoch': 2, 'iter': 11010, 'avg_loss': 8.55391994673272, 'avg_acc': 50.01646081191535, 'loss': 9.29173469543457}


EP_train:2:  40%|| 11022/27626 [25:58<39:28,  7.01it/s]

{'epoch': 2, 'iter': 11020, 'avg_loss': 8.553787979345422, 'avg_acc': 50.01275973142183, 'loss': 9.095527648925781}


EP_train:2:  40%|| 11032/27626 [25:59<39:34,  6.99it/s]

{'epoch': 2, 'iter': 11030, 'avg_loss': 8.553812942309497, 'avg_acc': 50.015014504578005, 'loss': 8.199702262878418}


EP_train:2:  40%|| 11042/27626 [26:01<39:32,  6.99it/s]

{'epoch': 2, 'iter': 11040, 'avg_loss': 8.55378855597378, 'avg_acc': 50.01245358210306, 'loss': 8.990997314453125}


EP_train:2:  40%|| 11052/27626 [26:02<39:01,  7.08it/s]

{'epoch': 2, 'iter': 11050, 'avg_loss': 8.553493502517382, 'avg_acc': 50.01131119355714, 'loss': 7.910351753234863}


EP_train:2:  40%|| 11062/27626 [26:04<39:01,  7.07it/s]

{'epoch': 2, 'iter': 11060, 'avg_loss': 8.553365687868736, 'avg_acc': 50.01214853991501, 'loss': 8.575037002563477}


EP_train:2:  40%|| 11072/27626 [26:05<39:20,  7.01it/s]

{'epoch': 2, 'iter': 11070, 'avg_loss': 8.553325040996608, 'avg_acc': 50.01298437358865, 'loss': 8.622504234313965}


EP_train:2:  40%|| 11082/27626 [26:07<39:06,  7.05it/s]

{'epoch': 2, 'iter': 11080, 'avg_loss': 8.553226015755184, 'avg_acc': 50.016356827001175, 'loss': 7.730677127838135}


EP_train:2:  40%|| 11092/27626 [26:08<38:56,  7.08it/s]

{'epoch': 2, 'iter': 11090, 'avg_loss': 8.55342863244712, 'avg_acc': 50.01606031917771, 'loss': 8.871003150939941}


EP_train:2:  40%|| 11102/27626 [26:09<38:55,  7.08it/s]

{'epoch': 2, 'iter': 11100, 'avg_loss': 8.55327637132659, 'avg_acc': 50.01801639491937, 'loss': 8.685413360595703}


EP_train:2:  40%|| 11112/27626 [26:11<39:04,  7.04it/s]

{'epoch': 2, 'iter': 11110, 'avg_loss': 8.553283614157857, 'avg_acc': 50.01884393843938, 'loss': 7.605964183807373}


EP_train:2:  40%|| 11122/27626 [26:12<39:13,  7.01it/s]

{'epoch': 2, 'iter': 11120, 'avg_loss': 8.553480210487345, 'avg_acc': 50.01938899379552, 'loss': 8.320590019226074}


EP_train:2:  40%|| 11132/27626 [26:14<39:04,  7.03it/s]

{'epoch': 2, 'iter': 11130, 'avg_loss': 8.553525700422217, 'avg_acc': 50.02021381726709, 'loss': 8.991046905517578}


EP_train:2:  40%|| 11142/27626 [26:15<39:18,  6.99it/s]

{'epoch': 2, 'iter': 11140, 'avg_loss': 8.553743036845548, 'avg_acc': 50.02272013284266, 'loss': 9.75281047821045}


EP_train:2:  40%|| 11152/27626 [26:16<39:03,  7.03it/s]

{'epoch': 2, 'iter': 11150, 'avg_loss': 8.553689655194143, 'avg_acc': 50.02157878217201, 'loss': 8.295977592468262}


EP_train:2:  40%|| 11162/27626 [26:18<39:16,  6.99it/s]

{'epoch': 2, 'iter': 11160, 'avg_loss': 8.553398193974251, 'avg_acc': 50.02043947674939, 'loss': 8.485276222229004}


EP_train:2:  40%|| 11172/27626 [26:19<39:00,  7.03it/s]

{'epoch': 2, 'iter': 11170, 'avg_loss': 8.553946167873553, 'avg_acc': 50.01846298451347, 'loss': 9.202686309814453}


EP_train:2:  40%|| 11182/27626 [26:21<38:57,  7.03it/s]

{'epoch': 2, 'iter': 11180, 'avg_loss': 8.554131580757419, 'avg_acc': 50.018725963688404, 'loss': 8.552207946777344}


EP_train:2:  41%|| 11192/27626 [26:22<39:09,  6.99it/s]

{'epoch': 2, 'iter': 11190, 'avg_loss': 8.553983374581922, 'avg_acc': 50.0206639263694, 'loss': 8.461831092834473}


EP_train:2:  41%|| 11202/27626 [26:23<38:52,  7.04it/s]

{'epoch': 2, 'iter': 11200, 'avg_loss': 8.553874112571863, 'avg_acc': 50.025667351129364, 'loss': 7.988456726074219}


EP_train:2:  41%|| 11212/27626 [26:25<39:06,  7.00it/s]

{'epoch': 2, 'iter': 11210, 'avg_loss': 8.553782386897176, 'avg_acc': 50.027595664971905, 'loss': 8.541871070861816}


EP_train:2:  41%|| 11222/27626 [26:26<38:58,  7.02it/s]

{'epoch': 2, 'iter': 11220, 'avg_loss': 8.553409734412678, 'avg_acc': 50.02868505480795, 'loss': 8.482034683227539}


EP_train:2:  41%|| 11232/27626 [26:28<38:39,  7.07it/s]

{'epoch': 2, 'iter': 11230, 'avg_loss': 8.553489519305565, 'avg_acc': 50.034224467990384, 'loss': 8.609209060668945}


EP_train:2:  41%|| 11242/27626 [26:29<38:46,  7.04it/s]

{'epoch': 2, 'iter': 11240, 'avg_loss': 8.553577857810337, 'avg_acc': 50.03697402366337, 'loss': 8.313283920288086}


EP_train:2:  41%|| 11252/27626 [26:31<38:32,  7.08it/s]

{'epoch': 2, 'iter': 11250, 'avg_loss': 8.553561576174987, 'avg_acc': 50.039163185494616, 'loss': 8.187752723693848}


EP_train:2:  41%|| 11262/27626 [26:32<38:59,  6.99it/s]

{'epoch': 2, 'iter': 11260, 'avg_loss': 8.553468192664294, 'avg_acc': 50.03968342065536, 'loss': 9.48424243927002}


EP_train:2:  41%|| 11272/27626 [26:33<38:42,  7.04it/s]

{'epoch': 2, 'iter': 11270, 'avg_loss': 8.553618929589422, 'avg_acc': 50.04186629402893, 'loss': 8.740053176879883}


EP_train:2:  41%|| 11282/27626 [26:35<38:12,  7.13it/s]

{'epoch': 2, 'iter': 11280, 'avg_loss': 8.55337652413201, 'avg_acc': 50.040998138462896, 'loss': 8.57545280456543}


EP_train:2:  41%|| 11292/27626 [26:36<38:39,  7.04it/s]

{'epoch': 2, 'iter': 11290, 'avg_loss': 8.55333473422468, 'avg_acc': 50.040685058896464, 'loss': 8.75671672821045}


EP_train:2:  41%|| 11302/27626 [26:38<39:09,  6.95it/s]

{'epoch': 2, 'iter': 11300, 'avg_loss': 8.553299123993407, 'avg_acc': 50.040372533404124, 'loss': 8.133747100830078}


EP_train:2:  41%|| 11312/27626 [26:39<38:50,  7.00it/s]

{'epoch': 2, 'iter': 11310, 'avg_loss': 8.553231273634598, 'avg_acc': 50.0419945186102, 'loss': 8.05173110961914}


EP_train:2:  41%|| 11322/27626 [26:40<38:41,  7.02it/s]

{'epoch': 2, 'iter': 11320, 'avg_loss': 8.553274398788885, 'avg_acc': 50.040853281512234, 'loss': 9.03421688079834}


EP_train:2:  41%|| 11332/27626 [26:42<38:17,  7.09it/s]

{'epoch': 2, 'iter': 11330, 'avg_loss': 8.55286404461802, 'avg_acc': 50.041093019151006, 'loss': 8.479910850524902}


EP_train:2:  41%|| 11342/27626 [26:43<38:21,  7.07it/s]

{'epoch': 2, 'iter': 11340, 'avg_loss': 8.552960136602946, 'avg_acc': 50.03995458954237, 'loss': 9.248126983642578}


EP_train:2:  41%|| 11352/27626 [26:45<38:25,  7.06it/s]

{'epoch': 2, 'iter': 11350, 'avg_loss': 8.55296545341229, 'avg_acc': 50.04157122720465, 'loss': 8.329750061035156}


EP_train:2:  41%|| 11362/27626 [26:46<38:11,  7.10it/s]

{'epoch': 2, 'iter': 11360, 'avg_loss': 8.553158179564267, 'avg_acc': 50.04428527418361, 'loss': 8.439836502075195}


EP_train:2:  41%|| 11372/27626 [26:48<38:15,  7.08it/s]

{'epoch': 2, 'iter': 11370, 'avg_loss': 8.5529851739424, 'avg_acc': 50.04452115029461, 'loss': 7.939414978027344}


EP_train:2:  41%|| 11382/27626 [26:49<38:32,  7.03it/s]

{'epoch': 2, 'iter': 11380, 'avg_loss': 8.553106450686796, 'avg_acc': 50.04420745101484, 'loss': 8.334393501281738}


EP_train:2:  41%|| 11392/27626 [26:50<38:30,  7.03it/s]

{'epoch': 2, 'iter': 11390, 'avg_loss': 8.552903046255084, 'avg_acc': 50.04636335703626, 'loss': 8.50960922241211}


EP_train:2:  41%|| 11402/27626 [26:52<38:15,  7.07it/s]

{'epoch': 2, 'iter': 11400, 'avg_loss': 8.552825591384543, 'avg_acc': 50.04687088851855, 'loss': 8.137923240661621}


EP_train:2:  41%|| 11412/27626 [26:53<38:13,  7.07it/s]

{'epoch': 2, 'iter': 11410, 'avg_loss': 8.553251122229044, 'avg_acc': 50.04354351064762, 'loss': 9.165587425231934}


EP_train:2:  41%|| 11422/27626 [26:55<38:24,  7.03it/s]

{'epoch': 2, 'iter': 11420, 'avg_loss': 8.553110806602342, 'avg_acc': 50.04131643463795, 'loss': 9.408892631530762}


EP_train:2:  41%|| 11432/27626 [26:56<38:06,  7.08it/s]

{'epoch': 2, 'iter': 11430, 'avg_loss': 8.552970425578494, 'avg_acc': 50.0410069110314, 'loss': 7.9906005859375}


EP_train:2:  41%|| 11442/27626 [26:57<38:11,  7.06it/s]

{'epoch': 2, 'iter': 11440, 'avg_loss': 8.552662473366338, 'avg_acc': 50.040424788043005, 'loss': 8.779422760009766}


EP_train:2:  41%|| 11452/27626 [26:59<38:02,  7.09it/s]

{'epoch': 2, 'iter': 11450, 'avg_loss': 8.552778648607658, 'avg_acc': 50.04175399528425, 'loss': 8.804496765136719}


EP_train:2:  41%|| 11462/27626 [27:00<38:15,  7.04it/s]

{'epoch': 2, 'iter': 11460, 'avg_loss': 8.552703773462587, 'avg_acc': 50.04389887444377, 'loss': 7.660610675811768}


EP_train:2:  42%|| 11472/27626 [27:02<38:19,  7.02it/s]

{'epoch': 2, 'iter': 11470, 'avg_loss': 8.552868587845403, 'avg_acc': 50.04712971842037, 'loss': 8.033548355102539}


EP_train:2:  42%|| 11482/27626 [27:03<38:19,  7.02it/s]

{'epoch': 2, 'iter': 11480, 'avg_loss': 8.55264348658148, 'avg_acc': 50.04763304590193, 'loss': 8.709185600280762}


EP_train:2:  42%|| 11492/27626 [27:05<37:58,  7.08it/s]

{'epoch': 2, 'iter': 11490, 'avg_loss': 8.552339062057083, 'avg_acc': 50.044600121834485, 'loss': 8.243258476257324}


EP_train:2:  42%|| 11502/27626 [27:06<37:57,  7.08it/s]

{'epoch': 2, 'iter': 11500, 'avg_loss': 8.552675190018359, 'avg_acc': 50.04075732544996, 'loss': 10.100878715515137}


EP_train:2:  42%|| 11512/27626 [27:07<38:06,  7.05it/s]

{'epoch': 2, 'iter': 11510, 'avg_loss': 8.552605727019023, 'avg_acc': 50.03610676743983, 'loss': 7.982912540435791}


EP_train:2:  42%|| 11522/27626 [27:09<37:51,  7.09it/s]

{'epoch': 2, 'iter': 11520, 'avg_loss': 8.552527977602628, 'avg_acc': 50.03580418366461, 'loss': 8.067662239074707}


EP_train:2:  42%|| 11532/27626 [27:10<38:18,  7.00it/s]

{'epoch': 2, 'iter': 11530, 'avg_loss': 8.552031054003429, 'avg_acc': 50.03170800450958, 'loss': 8.538980484008789}


EP_train:2:  42%|| 11542/27626 [27:12<38:15,  7.01it/s]

{'epoch': 2, 'iter': 11540, 'avg_loss': 8.551936269911682, 'avg_acc': 50.03357594662508, 'loss': 8.191875457763672}


EP_train:2:  42%|| 11552/27626 [27:13<38:15,  7.00it/s]

{'epoch': 2, 'iter': 11550, 'avg_loss': 8.552005683113595, 'avg_acc': 50.03246472166912, 'loss': 8.690262794494629}


EP_train:2:  42%|| 11562/27626 [27:14<37:55,  7.06it/s]

{'epoch': 2, 'iter': 11560, 'avg_loss': 8.55211768089699, 'avg_acc': 50.03189602975521, 'loss': 9.053731918334961}


EP_train:2:  42%|| 11572/27626 [27:16<37:57,  7.05it/s]

{'epoch': 2, 'iter': 11570, 'avg_loss': 8.552017646593722, 'avg_acc': 50.034569181574625, 'loss': 8.249269485473633}


EP_train:2:  42%|| 11582/27626 [27:17<38:07,  7.01it/s]

{'epoch': 2, 'iter': 11580, 'avg_loss': 8.551905356070279, 'avg_acc': 50.034269493135305, 'loss': 7.767475128173828}


EP_train:2:  42%|| 11592/27626 [27:19<37:48,  7.07it/s]

{'epoch': 2, 'iter': 11590, 'avg_loss': 8.551934677782569, 'avg_acc': 50.033970321801405, 'loss': 8.985052108764648}


EP_train:2:  42%|| 11602/27626 [27:20<37:37,  7.10it/s]

{'epoch': 2, 'iter': 11600, 'avg_loss': 8.551993827287127, 'avg_acc': 50.035018532885104, 'loss': 7.88869047164917}


EP_train:2:  42%|| 11612/27626 [27:21<37:57,  7.03it/s]

{'epoch': 2, 'iter': 11610, 'avg_loss': 8.551775876870606, 'avg_acc': 50.0339118077685, 'loss': 8.05432415008545}


EP_train:2:  42%|| 11622/27626 [27:23<37:50,  7.05it/s]

{'epoch': 2, 'iter': 11620, 'avg_loss': 8.551929099292458, 'avg_acc': 50.03872300146287, 'loss': 9.200362205505371}


EP_train:2:  42%|| 11632/27626 [27:24<37:49,  7.05it/s]

{'epoch': 2, 'iter': 11630, 'avg_loss': 8.55169712721292, 'avg_acc': 50.03815235147451, 'loss': 7.787213325500488}


EP_train:2:  42%|| 11642/27626 [27:26<37:50,  7.04it/s]

{'epoch': 2, 'iter': 11640, 'avg_loss': 8.5515310196786, 'avg_acc': 50.03677733871661, 'loss': 7.908634662628174}


EP_train:2:  42%|| 11652/27626 [27:27<37:53,  7.03it/s]

{'epoch': 2, 'iter': 11650, 'avg_loss': 8.551259059413248, 'avg_acc': 50.037550424856235, 'loss': 9.176849365234375}


EP_train:2:  42%|| 11662/27626 [27:29<37:45,  7.05it/s]

{'epoch': 2, 'iter': 11660, 'avg_loss': 8.551274601238157, 'avg_acc': 50.03832218506131, 'loss': 8.183713912963867}


EP_train:2:  42%|| 11672/27626 [27:30<37:32,  7.08it/s]

{'epoch': 2, 'iter': 11670, 'avg_loss': 8.551564303801714, 'avg_acc': 50.03775383429012, 'loss': 9.264006614685059}


EP_train:2:  42%|| 11682/27626 [27:31<37:24,  7.10it/s]

{'epoch': 2, 'iter': 11680, 'avg_loss': 8.551805996280471, 'avg_acc': 50.036383871243906, 'loss': 8.87195873260498}


EP_train:2:  42%|| 11692/27626 [27:33<37:29,  7.08it/s]

{'epoch': 2, 'iter': 11690, 'avg_loss': 8.551616023292636, 'avg_acc': 50.033412454024464, 'loss': 9.081313133239746}


EP_train:2:  42%|| 11702/27626 [27:34<37:30,  7.08it/s]

{'epoch': 2, 'iter': 11700, 'avg_loss': 8.551564405702258, 'avg_acc': 50.03311682762157, 'loss': 9.110883712768555}


EP_train:2:  42%|| 11712/27626 [27:36<37:31,  7.07it/s]

{'epoch': 2, 'iter': 11710, 'avg_loss': 8.55126969558394, 'avg_acc': 50.02855221586543, 'loss': 8.470325469970703}


EP_train:2:  42%|| 11722/27626 [27:37<37:36,  7.05it/s]

{'epoch': 2, 'iter': 11720, 'avg_loss': 8.550894010291756, 'avg_acc': 50.027461394079, 'loss': 8.520821571350098}


EP_train:2:  42%|| 11732/27626 [27:38<37:45,  7.01it/s]

{'epoch': 2, 'iter': 11730, 'avg_loss': 8.551065189191503, 'avg_acc': 50.02610604381553, 'loss': 9.294124603271484}


EP_train:2:  43%|| 11742/27626 [27:40<37:45,  7.01it/s]

{'epoch': 2, 'iter': 11740, 'avg_loss': 8.550869815526855, 'avg_acc': 50.02714845413509, 'loss': 8.712812423706055}


EP_train:2:  43%|| 11752/27626 [27:41<37:50,  6.99it/s]

{'epoch': 2, 'iter': 11750, 'avg_loss': 8.550916062224298, 'avg_acc': 50.024731937707436, 'loss': 8.91908073425293}


EP_train:2:  43%|| 11762/27626 [27:43<37:40,  7.02it/s]

{'epoch': 2, 'iter': 11760, 'avg_loss': 8.550777905815039, 'avg_acc': 50.02417949153983, 'loss': 8.148775100708008}


EP_train:2:  43%|| 11772/27626 [27:44<37:15,  7.09it/s]

{'epoch': 2, 'iter': 11770, 'avg_loss': 8.55123898486963, 'avg_acc': 50.024424432928384, 'loss': 8.516180038452148}


EP_train:2:  43%|| 11782/27626 [27:46<37:28,  7.05it/s]

{'epoch': 2, 'iter': 11780, 'avg_loss': 8.55147409447182, 'avg_acc': 50.02572998896528, 'loss': 8.173720359802246}


EP_train:2:  43%|| 11792/27626 [27:47<37:18,  7.07it/s]

{'epoch': 2, 'iter': 11790, 'avg_loss': 8.551426916265596, 'avg_acc': 50.02756339581036, 'loss': 8.434558868408203}


EP_train:2:  43%|| 11802/27626 [27:48<37:13,  7.08it/s]

{'epoch': 2, 'iter': 11800, 'avg_loss': 8.551281671788306, 'avg_acc': 50.02648080671129, 'loss': 9.088322639465332}


EP_train:2:  43%|| 11812/27626 [27:50<37:11,  7.09it/s]

{'epoch': 2, 'iter': 11810, 'avg_loss': 8.55119688312465, 'avg_acc': 50.02963339260012, 'loss': 8.505885124206543}


EP_train:2:  43%|| 11822/27626 [27:51<37:03,  7.11it/s]

{'epoch': 2, 'iter': 11820, 'avg_loss': 8.55122495647534, 'avg_acc': 50.03145884442941, 'loss': 8.329586029052734}


EP_train:2:  43%|| 11832/27626 [27:53<37:28,  7.02it/s]

{'epoch': 2, 'iter': 11830, 'avg_loss': 8.551335809841612, 'avg_acc': 50.030903981066686, 'loss': 8.920379638671875}


EP_train:2:  43%|| 11842/27626 [27:54<37:22,  7.04it/s]

{'epoch': 2, 'iter': 11840, 'avg_loss': 8.551281672536437, 'avg_acc': 50.03114179545647, 'loss': 8.508650779724121}


EP_train:2:  43%|| 11852/27626 [27:55<37:01,  7.10it/s]

{'epoch': 2, 'iter': 11850, 'avg_loss': 8.55156109723529, 'avg_acc': 50.0340161167834, 'loss': 8.89505386352539}


EP_train:2:  43%|| 11862/27626 [27:57<36:57,  7.11it/s]

{'epoch': 2, 'iter': 11860, 'avg_loss': 8.551898297426542, 'avg_acc': 50.03372396931118, 'loss': 8.791705131530762}


EP_train:2:  43%|| 11872/27626 [27:58<37:13,  7.05it/s]

{'epoch': 2, 'iter': 11870, 'avg_loss': 8.551741167223852, 'avg_acc': 50.03527504001348, 'loss': 8.196542739868164}


EP_train:2:  43%|| 11882/27626 [28:00<37:13,  7.05it/s]

{'epoch': 2, 'iter': 11880, 'avg_loss': 8.551775392945412, 'avg_acc': 50.0312999747496, 'loss': 8.229729652404785}


EP_train:2:  43%|| 11892/27626 [28:01<36:51,  7.11it/s]

{'epoch': 2, 'iter': 11890, 'avg_loss': 8.551626800618594, 'avg_acc': 50.02917122193256, 'loss': 8.125691413879395}


EP_train:2:  43%|| 11902/27626 [28:02<37:15,  7.03it/s]

{'epoch': 2, 'iter': 11900, 'avg_loss': 8.551724257844604, 'avg_acc': 50.030722208217796, 'loss': 8.749099731445312}


EP_train:2:  43%|| 11912/27626 [28:04<37:23,  7.00it/s]

{'epoch': 2, 'iter': 11910, 'avg_loss': 8.551696681984325, 'avg_acc': 50.0306964150785, 'loss': 8.036275863647461}


EP_train:2:  43%|| 11922/27626 [28:05<37:04,  7.06it/s]

{'epoch': 2, 'iter': 11920, 'avg_loss': 8.551556477019895, 'avg_acc': 50.0298842378995, 'loss': 8.860066413879395}


EP_train:2:  43%|| 11932/27626 [28:07<37:14,  7.02it/s]

{'epoch': 2, 'iter': 11930, 'avg_loss': 8.551720052040809, 'avg_acc': 50.027763808565915, 'loss': 7.945043563842773}


EP_train:2:  43%|| 11942/27626 [28:08<36:59,  7.07it/s]

{'epoch': 2, 'iter': 11940, 'avg_loss': 8.551863802766173, 'avg_acc': 50.0274788543673, 'loss': 8.956620216369629}


EP_train:2:  43%|| 11952/27626 [28:10<36:44,  7.11it/s]

{'epoch': 2, 'iter': 11950, 'avg_loss': 8.551694417376865, 'avg_acc': 50.02614843946114, 'loss': 7.349218368530273}


EP_train:2:  43%|| 11962/27626 [28:11<37:00,  7.05it/s]

{'epoch': 2, 'iter': 11960, 'avg_loss': 8.551448564606758, 'avg_acc': 50.0250815149235, 'loss': 8.28591251373291}


EP_train:2:  43%|| 11972/27626 [28:12<36:59,  7.05it/s]

{'epoch': 2, 'iter': 11970, 'avg_loss': 8.551524457691725, 'avg_acc': 50.026104753153454, 'loss': 8.596333503723145}


EP_train:2:  43%|| 11982/27626 [28:14<36:51,  7.08it/s]

{'epoch': 2, 'iter': 11980, 'avg_loss': 8.551772993043352, 'avg_acc': 50.0273871129288, 'loss': 9.688122749328613}


EP_train:2:  43%|| 11992/27626 [28:15<36:51,  7.07it/s]

{'epoch': 2, 'iter': 11990, 'avg_loss': 8.551755158858068, 'avg_acc': 50.02632182470186, 'loss': 8.91279125213623}


EP_train:2:  43%|| 12002/27626 [28:17<37:12,  7.00it/s]

{'epoch': 2, 'iter': 12000, 'avg_loss': 8.55179871722923, 'avg_acc': 50.02473752187318, 'loss': 8.518004417419434}


EP_train:2:  43%|| 12012/27626 [28:18<36:55,  7.05it/s]

{'epoch': 2, 'iter': 12010, 'avg_loss': 8.551632290904383, 'avg_acc': 50.02731870785113, 'loss': 8.694108963012695}


EP_train:2:  44%|| 12022/27626 [28:19<36:56,  7.04it/s]

{'epoch': 2, 'iter': 12020, 'avg_loss': 8.551442713539549, 'avg_acc': 50.025476249896016, 'loss': 7.75946044921875}


EP_train:2:  44%|| 12032/27626 [28:21<36:43,  7.08it/s]

{'epoch': 2, 'iter': 12030, 'avg_loss': 8.551585369222668, 'avg_acc': 50.023896600448836, 'loss': 8.62987232208252}


EP_train:2:  44%|| 12042/27626 [28:22<36:41,  7.08it/s]

{'epoch': 2, 'iter': 12040, 'avg_loss': 8.55167423361075, 'avg_acc': 50.027769703512995, 'loss': 8.631608963012695}


EP_train:2:  44%|| 12052/27626 [28:24<36:42,  7.07it/s]

{'epoch': 2, 'iter': 12050, 'avg_loss': 8.551704564892635, 'avg_acc': 50.025931457970295, 'loss': 8.341764450073242}


EP_train:2:  44%|| 12062/27626 [28:25<36:44,  7.06it/s]

{'epoch': 2, 'iter': 12060, 'avg_loss': 8.55173614932257, 'avg_acc': 50.0230598623663, 'loss': 9.440374374389648}


EP_train:2:  44%|| 12072/27626 [28:26<36:50,  7.04it/s]

{'epoch': 2, 'iter': 12070, 'avg_loss': 8.55197369640522, 'avg_acc': 50.02459406842846, 'loss': 8.130621910095215}


EP_train:2:  44%|| 12082/27626 [28:28<36:57,  7.01it/s]

{'epoch': 2, 'iter': 12080, 'avg_loss': 8.551822970890562, 'avg_acc': 50.02276301630659, 'loss': 9.146167755126953}


EP_train:2:  44%|| 12092/27626 [28:29<36:39,  7.06it/s]

{'epoch': 2, 'iter': 12090, 'avg_loss': 8.55170987255963, 'avg_acc': 50.02610412703664, 'loss': 9.135841369628906}


EP_train:2:  44%|| 12102/27626 [28:31<36:49,  7.03it/s]

{'epoch': 2, 'iter': 12100, 'avg_loss': 8.551028733844156, 'avg_acc': 50.026082555160734, 'loss': 7.628683567047119}


EP_train:2:  44%|| 12112/27626 [28:32<37:07,  6.97it/s]

{'epoch': 2, 'iter': 12110, 'avg_loss': 8.550811270976103, 'avg_acc': 50.028641317810255, 'loss': 8.278350830078125}


EP_train:2:  44%|| 12122/27626 [28:34<36:46,  7.03it/s]

{'epoch': 2, 'iter': 12120, 'avg_loss': 8.550602247274194, 'avg_acc': 50.02861768830954, 'loss': 9.196090698242188}


EP_train:2:  44%|| 12132/27626 [28:35<36:32,  7.07it/s]

{'epoch': 2, 'iter': 12130, 'avg_loss': 8.550886070135745, 'avg_acc': 50.02988212018795, 'loss': 8.371513366699219}


EP_train:2:  44%|| 12142/27626 [28:36<36:41,  7.03it/s]

{'epoch': 2, 'iter': 12140, 'avg_loss': 8.550603676643494, 'avg_acc': 50.02960011531176, 'loss': 8.550406455993652}


EP_train:2:  44%|| 12152/27626 [28:38<36:29,  7.07it/s]

{'epoch': 2, 'iter': 12150, 'avg_loss': 8.550517529227116, 'avg_acc': 50.027775491729074, 'loss': 8.810040473937988}


EP_train:2:  44%|| 12162/27626 [28:39<36:36,  7.04it/s]

{'epoch': 2, 'iter': 12160, 'avg_loss': 8.550341531679667, 'avg_acc': 50.028780527917114, 'loss': 8.864860534667969}


EP_train:2:  44%|| 12172/27626 [28:41<36:43,  7.01it/s]

{'epoch': 2, 'iter': 12170, 'avg_loss': 8.550194802980437, 'avg_acc': 50.02772984964259, 'loss': 8.413856506347656}


EP_train:2:  44%|| 12182/27626 [28:42<36:12,  7.11it/s]

{'epoch': 2, 'iter': 12180, 'avg_loss': 8.55035407358609, 'avg_acc': 50.02385887858139, 'loss': 8.425919532775879}


EP_train:2:  44%|| 12192/27626 [28:43<36:33,  7.04it/s]

{'epoch': 2, 'iter': 12190, 'avg_loss': 8.550524887308379, 'avg_acc': 50.02742802067098, 'loss': 8.895821571350098}


EP_train:2:  44%|| 12202/27626 [28:45<36:35,  7.03it/s]

{'epoch': 2, 'iter': 12200, 'avg_loss': 8.55066561403846, 'avg_acc': 50.03175969182854, 'loss': 8.380648612976074}


EP_train:2:  44%|| 12212/27626 [28:46<36:25,  7.05it/s]

{'epoch': 2, 'iter': 12210, 'avg_loss': 8.550336662942582, 'avg_acc': 50.03250143313406, 'loss': 7.990533828735352}


EP_train:2:  44%|| 12222/27626 [28:48<36:22,  7.06it/s]

{'epoch': 2, 'iter': 12220, 'avg_loss': 8.550179312324945, 'avg_acc': 50.03631044922674, 'loss': 8.106595039367676}


EP_train:2:  44%|| 12232/27626 [28:49<36:37,  7.00it/s]

{'epoch': 2, 'iter': 12230, 'avg_loss': 8.550317717421114, 'avg_acc': 50.03576976535034, 'loss': 7.911900520324707}


EP_train:2:  44%|| 12242/27626 [28:50<36:32,  7.02it/s]

{'epoch': 2, 'iter': 12240, 'avg_loss': 8.55023983964031, 'avg_acc': 50.034464096070586, 'loss': 9.669085502624512}


EP_train:2:  44%|| 12252/27626 [28:52<36:37,  7.00it/s]

{'epoch': 2, 'iter': 12250, 'avg_loss': 8.550183775804255, 'avg_acc': 50.036731695371806, 'loss': 8.36269474029541}


EP_train:2:  44%|| 12262/27626 [28:53<36:11,  7.08it/s]

{'epoch': 2, 'iter': 12260, 'avg_loss': 8.550004967973493, 'avg_acc': 50.04077970801729, 'loss': 8.926261901855469}


EP_train:2:  44%|| 12272/27626 [28:55<36:08,  7.08it/s]

{'epoch': 2, 'iter': 12270, 'avg_loss': 8.54996798279602, 'avg_acc': 50.040237144487, 'loss': 8.367165565490723}


EP_train:2:  44%|| 12282/27626 [28:56<36:13,  7.06it/s]

{'epoch': 2, 'iter': 12280, 'avg_loss': 8.54996774445313, 'avg_acc': 50.03816871590261, 'loss': 8.720266342163086}


EP_train:2:  44%|| 12292/27626 [28:58<36:18,  7.04it/s]

{'epoch': 2, 'iter': 12290, 'avg_loss': 8.54978524921831, 'avg_acc': 50.04017167032788, 'loss': 8.337909698486328}


EP_train:2:  45%|| 12302/27626 [28:59<36:11,  7.06it/s]

{'epoch': 2, 'iter': 12300, 'avg_loss': 8.549648476445514, 'avg_acc': 50.04115519063491, 'loss': 8.79277515411377}


EP_train:2:  45%|| 12312/27626 [29:00<36:09,  7.06it/s]

{'epoch': 2, 'iter': 12310, 'avg_loss': 8.549737477474912, 'avg_acc': 50.041883275119815, 'loss': 9.190742492675781}


EP_train:2:  45%|| 12322/27626 [29:02<36:18,  7.02it/s]

{'epoch': 2, 'iter': 12320, 'avg_loss': 8.549896135741207, 'avg_acc': 50.04134201769337, 'loss': 9.334943771362305}


EP_train:2:  45%|| 12332/27626 [29:03<36:26,  7.00it/s]

{'epoch': 2, 'iter': 12330, 'avg_loss': 8.549844866608696, 'avg_acc': 50.038520801232664, 'loss': 8.685111045837402}


EP_train:2:  45%|| 12342/27626 [29:05<36:04,  7.06it/s]

{'epoch': 2, 'iter': 12340, 'avg_loss': 8.550304936354207, 'avg_acc': 50.038489587553684, 'loss': 8.678230285644531}


EP_train:2:  45%|| 12352/27626 [29:06<36:15,  7.02it/s]

{'epoch': 2, 'iter': 12350, 'avg_loss': 8.55038906744005, 'avg_acc': 50.03820540846895, 'loss': 7.917547225952148}


EP_train:2:  45%|| 12362/27626 [29:07<35:55,  7.08it/s]

{'epoch': 2, 'iter': 12360, 'avg_loss': 8.550306504236225, 'avg_acc': 50.0417138581021, 'loss': 8.259706497192383}


EP_train:2:  45%|| 12372/27626 [29:09<36:20,  6.99it/s]

{'epoch': 2, 'iter': 12370, 'avg_loss': 8.550106817127844, 'avg_acc': 50.039659283808916, 'loss': 8.949362754821777}


EP_train:2:  45%|| 12382/27626 [29:10<36:15,  7.01it/s]

{'epoch': 2, 'iter': 12380, 'avg_loss': 8.550112991935361, 'avg_acc': 50.038870042807524, 'loss': 8.225172996520996}


EP_train:2:  45%|| 12392/27626 [29:12<35:55,  7.07it/s]

{'epoch': 2, 'iter': 12390, 'avg_loss': 8.550064868879284, 'avg_acc': 50.037325478169635, 'loss': 8.231722831726074}


EP_train:2:  45%|| 12402/27626 [29:13<36:08,  7.02it/s]

{'epoch': 2, 'iter': 12400, 'avg_loss': 8.550229831026808, 'avg_acc': 50.03427142972341, 'loss': 9.418131828308105}


EP_train:2:  45%|| 12412/27626 [29:15<35:56,  7.05it/s]

{'epoch': 2, 'iter': 12410, 'avg_loss': 8.550174011998571, 'avg_acc': 50.03827250020143, 'loss': 8.705751419067383}


EP_train:2:  45%|| 12422/27626 [29:16<35:41,  7.10it/s]

{'epoch': 2, 'iter': 12420, 'avg_loss': 8.549919516575162, 'avg_acc': 50.04050599790677, 'loss': 8.512054443359375}


EP_train:2:  45%|| 12432/27626 [29:17<35:55,  7.05it/s]

{'epoch': 2, 'iter': 12430, 'avg_loss': 8.549895186597253, 'avg_acc': 50.03544566004344, 'loss': 7.797097682952881}


EP_train:2:  45%|| 12442/27626 [29:19<36:08,  7.00it/s]

{'epoch': 2, 'iter': 12440, 'avg_loss': 8.549876517512084, 'avg_acc': 50.03566835463387, 'loss': 8.773879051208496}


EP_train:2:  45%|| 12452/27626 [29:20<35:42,  7.08it/s]

{'epoch': 2, 'iter': 12450, 'avg_loss': 8.54985250843432, 'avg_acc': 50.035639707654, 'loss': 9.033051490783691}


EP_train:2:  45%|| 12462/27626 [29:22<35:40,  7.08it/s]

{'epoch': 2, 'iter': 12460, 'avg_loss': 8.549870193277256, 'avg_acc': 50.04137910280074, 'loss': 7.913419723510742}


EP_train:2:  45%|| 12472/27626 [29:23<36:15,  6.97it/s]

{'epoch': 2, 'iter': 12470, 'avg_loss': 8.549573350092258, 'avg_acc': 50.04360115467885, 'loss': 7.443310260772705}


EP_train:2:  45%|| 12482/27626 [29:24<35:50,  7.04it/s]

{'epoch': 2, 'iter': 12480, 'avg_loss': 8.54944168395238, 'avg_acc': 50.04581964586171, 'loss': 8.384231567382812}


EP_train:2:  45%|| 12492/27626 [29:26<36:05,  6.99it/s]

{'epoch': 2, 'iter': 12490, 'avg_loss': 8.549560244418767, 'avg_acc': 50.04378152269634, 'loss': 8.657268524169922}


EP_train:2:  45%|| 12502/27626 [29:27<35:30,  7.10it/s]

{'epoch': 2, 'iter': 12500, 'avg_loss': 8.549404808563496, 'avg_acc': 50.045246380289576, 'loss': 8.440210342407227}


EP_train:2:  45%|| 12512/27626 [29:29<35:53,  7.02it/s]

{'epoch': 2, 'iter': 12510, 'avg_loss': 8.549602132975155, 'avg_acc': 50.04845735752538, 'loss': 9.321147918701172}


EP_train:2:  45%|| 12522/27626 [29:30<35:35,  7.07it/s]

{'epoch': 2, 'iter': 12520, 'avg_loss': 8.54975675506415, 'avg_acc': 50.04392620397732, 'loss': 7.971970081329346}


EP_train:2:  45%|| 12532/27626 [29:31<35:28,  7.09it/s]

{'epoch': 2, 'iter': 12530, 'avg_loss': 8.549044671628302, 'avg_acc': 50.04488867608331, 'loss': 7.139881134033203}


EP_train:2:  45%|| 12542/27626 [29:33<35:28,  7.09it/s]

{'epoch': 2, 'iter': 12540, 'avg_loss': 8.549020087221823, 'avg_acc': 50.045351247906865, 'loss': 8.786538124084473}


EP_train:2:  45%|| 12552/27626 [29:34<35:30,  7.08it/s]

{'epoch': 2, 'iter': 12550, 'avg_loss': 8.548984802611729, 'avg_acc': 50.04481714604414, 'loss': 9.927714347839355}


EP_train:2:  45%|| 12562/27626 [29:36<35:52,  7.00it/s]

{'epoch': 2, 'iter': 12560, 'avg_loss': 8.548937380726708, 'avg_acc': 50.04503025236844, 'loss': 8.445843696594238}


EP_train:2:  46%|| 12572/27626 [29:37<35:51,  7.00it/s]

{'epoch': 2, 'iter': 12570, 'avg_loss': 8.54903566238941, 'avg_acc': 50.04499443162835, 'loss': 8.373966217041016}


EP_train:2:  46%|| 12582/27626 [29:39<35:33,  7.05it/s]

{'epoch': 2, 'iter': 12580, 'avg_loss': 8.549063952376018, 'avg_acc': 50.04942969557269, 'loss': 8.842923164367676}


EP_train:2:  46%|| 12592/27626 [29:40<35:20,  7.09it/s]

{'epoch': 2, 'iter': 12590, 'avg_loss': 8.549190105072851, 'avg_acc': 50.04616392661425, 'loss': 8.593201637268066}


EP_train:2:  46%|| 12602/27626 [29:41<35:22,  7.08it/s]

{'epoch': 2, 'iter': 12600, 'avg_loss': 8.549108619718702, 'avg_acc': 50.04736727243869, 'loss': 8.006986618041992}


EP_train:2:  46%|| 12612/27626 [29:43<35:28,  7.05it/s]

{'epoch': 2, 'iter': 12610, 'avg_loss': 8.548851495011832, 'avg_acc': 50.04757751169614, 'loss': 8.715633392333984}


EP_train:2:  46%|| 12622/27626 [29:44<35:16,  7.09it/s]

{'epoch': 2, 'iter': 12620, 'avg_loss': 8.549012757473538, 'avg_acc': 50.05051105300689, 'loss': 8.828320503234863}


EP_train:2:  46%|| 12632/27626 [29:46<35:34,  7.03it/s]

{'epoch': 2, 'iter': 12630, 'avg_loss': 8.548719787718461, 'avg_acc': 50.049976248911406, 'loss': 7.887617588043213}


EP_train:2:  46%|| 12642/27626 [29:47<35:21,  7.06it/s]

{'epoch': 2, 'iter': 12640, 'avg_loss': 8.548698827403886, 'avg_acc': 50.051419982596315, 'loss': 8.37032413482666}


EP_train:2:  46%|| 12652/27626 [29:48<35:34,  7.02it/s]

{'epoch': 2, 'iter': 12650, 'avg_loss': 8.548639102506408, 'avg_acc': 50.05878981898664, 'loss': 8.8040132522583}


EP_train:2:  46%|| 12662/27626 [29:50<35:21,  7.05it/s]

{'epoch': 2, 'iter': 12660, 'avg_loss': 8.548667231533315, 'avg_acc': 50.056768817628935, 'loss': 8.219602584838867}


EP_train:2:  46%|| 12672/27626 [29:51<35:31,  7.02it/s]

{'epoch': 2, 'iter': 12670, 'avg_loss': 8.54876123774109, 'avg_acc': 50.05771052008523, 'loss': 8.464337348937988}


EP_train:2:  46%|| 12682/27626 [29:53<35:17,  7.06it/s]

{'epoch': 2, 'iter': 12680, 'avg_loss': 8.548511925485174, 'avg_acc': 50.05618642062929, 'loss': 7.719461441040039}


EP_train:2:  46%|| 12692/27626 [29:54<35:10,  7.08it/s]

{'epoch': 2, 'iter': 12690, 'avg_loss': 8.548296595885956, 'avg_acc': 50.05466472303207, 'loss': 7.940829277038574}


EP_train:2:  46%|| 12702/27626 [29:56<35:06,  7.08it/s]

{'epoch': 2, 'iter': 12700, 'avg_loss': 8.548294599258414, 'avg_acc': 50.05265333438312, 'loss': 8.899271965026855}


EP_train:2:  46%|| 12712/27626 [29:57<35:38,  6.97it/s]

{'epoch': 2, 'iter': 12710, 'avg_loss': 8.548262541735447, 'avg_acc': 50.045728109511444, 'loss': 8.42126750946045}


EP_train:2:  46%|| 12722/27626 [29:58<35:11,  7.06it/s]

{'epoch': 2, 'iter': 12720, 'avg_loss': 8.548163027913679, 'avg_acc': 50.04692044650578, 'loss': 8.53157901763916}


EP_train:2:  46%|| 12732/27626 [30:00<35:21,  7.02it/s]

{'epoch': 2, 'iter': 12730, 'avg_loss': 8.548202465957187, 'avg_acc': 50.0473745188909, 'loss': 9.066425323486328}


EP_train:2:  46%|| 12742/27626 [30:01<35:16,  7.03it/s]

{'epoch': 2, 'iter': 12740, 'avg_loss': 8.548161020833575, 'avg_acc': 50.047092064987055, 'loss': 8.802278518676758}


EP_train:2:  46%|| 12752/27626 [30:03<35:13,  7.04it/s]

{'epoch': 2, 'iter': 12750, 'avg_loss': 8.548474839150453, 'avg_acc': 50.04901576346953, 'loss': 9.2464599609375}


EP_train:2:  46%|| 12762/27626 [30:04<35:06,  7.06it/s]

{'epoch': 2, 'iter': 12760, 'avg_loss': 8.548775597880153, 'avg_acc': 50.04995689992947, 'loss': 9.113755226135254}


EP_train:2:  46%|| 12772/27626 [30:05<35:20,  7.01it/s]

{'epoch': 2, 'iter': 12770, 'avg_loss': 8.549012704254382, 'avg_acc': 50.04869430741524, 'loss': 9.185787200927734}


EP_train:2:  46%|| 12782/27626 [30:07<35:03,  7.06it/s]

{'epoch': 2, 'iter': 12780, 'avg_loss': 8.549323312721942, 'avg_acc': 50.04841170487442, 'loss': 7.991557598114014}


EP_train:2:  46%|| 12792/27626 [30:08<34:58,  7.07it/s]

{'epoch': 2, 'iter': 12790, 'avg_loss': 8.549251006305866, 'avg_acc': 50.052038542725356, 'loss': 8.093720436096191}


EP_train:2:  46%|| 12802/27626 [30:10<34:44,  7.11it/s]

{'epoch': 2, 'iter': 12800, 'avg_loss': 8.54927775923344, 'avg_acc': 50.05126552613077, 'loss': 8.746132850646973}


EP_train:2:  46%|| 12812/27626 [30:11<34:59,  7.06it/s]

{'epoch': 2, 'iter': 12810, 'avg_loss': 8.549191777669483, 'avg_acc': 50.049517992350324, 'loss': 8.323620796203613}


EP_train:2:  46%|| 12822/27626 [30:12<34:59,  7.05it/s]

{'epoch': 2, 'iter': 12820, 'avg_loss': 8.549285066359296, 'avg_acc': 50.05240425863817, 'loss': 9.350417137145996}


EP_train:2:  46%|| 12832/27626 [30:14<34:58,  7.05it/s]

{'epoch': 2, 'iter': 12830, 'avg_loss': 8.549220146648642, 'avg_acc': 50.053824721377914, 'loss': 8.846535682678223}


EP_train:2:  46%|| 12842/27626 [30:15<35:15,  6.99it/s]

{'epoch': 2, 'iter': 12840, 'avg_loss': 8.549269375163496, 'avg_acc': 50.05402616618644, 'loss': 8.36369514465332}


EP_train:2:  47%|| 12852/27626 [30:17<35:15,  6.99it/s]

{'epoch': 2, 'iter': 12850, 'avg_loss': 8.549302969653452, 'avg_acc': 50.053984125748975, 'loss': 9.342597961425781}


EP_train:2:  47%|| 12862/27626 [30:18<35:14,  6.98it/s]

{'epoch': 2, 'iter': 12860, 'avg_loss': 8.549758604772075, 'avg_acc': 50.053699168027364, 'loss': 8.944988250732422}


EP_train:2:  47%|| 12872/27626 [30:20<34:56,  7.04it/s]

{'epoch': 2, 'iter': 12870, 'avg_loss': 8.54987475507308, 'avg_acc': 50.05729935513946, 'loss': 8.661434173583984}


EP_train:2:  47%|| 12882/27626 [30:21<34:51,  7.05it/s]

{'epoch': 2, 'iter': 12880, 'avg_loss': 8.55005089557927, 'avg_acc': 50.054586212250605, 'loss': 8.43034839630127}


EP_train:2:  47%|| 12892/27626 [30:22<34:50,  7.05it/s]

{'epoch': 2, 'iter': 12890, 'avg_loss': 8.549920435383084, 'avg_acc': 50.04945310681871, 'loss': 8.172649383544922}


EP_train:2:  47%|| 12902/27626 [30:24<34:55,  7.03it/s]

{'epoch': 2, 'iter': 12900, 'avg_loss': 8.5500701386174, 'avg_acc': 50.050868149755836, 'loss': 8.769881248474121}


EP_train:2:  47%|| 12912/27626 [30:25<34:46,  7.05it/s]

{'epoch': 2, 'iter': 12910, 'avg_loss': 8.550164889351217, 'avg_acc': 50.04961854232825, 'loss': 8.765979766845703}


EP_train:2:  47%|| 12922/27626 [30:27<34:53,  7.02it/s]

{'epoch': 2, 'iter': 12920, 'avg_loss': 8.549938515625579, 'avg_acc': 50.05199868431236, 'loss': 8.095846176147461}


EP_train:2:  47%|| 12932/27626 [30:28<34:43,  7.05it/s]

{'epoch': 2, 'iter': 12930, 'avg_loss': 8.549688508897598, 'avg_acc': 50.05389181037816, 'loss': 8.17976188659668}


EP_train:2:  47%|| 12942/27626 [30:30<34:41,  7.05it/s]

{'epoch': 2, 'iter': 12940, 'avg_loss': 8.549677994619538, 'avg_acc': 50.050710918785256, 'loss': 9.1856050491333}


EP_train:2:  47%|| 12952/27626 [30:31<34:35,  7.07it/s]

{'epoch': 2, 'iter': 12950, 'avg_loss': 8.549516751162196, 'avg_acc': 50.0509130569068, 'loss': 7.667067050933838}


EP_train:2:  47%|| 12962/27626 [30:32<34:22,  7.11it/s]

{'epoch': 2, 'iter': 12960, 'avg_loss': 8.549183590453897, 'avg_acc': 50.048944911658054, 'loss': 7.83551025390625}


EP_train:2:  47%|| 12972/27626 [30:34<34:34,  7.06it/s]

{'epoch': 2, 'iter': 12970, 'avg_loss': 8.549035396822967, 'avg_acc': 50.0503527098913, 'loss': 8.73979377746582}


EP_train:2:  47%|| 12982/27626 [30:35<34:30,  7.07it/s]

{'epoch': 2, 'iter': 12980, 'avg_loss': 8.549389259408025, 'avg_acc': 50.0505546568061, 'loss': 8.58965015411377}


EP_train:2:  47%|| 12992/27626 [30:37<34:49,  7.00it/s]

{'epoch': 2, 'iter': 12990, 'avg_loss': 8.549517705858777, 'avg_acc': 50.04979408821492, 'loss': 9.046204566955566}


EP_train:2:  47%|| 13002/27626 [30:38<34:33,  7.05it/s]

{'epoch': 2, 'iter': 13000, 'avg_loss': 8.549632528856822, 'avg_acc': 50.051197984770404, 'loss': 8.611104011535645}


EP_train:2:  47%|| 13012/27626 [30:39<34:38,  7.03it/s]

{'epoch': 2, 'iter': 13010, 'avg_loss': 8.549595823369325, 'avg_acc': 50.053080086081, 'loss': 9.048356056213379}


EP_train:2:  47%|| 13022/27626 [30:41<34:18,  7.09it/s]

{'epoch': 2, 'iter': 13020, 'avg_loss': 8.54992158178012, 'avg_acc': 50.051119345672376, 'loss': 8.508593559265137}


EP_train:2:  47%|| 13032/27626 [30:42<34:28,  7.06it/s]

{'epoch': 2, 'iter': 13030, 'avg_loss': 8.549907346284451, 'avg_acc': 50.04964124011971, 'loss': 8.560861587524414}


EP_train:2:  47%|| 13042/27626 [30:44<34:47,  6.99it/s]

{'epoch': 2, 'iter': 13040, 'avg_loss': 8.549872605404452, 'avg_acc': 50.051280576642895, 'loss': 8.109618186950684}


EP_train:2:  47%|| 13052/27626 [30:45<34:28,  7.04it/s]

{'epoch': 2, 'iter': 13050, 'avg_loss': 8.549745995024137, 'avg_acc': 50.05028350317984, 'loss': 8.5844144821167}


EP_train:2:  47%|| 13062/27626 [30:46<34:32,  7.03it/s]

{'epoch': 2, 'iter': 13060, 'avg_loss': 8.549689346757711, 'avg_acc': 50.049048694586936, 'loss': 8.841657638549805}


EP_train:2:  47%|| 13072/27626 [30:48<34:41,  6.99it/s]

{'epoch': 2, 'iter': 13070, 'avg_loss': 8.54960042667338, 'avg_acc': 50.046859459873005, 'loss': 8.673728942871094}


EP_train:2:  47%|| 13082/27626 [30:49<34:42,  6.98it/s]

{'epoch': 2, 'iter': 13080, 'avg_loss': 8.549572009120219, 'avg_acc': 50.04610694901002, 'loss': 8.73093032836914}


EP_train:2:  47%|| 13092/27626 [30:51<34:30,  7.02it/s]

{'epoch': 2, 'iter': 13090, 'avg_loss': 8.549435566329635, 'avg_acc': 50.04487816056833, 'loss': 8.732571601867676}


EP_train:2:  47%|| 13102/27626 [30:52<34:23,  7.04it/s]

{'epoch': 2, 'iter': 13100, 'avg_loss': 8.549136274489326, 'avg_acc': 50.044843905045425, 'loss': 7.607473373413086}


EP_train:2:  47%|| 13112/27626 [30:54<34:25,  7.03it/s]

{'epoch': 2, 'iter': 13110, 'avg_loss': 8.549407609548679, 'avg_acc': 50.04457135229959, 'loss': 8.977920532226562}


EP_train:2:  47%|| 13122/27626 [30:55<34:18,  7.05it/s]

{'epoch': 2, 'iter': 13120, 'avg_loss': 8.549705733159776, 'avg_acc': 50.04525188628916, 'loss': 8.553630828857422}


EP_train:2:  48%|| 13132/27626 [30:56<34:10,  7.07it/s]

{'epoch': 2, 'iter': 13130, 'avg_loss': 8.549813207927457, 'avg_acc': 50.048311248191304, 'loss': 8.73582935333252}


EP_train:2:  48%|| 13142/27626 [30:58<34:31,  6.99it/s]

{'epoch': 2, 'iter': 13140, 'avg_loss': 8.549836797680124, 'avg_acc': 50.05089034320067, 'loss': 8.80016803741455}


EP_train:2:  48%|| 13152/27626 [30:59<34:09,  7.06it/s]

{'epoch': 2, 'iter': 13150, 'avg_loss': 8.549862381859194, 'avg_acc': 50.0475249030492, 'loss': 8.01473331451416}


EP_train:2:  48%|| 13162/27626 [31:01<34:02,  7.08it/s]

{'epoch': 2, 'iter': 13160, 'avg_loss': 8.550118644425377, 'avg_acc': 50.04701390471848, 'loss': 8.271903991699219}


EP_train:2:  48%|| 13172/27626 [31:02<34:11,  7.05it/s]

{'epoch': 2, 'iter': 13170, 'avg_loss': 8.54988575148226, 'avg_acc': 50.04982537392757, 'loss': 8.511799812316895}


EP_train:2:  48%|| 13182/27626 [31:03<34:10,  7.05it/s]

{'epoch': 2, 'iter': 13180, 'avg_loss': 8.549823305167484, 'avg_acc': 50.051684242470216, 'loss': 8.567354202270508}


EP_train:2:  48%|| 13192/27626 [31:05<33:56,  7.09it/s]

{'epoch': 2, 'iter': 13190, 'avg_loss': 8.549803287461858, 'avg_acc': 50.052355772875444, 'loss': 8.121748924255371}


EP_train:2:  48%|| 13202/27626 [31:06<34:03,  7.06it/s]

{'epoch': 2, 'iter': 13200, 'avg_loss': 8.549828888259054, 'avg_acc': 50.05089576547231, 'loss': 9.107399940490723}


EP_train:2:  48%|| 13212/27626 [31:08<34:08,  7.04it/s]

{'epoch': 2, 'iter': 13210, 'avg_loss': 8.549762944812066, 'avg_acc': 50.04754560593445, 'loss': 7.61806058883667}


EP_train:2:  48%|| 13222/27626 [31:09<34:21,  6.99it/s]

{'epoch': 2, 'iter': 13220, 'avg_loss': 8.549666142952388, 'avg_acc': 50.05010967400349, 'loss': 8.668059349060059}


EP_train:2:  48%|| 13232/27626 [31:11<34:01,  7.05it/s]

{'epoch': 2, 'iter': 13230, 'avg_loss': 8.549602253097731, 'avg_acc': 50.051488927518704, 'loss': 8.653576850891113}


EP_train:2:  48%|| 13242/27626 [31:12<33:47,  7.09it/s]

{'epoch': 2, 'iter': 13240, 'avg_loss': 8.549754032167852, 'avg_acc': 50.0516860509025, 'loss': 8.347947120666504}


EP_train:2:  48%|| 13252/27626 [31:13<33:52,  7.07it/s]

{'epoch': 2, 'iter': 13250, 'avg_loss': 8.54965774684247, 'avg_acc': 50.053062033054104, 'loss': 8.908524513244629}


EP_train:2:  48%|| 13262/27626 [31:15<34:09,  7.01it/s]

{'epoch': 2, 'iter': 13260, 'avg_loss': 8.549898153947549, 'avg_acc': 50.055142900233776, 'loss': 8.543569564819336}


EP_train:2:  48%|| 13272/27626 [31:16<33:58,  7.04it/s]

{'epoch': 2, 'iter': 13270, 'avg_loss': 8.550139217636847, 'avg_acc': 50.05392396955768, 'loss': 9.527837753295898}


EP_train:2:  48%|| 13282/27626 [31:18<34:02,  7.02it/s]

{'epoch': 2, 'iter': 13280, 'avg_loss': 8.549925189323073, 'avg_acc': 50.05411866576313, 'loss': 8.596346855163574}


EP_train:2:  48%|| 13292/27626 [31:19<33:48,  7.07it/s]

{'epoch': 2, 'iter': 13290, 'avg_loss': 8.54992734461173, 'avg_acc': 50.05407794748326, 'loss': 8.796110153198242}


EP_train:2:  48%|| 13302/27626 [31:20<33:50,  7.05it/s]

{'epoch': 2, 'iter': 13300, 'avg_loss': 8.549675427095467, 'avg_acc': 50.04886850612736, 'loss': 8.179191589355469}


EP_train:2:  48%|| 13312/27626 [31:22<33:42,  7.08it/s]

{'epoch': 2, 'iter': 13310, 'avg_loss': 8.550001790150793, 'avg_acc': 50.049536097964086, 'loss': 8.583457946777344}


EP_train:2:  48%|| 13322/27626 [31:23<33:27,  7.13it/s]

{'epoch': 2, 'iter': 13320, 'avg_loss': 8.549989016544139, 'avg_acc': 50.05067187148112, 'loss': 8.626299858093262}


EP_train:2:  48%|| 13332/27626 [31:25<33:48,  7.05it/s]

{'epoch': 2, 'iter': 13330, 'avg_loss': 8.549967192036831, 'avg_acc': 50.04993061285725, 'loss': 9.15607738494873}


EP_train:2:  48%|| 13342/27626 [31:26<33:32,  7.10it/s]

{'epoch': 2, 'iter': 13340, 'avg_loss': 8.54986685532768, 'avg_acc': 50.05364103140694, 'loss': 8.770872116088867}


EP_train:2:  48%|| 13352/27626 [31:27<33:40,  7.07it/s]

{'epoch': 2, 'iter': 13350, 'avg_loss': 8.549886473876494, 'avg_acc': 50.05196239982024, 'loss': 8.22220516204834}


EP_train:2:  48%|| 13362/27626 [31:29<33:55,  7.01it/s]

{'epoch': 2, 'iter': 13360, 'avg_loss': 8.549861004819913, 'avg_acc': 50.04958461193024, 'loss': 8.571377754211426}


EP_train:2:  48%|| 13372/27626 [31:30<33:39,  7.06it/s]

{'epoch': 2, 'iter': 13370, 'avg_loss': 8.54996972996637, 'avg_acc': 50.051417246279264, 'loss': 8.706138610839844}


EP_train:2:  48%|| 13382/27626 [31:32<33:49,  7.02it/s]

{'epoch': 2, 'iter': 13380, 'avg_loss': 8.550032657319392, 'avg_acc': 50.04997758015096, 'loss': 8.671932220458984}


EP_train:2:  48%|| 13392/27626 [31:33<33:41,  7.04it/s]

{'epoch': 2, 'iter': 13390, 'avg_loss': 8.55021513561522, 'avg_acc': 50.05110708684938, 'loss': 8.824979782104492}


EP_train:2:  49%|| 13402/27626 [31:34<33:38,  7.05it/s]

{'epoch': 2, 'iter': 13400, 'avg_loss': 8.550124885626547, 'avg_acc': 50.05153533318409, 'loss': 8.357298851013184}


EP_train:2:  49%|| 13412/27626 [31:36<33:36,  7.05it/s]

{'epoch': 2, 'iter': 13410, 'avg_loss': 8.549980586188973, 'avg_acc': 50.05382708224592, 'loss': 8.128796577453613}


EP_train:2:  49%|| 13422/27626 [31:37<33:23,  7.09it/s]

{'epoch': 2, 'iter': 13420, 'avg_loss': 8.54987856993758, 'avg_acc': 50.05192422323225, 'loss': 8.129278182983398}


EP_train:2:  49%|| 13432/27626 [31:39<33:38,  7.03it/s]

{'epoch': 2, 'iter': 13430, 'avg_loss': 8.549632795461815, 'avg_acc': 50.04816283225374, 'loss': 8.369667053222656}


EP_train:2:  49%|| 13442/27626 [31:40<33:41,  7.02it/s]

{'epoch': 2, 'iter': 13440, 'avg_loss': 8.54959464834148, 'avg_acc': 50.04789450189718, 'loss': 9.186413764953613}


EP_train:2:  49%|| 13452/27626 [31:42<33:52,  6.97it/s]

{'epoch': 2, 'iter': 13450, 'avg_loss': 8.54955365806519, 'avg_acc': 50.04878819418631, 'loss': 7.9763617515563965}


EP_train:2:  49%|| 13462/27626 [31:43<33:30,  7.04it/s]

{'epoch': 2, 'iter': 13460, 'avg_loss': 8.549189535011624, 'avg_acc': 50.04619827650249, 'loss': 8.727762222290039}


EP_train:2:  49%|| 13472/27626 [31:44<33:35,  7.02it/s]

{'epoch': 2, 'iter': 13470, 'avg_loss': 8.549138152088728, 'avg_acc': 50.04709190112092, 'loss': 9.143183708190918}


EP_train:2:  49%|| 13482/27626 [31:46<33:46,  6.98it/s]

{'epoch': 2, 'iter': 13480, 'avg_loss': 8.549357621626086, 'avg_acc': 50.04311623766783, 'loss': 9.061294555664062}


EP_train:2:  49%|| 13492/27626 [31:47<33:15,  7.08it/s]

{'epoch': 2, 'iter': 13490, 'avg_loss': 8.549519222507783, 'avg_acc': 50.040999555259056, 'loss': 9.107342720031738}


EP_train:2:  49%|| 13502/27626 [31:49<33:11,  7.09it/s]

{'epoch': 2, 'iter': 13500, 'avg_loss': 8.54960127016693, 'avg_acc': 50.04536700985113, 'loss': 8.787109375}


EP_train:2:  49%|| 13512/27626 [31:50<33:33,  7.01it/s]

{'epoch': 2, 'iter': 13510, 'avg_loss': 8.54976773959051, 'avg_acc': 50.04556472503886, 'loss': 8.90449333190918}


EP_train:2:  49%|| 13522/27626 [31:51<33:17,  7.06it/s]

{'epoch': 2, 'iter': 13520, 'avg_loss': 8.549591636297997, 'avg_acc': 50.041139708601435, 'loss': 8.39706802368164}


EP_train:2:  49%|| 13532/27626 [31:53<33:13,  7.07it/s]

{'epoch': 2, 'iter': 13530, 'avg_loss': 8.549801247971487, 'avg_acc': 50.04134025570911, 'loss': 8.177629470825195}


EP_train:2:  49%|| 13542/27626 [31:54<33:19,  7.04it/s]

{'epoch': 2, 'iter': 13540, 'avg_loss': 8.549825698760136, 'avg_acc': 50.03992504246363, 'loss': 8.710920333862305}


EP_train:2:  49%|| 13552/27626 [31:56<33:23,  7.03it/s]

{'epoch': 2, 'iter': 13550, 'avg_loss': 8.549573216893279, 'avg_acc': 50.03528337392075, 'loss': 7.558014392852783}


EP_train:2:  49%|| 13562/27626 [31:57<33:20,  7.03it/s]

{'epoch': 2, 'iter': 13560, 'avg_loss': 8.549422525496322, 'avg_acc': 50.03364427402109, 'loss': 8.751533508300781}


EP_train:2:  49%|| 13572/27626 [31:59<33:14,  7.04it/s]

{'epoch': 2, 'iter': 13570, 'avg_loss': 8.549152146196692, 'avg_acc': 50.0336194827205, 'loss': 7.96520471572876}


EP_train:2:  49%|| 13582/27626 [32:00<33:07,  7.07it/s]

{'epoch': 2, 'iter': 13580, 'avg_loss': 8.549053881289673, 'avg_acc': 50.02945291215669, 'loss': 8.971465110778809}


EP_train:2:  49%|| 13592/27626 [32:01<33:02,  7.08it/s]

{'epoch': 2, 'iter': 13590, 'avg_loss': 8.54886794913009, 'avg_acc': 50.03311014642043, 'loss': 8.592035293579102}


EP_train:2:  49%|| 13602/27626 [32:03<33:02,  7.08it/s]

{'epoch': 2, 'iter': 13600, 'avg_loss': 8.549019150537267, 'avg_acc': 50.0307881773399, 'loss': 9.009535789489746}


EP_train:2:  49%|| 13612/27626 [32:04<33:12,  7.03it/s]

{'epoch': 2, 'iter': 13610, 'avg_loss': 8.548972710252167, 'avg_acc': 50.03375027551245, 'loss': 8.121097564697266}


EP_train:2:  49%|| 13622/27626 [32:06<33:04,  7.06it/s]

{'epoch': 2, 'iter': 13620, 'avg_loss': 8.54894792805176, 'avg_acc': 50.03556089861244, 'loss': 8.810810089111328}


EP_train:2:  49%|| 13632/27626 [32:07<33:26,  6.97it/s]

{'epoch': 2, 'iter': 13630, 'avg_loss': 8.54891781054487, 'avg_acc': 50.034159269312596, 'loss': 7.892167568206787}


EP_train:2:  49%|| 13642/27626 [32:08<33:08,  7.03it/s]

{'epoch': 2, 'iter': 13640, 'avg_loss': 8.549002183007142, 'avg_acc': 50.03367605014295, 'loss': 8.73521900177002}


EP_train:2:  49%|| 13652/27626 [32:10<32:59,  7.06it/s]

{'epoch': 2, 'iter': 13650, 'avg_loss': 8.549075547526341, 'avg_acc': 50.03571166947476, 'loss': 8.851505279541016}


EP_train:2:  49%|| 13662/27626 [32:11<32:54,  7.07it/s]

{'epoch': 2, 'iter': 13660, 'avg_loss': 8.549220185146858, 'avg_acc': 50.03431300783252, 'loss': 8.007258415222168}


EP_train:2:  49%|| 13672/27626 [32:13<33:01,  7.04it/s]

{'epoch': 2, 'iter': 13670, 'avg_loss': 8.549264935670728, 'avg_acc': 50.032916392363404, 'loss': 8.847208023071289}


EP_train:2:  50%|| 13682/27626 [32:14<32:41,  7.11it/s]

{'epoch': 2, 'iter': 13680, 'avg_loss': 8.549438156951599, 'avg_acc': 50.03015130472919, 'loss': 9.186287879943848}


EP_train:2:  50%|| 13692/27626 [32:15<32:45,  7.09it/s]

{'epoch': 2, 'iter': 13690, 'avg_loss': 8.549390709780678, 'avg_acc': 50.03218355123804, 'loss': 8.704385757446289}


EP_train:2:  50%|| 13702/27626 [32:17<32:49,  7.07it/s]

{'epoch': 2, 'iter': 13700, 'avg_loss': 8.549145712329388, 'avg_acc': 50.033528574556605, 'loss': 7.808989524841309}


EP_train:2:  50%|| 13712/27626 [32:18<33:00,  7.03it/s]

{'epoch': 2, 'iter': 13710, 'avg_loss': 8.549191397464595, 'avg_acc': 50.03168076726716, 'loss': 7.9051337242126465}


EP_train:2:  50%|| 13722/27626 [32:20<33:18,  6.96it/s]

{'epoch': 2, 'iter': 13720, 'avg_loss': 8.549123818599295, 'avg_acc': 50.03188543109103, 'loss': 8.410576820373535}


EP_train:2:  50%|| 13732/27626 [32:21<32:51,  7.05it/s]

{'epoch': 2, 'iter': 13730, 'avg_loss': 8.549354227969083, 'avg_acc': 50.03300014565581, 'loss': 7.9740376472473145}


EP_train:2:  50%|| 13742/27626 [32:23<32:41,  7.08it/s]

{'epoch': 2, 'iter': 13740, 'avg_loss': 8.549303367542011, 'avg_acc': 50.03138417873517, 'loss': 8.564294815063477}


EP_train:2:  50%|| 13752/27626 [32:24<32:35,  7.09it/s]

{'epoch': 2, 'iter': 13750, 'avg_loss': 8.549490254260473, 'avg_acc': 50.02954330594138, 'loss': 9.45290470123291}


EP_train:2:  50%|| 13762/27626 [32:25<32:47,  7.05it/s]

{'epoch': 2, 'iter': 13760, 'avg_loss': 8.549355061453934, 'avg_acc': 50.030430201293505, 'loss': 9.829612731933594}


EP_train:2:  50%|| 13772/27626 [32:27<32:44,  7.05it/s]

{'epoch': 2, 'iter': 13770, 'avg_loss': 8.549418181222073, 'avg_acc': 50.02927347324086, 'loss': 8.847240447998047}


EP_train:2:  50%|| 13782/27626 [32:28<32:44,  7.05it/s]

{'epoch': 2, 'iter': 13780, 'avg_loss': 8.549722150243875, 'avg_acc': 50.02675785501778, 'loss': 9.771963119506836}


EP_train:2:  50%|| 13792/27626 [32:30<32:43,  7.05it/s]

{'epoch': 2, 'iter': 13790, 'avg_loss': 8.54964023517755, 'avg_acc': 50.02651185555798, 'loss': 7.540047645568848}


EP_train:2:  50%|| 13802/27626 [32:31<32:45,  7.03it/s]

{'epoch': 2, 'iter': 13800, 'avg_loss': 8.549616158436143, 'avg_acc': 50.0219639881168, 'loss': 8.110490798950195}


EP_train:2:  50%|| 13812/27626 [32:32<33:04,  6.96it/s]

{'epoch': 2, 'iter': 13810, 'avg_loss': 8.549454168582514, 'avg_acc': 50.021043009195566, 'loss': 8.750661849975586}


EP_train:2:  50%|| 13822/27626 [32:34<32:40,  7.04it/s]

{'epoch': 2, 'iter': 13820, 'avg_loss': 8.549323967574225, 'avg_acc': 50.02351494103176, 'loss': 8.52745246887207}


EP_train:2:  50%|| 13832/27626 [32:35<32:19,  7.11it/s]

{'epoch': 2, 'iter': 13830, 'avg_loss': 8.549451224845813, 'avg_acc': 50.0221422890608, 'loss': 8.632461547851562}


EP_train:2:  50%|| 13842/27626 [32:37<32:29,  7.07it/s]

{'epoch': 2, 'iter': 13840, 'avg_loss': 8.549514181084582, 'avg_acc': 50.022577848421356, 'loss': 8.965206146240234}


EP_train:2:  50%|| 13852/27626 [32:38<32:17,  7.11it/s]

{'epoch': 2, 'iter': 13850, 'avg_loss': 8.549514773365186, 'avg_acc': 50.02233593242366, 'loss': 8.527507781982422}


EP_train:2:  50%|| 13862/27626 [32:39<32:25,  7.07it/s]

{'epoch': 2, 'iter': 13860, 'avg_loss': 8.549612832219044, 'avg_acc': 50.022545270903976, 'loss': 9.29263973236084}


EP_train:2:  50%|| 13872/27626 [32:41<32:25,  7.07it/s]

{'epoch': 2, 'iter': 13870, 'avg_loss': 8.549456781389116, 'avg_acc': 50.02095198615817, 'loss': 8.421196937561035}


EP_train:2:  50%|| 13882/27626 [32:42<32:34,  7.03it/s]

{'epoch': 2, 'iter': 13880, 'avg_loss': 8.549528069390007, 'avg_acc': 50.020936892154744, 'loss': 8.665056228637695}


EP_train:2:  50%|| 13892/27626 [32:44<32:44,  6.99it/s]

{'epoch': 2, 'iter': 13890, 'avg_loss': 8.54933890701842, 'avg_acc': 50.02317147793536, 'loss': 7.6875481605529785}


EP_train:2:  50%|| 13902/27626 [32:45<32:52,  6.96it/s]

{'epoch': 2, 'iter': 13900, 'avg_loss': 8.549110064017036, 'avg_acc': 50.01708510179124, 'loss': 7.493316650390625}


EP_train:2:  50%|| 13912/27626 [32:47<32:27,  7.04it/s]

{'epoch': 2, 'iter': 13910, 'avg_loss': 8.548983268122166, 'avg_acc': 50.01527568111567, 'loss': 8.074682235717773}


EP_train:2:  50%|| 13922/27626 [32:48<32:04,  7.12it/s]

{'epoch': 2, 'iter': 13920, 'avg_loss': 8.549347949810699, 'avg_acc': 50.01346885999569, 'loss': 8.63540267944336}


EP_train:2:  50%|| 13932/27626 [32:49<32:11,  7.09it/s]

{'epoch': 2, 'iter': 13930, 'avg_loss': 8.549437081374831, 'avg_acc': 50.01256191228196, 'loss': 9.372187614440918}


EP_train:2:  50%|| 13942/27626 [32:51<32:25,  7.04it/s]

{'epoch': 2, 'iter': 13940, 'avg_loss': 8.549193989109526, 'avg_acc': 50.00919051717955, 'loss': 8.281774520874023}


EP_train:2:  51%|| 13952/27626 [32:52<31:57,  7.13it/s]

{'epoch': 2, 'iter': 13950, 'avg_loss': 8.54912512325128, 'avg_acc': 50.00806393806896, 'loss': 8.520245552062988}


EP_train:2:  51%|| 13962/27626 [32:54<32:18,  7.05it/s]

{'epoch': 2, 'iter': 13960, 'avg_loss': 8.54887802298077, 'avg_acc': 50.00626745935105, 'loss': 7.8795166015625}


EP_train:2:  51%|| 13972/27626 [32:55<32:10,  7.07it/s]

{'epoch': 2, 'iter': 13970, 'avg_loss': 8.54885905446495, 'avg_acc': 50.00178942094339, 'loss': 8.440704345703125}


EP_train:2:  51%|| 13982/27626 [32:56<32:20,  7.03it/s]

{'epoch': 2, 'iter': 13980, 'avg_loss': 8.548968669384404, 'avg_acc': 50.002011658679635, 'loss': 8.259683609008789}


EP_train:2:  51%|| 13992/27626 [32:58<32:31,  6.98it/s]

{'epoch': 2, 'iter': 13990, 'avg_loss': 8.549102555400998, 'avg_acc': 50.00469051533128, 'loss': 9.327324867248535}


EP_train:2:  51%|| 14002/27626 [32:59<32:22,  7.01it/s]

{'epoch': 2, 'iter': 14000, 'avg_loss': 8.549130294537699, 'avg_acc': 50.00669595028927, 'loss': 8.54819107055664}


EP_train:2:  51%|| 14012/27626 [33:01<31:55,  7.11it/s]

{'epoch': 2, 'iter': 14010, 'avg_loss': 8.549158793582743, 'avg_acc': 50.008475483548644, 'loss': 8.909947395324707}


EP_train:2:  51%|| 14022/27626 [33:02<31:58,  7.09it/s]

{'epoch': 2, 'iter': 14020, 'avg_loss': 8.549218349709813, 'avg_acc': 50.01136687825405, 'loss': 9.036964416503906}


EP_train:2:  51%|| 14032/27626 [33:03<32:15,  7.02it/s]

{'epoch': 2, 'iter': 14030, 'avg_loss': 8.54933662776826, 'avg_acc': 50.0095770080536, 'loss': 8.137299537658691}


EP_train:2:  51%|| 14042/27626 [33:05<32:04,  7.06it/s]

{'epoch': 2, 'iter': 14040, 'avg_loss': 8.549299420917164, 'avg_acc': 50.015579374688414, 'loss': 8.61243724822998}


EP_train:2:  51%|| 14052/27626 [33:06<32:04,  7.05it/s]

{'epoch': 2, 'iter': 14050, 'avg_loss': 8.549355217760784, 'avg_acc': 50.01579069105402, 'loss': 8.446480751037598}


EP_train:2:  51%|| 14062/27626 [33:08<32:02,  7.05it/s]

{'epoch': 2, 'iter': 14060, 'avg_loss': 8.549514995955038, 'avg_acc': 50.017112936491, 'loss': 9.346464157104492}


EP_train:2:  51%|| 14072/27626 [33:09<32:03,  7.05it/s]

{'epoch': 2, 'iter': 14070, 'avg_loss': 8.549347208351243, 'avg_acc': 50.01643451069576, 'loss': 8.450102806091309}


EP_train:2:  51%|| 14082/27626 [33:11<32:12,  7.01it/s]

{'epoch': 2, 'iter': 14080, 'avg_loss': 8.549049128067866, 'avg_acc': 50.01597897876571, 'loss': 8.548785209655762}


EP_train:2:  51%|| 14092/27626 [33:12<31:52,  7.08it/s]

{'epoch': 2, 'iter': 14090, 'avg_loss': 8.549320173503812, 'avg_acc': 50.01508054786743, 'loss': 9.26427936553955}


EP_train:2:  51%|| 14102/27626 [33:13<31:55,  7.06it/s]

{'epoch': 2, 'iter': 14100, 'avg_loss': 8.549357616329065, 'avg_acc': 50.01595631515495, 'loss': 8.645942687988281}


EP_train:2:  51%|| 14112/27626 [33:15<31:52,  7.07it/s]

{'epoch': 2, 'iter': 14110, 'avg_loss': 8.549216996431435, 'avg_acc': 50.01904542555453, 'loss': 8.045814514160156}


EP_train:2:  51%|| 14122/27626 [33:16<31:40,  7.11it/s]

{'epoch': 2, 'iter': 14120, 'avg_loss': 8.549169526763023, 'avg_acc': 50.01858933503293, 'loss': 8.016983032226562}


EP_train:2:  51%|| 14132/27626 [33:18<32:08,  7.00it/s]

{'epoch': 2, 'iter': 14130, 'avg_loss': 8.54934063670438, 'avg_acc': 50.017470455027954, 'loss': 8.739002227783203}


EP_train:2:  51%|| 14142/27626 [33:19<31:43,  7.08it/s]

{'epoch': 2, 'iter': 14140, 'avg_loss': 8.549301154339856, 'avg_acc': 50.01856304363199, 'loss': 8.258026123046875}


EP_train:2:  51%|| 14152/27626 [33:20<31:58,  7.02it/s]

{'epoch': 2, 'iter': 14150, 'avg_loss': 8.549418472719735, 'avg_acc': 50.019654088050316, 'loss': 9.337362289428711}


EP_train:2:  51%|| 14162/27626 [33:22<31:59,  7.01it/s]

{'epoch': 2, 'iter': 14160, 'avg_loss': 8.549447621862125, 'avg_acc': 50.0187575030012, 'loss': 8.38730525970459}


EP_train:2:  51%|| 14172/27626 [33:23<31:46,  7.06it/s]

{'epoch': 2, 'iter': 14170, 'avg_loss': 8.549209098339452, 'avg_acc': 50.01786218333216, 'loss': 8.423048973083496}


EP_train:2:  51%|| 14182/27626 [33:25<31:34,  7.10it/s]

{'epoch': 2, 'iter': 14180, 'avg_loss': 8.54894210246359, 'avg_acc': 50.01895141386362, 'loss': 7.467757225036621}


EP_train:2:  51%|| 14192/27626 [33:26<31:38,  7.08it/s]

{'epoch': 2, 'iter': 14190, 'avg_loss': 8.548784051718668, 'avg_acc': 50.02025931928687, 'loss': 8.666926383972168}


EP_train:2:  51%|| 14202/27626 [33:28<31:37,  7.07it/s]

{'epoch': 2, 'iter': 14200, 'avg_loss': 8.548531000953805, 'avg_acc': 50.01606400957679, 'loss': 7.698464870452881}


EP_train:2:  51%|| 14212/27626 [33:29<31:43,  7.05it/s]

{'epoch': 2, 'iter': 14210, 'avg_loss': 8.548489766439339, 'avg_acc': 50.01583280557315, 'loss': 8.333667755126953}


EP_train:2:  51%|| 14222/27626 [33:30<31:19,  7.13it/s]

{'epoch': 2, 'iter': 14220, 'avg_loss': 8.548176422559061, 'avg_acc': 50.0162611630687, 'loss': 7.874274253845215}


EP_train:2:  52%|| 14232/27626 [33:32<31:30,  7.09it/s]

{'epoch': 2, 'iter': 14230, 'avg_loss': 8.548029777765962, 'avg_acc': 50.01888482889467, 'loss': 8.45216178894043}


EP_train:2:  52%|| 14242/27626 [33:33<31:40,  7.04it/s]

{'epoch': 2, 'iter': 14240, 'avg_loss': 8.548241953812626, 'avg_acc': 50.017554946984056, 'loss': 8.60438060760498}


EP_train:2:  52%|| 14252/27626 [33:35<31:41,  7.03it/s]

{'epoch': 2, 'iter': 14250, 'avg_loss': 8.548270575615927, 'avg_acc': 50.01534980001403, 'loss': 8.61886978149414}


EP_train:2:  52%|| 14262/27626 [33:36<31:38,  7.04it/s]

{'epoch': 2, 'iter': 14260, 'avg_loss': 8.548151643269026, 'avg_acc': 50.01818771474651, 'loss': 8.457045555114746}


EP_train:2:  52%|| 14272/27626 [33:37<31:43,  7.02it/s]

{'epoch': 2, 'iter': 14270, 'avg_loss': 8.548074591244188, 'avg_acc': 50.01861292130895, 'loss': 8.375977516174316}


EP_train:2:  52%|| 14282/27626 [33:39<31:19,  7.10it/s]

{'epoch': 2, 'iter': 14280, 'avg_loss': 8.54809095482058, 'avg_acc': 50.02100693228766, 'loss': 8.545005798339844}


EP_train:2:  52%|| 14292/27626 [33:40<31:28,  7.06it/s]

{'epoch': 2, 'iter': 14290, 'avg_loss': 8.54826742512844, 'avg_acc': 50.02208557833602, 'loss': 8.71098518371582}


EP_train:2:  52%|| 14302/27626 [33:42<31:25,  7.07it/s]

{'epoch': 2, 'iter': 14300, 'avg_loss': 8.548070378273852, 'avg_acc': 50.019447940703444, 'loss': 8.689324378967285}


EP_train:2:  52%|| 14312/27626 [33:43<31:30,  7.04it/s]

{'epoch': 2, 'iter': 14310, 'avg_loss': 8.548235168714259, 'avg_acc': 50.02030780518483, 'loss': 8.276098251342773}


EP_train:2:  52%|| 14322/27626 [33:44<31:25,  7.05it/s]

{'epoch': 2, 'iter': 14320, 'avg_loss': 8.548353174646577, 'avg_acc': 50.02029362474687, 'loss': 8.981793403625488}


EP_train:2:  52%|| 14332/27626 [33:46<31:30,  7.03it/s]

{'epoch': 2, 'iter': 14330, 'avg_loss': 8.548481681393309, 'avg_acc': 50.02246005163631, 'loss': 8.0691499710083}


EP_train:2:  52%|| 14342/27626 [33:47<31:20,  7.06it/s]

{'epoch': 2, 'iter': 14340, 'avg_loss': 8.548333092884958, 'avg_acc': 50.022444390209884, 'loss': 8.709774017333984}


EP_train:2:  52%|| 14352/27626 [33:49<31:33,  7.01it/s]

{'epoch': 2, 'iter': 14350, 'avg_loss': 8.548267192885765, 'avg_acc': 50.02504180893318, 'loss': 9.057086944580078}


EP_train:2:  52%|| 14362/27626 [33:50<31:23,  7.04it/s]

{'epoch': 2, 'iter': 14360, 'avg_loss': 8.548214547307568, 'avg_acc': 50.021760323097276, 'loss': 8.330135345458984}


EP_train:2:  52%|| 14372/27626 [33:52<31:15,  7.07it/s]

{'epoch': 2, 'iter': 14370, 'avg_loss': 8.54842838382648, 'avg_acc': 50.02413715120729, 'loss': 8.586118698120117}


EP_train:2:  52%|| 14382/27626 [33:53<31:28,  7.01it/s]

{'epoch': 2, 'iter': 14380, 'avg_loss': 8.548393923337233, 'avg_acc': 50.02346846533621, 'loss': 7.992125511169434}


EP_train:2:  52%|| 14392/27626 [33:54<31:24,  7.02it/s]

{'epoch': 2, 'iter': 14390, 'avg_loss': 8.548445190766069, 'avg_acc': 50.02193211034675, 'loss': 9.025517463684082}


EP_train:2:  52%|| 14402/27626 [33:56<31:10,  7.07it/s]

{'epoch': 2, 'iter': 14400, 'avg_loss': 8.548471656328076, 'avg_acc': 50.02408686896743, 'loss': 9.06350326538086}


EP_train:2:  52%|| 14412/27626 [33:57<31:22,  7.02it/s]

{'epoch': 2, 'iter': 14410, 'avg_loss': 8.548571475398722, 'avg_acc': 50.02341961002013, 'loss': 9.250801086425781}


EP_train:2:  52%|| 14422/27626 [33:59<31:18,  7.03it/s]

{'epoch': 2, 'iter': 14420, 'avg_loss': 8.548555257854247, 'avg_acc': 50.02362006795645, 'loss': 9.101669311523438}


EP_train:2:  52%|| 14432/27626 [34:00<31:11,  7.05it/s]

{'epoch': 2, 'iter': 14430, 'avg_loss': 8.548572752185128, 'avg_acc': 50.02446989120644, 'loss': 8.096951484680176}


EP_train:2:  52%|| 14442/27626 [34:01<31:14,  7.03it/s]

{'epoch': 2, 'iter': 14440, 'avg_loss': 8.548376693775776, 'avg_acc': 50.022721764420744, 'loss': 7.023556232452393}


EP_train:2:  52%|| 14452/27626 [34:03<31:16,  7.02it/s]

{'epoch': 2, 'iter': 14450, 'avg_loss': 8.548387243132156, 'avg_acc': 50.02140855304131, 'loss': 9.031412124633789}


EP_train:2:  52%|| 14462/27626 [34:04<31:16,  7.02it/s]

{'epoch': 2, 'iter': 14460, 'avg_loss': 8.54844139849082, 'avg_acc': 50.01880056704239, 'loss': 9.449950218200684}


EP_train:2:  52%|| 14472/27626 [34:06<30:57,  7.08it/s]

{'epoch': 2, 'iter': 14470, 'avg_loss': 8.548521965097503, 'avg_acc': 50.018787575150306, 'loss': 8.596736907958984}


EP_train:2:  52%|| 14482/27626 [34:07<30:59,  7.07it/s]

{'epoch': 2, 'iter': 14480, 'avg_loss': 8.548494406525547, 'avg_acc': 50.0211484013535, 'loss': 7.998165130615234}


EP_train:2:  52%|| 14492/27626 [34:08<30:59,  7.06it/s]

{'epoch': 2, 'iter': 14490, 'avg_loss': 8.548274691196532, 'avg_acc': 50.023937271409835, 'loss': 7.823972702026367}


EP_train:2:  52%|| 14502/27626 [34:10<30:53,  7.08it/s]

{'epoch': 2, 'iter': 14500, 'avg_loss': 8.548213576014374, 'avg_acc': 50.02779980690987, 'loss': 8.050344467163086}


EP_train:2:  53%|| 14512/27626 [34:11<31:25,  6.96it/s]

{'epoch': 2, 'iter': 14510, 'avg_loss': 8.548025059097023, 'avg_acc': 50.02950348011853, 'loss': 8.301712989807129}


EP_train:2:  53%|| 14522/27626 [34:13<31:03,  7.03it/s]

{'epoch': 2, 'iter': 14520, 'avg_loss': 8.547949723538663, 'avg_acc': 50.02733110667309, 'loss': 8.607626914978027}


EP_train:2:  53%|| 14532/27626 [34:14<31:05,  7.02it/s]

{'epoch': 2, 'iter': 14530, 'avg_loss': 8.548019989023647, 'avg_acc': 50.02774241277269, 'loss': 9.20611572265625}


EP_train:2:  53%|| 14542/27626 [34:16<31:02,  7.03it/s]

{'epoch': 2, 'iter': 14540, 'avg_loss': 8.548093872150327, 'avg_acc': 50.026004057492614, 'loss': 8.621723175048828}


EP_train:2:  53%|| 14552/27626 [34:17<30:43,  7.09it/s]

{'epoch': 2, 'iter': 14550, 'avg_loss': 8.5482546500132, 'avg_acc': 50.02276475843585, 'loss': 9.13693618774414}


EP_train:2:  53%|| 14562/27626 [34:18<30:47,  7.07it/s]

{'epoch': 2, 'iter': 14560, 'avg_loss': 8.548312088984904, 'avg_acc': 50.0208175949454, 'loss': 8.993660926818848}


EP_train:2:  53%|| 14572/27626 [34:20<30:39,  7.10it/s]

{'epoch': 2, 'iter': 14570, 'avg_loss': 8.548258507524176, 'avg_acc': 50.019302038295244, 'loss': 8.772571563720703}


EP_train:2:  53%|| 14582/27626 [34:21<30:47,  7.06it/s]

{'epoch': 2, 'iter': 14580, 'avg_loss': 8.548373266957471, 'avg_acc': 50.01800288046088, 'loss': 8.016904830932617}


EP_train:2:  53%|| 14592/27626 [34:23<31:01,  7.00it/s]

{'epoch': 2, 'iter': 14590, 'avg_loss': 8.5482256894376, 'avg_acc': 50.01691967651291, 'loss': 7.627396106719971}


EP_train:2:  53%|| 14602/27626 [34:24<30:39,  7.08it/s]

{'epoch': 2, 'iter': 14600, 'avg_loss': 8.548232830956279, 'avg_acc': 50.016480035613995, 'loss': 8.324241638183594}


EP_train:2:  53%|| 14612/27626 [34:25<30:42,  7.06it/s]

{'epoch': 2, 'iter': 14610, 'avg_loss': 8.54811506191079, 'avg_acc': 50.014329956881795, 'loss': 7.974050045013428}


EP_train:2:  53%|| 14622/27626 [34:27<30:49,  7.03it/s]

{'epoch': 2, 'iter': 14620, 'avg_loss': 8.548164075103033, 'avg_acc': 50.01496135695233, 'loss': 7.308199882507324}


EP_train:2:  53%|| 14632/27626 [34:28<30:48,  7.03it/s]

{'epoch': 2, 'iter': 14630, 'avg_loss': 8.548151892864828, 'avg_acc': 50.01196090492789, 'loss': 8.249744415283203}


EP_train:2:  53%|| 14642/27626 [34:30<30:46,  7.03it/s]

{'epoch': 2, 'iter': 14640, 'avg_loss': 8.548218572801698, 'avg_acc': 50.0134468274025, 'loss': 8.853732109069824}


EP_train:2:  53%|| 14652/27626 [34:31<30:23,  7.11it/s]

{'epoch': 2, 'iter': 14650, 'avg_loss': 8.548290355759963, 'avg_acc': 50.01066480103747, 'loss': 8.530645370483398}


EP_train:2:  53%|| 14662/27626 [34:32<30:30,  7.08it/s]

{'epoch': 2, 'iter': 14660, 'avg_loss': 8.548538177532645, 'avg_acc': 50.01214958051975, 'loss': 8.349274635314941}


EP_train:2:  53%|| 14672/27626 [34:34<30:40,  7.04it/s]

{'epoch': 2, 'iter': 14670, 'avg_loss': 8.548672424423273, 'avg_acc': 50.01341933065231, 'loss': 7.714035511016846}


EP_train:2:  53%|| 14682/27626 [34:35<30:47,  7.01it/s]

{'epoch': 2, 'iter': 14680, 'avg_loss': 8.548786731890425, 'avg_acc': 50.017667393229345, 'loss': 9.721654891967773}


EP_train:2:  53%|| 14692/27626 [34:37<30:19,  7.11it/s]

{'epoch': 2, 'iter': 14690, 'avg_loss': 8.548855653054849, 'avg_acc': 50.01552821455313, 'loss': 8.833637237548828}


EP_train:2:  53%|| 14702/27626 [34:38<30:40,  7.02it/s]

{'epoch': 2, 'iter': 14700, 'avg_loss': 8.549114023371704, 'avg_acc': 50.01636793415414, 'loss': 8.763582229614258}


EP_train:2:  53%|| 14712/27626 [34:40<30:39,  7.02it/s]

{'epoch': 2, 'iter': 14710, 'avg_loss': 8.548984101607743, 'avg_acc': 50.013807694922164, 'loss': 8.759963035583496}


EP_train:2:  53%|| 14722/27626 [34:41<30:23,  7.08it/s]

{'epoch': 2, 'iter': 14720, 'avg_loss': 8.548869383836374, 'avg_acc': 50.01613341484954, 'loss': 8.371909141540527}


EP_train:2:  53%|| 14732/27626 [34:42<30:12,  7.11it/s]

{'epoch': 2, 'iter': 14730, 'avg_loss': 8.548751280738449, 'avg_acc': 50.01951666553527, 'loss': 8.283339500427246}


EP_train:2:  53%|| 14742/27626 [34:44<30:17,  7.09it/s]

{'epoch': 2, 'iter': 14740, 'avg_loss': 8.548860292503916, 'avg_acc': 50.0228953259616, 'loss': 8.891582489013672}


EP_train:2:  53%|| 14752/27626 [34:45<30:15,  7.09it/s]

{'epoch': 2, 'iter': 14750, 'avg_loss': 8.548844724919835, 'avg_acc': 50.02160870449461, 'loss': 8.516622543334961}


EP_train:2:  53%|| 14762/27626 [34:47<30:15,  7.08it/s]

{'epoch': 2, 'iter': 14760, 'avg_loss': 8.548940026512168, 'avg_acc': 50.0192652936793, 'loss': 8.902375221252441}


EP_train:2:  53%|| 14772/27626 [34:48<30:37,  6.99it/s]

{'epoch': 2, 'iter': 14770, 'avg_loss': 8.548995370621462, 'avg_acc': 50.01650192945637, 'loss': 8.277092933654785}


EP_train:2:  54%|| 14782/27626 [34:49<30:13,  7.08it/s]

{'epoch': 2, 'iter': 14780, 'avg_loss': 8.54905639379646, 'avg_acc': 50.017759285569305, 'loss': 8.536596298217773}


EP_train:2:  54%|| 14792/27626 [34:51<30:27,  7.02it/s]

{'epoch': 2, 'iter': 14790, 'avg_loss': 8.549269060262938, 'avg_acc': 50.01816983300655, 'loss': 7.835769176483154}


EP_train:2:  54%|| 14802/27626 [34:52<30:33,  7.00it/s]

{'epoch': 2, 'iter': 14800, 'avg_loss': 8.549353943878828, 'avg_acc': 50.01984663198432, 'loss': 9.073875427246094}


EP_train:2:  54%|| 14812/27626 [34:54<30:18,  7.05it/s]

{'epoch': 2, 'iter': 14810, 'avg_loss': 8.5492503982852, 'avg_acc': 50.02342009317399, 'loss': 8.371774673461914}


EP_train:2:  54%|| 14822/27626 [34:55<30:06,  7.09it/s]

{'epoch': 2, 'iter': 14820, 'avg_loss': 8.549127450750856, 'avg_acc': 50.02361514067877, 'loss': 8.649127006530762}


EP_train:2:  54%|| 14832/27626 [34:57<30:03,  7.09it/s]

{'epoch': 2, 'iter': 14830, 'avg_loss': 8.549042340307498, 'avg_acc': 50.02402063245904, 'loss': 8.787668228149414}


EP_train:2:  54%|| 14842/27626 [34:58<30:20,  7.02it/s]

{'epoch': 2, 'iter': 14840, 'avg_loss': 8.548901362734322, 'avg_acc': 50.026110100397545, 'loss': 9.2774019241333}


EP_train:2:  54%|| 14852/27626 [34:59<30:16,  7.03it/s]

{'epoch': 2, 'iter': 14850, 'avg_loss': 8.548908713144707, 'avg_acc': 50.02672378964379, 'loss': 7.8476762771606445}


EP_train:2:  54%|| 14862/27626 [35:01<30:03,  7.08it/s]

{'epoch': 2, 'iter': 14860, 'avg_loss': 8.548967404574423, 'avg_acc': 50.02817778076846, 'loss': 8.595298767089844}


EP_train:2:  54%|| 14872/27626 [35:02<30:12,  7.03it/s]

{'epoch': 2, 'iter': 14870, 'avg_loss': 8.548868603058969, 'avg_acc': 50.029839956963215, 'loss': 8.214749336242676}


EP_train:2:  54%|| 14882/27626 [35:04<30:12,  7.03it/s]

{'epoch': 2, 'iter': 14880, 'avg_loss': 8.548762681286995, 'avg_acc': 50.02813990995229, 'loss': 8.056896209716797}


EP_train:2:  54%|| 14892/27626 [35:05<30:03,  7.06it/s]

{'epoch': 2, 'iter': 14890, 'avg_loss': 8.548742792699377, 'avg_acc': 50.028960445906925, 'loss': 8.070850372314453}


EP_train:2:  54%|| 14902/27626 [35:06<29:50,  7.10it/s]

{'epoch': 2, 'iter': 14900, 'avg_loss': 8.548602425502487, 'avg_acc': 50.03145762029394, 'loss': 8.280000686645508}


EP_train:2:  54%|| 14912/27626 [35:08<29:54,  7.08it/s]

{'epoch': 2, 'iter': 14910, 'avg_loss': 8.548547698029825, 'avg_acc': 50.03248440748441, 'loss': 7.769970893859863}


EP_train:2:  54%|| 14922/27626 [35:09<29:44,  7.12it/s]

{'epoch': 2, 'iter': 14920, 'avg_loss': 8.548367374840215, 'avg_acc': 50.03183432745794, 'loss': 7.910916328430176}


EP_train:2:  54%|| 14932/27626 [35:11<30:06,  7.03it/s]

{'epoch': 2, 'iter': 14930, 'avg_loss': 8.548553470277904, 'avg_acc': 50.03034793382894, 'loss': 7.900167942047119}


EP_train:2:  54%|| 14942/27626 [35:12<31:34,  6.70it/s]

{'epoch': 2, 'iter': 14940, 'avg_loss': 8.548414935565251, 'avg_acc': 50.02886352988421, 'loss': 8.703213691711426}


EP_train:2:  54%|| 14952/27626 [35:14<30:10,  7.00it/s]

{'epoch': 2, 'iter': 14950, 'avg_loss': 8.548654585329237, 'avg_acc': 50.02654504715404, 'loss': 8.594940185546875}


EP_train:2:  54%|| 14962/27626 [35:15<29:51,  7.07it/s]

{'epoch': 2, 'iter': 14960, 'avg_loss': 8.548910796184144, 'avg_acc': 50.02840719203262, 'loss': 8.142621040344238}


EP_train:2:  54%|| 14972/27626 [35:16<29:58,  7.04it/s]

{'epoch': 2, 'iter': 14970, 'avg_loss': 8.549061796897861, 'avg_acc': 50.02880569100261, 'loss': 8.656190872192383}


EP_train:2:  54%|| 14982/27626 [35:18<29:47,  7.07it/s]

{'epoch': 2, 'iter': 14980, 'avg_loss': 8.549030763795304, 'avg_acc': 50.02732627995461, 'loss': 8.20772933959961}


EP_train:2:  54%|| 14992/27626 [35:19<29:44,  7.08it/s]

{'epoch': 2, 'iter': 14990, 'avg_loss': 8.548926488448712, 'avg_acc': 50.02897571876459, 'loss': 8.582311630249023}


EP_train:2:  54%|| 15002/27626 [35:21<29:52,  7.04it/s]

{'epoch': 2, 'iter': 15000, 'avg_loss': 8.54877739564219, 'avg_acc': 50.02853976401573, 'loss': 8.504985809326172}


EP_train:2:  54%|| 15012/27626 [35:22<29:44,  7.07it/s]

{'epoch': 2, 'iter': 15010, 'avg_loss': 8.548529438060475, 'avg_acc': 50.02893711278396, 'loss': 8.982279777526855}


EP_train:2:  54%|| 15022/27626 [35:23<29:50,  7.04it/s]

{'epoch': 2, 'iter': 15020, 'avg_loss': 8.54847002077036, 'avg_acc': 50.029541974568936, 'loss': 8.1737699508667}


EP_train:2:  54%|| 15032/27626 [35:25<30:01,  6.99it/s]

{'epoch': 2, 'iter': 15030, 'avg_loss': 8.548574007245072, 'avg_acc': 50.02848280220877, 'loss': 9.273015022277832}


EP_train:2:  54%|| 15042/27626 [35:26<29:38,  7.08it/s]

{'epoch': 2, 'iter': 15040, 'avg_loss': 8.548647961458954, 'avg_acc': 50.029502692640115, 'loss': 8.831708908081055}


EP_train:2:  54%|| 15052/27626 [35:28<29:39,  7.06it/s]

{'epoch': 2, 'iter': 15050, 'avg_loss': 8.548523761910715, 'avg_acc': 50.02719918942263, 'loss': 8.917749404907227}


EP_train:2:  55%|| 15062/27626 [35:29<29:45,  7.03it/s]

{'epoch': 2, 'iter': 15060, 'avg_loss': 8.548402496245696, 'avg_acc': 50.02427627647567, 'loss': 8.581698417663574}


EP_train:2:  55%|| 15072/27626 [35:30<29:36,  7.07it/s]

{'epoch': 2, 'iter': 15070, 'avg_loss': 8.548616414237502, 'avg_acc': 50.0259189834782, 'loss': 8.833816528320312}


EP_train:2:  55%|| 15082/27626 [35:32<29:23,  7.11it/s]

{'epoch': 2, 'iter': 15080, 'avg_loss': 8.548535647802364, 'avg_acc': 50.02714508321729, 'loss': 8.907322883605957}


EP_train:2:  55%|| 15092/27626 [35:33<29:19,  7.13it/s]

{'epoch': 2, 'iter': 15090, 'avg_loss': 8.548452465413034, 'avg_acc': 50.02857663508051, 'loss': 7.942990303039551}


EP_train:2:  55%|| 15102/27626 [35:35<29:34,  7.06it/s]

{'epoch': 2, 'iter': 15100, 'avg_loss': 8.548571984599453, 'avg_acc': 50.028350771472084, 'loss': 8.679413795471191}


EP_train:2:  55%|| 15112/27626 [35:36<29:32,  7.06it/s]

{'epoch': 2, 'iter': 15110, 'avg_loss': 8.548548176211135, 'avg_acc': 50.02791840381179, 'loss': 8.631725311279297}


EP_train:2:  55%|| 15122/27626 [35:38<29:32,  7.05it/s]

{'epoch': 2, 'iter': 15120, 'avg_loss': 8.54853638466779, 'avg_acc': 50.028313272931676, 'loss': 8.06500244140625}


EP_train:2:  55%|| 15132/27626 [35:39<29:22,  7.09it/s]

{'epoch': 2, 'iter': 15130, 'avg_loss': 8.548740749436025, 'avg_acc': 50.03035985724671, 'loss': 8.846123695373535}


EP_train:2:  55%|| 15142/27626 [35:40<29:24,  7.08it/s]

{'epoch': 2, 'iter': 15140, 'avg_loss': 8.548674632658788, 'avg_acc': 50.03343570437884, 'loss': 8.478671073913574}


EP_train:2:  55%|| 15152/27626 [35:42<29:26,  7.06it/s]

{'epoch': 2, 'iter': 15150, 'avg_loss': 8.548735396113461, 'avg_acc': 50.03176357996172, 'loss': 8.128902435302734}


EP_train:2:  55%|| 15162/27626 [35:43<29:39,  7.01it/s]

{'epoch': 2, 'iter': 15160, 'avg_loss': 8.548756770239713, 'avg_acc': 50.033597717828634, 'loss': 8.257604598999023}


EP_train:2:  55%|| 15172/27626 [35:45<29:22,  7.07it/s]

{'epoch': 2, 'iter': 15170, 'avg_loss': 8.548669091902886, 'avg_acc': 50.03213367609255, 'loss': 7.778599262237549}


EP_train:2:  55%|| 15182/27626 [35:46<29:15,  7.09it/s]

{'epoch': 2, 'iter': 15180, 'avg_loss': 8.548300825366153, 'avg_acc': 50.033347605559584, 'loss': 8.00717830657959}


EP_train:2:  55%|| 15192/27626 [35:47<29:25,  7.04it/s]

{'epoch': 2, 'iter': 15190, 'avg_loss': 8.548372783463764, 'avg_acc': 50.03311993943782, 'loss': 9.135838508605957}


EP_train:2:  55%|| 15202/27626 [35:49<29:29,  7.02it/s]

{'epoch': 2, 'iter': 15200, 'avg_loss': 8.548359472338648, 'avg_acc': 50.03227583711598, 'loss': 8.869324684143066}


EP_train:2:  55%|| 15212/27626 [35:50<29:08,  7.10it/s]

{'epoch': 2, 'iter': 15210, 'avg_loss': 8.548263646043598, 'avg_acc': 50.03369272237197, 'loss': 8.162569046020508}


EP_train:2:  55%|| 15222/27626 [35:52<29:11,  7.08it/s]

{'epoch': 2, 'iter': 15220, 'avg_loss': 8.548225803198557, 'avg_acc': 50.034286512055715, 'loss': 8.669068336486816}


EP_train:2:  55%|| 15232/27626 [35:53<29:08,  7.09it/s]

{'epoch': 2, 'iter': 15230, 'avg_loss': 8.548089274041455, 'avg_acc': 50.034674348368455, 'loss': 8.311348915100098}


EP_train:2:  55%|| 15242/27626 [35:54<29:25,  7.02it/s]

{'epoch': 2, 'iter': 15240, 'avg_loss': 8.547982029132381, 'avg_acc': 50.03547175382192, 'loss': 7.709041595458984}


EP_train:2:  55%|| 15252/27626 [35:56<29:30,  6.99it/s]

{'epoch': 2, 'iter': 15250, 'avg_loss': 8.547780153204375, 'avg_acc': 50.03667792275917, 'loss': 8.770423889160156}


EP_train:2:  55%|| 15262/27626 [35:57<29:08,  7.07it/s]

{'epoch': 2, 'iter': 15260, 'avg_loss': 8.547797018496958, 'avg_acc': 50.037472970316486, 'loss': 8.671584129333496}


EP_train:2:  55%|| 15272/27626 [35:59<29:37,  6.95it/s]

{'epoch': 2, 'iter': 15270, 'avg_loss': 8.547751740820399, 'avg_acc': 50.03581134175889, 'loss': 8.308728218078613}


EP_train:2:  55%|| 15282/27626 [36:00<29:13,  7.04it/s]

{'epoch': 2, 'iter': 15280, 'avg_loss': 8.547704784059297, 'avg_acc': 50.03353838099601, 'loss': 8.315801620483398}


EP_train:2:  55%|| 15292/27626 [36:02<29:13,  7.04it/s]

{'epoch': 2, 'iter': 15290, 'avg_loss': 8.547816273650165, 'avg_acc': 50.032903341835066, 'loss': 8.516283988952637}


EP_train:2:  55%|| 15302/27626 [36:03<28:59,  7.09it/s]

{'epoch': 2, 'iter': 15300, 'avg_loss': 8.547767798812254, 'avg_acc': 50.03512842297889, 'loss': 9.020941734313965}


EP_train:2:  55%|| 15312/27626 [36:04<29:00,  7.08it/s]

{'epoch': 2, 'iter': 15310, 'avg_loss': 8.547842143354453, 'avg_acc': 50.034289073215334, 'loss': 9.366841316223145}


EP_train:2:  55%|| 15322/27626 [36:06<28:56,  7.08it/s]

{'epoch': 2, 'iter': 15320, 'avg_loss': 8.5478540288508, 'avg_acc': 50.03406272436525, 'loss': 7.597671031951904}


EP_train:2:  55%|| 15332/27626 [36:07<28:57,  7.07it/s]

{'epoch': 2, 'iter': 15330, 'avg_loss': 8.547910703308592, 'avg_acc': 50.03159448176896, 'loss': 8.143277168273926}


EP_train:2:  56%|| 15342/27626 [36:09<29:05,  7.04it/s]

{'epoch': 2, 'iter': 15340, 'avg_loss': 8.547971070990025, 'avg_acc': 50.03259239945245, 'loss': 8.731884956359863}


EP_train:2:  56%|| 15352/27626 [36:10<28:54,  7.08it/s]

{'epoch': 2, 'iter': 15350, 'avg_loss': 8.548084389743304, 'avg_acc': 50.03155331900202, 'loss': 8.052412986755371}


EP_train:2:  56%|| 15362/27626 [36:11<29:09,  7.01it/s]

{'epoch': 2, 'iter': 15360, 'avg_loss': 8.547953655722706, 'avg_acc': 50.028684655946876, 'loss': 9.22966194152832}


EP_train:2:  56%|| 15372/27626 [36:13<28:59,  7.04it/s]

{'epoch': 2, 'iter': 15370, 'avg_loss': 8.54801034046347, 'avg_acc': 50.029682519029336, 'loss': 8.643089294433594}


EP_train:2:  56%|| 15382/27626 [36:14<28:54,  7.06it/s]

{'epoch': 2, 'iter': 15380, 'avg_loss': 8.548317631806503, 'avg_acc': 50.02864735712893, 'loss': 9.548480033874512}


EP_train:2:  56%|| 15392/27626 [36:16<28:46,  7.09it/s]

{'epoch': 2, 'iter': 15390, 'avg_loss': 8.548422185202478, 'avg_acc': 50.028019621856934, 'loss': 7.627693176269531}


EP_train:2:  56%|| 15402/27626 [36:17<28:47,  7.08it/s]

{'epoch': 2, 'iter': 15400, 'avg_loss': 8.548518568453453, 'avg_acc': 50.02556652165444, 'loss': 8.62992000579834}


EP_train:2:  56%|| 15412/27626 [36:18<28:41,  7.09it/s]

{'epoch': 2, 'iter': 15410, 'avg_loss': 8.54842529865577, 'avg_acc': 50.022508273311274, 'loss': 8.523825645446777}


EP_train:2:  56%|| 15422/27626 [36:20<28:47,  7.06it/s]

{'epoch': 2, 'iter': 15420, 'avg_loss': 8.54831341511003, 'avg_acc': 50.02310161468127, 'loss': 7.526515960693359}


EP_train:2:  56%|| 15432/27626 [36:21<28:47,  7.06it/s]

{'epoch': 2, 'iter': 15430, 'avg_loss': 8.548484749250454, 'avg_acc': 50.0251117879593, 'loss': 9.759632110595703}


EP_train:2:  56%|| 15442/27626 [36:23<28:52,  7.03it/s]

{'epoch': 2, 'iter': 15440, 'avg_loss': 8.548565686965002, 'avg_acc': 50.02327407551325, 'loss': 8.705339431762695}


EP_train:2:  56%|| 15452/27626 [36:24<28:53,  7.02it/s]

{'epoch': 2, 'iter': 15450, 'avg_loss': 8.548536565016638, 'avg_acc': 50.023663516924465, 'loss': 8.648489952087402}


EP_train:2:  56%|| 15462/27626 [36:26<28:51,  7.03it/s]

{'epoch': 2, 'iter': 15460, 'avg_loss': 8.54844529748572, 'avg_acc': 50.02445669749693, 'loss': 7.779837131500244}


EP_train:2:  56%|| 15472/27626 [36:27<28:39,  7.07it/s]

{'epoch': 2, 'iter': 15470, 'avg_loss': 8.548414070106464, 'avg_acc': 50.02524885269214, 'loss': 8.157661437988281}


EP_train:2:  56%|| 15482/27626 [36:28<28:26,  7.12it/s]

{'epoch': 2, 'iter': 15480, 'avg_loss': 8.548370207505712, 'avg_acc': 50.02926975001615, 'loss': 7.86721134185791}


EP_train:2:  56%|| 15492/27626 [36:30<28:36,  7.07it/s]

{'epoch': 2, 'iter': 15490, 'avg_loss': 8.548631481923941, 'avg_acc': 50.031066425666516, 'loss': 9.550596237182617}


EP_train:2:  56%|| 15502/27626 [36:31<28:29,  7.09it/s]

{'epoch': 2, 'iter': 15500, 'avg_loss': 8.548857048322105, 'avg_acc': 50.02963518482678, 'loss': 8.317870140075684}


EP_train:2:  56%|| 15512/27626 [36:33<28:39,  7.05it/s]

{'epoch': 2, 'iter': 15510, 'avg_loss': 8.54890533370042, 'avg_acc': 50.02800431951518, 'loss': 7.767204284667969}


EP_train:2:  56%|| 15522/27626 [36:34<28:41,  7.03it/s]

{'epoch': 2, 'iter': 15520, 'avg_loss': 8.5488750696044, 'avg_acc': 50.03181173893435, 'loss': 9.119701385498047}


EP_train:2:  56%|| 15532/27626 [36:35<28:40,  7.03it/s]

{'epoch': 2, 'iter': 15530, 'avg_loss': 8.54888647809691, 'avg_acc': 50.035211834395724, 'loss': 8.22910213470459}


EP_train:2:  56%|| 15542/27626 [36:37<28:28,  7.07it/s]

{'epoch': 2, 'iter': 15540, 'avg_loss': 8.548897611597056, 'avg_acc': 50.03539025802716, 'loss': 8.795263290405273}


EP_train:2:  56%|| 15552/27626 [36:38<28:27,  7.07it/s]

{'epoch': 2, 'iter': 15550, 'avg_loss': 8.548906777926284, 'avg_acc': 50.03456369365315, 'loss': 8.910820007324219}


EP_train:2:  56%|| 15562/27626 [36:40<28:17,  7.11it/s]

{'epoch': 2, 'iter': 15560, 'avg_loss': 8.548815866905102, 'avg_acc': 50.03353736906369, 'loss': 8.070590019226074}


EP_train:2:  56%|| 15572/27626 [36:41<28:24,  7.07it/s]

{'epoch': 2, 'iter': 15570, 'avg_loss': 8.548839320429686, 'avg_acc': 50.03351583071094, 'loss': 8.805377006530762}


EP_train:2:  56%|| 15582/27626 [36:42<28:41,  6.99it/s]

{'epoch': 2, 'iter': 15580, 'avg_loss': 8.548861603032686, 'avg_acc': 50.03489827353829, 'loss': 8.750576972961426}


EP_train:2:  56%|| 15592/27626 [36:44<28:22,  7.07it/s]

{'epoch': 2, 'iter': 15590, 'avg_loss': 8.548912123913228, 'avg_acc': 50.036278942979926, 'loss': 8.03510856628418}


EP_train:2:  56%|| 15602/27626 [36:45<28:34,  7.01it/s]

{'epoch': 2, 'iter': 15600, 'avg_loss': 8.548848847870367, 'avg_acc': 50.0336516889943, 'loss': 8.24023151397705}


EP_train:2:  57%|| 15612/27626 [36:47<28:33,  7.01it/s]

{'epoch': 2, 'iter': 15610, 'avg_loss': 8.548933278971376, 'avg_acc': 50.03483120876305, 'loss': 8.690414428710938}


EP_train:2:  57%|| 15622/27626 [36:48<28:26,  7.03it/s]

{'epoch': 2, 'iter': 15620, 'avg_loss': 8.548958433749664, 'avg_acc': 50.03620926957301, 'loss': 8.66201114654541}


EP_train:2:  57%|| 15632/27626 [36:50<28:28,  7.02it/s]

{'epoch': 2, 'iter': 15630, 'avg_loss': 8.548893426828394, 'avg_acc': 50.037385643912735, 'loss': 8.054376602172852}


EP_train:2:  57%|| 15642/27626 [36:51<28:28,  7.01it/s]

{'epoch': 2, 'iter': 15640, 'avg_loss': 8.54857764699241, 'avg_acc': 50.03676235534812, 'loss': 7.728928089141846}


EP_train:2:  57%|| 15652/27626 [36:52<28:12,  7.07it/s]

{'epoch': 2, 'iter': 15650, 'avg_loss': 8.54847740595367, 'avg_acc': 50.03713820203182, 'loss': 9.07193374633789}


EP_train:2:  57%|| 15662/27626 [36:54<28:28,  7.00it/s]

{'epoch': 2, 'iter': 15660, 'avg_loss': 8.54836602284377, 'avg_acc': 50.03930943107081, 'loss': 8.650678634643555}


EP_train:2:  57%|| 15672/27626 [36:55<28:28,  7.00it/s]

{'epoch': 2, 'iter': 15670, 'avg_loss': 8.548858821829462, 'avg_acc': 50.04087965030949, 'loss': 9.29024600982666}


EP_train:2:  57%|| 15682/27626 [36:57<28:11,  7.06it/s]

{'epoch': 2, 'iter': 15680, 'avg_loss': 8.548924344875749, 'avg_acc': 50.03866143740833, 'loss': 8.680683135986328}


EP_train:2:  57%|| 15692/27626 [36:58<28:16,  7.03it/s]

{'epoch': 2, 'iter': 15690, 'avg_loss': 8.548899240440898, 'avg_acc': 50.03684436938373, 'loss': 8.296797752380371}


EP_train:2:  57%|| 15702/27626 [36:59<28:23,  7.00it/s]

{'epoch': 2, 'iter': 15700, 'avg_loss': 8.548867893632025, 'avg_acc': 50.03781606267117, 'loss': 7.547181606292725}


EP_train:2:  57%|| 15712/27626 [37:01<28:07,  7.06it/s]

{'epoch': 2, 'iter': 15710, 'avg_loss': 8.548745505579575, 'avg_acc': 50.03759308764559, 'loss': 8.211195945739746}


EP_train:2:  57%|| 15722/27626 [37:02<28:03,  7.07it/s]

{'epoch': 2, 'iter': 15720, 'avg_loss': 8.548700179620582, 'avg_acc': 50.03796673239616, 'loss': 8.786215782165527}


EP_train:2:  57%|| 15732/27626 [37:04<28:16,  7.01it/s]

{'epoch': 2, 'iter': 15730, 'avg_loss': 8.54880840480839, 'avg_acc': 50.035558769309006, 'loss': 7.523935317993164}


EP_train:2:  57%|| 15742/27626 [37:05<27:55,  7.09it/s]

{'epoch': 2, 'iter': 15740, 'avg_loss': 8.549014508046413, 'avg_acc': 50.034742074836416, 'loss': 8.460002899169922}


EP_train:2:  57%|| 15752/27626 [37:07<28:16,  7.00it/s]

{'epoch': 2, 'iter': 15750, 'avg_loss': 8.549016976491224, 'avg_acc': 50.03432321757349, 'loss': 8.21195125579834}


EP_train:2:  57%|| 15762/27626 [37:08<28:06,  7.04it/s]

{'epoch': 2, 'iter': 15760, 'avg_loss': 8.548901938248479, 'avg_acc': 50.035292811369835, 'loss': 8.095226287841797}


EP_train:2:  57%|| 15772/27626 [37:09<28:16,  6.99it/s]

{'epoch': 2, 'iter': 15770, 'avg_loss': 8.548826503886696, 'avg_acc': 50.03348709656965, 'loss': 9.013415336608887}


EP_train:2:  57%|| 15782/27626 [37:11<28:13,  6.99it/s]

{'epoch': 2, 'iter': 15780, 'avg_loss': 8.548825691260129, 'avg_acc': 50.03524808313795, 'loss': 8.272650718688965}


EP_train:2:  57%|| 15792/27626 [37:12<28:10,  7.00it/s]

{'epoch': 2, 'iter': 15790, 'avg_loss': 8.548830612941885, 'avg_acc': 50.032850991070866, 'loss': 8.571640968322754}


EP_train:2:  57%|| 15802/27626 [37:14<28:16,  6.97it/s]

{'epoch': 2, 'iter': 15800, 'avg_loss': 8.549461675485428, 'avg_acc': 50.03381906208468, 'loss': 9.210247039794922}


EP_train:2:  57%|| 15812/27626 [37:15<29:22,  6.70it/s]

{'epoch': 2, 'iter': 15810, 'avg_loss': 8.549695165897258, 'avg_acc': 50.03676238062109, 'loss': 8.462235450744629}


EP_train:2:  57%|| 15822/27626 [37:17<27:46,  7.08it/s]

{'epoch': 2, 'iter': 15820, 'avg_loss': 8.549793312638654, 'avg_acc': 50.03634409961444, 'loss': 8.703519821166992}


EP_train:2:  57%|| 15832/27626 [37:18<27:42,  7.09it/s]

{'epoch': 2, 'iter': 15830, 'avg_loss': 8.54976368622126, 'avg_acc': 50.036518539574246, 'loss': 8.17235279083252}


EP_train:2:  57%|| 15842/27626 [37:19<27:42,  7.09it/s]

{'epoch': 2, 'iter': 15840, 'avg_loss': 8.549645240812108, 'avg_acc': 50.037679123792685, 'loss': 8.292683601379395}


EP_train:2:  57%|| 15852/27626 [37:21<28:12,  6.96it/s]

{'epoch': 2, 'iter': 15850, 'avg_loss': 8.54972279794539, 'avg_acc': 50.03844394675415, 'loss': 9.578452110290527}


EP_train:2:  57%|| 15862/27626 [37:22<27:48,  7.05it/s]

{'epoch': 2, 'iter': 15860, 'avg_loss': 8.549680517973616, 'avg_acc': 50.03861673286678, 'loss': 8.153081893920898}


EP_train:2:  57%|| 15872/27626 [37:24<27:50,  7.04it/s]

{'epoch': 2, 'iter': 15870, 'avg_loss': 8.549499511087815, 'avg_acc': 50.03997070127906, 'loss': 8.163832664489746}


EP_train:2:  57%|| 15882/27626 [37:25<28:09,  6.95it/s]

{'epoch': 2, 'iter': 15880, 'avg_loss': 8.549374517389513, 'avg_acc': 50.03955198035388, 'loss': 8.423182487487793}


EP_train:2:  58%|| 15892/27626 [37:26<28:04,  6.97it/s]

{'epoch': 2, 'iter': 15890, 'avg_loss': 8.549288560592327, 'avg_acc': 50.03756056887546, 'loss': 8.828648567199707}


EP_train:2:  58%|| 15902/27626 [37:28<27:53,  7.01it/s]

{'epoch': 2, 'iter': 15900, 'avg_loss': 8.549292874256775, 'avg_acc': 50.03989528960443, 'loss': 8.916851043701172}


EP_train:2:  58%|| 15912/27626 [37:29<27:45,  7.03it/s]

{'epoch': 2, 'iter': 15910, 'avg_loss': 8.549449699333046, 'avg_acc': 50.04026302557979, 'loss': 9.210433006286621}


EP_train:2:  58%|| 15922/27626 [37:31<27:40,  7.05it/s]

{'epoch': 2, 'iter': 15920, 'avg_loss': 8.549377945438192, 'avg_acc': 50.038078638276495, 'loss': 9.928977012634277}


EP_train:2:  58%|| 15932/27626 [37:32<27:38,  7.05it/s]

{'epoch': 2, 'iter': 15930, 'avg_loss': 8.549497502094518, 'avg_acc': 50.03825089448246, 'loss': 8.219902992248535}


EP_train:2:  58%|| 15942/27626 [37:34<27:36,  7.05it/s]

{'epoch': 2, 'iter': 15940, 'avg_loss': 8.549324889021815, 'avg_acc': 50.040579323756354, 'loss': 8.547178268432617}


EP_train:2:  58%|| 15952/27626 [37:35<27:45,  7.01it/s]

{'epoch': 2, 'iter': 15950, 'avg_loss': 8.549240491119837, 'avg_acc': 50.03977023384114, 'loss': 8.037007331848145}


EP_train:2:  58%|| 15962/27626 [37:36<27:37,  7.04it/s]

{'epoch': 2, 'iter': 15960, 'avg_loss': 8.549163374444625, 'avg_acc': 50.041115844871875, 'loss': 7.8237996101379395}


EP_train:2:  58%|| 15972/27626 [37:38<27:47,  6.99it/s]

{'epoch': 2, 'iter': 15970, 'avg_loss': 8.549009688562908, 'avg_acc': 50.038742095047276, 'loss': 9.336237907409668}


EP_train:2:  58%|| 15982/27626 [37:39<27:37,  7.03it/s]

{'epoch': 2, 'iter': 15980, 'avg_loss': 8.54913887530929, 'avg_acc': 50.03852230774044, 'loss': 9.027132987976074}


EP_train:2:  58%|| 15992/27626 [37:41<27:26,  7.07it/s]

{'epoch': 2, 'iter': 15990, 'avg_loss': 8.549194707602707, 'avg_acc': 50.03908448502282, 'loss': 9.534272193908691}


EP_train:2:  58%|| 16002/27626 [37:42<27:41,  7.00it/s]

{'epoch': 2, 'iter': 16000, 'avg_loss': 8.549009964306812, 'avg_acc': 50.040036560214986, 'loss': 8.45604133605957}


EP_train:2:  58%|| 16012/27626 [37:43<27:34,  7.02it/s]

{'epoch': 2, 'iter': 16010, 'avg_loss': 8.549181568280842, 'avg_acc': 50.04274405096496, 'loss': 9.0244140625}


EP_train:2:  58%|| 16022/27626 [37:45<27:22,  7.07it/s]

{'epoch': 2, 'iter': 16020, 'avg_loss': 8.54903229315267, 'avg_acc': 50.04252231446227, 'loss': 8.735949516296387}


EP_train:2:  58%|| 16032/27626 [37:46<27:22,  7.06it/s]

{'epoch': 2, 'iter': 16030, 'avg_loss': 8.54888459720151, 'avg_acc': 50.04425020273221, 'loss': 8.645358085632324}


EP_train:2:  58%|| 16042/27626 [37:48<27:38,  6.98it/s]

{'epoch': 2, 'iter': 16040, 'avg_loss': 8.548792871986866, 'avg_acc': 50.042079670843464, 'loss': 8.912952423095703}


EP_train:2:  58%|| 16052/27626 [37:49<27:16,  7.07it/s]

{'epoch': 2, 'iter': 16050, 'avg_loss': 8.548637874281955, 'avg_acc': 50.04127468693539, 'loss': 8.44393253326416}


EP_train:2:  58%|| 16062/27626 [37:51<27:36,  6.98it/s]

{'epoch': 2, 'iter': 16060, 'avg_loss': 8.54877302502793, 'avg_acc': 50.0420272710292, 'loss': 9.43818473815918}


EP_train:2:  58%|| 16072/27626 [37:52<27:19,  7.05it/s]

{'epoch': 2, 'iter': 16070, 'avg_loss': 8.548948487736412, 'avg_acc': 50.04394561632755, 'loss': 8.49318790435791}


EP_train:2:  58%|| 16082/27626 [37:53<27:14,  7.06it/s]

{'epoch': 2, 'iter': 16080, 'avg_loss': 8.54886687337874, 'avg_acc': 50.04469560350725, 'loss': 7.946988582611084}


EP_train:2:  58%|| 16092/27626 [37:55<27:13,  7.06it/s]

{'epoch': 2, 'iter': 16090, 'avg_loss': 8.54883827362146, 'avg_acc': 50.04447361879312, 'loss': 9.184798240661621}


EP_train:2:  58%|| 16102/27626 [37:56<27:22,  7.01it/s]

{'epoch': 2, 'iter': 16100, 'avg_loss': 8.548617496363725, 'avg_acc': 50.04172877461027, 'loss': 8.720843315124512}


EP_train:2:  58%|| 16112/27626 [37:58<27:19,  7.02it/s]

{'epoch': 2, 'iter': 16110, 'avg_loss': 8.548686895340294, 'avg_acc': 50.041314940103035, 'loss': 7.395431041717529}


EP_train:2:  58%|| 16122/27626 [37:59<27:22,  7.01it/s]

{'epoch': 2, 'iter': 16120, 'avg_loss': 8.548783248207807, 'avg_acc': 50.041095465541844, 'loss': 8.921008110046387}


EP_train:2:  58%|| 16132/27626 [38:00<27:03,  7.08it/s]

{'epoch': 2, 'iter': 16130, 'avg_loss': 8.548687739629724, 'avg_acc': 50.0408762630959, 'loss': 7.74215841293335}


EP_train:2:  58%|| 16142/27626 [38:02<27:21,  7.00it/s]

{'epoch': 2, 'iter': 16140, 'avg_loss': 8.548636194040885, 'avg_acc': 50.03988290688309, 'loss': 8.48725700378418}


EP_train:2:  58%|| 16152/27626 [38:03<27:25,  6.97it/s]

{'epoch': 2, 'iter': 16150, 'avg_loss': 8.548654979880355, 'avg_acc': 50.041793077828004, 'loss': 8.286588668823242}


EP_train:2:  59%|| 16162/27626 [38:05<27:06,  7.05it/s]

{'epoch': 2, 'iter': 16160, 'avg_loss': 8.548555362785919, 'avg_acc': 50.04312078460491, 'loss': 8.323344230651855}


EP_train:2:  59%|| 16172/27626 [38:06<27:04,  7.05it/s]

{'epoch': 2, 'iter': 16170, 'avg_loss': 8.548425008883378, 'avg_acc': 50.04309411910209, 'loss': 8.586621284484863}


EP_train:2:  59%|| 16182/27626 [38:08<27:09,  7.02it/s]

{'epoch': 2, 'iter': 16180, 'avg_loss': 8.548220839829595, 'avg_acc': 50.040556825906926, 'loss': 8.065136909484863}


EP_train:2:  59%|| 16192/27626 [38:09<27:02,  7.05it/s]

{'epoch': 2, 'iter': 16190, 'avg_loss': 8.54812110655024, 'avg_acc': 50.03647859922179, 'loss': 8.251681327819824}


EP_train:2:  59%|| 16202/27626 [38:10<27:01,  7.05it/s]

{'epoch': 2, 'iter': 16200, 'avg_loss': 8.548087569167825, 'avg_acc': 50.034720079007464, 'loss': 8.984128952026367}


EP_train:2:  59%|| 16212/27626 [38:12<27:00,  7.04it/s]

{'epoch': 2, 'iter': 16210, 'avg_loss': 8.54817367836913, 'avg_acc': 50.03662636481402, 'loss': 7.694555282592773}


EP_train:2:  59%|| 16222/27626 [38:13<27:13,  6.98it/s]

{'epoch': 2, 'iter': 16220, 'avg_loss': 8.548117375390008, 'avg_acc': 50.03390666420072, 'loss': 8.066377639770508}


EP_train:2:  59%|| 16232/27626 [38:15<27:05,  7.01it/s]

{'epoch': 2, 'iter': 16230, 'avg_loss': 8.54813106146253, 'avg_acc': 50.03292311009796, 'loss': 8.282984733581543}


EP_train:2:  59%|| 16242/27626 [38:16<27:19,  6.94it/s]

{'epoch': 2, 'iter': 16240, 'avg_loss': 8.54841856154658, 'avg_acc': 50.03155593867372, 'loss': 8.482775688171387}


EP_train:2:  59%|| 16252/27626 [38:17<27:06,  6.99it/s]

{'epoch': 2, 'iter': 16250, 'avg_loss': 8.548332311653605, 'avg_acc': 50.03153652082949, 'loss': 8.373350143432617}


EP_train:2:  59%|| 16262/27626 [38:19<26:55,  7.03it/s]

{'epoch': 2, 'iter': 16260, 'avg_loss': 8.54815150037418, 'avg_acc': 50.03017188364799, 'loss': 8.619915962219238}


EP_train:2:  59%|| 16272/27626 [38:20<26:49,  7.05it/s]

{'epoch': 2, 'iter': 16270, 'avg_loss': 8.548445877552062, 'avg_acc': 50.02976922131399, 'loss': 9.789247512817383}


EP_train:2:  59%|| 16282/27626 [38:22<26:45,  7.07it/s]

{'epoch': 2, 'iter': 16280, 'avg_loss': 8.548501487065574, 'avg_acc': 50.033973650267185, 'loss': 8.329646110534668}


EP_train:2:  59%|| 16292/27626 [38:23<26:41,  7.08it/s]

{'epoch': 2, 'iter': 16290, 'avg_loss': 8.548633213955265, 'avg_acc': 50.031842735252596, 'loss': 8.78697395324707}


EP_train:2:  59%|| 16302/27626 [38:25<26:46,  7.05it/s]

{'epoch': 2, 'iter': 16300, 'avg_loss': 8.54833697807598, 'avg_acc': 50.030672964848776, 'loss': 8.379341125488281}


EP_train:2:  59%|| 16312/27626 [38:26<26:45,  7.05it/s]

{'epoch': 2, 'iter': 16310, 'avg_loss': 8.548328284563611, 'avg_acc': 50.03276163325363, 'loss': 8.918844223022461}


EP_train:2:  59%|| 16322/27626 [38:27<26:56,  6.99it/s]

{'epoch': 2, 'iter': 16320, 'avg_loss': 8.54823937622635, 'avg_acc': 50.03235861773175, 'loss': 7.302154064178467}


EP_train:2:  59%|| 16332/27626 [38:29<26:41,  7.05it/s]

{'epoch': 2, 'iter': 16330, 'avg_loss': 8.548127174990606, 'avg_acc': 50.03444369603821, 'loss': 7.338608741760254}


EP_train:2:  59%|| 16342/27626 [38:30<26:37,  7.06it/s]

{'epoch': 2, 'iter': 16340, 'avg_loss': 8.548103043487522, 'avg_acc': 50.03633498561899, 'loss': 8.296250343322754}


EP_train:2:  59%|| 16352/27626 [38:32<26:41,  7.04it/s]

{'epoch': 2, 'iter': 16350, 'avg_loss': 8.548178858468601, 'avg_acc': 50.03535716469941, 'loss': 8.756060600280762}


EP_train:2:  59%|| 16362/27626 [38:33<26:32,  7.07it/s]

{'epoch': 2, 'iter': 16360, 'avg_loss': 8.548274047595788, 'avg_acc': 50.0347625450767, 'loss': 8.874884605407715}


EP_train:2:  59%|| 16372/27626 [38:34<26:34,  7.06it/s]

{'epoch': 2, 'iter': 16370, 'avg_loss': 8.548070411211437, 'avg_acc': 50.03206890232729, 'loss': 8.132841110229492}


EP_train:2:  59%|| 16382/27626 [38:36<26:42,  7.02it/s]

{'epoch': 2, 'iter': 16380, 'avg_loss': 8.547518612271475, 'avg_acc': 50.03605549111776, 'loss': 8.35136604309082}


EP_train:2:  59%|| 16392/27626 [38:37<26:48,  6.98it/s]

{'epoch': 2, 'iter': 16390, 'avg_loss': 8.547916395131406, 'avg_acc': 50.03698676102739, 'loss': 9.424825668334961}


EP_train:2:  59%|| 16402/27626 [38:39<26:30,  7.06it/s]

{'epoch': 2, 'iter': 16400, 'avg_loss': 8.548153562681259, 'avg_acc': 50.035249375038106, 'loss': 9.02846622467041}


EP_train:2:  59%|| 16412/27626 [38:40<26:48,  6.97it/s]

{'epoch': 2, 'iter': 16410, 'avg_loss': 8.54822661333187, 'avg_acc': 50.03503747486442, 'loss': 9.156258583068848}


EP_train:2:  59%|| 16422/27626 [38:42<26:40,  7.00it/s]

{'epoch': 2, 'iter': 16420, 'avg_loss': 8.548285590564934, 'avg_acc': 50.03463552767797, 'loss': 8.795758247375488}


EP_train:2:  59%|| 16432/27626 [38:43<26:43,  6.98it/s]

{'epoch': 2, 'iter': 16430, 'avg_loss': 8.54817497449481, 'avg_acc': 50.03613596250989, 'loss': 7.633188247680664}


EP_train:2:  60%|| 16442/27626 [38:44<26:22,  7.07it/s]

{'epoch': 2, 'iter': 16440, 'avg_loss': 8.547915479068259, 'avg_acc': 50.03687427772034, 'loss': 8.722366333007812}


EP_train:2:  60%|| 16452/27626 [38:46<26:25,  7.05it/s]

{'epoch': 2, 'iter': 16450, 'avg_loss': 8.547867059258369, 'avg_acc': 50.03799161145219, 'loss': 8.936279296875}


EP_train:2:  60%|| 16462/27626 [38:47<26:21,  7.06it/s]

{'epoch': 2, 'iter': 16460, 'avg_loss': 8.547839949787274, 'avg_acc': 50.03341230787923, 'loss': 8.32943344116211}


EP_train:2:  60%|| 16472/27626 [38:49<26:29,  7.02it/s]

{'epoch': 2, 'iter': 16470, 'avg_loss': 8.547874067557922, 'avg_acc': 50.03699684293606, 'loss': 8.446656227111816}


EP_train:2:  60%|| 16482/27626 [38:50<26:24,  7.03it/s]

{'epoch': 2, 'iter': 16480, 'avg_loss': 8.547976011935097, 'avg_acc': 50.037922456161645, 'loss': 8.622977256774902}


EP_train:2:  60%|| 16492/27626 [38:51<26:20,  7.05it/s]

{'epoch': 2, 'iter': 16490, 'avg_loss': 8.547783738710486, 'avg_acc': 50.033162027772725, 'loss': 8.236285209655762}


EP_train:2:  60%|| 16502/27626 [38:53<26:38,  6.96it/s]

{'epoch': 2, 'iter': 16500, 'avg_loss': 8.54772607358786, 'avg_acc': 50.03371007817709, 'loss': 8.414581298828125}


EP_train:2:  60%|| 16512/27626 [38:54<26:29,  6.99it/s]

{'epoch': 2, 'iter': 16510, 'avg_loss': 8.54755918842898, 'avg_acc': 50.037096481133794, 'loss': 8.72284984588623}


EP_train:2:  60%|| 16522/27626 [38:56<26:15,  7.05it/s]

{'epoch': 2, 'iter': 16520, 'avg_loss': 8.54769850152434, 'avg_acc': 50.036506567399066, 'loss': 9.156198501586914}


EP_train:2:  60%|| 16532/27626 [38:57<26:18,  7.03it/s]

{'epoch': 2, 'iter': 16530, 'avg_loss': 8.547638865176609, 'avg_acc': 50.03799679390236, 'loss': 8.116984367370605}


EP_train:2:  60%|| 16542/27626 [38:59<26:13,  7.04it/s]

{'epoch': 2, 'iter': 16540, 'avg_loss': 8.547747181574772, 'avg_acc': 50.03684027567862, 'loss': 9.216781616210938}


EP_train:2:  60%|| 16552/27626 [39:00<26:16,  7.03it/s]

{'epoch': 2, 'iter': 16550, 'avg_loss': 8.547688129048904, 'avg_acc': 50.036629206694464, 'loss': 8.391206741333008}


EP_train:2:  60%|| 16562/27626 [39:01<26:24,  6.98it/s]

{'epoch': 2, 'iter': 16560, 'avg_loss': 8.54747530605099, 'avg_acc': 50.03924883763058, 'loss': 8.4425687789917}


EP_train:2:  60%|| 16572/27626 [39:03<26:12,  7.03it/s]

{'epoch': 2, 'iter': 16570, 'avg_loss': 8.547499029546172, 'avg_acc': 50.03696216281456, 'loss': 8.341598510742188}


EP_train:2:  60%|| 16582/27626 [39:04<25:59,  7.08it/s]

{'epoch': 2, 'iter': 16580, 'avg_loss': 8.54771955703037, 'avg_acc': 50.03750527712442, 'loss': 9.556931495666504}


EP_train:2:  60%|| 16592/27626 [39:06<26:19,  6.99it/s]

{'epoch': 2, 'iter': 16590, 'avg_loss': 8.547692921700396, 'avg_acc': 50.03748267132783, 'loss': 9.444780349731445}


EP_train:2:  60%|| 16602/27626 [39:07<26:10,  7.02it/s]

{'epoch': 2, 'iter': 16600, 'avg_loss': 8.547745222040318, 'avg_acc': 50.03444822601048, 'loss': 8.599907875061035}


EP_train:2:  60%|| 16612/27626 [39:08<25:59,  7.06it/s]

{'epoch': 2, 'iter': 16610, 'avg_loss': 8.547686162671537, 'avg_acc': 50.033486846065856, 'loss': 8.800821304321289}


EP_train:2:  60%|| 16622/27626 [39:10<25:56,  7.07it/s]

{'epoch': 2, 'iter': 16620, 'avg_loss': 8.54805605211138, 'avg_acc': 50.03591089585464, 'loss': 9.820615768432617}


EP_train:2:  60%|| 16632/27626 [39:11<26:03,  7.03it/s]

{'epoch': 2, 'iter': 16630, 'avg_loss': 8.54835458983375, 'avg_acc': 50.03476189044556, 'loss': 9.986523628234863}


EP_train:2:  60%|| 16642/27626 [39:13<26:02,  7.03it/s]

{'epoch': 2, 'iter': 16640, 'avg_loss': 8.548436597651369, 'avg_acc': 50.03267531999279, 'loss': 7.841163635253906}


EP_train:2:  60%|| 16652/27626 [39:14<25:54,  7.06it/s]

{'epoch': 2, 'iter': 16650, 'avg_loss': 8.548411063450349, 'avg_acc': 50.03246801993875, 'loss': 8.821285247802734}


EP_train:2:  60%|| 16662/27626 [39:16<25:56,  7.04it/s]

{'epoch': 2, 'iter': 16660, 'avg_loss': 8.548433783474922, 'avg_acc': 50.03094802232759, 'loss': 8.928374290466309}


EP_train:2:  60%|| 16672/27626 [39:17<25:44,  7.09it/s]

{'epoch': 2, 'iter': 16670, 'avg_loss': 8.54819447909425, 'avg_acc': 50.03017965329014, 'loss': 8.938054084777832}


EP_train:2:  60%|| 16682/27626 [39:18<25:58,  7.02it/s]

{'epoch': 2, 'iter': 16680, 'avg_loss': 8.548370897709184, 'avg_acc': 50.031098255500275, 'loss': 9.193175315856934}


EP_train:2:  60%|| 16692/27626 [39:20<26:11,  6.96it/s]

{'epoch': 2, 'iter': 16690, 'avg_loss': 8.54854229010642, 'avg_acc': 50.0292073572584, 'loss': 8.554492950439453}


EP_train:2:  60%|| 16702/27626 [39:21<25:52,  7.04it/s]

{'epoch': 2, 'iter': 16700, 'avg_loss': 8.548554965517676, 'avg_acc': 50.02881563978205, 'loss': 8.879388809204102}


EP_train:2:  60%|| 16712/27626 [39:23<25:37,  7.10it/s]

{'epoch': 2, 'iter': 16710, 'avg_loss': 8.548526695861643, 'avg_acc': 50.03160343486326, 'loss': 8.6475191116333}


EP_train:2:  61%|| 16722/27626 [39:24<25:49,  7.04it/s]

{'epoch': 2, 'iter': 16720, 'avg_loss': 8.54864432054461, 'avg_acc': 50.03251898809879, 'loss': 8.546126365661621}


EP_train:2:  61%|| 16732/27626 [39:25<25:46,  7.05it/s]

{'epoch': 2, 'iter': 16730, 'avg_loss': 8.548579811572488, 'avg_acc': 50.032499551730325, 'loss': 8.256134986877441}


EP_train:2:  61%|| 16742/27626 [39:27<25:35,  7.09it/s]

{'epoch': 2, 'iter': 16740, 'avg_loss': 8.54854891757499, 'avg_acc': 50.03173346872947, 'loss': 8.118135452270508}


EP_train:2:  61%|| 16752/27626 [39:28<25:37,  7.07it/s]

{'epoch': 2, 'iter': 16750, 'avg_loss': 8.548590953553173, 'avg_acc': 50.03544564503611, 'loss': 9.023286819458008}


EP_train:2:  61%|| 16762/27626 [39:30<25:29,  7.10it/s]

{'epoch': 2, 'iter': 16760, 'avg_loss': 8.548506053120928, 'avg_acc': 50.03598383151363, 'loss': 8.194302558898926}


EP_train:2:  61%|| 16772/27626 [39:31<25:42,  7.04it/s]

{'epoch': 2, 'iter': 16770, 'avg_loss': 8.548719623918917, 'avg_acc': 50.03782571104883, 'loss': 8.76297378540039}


EP_train:2:  61%|| 16782/27626 [39:32<25:46,  7.01it/s]

{'epoch': 2, 'iter': 16780, 'avg_loss': 8.548562582681022, 'avg_acc': 50.03817561527919, 'loss': 8.764561653137207}


EP_train:2:  61%|| 16792/27626 [39:34<25:24,  7.11it/s]

{'epoch': 2, 'iter': 16790, 'avg_loss': 8.548603138058347, 'avg_acc': 50.03629176344471, 'loss': 8.794663429260254}


EP_train:2:  61%|| 16802/27626 [39:35<25:45,  7.00it/s]

{'epoch': 2, 'iter': 16800, 'avg_loss': 8.5484708295068, 'avg_acc': 50.03757216832332, 'loss': 8.267550468444824}


EP_train:2:  61%|| 16812/27626 [39:37<25:32,  7.06it/s]

{'epoch': 2, 'iter': 16810, 'avg_loss': 8.548426469044161, 'avg_acc': 50.03420379513413, 'loss': 8.804173469543457}


EP_train:2:  61%|| 16822/27626 [39:38<25:24,  7.09it/s]

{'epoch': 2, 'iter': 16820, 'avg_loss': 8.548300998742851, 'avg_acc': 50.0349265798704, 'loss': 8.428491592407227}


EP_train:2:  61%|| 16832/27626 [39:40<25:18,  7.11it/s]

{'epoch': 2, 'iter': 16830, 'avg_loss': 8.54852183666482, 'avg_acc': 50.03267779692234, 'loss': 9.041704177856445}


EP_train:2:  61%|| 16842/27626 [39:41<25:29,  7.05it/s]

{'epoch': 2, 'iter': 16840, 'avg_loss': 8.548400964726179, 'avg_acc': 50.03173059794549, 'loss': 8.02306842803955}


EP_train:2:  61%|| 16852/27626 [39:42<25:18,  7.10it/s]

{'epoch': 2, 'iter': 16850, 'avg_loss': 8.548347914666707, 'avg_acc': 50.03208266571717, 'loss': 7.966002464294434}


EP_train:2:  61%|| 16862/27626 [39:44<25:36,  7.01it/s]

{'epoch': 2, 'iter': 16860, 'avg_loss': 8.548284425917034, 'avg_acc': 50.03410236640769, 'loss': 8.434476852416992}


EP_train:2:  61%|| 16872/27626 [39:45<25:35,  7.00it/s]

{'epoch': 2, 'iter': 16870, 'avg_loss': 8.548089188931923, 'avg_acc': 50.035563985537316, 'loss': 8.24876880645752}


EP_train:2:  61%|| 16882/27626 [39:47<25:15,  7.09it/s]

{'epoch': 2, 'iter': 16880, 'avg_loss': 8.54813971702228, 'avg_acc': 50.034617321248746, 'loss': 8.931097030639648}


EP_train:2:  61%|| 16892/27626 [39:48<25:22,  7.05it/s]

{'epoch': 2, 'iter': 16890, 'avg_loss': 8.548170390458536, 'avg_acc': 50.03330175833284, 'loss': 8.195258140563965}


EP_train:2:  61%|| 16902/27626 [39:49<25:13,  7.09it/s]

{'epoch': 2, 'iter': 16900, 'avg_loss': 8.548069429964675, 'avg_acc': 50.03531595763564, 'loss': 8.028569221496582}


EP_train:2:  61%|| 16912/27626 [39:51<25:14,  7.07it/s]

{'epoch': 2, 'iter': 16910, 'avg_loss': 8.548122989737328, 'avg_acc': 50.03584944710543, 'loss': 8.508072853088379}


EP_train:2:  61%|| 16922/27626 [39:52<25:11,  7.08it/s]

{'epoch': 2, 'iter': 16920, 'avg_loss': 8.548124477729596, 'avg_acc': 50.033981443177126, 'loss': 8.634968757629395}


EP_train:2:  61%|| 16932/27626 [39:54<25:08,  7.09it/s]

{'epoch': 2, 'iter': 16930, 'avg_loss': 8.54823336266956, 'avg_acc': 50.038021971531506, 'loss': 8.012409210205078}


EP_train:2:  61%|| 16942/27626 [39:55<25:16,  7.04it/s]

{'epoch': 2, 'iter': 16940, 'avg_loss': 8.548248775142838, 'avg_acc': 50.03799952777286, 'loss': 8.011969566345215}


EP_train:2:  61%|| 16952/27626 [39:56<25:05,  7.09it/s]

{'epoch': 2, 'iter': 16950, 'avg_loss': 8.548255560960849, 'avg_acc': 50.04092678898, 'loss': 8.526461601257324}


EP_train:2:  61%|| 16962/27626 [39:58<25:23,  7.00it/s]

{'epoch': 2, 'iter': 16960, 'avg_loss': 8.548163767925349, 'avg_acc': 50.04163964388893, 'loss': 7.535132884979248}


EP_train:2:  61%|| 16972/27626 [39:59<25:02,  7.09it/s]

{'epoch': 2, 'iter': 16970, 'avg_loss': 8.548106108351845, 'avg_acc': 50.039773731659885, 'loss': 8.661931991577148}


EP_train:2:  61%|| 16982/27626 [40:01<25:11,  7.04it/s]

{'epoch': 2, 'iter': 16980, 'avg_loss': 8.54801756720276, 'avg_acc': 50.040486426005536, 'loss': 8.47976016998291}


EP_train:2:  62%|| 16992/27626 [40:02<25:09,  7.04it/s]

{'epoch': 2, 'iter': 16990, 'avg_loss': 8.548125712658685, 'avg_acc': 50.03751986345713, 'loss': 7.83931827545166}


EP_train:2:  62%|| 17002/27626 [40:04<25:10,  7.03it/s]

{'epoch': 2, 'iter': 17000, 'avg_loss': 8.54800942364696, 'avg_acc': 50.03933592141638, 'loss': 8.515453338623047}


EP_train:2:  62%|| 17012/27626 [40:05<24:57,  7.09it/s]

{'epoch': 2, 'iter': 17010, 'avg_loss': 8.547964972489922, 'avg_acc': 50.04004761624831, 'loss': 8.215537071228027}


EP_train:2:  62%|| 17022/27626 [40:06<25:09,  7.02it/s]

{'epoch': 2, 'iter': 17020, 'avg_loss': 8.547875729747934, 'avg_acc': 50.04039128135832, 'loss': 8.46624755859375}


EP_train:2:  62%|| 17032/27626 [40:08<25:02,  7.05it/s]

{'epoch': 2, 'iter': 17030, 'avg_loss': 8.547882120003543, 'avg_acc': 50.03981709823263, 'loss': 8.847746849060059}


EP_train:2:  62%|| 17042/27626 [40:09<25:00,  7.05it/s]

{'epoch': 2, 'iter': 17040, 'avg_loss': 8.548043308120386, 'avg_acc': 50.04346135790153, 'loss': 9.417964935302734}


EP_train:2:  62%|| 17052/27626 [40:11<25:02,  7.04it/s]

{'epoch': 2, 'iter': 17050, 'avg_loss': 8.54802163854931, 'avg_acc': 50.04288604773913, 'loss': 8.72140884399414}


EP_train:2:  62%|| 17062/27626 [40:12<24:56,  7.06it/s]

{'epoch': 2, 'iter': 17060, 'avg_loss': 8.547903771934557, 'avg_acc': 50.042860910849306, 'loss': 8.218741416931152}


EP_train:2:  62%|| 17072/27626 [40:13<25:06,  7.01it/s]

{'epoch': 2, 'iter': 17070, 'avg_loss': 8.547905359736898, 'avg_acc': 50.046680042176796, 'loss': 9.467181205749512}


EP_train:2:  62%|| 17082/27626 [40:15<24:59,  7.03it/s]

{'epoch': 2, 'iter': 17080, 'avg_loss': 8.547802121485294, 'avg_acc': 50.04848223171945, 'loss': 8.732068061828613}


EP_train:2:  62%|| 17092/27626 [40:16<24:46,  7.08it/s]

{'epoch': 2, 'iter': 17090, 'avg_loss': 8.547810792741624, 'avg_acc': 50.04699110643028, 'loss': 9.75009822845459}


EP_train:2:  62%|| 17102/27626 [40:18<25:00,  7.01it/s]

{'epoch': 2, 'iter': 17100, 'avg_loss': 8.547715540218336, 'avg_acc': 50.04623267645167, 'loss': 7.999685764312744}


EP_train:2:  62%|| 17112/27626 [40:19<24:41,  7.10it/s]

{'epoch': 2, 'iter': 17110, 'avg_loss': 8.547579849144626, 'avg_acc': 50.04931038513237, 'loss': 8.888903617858887}


EP_train:2:  62%|| 17122/27626 [40:20<24:47,  7.06it/s]

{'epoch': 2, 'iter': 17120, 'avg_loss': 8.547521605501943, 'avg_acc': 50.0485514864786, 'loss': 8.188499450683594}


EP_train:2:  62%|| 17132/27626 [40:22<24:59,  7.00it/s]

{'epoch': 2, 'iter': 17130, 'avg_loss': 8.547401914440126, 'avg_acc': 50.048523145175416, 'loss': 9.1705322265625}


EP_train:2:  62%|| 17142/27626 [40:23<24:48,  7.04it/s]

{'epoch': 2, 'iter': 17140, 'avg_loss': 8.547484543434653, 'avg_acc': 50.04758327985531, 'loss': 9.284564018249512}


EP_train:2:  62%|| 17152/27626 [40:25<24:47,  7.04it/s]

{'epoch': 2, 'iter': 17150, 'avg_loss': 8.547618185477829, 'avg_acc': 50.04755553612035, 'loss': 8.60473346710205}


EP_train:2:  62%|| 17162/27626 [40:26<24:58,  6.98it/s]

{'epoch': 2, 'iter': 17160, 'avg_loss': 8.54754133579038, 'avg_acc': 50.04916671522639, 'loss': 7.62276029586792}


EP_train:2:  62%|| 17172/27626 [40:28<24:57,  6.98it/s]

{'epoch': 2, 'iter': 17170, 'avg_loss': 8.547503477261733, 'avg_acc': 50.04822811717431, 'loss': 8.92664623260498}


EP_train:2:  62%|| 17182/27626 [40:29<24:29,  7.11it/s]

{'epoch': 2, 'iter': 17180, 'avg_loss': 8.547539453665776, 'avg_acc': 50.04674495081777, 'loss': 9.08037281036377}


EP_train:2:  62%|| 17192/27626 [40:30<24:34,  7.08it/s]

{'epoch': 2, 'iter': 17190, 'avg_loss': 8.547510894290316, 'avg_acc': 50.04599063463439, 'loss': 7.870389938354492}


EP_train:2:  62%|| 17202/27626 [40:32<24:35,  7.07it/s]

{'epoch': 2, 'iter': 17200, 'avg_loss': 8.547792354854524, 'avg_acc': 50.04669059938376, 'loss': 9.594391822814941}


EP_train:2:  62%|| 17212/27626 [40:33<24:43,  7.02it/s]

{'epoch': 2, 'iter': 17210, 'avg_loss': 8.54765155642184, 'avg_acc': 50.04811603044564, 'loss': 8.570141792297363}


EP_train:2:  62%|| 17222/27626 [40:35<24:52,  6.97it/s]

{'epoch': 2, 'iter': 17220, 'avg_loss': 8.547640172095464, 'avg_acc': 50.04572905173915, 'loss': 8.906628608703613}


EP_train:2:  62%|| 17232/27626 [40:36<24:41,  7.02it/s]

{'epoch': 2, 'iter': 17230, 'avg_loss': 8.547727342587548, 'avg_acc': 50.04352620277407, 'loss': 8.202171325683594}


EP_train:2:  62%|| 17242/27626 [40:37<24:43,  7.00it/s]

{'epoch': 2, 'iter': 17240, 'avg_loss': 8.547757742597293, 'avg_acc': 50.04132590917, 'loss': 8.16102123260498}


EP_train:2:  62%|| 17252/27626 [40:39<24:24,  7.08it/s]

{'epoch': 2, 'iter': 17250, 'avg_loss': 8.547920563111656, 'avg_acc': 50.04184540026665, 'loss': 8.896788597106934}


EP_train:2:  62%|| 17262/27626 [40:40<24:20,  7.10it/s]

{'epoch': 2, 'iter': 17260, 'avg_loss': 8.548038715241882, 'avg_acc': 50.040372805747054, 'loss': 8.651679039001465}


EP_train:2:  63%|| 17272/27626 [40:42<24:27,  7.06it/s]

{'epoch': 2, 'iter': 17270, 'avg_loss': 8.548037508036375, 'avg_acc': 50.03926379480054, 'loss': 9.031386375427246}


EP_train:2:  63%|| 17282/27626 [40:43<24:26,  7.05it/s]

{'epoch': 2, 'iter': 17280, 'avg_loss': 8.547983016855193, 'avg_acc': 50.03833690179966, 'loss': 7.835376262664795}


EP_train:2:  63%|| 17292/27626 [40:45<24:20,  7.08it/s]

{'epoch': 2, 'iter': 17290, 'avg_loss': 8.54810194640733, 'avg_acc': 50.03759181076861, 'loss': 7.996822834014893}


EP_train:2:  63%|| 17302/27626 [40:46<24:30,  7.02it/s]

{'epoch': 2, 'iter': 17300, 'avg_loss': 8.548208612216346, 'avg_acc': 50.03594445407779, 'loss': 9.432355880737305}


EP_train:2:  63%|| 17312/27626 [40:47<24:17,  7.08it/s]

{'epoch': 2, 'iter': 17310, 'avg_loss': 8.548328908778485, 'avg_acc': 50.035021084859345, 'loss': 9.193679809570312}


EP_train:2:  63%|| 17322/27626 [40:49<24:35,  6.98it/s]

{'epoch': 2, 'iter': 17320, 'avg_loss': 8.548414385089574, 'avg_acc': 50.03680503435136, 'loss': 8.592628479003906}


EP_train:2:  63%|| 17332/27626 [40:50<24:21,  7.04it/s]

{'epoch': 2, 'iter': 17330, 'avg_loss': 8.548521047675473, 'avg_acc': 50.03642317235012, 'loss': 8.343151092529297}


EP_train:2:  63%|| 17342/27626 [40:52<24:14,  7.07it/s]

{'epoch': 2, 'iter': 17340, 'avg_loss': 8.548533866620106, 'avg_acc': 50.03550112450262, 'loss': 9.068106651306152}


EP_train:2:  63%|| 17352/27626 [40:53<24:13,  7.07it/s]

{'epoch': 2, 'iter': 17350, 'avg_loss': 8.54857759501541, 'avg_acc': 50.03440003458014, 'loss': 8.463868141174316}


EP_train:2:  63%|| 17362/27626 [40:54<24:09,  7.08it/s]

{'epoch': 2, 'iter': 17360, 'avg_loss': 8.548515810614102, 'avg_acc': 50.033480214273375, 'loss': 8.847105026245117}


EP_train:2:  63%|| 17372/27626 [40:56<24:23,  7.01it/s]

{'epoch': 2, 'iter': 17370, 'avg_loss': 8.548678027115628, 'avg_acc': 50.03543981348224, 'loss': 8.50871753692627}


EP_train:2:  63%|| 17382/27626 [40:57<24:14,  7.04it/s]

{'epoch': 2, 'iter': 17380, 'avg_loss': 8.548643981825393, 'avg_acc': 50.03470024739658, 'loss': 8.761241912841797}


EP_train:2:  63%|| 17392/27626 [40:59<24:16,  7.03it/s]

{'epoch': 2, 'iter': 17390, 'avg_loss': 8.548597296497134, 'avg_acc': 50.03396153182681, 'loss': 8.922253608703613}


EP_train:2:  63%|| 17402/27626 [41:00<24:11,  7.04it/s]

{'epoch': 2, 'iter': 17400, 'avg_loss': 8.548580616344344, 'avg_acc': 50.03591747600713, 'loss': 9.151835441589355}


EP_train:2:  63%|| 17412/27626 [41:01<24:17,  7.01it/s]

{'epoch': 2, 'iter': 17410, 'avg_loss': 8.548703801329985, 'avg_acc': 50.033922520245824, 'loss': 9.015477180480957}


EP_train:2:  63%|| 17422/27626 [41:03<24:12,  7.02it/s]

{'epoch': 2, 'iter': 17420, 'avg_loss': 8.548677058083326, 'avg_acc': 50.03300614201252, 'loss': 8.05436897277832}


EP_train:2:  63%|| 17432/27626 [41:04<23:56,  7.09it/s]

{'epoch': 2, 'iter': 17430, 'avg_loss': 8.548705121365822, 'avg_acc': 50.03280792840342, 'loss': 8.031856536865234}


EP_train:2:  63%|| 17442/27626 [41:06<24:03,  7.05it/s]

{'epoch': 2, 'iter': 17440, 'avg_loss': 8.54865198811768, 'avg_acc': 50.0318932400665, 'loss': 8.566021919250488}


EP_train:2:  63%|| 17452/27626 [41:07<23:55,  7.09it/s]

{'epoch': 2, 'iter': 17450, 'avg_loss': 8.548749414872088, 'avg_acc': 50.03044238152542, 'loss': 8.231732368469238}


EP_train:2:  63%|| 17462/27626 [41:09<24:06,  7.03it/s]

{'epoch': 2, 'iter': 17460, 'avg_loss': 8.549022956877856, 'avg_acc': 50.02970906591834, 'loss': 9.11075496673584}


EP_train:2:  63%|| 17472/27626 [41:10<24:01,  7.04it/s]

{'epoch': 2, 'iter': 17470, 'avg_loss': 8.549059409035152, 'avg_acc': 50.03058640032053, 'loss': 7.881302356719971}


EP_train:2:  63%|| 17482/27626 [41:11<23:57,  7.06it/s]

{'epoch': 2, 'iter': 17480, 'avg_loss': 8.548781081704224, 'avg_acc': 50.03235655854928, 'loss': 8.234200477600098}


EP_train:2:  63%|| 17492/27626 [41:13<23:52,  7.07it/s]

{'epoch': 2, 'iter': 17490, 'avg_loss': 8.548682794597639, 'avg_acc': 50.03198073294838, 'loss': 9.000508308410645}


EP_train:2:  63%|| 17502/27626 [41:14<24:12,  6.97it/s]

{'epoch': 2, 'iter': 17500, 'avg_loss': 8.548710900928953, 'avg_acc': 50.0289269184618, 'loss': 8.774907112121582}


EP_train:2:  63%|| 17512/27626 [41:16<23:55,  7.05it/s]

{'epoch': 2, 'iter': 17510, 'avg_loss': 8.548813139226148, 'avg_acc': 50.03015961395694, 'loss': 8.31402587890625}


EP_train:2:  63%|| 17522/27626 [41:17<23:51,  7.06it/s]

{'epoch': 2, 'iter': 17520, 'avg_loss': 8.548803993082707, 'avg_acc': 50.02818046915131, 'loss': 8.255027770996094}


EP_train:2:  63%|| 17532/27626 [41:18<23:55,  7.03it/s]

{'epoch': 2, 'iter': 17530, 'avg_loss': 8.548673032945905, 'avg_acc': 50.029412184130976, 'loss': 8.126004219055176}


EP_train:2:  63%|| 17542/27626 [41:20<23:45,  7.07it/s]

{'epoch': 2, 'iter': 17540, 'avg_loss': 8.548652040734815, 'avg_acc': 50.03206772703951, 'loss': 8.50549030303955}


EP_train:2:  64%|| 17552/27626 [41:21<23:51,  7.04it/s]

{'epoch': 2, 'iter': 17550, 'avg_loss': 8.548707573171097, 'avg_acc': 50.03115919320836, 'loss': 8.785818099975586}


EP_train:2:  64%|| 17562/27626 [41:23<23:42,  7.08it/s]

{'epoch': 2, 'iter': 17560, 'avg_loss': 8.54859770480012, 'avg_acc': 50.03060759637834, 'loss': 8.511631965637207}


EP_train:2:  64%|| 17572/27626 [41:24<23:51,  7.02it/s]

{'epoch': 2, 'iter': 17570, 'avg_loss': 8.548699103926332, 'avg_acc': 50.02827812873485, 'loss': 8.506837844848633}


EP_train:2:  64%|| 17582/27626 [41:26<23:47,  7.04it/s]

{'epoch': 2, 'iter': 17580, 'avg_loss': 8.548754188714868, 'avg_acc': 50.02897303907628, 'loss': 9.116353988647461}


EP_train:2:  64%|| 17592/27626 [41:27<24:01,  6.96it/s]

{'epoch': 2, 'iter': 17590, 'avg_loss': 8.548801997300066, 'avg_acc': 50.02966715934285, 'loss': 8.628836631774902}


EP_train:2:  64%|| 17602/27626 [41:28<23:31,  7.10it/s]

{'epoch': 2, 'iter': 17600, 'avg_loss': 8.5487469237309, 'avg_acc': 50.0284074768479, 'loss': 9.063311576843262}


EP_train:2:  64%|| 17612/27626 [41:30<23:34,  7.08it/s]

{'epoch': 2, 'iter': 17610, 'avg_loss': 8.548816533235167, 'avg_acc': 50.02661688717279, 'loss': 8.23422622680664}


EP_train:2:  64%|| 17622/27626 [41:31<23:39,  7.05it/s]

{'epoch': 2, 'iter': 17620, 'avg_loss': 8.54865381897771, 'avg_acc': 50.0258924011123, 'loss': 8.33115291595459}


EP_train:2:  64%|| 17632/27626 [41:33<23:26,  7.10it/s]

{'epoch': 2, 'iter': 17630, 'avg_loss': 8.548469099236605, 'avg_acc': 50.02570047076173, 'loss': 7.889283657073975}


EP_train:2:  64%|| 17642/27626 [41:34<23:50,  6.98it/s]

{'epoch': 2, 'iter': 17640, 'avg_loss': 8.548413307000082, 'avg_acc': 50.0240916047843, 'loss': 8.607008934020996}


EP_train:2:  64%|| 17652/27626 [41:35<23:41,  7.02it/s]

{'epoch': 2, 'iter': 17650, 'avg_loss': 8.548321095602173, 'avg_acc': 50.02655656903292, 'loss': 8.104657173156738}


EP_train:2:  64%|| 17662/27626 [41:37<23:34,  7.04it/s]

{'epoch': 2, 'iter': 17660, 'avg_loss': 8.548183645223897, 'avg_acc': 50.023356548326824, 'loss': 8.51119613647461}


EP_train:2:  64%|| 17672/27626 [41:38<23:40,  7.01it/s]

{'epoch': 2, 'iter': 17670, 'avg_loss': 8.548149133820086, 'avg_acc': 50.02139805330768, 'loss': 9.416966438293457}


EP_train:2:  64%|| 17682/27626 [41:40<23:35,  7.02it/s]

{'epoch': 2, 'iter': 17680, 'avg_loss': 8.548150650968541, 'avg_acc': 50.0201487472428, 'loss': 8.099672317504883}


EP_train:2:  64%|| 17692/27626 [41:41<23:23,  7.08it/s]

{'epoch': 2, 'iter': 17690, 'avg_loss': 8.548003736005091, 'avg_acc': 50.018900853541346, 'loss': 8.35626220703125}


EP_train:2:  64%|| 17702/27626 [41:42<23:32,  7.02it/s]

{'epoch': 2, 'iter': 17700, 'avg_loss': 8.548121077617974, 'avg_acc': 50.01783091350771, 'loss': 7.719376087188721}


EP_train:2:  64%|| 17712/27626 [41:44<23:30,  7.03it/s]

{'epoch': 2, 'iter': 17710, 'avg_loss': 8.548149830590567, 'avg_acc': 50.01358618937384, 'loss': 8.15040397644043}


EP_train:2:  64%|| 17722/27626 [41:45<23:35,  7.00it/s]

{'epoch': 2, 'iter': 17720, 'avg_loss': 8.548124810668659, 'avg_acc': 50.012696800406296, 'loss': 9.011672019958496}


EP_train:2:  64%|| 17732/27626 [41:47<23:18,  7.07it/s]

{'epoch': 2, 'iter': 17730, 'avg_loss': 8.54812350974229, 'avg_acc': 50.0146283345553, 'loss': 9.272743225097656}


EP_train:2:  64%|| 17742/27626 [41:48<23:22,  7.05it/s]

{'epoch': 2, 'iter': 17740, 'avg_loss': 8.547990644699112, 'avg_acc': 50.014620089059235, 'loss': 8.255645751953125}


EP_train:2:  64%|| 17752/27626 [41:50<23:22,  7.04it/s]

{'epoch': 2, 'iter': 17750, 'avg_loss': 8.547967015867549, 'avg_acc': 50.01302743507408, 'loss': 8.481979370117188}


EP_train:2:  64%|| 17762/27626 [41:51<23:25,  7.02it/s]

{'epoch': 2, 'iter': 17760, 'avg_loss': 8.548007462638287, 'avg_acc': 50.01319604751985, 'loss': 9.100468635559082}


EP_train:2:  64%|| 17772/27626 [41:52<23:22,  7.03it/s]

{'epoch': 2, 'iter': 17770, 'avg_loss': 8.548059625836512, 'avg_acc': 50.01248522874345, 'loss': 8.824934005737305}


EP_train:2:  64%|| 17782/27626 [41:54<23:15,  7.06it/s]

{'epoch': 2, 'iter': 17780, 'avg_loss': 8.547854146366882, 'avg_acc': 50.01089646251617, 'loss': 8.469240188598633}


EP_train:2:  64%|| 17792/27626 [41:55<23:14,  7.05it/s]

{'epoch': 2, 'iter': 17790, 'avg_loss': 8.547979076625216, 'avg_acc': 50.00948513293238, 'loss': 9.253046989440918}


EP_train:2:  64%|| 17802/27626 [41:57<23:12,  7.05it/s]

{'epoch': 2, 'iter': 17800, 'avg_loss': 8.547964947658016, 'avg_acc': 50.0112353238582, 'loss': 7.816717624664307}


EP_train:2:  64%|| 17812/27626 [41:58<23:01,  7.11it/s]

{'epoch': 2, 'iter': 17810, 'avg_loss': 8.547961746553362, 'avg_acc': 50.008948121947114, 'loss': 8.89506721496582}


EP_train:2:  65%|| 17822/27626 [41:59<23:22,  6.99it/s]

{'epoch': 2, 'iter': 17820, 'avg_loss': 8.54804832770376, 'avg_acc': 50.01174877952976, 'loss': 8.813645362854004}


EP_train:2:  65%|| 17832/27626 [42:01<23:16,  7.01it/s]

{'epoch': 2, 'iter': 17830, 'avg_loss': 8.547871555350618, 'avg_acc': 50.01174219056699, 'loss': 8.057507514953613}


EP_train:2:  65%|| 17842/27626 [42:02<23:10,  7.04it/s]

{'epoch': 2, 'iter': 17840, 'avg_loss': 8.547819593215856, 'avg_acc': 50.010334342245386, 'loss': 7.998255729675293}


EP_train:2:  65%|| 17852/27626 [42:04<23:08,  7.04it/s]

{'epoch': 2, 'iter': 17850, 'avg_loss': 8.547687804075885, 'avg_acc': 50.00962831213938, 'loss': 8.493070602416992}


EP_train:2:  65%|| 17862/27626 [42:05<23:12,  7.01it/s]

{'epoch': 2, 'iter': 17860, 'avg_loss': 8.54755425443842, 'avg_acc': 50.00839818599182, 'loss': 9.308263778686523}


EP_train:2:  65%|| 17872/27626 [42:07<23:08,  7.02it/s]

{'epoch': 2, 'iter': 17870, 'avg_loss': 8.547624761340108, 'avg_acc': 50.01031699401265, 'loss': 8.380691528320312}


EP_train:2:  65%|| 17882/27626 [42:08<23:02,  7.05it/s]

{'epoch': 2, 'iter': 17880, 'avg_loss': 8.54782613522054, 'avg_acc': 50.008738325597, 'loss': 7.932441711425781}


EP_train:2:  65%|| 17892/27626 [42:09<23:02,  7.04it/s]

{'epoch': 2, 'iter': 17890, 'avg_loss': 8.548176417522143, 'avg_acc': 50.01030546084624, 'loss': 8.924691200256348}


EP_train:2:  65%|| 17902/27626 [42:11<23:00,  7.04it/s]

{'epoch': 2, 'iter': 17900, 'avg_loss': 8.548301699574276, 'avg_acc': 50.01152170269817, 'loss': 8.381816864013672}


EP_train:2:  65%|| 17912/27626 [42:12<22:55,  7.06it/s]

{'epoch': 2, 'iter': 17910, 'avg_loss': 8.548227501676934, 'avg_acc': 50.01116632237172, 'loss': 8.894898414611816}


EP_train:2:  65%|| 17922/27626 [42:14<22:50,  7.08it/s]

{'epoch': 2, 'iter': 17920, 'avg_loss': 8.548275726487159, 'avg_acc': 50.01150884437252, 'loss': 8.804556846618652}


EP_train:2:  65%|| 17932/27626 [42:15<22:51,  7.07it/s]

{'epoch': 2, 'iter': 17930, 'avg_loss': 8.5484740814012, 'avg_acc': 50.01219954269143, 'loss': 9.373039245605469}


EP_train:2:  65%|| 17942/27626 [42:16<22:53,  7.05it/s]

{'epoch': 2, 'iter': 17940, 'avg_loss': 8.548391159784368, 'avg_acc': 50.01236692492057, 'loss': 8.598918914794922}


EP_train:2:  65%|| 17952/27626 [42:18<22:56,  7.03it/s]

{'epoch': 2, 'iter': 17950, 'avg_loss': 8.548379400328843, 'avg_acc': 50.01236003565261, 'loss': 8.19277286529541}


EP_train:2:  65%|| 17962/27626 [42:19<22:56,  7.02it/s]

{'epoch': 2, 'iter': 17960, 'avg_loss': 8.548437224936562, 'avg_acc': 50.0127011302266, 'loss': 8.178485870361328}


EP_train:2:  65%|| 17972/27626 [42:21<22:51,  7.04it/s]

{'epoch': 2, 'iter': 17970, 'avg_loss': 8.548257619999903, 'avg_acc': 50.01147682377163, 'loss': 7.858066082000732}


EP_train:2:  65%|| 17982/27626 [42:22<23:01,  6.98it/s]

{'epoch': 2, 'iter': 17980, 'avg_loss': 8.548198382917178, 'avg_acc': 50.01112285189922, 'loss': 8.627257347106934}


EP_train:2:  65%|| 17992/27626 [42:23<22:58,  6.99it/s]

{'epoch': 2, 'iter': 17990, 'avg_loss': 8.548147589771686, 'avg_acc': 50.01233255516647, 'loss': 7.974973678588867}


EP_train:2:  65%|| 18002/27626 [42:25<22:43,  7.06it/s]

{'epoch': 2, 'iter': 18000, 'avg_loss': 8.548022958584795, 'avg_acc': 50.0093744791956, 'loss': 8.367321014404297}


EP_train:2:  65%|| 18012/27626 [42:26<22:42,  7.06it/s]

{'epoch': 2, 'iter': 18010, 'avg_loss': 8.547814312095571, 'avg_acc': 50.0090222641719, 'loss': 8.472672462463379}


EP_train:2:  65%|| 18022/27626 [42:28<22:48,  7.02it/s]

{'epoch': 2, 'iter': 18020, 'avg_loss': 8.547826041749815, 'avg_acc': 50.0091906664447, 'loss': 8.807085990905762}


EP_train:2:  65%|| 18032/27626 [42:29<22:41,  7.05it/s]

{'epoch': 2, 'iter': 18030, 'avg_loss': 8.54768675355148, 'avg_acc': 50.009705507182076, 'loss': 8.35529613494873}


EP_train:2:  65%|| 18042/27626 [42:31<22:55,  6.97it/s]

{'epoch': 2, 'iter': 18040, 'avg_loss': 8.54765681016274, 'avg_acc': 50.011259076547866, 'loss': 7.832340240478516}


EP_train:2:  65%|| 18052/27626 [42:32<22:46,  7.01it/s]

{'epoch': 2, 'iter': 18050, 'avg_loss': 8.547779957149624, 'avg_acc': 50.011772200986094, 'loss': 9.010005950927734}


EP_train:2:  65%|| 18062/27626 [42:33<22:32,  7.07it/s]

{'epoch': 2, 'iter': 18060, 'avg_loss': 8.547884339912939, 'avg_acc': 50.012111732462216, 'loss': 9.12332534790039}


EP_train:2:  65%|| 18072/27626 [42:35<22:36,  7.04it/s]

{'epoch': 2, 'iter': 18070, 'avg_loss': 8.547671880279275, 'avg_acc': 50.01175917215428, 'loss': 8.487787246704102}


EP_train:2:  65%|| 18082/27626 [42:36<22:31,  7.06it/s]

{'epoch': 2, 'iter': 18080, 'avg_loss': 8.547795712634676, 'avg_acc': 50.01192550190808, 'loss': 9.483901023864746}


EP_train:2:  65%|| 18092/27626 [42:38<22:33,  7.04it/s]

{'epoch': 2, 'iter': 18090, 'avg_loss': 8.547746492139893, 'avg_acc': 50.01105522082804, 'loss': 9.089927673339844}


EP_train:2:  66%|| 18102/27626 [42:39<22:22,  7.10it/s]

{'epoch': 2, 'iter': 18100, 'avg_loss': 8.547680884986601, 'avg_acc': 50.01070382851777, 'loss': 8.65320110321045}


EP_train:2:  66%|| 18112/27626 [42:40<22:22,  7.09it/s]

{'epoch': 2, 'iter': 18110, 'avg_loss': 8.547495530460061, 'avg_acc': 50.01104301253382, 'loss': 7.801361083984375}


EP_train:2:  66%|| 18122/27626 [42:42<22:27,  7.05it/s]

{'epoch': 2, 'iter': 18120, 'avg_loss': 8.547389453323555, 'avg_acc': 50.01155427404669, 'loss': 9.192620277404785}


EP_train:2:  66%|| 18132/27626 [42:43<22:30,  7.03it/s]

{'epoch': 2, 'iter': 18130, 'avg_loss': 8.547508803713665, 'avg_acc': 50.01396089570349, 'loss': 9.438878059387207}


EP_train:2:  66%|| 18142/27626 [42:45<22:24,  7.05it/s]

{'epoch': 2, 'iter': 18140, 'avg_loss': 8.547636082624365, 'avg_acc': 50.01412546166143, 'loss': 10.65298843383789}


EP_train:2:  66%|| 18152/27626 [42:46<22:35,  6.99it/s]

{'epoch': 2, 'iter': 18150, 'avg_loss': 8.547821025639275, 'avg_acc': 50.01497851358052, 'loss': 8.41830062866211}


EP_train:2:  66%|| 18162/27626 [42:48<22:29,  7.01it/s]

{'epoch': 2, 'iter': 18160, 'avg_loss': 8.547878831147452, 'avg_acc': 50.01273332966246, 'loss': 8.407479286193848}


EP_train:2:  66%|| 18172/27626 [42:49<22:17,  7.07it/s]

{'epoch': 2, 'iter': 18170, 'avg_loss': 8.547833882813068, 'avg_acc': 50.011350503549615, 'loss': 8.707649230957031}


EP_train:2:  66%|| 18182/27626 [42:50<22:20,  7.05it/s]

{'epoch': 2, 'iter': 18180, 'avg_loss': 8.548027245113607, 'avg_acc': 50.00962543314449, 'loss': 9.016145706176758}


EP_train:2:  66%|| 18192/27626 [42:52<22:20,  7.04it/s]

{'epoch': 2, 'iter': 18190, 'avg_loss': 8.54812185807957, 'avg_acc': 50.01219696553241, 'loss': 8.843188285827637}


EP_train:2:  66%|| 18202/27626 [42:53<22:09,  7.09it/s]

{'epoch': 2, 'iter': 18200, 'avg_loss': 8.548096616270385, 'avg_acc': 50.01167518268227, 'loss': 8.719077110290527}


EP_train:2:  66%|| 18212/27626 [42:55<22:15,  7.05it/s]

{'epoch': 2, 'iter': 18210, 'avg_loss': 8.54784362269393, 'avg_acc': 50.01046757454286, 'loss': 8.893945693969727}


EP_train:2:  66%|| 18222/27626 [42:56<22:32,  6.95it/s]

{'epoch': 2, 'iter': 18220, 'avg_loss': 8.547858150735081, 'avg_acc': 50.00994731353933, 'loss': 8.795680046081543}


EP_train:2:  66%|| 18232/27626 [42:57<22:10,  7.06it/s]

{'epoch': 2, 'iter': 18230, 'avg_loss': 8.547800383652291, 'avg_acc': 50.00857056661731, 'loss': 8.896663665771484}


EP_train:2:  66%|| 18242/27626 [42:59<22:12,  7.04it/s]

{'epoch': 2, 'iter': 18240, 'avg_loss': 8.547910660872901, 'avg_acc': 50.00788059865139, 'loss': 8.428766250610352}


EP_train:2:  66%|| 18252/27626 [43:00<22:08,  7.05it/s]

{'epoch': 2, 'iter': 18250, 'avg_loss': 8.547810824470881, 'avg_acc': 50.005136704838094, 'loss': 8.133805274963379}


EP_train:2:  66%|| 18262/27626 [43:02<22:11,  7.03it/s]

{'epoch': 2, 'iter': 18260, 'avg_loss': 8.547793243208288, 'avg_acc': 50.004278243250646, 'loss': 8.567683219909668}


EP_train:2:  66%|| 18272/27626 [43:03<22:01,  7.08it/s]

{'epoch': 2, 'iter': 18270, 'avg_loss': 8.547604334451616, 'avg_acc': 50.00530211811067, 'loss': 7.76939058303833}


EP_train:2:  66%|| 18282/27626 [43:04<22:11,  7.02it/s]

{'epoch': 2, 'iter': 18280, 'avg_loss': 8.547457989519023, 'avg_acc': 50.00461544773261, 'loss': 8.929268836975098}


EP_train:2:  66%|| 18292/27626 [43:06<21:57,  7.09it/s]

{'epoch': 2, 'iter': 18290, 'avg_loss': 8.547475691667035, 'avg_acc': 50.00410037723471, 'loss': 8.87841796875}


EP_train:2:  66%|| 18302/27626 [43:07<22:02,  7.05it/s]

{'epoch': 2, 'iter': 18300, 'avg_loss': 8.547572234362033, 'avg_acc': 50.002049068356925, 'loss': 8.38537311553955}


EP_train:2:  66%|| 18312/27626 [43:09<22:12,  6.99it/s]

{'epoch': 2, 'iter': 18310, 'avg_loss': 8.547795363578237, 'avg_acc': 50.00119463710339, 'loss': 8.43996810913086}


EP_train:2:  66%|| 18322/27626 [43:10<21:56,  7.07it/s]

{'epoch': 2, 'iter': 18320, 'avg_loss': 8.54809604652271, 'avg_acc': 50.00119398504449, 'loss': 8.982734680175781}


EP_train:2:  66%|| 18332/27626 [43:12<21:50,  7.09it/s]

{'epoch': 2, 'iter': 18330, 'avg_loss': 8.548145654256004, 'avg_acc': 50.00289809612133, 'loss': 9.327601432800293}


EP_train:2:  66%|| 18342/27626 [43:13<21:55,  7.06it/s]

{'epoch': 2, 'iter': 18340, 'avg_loss': 8.54804301060414, 'avg_acc': 50.00085191647129, 'loss': 8.170744895935059}


EP_train:2:  66%|| 18352/27626 [43:14<21:55,  7.05it/s]

{'epoch': 2, 'iter': 18350, 'avg_loss': 8.54803459196479, 'avg_acc': 50.00102174268433, 'loss': 9.261550903320312}


EP_train:2:  66%|| 18362/27626 [43:16<21:49,  7.07it/s]

{'epoch': 2, 'iter': 18360, 'avg_loss': 8.548009244920797, 'avg_acc': 50.0017019770165, 'loss': 9.303960800170898}


EP_train:2:  67%|| 18372/27626 [43:17<21:49,  7.07it/s]

{'epoch': 2, 'iter': 18370, 'avg_loss': 8.548092048602037, 'avg_acc': 50.00357220619455, 'loss': 8.831583976745605}


EP_train:2:  67%|| 18382/27626 [43:19<21:40,  7.11it/s]

{'epoch': 2, 'iter': 18380, 'avg_loss': 8.548274580201792, 'avg_acc': 50.0032302377455, 'loss': 8.680727005004883}


EP_train:2:  67%|| 18392/27626 [43:20<21:54,  7.02it/s]

{'epoch': 2, 'iter': 18390, 'avg_loss': 8.548444614708052, 'avg_acc': 50.002548801043986, 'loss': 9.130494117736816}


EP_train:2:  67%|| 18402/27626 [43:21<21:53,  7.02it/s]

{'epoch': 2, 'iter': 18400, 'avg_loss': 8.548447595939098, 'avg_acc': 50.0040758654421, 'loss': 8.792459487915039}


EP_train:2:  67%|| 18412/27626 [43:23<21:51,  7.03it/s]

{'epoch': 2, 'iter': 18410, 'avg_loss': 8.54850623730788, 'avg_acc': 50.00441312258975, 'loss': 8.994001388549805}


EP_train:2:  67%|| 18422/27626 [43:24<21:38,  7.09it/s]

{'epoch': 2, 'iter': 18420, 'avg_loss': 8.54849504944323, 'avg_acc': 50.00288393681125, 'loss': 8.2921781539917}


EP_train:2:  67%|| 18432/27626 [43:26<21:42,  7.06it/s]

{'epoch': 2, 'iter': 18430, 'avg_loss': 8.54824757763557, 'avg_acc': 50.00152596169497, 'loss': 8.247446060180664}


EP_train:2:  67%|| 18442/27626 [43:27<21:39,  7.07it/s]

{'epoch': 2, 'iter': 18440, 'avg_loss': 8.548217363648698, 'avg_acc': 50.000677837427475, 'loss': 7.224409580230713}


EP_train:2:  67%|| 18452/27626 [43:28<21:41,  7.05it/s]

{'epoch': 2, 'iter': 18450, 'avg_loss': 8.548160675734817, 'avg_acc': 50.00321798276516, 'loss': 9.048203468322754}


EP_train:2:  67%|| 18462/27626 [43:30<21:31,  7.10it/s]

{'epoch': 2, 'iter': 18460, 'avg_loss': 8.548191253609831, 'avg_acc': 50.003216239640324, 'loss': 9.012534141540527}


EP_train:2:  67%|| 18472/27626 [43:31<21:40,  7.04it/s]

{'epoch': 2, 'iter': 18470, 'avg_loss': 8.54818634075334, 'avg_acc': 50.002876130149964, 'loss': 7.991296291351318}


EP_train:2:  67%|| 18482/27626 [43:33<21:45,  7.01it/s]

{'epoch': 2, 'iter': 18480, 'avg_loss': 8.54805692061028, 'avg_acc': 50.00287457388669, 'loss': 8.561656951904297}


EP_train:2:  67%|| 18492/27626 [43:34<21:52,  6.96it/s]

{'epoch': 2, 'iter': 18490, 'avg_loss': 8.548141363138866, 'avg_acc': 50.00185901249257, 'loss': 8.856061935424805}


EP_train:2:  67%|| 18502/27626 [43:36<21:37,  7.03it/s]

{'epoch': 2, 'iter': 18500, 'avg_loss': 8.548084018410982, 'avg_acc': 50.00185800767526, 'loss': 8.768974304199219}


EP_train:2:  67%|| 18512/27626 [43:37<21:27,  7.08it/s]

{'epoch': 2, 'iter': 18510, 'avg_loss': 8.548251966842537, 'avg_acc': 50.00303873372589, 'loss': 8.629266738891602}


EP_train:2:  67%|| 18522/27626 [43:38<21:34,  7.03it/s]

{'epoch': 2, 'iter': 18520, 'avg_loss': 8.548252627218936, 'avg_acc': 50.00185600129583, 'loss': 8.2485990524292}


EP_train:2:  67%|| 18532/27626 [43:40<21:37,  7.01it/s]

{'epoch': 2, 'iter': 18530, 'avg_loss': 8.548351139060163, 'avg_acc': 50.002866817764826, 'loss': 7.924855709075928}


EP_train:2:  67%|| 18542/27626 [43:41<21:20,  7.09it/s]

{'epoch': 2, 'iter': 18540, 'avg_loss': 8.548373635484785, 'avg_acc': 50.00370799848983, 'loss': 9.364971160888672}


EP_train:2:  67%|| 18552/27626 [43:43<21:23,  7.07it/s]

{'epoch': 2, 'iter': 18550, 'avg_loss': 8.548679655063866, 'avg_acc': 50.00303218155355, 'loss': 8.96187686920166}


EP_train:2:  67%|| 18562/27626 [43:44<21:25,  7.05it/s]

{'epoch': 2, 'iter': 18560, 'avg_loss': 8.548627515746295, 'avg_acc': 50.00505091320511, 'loss': 7.663732528686523}


EP_train:2:  67%|| 18572/27626 [43:45<21:23,  7.06it/s]

{'epoch': 2, 'iter': 18570, 'avg_loss': 8.548743415062432, 'avg_acc': 50.00454337407786, 'loss': 9.481792449951172}


EP_train:2:  67%|| 18582/27626 [43:47<21:29,  7.02it/s]

{'epoch': 2, 'iter': 18580, 'avg_loss': 8.54890044223992, 'avg_acc': 50.00235455572897, 'loss': 9.270411491394043}


EP_train:2:  67%|| 18592/27626 [43:48<21:17,  7.07it/s]

{'epoch': 2, 'iter': 18590, 'avg_loss': 8.548618394340657, 'avg_acc': 50.00134473670056, 'loss': 7.894100666046143}


EP_train:2:  67%|| 18602/27626 [43:50<21:15,  7.07it/s]

{'epoch': 2, 'iter': 18600, 'avg_loss': 8.548602533368653, 'avg_acc': 50.00050400516102, 'loss': 9.022244453430176}


EP_train:2:  67%|| 18612/27626 [43:51<21:25,  7.01it/s]

{'epoch': 2, 'iter': 18610, 'avg_loss': 8.548504786849835, 'avg_acc': 50.00033582290043, 'loss': 8.625866889953613}


EP_train:2:  67%|| 18622/27626 [43:53<21:16,  7.06it/s]

{'epoch': 2, 'iter': 18620, 'avg_loss': 8.548574069844467, 'avg_acc': 49.99765050212126, 'loss': 8.82633113861084}


EP_train:2:  67%|| 18632/27626 [43:54<21:10,  7.08it/s]

{'epoch': 2, 'iter': 18630, 'avg_loss': 8.548488910017392, 'avg_acc': 49.99882588159519, 'loss': 8.027165412902832}


EP_train:2:  67%|| 18642/27626 [43:55<21:08,  7.08it/s]

{'epoch': 2, 'iter': 18640, 'avg_loss': 8.548541668703177, 'avg_acc': 49.99664717558071, 'loss': 8.126261711120605}


EP_train:2:  68%|| 18652/27626 [43:57<21:05,  7.09it/s]

{'epoch': 2, 'iter': 18650, 'avg_loss': 8.548422290252224, 'avg_acc': 49.99514101120583, 'loss': 7.892970085144043}


EP_train:2:  68%|| 18662/27626 [43:58<21:12,  7.04it/s]

{'epoch': 2, 'iter': 18660, 'avg_loss': 8.548635916952394, 'avg_acc': 49.994808691924334, 'loss': 8.911335945129395}


EP_train:2:  68%|| 18672/27626 [44:00<21:07,  7.06it/s]

{'epoch': 2, 'iter': 18670, 'avg_loss': 8.54865945308122, 'avg_acc': 49.99430935675647, 'loss': 8.673190116882324}


EP_train:2:  68%|| 18682/27626 [44:01<21:10,  7.04it/s]

{'epoch': 2, 'iter': 18680, 'avg_loss': 8.54877974911603, 'avg_acc': 49.996654354691934, 'loss': 9.014877319335938}


EP_train:2:  68%|| 18692/27626 [44:02<21:05,  7.06it/s]

{'epoch': 2, 'iter': 18690, 'avg_loss': 8.548711614479677, 'avg_acc': 49.99882965063399, 'loss': 7.626222133636475}


EP_train:2:  68%|| 18702/27626 [44:04<21:14,  7.00it/s]

{'epoch': 2, 'iter': 18700, 'avg_loss': 8.548638421375799, 'avg_acc': 49.99766055291161, 'loss': 8.574419021606445}


EP_train:2:  68%|| 18712/27626 [44:05<20:58,  7.08it/s]

{'epoch': 2, 'iter': 18710, 'avg_loss': 8.548523706714752, 'avg_acc': 49.99682673293784, 'loss': 7.945459365844727}


EP_train:2:  68%|| 18722/27626 [44:07<21:00,  7.06it/s]

{'epoch': 2, 'iter': 18720, 'avg_loss': 8.548461507066028, 'avg_acc': 49.99816382671865, 'loss': 8.073005676269531}


EP_train:2:  68%|| 18732/27626 [44:08<21:03,  7.04it/s]

{'epoch': 2, 'iter': 18730, 'avg_loss': 8.548329737887649, 'avg_acc': 49.99866531418504, 'loss': 7.866824150085449}


EP_train:2:  68%|| 18742/27626 [44:09<20:56,  7.07it/s]

{'epoch': 2, 'iter': 18740, 'avg_loss': 8.548130800843486, 'avg_acc': 50.00100048023051, 'loss': 8.253108978271484}


EP_train:2:  68%|| 18752/27626 [44:11<20:53,  7.08it/s]

{'epoch': 2, 'iter': 18750, 'avg_loss': 8.548283832739859, 'avg_acc': 50.00233320889552, 'loss': 8.774496078491211}


EP_train:2:  68%|| 18762/27626 [44:12<20:58,  7.04it/s]

{'epoch': 2, 'iter': 18760, 'avg_loss': 8.548436767631397, 'avg_acc': 49.99900058632269, 'loss': 8.14751148223877}


EP_train:2:  68%|| 18772/27626 [44:14<20:58,  7.03it/s]

{'epoch': 2, 'iter': 18770, 'avg_loss': 8.54846163917811, 'avg_acc': 49.9995005593735, 'loss': 8.730072975158691}


EP_train:2:  68%|| 18782/27626 [44:15<20:49,  7.08it/s]

{'epoch': 2, 'iter': 18780, 'avg_loss': 8.548346127382667, 'avg_acc': 49.997004951813004, 'loss': 8.373773574829102}


EP_train:2:  68%|| 18792/27626 [44:17<20:52,  7.05it/s]

{'epoch': 2, 'iter': 18790, 'avg_loss': 8.548171021039893, 'avg_acc': 49.99750545473897, 'loss': 8.124394416809082}


EP_train:2:  68%|| 18802/27626 [44:18<20:54,  7.03it/s]

{'epoch': 2, 'iter': 18800, 'avg_loss': 8.548112863378634, 'avg_acc': 49.997672996117224, 'loss': 8.082447052001953}


EP_train:2:  68%|| 18812/27626 [44:19<20:45,  7.08it/s]

{'epoch': 2, 'iter': 18810, 'avg_loss': 8.548126545979528, 'avg_acc': 50.00066450481101, 'loss': 9.001425743103027}


EP_train:2:  68%|| 18822/27626 [44:21<20:40,  7.10it/s]

{'epoch': 2, 'iter': 18820, 'avg_loss': 8.548099430491217, 'avg_acc': 50.00099622761809, 'loss': 8.853519439697266}


EP_train:2:  68%|| 18832/27626 [44:22<20:50,  7.03it/s]

{'epoch': 2, 'iter': 18830, 'avg_loss': 8.547993283486774, 'avg_acc': 49.99834050236313, 'loss': 8.475669860839844}


EP_train:2:  68%|| 18842/27626 [44:24<20:41,  7.07it/s]

{'epoch': 2, 'iter': 18840, 'avg_loss': 8.547974463361038, 'avg_acc': 49.997346213046015, 'loss': 8.258163452148438}


EP_train:2:  68%|| 18852/27626 [44:25<20:52,  7.00it/s]

{'epoch': 2, 'iter': 18850, 'avg_loss': 8.547702117115742, 'avg_acc': 49.99933690520397, 'loss': 8.6021728515625}


EP_train:2:  68%|| 18862/27626 [44:26<20:50,  7.01it/s]

{'epoch': 2, 'iter': 18860, 'avg_loss': 8.547866482413855, 'avg_acc': 50.00165685806691, 'loss': 8.661799430847168}


EP_train:2:  68%|| 18872/27626 [44:28<20:49,  7.01it/s]

{'epoch': 2, 'iter': 18870, 'avg_loss': 8.547943776251813, 'avg_acc': 50.00099358804515, 'loss': 8.22740364074707}


EP_train:2:  68%|| 18882/27626 [44:29<20:39,  7.06it/s]

{'epoch': 2, 'iter': 18880, 'avg_loss': 8.548023333786137, 'avg_acc': 50.000662041205445, 'loss': 8.284432411193848}


EP_train:2:  68%|| 18892/27626 [44:31<20:47,  7.00it/s]

{'epoch': 2, 'iter': 18890, 'avg_loss': 8.547892191065392, 'avg_acc': 50.00082711344026, 'loss': 8.687579154968262}


EP_train:2:  68%|| 18902/27626 [44:32<20:32,  7.08it/s]

{'epoch': 2, 'iter': 18900, 'avg_loss': 8.547945756822925, 'avg_acc': 50.00479471985609, 'loss': 9.150755882263184}


EP_train:2:  68%|| 18912/27626 [44:33<20:35,  7.05it/s]

{'epoch': 2, 'iter': 18910, 'avg_loss': 8.548418354833027, 'avg_acc': 50.00545317540056, 'loss': 8.262911796569824}


EP_train:2:  68%|| 18922/27626 [44:35<20:34,  7.05it/s]

{'epoch': 2, 'iter': 18920, 'avg_loss': 8.54845888895273, 'avg_acc': 50.0041290100946, 'loss': 8.04655647277832}


EP_train:2:  69%|| 18932/27626 [44:36<20:37,  7.02it/s]

{'epoch': 2, 'iter': 18930, 'avg_loss': 8.548586045421818, 'avg_acc': 50.00429190217104, 'loss': 8.72627067565918}


EP_train:2:  69%|| 18942/27626 [44:38<20:39,  7.01it/s]

{'epoch': 2, 'iter': 18940, 'avg_loss': 8.548444324559064, 'avg_acc': 50.00313473417454, 'loss': 8.369749069213867}


EP_train:2:  69%|| 18952/27626 [44:39<20:27,  7.07it/s]

{'epoch': 2, 'iter': 18950, 'avg_loss': 8.548465646506552, 'avg_acc': 50.00049469684977, 'loss': 9.119397163391113}


EP_train:2:  69%|| 18962/27626 [44:41<20:28,  7.05it/s]

{'epoch': 2, 'iter': 18960, 'avg_loss': 8.548533716192226, 'avg_acc': 50.00230736775486, 'loss': 7.892477989196777}


EP_train:2:  69%|| 18972/27626 [44:42<20:37,  6.99it/s]

{'epoch': 2, 'iter': 18970, 'avg_loss': 8.548425165478717, 'avg_acc': 50.00378867745506, 'loss': 8.384931564331055}


EP_train:2:  69%|| 18982/27626 [44:43<20:27,  7.04it/s]

{'epoch': 2, 'iter': 18980, 'avg_loss': 8.548284925328474, 'avg_acc': 50.00378668141826, 'loss': 8.617377281188965}


EP_train:2:  69%|| 18992/27626 [44:45<20:29,  7.02it/s]

{'epoch': 2, 'iter': 18990, 'avg_loss': 8.548272436486558, 'avg_acc': 50.00213917118636, 'loss': 9.07470417022705}


EP_train:2:  69%|| 19002/27626 [44:46<20:22,  7.05it/s]

{'epoch': 2, 'iter': 19000, 'avg_loss': 8.548377637455108, 'avg_acc': 50.00098679016893, 'loss': 8.75821304321289}


EP_train:2:  69%|| 19012/27626 [44:48<20:26,  7.02it/s]

{'epoch': 2, 'iter': 19010, 'avg_loss': 8.54824750694695, 'avg_acc': 49.998356214823, 'loss': 8.611437797546387}


EP_train:2:  69%|| 19022/27626 [44:49<20:26,  7.01it/s]

{'epoch': 2, 'iter': 19020, 'avg_loss': 8.548044081046612, 'avg_acc': 50.00049287629462, 'loss': 7.906805515289307}


EP_train:2:  69%|| 19032/27626 [44:50<20:23,  7.03it/s]

{'epoch': 2, 'iter': 19030, 'avg_loss': 8.547828270141103, 'avg_acc': 50.00114944038674, 'loss': 8.186731338500977}


EP_train:2:  69%|| 19042/27626 [44:52<20:23,  7.01it/s]

{'epoch': 2, 'iter': 19040, 'avg_loss': 8.548362613456801, 'avg_acc': 50.00082059765769, 'loss': 8.917922019958496}


EP_train:2:  69%|| 19052/27626 [44:53<20:19,  7.03it/s]

{'epoch': 2, 'iter': 19050, 'avg_loss': 8.548373013304623, 'avg_acc': 50.00049210015223, 'loss': 8.5458402633667}


EP_train:2:  69%|| 19062/27626 [44:55<20:13,  7.06it/s]

{'epoch': 2, 'iter': 19060, 'avg_loss': 8.5483117376005, 'avg_acc': 50.00065578930801, 'loss': 8.234227180480957}


EP_train:2:  69%|| 19072/27626 [44:56<20:19,  7.01it/s]

{'epoch': 2, 'iter': 19070, 'avg_loss': 8.548261070506499, 'avg_acc': 49.99868910911856, 'loss': 8.449804306030273}


EP_train:2:  69%|| 19082/27626 [44:58<20:22,  6.99it/s]

{'epoch': 2, 'iter': 19080, 'avg_loss': 8.548290424642808, 'avg_acc': 49.99737959226456, 'loss': 9.477845191955566}


EP_train:2:  69%|| 19092/27626 [44:59<20:06,  7.07it/s]

{'epoch': 2, 'iter': 19090, 'avg_loss': 8.548224661762497, 'avg_acc': 49.99738096485255, 'loss': 9.202439308166504}


EP_train:2:  69%|| 19102/27626 [45:00<20:05,  7.07it/s]

{'epoch': 2, 'iter': 19100, 'avg_loss': 8.548214336645103, 'avg_acc': 49.99492827600649, 'loss': 8.633423805236816}


EP_train:2:  69%|| 19112/27626 [45:02<20:20,  6.98it/s]

{'epoch': 2, 'iter': 19110, 'avg_loss': 8.548217351916547, 'avg_acc': 49.994440374653344, 'loss': 8.803413391113281}


EP_train:2:  69%|| 19122/27626 [45:03<20:23,  6.95it/s]

{'epoch': 2, 'iter': 19120, 'avg_loss': 8.548175138095752, 'avg_acc': 49.99787537262696, 'loss': 7.934684753417969}


EP_train:2:  69%|| 19132/27626 [45:05<20:02,  7.06it/s]

{'epoch': 2, 'iter': 19130, 'avg_loss': 8.548092530391973, 'avg_acc': 49.99738644085516, 'loss': 8.244778633117676}


EP_train:2:  69%|| 19142/27626 [45:06<20:04,  7.04it/s]

{'epoch': 2, 'iter': 19140, 'avg_loss': 8.548001831648186, 'avg_acc': 50.00016326210752, 'loss': 8.699902534484863}


EP_train:2:  69%|| 19152/27626 [45:07<19:59,  7.06it/s]

{'epoch': 2, 'iter': 19150, 'avg_loss': 8.54793621283537, 'avg_acc': 50.00114223800324, 'loss': 8.203875541687012}


EP_train:2:  69%|| 19162/27626 [45:09<20:10,  6.99it/s]

{'epoch': 2, 'iter': 19160, 'avg_loss': 8.548116525636386, 'avg_acc': 50.00195710036011, 'loss': 8.807594299316406}


EP_train:2:  69%|| 19172/27626 [45:10<20:02,  7.03it/s]

{'epoch': 2, 'iter': 19170, 'avg_loss': 8.547990457023305, 'avg_acc': 50.00326013249179, 'loss': 7.3694844245910645}


EP_train:2:  69%|| 19182/27626 [45:12<20:02,  7.02it/s]

{'epoch': 2, 'iter': 19180, 'avg_loss': 8.54802620534597, 'avg_acc': 50.00358427610656, 'loss': 7.77275276184082}


EP_train:2:  69%|| 19192/27626 [45:13<19:53,  7.07it/s]

{'epoch': 2, 'iter': 19190, 'avg_loss': 8.547873333740194, 'avg_acc': 50.00341957167422, 'loss': 8.439138412475586}


EP_train:2:  70%|| 19202/27626 [45:15<20:02,  7.00it/s]

{'epoch': 2, 'iter': 19200, 'avg_loss': 8.54779041551636, 'avg_acc': 50.00406879850008, 'loss': 8.700390815734863}


EP_train:2:  70%|| 19212/27626 [45:16<19:59,  7.01it/s]

{'epoch': 2, 'iter': 19210, 'avg_loss': 8.547904070484403, 'avg_acc': 50.00536801832284, 'loss': 8.969756126403809}


EP_train:2:  70%|| 19222/27626 [45:17<20:04,  6.98it/s]

{'epoch': 2, 'iter': 19220, 'avg_loss': 8.54792065972946, 'avg_acc': 50.0063407210863, 'loss': 8.92241382598877}


EP_train:2:  70%|| 19232/27626 [45:19<19:54,  7.03it/s]

{'epoch': 2, 'iter': 19230, 'avg_loss': 8.547756865829047, 'avg_acc': 50.00633742395091, 'loss': 8.939698219299316}


EP_train:2:  70%|| 19242/27626 [45:20<19:45,  7.07it/s]

{'epoch': 2, 'iter': 19240, 'avg_loss': 8.547514243369031, 'avg_acc': 50.00503482147498, 'loss': 8.183435440063477}


EP_train:2:  70%|| 19252/27626 [45:22<19:43,  7.08it/s]

{'epoch': 2, 'iter': 19250, 'avg_loss': 8.547525445713104, 'avg_acc': 50.00438288920056, 'loss': 8.512350082397461}


EP_train:2:  70%|| 19262/27626 [45:23<19:40,  7.08it/s]

{'epoch': 2, 'iter': 19260, 'avg_loss': 8.547506277883395, 'avg_acc': 50.00519183842999, 'loss': 8.401101112365723}


EP_train:2:  70%|| 19272/27626 [45:24<19:44,  7.05it/s]

{'epoch': 2, 'iter': 19270, 'avg_loss': 8.547560521101495, 'avg_acc': 50.00470266203103, 'loss': 7.796602249145508}


EP_train:2:  70%|| 19282/27626 [45:26<19:36,  7.09it/s]

{'epoch': 2, 'iter': 19280, 'avg_loss': 8.547532005012298, 'avg_acc': 50.00599683626368, 'loss': 8.236681938171387}


EP_train:2:  70%|| 19292/27626 [45:27<19:44,  7.04it/s]

{'epoch': 2, 'iter': 19290, 'avg_loss': 8.547481041069283, 'avg_acc': 50.00599372764501, 'loss': 8.341104507446289}


EP_train:2:  70%|| 19302/27626 [45:29<19:52,  6.98it/s]

{'epoch': 2, 'iter': 19300, 'avg_loss': 8.547495705878436, 'avg_acc': 50.00760970934148, 'loss': 7.826078414916992}


EP_train:2:  70%|| 19312/27626 [45:30<19:46,  7.01it/s]

{'epoch': 2, 'iter': 19310, 'avg_loss': 8.547411335081929, 'avg_acc': 50.00744394386619, 'loss': 8.598528861999512}


EP_train:2:  70%|| 19322/27626 [45:32<19:39,  7.04it/s]

{'epoch': 2, 'iter': 19320, 'avg_loss': 8.54734950531813, 'avg_acc': 50.00501397443197, 'loss': 8.061656951904297}


EP_train:2:  70%|| 19332/27626 [45:33<19:38,  7.04it/s]

{'epoch': 2, 'iter': 19330, 'avg_loss': 8.54712951014967, 'avg_acc': 50.00517303812529, 'loss': 7.83381986618042}


EP_train:2:  70%|| 19342/27626 [45:34<19:38,  7.03it/s]

{'epoch': 2, 'iter': 19340, 'avg_loss': 8.54713265377242, 'avg_acc': 50.00403934646606, 'loss': 8.889799118041992}


EP_train:2:  70%|| 19352/27626 [45:36<19:27,  7.09it/s]

{'epoch': 2, 'iter': 19350, 'avg_loss': 8.547167944172594, 'avg_acc': 50.0014534132603, 'loss': 8.28010368347168}


EP_train:2:  70%|| 19362/27626 [45:37<19:28,  7.07it/s]

{'epoch': 2, 'iter': 19360, 'avg_loss': 8.546999957171003, 'avg_acc': 50.00112984866484, 'loss': 9.133991241455078}


EP_train:2:  70%|| 19372/27626 [45:39<19:27,  7.07it/s]

{'epoch': 2, 'iter': 19370, 'avg_loss': 8.546997974828601, 'avg_acc': 50.002903825305864, 'loss': 7.6819329261779785}


EP_train:2:  70%|| 19382/27626 [45:40<19:44,  6.96it/s]

{'epoch': 2, 'iter': 19380, 'avg_loss': 8.546882588577605, 'avg_acc': 50.00209612507095, 'loss': 8.022684097290039}


EP_train:2:  70%|| 19392/27626 [45:41<19:30,  7.04it/s]

{'epoch': 2, 'iter': 19390, 'avg_loss': 8.546777184692804, 'avg_acc': 50.00386777370945, 'loss': 8.935513496398926}


EP_train:2:  70%|| 19402/27626 [45:43<19:28,  7.04it/s]

{'epoch': 2, 'iter': 19400, 'avg_loss': 8.54675961998245, 'avg_acc': 50.003704705942994, 'loss': 8.49129581451416}


EP_train:2:  70%|| 19412/27626 [45:44<19:25,  7.05it/s]

{'epoch': 2, 'iter': 19410, 'avg_loss': 8.546614805824587, 'avg_acc': 50.002253876667865, 'loss': 8.275790214538574}


EP_train:2:  70%|| 19422/27626 [45:46<19:20,  7.07it/s]

{'epoch': 2, 'iter': 19420, 'avg_loss': 8.546390225311416, 'avg_acc': 50.00193089954173, 'loss': 9.168153762817383}


EP_train:2:  70%|| 19432/27626 [45:47<19:34,  6.98it/s]

{'epoch': 2, 'iter': 19430, 'avg_loss': 8.546424296958577, 'avg_acc': 50.00257320776079, 'loss': 8.32609748840332}


EP_train:2:  70%|| 19442/27626 [45:49<19:18,  7.07it/s]

{'epoch': 2, 'iter': 19440, 'avg_loss': 8.546493206879685, 'avg_acc': 50.0041793117638, 'loss': 8.994781494140625}


EP_train:2:  70%|| 19452/27626 [45:50<19:20,  7.04it/s]

{'epoch': 2, 'iter': 19450, 'avg_loss': 8.546572009344455, 'avg_acc': 50.00353452264665, 'loss': 9.242090225219727}


EP_train:2:  70%|| 19462/27626 [45:51<19:20,  7.04it/s]

{'epoch': 2, 'iter': 19460, 'avg_loss': 8.546763857248587, 'avg_acc': 50.002408663480814, 'loss': 8.913064956665039}


EP_train:2:  70%|| 19472/27626 [45:53<19:15,  7.05it/s]

{'epoch': 2, 'iter': 19470, 'avg_loss': 8.546761751187171, 'avg_acc': 50.00240742642905, 'loss': 7.552392959594727}


EP_train:2:  71%|| 19482/27626 [45:54<19:17,  7.04it/s]

{'epoch': 2, 'iter': 19480, 'avg_loss': 8.546902444143448, 'avg_acc': 49.99967917458036, 'loss': 9.25183391571045}


EP_train:2:  71%|| 19492/27626 [45:56<19:16,  7.03it/s]

{'epoch': 2, 'iter': 19490, 'avg_loss': 8.546995490885742, 'avg_acc': 50.00096198245344, 'loss': 8.397204399108887}


EP_train:2:  71%|| 19502/27626 [45:57<19:13,  7.04it/s]

{'epoch': 2, 'iter': 19500, 'avg_loss': 8.54690141077806, 'avg_acc': 50.0011217373468, 'loss': 8.549772262573242}


EP_train:2:  71%|| 19512/27626 [45:58<19:16,  7.02it/s]

{'epoch': 2, 'iter': 19510, 'avg_loss': 8.546797805071721, 'avg_acc': 49.99775767515761, 'loss': 8.630704879760742}


EP_train:2:  71%|| 19522/27626 [46:00<19:11,  7.04it/s]

{'epoch': 2, 'iter': 19520, 'avg_loss': 8.546958473250122, 'avg_acc': 49.9969584037703, 'loss': 9.367841720581055}


EP_train:2:  71%|| 19532/27626 [46:01<19:04,  7.07it/s]

{'epoch': 2, 'iter': 19530, 'avg_loss': 8.547040187856592, 'avg_acc': 49.99663995699145, 'loss': 9.285066604614258}


EP_train:2:  71%|| 19542/27626 [46:03<19:06,  7.05it/s]

{'epoch': 2, 'iter': 19540, 'avg_loss': 8.547420449928193, 'avg_acc': 49.99568215546799, 'loss': 8.58832836151123}


EP_train:2:  71%|| 19552/27626 [46:04<19:11,  7.01it/s]

{'epoch': 2, 'iter': 19550, 'avg_loss': 8.547469368749711, 'avg_acc': 49.99568436397116, 'loss': 8.054407119750977}


EP_train:2:  71%|| 19562/27626 [46:05<19:08,  7.02it/s]

{'epoch': 2, 'iter': 19560, 'avg_loss': 8.547487561501985, 'avg_acc': 49.99600608353356, 'loss': 9.015220642089844}


EP_train:2:  71%|| 19572/27626 [46:07<19:06,  7.03it/s]

{'epoch': 2, 'iter': 19570, 'avg_loss': 8.547504410210863, 'avg_acc': 49.995369424147974, 'loss': 8.725753784179688}


EP_train:2:  71%|| 19582/27626 [46:08<19:00,  7.05it/s]

{'epoch': 2, 'iter': 19580, 'avg_loss': 8.547464437622574, 'avg_acc': 49.993935447627806, 'loss': 8.633831977844238}


EP_train:2:  71%|| 19592/27626 [46:10<18:56,  7.07it/s]

{'epoch': 2, 'iter': 19590, 'avg_loss': 8.547486837957702, 'avg_acc': 49.99346000714614, 'loss': 8.486300468444824}


EP_train:2:  71%|| 19602/27626 [46:11<19:09,  6.98it/s]

{'epoch': 2, 'iter': 19600, 'avg_loss': 8.547550717857831, 'avg_acc': 49.9921878985766, 'loss': 8.86242389678955}


EP_train:2:  71%|| 19612/27626 [46:13<18:53,  7.07it/s]

{'epoch': 2, 'iter': 19610, 'avg_loss': 8.547502963827785, 'avg_acc': 49.994263423588805, 'loss': 9.130363464355469}


EP_train:2:  71%|| 19622/27626 [46:14<18:51,  7.07it/s]

{'epoch': 2, 'iter': 19620, 'avg_loss': 8.547441607166592, 'avg_acc': 49.995221956067475, 'loss': 8.472232818603516}


EP_train:2:  71%|| 19632/27626 [46:15<18:59,  7.01it/s]

{'epoch': 2, 'iter': 19630, 'avg_loss': 8.54755656413518, 'avg_acc': 49.993950893994196, 'loss': 8.693147659301758}


EP_train:2:  71%|| 19642/27626 [46:17<18:59,  7.01it/s]

{'epoch': 2, 'iter': 19640, 'avg_loss': 8.547764263839648, 'avg_acc': 49.99427218573393, 'loss': 8.289905548095703}


EP_train:2:  71%|| 19652/27626 [46:18<18:59,  7.00it/s]

{'epoch': 2, 'iter': 19650, 'avg_loss': 8.547632110445841, 'avg_acc': 49.995547300391834, 'loss': 8.95677375793457}


EP_train:2:  71%|| 19662/27626 [46:20<18:57,  7.00it/s]

{'epoch': 2, 'iter': 19660, 'avg_loss': 8.54773339157605, 'avg_acc': 49.99602639743655, 'loss': 9.066102027893066}


EP_train:2:  71%|| 19672/27626 [46:21<18:55,  7.00it/s]

{'epoch': 2, 'iter': 19670, 'avg_loss': 8.547724287630272, 'avg_acc': 49.995392964262116, 'loss': 8.831964492797852}


EP_train:2:  71%|| 19682/27626 [46:22<18:49,  7.04it/s]

{'epoch': 2, 'iter': 19680, 'avg_loss': 8.547680622738712, 'avg_acc': 49.99253721863726, 'loss': 8.545583724975586}


EP_train:2:  71%|| 19692/27626 [46:24<18:50,  7.02it/s]

{'epoch': 2, 'iter': 19690, 'avg_loss': 8.547706787222126, 'avg_acc': 49.991588796912296, 'loss': 8.506373405456543}


EP_train:2:  71%|| 19702/27626 [46:25<18:44,  7.04it/s]

{'epoch': 2, 'iter': 19700, 'avg_loss': 8.547661140348565, 'avg_acc': 49.99016547383381, 'loss': 9.050869941711426}


EP_train:2:  71%|| 19712/27626 [46:27<18:41,  7.06it/s]

{'epoch': 2, 'iter': 19710, 'avg_loss': 8.54753684257301, 'avg_acc': 49.989377758611944, 'loss': 8.454402923583984}


EP_train:2:  71%|| 19722/27626 [46:28<18:37,  7.07it/s]

{'epoch': 2, 'iter': 19720, 'avg_loss': 8.547479662182642, 'avg_acc': 49.98764007910349, 'loss': 7.909228801727295}


EP_train:2:  71%|| 19732/27626 [46:30<18:35,  7.07it/s]

{'epoch': 2, 'iter': 19730, 'avg_loss': 8.547491974478032, 'avg_acc': 49.98637930160661, 'loss': 8.56820297241211}


EP_train:2:  71%|| 19742/27626 [46:31<18:52,  6.96it/s]

{'epoch': 2, 'iter': 19740, 'avg_loss': 8.54749555077139, 'avg_acc': 49.98448660148929, 'loss': 7.7690300941467285}


EP_train:2:  71%|| 19752/27626 [46:32<18:36,  7.05it/s]

{'epoch': 2, 'iter': 19750, 'avg_loss': 8.547423727572353, 'avg_acc': 49.984652675813884, 'loss': 7.297937393188477}


EP_train:2:  72%|| 19762/27626 [46:34<18:35,  7.05it/s]

{'epoch': 2, 'iter': 19760, 'avg_loss': 8.547374139991067, 'avg_acc': 49.98639997975811, 'loss': 9.016196250915527}


EP_train:2:  72%|| 19772/27626 [46:35<18:32,  7.06it/s]

{'epoch': 2, 'iter': 19770, 'avg_loss': 8.547205512955726, 'avg_acc': 49.98482626068484, 'loss': 7.534148216247559}


EP_train:2:  72%|| 19782/27626 [46:37<18:36,  7.03it/s]

{'epoch': 2, 'iter': 19780, 'avg_loss': 8.547133042497647, 'avg_acc': 49.98341211263334, 'loss': 8.678231239318848}


EP_train:2:  72%|| 19792/27626 [46:38<18:21,  7.11it/s]

{'epoch': 2, 'iter': 19790, 'avg_loss': 8.546879386270602, 'avg_acc': 49.98120989338588, 'loss': 8.011045455932617}


EP_train:2:  72%|| 19802/27626 [46:39<18:31,  7.04it/s]

{'epoch': 2, 'iter': 19800, 'avg_loss': 8.546807303716905, 'avg_acc': 49.98216630473208, 'loss': 8.212315559387207}


EP_train:2:  72%|| 19812/27626 [46:41<18:29,  7.04it/s]

{'epoch': 2, 'iter': 19810, 'avg_loss': 8.546541013419674, 'avg_acc': 49.98359497249003, 'loss': 8.397083282470703}


EP_train:2:  72%|| 19822/27626 [46:42<18:32,  7.01it/s]

{'epoch': 2, 'iter': 19820, 'avg_loss': 8.546497879376668, 'avg_acc': 49.98360324907926, 'loss': 8.338828086853027}


EP_train:2:  72%|| 19832/27626 [46:44<18:22,  7.07it/s]

{'epoch': 2, 'iter': 19830, 'avg_loss': 8.546515518523377, 'avg_acc': 49.98345393575715, 'loss': 8.54962158203125}


EP_train:2:  72%|| 19842/27626 [46:45<18:32,  7.00it/s]

{'epoch': 2, 'iter': 19840, 'avg_loss': 8.546592944295986, 'avg_acc': 49.98314727080289, 'loss': 8.600884437561035}


EP_train:2:  72%|| 19852/27626 [46:47<18:21,  7.06it/s]

{'epoch': 2, 'iter': 19850, 'avg_loss': 8.54662293954482, 'avg_acc': 49.98126668681678, 'loss': 8.349529266357422}


EP_train:2:  72%|| 19862/27626 [46:48<18:16,  7.08it/s]

{'epoch': 2, 'iter': 19860, 'avg_loss': 8.546458494758673, 'avg_acc': 49.981905493177585, 'loss': 7.95124626159668}


EP_train:2:  72%|| 19872/27626 [46:49<18:14,  7.09it/s]

{'epoch': 2, 'iter': 19870, 'avg_loss': 8.546253895016887, 'avg_acc': 49.98081374867898, 'loss': 8.258453369140625}


EP_train:2:  72%|| 19882/27626 [46:51<18:13,  7.08it/s]

{'epoch': 2, 'iter': 19880, 'avg_loss': 8.546123120118109, 'avg_acc': 49.980194658216384, 'loss': 8.195767402648926}


EP_train:2:  72%|| 19892/27626 [46:52<18:08,  7.10it/s]

{'epoch': 2, 'iter': 19890, 'avg_loss': 8.546053881043733, 'avg_acc': 49.98020461515258, 'loss': 8.555059432983398}


EP_train:2:  72%|| 19902/27626 [46:54<18:17,  7.04it/s]

{'epoch': 2, 'iter': 19900, 'avg_loss': 8.546365286212406, 'avg_acc': 49.98068564393749, 'loss': 9.221826553344727}


EP_train:2:  72%|| 19912/27626 [46:55<18:10,  7.08it/s]

{'epoch': 2, 'iter': 19910, 'avg_loss': 8.546327712097483, 'avg_acc': 49.97896891165687, 'loss': 9.010395050048828}


EP_train:2:  72%|| 19922/27626 [46:56<18:23,  6.98it/s]

{'epoch': 2, 'iter': 19920, 'avg_loss': 8.546206520812637, 'avg_acc': 49.978822599267104, 'loss': 8.55117130279541}


EP_train:2:  72%|| 19932/27626 [46:58<18:14,  7.03it/s]

{'epoch': 2, 'iter': 19930, 'avg_loss': 8.546146315809631, 'avg_acc': 49.979460388339774, 'loss': 9.423059463500977}


EP_train:2:  72%|| 19942/27626 [46:59<18:10,  7.05it/s]

{'epoch': 2, 'iter': 19940, 'avg_loss': 8.546059075236876, 'avg_acc': 49.978216990120856, 'loss': 8.41407299041748}


EP_train:2:  72%|| 19952/27626 [47:01<18:13,  7.02it/s]

{'epoch': 2, 'iter': 19950, 'avg_loss': 8.546129727565544, 'avg_acc': 49.981047315924016, 'loss': 8.363521575927734}


EP_train:2:  72%|| 19962/27626 [47:02<18:07,  7.05it/s]

{'epoch': 2, 'iter': 19960, 'avg_loss': 8.546146104464903, 'avg_acc': 49.9791781473874, 'loss': 8.641279220581055}


EP_train:2:  72%|| 19972/27626 [47:03<17:55,  7.11it/s]

{'epoch': 2, 'iter': 19970, 'avg_loss': 8.546198576394787, 'avg_acc': 49.980753342346404, 'loss': 8.002811431884766}


EP_train:2:  72%|| 19982/27626 [47:05<17:56,  7.10it/s]

{'epoch': 2, 'iter': 19980, 'avg_loss': 8.54622715664163, 'avg_acc': 49.980137380511486, 'loss': 8.289891242980957}


EP_train:2:  72%|| 19992/27626 [47:06<18:06,  7.03it/s]

{'epoch': 2, 'iter': 19990, 'avg_loss': 8.54623318832422, 'avg_acc': 49.98202316042219, 'loss': 8.692298889160156}


EP_train:2:  72%|| 20002/27626 [47:08<18:00,  7.06it/s]

{'epoch': 2, 'iter': 20000, 'avg_loss': 8.546195022082735, 'avg_acc': 49.981250937453126, 'loss': 8.2882719039917}


EP_train:2:  72%|| 20012/27626 [47:09<17:59,  7.05it/s]

{'epoch': 2, 'iter': 20010, 'avg_loss': 8.546268777674292, 'avg_acc': 49.981884963270204, 'loss': 8.505731582641602}


EP_train:2:  72%|| 20022/27626 [47:11<18:11,  6.97it/s]

{'epoch': 2, 'iter': 20020, 'avg_loss': 8.546110488825295, 'avg_acc': 49.98142575295939, 'loss': 7.713528633117676}


EP_train:2:  73%|| 20032/27626 [47:12<17:55,  7.06it/s]

{'epoch': 2, 'iter': 20030, 'avg_loss': 8.546031701047626, 'avg_acc': 49.98315111577056, 'loss': 8.574832916259766}


EP_train:2:  73%|| 20042/27626 [47:13<17:59,  7.02it/s]

{'epoch': 2, 'iter': 20040, 'avg_loss': 8.546067397886274, 'avg_acc': 49.984406965720275, 'loss': 8.268993377685547}


EP_train:2:  73%|| 20052/27626 [47:15<17:49,  7.08it/s]

{'epoch': 2, 'iter': 20050, 'avg_loss': 8.546139070395668, 'avg_acc': 49.984258889830926, 'loss': 7.8525848388671875}


EP_train:2:  73%|| 20062/27626 [47:16<17:51,  7.06it/s]

{'epoch': 2, 'iter': 20060, 'avg_loss': 8.546113672133345, 'avg_acc': 49.983955186680625, 'loss': 7.90441370010376}


EP_train:2:  73%|| 20072/27626 [47:18<17:58,  7.01it/s]

{'epoch': 2, 'iter': 20070, 'avg_loss': 8.545956659789791, 'avg_acc': 49.98349608888446, 'loss': 8.154627799987793}


EP_train:2:  73%|| 20082/27626 [47:19<17:55,  7.02it/s]

{'epoch': 2, 'iter': 20080, 'avg_loss': 8.54619156550283, 'avg_acc': 49.9802362930133, 'loss': 8.567937850952148}


EP_train:2:  73%|| 20092/27626 [47:20<17:44,  7.07it/s]

{'epoch': 2, 'iter': 20090, 'avg_loss': 8.545993545971399, 'avg_acc': 49.980090587825394, 'loss': 8.165492057800293}


EP_train:2:  73%|| 20102/27626 [47:22<17:50,  7.03it/s]

{'epoch': 2, 'iter': 20100, 'avg_loss': 8.546156120456859, 'avg_acc': 49.97994502761057, 'loss': 9.747885704040527}


EP_train:2:  73%|| 20112/27626 [47:23<17:52,  7.00it/s]

{'epoch': 2, 'iter': 20110, 'avg_loss': 8.546065814977235, 'avg_acc': 49.97684724777485, 'loss': 8.818378448486328}


EP_train:2:  73%|| 20122/27626 [47:25<17:53,  6.99it/s]

{'epoch': 2, 'iter': 20120, 'avg_loss': 8.546092323858526, 'avg_acc': 49.97375254709011, 'loss': 8.781757354736328}


EP_train:2:  73%|| 20132/27626 [47:26<17:37,  7.09it/s]

{'epoch': 2, 'iter': 20130, 'avg_loss': 8.546022229717881, 'avg_acc': 49.97190278674681, 'loss': 8.728476524353027}


EP_train:2:  73%|| 20142/27626 [47:27<17:34,  7.10it/s]

{'epoch': 2, 'iter': 20140, 'avg_loss': 8.545946186250907, 'avg_acc': 49.97222704930242, 'loss': 9.027803421020508}


EP_train:2:  73%|| 20152/27626 [47:29<17:44,  7.02it/s]

{'epoch': 2, 'iter': 20150, 'avg_loss': 8.545842457584314, 'avg_acc': 49.971000198501315, 'loss': 8.949871063232422}


EP_train:2:  73%|| 20162/27626 [47:30<17:46,  7.00it/s]

{'epoch': 2, 'iter': 20160, 'avg_loss': 8.545602695436138, 'avg_acc': 49.97163459153812, 'loss': 7.593924045562744}


EP_train:2:  73%|| 20172/27626 [47:32<17:35,  7.06it/s]

{'epoch': 2, 'iter': 20170, 'avg_loss': 8.545450093223629, 'avg_acc': 49.97350775866343, 'loss': 8.691436767578125}


EP_train:2:  73%|| 20182/27626 [47:33<17:29,  7.09it/s]

{'epoch': 2, 'iter': 20180, 'avg_loss': 8.545220358946722, 'avg_acc': 49.97336603736188, 'loss': 8.333219528198242}


EP_train:2:  73%|| 20192/27626 [47:35<17:32,  7.06it/s]

{'epoch': 2, 'iter': 20190, 'avg_loss': 8.545206229025835, 'avg_acc': 49.97260536872864, 'loss': 8.330217361450195}


EP_train:2:  73%|| 20202/27626 [47:36<17:39,  7.01it/s]

{'epoch': 2, 'iter': 20200, 'avg_loss': 8.54513003447169, 'avg_acc': 49.97153606257116, 'loss': 8.170989036560059}


EP_train:2:  73%|| 20212/27626 [47:37<17:34,  7.03it/s]

{'epoch': 2, 'iter': 20210, 'avg_loss': 8.544980478289103, 'avg_acc': 49.97247785859186, 'loss': 8.416584968566895}


EP_train:2:  73%|| 20222/27626 [47:39<17:22,  7.10it/s]

{'epoch': 2, 'iter': 20220, 'avg_loss': 8.544794092805136, 'avg_acc': 49.97372780772464, 'loss': 7.664935111999512}


EP_train:2:  73%|| 20232/27626 [47:40<17:28,  7.05it/s]

{'epoch': 2, 'iter': 20230, 'avg_loss': 8.54472555208416, 'avg_acc': 49.977447975878604, 'loss': 8.581932067871094}


EP_train:2:  73%|| 20242/27626 [47:42<17:28,  7.04it/s]

{'epoch': 2, 'iter': 20240, 'avg_loss': 8.544585575187437, 'avg_acc': 49.97699594881676, 'loss': 8.812167167663574}


EP_train:2:  73%|| 20252/27626 [47:43<17:28,  7.04it/s]

{'epoch': 2, 'iter': 20250, 'avg_loss': 8.544531129164834, 'avg_acc': 49.97623574144487, 'loss': 8.287521362304688}


EP_train:2:  73%|| 20262/27626 [47:44<17:14,  7.12it/s]

{'epoch': 2, 'iter': 20260, 'avg_loss': 8.544320905439816, 'avg_acc': 49.97609323330536, 'loss': 8.711685180664062}


EP_train:2:  73%|| 20272/27626 [47:46<17:25,  7.03it/s]

{'epoch': 2, 'iter': 20270, 'avg_loss': 8.544391072864954, 'avg_acc': 49.97995905480736, 'loss': 9.561894416809082}


EP_train:2:  73%|| 20282/27626 [47:47<17:19,  7.07it/s]

{'epoch': 2, 'iter': 20280, 'avg_loss': 8.54443988094233, 'avg_acc': 49.980123021547264, 'loss': 9.741761207580566}


EP_train:2:  73%|| 20292/27626 [47:49<17:25,  7.01it/s]

{'epoch': 2, 'iter': 20290, 'avg_loss': 8.544606767841197, 'avg_acc': 49.97982479917204, 'loss': 8.854822158813477}


EP_train:2:  73%|| 20302/27626 [47:50<17:16,  7.06it/s]

{'epoch': 2, 'iter': 20300, 'avg_loss': 8.544957163134125, 'avg_acc': 49.980142603812624, 'loss': 8.424399375915527}


EP_train:2:  74%|| 20312/27626 [47:52<17:16,  7.06it/s]

{'epoch': 2, 'iter': 20310, 'avg_loss': 8.544949503641268, 'avg_acc': 49.98015238048348, 'loss': 8.361929893493652}


EP_train:2:  74%|| 20322/27626 [47:53<17:15,  7.05it/s]

{'epoch': 2, 'iter': 20320, 'avg_loss': 8.544729278897284, 'avg_acc': 49.97970080212588, 'loss': 7.937172889709473}


EP_train:2:  74%|| 20332/27626 [47:54<17:10,  7.08it/s]

{'epoch': 2, 'iter': 20330, 'avg_loss': 8.544577215361786, 'avg_acc': 49.977558900201664, 'loss': 8.157267570495605}


EP_train:2:  74%|| 20342/27626 [47:56<17:10,  7.07it/s]

{'epoch': 2, 'iter': 20340, 'avg_loss': 8.54444448849801, 'avg_acc': 49.97495821247726, 'loss': 8.256427764892578}


EP_train:2:  74%|| 20352/27626 [47:57<17:09,  7.06it/s]

{'epoch': 2, 'iter': 20350, 'avg_loss': 8.544314559135945, 'avg_acc': 49.97543118274287, 'loss': 8.171865463256836}


EP_train:2:  74%|| 20362/27626 [47:59<17:14,  7.02it/s]

{'epoch': 2, 'iter': 20360, 'avg_loss': 8.54440379122855, 'avg_acc': 49.97575020873238, 'loss': 9.291037559509277}


EP_train:2:  74%|| 20372/27626 [48:00<17:09,  7.04it/s]

{'epoch': 2, 'iter': 20370, 'avg_loss': 8.54449978212294, 'avg_acc': 49.97530189975946, 'loss': 8.121452331542969}


EP_train:2:  74%|| 20382/27626 [48:01<17:09,  7.04it/s]

{'epoch': 2, 'iter': 20380, 'avg_loss': 8.54461728835748, 'avg_acc': 49.97730729601099, 'loss': 9.008797645568848}


EP_train:2:  74%|| 20392/27626 [48:03<17:05,  7.05it/s]

{'epoch': 2, 'iter': 20390, 'avg_loss': 8.544621668605103, 'avg_acc': 49.97762493256829, 'loss': 9.099849700927734}


EP_train:2:  74%|| 20402/27626 [48:04<17:00,  7.08it/s]

{'epoch': 2, 'iter': 20400, 'avg_loss': 8.544560189212305, 'avg_acc': 49.98008676045292, 'loss': 9.173873901367188}


EP_train:2:  74%|| 20412/27626 [48:06<17:05,  7.03it/s]

{'epoch': 2, 'iter': 20410, 'avg_loss': 8.54475665799162, 'avg_acc': 49.98055582773994, 'loss': 8.971108436584473}


EP_train:2:  74%|| 20422/27626 [48:07<17:09,  6.99it/s]

{'epoch': 2, 'iter': 20420, 'avg_loss': 8.544935079776986, 'avg_acc': 49.97857597571127, 'loss': 9.62293529510498}


EP_train:2:  74%|| 20432/27626 [48:08<16:51,  7.12it/s]

{'epoch': 2, 'iter': 20430, 'avg_loss': 8.544798758871188, 'avg_acc': 49.979045323283245, 'loss': 8.530858039855957}


EP_train:2:  74%|| 20442/27626 [48:10<16:51,  7.10it/s]

{'epoch': 2, 'iter': 20440, 'avg_loss': 8.544798204771816, 'avg_acc': 49.98287755002202, 'loss': 8.428714752197266}


EP_train:2:  74%|| 20452/27626 [48:11<16:54,  7.07it/s]

{'epoch': 2, 'iter': 20450, 'avg_loss': 8.544803749721655, 'avg_acc': 49.98456676935113, 'loss': 8.90082836151123}


EP_train:2:  74%|| 20462/27626 [48:13<16:56,  7.04it/s]

{'epoch': 2, 'iter': 20460, 'avg_loss': 8.544866703022821, 'avg_acc': 49.98549068960461, 'loss': 8.85250473022461}


EP_train:2:  74%|| 20472/27626 [48:14<16:59,  7.02it/s]

{'epoch': 2, 'iter': 20470, 'avg_loss': 8.544869766581956, 'avg_acc': 49.98565043231889, 'loss': 9.149031639099121}


EP_train:2:  74%|| 20482/27626 [48:16<16:52,  7.05it/s]

{'epoch': 2, 'iter': 20480, 'avg_loss': 8.544992396098248, 'avg_acc': 49.985352277720814, 'loss': 8.68240737915039}


EP_train:2:  74%|| 20492/27626 [48:17<16:47,  7.08it/s]

{'epoch': 2, 'iter': 20490, 'avg_loss': 8.545024734157506, 'avg_acc': 49.98520692011127, 'loss': 8.253670692443848}


EP_train:2:  74%|| 20502/27626 [48:18<16:45,  7.08it/s]

{'epoch': 2, 'iter': 20500, 'avg_loss': 8.545045483816509, 'avg_acc': 49.98856763084728, 'loss': 8.536415100097656}


EP_train:2:  74%|| 20512/27626 [48:20<16:42,  7.10it/s]

{'epoch': 2, 'iter': 20510, 'avg_loss': 8.54507776927311, 'avg_acc': 49.98674491736141, 'loss': 8.578919410705566}


EP_train:2:  74%|| 20522/27626 [48:21<16:48,  7.05it/s]

{'epoch': 2, 'iter': 20520, 'avg_loss': 8.545410573415708, 'avg_acc': 49.98781735782857, 'loss': 9.106008529663086}


EP_train:2:  74%|| 20532/27626 [48:23<16:45,  7.05it/s]

{'epoch': 2, 'iter': 20530, 'avg_loss': 8.545282649724484, 'avg_acc': 49.987518873898004, 'loss': 7.6780314445495605}


EP_train:2:  74%|| 20542/27626 [48:24<16:42,  7.06it/s]

{'epoch': 2, 'iter': 20540, 'avg_loss': 8.545369681687093, 'avg_acc': 49.98798135436444, 'loss': 9.321985244750977}


EP_train:2:  74%|| 20552/27626 [48:25<16:53,  6.98it/s]

{'epoch': 2, 'iter': 20550, 'avg_loss': 8.545512566908242, 'avg_acc': 49.9875310203883, 'loss': 8.466316223144531}


EP_train:2:  74%|| 20562/27626 [48:27<16:38,  7.08it/s]

{'epoch': 2, 'iter': 20560, 'avg_loss': 8.545452604984462, 'avg_acc': 49.988145031856426, 'loss': 8.670830726623535}


EP_train:2:  74%|| 20572/27626 [48:28<16:41,  7.04it/s]

{'epoch': 2, 'iter': 20570, 'avg_loss': 8.545389960686169, 'avg_acc': 49.98891035924359, 'loss': 8.095474243164062}


EP_train:2:  75%|| 20582/27626 [48:30<16:41,  7.03it/s]

{'epoch': 2, 'iter': 20580, 'avg_loss': 8.545598560447427, 'avg_acc': 49.99058597735776, 'loss': 8.941643714904785}


EP_train:2:  75%|| 20592/27626 [48:31<16:48,  6.98it/s]

{'epoch': 2, 'iter': 20590, 'avg_loss': 8.54574175572466, 'avg_acc': 49.9905905492691, 'loss': 7.921349048614502}


EP_train:2:  75%|| 20602/27626 [48:33<16:35,  7.06it/s]

{'epoch': 2, 'iter': 20600, 'avg_loss': 8.545707752864214, 'avg_acc': 49.9899883500801, 'loss': 8.612372398376465}


EP_train:2:  75%|| 20612/27626 [48:34<16:32,  7.07it/s]

{'epoch': 2, 'iter': 20610, 'avg_loss': 8.545759060087756, 'avg_acc': 49.98878026296637, 'loss': 8.923829078674316}


EP_train:2:  75%|| 20622/27626 [48:35<16:30,  7.07it/s]

{'epoch': 2, 'iter': 20620, 'avg_loss': 8.545866398441127, 'avg_acc': 49.98620944668057, 'loss': 9.499829292297363}


EP_train:2:  75%|| 20632/27626 [48:37<16:28,  7.07it/s]

{'epoch': 2, 'iter': 20630, 'avg_loss': 8.545832935245608, 'avg_acc': 49.98803378411129, 'loss': 8.837320327758789}


EP_train:2:  75%|| 20642/27626 [48:38<16:28,  7.07it/s]

{'epoch': 2, 'iter': 20640, 'avg_loss': 8.545831169455349, 'avg_acc': 49.988796569933626, 'loss': 8.39145565032959}


EP_train:2:  75%|| 20652/27626 [48:40<16:37,  6.99it/s]

{'epoch': 2, 'iter': 20650, 'avg_loss': 8.545900342932141, 'avg_acc': 49.9886506706697, 'loss': 8.614459991455078}


EP_train:2:  75%|| 20662/27626 [48:41<16:25,  7.07it/s]

{'epoch': 2, 'iter': 20660, 'avg_loss': 8.545796478079097, 'avg_acc': 49.99001742413242, 'loss': 7.505865097045898}


EP_train:2:  75%|| 20672/27626 [48:42<16:30,  7.02it/s]

{'epoch': 2, 'iter': 20670, 'avg_loss': 8.54568233317442, 'avg_acc': 49.99319699095351, 'loss': 7.998930931091309}


EP_train:2:  75%|| 20682/27626 [48:44<16:31,  7.00it/s]

{'epoch': 2, 'iter': 20680, 'avg_loss': 8.545604146729874, 'avg_acc': 49.9947113292394, 'loss': 7.988417625427246}


EP_train:2:  75%|| 20692/27626 [48:45<16:24,  7.05it/s]

{'epoch': 2, 'iter': 20690, 'avg_loss': 8.545718960404587, 'avg_acc': 49.994713885264126, 'loss': 9.056680679321289}


EP_train:2:  75%|| 20702/27626 [48:47<16:22,  7.05it/s]

{'epoch': 2, 'iter': 20700, 'avg_loss': 8.54564010161178, 'avg_acc': 49.994263562146756, 'loss': 8.164432525634766}


EP_train:2:  75%|| 20712/27626 [48:48<16:26,  7.01it/s]

{'epoch': 2, 'iter': 20710, 'avg_loss': 8.545510331469243, 'avg_acc': 49.99321012988267, 'loss': 7.827108860015869}


EP_train:2:  75%|| 20722/27626 [48:49<16:18,  7.06it/s]

{'epoch': 2, 'iter': 20720, 'avg_loss': 8.545667318825808, 'avg_acc': 49.99245934076541, 'loss': 8.509864807128906}


EP_train:2:  75%|| 20732/27626 [48:51<16:26,  6.99it/s]

{'epoch': 2, 'iter': 20730, 'avg_loss': 8.54587444148828, 'avg_acc': 49.9932166803338, 'loss': 8.923737525939941}


EP_train:2:  75%|| 20742/27626 [48:52<16:15,  7.06it/s]

{'epoch': 2, 'iter': 20740, 'avg_loss': 8.54597153205443, 'avg_acc': 49.990055927872326, 'loss': 8.556233406066895}


EP_train:2:  75%|| 20752/27626 [48:54<16:12,  7.07it/s]

{'epoch': 2, 'iter': 20750, 'avg_loss': 8.545843127422875, 'avg_acc': 49.99021131511734, 'loss': 7.93536901473999}


EP_train:2:  75%|| 20762/27626 [48:55<16:16,  7.03it/s]

{'epoch': 2, 'iter': 20760, 'avg_loss': 8.545896795706554, 'avg_acc': 49.988409758682145, 'loss': 8.583498001098633}


EP_train:2:  75%|| 20772/27626 [48:57<16:14,  7.03it/s]

{'epoch': 2, 'iter': 20770, 'avg_loss': 8.545929224545871, 'avg_acc': 49.98796398825285, 'loss': 8.73177719116211}


EP_train:2:  75%|| 20782/27626 [48:58<16:09,  7.06it/s]

{'epoch': 2, 'iter': 20780, 'avg_loss': 8.545994484196854, 'avg_acc': 49.98751864684086, 'loss': 8.834542274475098}


EP_train:2:  75%|| 20792/27626 [48:59<16:14,  7.01it/s]

{'epoch': 2, 'iter': 20790, 'avg_loss': 8.546043797377802, 'avg_acc': 49.988877398874514, 'loss': 8.8414945602417}


EP_train:2:  75%|| 20802/27626 [49:01<16:09,  7.04it/s]

{'epoch': 2, 'iter': 20800, 'avg_loss': 8.546036723187196, 'avg_acc': 49.98707994807942, 'loss': 8.195749282836914}


EP_train:2:  75%|| 20812/27626 [49:02<16:03,  7.07it/s]

{'epoch': 2, 'iter': 20810, 'avg_loss': 8.546080728076937, 'avg_acc': 49.98918840997549, 'loss': 8.928865432739258}


EP_train:2:  75%|| 20822/27626 [49:04<16:06,  7.04it/s]

{'epoch': 2, 'iter': 20820, 'avg_loss': 8.546045559157179, 'avg_acc': 49.99204529081216, 'loss': 7.833462238311768}


EP_train:2:  75%|| 20832/27626 [49:05<16:06,  7.03it/s]

{'epoch': 2, 'iter': 20830, 'avg_loss': 8.546072339537758, 'avg_acc': 49.9900988910758, 'loss': 9.290670394897461}


EP_train:2:  75%|| 20842/27626 [49:06<15:59,  7.07it/s]

{'epoch': 2, 'iter': 20840, 'avg_loss': 8.545938637679784, 'avg_acc': 49.99115325560194, 'loss': 8.919473648071289}


EP_train:2:  75%|| 20852/27626 [49:08<15:59,  7.06it/s]

{'epoch': 2, 'iter': 20850, 'avg_loss': 8.546074158979213, 'avg_acc': 49.99145724425687, 'loss': 8.8186674118042}


EP_train:2:  76%|| 20862/27626 [49:09<15:53,  7.10it/s]

{'epoch': 2, 'iter': 20860, 'avg_loss': 8.546105754049162, 'avg_acc': 49.99056253295623, 'loss': 8.734607696533203}


EP_train:2:  76%|| 20872/27626 [49:11<15:49,  7.12it/s]

{'epoch': 2, 'iter': 20870, 'avg_loss': 8.546127453698418, 'avg_acc': 49.99236380623832, 'loss': 9.051131248474121}


EP_train:2:  76%|| 20882/27626 [49:12<16:01,  7.01it/s]

{'epoch': 2, 'iter': 20880, 'avg_loss': 8.546287366790246, 'avg_acc': 49.991170202576505, 'loss': 8.674552917480469}


EP_train:2:  76%|| 20892/27626 [49:13<15:51,  7.07it/s]

{'epoch': 2, 'iter': 20890, 'avg_loss': 8.546241223592412, 'avg_acc': 49.990426499449526, 'loss': 8.304723739624023}


EP_train:2:  76%|| 20902/27626 [49:15<15:44,  7.12it/s]

{'epoch': 2, 'iter': 20900, 'avg_loss': 8.546186281969591, 'avg_acc': 49.98953399358882, 'loss': 8.597694396972656}


EP_train:2:  76%|| 20912/27626 [49:16<15:49,  7.07it/s]

{'epoch': 2, 'iter': 20910, 'avg_loss': 8.546153041589129, 'avg_acc': 49.98998732724404, 'loss': 8.834447860717773}


EP_train:2:  76%|| 20922/27626 [49:18<15:53,  7.03it/s]

{'epoch': 2, 'iter': 20920, 'avg_loss': 8.546105252038874, 'avg_acc': 49.99283017064194, 'loss': 8.13984489440918}


EP_train:2:  76%|| 20932/27626 [49:19<15:44,  7.08it/s]

{'epoch': 2, 'iter': 20930, 'avg_loss': 8.546082464316282, 'avg_acc': 49.99193779561416, 'loss': 8.012625694274902}


EP_train:2:  76%|| 20942/27626 [49:21<15:45,  7.07it/s]

{'epoch': 2, 'iter': 20940, 'avg_loss': 8.546119670222149, 'avg_acc': 49.99119550164748, 'loss': 9.604571342468262}


EP_train:2:  76%|| 20952/27626 [49:22<15:42,  7.08it/s]

{'epoch': 2, 'iter': 20950, 'avg_loss': 8.546279713451012, 'avg_acc': 49.990603073838955, 'loss': 9.802111625671387}


EP_train:2:  76%|| 20962/27626 [49:23<15:38,  7.10it/s]

{'epoch': 2, 'iter': 20960, 'avg_loss': 8.546127963399371, 'avg_acc': 49.993291112065265, 'loss': 8.295575141906738}


EP_train:2:  76%|| 20972/27626 [49:25<15:41,  7.07it/s]

{'epoch': 2, 'iter': 20970, 'avg_loss': 8.5461034179956, 'avg_acc': 49.991804158123124, 'loss': 8.431053161621094}


EP_train:2:  76%|| 20982/27626 [49:26<15:46,  7.02it/s]

{'epoch': 2, 'iter': 20980, 'avg_loss': 8.54611516584063, 'avg_acc': 49.99299961870264, 'loss': 8.107205390930176}


EP_train:2:  76%|| 20992/27626 [49:28<15:36,  7.09it/s]

{'epoch': 2, 'iter': 20990, 'avg_loss': 8.54598407326019, 'avg_acc': 49.99225858701348, 'loss': 8.319771766662598}


EP_train:2:  76%|| 21002/27626 [49:29<15:42,  7.03it/s]

{'epoch': 2, 'iter': 21000, 'avg_loss': 8.546067569330871, 'avg_acc': 49.99226227322509, 'loss': 8.460350036621094}


EP_train:2:  76%|| 21012/27626 [49:30<15:36,  7.06it/s]

{'epoch': 2, 'iter': 21010, 'avg_loss': 8.546075850153553, 'avg_acc': 49.99048117652658, 'loss': 8.859077453613281}


EP_train:2:  76%|| 21022/27626 [49:32<15:40,  7.02it/s]

{'epoch': 2, 'iter': 21020, 'avg_loss': 8.546038553833956, 'avg_acc': 49.98855311355312, 'loss': 8.219110488891602}


EP_train:2:  76%|| 21032/27626 [49:33<15:32,  7.07it/s]

{'epoch': 2, 'iter': 21030, 'avg_loss': 8.545936834193757, 'avg_acc': 49.98930150729875, 'loss': 7.651312828063965}


EP_train:2:  76%|| 21042/27626 [49:35<15:36,  7.03it/s]

{'epoch': 2, 'iter': 21040, 'avg_loss': 8.545725563758737, 'avg_acc': 49.98990067012024, 'loss': 8.317590713500977}


EP_train:2:  76%|| 21052/27626 [49:36<15:33,  7.05it/s]

{'epoch': 2, 'iter': 21050, 'avg_loss': 8.545656348921545, 'avg_acc': 49.98663958956819, 'loss': 8.820090293884277}


EP_train:2:  76%|| 21062/27626 [49:38<15:37,  7.00it/s]

{'epoch': 2, 'iter': 21060, 'avg_loss': 8.545661147744175, 'avg_acc': 49.984717012487536, 'loss': 8.271520614624023}


EP_train:2:  76%|| 21072/27626 [49:39<15:32,  7.03it/s]

{'epoch': 2, 'iter': 21070, 'avg_loss': 8.545629001717185, 'avg_acc': 49.98605903848892, 'loss': 8.298908233642578}


EP_train:2:  76%|| 21082/27626 [49:40<15:31,  7.03it/s]

{'epoch': 2, 'iter': 21080, 'avg_loss': 8.545516396229331, 'avg_acc': 49.98547270053603, 'loss': 8.277958869934082}


EP_train:2:  76%|| 21092/27626 [49:42<15:28,  7.03it/s]

{'epoch': 2, 'iter': 21090, 'avg_loss': 8.545612487473736, 'avg_acc': 49.98651676070362, 'loss': 8.986994743347168}


EP_train:2:  76%|| 21102/27626 [49:43<15:30,  7.01it/s]

{'epoch': 2, 'iter': 21100, 'avg_loss': 8.545764569868766, 'avg_acc': 49.987411734041046, 'loss': 9.081388473510742}


EP_train:2:  76%|| 21112/27626 [49:45<15:25,  7.04it/s]

{'epoch': 2, 'iter': 21110, 'avg_loss': 8.545850210450475, 'avg_acc': 49.98593742598646, 'loss': 8.539419174194336}


EP_train:2:  76%|| 21122/27626 [49:46<15:23,  7.04it/s]

{'epoch': 2, 'iter': 21120, 'avg_loss': 8.545838382974345, 'avg_acc': 49.98831139624071, 'loss': 8.537398338317871}


EP_train:2:  76%|| 21132/27626 [49:47<15:14,  7.10it/s]

{'epoch': 2, 'iter': 21130, 'avg_loss': 8.545966012417736, 'avg_acc': 49.989795797643275, 'loss': 8.476407051086426}


EP_train:2:  77%|| 21142/27626 [49:49<15:19,  7.05it/s]

{'epoch': 2, 'iter': 21140, 'avg_loss': 8.545983721317764, 'avg_acc': 49.99009625845513, 'loss': 8.60274600982666}


EP_train:2:  77%|| 21152/27626 [49:50<15:17,  7.06it/s]

{'epoch': 2, 'iter': 21150, 'avg_loss': 8.545916306279826, 'avg_acc': 49.988771216490946, 'loss': 7.815175533294678}


EP_train:2:  77%|| 21162/27626 [49:52<15:15,  7.06it/s]

{'epoch': 2, 'iter': 21160, 'avg_loss': 8.545867618735473, 'avg_acc': 49.98803813619394, 'loss': 8.690987586975098}


EP_train:2:  77%|| 21172/27626 [49:53<15:11,  7.08it/s]

{'epoch': 2, 'iter': 21170, 'avg_loss': 8.5458871992696, 'avg_acc': 49.98848660904066, 'loss': 7.739773750305176}


EP_train:2:  77%|| 21182/27626 [49:54<15:15,  7.04it/s]

{'epoch': 2, 'iter': 21180, 'avg_loss': 8.545913836069145, 'avg_acc': 49.989672347858935, 'loss': 8.383667945861816}


EP_train:2:  77%|| 21192/27626 [49:56<15:16,  7.02it/s]

{'epoch': 2, 'iter': 21190, 'avg_loss': 8.545686971415018, 'avg_acc': 49.99129937237507, 'loss': 7.7225213050842285}


EP_train:2:  77%|| 21202/27626 [49:57<15:12,  7.04it/s]

{'epoch': 2, 'iter': 21200, 'avg_loss': 8.545457412699394, 'avg_acc': 49.992482665911986, 'loss': 7.5174102783203125}


EP_train:2:  77%|| 21212/27626 [49:59<15:12,  7.03it/s]

{'epoch': 2, 'iter': 21210, 'avg_loss': 8.545465597994655, 'avg_acc': 49.99366484371317, 'loss': 8.535703659057617}


EP_train:2:  77%|| 21222/27626 [50:00<15:05,  7.07it/s]

{'epoch': 2, 'iter': 21220, 'avg_loss': 8.5454344087258, 'avg_acc': 49.993078789877956, 'loss': 7.621810436248779}


EP_train:2:  77%|| 21232/27626 [50:02<15:02,  7.09it/s]

{'epoch': 2, 'iter': 21230, 'avg_loss': 8.545385439168728, 'avg_acc': 49.99381800197824, 'loss': 8.458794593811035}


EP_train:2:  77%|| 21242/27626 [50:03<15:06,  7.04it/s]

{'epoch': 2, 'iter': 21240, 'avg_loss': 8.545538203695289, 'avg_acc': 49.99455651805471, 'loss': 8.335821151733398}


EP_train:2:  77%|| 21252/27626 [50:04<15:03,  7.05it/s]

{'epoch': 2, 'iter': 21250, 'avg_loss': 8.545560673777453, 'avg_acc': 49.992941508634885, 'loss': 7.834563732147217}


EP_train:2:  77%|| 21262/27626 [50:06<14:56,  7.10it/s]

{'epoch': 2, 'iter': 21260, 'avg_loss': 8.545415688489545, 'avg_acc': 49.99353275951272, 'loss': 7.796029090881348}


EP_train:2:  77%|| 21272/27626 [50:07<15:04,  7.02it/s]

{'epoch': 2, 'iter': 21270, 'avg_loss': 8.545307164333204, 'avg_acc': 49.99412345446853, 'loss': 8.542060852050781}


EP_train:2:  77%|| 21282/27626 [50:09<15:04,  7.02it/s]

{'epoch': 2, 'iter': 21280, 'avg_loss': 8.545307312080732, 'avg_acc': 49.99221723603215, 'loss': 9.002934455871582}


EP_train:2:  77%|| 21292/27626 [50:10<14:55,  7.07it/s]

{'epoch': 2, 'iter': 21290, 'avg_loss': 8.545284723105647, 'avg_acc': 49.99089991076042, 'loss': 8.155158996582031}


EP_train:2:  77%|| 21302/27626 [50:11<14:49,  7.11it/s]

{'epoch': 2, 'iter': 21300, 'avg_loss': 8.545185259958762, 'avg_acc': 49.99046406272006, 'loss': 8.489081382751465}


EP_train:2:  77%|| 21312/27626 [50:13<14:55,  7.05it/s]

{'epoch': 2, 'iter': 21310, 'avg_loss': 8.545236160292998, 'avg_acc': 49.98929543428277, 'loss': 8.472235679626465}


EP_train:2:  77%|| 21322/27626 [50:14<14:54,  7.05it/s]

{'epoch': 2, 'iter': 21320, 'avg_loss': 8.545262649210146, 'avg_acc': 49.99017986961212, 'loss': 8.620505332946777}


EP_train:2:  77%|| 21332/27626 [50:16<14:59,  7.00it/s]

{'epoch': 2, 'iter': 21330, 'avg_loss': 8.54540122825605, 'avg_acc': 49.98813346772303, 'loss': 8.780806541442871}


EP_train:2:  77%|| 21342/27626 [50:17<14:50,  7.06it/s]

{'epoch': 2, 'iter': 21340, 'avg_loss': 8.54534894750545, 'avg_acc': 49.9888711869172, 'loss': 9.069772720336914}


EP_train:2:  77%|| 21352/27626 [50:18<14:49,  7.05it/s]

{'epoch': 2, 'iter': 21350, 'avg_loss': 8.545348055820199, 'avg_acc': 49.989022762399884, 'loss': 7.762879371643066}


EP_train:2:  77%|| 21362/27626 [50:20<14:53,  7.01it/s]

{'epoch': 2, 'iter': 21360, 'avg_loss': 8.545351100790551, 'avg_acc': 49.9885890173681, 'loss': 8.636500358581543}


EP_train:2:  77%|| 21372/27626 [50:21<14:57,  6.97it/s]

{'epoch': 2, 'iter': 21370, 'avg_loss': 8.545261869153423, 'avg_acc': 49.98932548781058, 'loss': 8.886698722839355}


EP_train:2:  77%|| 21382/27626 [50:23<14:45,  7.05it/s]

{'epoch': 2, 'iter': 21380, 'avg_loss': 8.545267937722082, 'avg_acc': 49.990061269351294, 'loss': 9.037009239196777}


EP_train:2:  77%|| 21392/27626 [50:24<14:43,  7.05it/s]

{'epoch': 2, 'iter': 21390, 'avg_loss': 8.545345331073158, 'avg_acc': 49.99079636295639, 'loss': 8.95074462890625}


EP_train:2:  77%|| 21402/27626 [50:26<14:38,  7.09it/s]

{'epoch': 2, 'iter': 21400, 'avg_loss': 8.545302858976976, 'avg_acc': 49.99094668473436, 'loss': 8.916426658630371}


EP_train:2:  78%|| 21412/27626 [50:27<14:35,  7.10it/s]

{'epoch': 2, 'iter': 21410, 'avg_loss': 8.545230229450661, 'avg_acc': 49.99168067815609, 'loss': 7.7585954666137695}


EP_train:2:  78%|| 21422/27626 [50:28<14:40,  7.04it/s]

{'epoch': 2, 'iter': 21420, 'avg_loss': 8.54530862975892, 'avg_acc': 49.992122216516506, 'loss': 8.67311954498291}


EP_train:2:  78%|| 21432/27626 [50:30<14:40,  7.03it/s]

{'epoch': 2, 'iter': 21430, 'avg_loss': 8.545201798926254, 'avg_acc': 49.99183425878401, 'loss': 8.261709213256836}


EP_train:2:  78%|| 21442/27626 [50:31<14:34,  7.07it/s]

{'epoch': 2, 'iter': 21440, 'avg_loss': 8.545399609499862, 'avg_acc': 49.99242106245044, 'loss': 9.06307601928711}


EP_train:2:  78%|| 21452/27626 [50:33<14:41,  7.01it/s]

{'epoch': 2, 'iter': 21450, 'avg_loss': 8.545408177568076, 'avg_acc': 49.99329868071419, 'loss': 8.082124710083008}


EP_train:2:  78%|| 21462/27626 [50:34<14:40,  7.00it/s]

{'epoch': 2, 'iter': 21460, 'avg_loss': 8.54523398894965, 'avg_acc': 49.993738642188156, 'loss': 8.08935832977295}


EP_train:2:  78%|| 21472/27626 [50:35<14:29,  7.08it/s]

{'epoch': 2, 'iter': 21470, 'avg_loss': 8.545055267257943, 'avg_acc': 49.995051464766426, 'loss': 8.759653091430664}


EP_train:2:  78%|| 21482/27626 [50:37<14:29,  7.07it/s]

{'epoch': 2, 'iter': 21480, 'avg_loss': 8.54488551182141, 'avg_acc': 49.9949082910479, 'loss': 8.385671615600586}


EP_train:2:  78%|| 21492/27626 [50:38<14:27,  7.07it/s]

{'epoch': 2, 'iter': 21490, 'avg_loss': 8.544854403684784, 'avg_acc': 49.99505606998279, 'loss': 8.781927108764648}


EP_train:2:  78%|| 21502/27626 [50:40<14:27,  7.06it/s]

{'epoch': 2, 'iter': 21500, 'avg_loss': 8.544846105462456, 'avg_acc': 49.995785079763735, 'loss': 8.911252975463867}


EP_train:2:  78%|| 21512/27626 [50:41<14:31,  7.02it/s]

{'epoch': 2, 'iter': 21510, 'avg_loss': 8.544776415970086, 'avg_acc': 49.994770117614244, 'loss': 8.893244743347168}


EP_train:2:  78%|| 21522/27626 [50:43<14:24,  7.06it/s]

{'epoch': 2, 'iter': 21520, 'avg_loss': 8.544835755076253, 'avg_acc': 49.994627340736955, 'loss': 8.676530838012695}


EP_train:2:  78%|| 21532/27626 [50:44<14:20,  7.08it/s]

{'epoch': 2, 'iter': 21530, 'avg_loss': 8.54483295978569, 'avg_acc': 49.99317844038828, 'loss': 8.696030616760254}


EP_train:2:  78%|| 21542/27626 [50:45<14:15,  7.11it/s]

{'epoch': 2, 'iter': 21540, 'avg_loss': 8.544783463258275, 'avg_acc': 49.99361682373149, 'loss': 8.779155731201172}


EP_train:2:  78%|| 21552/27626 [50:47<14:28,  7.00it/s]

{'epoch': 2, 'iter': 21550, 'avg_loss': 8.544876096154702, 'avg_acc': 49.99332977588047, 'loss': 8.832545280456543}


EP_train:2:  78%|| 21562/27626 [50:48<14:21,  7.04it/s]

{'epoch': 2, 'iter': 21560, 'avg_loss': 8.545014897899346, 'avg_acc': 49.99362274477065, 'loss': 8.845398902893066}


EP_train:2:  78%|| 21572/27626 [50:50<14:17,  7.06it/s]

{'epoch': 2, 'iter': 21570, 'avg_loss': 8.545075915728628, 'avg_acc': 49.99507440545176, 'loss': 8.763946533203125}


EP_train:2:  78%|| 21582/27626 [50:51<14:22,  7.01it/s]

{'epoch': 2, 'iter': 21580, 'avg_loss': 8.545030981643356, 'avg_acc': 49.99507668782726, 'loss': 8.629849433898926}


EP_train:2:  78%|| 21592/27626 [50:52<14:15,  7.05it/s]

{'epoch': 2, 'iter': 21590, 'avg_loss': 8.545021478952338, 'avg_acc': 49.992328979667455, 'loss': 8.71461009979248}


EP_train:2:  78%|| 21602/27626 [50:54<14:17,  7.02it/s]

{'epoch': 2, 'iter': 21600, 'avg_loss': 8.545119005432559, 'avg_acc': 49.99088583861858, 'loss': 7.87152624130249}


EP_train:2:  78%|| 21612/27626 [50:55<14:16,  7.02it/s]

{'epoch': 2, 'iter': 21610, 'avg_loss': 8.545074278144083, 'avg_acc': 49.990456249132386, 'loss': 7.753691673278809}


EP_train:2:  78%|| 21622/27626 [50:57<14:10,  7.06it/s]

{'epoch': 2, 'iter': 21620, 'avg_loss': 8.544984278783211, 'avg_acc': 49.99161694648721, 'loss': 8.348690032958984}


EP_train:2:  78%|| 21632/27626 [50:58<14:11,  7.04it/s]

{'epoch': 2, 'iter': 21630, 'avg_loss': 8.544877802888628, 'avg_acc': 49.99147635338172, 'loss': 8.55732536315918}


EP_train:2:  78%|| 21642/27626 [50:59<14:12,  7.02it/s]

{'epoch': 2, 'iter': 21640, 'avg_loss': 8.544912702346167, 'avg_acc': 49.99032507739938, 'loss': 8.21869945526123}


EP_train:2:  78%|| 21652/27626 [51:01<14:01,  7.10it/s]

{'epoch': 2, 'iter': 21650, 'avg_loss': 8.544777671721818, 'avg_acc': 49.98874185949841, 'loss': 8.316433906555176}


EP_train:2:  78%|| 21662/27626 [51:02<13:59,  7.10it/s]

{'epoch': 2, 'iter': 21660, 'avg_loss': 8.544720115690678, 'avg_acc': 49.98860278842159, 'loss': 8.859058380126953}


EP_train:2:  78%|| 21672/27626 [51:04<14:04,  7.05it/s]

{'epoch': 2, 'iter': 21670, 'avg_loss': 8.544751200060528, 'avg_acc': 49.99062687462508, 'loss': 8.905889511108398}


EP_train:2:  78%|| 21682/27626 [51:05<13:59,  7.08it/s]

{'epoch': 2, 'iter': 21680, 'avg_loss': 8.544747610867647, 'avg_acc': 49.99322563534893, 'loss': 8.805596351623535}


EP_train:2:  79%|| 21692/27626 [51:07<13:55,  7.10it/s]

{'epoch': 2, 'iter': 21690, 'avg_loss': 8.54477630363483, 'avg_acc': 49.99222027569038, 'loss': 8.4902925491333}


EP_train:2:  79%|| 21702/27626 [51:08<13:55,  7.09it/s]

{'epoch': 2, 'iter': 21700, 'avg_loss': 8.544742566298506, 'avg_acc': 49.99265586839316, 'loss': 8.57813549041748}


EP_train:2:  79%|| 21712/27626 [51:09<13:53,  7.09it/s]

{'epoch': 2, 'iter': 21710, 'avg_loss': 8.544803001962897, 'avg_acc': 49.992947123577906, 'loss': 8.256415367126465}


EP_train:2:  79%|| 21722/27626 [51:11<13:57,  7.05it/s]

{'epoch': 2, 'iter': 21720, 'avg_loss': 8.54492151725688, 'avg_acc': 49.99237489065881, 'loss': 9.211064338684082}


EP_train:2:  79%|| 21732/27626 [51:12<13:54,  7.06it/s]

{'epoch': 2, 'iter': 21730, 'avg_loss': 8.544871258684092, 'avg_acc': 49.99180318439097, 'loss': 8.493542671203613}


EP_train:2:  79%|| 21742/27626 [51:14<13:56,  7.03it/s]

{'epoch': 2, 'iter': 21740, 'avg_loss': 8.54474464911918, 'avg_acc': 49.99209442987903, 'loss': 8.685763359069824}


EP_train:2:  79%|| 21752/27626 [51:15<13:54,  7.04it/s]

{'epoch': 2, 'iter': 21750, 'avg_loss': 8.544839848200978, 'avg_acc': 49.989942991126846, 'loss': 8.209006309509277}


EP_train:2:  79%|| 21762/27626 [51:16<13:56,  7.01it/s]

{'epoch': 2, 'iter': 21760, 'avg_loss': 8.544869498006616, 'avg_acc': 49.99109645696429, 'loss': 9.59061050415039}


EP_train:2:  79%|| 21772/27626 [51:18<13:56,  7.00it/s]

{'epoch': 2, 'iter': 21770, 'avg_loss': 8.544871075908656, 'avg_acc': 49.99124408616967, 'loss': 8.496435165405273}


EP_train:2:  79%|| 21782/27626 [51:19<13:57,  6.98it/s]

{'epoch': 2, 'iter': 21780, 'avg_loss': 8.54477706686053, 'avg_acc': 49.99124810614756, 'loss': 8.07259464263916}


EP_train:2:  79%|| 21792/27626 [51:21<13:55,  6.99it/s]

{'epoch': 2, 'iter': 21790, 'avg_loss': 8.544638637119784, 'avg_acc': 49.98895759717315, 'loss': 7.879573345184326}


EP_train:2:  79%|| 21802/27626 [51:22<13:41,  7.09it/s]

{'epoch': 2, 'iter': 21800, 'avg_loss': 8.544475522808382, 'avg_acc': 49.98953603045732, 'loss': 8.984173774719238}


EP_train:2:  79%|| 21812/27626 [51:23<13:41,  7.07it/s]

{'epoch': 2, 'iter': 21810, 'avg_loss': 8.544604110092514, 'avg_acc': 49.98696185411031, 'loss': 9.296412467956543}


EP_train:2:  79%|| 21822/27626 [51:25<13:46,  7.02it/s]

{'epoch': 2, 'iter': 21820, 'avg_loss': 8.54466689700992, 'avg_acc': 49.98911598918473, 'loss': 8.28891372680664}


EP_train:2:  79%|| 21832/27626 [51:26<13:40,  7.06it/s]

{'epoch': 2, 'iter': 21830, 'avg_loss': 8.544716660699342, 'avg_acc': 49.9911250057258, 'loss': 9.025568008422852}


EP_train:2:  79%|| 21842/27626 [51:28<13:42,  7.04it/s]

{'epoch': 2, 'iter': 21840, 'avg_loss': 8.544652466503049, 'avg_acc': 49.99184446682845, 'loss': 7.140597343444824}


EP_train:2:  79%|| 21852/27626 [51:29<13:40,  7.03it/s]

{'epoch': 2, 'iter': 21850, 'avg_loss': 8.544565994544987, 'avg_acc': 49.99227724131619, 'loss': 9.530509948730469}


EP_train:2:  79%|| 21862/27626 [51:31<13:39,  7.04it/s]

{'epoch': 2, 'iter': 21860, 'avg_loss': 8.544568301130587, 'avg_acc': 49.992995517130964, 'loss': 8.815557479858398}


EP_train:2:  79%|| 21872/27626 [51:32<13:40,  7.01it/s]

{'epoch': 2, 'iter': 21870, 'avg_loss': 8.544545772662024, 'avg_acc': 49.992712953225734, 'loss': 8.068303108215332}


EP_train:2:  79%|| 21882/27626 [51:33<13:29,  7.09it/s]

{'epoch': 2, 'iter': 21880, 'avg_loss': 8.54444027446065, 'avg_acc': 49.99500137105251, 'loss': 7.969707012176514}


EP_train:2:  79%|| 21892/27626 [51:35<13:26,  7.11it/s]

{'epoch': 2, 'iter': 21890, 'avg_loss': 8.544591491804297, 'avg_acc': 49.99628842903476, 'loss': 7.617144584655762}


EP_train:2:  79%|| 21902/27626 [51:36<13:38,  7.00it/s]

{'epoch': 2, 'iter': 21900, 'avg_loss': 8.54448176013798, 'avg_acc': 49.99629012373864, 'loss': 8.490971565246582}


EP_train:2:  79%|| 21912/27626 [51:38<13:31,  7.04it/s]

{'epoch': 2, 'iter': 21910, 'avg_loss': 8.544505185633932, 'avg_acc': 49.99543608233307, 'loss': 9.196395874023438}


EP_train:2:  79%|| 21922/27626 [51:39<13:28,  7.06it/s]

{'epoch': 2, 'iter': 21920, 'avg_loss': 8.544598437950052, 'avg_acc': 49.996721180603075, 'loss': 8.70052719116211}


EP_train:2:  79%|| 21932/27626 [51:40<13:25,  7.07it/s]

{'epoch': 2, 'iter': 21930, 'avg_loss': 8.544747805259474, 'avg_acc': 49.996722675664586, 'loss': 8.696259498596191}


EP_train:2:  79%|| 21942/27626 [51:42<13:26,  7.05it/s]

{'epoch': 2, 'iter': 21940, 'avg_loss': 8.544610873529608, 'avg_acc': 49.99729387903924, 'loss': 8.276958465576172}


EP_train:2:  79%|| 21952/27626 [51:43<13:22,  7.07it/s]

{'epoch': 2, 'iter': 21950, 'avg_loss': 8.544517172283857, 'avg_acc': 49.997152749305265, 'loss': 7.8579792976379395}


EP_train:2:  79%|| 21962/27626 [51:45<13:21,  7.06it/s]

{'epoch': 2, 'iter': 21960, 'avg_loss': 8.544461353159218, 'avg_acc': 49.99715404580848, 'loss': 8.295194625854492}


EP_train:2:  80%|| 21972/27626 [51:46<13:19,  7.07it/s]

{'epoch': 2, 'iter': 21970, 'avg_loss': 8.544528453309157, 'avg_acc': 49.998008738792045, 'loss': 9.265463829040527}


EP_train:2:  80%|| 21982/27626 [51:48<13:17,  7.07it/s]

{'epoch': 2, 'iter': 21980, 'avg_loss': 8.544529655514333, 'avg_acc': 49.99701446703972, 'loss': 8.613018989562988}


EP_train:2:  80%|| 21992/27626 [51:49<13:15,  7.08it/s]

{'epoch': 2, 'iter': 21990, 'avg_loss': 8.544836004956357, 'avg_acc': 49.99758423900686, 'loss': 9.403364181518555}


EP_train:2:  80%|| 22002/27626 [51:50<13:19,  7.03it/s]

{'epoch': 2, 'iter': 22000, 'avg_loss': 8.544762552285713, 'avg_acc': 49.99559679105495, 'loss': 8.500406265258789}


EP_train:2:  80%|| 22012/27626 [51:52<13:19,  7.02it/s]

{'epoch': 2, 'iter': 22010, 'avg_loss': 8.54478016337696, 'avg_acc': 49.99588274044795, 'loss': 9.184436798095703}


EP_train:2:  80%|| 22022/27626 [51:53<13:11,  7.08it/s]

{'epoch': 2, 'iter': 22020, 'avg_loss': 8.544890660669264, 'avg_acc': 49.993897870214795, 'loss': 9.64521312713623}


EP_train:2:  80%|| 22032/27626 [51:55<13:16,  7.03it/s]

{'epoch': 2, 'iter': 22030, 'avg_loss': 8.544917624783816, 'avg_acc': 49.99418433116972, 'loss': 8.406878471374512}


EP_train:2:  80%|| 22042/27626 [51:56<13:20,  6.97it/s]

{'epoch': 2, 'iter': 22040, 'avg_loss': 8.544966682246125, 'avg_acc': 49.993761626060525, 'loss': 8.4600248336792}


EP_train:2:  80%|| 22052/27626 [51:57<13:09,  7.06it/s]

{'epoch': 2, 'iter': 22050, 'avg_loss': 8.54503183657661, 'avg_acc': 49.99390617205569, 'loss': 8.393299102783203}


EP_train:2:  80%|| 22062/27626 [51:59<13:18,  6.97it/s]

{'epoch': 2, 'iter': 22060, 'avg_loss': 8.54503935216955, 'avg_acc': 49.99320067086714, 'loss': 8.697015762329102}


EP_train:2:  80%|| 22072/27626 [52:00<13:10,  7.03it/s]

{'epoch': 2, 'iter': 22070, 'avg_loss': 8.545018064554988, 'avg_acc': 49.99249580898011, 'loss': 8.129103660583496}


EP_train:2:  80%|| 22082/27626 [52:02<13:04,  7.07it/s]

{'epoch': 2, 'iter': 22080, 'avg_loss': 8.544986193985899, 'avg_acc': 49.99264073185091, 'loss': 8.376667022705078}


EP_train:2:  80%|| 22092/27626 [52:03<13:05,  7.05it/s]

{'epoch': 2, 'iter': 22090, 'avg_loss': 8.545152744546018, 'avg_acc': 49.99363428545562, 'loss': 8.611245155334473}


EP_train:2:  80%|| 22102/27626 [52:05<13:05,  7.03it/s]

{'epoch': 2, 'iter': 22100, 'avg_loss': 8.545209542005171, 'avg_acc': 49.99293018415457, 'loss': 8.489415168762207}


EP_train:2:  80%|| 22112/27626 [52:06<13:02,  7.05it/s]

{'epoch': 2, 'iter': 22110, 'avg_loss': 8.545398262838761, 'avg_acc': 49.99180272262675, 'loss': 9.83981990814209}


EP_train:2:  80%|| 22122/27626 [52:07<13:04,  7.01it/s]

{'epoch': 2, 'iter': 22120, 'avg_loss': 8.545396099264748, 'avg_acc': 49.990958817413315, 'loss': 8.59594440460205}


EP_train:2:  80%|| 22132/27626 [52:09<12:59,  7.04it/s]

{'epoch': 2, 'iter': 22130, 'avg_loss': 8.545412407086861, 'avg_acc': 49.99110410736072, 'loss': 7.95795202255249}


EP_train:2:  80%|| 22142/27626 [52:10<12:56,  7.06it/s]

{'epoch': 2, 'iter': 22140, 'avg_loss': 8.545328690793466, 'avg_acc': 49.99251953389639, 'loss': 8.233878135681152}


EP_train:2:  80%|| 22152/27626 [52:12<12:51,  7.10it/s]

{'epoch': 2, 'iter': 22150, 'avg_loss': 8.54512362275445, 'avg_acc': 49.993369373843166, 'loss': 7.742403984069824}


EP_train:2:  80%|| 22162/27626 [52:13<12:48,  7.11it/s]

{'epoch': 2, 'iter': 22160, 'avg_loss': 8.545103606503824, 'avg_acc': 49.99294932539146, 'loss': 8.12106990814209}


EP_train:2:  80%|| 22172/27626 [52:14<12:58,  7.01it/s]

{'epoch': 2, 'iter': 22170, 'avg_loss': 8.545217889963244, 'avg_acc': 49.99267060574625, 'loss': 8.413843154907227}


EP_train:2:  80%|| 22182/27626 [52:16<12:52,  7.04it/s]

{'epoch': 2, 'iter': 22180, 'avg_loss': 8.545169982245453, 'avg_acc': 49.995350750642444, 'loss': 8.534207344055176}


EP_train:2:  80%|| 22192/27626 [52:17<12:48,  7.07it/s]

{'epoch': 2, 'iter': 22190, 'avg_loss': 8.54505378187581, 'avg_acc': 49.99577531431662, 'loss': 7.794291973114014}


EP_train:2:  80%|| 22202/27626 [52:19<12:54,  7.00it/s]

{'epoch': 2, 'iter': 22200, 'avg_loss': 8.544975461586548, 'avg_acc': 49.99648101436872, 'loss': 8.0869722366333}


EP_train:2:  80%|| 22212/27626 [52:20<12:50,  7.03it/s]

{'epoch': 2, 'iter': 22210, 'avg_loss': 8.544978978112088, 'avg_acc': 49.995216334248795, 'loss': 7.9973039627075195}


EP_train:2:  80%|| 22222/27626 [52:22<12:58,  6.94it/s]

{'epoch': 2, 'iter': 22220, 'avg_loss': 8.544825696788541, 'avg_acc': 49.99634354889519, 'loss': 8.174059867858887}


EP_train:2:  80%|| 22232/27626 [52:23<12:46,  7.03it/s]

{'epoch': 2, 'iter': 22230, 'avg_loss': 8.544868355192886, 'avg_acc': 49.99831316629931, 'loss': 8.853455543518066}


EP_train:2:  81%|| 22242/27626 [52:24<12:43,  7.05it/s]

{'epoch': 2, 'iter': 22240, 'avg_loss': 8.54475410704636, 'avg_acc': 49.9990164560946, 'loss': 8.5831298828125}


EP_train:2:  81%|| 22252/27626 [52:26<12:40,  7.06it/s]

{'epoch': 2, 'iter': 22250, 'avg_loss': 8.544887959708, 'avg_acc': 49.99943822749539, 'loss': 8.258378028869629}


EP_train:2:  81%|| 22262/27626 [52:27<12:49,  6.97it/s]

{'epoch': 2, 'iter': 22260, 'avg_loss': 8.544862105654586, 'avg_acc': 49.99831543955797, 'loss': 8.33456039428711}


EP_train:2:  81%|| 22272/27626 [52:29<12:43,  7.01it/s]

{'epoch': 2, 'iter': 22270, 'avg_loss': 8.544912413365486, 'avg_acc': 49.99775492793319, 'loss': 9.692821502685547}


EP_train:2:  81%|| 22282/27626 [52:30<12:40,  7.03it/s]

{'epoch': 2, 'iter': 22280, 'avg_loss': 8.54492588969023, 'avg_acc': 49.99747542749427, 'loss': 8.945837020874023}


EP_train:2:  81%|| 22292/27626 [52:31<12:34,  7.07it/s]

{'epoch': 2, 'iter': 22290, 'avg_loss': 8.545007509507347, 'avg_acc': 49.99761675115518, 'loss': 8.422709465026855}


EP_train:2:  81%|| 22302/27626 [52:33<12:37,  7.03it/s]

{'epoch': 2, 'iter': 22300, 'avg_loss': 8.545009679214008, 'avg_acc': 49.997337563337965, 'loss': 9.255341529846191}


EP_train:2:  81%|| 22312/27626 [52:34<12:28,  7.10it/s]

{'epoch': 2, 'iter': 22310, 'avg_loss': 8.544927211126462, 'avg_acc': 49.99761888754426, 'loss': 8.621198654174805}


EP_train:2:  81%|| 22322/27626 [52:36<12:27,  7.10it/s]

{'epoch': 2, 'iter': 22320, 'avg_loss': 8.544809355850497, 'avg_acc': 49.99733994892702, 'loss': 8.57453727722168}


EP_train:2:  81%|| 22332/27626 [52:37<12:29,  7.06it/s]

{'epoch': 2, 'iter': 22330, 'avg_loss': 8.544637723165717, 'avg_acc': 49.99734114011912, 'loss': 8.37470817565918}


EP_train:2:  81%|| 22342/27626 [52:38<12:27,  7.07it/s]

{'epoch': 2, 'iter': 22340, 'avg_loss': 8.544673859104623, 'avg_acc': 49.99580367933396, 'loss': 9.119524002075195}


EP_train:2:  81%|| 22352/27626 [52:40<12:32,  7.01it/s]

{'epoch': 2, 'iter': 22350, 'avg_loss': 8.544671255013302, 'avg_acc': 49.99510648293141, 'loss': 8.803129196166992}


EP_train:2:  81%|| 22362/27626 [52:41<12:27,  7.05it/s]

{'epoch': 2, 'iter': 22360, 'avg_loss': 8.54459598779753, 'avg_acc': 49.99399065336971, 'loss': 9.259345054626465}


EP_train:2:  81%|| 22372/27626 [52:43<12:23,  7.07it/s]

{'epoch': 2, 'iter': 22370, 'avg_loss': 8.544700920712371, 'avg_acc': 49.993015511152834, 'loss': 8.565345764160156}


EP_train:2:  81%|| 22382/27626 [52:44<12:36,  6.93it/s]

{'epoch': 2, 'iter': 22380, 'avg_loss': 8.544801065075, 'avg_acc': 49.993716768687726, 'loss': 8.418814659118652}


EP_train:2:  81%|| 22392/27626 [52:46<12:23,  7.04it/s]

{'epoch': 2, 'iter': 22390, 'avg_loss': 8.544644222887326, 'avg_acc': 49.99581304988612, 'loss': 7.966311454772949}


EP_train:2:  81%|| 22402/27626 [52:47<12:15,  7.10it/s]

{'epoch': 2, 'iter': 22400, 'avg_loss': 8.544782687082849, 'avg_acc': 49.99567541627606, 'loss': 8.772433280944824}


EP_train:2:  81%|| 22412/27626 [52:48<12:20,  7.04it/s]

{'epoch': 2, 'iter': 22410, 'avg_loss': 8.544862081735287, 'avg_acc': 49.99651398866628, 'loss': 8.878229141235352}


EP_train:2:  81%|| 22422/27626 [52:50<12:17,  7.06it/s]

{'epoch': 2, 'iter': 22420, 'avg_loss': 8.54480748880314, 'avg_acc': 49.994703626064855, 'loss': 8.63125991821289}


EP_train:2:  81%|| 22432/27626 [52:51<12:12,  7.10it/s]

{'epoch': 2, 'iter': 22430, 'avg_loss': 8.544904388611233, 'avg_acc': 49.99303419374972, 'loss': 9.424423217773438}


EP_train:2:  81%|| 22442/27626 [52:53<12:18,  7.02it/s]

{'epoch': 2, 'iter': 22440, 'avg_loss': 8.544823628611692, 'avg_acc': 49.991505503319814, 'loss': 7.681186199188232}


EP_train:2:  81%|| 22452/27626 [52:54<12:13,  7.06it/s]

{'epoch': 2, 'iter': 22450, 'avg_loss': 8.544867972351492, 'avg_acc': 49.98969979065521, 'loss': 8.605034828186035}


EP_train:2:  81%|| 22462/27626 [52:55<12:13,  7.04it/s]

{'epoch': 2, 'iter': 22460, 'avg_loss': 8.544725751097706, 'avg_acc': 49.9912348069988, 'loss': 8.22774887084961}


EP_train:2:  81%|| 22472/27626 [52:57<12:11,  7.04it/s]

{'epoch': 2, 'iter': 22470, 'avg_loss': 8.544549314665183, 'avg_acc': 49.99082150327088, 'loss': 8.666214942932129}


EP_train:2:  81%|| 22482/27626 [52:58<12:05,  7.09it/s]

{'epoch': 2, 'iter': 22480, 'avg_loss': 8.544488510857072, 'avg_acc': 49.99026956096259, 'loss': 8.105937957763672}


EP_train:2:  81%|| 22492/27626 [53:00<12:05,  7.08it/s]

{'epoch': 2, 'iter': 22490, 'avg_loss': 8.544349192279721, 'avg_acc': 49.99235805433284, 'loss': 8.26801872253418}


EP_train:2:  81%|| 22502/27626 [53:01<12:02,  7.09it/s]

{'epoch': 2, 'iter': 22500, 'avg_loss': 8.544316953841562, 'avg_acc': 49.99250033331852, 'loss': 7.656644821166992}


EP_train:2:  81%|| 22512/27626 [53:02<12:06,  7.04it/s]

{'epoch': 2, 'iter': 22510, 'avg_loss': 8.544321450445326, 'avg_acc': 49.99194838079161, 'loss': 8.535529136657715}


EP_train:2:  82%|| 22522/27626 [53:04<12:00,  7.09it/s]

{'epoch': 2, 'iter': 22520, 'avg_loss': 8.544187411200772, 'avg_acc': 49.991951955952224, 'loss': 8.406784057617188}


EP_train:2:  82%|| 22532/27626 [53:05<12:03,  7.04it/s]

{'epoch': 2, 'iter': 22530, 'avg_loss': 8.544158097468163, 'avg_acc': 49.99112334117438, 'loss': 9.174927711486816}


EP_train:2:  82%|| 22542/27626 [53:07<12:07,  6.99it/s]

{'epoch': 2, 'iter': 22540, 'avg_loss': 8.544238626309092, 'avg_acc': 49.9902954616033, 'loss': 7.971895217895508}


EP_train:2:  82%|| 22552/27626 [53:08<12:08,  6.96it/s]

{'epoch': 2, 'iter': 22550, 'avg_loss': 8.544175333788768, 'avg_acc': 49.99251696155381, 'loss': 7.500866889953613}


EP_train:2:  82%|| 22562/27626 [53:10<12:01,  7.02it/s]

{'epoch': 2, 'iter': 22560, 'avg_loss': 8.544122479559126, 'avg_acc': 49.992658791720224, 'loss': 7.740843772888184}


EP_train:2:  82%|| 22572/27626 [53:11<11:52,  7.10it/s]

{'epoch': 2, 'iter': 22570, 'avg_loss': 8.54415105261804, 'avg_acc': 49.99141597625271, 'loss': 8.793434143066406}


EP_train:2:  82%|| 22582/27626 [53:12<11:56,  7.04it/s]

{'epoch': 2, 'iter': 22580, 'avg_loss': 8.544116292798662, 'avg_acc': 49.9914197776892, 'loss': 8.439628601074219}


EP_train:2:  82%|| 22592/27626 [53:14<11:58,  7.01it/s]

{'epoch': 2, 'iter': 22590, 'avg_loss': 8.544012840776391, 'avg_acc': 49.99336018768536, 'loss': 8.044261932373047}


EP_train:2:  82%|| 22602/27626 [53:15<11:50,  7.07it/s]

{'epoch': 2, 'iter': 22600, 'avg_loss': 8.543948690720688, 'avg_acc': 49.99377793018008, 'loss': 8.329083442687988}


EP_train:2:  82%|| 22612/27626 [53:17<11:46,  7.10it/s]

{'epoch': 2, 'iter': 22610, 'avg_loss': 8.543800228589259, 'avg_acc': 49.994471717305736, 'loss': 7.804591655731201}


EP_train:2:  82%|| 22622/27626 [53:18<11:48,  7.06it/s]

{'epoch': 2, 'iter': 22620, 'avg_loss': 8.543836851998163, 'avg_acc': 49.9933689934132, 'loss': 8.884921073913574}


EP_train:2:  82%|| 22632/27626 [53:19<11:53,  7.00it/s]

{'epoch': 2, 'iter': 22630, 'avg_loss': 8.543724717571756, 'avg_acc': 49.995028942600854, 'loss': 8.553960800170898}


EP_train:2:  82%|| 22642/27626 [53:21<11:48,  7.03it/s]

{'epoch': 2, 'iter': 22640, 'avg_loss': 8.543745469817434, 'avg_acc': 49.9975155691003, 'loss': 8.208065032958984}


EP_train:2:  82%|| 22652/27626 [53:22<11:43,  7.07it/s]

{'epoch': 2, 'iter': 22650, 'avg_loss': 8.543796466636918, 'avg_acc': 49.997930554942386, 'loss': 8.165380477905273}


EP_train:2:  82%|| 22662/27626 [53:24<11:46,  7.02it/s]

{'epoch': 2, 'iter': 22660, 'avg_loss': 8.543824778541211, 'avg_acc': 49.99986209787741, 'loss': 8.680220603942871}


EP_train:2:  82%|| 22672/27626 [53:25<11:42,  7.06it/s]

{'epoch': 2, 'iter': 22670, 'avg_loss': 8.543749785119353, 'avg_acc': 50.000689206475236, 'loss': 8.78841781616211}


EP_train:2:  82%|| 22682/27626 [53:27<11:44,  7.01it/s]

{'epoch': 2, 'iter': 22680, 'avg_loss': 8.54372690529279, 'avg_acc': 50.0016533662537, 'loss': 7.950993537902832}


EP_train:2:  82%|| 22692/27626 [53:28<11:37,  7.08it/s]

{'epoch': 2, 'iter': 22690, 'avg_loss': 8.543747632617302, 'avg_acc': 50.00302983561764, 'loss': 8.919621467590332}


EP_train:2:  82%|| 22702/27626 [53:29<11:35,  7.08it/s]

{'epoch': 2, 'iter': 22700, 'avg_loss': 8.543546784833099, 'avg_acc': 50.004542751420644, 'loss': 8.306079864501953}


EP_train:2:  82%|| 22712/27626 [53:31<11:35,  7.06it/s]

{'epoch': 2, 'iter': 22710, 'avg_loss': 8.543559534505308, 'avg_acc': 50.0030271674519, 'loss': 7.9003119468688965}


EP_train:2:  82%|| 22722/27626 [53:32<11:42,  6.98it/s]

{'epoch': 2, 'iter': 22720, 'avg_loss': 8.543521076245373, 'avg_acc': 50.00206306940716, 'loss': 8.684043884277344}


EP_train:2:  82%|| 22732/27626 [53:34<11:38,  7.01it/s]

{'epoch': 2, 'iter': 22730, 'avg_loss': 8.543524389286686, 'avg_acc': 50.00206216180546, 'loss': 8.386478424072266}


EP_train:2:  82%|| 22742/27626 [53:35<11:30,  7.08it/s]

{'epoch': 2, 'iter': 22740, 'avg_loss': 8.543476692138647, 'avg_acc': 50.00206125500198, 'loss': 8.825175285339355}


EP_train:2:  82%|| 22752/27626 [53:36<11:29,  7.07it/s]

{'epoch': 2, 'iter': 22750, 'avg_loss': 8.543263398224504, 'avg_acc': 50.00192299239594, 'loss': 7.6571431159973145}


EP_train:2:  82%|| 22762/27626 [53:38<12:04,  6.71it/s]

{'epoch': 2, 'iter': 22760, 'avg_loss': 8.54312470055523, 'avg_acc': 50.00219674003779, 'loss': 7.571993827819824}


EP_train:2:  82%|| 22772/27626 [53:39<11:27,  7.06it/s]

{'epoch': 2, 'iter': 22770, 'avg_loss': 8.54305709474753, 'avg_acc': 50.00233301128628, 'loss': 8.62143611907959}


EP_train:2:  82%|| 22782/27626 [53:41<11:25,  7.07it/s]

{'epoch': 2, 'iter': 22780, 'avg_loss': 8.543197036945948, 'avg_acc': 50.00205763574909, 'loss': 8.749337196350098}


EP_train:2:  83%|| 22792/27626 [53:42<11:21,  7.09it/s]

{'epoch': 2, 'iter': 22790, 'avg_loss': 8.543308727445506, 'avg_acc': 50.003702119257596, 'loss': 8.200185775756836}


EP_train:2:  83%|| 22802/27626 [53:44<11:26,  7.03it/s]

{'epoch': 2, 'iter': 22800, 'avg_loss': 8.543133985274226, 'avg_acc': 50.00520810490768, 'loss': 8.632349014282227}


EP_train:2:  83%|| 22812/27626 [53:45<11:30,  6.97it/s]

{'epoch': 2, 'iter': 22810, 'avg_loss': 8.543193448896886, 'avg_acc': 50.00383586865985, 'loss': 8.468961715698242}


EP_train:2:  83%|| 22822/27626 [53:46<11:23,  7.03it/s]

{'epoch': 2, 'iter': 22820, 'avg_loss': 8.543100510401613, 'avg_acc': 50.002464835020376, 'loss': 9.125325202941895}


EP_train:2:  83%|| 22832/27626 [53:48<11:16,  7.09it/s]

{'epoch': 2, 'iter': 22830, 'avg_loss': 8.542882612002078, 'avg_acc': 50.00355875782927, 'loss': 8.592166900634766}


EP_train:2:  83%|| 22842/27626 [53:49<11:25,  6.98it/s]

{'epoch': 2, 'iter': 22840, 'avg_loss': 8.542806666743175, 'avg_acc': 50.004651722779215, 'loss': 8.184063911437988}


EP_train:2:  83%|| 22852/27626 [53:51<11:19,  7.02it/s]

{'epoch': 2, 'iter': 22850, 'avg_loss': 8.54272376360515, 'avg_acc': 50.00601724213383, 'loss': 8.753447532653809}


EP_train:2:  83%|| 22862/27626 [53:52<11:12,  7.08it/s]

{'epoch': 2, 'iter': 22860, 'avg_loss': 8.542560262810625, 'avg_acc': 50.00724487117799, 'loss': 8.786623001098633}


EP_train:2:  83%|| 22872/27626 [53:53<11:12,  7.07it/s]

{'epoch': 2, 'iter': 22870, 'avg_loss': 8.542498633095446, 'avg_acc': 50.002869354204016, 'loss': 8.562390327453613}


EP_train:2:  83%|| 22882/27626 [53:55<11:15,  7.02it/s]

{'epoch': 2, 'iter': 22880, 'avg_loss': 8.542455395109654, 'avg_acc': 50.00314125256763, 'loss': 8.302902221679688}


EP_train:2:  83%|| 22892/27626 [53:56<11:11,  7.05it/s]

{'epoch': 2, 'iter': 22890, 'avg_loss': 8.54236593528092, 'avg_acc': 50.002184264558124, 'loss': 8.51059341430664}


EP_train:2:  83%|| 22902/27626 [53:58<11:17,  6.97it/s]

{'epoch': 2, 'iter': 22900, 'avg_loss': 8.542362933127434, 'avg_acc': 50.00300205231213, 'loss': 7.862621307373047}


EP_train:2:  83%|| 22912/27626 [53:59<11:15,  6.98it/s]

{'epoch': 2, 'iter': 22910, 'avg_loss': 8.542319423456922, 'avg_acc': 50.00409192091135, 'loss': 8.182913780212402}


EP_train:2:  83%|| 22922/27626 [54:01<11:07,  7.04it/s]

{'epoch': 2, 'iter': 22920, 'avg_loss': 8.54234553452723, 'avg_acc': 50.003272108546746, 'loss': 8.41109561920166}


EP_train:2:  83%|| 22932/27626 [54:02<11:06,  7.05it/s]

{'epoch': 2, 'iter': 22930, 'avg_loss': 8.54224576368245, 'avg_acc': 50.001771619205435, 'loss': 8.373842239379883}


EP_train:2:  83%|| 22942/27626 [54:03<11:03,  7.06it/s]

{'epoch': 2, 'iter': 22940, 'avg_loss': 8.542160023807789, 'avg_acc': 50.00068109498278, 'loss': 8.011675834655762}


EP_train:2:  83%|| 22952/27626 [54:05<11:04,  7.04it/s]

{'epoch': 2, 'iter': 22950, 'avg_loss': 8.542054347984465, 'avg_acc': 50.00163391573352, 'loss': 7.604748249053955}


EP_train:2:  83%|| 22962/27626 [54:06<10:56,  7.10it/s]

{'epoch': 2, 'iter': 22960, 'avg_loss': 8.542077021110577, 'avg_acc': 50.00068050172031, 'loss': 8.487946510314941}


EP_train:2:  83%|| 22972/27626 [54:08<10:57,  7.07it/s]

{'epoch': 2, 'iter': 22970, 'avg_loss': 8.54207976904086, 'avg_acc': 50.00108832876236, 'loss': 8.56626033782959}


EP_train:2:  83%|| 22982/27626 [54:09<11:00,  7.03it/s]

{'epoch': 2, 'iter': 22980, 'avg_loss': 8.542178867390383, 'avg_acc': 50.00081589138854, 'loss': 8.214518547058105}


EP_train:2:  83%|| 22992/27626 [54:10<10:57,  7.04it/s]

{'epoch': 2, 'iter': 22990, 'avg_loss': 8.542219364660207, 'avg_acc': 50.00027184550476, 'loss': 9.698731422424316}


EP_train:2:  83%|| 23002/27626 [54:12<10:55,  7.06it/s]

{'epoch': 2, 'iter': 23000, 'avg_loss': 8.542305124774662, 'avg_acc': 50.0002717273162, 'loss': 8.711533546447754}


EP_train:2:  83%|| 23012/27626 [54:13<10:49,  7.10it/s]

{'epoch': 2, 'iter': 23010, 'avg_loss': 8.542248064088135, 'avg_acc': 49.99972839076963, 'loss': 8.77863597869873}


EP_train:2:  83%|| 23022/27626 [54:15<10:51,  7.06it/s]

{'epoch': 2, 'iter': 23020, 'avg_loss': 8.542180921204844, 'avg_acc': 49.999321271882195, 'loss': 8.947344779968262}


EP_train:2:  83%|| 23032/27626 [54:16<10:44,  7.13it/s]

{'epoch': 2, 'iter': 23030, 'avg_loss': 8.542341123251074, 'avg_acc': 49.999185879901006, 'loss': 8.328873634338379}


EP_train:2:  83%|| 23042/27626 [54:17<10:46,  7.09it/s]

{'epoch': 2, 'iter': 23040, 'avg_loss': 8.542311094705493, 'avg_acc': 50.00013562779393, 'loss': 8.878372192382812}


EP_train:2:  83%|| 23052/27626 [54:19<10:52,  7.00it/s]

{'epoch': 2, 'iter': 23050, 'avg_loss': 8.542256365483931, 'avg_acc': 50.00135568955793, 'loss': 8.957237243652344}


EP_train:2:  83%|| 23062/27626 [54:20<10:46,  7.06it/s]

{'epoch': 2, 'iter': 23060, 'avg_loss': 8.542258162287773, 'avg_acc': 50.00027102033736, 'loss': 8.73278522491455}


EP_train:2:  84%|| 23072/27626 [54:22<10:50,  7.00it/s]

{'epoch': 2, 'iter': 23070, 'avg_loss': 8.542279569304174, 'avg_acc': 50.000270902865076, 'loss': 8.87686538696289}


EP_train:2:  84%|| 23082/27626 [54:23<10:46,  7.03it/s]

{'epoch': 2, 'iter': 23080, 'avg_loss': 8.5423512309228, 'avg_acc': 50.00176010571465, 'loss': 8.461262702941895}


EP_train:2:  84%|| 23092/27626 [54:25<10:46,  7.01it/s]

{'epoch': 2, 'iter': 23090, 'avg_loss': 8.542372574425627, 'avg_acc': 50.001624009354295, 'loss': 8.234466552734375}


EP_train:2:  84%|| 23102/27626 [54:26<10:40,  7.06it/s]

{'epoch': 2, 'iter': 23100, 'avg_loss': 8.542233806632565, 'avg_acc': 50.000676377645995, 'loss': 8.065705299377441}


EP_train:2:  84%|| 23112/27626 [54:27<10:40,  7.04it/s]

{'epoch': 2, 'iter': 23110, 'avg_loss': 8.542209414792882, 'avg_acc': 50.00243390593224, 'loss': 8.37291431427002}


EP_train:2:  84%|| 23122/27626 [54:29<10:34,  7.10it/s]

{'epoch': 2, 'iter': 23120, 'avg_loss': 8.542397065494905, 'avg_acc': 50.002297694736384, 'loss': 9.403779029846191}


EP_train:2:  84%|| 23132/27626 [54:30<10:29,  7.13it/s]

{'epoch': 2, 'iter': 23130, 'avg_loss': 8.542557232585338, 'avg_acc': 50.00310730188924, 'loss': 9.326274871826172}


EP_train:2:  84%|| 23142/27626 [54:32<10:34,  7.07it/s]

{'epoch': 2, 'iter': 23140, 'avg_loss': 8.54274202772681, 'avg_acc': 50.00405125102632, 'loss': 9.01291561126709}


EP_train:2:  84%|| 23152/27626 [54:33<10:32,  7.08it/s]

{'epoch': 2, 'iter': 23150, 'avg_loss': 8.542731246970487, 'avg_acc': 50.00404950110146, 'loss': 7.218520164489746}


EP_train:2:  84%|| 23162/27626 [54:34<10:33,  7.04it/s]

{'epoch': 2, 'iter': 23160, 'avg_loss': 8.542594048042309, 'avg_acc': 50.00310327706058, 'loss': 8.617267608642578}


EP_train:2:  84%|| 23172/27626 [54:36<10:37,  6.99it/s]

{'epoch': 2, 'iter': 23170, 'avg_loss': 8.542623039356073, 'avg_acc': 50.00431573950196, 'loss': 8.970712661743164}


EP_train:2:  84%|| 23182/27626 [54:37<10:26,  7.09it/s]

{'epoch': 2, 'iter': 23180, 'avg_loss': 8.542541445281259, 'avg_acc': 50.004987921142316, 'loss': 7.456029891967773}


EP_train:2:  84%|| 23192/27626 [54:39<10:27,  7.07it/s]

{'epoch': 2, 'iter': 23190, 'avg_loss': 8.542625862816719, 'avg_acc': 50.00687227803889, 'loss': 9.088406562805176}


EP_train:2:  84%|| 23202/27626 [54:40<10:28,  7.04it/s]

{'epoch': 2, 'iter': 23200, 'avg_loss': 8.542688801516634, 'avg_acc': 50.007677470798676, 'loss': 8.314281463623047}


EP_train:2:  84%|| 23212/27626 [54:41<10:24,  7.06it/s]

{'epoch': 2, 'iter': 23210, 'avg_loss': 8.542675168997745, 'avg_acc': 50.00821270087459, 'loss': 8.357843399047852}


EP_train:2:  84%|| 23222/27626 [54:43<10:25,  7.04it/s]

{'epoch': 2, 'iter': 23220, 'avg_loss': 8.542588161008364, 'avg_acc': 50.00968950518927, 'loss': 8.188902854919434}


EP_train:2:  84%|| 23232/27626 [54:44<10:20,  7.08it/s]

{'epoch': 2, 'iter': 23230, 'avg_loss': 8.542518294348657, 'avg_acc': 50.01062696397056, 'loss': 9.060487747192383}


EP_train:2:  84%|| 23242/27626 [54:46<10:19,  7.08it/s]

{'epoch': 2, 'iter': 23240, 'avg_loss': 8.542399122210174, 'avg_acc': 50.01169807667484, 'loss': 8.185517311096191}


EP_train:2:  84%|| 23252/27626 [54:47<10:25,  7.00it/s]

{'epoch': 2, 'iter': 23250, 'avg_loss': 8.542256208077477, 'avg_acc': 50.01276826803148, 'loss': 7.50586462020874}


EP_train:2:  84%|| 23262/27626 [54:49<10:24,  6.99it/s]

{'epoch': 2, 'iter': 23260, 'avg_loss': 8.542248503595333, 'avg_acc': 50.01343450410558, 'loss': 8.855977058410645}


EP_train:2:  84%|| 23272/27626 [54:50<10:14,  7.09it/s]

{'epoch': 2, 'iter': 23270, 'avg_loss': 8.542301741552457, 'avg_acc': 50.0136973056594, 'loss': 9.08277702331543}


EP_train:2:  84%|| 23282/27626 [54:51<10:21,  6.99it/s]

{'epoch': 2, 'iter': 23280, 'avg_loss': 8.542445172846131, 'avg_acc': 50.014496799965634, 'loss': 8.264649391174316}


EP_train:2:  84%|| 23292/27626 [54:53<10:10,  7.10it/s]

{'epoch': 2, 'iter': 23290, 'avg_loss': 8.54225334960032, 'avg_acc': 50.013953887767805, 'loss': 8.772881507873535}


EP_train:2:  84%|| 23302/27626 [54:54<10:11,  7.07it/s]

{'epoch': 2, 'iter': 23300, 'avg_loss': 8.542178111835373, 'avg_acc': 50.01274086949058, 'loss': 7.848394393920898}


EP_train:2:  84%|| 23312/27626 [54:56<10:11,  7.05it/s]

{'epoch': 2, 'iter': 23310, 'avg_loss': 8.542107781471625, 'avg_acc': 50.011931062588474, 'loss': 8.125862121582031}


EP_train:2:  84%|| 23322/27626 [54:57<10:09,  7.06it/s]

{'epoch': 2, 'iter': 23320, 'avg_loss': 8.542197351119135, 'avg_acc': 50.012059945971444, 'loss': 9.182377815246582}


EP_train:2:  84%|| 23332/27626 [54:58<10:09,  7.04it/s]

{'epoch': 2, 'iter': 23330, 'avg_loss': 8.542046221400813, 'avg_acc': 50.01205477690627, 'loss': 8.6781005859375}


EP_train:2:  84%|| 23342/27626 [55:00<10:06,  7.06it/s]

{'epoch': 2, 'iter': 23340, 'avg_loss': 8.54190403001126, 'avg_acc': 50.01191572768947, 'loss': 8.238519668579102}


EP_train:2:  85%|| 23352/27626 [55:01<10:07,  7.03it/s]

{'epoch': 2, 'iter': 23350, 'avg_loss': 8.541886098900166, 'avg_acc': 50.01124148858721, 'loss': 8.349418640136719}


EP_train:2:  85%|| 23362/27626 [55:03<10:02,  7.08it/s]

{'epoch': 2, 'iter': 23360, 'avg_loss': 8.541946083158006, 'avg_acc': 50.01177175634604, 'loss': 8.392788887023926}


EP_train:2:  85%|| 23372/27626 [55:04<10:00,  7.09it/s]

{'epoch': 2, 'iter': 23370, 'avg_loss': 8.541760135699764, 'avg_acc': 50.012301570322194, 'loss': 8.689199447631836}


EP_train:2:  85%|| 23382/27626 [55:05<10:00,  7.07it/s]

{'epoch': 2, 'iter': 23380, 'avg_loss': 8.54178609000031, 'avg_acc': 50.0114943757752, 'loss': 8.661872863769531}


EP_train:2:  85%|| 23392/27626 [55:07<09:57,  7.09it/s]

{'epoch': 2, 'iter': 23390, 'avg_loss': 8.541831890752176, 'avg_acc': 50.01282544568424, 'loss': 8.937826156616211}


EP_train:2:  85%|| 23402/27626 [55:08<09:55,  7.09it/s]

{'epoch': 2, 'iter': 23400, 'avg_loss': 8.54188895136886, 'avg_acc': 50.01201871714884, 'loss': 7.968409538269043}


EP_train:2:  85%|| 23412/27626 [55:10<10:06,  6.95it/s]

{'epoch': 2, 'iter': 23410, 'avg_loss': 8.541859995350825, 'avg_acc': 50.01454978428944, 'loss': 8.752946853637695}


EP_train:2:  85%|| 23422/27626 [55:11<10:00,  7.01it/s]

{'epoch': 2, 'iter': 23420, 'avg_loss': 8.541711702766612, 'avg_acc': 50.0162781264677, 'loss': 8.245611190795898}


EP_train:2:  85%|| 23432/27626 [55:13<09:56,  7.03it/s]

{'epoch': 2, 'iter': 23430, 'avg_loss': 8.541628658234766, 'avg_acc': 50.01720477145661, 'loss': 8.197003364562988}


EP_train:2:  85%|| 23442/27626 [55:14<09:56,  7.01it/s]

{'epoch': 2, 'iter': 23440, 'avg_loss': 8.541749821675067, 'avg_acc': 50.01639755129901, 'loss': 8.677376747131348}


EP_train:2:  85%|| 23452/27626 [55:15<09:55,  7.01it/s]

{'epoch': 2, 'iter': 23450, 'avg_loss': 8.541849265021915, 'avg_acc': 50.01652381561554, 'loss': 8.869499206542969}


EP_train:2:  85%|| 23462/27626 [55:17<09:51,  7.04it/s]

{'epoch': 2, 'iter': 23460, 'avg_loss': 8.541648004483976, 'avg_acc': 50.01864796896977, 'loss': 8.196036338806152}


EP_train:2:  85%|| 23472/27626 [55:18<09:46,  7.08it/s]

{'epoch': 2, 'iter': 23470, 'avg_loss': 8.541807125863237, 'avg_acc': 50.01970516807975, 'loss': 9.174588203430176}


EP_train:2:  85%|| 23482/27626 [55:20<09:47,  7.05it/s]

{'epoch': 2, 'iter': 23480, 'avg_loss': 8.54176029511801, 'avg_acc': 50.020495294067544, 'loss': 8.261641502380371}


EP_train:2:  85%|| 23492/27626 [55:21<09:46,  7.04it/s]

{'epoch': 2, 'iter': 23490, 'avg_loss': 8.541767904074488, 'avg_acc': 50.021018688008176, 'loss': 9.043577194213867}


EP_train:2:  85%|| 23502/27626 [55:22<09:44,  7.06it/s]

{'epoch': 2, 'iter': 23500, 'avg_loss': 8.54188941313426, 'avg_acc': 50.02194055572103, 'loss': 7.729898929595947}


EP_train:2:  85%|| 23512/27626 [55:24<09:41,  7.07it/s]

{'epoch': 2, 'iter': 23510, 'avg_loss': 8.541924125977555, 'avg_acc': 50.021931223682536, 'loss': 8.63176155090332}


EP_train:2:  85%|| 23522/27626 [55:25<09:41,  7.05it/s]

{'epoch': 2, 'iter': 23520, 'avg_loss': 8.541955714697924, 'avg_acc': 50.02125759959185, 'loss': 8.515589714050293}


EP_train:2:  85%|| 23532/27626 [55:27<09:44,  7.01it/s]

{'epoch': 2, 'iter': 23530, 'avg_loss': 8.541876816312879, 'avg_acc': 50.02045174450724, 'loss': 8.318964958190918}


EP_train:2:  85%|| 23542/27626 [55:28<09:37,  7.07it/s]

{'epoch': 2, 'iter': 23540, 'avg_loss': 8.541879995824992, 'avg_acc': 50.018451849963895, 'loss': 7.881067752838135}


EP_train:2:  85%|| 23552/27626 [55:30<09:37,  7.06it/s]

{'epoch': 2, 'iter': 23550, 'avg_loss': 8.541930954166089, 'avg_acc': 50.018045942847436, 'loss': 9.409083366394043}


EP_train:2:  85%|| 23562/27626 [55:31<09:38,  7.03it/s]

{'epoch': 2, 'iter': 23560, 'avg_loss': 8.54196184668414, 'avg_acc': 50.016711939221594, 'loss': 7.871601104736328}


EP_train:2:  85%|| 23572/27626 [55:32<09:32,  7.08it/s]

{'epoch': 2, 'iter': 23570, 'avg_loss': 8.541954830421012, 'avg_acc': 50.0131252386407, 'loss': 8.56836986541748}


EP_train:2:  85%|| 23582/27626 [55:34<09:29,  7.10it/s]

{'epoch': 2, 'iter': 23580, 'avg_loss': 8.54186775674145, 'avg_acc': 50.01205949705271, 'loss': 7.575803756713867}


EP_train:2:  85%|| 23592/27626 [55:35<09:27,  7.11it/s]

{'epoch': 2, 'iter': 23590, 'avg_loss': 8.541819455356718, 'avg_acc': 50.0112595905218, 'loss': 8.557372093200684}


EP_train:2:  85%|| 23602/27626 [55:37<09:33,  7.01it/s]

{'epoch': 2, 'iter': 23600, 'avg_loss': 8.5418485002666, 'avg_acc': 50.01324096436591, 'loss': 8.48952579498291}


EP_train:2:  85%|| 23612/27626 [55:38<09:31,  7.03it/s]

{'epoch': 2, 'iter': 23610, 'avg_loss': 8.541805529775171, 'avg_acc': 50.01297064927365, 'loss': 7.930978775024414}


EP_train:2:  86%|| 23622/27626 [55:39<09:30,  7.02it/s]

{'epoch': 2, 'iter': 23620, 'avg_loss': 8.541669558037407, 'avg_acc': 50.01164218280344, 'loss': 7.413311004638672}


EP_train:2:  86%|| 23632/27626 [55:41<09:26,  7.05it/s]

{'epoch': 2, 'iter': 23630, 'avg_loss': 8.541629790977268, 'avg_acc': 50.01071156531675, 'loss': 8.914143562316895}


EP_train:2:  86%|| 23642/27626 [55:42<09:22,  7.08it/s]

{'epoch': 2, 'iter': 23640, 'avg_loss': 8.541569261884513, 'avg_acc': 50.00885643585297, 'loss': 7.559017658233643}


EP_train:2:  86%|| 23652/27626 [55:44<09:25,  7.03it/s]

{'epoch': 2, 'iter': 23650, 'avg_loss': 8.541615052554054, 'avg_acc': 50.00885269121813, 'loss': 8.550790786743164}


EP_train:2:  86%|| 23662/27626 [55:45<09:22,  7.05it/s]

{'epoch': 2, 'iter': 23660, 'avg_loss': 8.541677686379238, 'avg_acc': 50.01122627953172, 'loss': 9.470136642456055}


EP_train:2:  86%|| 23672/27626 [55:46<09:18,  7.08it/s]

{'epoch': 2, 'iter': 23670, 'avg_loss': 8.541716656764043, 'avg_acc': 50.012013645388876, 'loss': 8.897245407104492}


EP_train:2:  86%|| 23682/27626 [55:48<09:16,  7.08it/s]

{'epoch': 2, 'iter': 23680, 'avg_loss': 8.541686278250133, 'avg_acc': 50.011216798277104, 'loss': 8.33404541015625}


EP_train:2:  86%|| 23692/27626 [55:49<09:18,  7.05it/s]

{'epoch': 2, 'iter': 23690, 'avg_loss': 8.541543076336346, 'avg_acc': 50.011343970284074, 'loss': 7.855099201202393}


EP_train:2:  86%|| 23702/27626 [55:51<09:18,  7.02it/s]

{'epoch': 2, 'iter': 23700, 'avg_loss': 8.541728893798453, 'avg_acc': 50.01094363107042, 'loss': 9.375207901000977}


EP_train:2:  86%|| 23712/27626 [55:52<09:16,  7.04it/s]

{'epoch': 2, 'iter': 23710, 'avg_loss': 8.541667262061203, 'avg_acc': 50.01199337860065, 'loss': 8.659736633300781}


EP_train:2:  86%|| 23722/27626 [55:54<09:13,  7.06it/s]

{'epoch': 2, 'iter': 23720, 'avg_loss': 8.541747626906115, 'avg_acc': 50.012778761435015, 'loss': 8.958486557006836}


EP_train:2:  86%|| 23732/27626 [55:55<09:13,  7.03it/s]

{'epoch': 2, 'iter': 23730, 'avg_loss': 8.541869020327411, 'avg_acc': 50.01343179807004, 'loss': 8.755793571472168}


EP_train:2:  86%|| 23742/27626 [55:56<09:14,  7.00it/s]

{'epoch': 2, 'iter': 23740, 'avg_loss': 8.541815857111787, 'avg_acc': 50.012109852154495, 'loss': 7.933320045471191}


EP_train:2:  86%|| 23752/27626 [55:58<09:09,  7.05it/s]

{'epoch': 2, 'iter': 23750, 'avg_loss': 8.541742109729388, 'avg_acc': 50.01052587259483, 'loss': 8.436988830566406}


EP_train:2:  86%|| 23762/27626 [55:59<09:07,  7.05it/s]

{'epoch': 2, 'iter': 23760, 'avg_loss': 8.541674141129091, 'avg_acc': 50.01209965910526, 'loss': 7.790570259094238}


EP_train:2:  86%|| 23772/27626 [56:01<09:14,  6.95it/s]

{'epoch': 2, 'iter': 23770, 'avg_loss': 8.541590005697227, 'avg_acc': 50.01064847923941, 'loss': 8.196185111999512}


EP_train:2:  86%|| 23782/27626 [56:02<09:04,  7.06it/s]

{'epoch': 2, 'iter': 23780, 'avg_loss': 8.541598214878977, 'avg_acc': 50.01011837180943, 'loss': 8.731396675109863}


EP_train:2:  86%|| 23792/27626 [56:03<09:05,  7.02it/s]

{'epoch': 2, 'iter': 23790, 'avg_loss': 8.541617946914172, 'avg_acc': 50.010639527552435, 'loss': 8.070711135864258}


EP_train:2:  86%|| 23802/27626 [56:05<09:04,  7.03it/s]

{'epoch': 2, 'iter': 23800, 'avg_loss': 8.541596353777788, 'avg_acc': 50.01116024536785, 'loss': 8.177979469299316}


EP_train:2:  86%|| 23812/27626 [56:06<08:58,  7.09it/s]

{'epoch': 2, 'iter': 23810, 'avg_loss': 8.541290518687907, 'avg_acc': 50.01089307462937, 'loss': 8.288280487060547}


EP_train:2:  86%|| 23822/27626 [56:08<09:02,  7.01it/s]

{'epoch': 2, 'iter': 23820, 'avg_loss': 8.541314921858108, 'avg_acc': 50.01220036942195, 'loss': 8.94132137298584}


EP_train:2:  86%|| 23832/27626 [56:09<08:57,  7.05it/s]

{'epoch': 2, 'iter': 23830, 'avg_loss': 8.541371666166047, 'avg_acc': 50.01153959128865, 'loss': 9.025003433227539}


EP_train:2:  86%|| 23842/27626 [56:11<08:53,  7.10it/s]

{'epoch': 2, 'iter': 23840, 'avg_loss': 8.54148365770139, 'avg_acc': 50.01271444150832, 'loss': 8.791600227355957}


EP_train:2:  86%|| 23852/27626 [56:12<08:50,  7.11it/s]

{'epoch': 2, 'iter': 23850, 'avg_loss': 8.541498161732719, 'avg_acc': 50.01388830656995, 'loss': 8.365154266357422}


EP_train:2:  86%|| 23862/27626 [56:13<08:56,  7.02it/s]

{'epoch': 2, 'iter': 23860, 'avg_loss': 8.54146925355187, 'avg_acc': 50.012703784418086, 'loss': 8.298243522644043}


EP_train:2:  86%|| 23872/27626 [56:15<08:53,  7.04it/s]

{'epoch': 2, 'iter': 23870, 'avg_loss': 8.541468028044212, 'avg_acc': 50.011520254702354, 'loss': 8.803478240966797}


EP_train:2:  86%|| 23882/27626 [56:16<08:55,  6.99it/s]

{'epoch': 2, 'iter': 23880, 'avg_loss': 8.541408011623874, 'avg_acc': 50.012693145178176, 'loss': 8.15380859375}


EP_train:2:  86%|| 23892/27626 [56:18<08:51,  7.03it/s]

{'epoch': 2, 'iter': 23890, 'avg_loss': 8.541329501148326, 'avg_acc': 50.01347264660332, 'loss': 8.536922454833984}


EP_train:2:  87%|| 23902/27626 [56:19<08:50,  7.01it/s]

{'epoch': 2, 'iter': 23900, 'avg_loss': 8.541187946040592, 'avg_acc': 50.012944019078695, 'loss': 7.997366905212402}


EP_train:2:  87%|| 23912/27626 [56:20<08:46,  7.05it/s]

{'epoch': 2, 'iter': 23910, 'avg_loss': 8.541046093600162, 'avg_acc': 50.01150098281126, 'loss': 7.522097110748291}


EP_train:2:  87%|| 23922/27626 [56:22<08:44,  7.07it/s]

{'epoch': 2, 'iter': 23920, 'avg_loss': 8.540992329269242, 'avg_acc': 50.01214936666527, 'loss': 8.662463188171387}


EP_train:2:  87%|| 23932/27626 [56:23<08:44,  7.04it/s]

{'epoch': 2, 'iter': 23930, 'avg_loss': 8.541093331364433, 'avg_acc': 50.01253604111822, 'loss': 8.120534896850586}


EP_train:2:  87%|| 23942/27626 [56:25<08:43,  7.03it/s]

{'epoch': 2, 'iter': 23940, 'avg_loss': 8.541348307738502, 'avg_acc': 50.012400275677706, 'loss': 8.527185440063477}


EP_train:2:  87%|| 23952/27626 [56:26<08:39,  7.08it/s]

{'epoch': 2, 'iter': 23950, 'avg_loss': 8.541372650207485, 'avg_acc': 50.01122082585279, 'loss': 8.4925537109375}


EP_train:2:  87%|| 23962/27626 [56:27<08:42,  7.02it/s]

{'epoch': 2, 'iter': 23960, 'avg_loss': 8.541228061934095, 'avg_acc': 50.01278118609407, 'loss': 8.005388259887695}


EP_train:2:  87%|| 23972/27626 [56:29<08:42,  6.99it/s]

{'epoch': 2, 'iter': 23970, 'avg_loss': 8.54134668338523, 'avg_acc': 50.01407951274457, 'loss': 8.45175552368164}


EP_train:2:  87%|| 23982/27626 [56:30<08:40,  6.99it/s]

{'epoch': 2, 'iter': 23980, 'avg_loss': 8.541412042729387, 'avg_acc': 50.01407364163296, 'loss': 8.028298377990723}


EP_train:2:  87%|| 23992/27626 [56:32<08:35,  7.04it/s]

{'epoch': 2, 'iter': 23990, 'avg_loss': 8.541439860665403, 'avg_acc': 50.01471906131466, 'loss': 8.23108959197998}


EP_train:2:  87%|| 24002/27626 [56:33<08:31,  7.09it/s]

{'epoch': 2, 'iter': 24000, 'avg_loss': 8.541428863890474, 'avg_acc': 50.01484313153619, 'loss': 8.721607208251953}


EP_train:2:  87%|| 24012/27626 [56:35<08:33,  7.04it/s]

{'epoch': 2, 'iter': 24010, 'avg_loss': 8.54157200698071, 'avg_acc': 50.01366561159468, 'loss': 8.93643856048584}


EP_train:2:  87%|| 24022/27626 [56:36<08:29,  7.07it/s]

{'epoch': 2, 'iter': 24020, 'avg_loss': 8.541583901941268, 'avg_acc': 50.013790017068395, 'loss': 7.8046464920043945}


EP_train:2:  87%|| 24032/27626 [56:37<08:27,  7.08it/s]

{'epoch': 2, 'iter': 24030, 'avg_loss': 8.541492763983761, 'avg_acc': 50.01352419791103, 'loss': 8.154760360717773}


EP_train:2:  87%|| 24042/27626 [56:39<08:29,  7.03it/s]

{'epoch': 2, 'iter': 24040, 'avg_loss': 8.541451970589081, 'avg_acc': 50.015338380267046, 'loss': 9.129810333251953}


EP_train:2:  87%|| 24052/27626 [56:40<08:27,  7.05it/s]

{'epoch': 2, 'iter': 24050, 'avg_loss': 8.54147147581159, 'avg_acc': 50.01260342605297, 'loss': 8.232505798339844}


EP_train:2:  87%|| 24062/27626 [56:42<08:27,  7.03it/s]

{'epoch': 2, 'iter': 24060, 'avg_loss': 8.541491480263277, 'avg_acc': 50.012208553260464, 'loss': 8.349162101745605}


EP_train:2:  87%|| 24072/27626 [56:43<08:24,  7.05it/s]

{'epoch': 2, 'iter': 24070, 'avg_loss': 8.541438350441359, 'avg_acc': 50.0106455901292, 'loss': 8.757233619689941}


EP_train:2:  87%|| 24082/27626 [56:44<08:20,  7.08it/s]

{'epoch': 2, 'iter': 24080, 'avg_loss': 8.54142991617606, 'avg_acc': 50.01064116938665, 'loss': 8.201311111450195}


EP_train:2:  87%|| 24092/27626 [56:46<08:18,  7.09it/s]

{'epoch': 2, 'iter': 24090, 'avg_loss': 8.54140535449384, 'avg_acc': 50.00985845336433, 'loss': 8.156837463378906}


EP_train:2:  87%|| 24102/27626 [56:47<08:18,  7.07it/s]

{'epoch': 2, 'iter': 24100, 'avg_loss': 8.541447433167624, 'avg_acc': 50.00946537488071, 'loss': 8.913599967956543}


EP_train:2:  87%|| 24112/27626 [56:49<08:20,  7.02it/s]

{'epoch': 2, 'iter': 24110, 'avg_loss': 8.541421008374066, 'avg_acc': 50.01049832026876, 'loss': 8.967681884765625}


EP_train:2:  87%|| 24122/27626 [56:50<08:16,  7.06it/s]

{'epoch': 2, 'iter': 24120, 'avg_loss': 8.541380737577176, 'avg_acc': 50.009716636955346, 'loss': 8.575640678405762}


EP_train:2:  87%|| 24132/27626 [56:52<08:16,  7.04it/s]

{'epoch': 2, 'iter': 24130, 'avg_loss': 8.541372875722491, 'avg_acc': 50.011007625046624, 'loss': 8.57877254486084}


EP_train:2:  87%|| 24142/27626 [56:53<08:12,  7.07it/s]

{'epoch': 2, 'iter': 24140, 'avg_loss': 8.541415744390559, 'avg_acc': 50.01035582618781, 'loss': 8.842009544372559}


EP_train:2:  87%|| 24152/27626 [56:54<08:15,  7.01it/s]

{'epoch': 2, 'iter': 24150, 'avg_loss': 8.54137806647372, 'avg_acc': 50.00996335555463, 'loss': 8.224233627319336}


EP_train:2:  87%|| 24162/27626 [56:56<08:12,  7.04it/s]

{'epoch': 2, 'iter': 24160, 'avg_loss': 8.541308048816367, 'avg_acc': 50.012158023260625, 'loss': 7.8081254959106445}


EP_train:2:  87%|| 24172/27626 [56:57<08:09,  7.05it/s]

{'epoch': 2, 'iter': 24170, 'avg_loss': 8.54138411083208, 'avg_acc': 50.01124798312026, 'loss': 8.89283561706543}


EP_train:2:  88%|| 24182/27626 [56:59<08:08,  7.05it/s]

{'epoch': 2, 'iter': 24180, 'avg_loss': 8.541451477628799, 'avg_acc': 50.01292336958769, 'loss': 8.65134048461914}


EP_train:2:  88%|| 24192/27626 [57:00<08:04,  7.08it/s]

{'epoch': 2, 'iter': 24190, 'avg_loss': 8.541561142872288, 'avg_acc': 50.01149704435534, 'loss': 8.065518379211426}


EP_train:2:  88%|| 24202/27626 [57:01<08:03,  7.09it/s]

{'epoch': 2, 'iter': 24200, 'avg_loss': 8.54158961414813, 'avg_acc': 50.01110491302012, 'loss': 9.561676979064941}


EP_train:2:  88%|| 24212/27626 [57:03<08:06,  7.02it/s]

{'epoch': 2, 'iter': 24210, 'avg_loss': 8.541651519601128, 'avg_acc': 50.0121329147908, 'loss': 8.362435340881348}


EP_train:2:  88%|| 24222/27626 [57:04<08:02,  7.06it/s]

{'epoch': 2, 'iter': 24220, 'avg_loss': 8.541824509320756, 'avg_acc': 50.010966723091535, 'loss': 8.334650993347168}


EP_train:2:  88%|| 24232/27626 [57:06<08:02,  7.03it/s]

{'epoch': 2, 'iter': 24230, 'avg_loss': 8.541916913093969, 'avg_acc': 50.009285625851184, 'loss': 8.86539363861084}


EP_train:2:  88%|| 24242/27626 [57:07<08:00,  7.04it/s]

{'epoch': 2, 'iter': 24240, 'avg_loss': 8.541932708570595, 'avg_acc': 50.009023967658095, 'loss': 9.345535278320312}


EP_train:2:  88%|| 24252/27626 [57:08<07:59,  7.04it/s]

{'epoch': 2, 'iter': 24250, 'avg_loss': 8.542053982221605, 'avg_acc': 50.009277967918855, 'loss': 8.225675582885742}


EP_train:2:  88%|| 24262/27626 [57:10<07:58,  7.03it/s]

{'epoch': 2, 'iter': 24260, 'avg_loss': 8.542025623275935, 'avg_acc': 50.00966056634104, 'loss': 9.033661842346191}


EP_train:2:  88%|| 24272/27626 [57:11<07:54,  7.07it/s]

{'epoch': 2, 'iter': 24270, 'avg_loss': 8.541971595309622, 'avg_acc': 50.010300358452476, 'loss': 8.501049041748047}


EP_train:2:  88%|| 24282/27626 [57:13<07:56,  7.02it/s]

{'epoch': 2, 'iter': 24280, 'avg_loss': 8.541923671630663, 'avg_acc': 50.01106832502779, 'loss': 9.824807167053223}


EP_train:2:  88%|| 24292/27626 [57:14<07:50,  7.08it/s]

{'epoch': 2, 'iter': 24290, 'avg_loss': 8.5419599826474, 'avg_acc': 50.010163229179526, 'loss': 8.090209007263184}


EP_train:2:  88%|| 24302/27626 [57:16<07:50,  7.07it/s]

{'epoch': 2, 'iter': 24300, 'avg_loss': 8.541911173041457, 'avg_acc': 50.009258878235464, 'loss': 9.713532447814941}


EP_train:2:  88%|| 24312/27626 [57:17<07:52,  7.01it/s]

{'epoch': 2, 'iter': 24310, 'avg_loss': 8.542048108037852, 'avg_acc': 50.00989778289663, 'loss': 8.793094635009766}


EP_train:2:  88%|| 24322/27626 [57:18<07:46,  7.08it/s]

{'epoch': 2, 'iter': 24320, 'avg_loss': 8.542122847488564, 'avg_acc': 50.00976522346943, 'loss': 8.806290626525879}


EP_train:2:  88%|| 24332/27626 [57:20<07:45,  7.08it/s]

{'epoch': 2, 'iter': 24330, 'avg_loss': 8.542150383788146, 'avg_acc': 50.006164974723596, 'loss': 9.020732879638672}


EP_train:2:  88%|| 24342/27626 [57:21<07:47,  7.02it/s]

{'epoch': 2, 'iter': 24340, 'avg_loss': 8.542130760049316, 'avg_acc': 50.005905673554906, 'loss': 8.709238052368164}


EP_train:2:  88%|| 24352/27626 [57:23<07:42,  7.08it/s]

{'epoch': 2, 'iter': 24350, 'avg_loss': 8.54203558228578, 'avg_acc': 50.003208287133994, 'loss': 7.644935607910156}


EP_train:2:  88%|| 24362/27626 [57:24<07:39,  7.11it/s]

{'epoch': 2, 'iter': 24360, 'avg_loss': 8.5419769731043, 'avg_acc': 50.002822133738356, 'loss': 8.941313743591309}


EP_train:2:  88%|| 24372/27626 [57:25<07:39,  7.08it/s]

{'epoch': 2, 'iter': 24370, 'avg_loss': 8.542005724091224, 'avg_acc': 50.00307742809076, 'loss': 8.294095993041992}


EP_train:2:  88%|| 24382/27626 [57:27<07:39,  7.06it/s]

{'epoch': 2, 'iter': 24380, 'avg_loss': 8.541950939472303, 'avg_acc': 50.004357901644724, 'loss': 8.735405921936035}


EP_train:2:  88%|| 24392/27626 [57:28<07:37,  7.07it/s]

{'epoch': 2, 'iter': 24390, 'avg_loss': 8.54196346085194, 'avg_acc': 50.00576544627117, 'loss': 8.506351470947266}


EP_train:2:  88%|| 24402/27626 [57:30<07:37,  7.05it/s]

{'epoch': 2, 'iter': 24400, 'avg_loss': 8.541929617342776, 'avg_acc': 50.00576308348018, 'loss': 8.392093658447266}


EP_train:2:  88%|| 24412/27626 [57:31<07:33,  7.08it/s]

{'epoch': 2, 'iter': 24410, 'avg_loss': 8.54190036926357, 'avg_acc': 50.00524865839171, 'loss': 8.258000373840332}


EP_train:2:  88%|| 24422/27626 [57:32<07:35,  7.03it/s]

{'epoch': 2, 'iter': 24420, 'avg_loss': 8.54167888350112, 'avg_acc': 50.006142254616925, 'loss': 8.434239387512207}


EP_train:2:  88%|| 24432/27626 [57:34<07:33,  7.04it/s]

{'epoch': 2, 'iter': 24430, 'avg_loss': 8.54158781590793, 'avg_acc': 50.00460480537022, 'loss': 8.22675609588623}


EP_train:2:  88%|| 24442/27626 [57:35<07:33,  7.02it/s]

{'epoch': 2, 'iter': 24440, 'avg_loss': 8.541407525795464, 'avg_acc': 50.005370074874186, 'loss': 9.25547981262207}


EP_train:2:  89%|| 24452/27626 [57:37<07:29,  7.06it/s]

{'epoch': 2, 'iter': 24450, 'avg_loss': 8.541217717172314, 'avg_acc': 50.00460103881231, 'loss': 8.41760540008545}


EP_train:2:  89%|| 24462/27626 [57:38<07:27,  7.07it/s]

{'epoch': 2, 'iter': 24460, 'avg_loss': 8.541469941978038, 'avg_acc': 50.00421589468951, 'loss': 9.433328628540039}


EP_train:2:  89%|| 24472/27626 [57:40<07:28,  7.04it/s]

{'epoch': 2, 'iter': 24470, 'avg_loss': 8.541610384059524, 'avg_acc': 50.00408646969883, 'loss': 9.27357006072998}


EP_train:2:  89%|| 24482/27626 [57:41<07:24,  7.07it/s]

{'epoch': 2, 'iter': 24480, 'avg_loss': 8.54169396866011, 'avg_acc': 50.00370185041461, 'loss': 8.928919792175293}


EP_train:2:  89%|| 24492/27626 [57:42<07:23,  7.06it/s]

{'epoch': 2, 'iter': 24490, 'avg_loss': 8.541640353384864, 'avg_acc': 50.00267955575518, 'loss': 9.169451713562012}


EP_train:2:  89%|| 24502/27626 [57:44<07:26,  7.00it/s]

{'epoch': 2, 'iter': 24500, 'avg_loss': 8.541564956956346, 'avg_acc': 50.00102036651565, 'loss': 8.19730281829834}


EP_train:2:  89%|| 24512/27626 [57:45<07:23,  7.02it/s]

{'epoch': 2, 'iter': 24510, 'avg_loss': 8.541645188950437, 'avg_acc': 49.9998725062217, 'loss': 8.497525215148926}


EP_train:2:  89%|| 24522/27626 [57:47<07:19,  7.06it/s]

{'epoch': 2, 'iter': 24520, 'avg_loss': 8.541638437995056, 'avg_acc': 50.00012744178459, 'loss': 8.983277320861816}


EP_train:2:  89%|| 24532/27626 [57:48<07:18,  7.06it/s]

{'epoch': 2, 'iter': 24530, 'avg_loss': 8.541756138081002, 'avg_acc': 49.99898088133382, 'loss': 10.188713073730469}


EP_train:2:  89%|| 24542/27626 [57:49<07:16,  7.06it/s]

{'epoch': 2, 'iter': 24540, 'avg_loss': 8.541936087504386, 'avg_acc': 49.999363310378556, 'loss': 9.606915473937988}


EP_train:2:  89%|| 24552/27626 [57:51<07:12,  7.10it/s]

{'epoch': 2, 'iter': 24550, 'avg_loss': 8.542054356453079, 'avg_acc': 49.99859985336646, 'loss': 9.029899597167969}


EP_train:2:  89%|| 24562/27626 [57:52<07:13,  7.06it/s]

{'epoch': 2, 'iter': 24560, 'avg_loss': 8.54197000126357, 'avg_acc': 49.99707361263792, 'loss': 7.860962867736816}


EP_train:2:  89%|| 24572/27626 [57:54<07:15,  7.01it/s]

{'epoch': 2, 'iter': 24570, 'avg_loss': 8.54188328100587, 'avg_acc': 49.99694762117944, 'loss': 8.505578994750977}


EP_train:2:  89%|| 24582/27626 [57:55<07:11,  7.05it/s]

{'epoch': 2, 'iter': 24580, 'avg_loss': 8.541923589098468, 'avg_acc': 49.99898295431431, 'loss': 8.718676567077637}


EP_train:2:  89%|| 24592/27626 [57:56<07:08,  7.08it/s]

{'epoch': 2, 'iter': 24590, 'avg_loss': 8.541916572346178, 'avg_acc': 49.999364604936765, 'loss': 8.36388874053955}


EP_train:2:  89%|| 24602/27626 [57:58<07:08,  7.06it/s]

{'epoch': 2, 'iter': 24600, 'avg_loss': 8.541774513324022, 'avg_acc': 49.99771350758099, 'loss': 7.9721198081970215}


EP_train:2:  89%|| 24612/27626 [57:59<07:09,  7.02it/s]

{'epoch': 2, 'iter': 24610, 'avg_loss': 8.541714075826377, 'avg_acc': 49.99796838811913, 'loss': 8.879534721374512}


EP_train:2:  89%|| 24622/27626 [58:01<07:06,  7.04it/s]

{'epoch': 2, 'iter': 24620, 'avg_loss': 8.541797900557214, 'avg_acc': 49.99657304739856, 'loss': 8.201872825622559}


EP_train:2:  89%|| 24632/27626 [58:02<07:06,  7.02it/s]

{'epoch': 2, 'iter': 24630, 'avg_loss': 8.541928906305413, 'avg_acc': 49.99505196703342, 'loss': 8.519176483154297}


EP_train:2:  89%|| 24642/27626 [58:04<07:02,  7.07it/s]

{'epoch': 2, 'iter': 24640, 'avg_loss': 8.541874025680865, 'avg_acc': 49.99441986932349, 'loss': 8.766988754272461}


EP_train:2:  89%|| 24652/27626 [58:05<07:00,  7.07it/s]

{'epoch': 2, 'iter': 24650, 'avg_loss': 8.541839145462168, 'avg_acc': 49.995055981501764, 'loss': 8.509669303894043}


EP_train:2:  89%|| 24662/27626 [58:06<06:56,  7.11it/s]

{'epoch': 2, 'iter': 24660, 'avg_loss': 8.541754447003795, 'avg_acc': 49.99556485949475, 'loss': 8.37707233428955}


EP_train:2:  89%|| 24672/27626 [58:08<07:02,  6.99it/s]

{'epoch': 2, 'iter': 24670, 'avg_loss': 8.541860474856342, 'avg_acc': 49.99607332495643, 'loss': 8.411243438720703}


EP_train:2:  89%|| 24682/27626 [58:09<06:57,  7.06it/s]

{'epoch': 2, 'iter': 24680, 'avg_loss': 8.541839656873883, 'avg_acc': 49.996707994003486, 'loss': 9.009512901306152}


EP_train:2:  89%|| 24692/27626 [58:11<06:55,  7.06it/s]

{'epoch': 2, 'iter': 24690, 'avg_loss': 8.541892428574066, 'avg_acc': 49.99645619861488, 'loss': 9.458380699157715}


EP_train:2:  89%|| 24702/27626 [58:12<06:54,  7.06it/s]

{'epoch': 2, 'iter': 24700, 'avg_loss': 8.541943774252863, 'avg_acc': 49.99835532974374, 'loss': 8.221526145935059}


EP_train:2:  89%|| 24712/27626 [58:13<06:53,  7.04it/s]

{'epoch': 2, 'iter': 24710, 'avg_loss': 8.541952002849781, 'avg_acc': 49.9984824572053, 'loss': 8.003284454345703}


EP_train:2:  89%|| 24722/27626 [58:15<06:50,  7.07it/s]

{'epoch': 2, 'iter': 24720, 'avg_loss': 8.541976106007224, 'avg_acc': 49.99936794628049, 'loss': 8.686812400817871}


EP_train:2:  90%|| 24732/27626 [58:16<06:47,  7.11it/s]

{'epoch': 2, 'iter': 24730, 'avg_loss': 8.541907718702099, 'avg_acc': 49.99886276333347, 'loss': 8.064031600952148}


EP_train:2:  90%|| 24742/27626 [58:18<06:50,  7.03it/s]

{'epoch': 2, 'iter': 24740, 'avg_loss': 8.541751060541346, 'avg_acc': 49.998989531546826, 'loss': 7.852599620819092}


EP_train:2:  90%|| 24752/27626 [58:19<06:46,  7.07it/s]

{'epoch': 2, 'iter': 24750, 'avg_loss': 8.541644284192598, 'avg_acc': 50.00063128762474, 'loss': 8.367112159729004}


EP_train:2:  90%|| 24762/27626 [58:20<06:44,  7.08it/s]

{'epoch': 2, 'iter': 24760, 'avg_loss': 8.541668993798641, 'avg_acc': 50.00252413068939, 'loss': 8.631595611572266}


EP_train:2:  90%|| 24772/27626 [58:22<06:42,  7.10it/s]

{'epoch': 2, 'iter': 24770, 'avg_loss': 8.541684252084574, 'avg_acc': 50.00214464494772, 'loss': 8.555033683776855}


EP_train:2:  90%|| 24782/27626 [58:23<06:43,  7.04it/s]

{'epoch': 2, 'iter': 24780, 'avg_loss': 8.541521567851154, 'avg_acc': 50.00063052338485, 'loss': 8.702128410339355}


EP_train:2:  90%|| 24792/27626 [58:25<06:42,  7.05it/s]

{'epoch': 2, 'iter': 24790, 'avg_loss': 8.541417120386933, 'avg_acc': 49.99886551571135, 'loss': 7.77431058883667}


EP_train:2:  90%|| 24802/27626 [58:26<06:42,  7.02it/s]

{'epoch': 2, 'iter': 24800, 'avg_loss': 8.541458798331563, 'avg_acc': 49.99684992540623, 'loss': 7.747900485992432}


EP_train:2:  90%|| 24812/27626 [58:28<06:41,  7.01it/s]

{'epoch': 2, 'iter': 24810, 'avg_loss': 8.541313649876692, 'avg_acc': 49.9942061988634, 'loss': 8.297831535339355}


EP_train:2:  90%|| 24822/27626 [58:29<06:34,  7.10it/s]

{'epoch': 2, 'iter': 24820, 'avg_loss': 8.54130194484071, 'avg_acc': 49.99471213891463, 'loss': 9.192899703979492}


EP_train:2:  90%|| 24832/27626 [58:30<06:36,  7.06it/s]

{'epoch': 2, 'iter': 24830, 'avg_loss': 8.541232925190899, 'avg_acc': 49.994085014699365, 'loss': 8.778264045715332}


EP_train:2:  90%|| 24842/27626 [58:32<06:36,  7.02it/s]

{'epoch': 2, 'iter': 24840, 'avg_loss': 8.541374488122788, 'avg_acc': 49.99496799645747, 'loss': 8.87656021118164}


EP_train:2:  90%|| 24852/27626 [58:33<06:32,  7.06it/s]

{'epoch': 2, 'iter': 24850, 'avg_loss': 8.54144975076158, 'avg_acc': 49.99559876866122, 'loss': 8.298725128173828}


EP_train:2:  90%|| 24862/27626 [58:35<06:32,  7.05it/s]

{'epoch': 2, 'iter': 24860, 'avg_loss': 8.54144719183951, 'avg_acc': 49.99585193676843, 'loss': 7.755373001098633}


EP_train:2:  90%|| 24872/27626 [58:36<06:28,  7.08it/s]

{'epoch': 2, 'iter': 24870, 'avg_loss': 8.541463159516907, 'avg_acc': 49.997110088054356, 'loss': 8.994148254394531}


EP_train:2:  90%|| 24882/27626 [58:37<06:30,  7.02it/s]

{'epoch': 2, 'iter': 24880, 'avg_loss': 8.541323159592965, 'avg_acc': 49.99547847755315, 'loss': 7.94749641418457}


EP_train:2:  90%|| 24892/27626 [58:39<06:31,  6.98it/s]

{'epoch': 2, 'iter': 24890, 'avg_loss': 8.541275775617931, 'avg_acc': 49.996108031015225, 'loss': 8.616860389709473}


EP_train:2:  90%|| 24902/27626 [58:40<06:27,  7.03it/s]

{'epoch': 2, 'iter': 24900, 'avg_loss': 8.541260381237263, 'avg_acc': 49.99397614553632, 'loss': 8.616143226623535}


EP_train:2:  90%|| 24912/27626 [58:42<06:23,  7.08it/s]

{'epoch': 2, 'iter': 24910, 'avg_loss': 8.54138801573635, 'avg_acc': 49.99586026253463, 'loss': 9.052680015563965}


EP_train:2:  90%|| 24922/27626 [58:43<06:22,  7.07it/s]

{'epoch': 2, 'iter': 24920, 'avg_loss': 8.541442850926567, 'avg_acc': 49.99686509369608, 'loss': 8.63310718536377}


EP_train:2:  90%|| 24932/27626 [58:45<06:21,  7.06it/s]

{'epoch': 2, 'iter': 24930, 'avg_loss': 8.541432566966186, 'avg_acc': 49.99598892944527, 'loss': 8.828536987304688}


EP_train:2:  90%|| 24942/27626 [58:46<06:19,  7.06it/s]

{'epoch': 2, 'iter': 24940, 'avg_loss': 8.541444474418306, 'avg_acc': 49.995364059179664, 'loss': 7.838624954223633}


EP_train:2:  90%|| 24952/27626 [58:47<06:18,  7.07it/s]

{'epoch': 2, 'iter': 24950, 'avg_loss': 8.541521898744904, 'avg_acc': 49.994864935273135, 'loss': 9.964704513549805}


EP_train:2:  90%|| 24962/27626 [58:49<06:18,  7.04it/s]

{'epoch': 2, 'iter': 24960, 'avg_loss': 8.541478495744744, 'avg_acc': 49.995117383117666, 'loss': 8.29271125793457}


EP_train:2:  90%|| 24972/27626 [58:50<06:19,  6.99it/s]

{'epoch': 2, 'iter': 24970, 'avg_loss': 8.54134667737479, 'avg_acc': 49.993867886748625, 'loss': 8.776516914367676}


EP_train:2:  90%|| 24982/27626 [58:52<06:16,  7.01it/s]

{'epoch': 2, 'iter': 24980, 'avg_loss': 8.541380980280028, 'avg_acc': 49.99161863015892, 'loss': 7.179471969604492}


EP_train:2:  90%|| 24992/27626 [58:53<06:14,  7.03it/s]

{'epoch': 2, 'iter': 24990, 'avg_loss': 8.541423113042436, 'avg_acc': 49.99212216397903, 'loss': 8.947145462036133}


EP_train:2:  91%|| 25002/27626 [58:54<06:10,  7.09it/s]

{'epoch': 2, 'iter': 25000, 'avg_loss': 8.541482642642842, 'avg_acc': 49.9920003199872, 'loss': 8.771892547607422}


EP_train:2:  91%|| 25012/27626 [58:56<06:08,  7.09it/s]

{'epoch': 2, 'iter': 25010, 'avg_loss': 8.541429420800789, 'avg_acc': 49.993377913717964, 'loss': 7.644941329956055}


EP_train:2:  91%|| 25022/27626 [58:57<06:07,  7.09it/s]

{'epoch': 2, 'iter': 25020, 'avg_loss': 8.541421926010617, 'avg_acc': 49.993630350505576, 'loss': 7.837686538696289}


EP_train:2:  91%|| 25032/27626 [58:59<06:08,  7.03it/s]

{'epoch': 2, 'iter': 25030, 'avg_loss': 8.541304567648767, 'avg_acc': 49.99363289520994, 'loss': 7.9014482498168945}


EP_train:2:  91%|| 25042/27626 [59:00<06:03,  7.11it/s]

{'epoch': 2, 'iter': 25040, 'avg_loss': 8.541330649569879, 'avg_acc': 49.99575695858792, 'loss': 8.560310363769531}


EP_train:2:  91%|| 25052/27626 [59:01<06:06,  7.02it/s]

{'epoch': 2, 'iter': 25050, 'avg_loss': 8.54132314322404, 'avg_acc': 49.99650712546405, 'loss': 8.557912826538086}


EP_train:2:  91%|| 25062/27626 [59:03<06:06,  7.00it/s]

{'epoch': 2, 'iter': 25060, 'avg_loss': 8.541407735438792, 'avg_acc': 49.996009736243565, 'loss': 8.448856353759766}


EP_train:2:  91%|| 25072/27626 [59:04<06:02,  7.04it/s]

{'epoch': 2, 'iter': 25070, 'avg_loss': 8.541224583978073, 'avg_acc': 49.99763172589845, 'loss': 8.066490173339844}


EP_train:2:  91%|| 25082/27626 [59:06<06:00,  7.06it/s]

{'epoch': 2, 'iter': 25080, 'avg_loss': 8.541225755144822, 'avg_acc': 49.998504844304456, 'loss': 9.80480670928955}


EP_train:2:  91%|| 25092/27626 [59:07<06:03,  6.98it/s]

{'epoch': 2, 'iter': 25090, 'avg_loss': 8.541285556755053, 'avg_acc': 49.99788270694671, 'loss': 8.230311393737793}


EP_train:2:  91%|| 25102/27626 [59:09<05:59,  7.03it/s]

{'epoch': 2, 'iter': 25100, 'avg_loss': 8.541270846461144, 'avg_acc': 49.99838153858412, 'loss': 8.602601051330566}


EP_train:2:  91%|| 25112/27626 [59:10<05:54,  7.09it/s]

{'epoch': 2, 'iter': 25110, 'avg_loss': 8.541445835987483, 'avg_acc': 49.99800884074708, 'loss': 9.508332252502441}


EP_train:2:  91%|| 25122/27626 [59:11<05:56,  7.02it/s]

{'epoch': 2, 'iter': 25120, 'avg_loss': 8.541345236080382, 'avg_acc': 49.996641256319414, 'loss': 8.17752742767334}


EP_train:2:  91%|| 25132/27626 [59:13<05:53,  7.05it/s]

{'epoch': 2, 'iter': 25130, 'avg_loss': 8.541399926131001, 'avg_acc': 49.99626954757073, 'loss': 8.464707374572754}


EP_train:2:  91%|| 25142/27626 [59:14<05:52,  7.05it/s]

{'epoch': 2, 'iter': 25140, 'avg_loss': 8.541468375821912, 'avg_acc': 49.9967682271986, 'loss': 8.550896644592285}


EP_train:2:  91%|| 25152/27626 [59:16<05:51,  7.03it/s]

{'epoch': 2, 'iter': 25150, 'avg_loss': 8.541543043411957, 'avg_acc': 49.99503001868713, 'loss': 8.798187255859375}


EP_train:2:  91%|| 25162/27626 [59:17<05:52,  6.99it/s]

{'epoch': 2, 'iter': 25160, 'avg_loss': 8.541411899173825, 'avg_acc': 49.99465939350583, 'loss': 8.34714126586914}


EP_train:2:  91%|| 25172/27626 [59:18<05:49,  7.02it/s]

{'epoch': 2, 'iter': 25170, 'avg_loss': 8.5414045797685, 'avg_acc': 49.99304755472568, 'loss': 8.964005470275879}


EP_train:2:  91%|| 25182/27626 [59:20<05:47,  7.03it/s]

{'epoch': 2, 'iter': 25180, 'avg_loss': 8.541399009422513, 'avg_acc': 49.99230570668361, 'loss': 8.748224258422852}


EP_train:2:  91%|| 25192/27626 [59:21<05:44,  7.07it/s]

{'epoch': 2, 'iter': 25190, 'avg_loss': 8.541381346966936, 'avg_acc': 49.99131634313842, 'loss': 8.459012031555176}


EP_train:2:  91%|| 25202/27626 [59:23<05:41,  7.10it/s]

{'epoch': 2, 'iter': 25200, 'avg_loss': 8.541460514617324, 'avg_acc': 49.99107178286576, 'loss': 8.017312049865723}


EP_train:2:  91%|| 25212/27626 [59:24<05:43,  7.02it/s]

{'epoch': 2, 'iter': 25210, 'avg_loss': 8.541456730862905, 'avg_acc': 49.99020764745548, 'loss': 8.47141170501709}


EP_train:2:  91%|| 25222/27626 [59:26<05:40,  7.06it/s]

{'epoch': 2, 'iter': 25220, 'avg_loss': 8.541431343133366, 'avg_acc': 49.99008762539154, 'loss': 7.687539100646973}


EP_train:2:  91%|| 25232/27626 [59:27<05:40,  7.03it/s]

{'epoch': 2, 'iter': 25230, 'avg_loss': 8.541497986484218, 'avg_acc': 49.99207324323253, 'loss': 7.925689697265625}


EP_train:2:  91%|| 25242/27626 [59:28<05:41,  6.98it/s]

{'epoch': 2, 'iter': 25240, 'avg_loss': 8.541680030770614, 'avg_acc': 49.991828770650926, 'loss': 7.984133243560791}


EP_train:2:  91%|| 25252/27626 [59:30<05:37,  7.03it/s]

{'epoch': 2, 'iter': 25250, 'avg_loss': 8.541721310966214, 'avg_acc': 49.99257455150291, 'loss': 8.679396629333496}


EP_train:2:  91%|| 25262/27626 [59:31<05:33,  7.08it/s]

{'epoch': 2, 'iter': 25260, 'avg_loss': 8.54177807221281, 'avg_acc': 49.99158782312656, 'loss': 8.680892944335938}


EP_train:2:  91%|| 25272/27626 [59:33<05:33,  7.07it/s]

{'epoch': 2, 'iter': 25270, 'avg_loss': 8.541617588032496, 'avg_acc': 49.991467492382576, 'loss': 8.272162437438965}


EP_train:2:  92%|| 25282/27626 [59:34<05:33,  7.03it/s]

{'epoch': 2, 'iter': 25280, 'avg_loss': 8.54159758163684, 'avg_acc': 49.9917180886832, 'loss': 8.979833602905273}


EP_train:2:  92%|| 25292/27626 [59:35<05:29,  7.08it/s]

{'epoch': 2, 'iter': 25290, 'avg_loss': 8.541501025241784, 'avg_acc': 49.99085643114151, 'loss': 8.609408378601074}


EP_train:2:  92%|| 25302/27626 [59:37<05:28,  7.08it/s]

{'epoch': 2, 'iter': 25300, 'avg_loss': 8.541535489941099, 'avg_acc': 49.992465712817676, 'loss': 8.157079696655273}


EP_train:2:  92%|| 25312/27626 [59:38<05:28,  7.05it/s]

{'epoch': 2, 'iter': 25310, 'avg_loss': 8.541460895655133, 'avg_acc': 49.99197483307653, 'loss': 8.310391426086426}


EP_train:2:  92%|| 25322/27626 [59:40<05:27,  7.04it/s]

{'epoch': 2, 'iter': 25320, 'avg_loss': 8.541450707580347, 'avg_acc': 49.99308874057107, 'loss': 8.450764656066895}


EP_train:2:  92%|| 25332/27626 [59:41<05:25,  7.05it/s]

{'epoch': 2, 'iter': 25330, 'avg_loss': 8.541451453315506, 'avg_acc': 49.99309146895109, 'loss': 8.1497802734375}


EP_train:2:  92%|| 25342/27626 [59:42<05:23,  7.06it/s]

{'epoch': 2, 'iter': 25340, 'avg_loss': 8.541390986446517, 'avg_acc': 49.991984333688485, 'loss': 8.302634239196777}


EP_train:2:  92%|| 25352/27626 [59:44<05:22,  7.04it/s]

{'epoch': 2, 'iter': 25350, 'avg_loss': 8.541433124136706, 'avg_acc': 49.99124787976806, 'loss': 8.586722373962402}


EP_train:2:  92%|| 25362/27626 [59:45<05:20,  7.06it/s]

{'epoch': 2, 'iter': 25360, 'avg_loss': 8.541302654928488, 'avg_acc': 49.98952624107883, 'loss': 7.5706353187561035}


EP_train:2:  92%|| 25372/27626 [59:47<05:19,  7.07it/s]

{'epoch': 2, 'iter': 25370, 'avg_loss': 8.541292710729206, 'avg_acc': 49.99026940207323, 'loss': 7.5275163650512695}


EP_train:2:  92%|| 25382/27626 [59:48<05:18,  7.05it/s]

{'epoch': 2, 'iter': 25380, 'avg_loss': 8.541169788100882, 'avg_acc': 49.989780741499544, 'loss': 7.952884197235107}


EP_train:2:  92%|| 25392/27626 [59:50<05:18,  7.02it/s]

{'epoch': 2, 'iter': 25390, 'avg_loss': 8.541191662683262, 'avg_acc': 49.98953861604505, 'loss': 8.129792213439941}


EP_train:2:  92%|| 25402/27626 [59:51<05:15,  7.04it/s]

{'epoch': 2, 'iter': 25400, 'avg_loss': 8.541172411070017, 'avg_acc': 49.99077300106295, 'loss': 9.296130180358887}


EP_train:2:  92%|| 25412/27626 [59:52<05:13,  7.05it/s]

{'epoch': 2, 'iter': 25410, 'avg_loss': 8.541086232290185, 'avg_acc': 49.99040769745386, 'loss': 8.2156982421875}


EP_train:2:  92%|| 25422/27626 [59:54<05:12,  7.06it/s]

{'epoch': 2, 'iter': 25420, 'avg_loss': 8.541041548464722, 'avg_acc': 49.98918217221982, 'loss': 8.480585098266602}


EP_train:2:  92%|| 25432/27626 [59:55<05:10,  7.06it/s]

{'epoch': 2, 'iter': 25430, 'avg_loss': 8.541131654137647, 'avg_acc': 49.98906354449294, 'loss': 8.603140830993652}


EP_train:2:  92%|| 25442/27626 [59:57<05:11,  7.01it/s]

{'epoch': 2, 'iter': 25440, 'avg_loss': 8.5411949219004, 'avg_acc': 49.98919067646712, 'loss': 9.254572868347168}


EP_train:2:  92%|| 25452/27626 [59:58<05:08,  7.04it/s]

{'epoch': 2, 'iter': 25450, 'avg_loss': 8.541159245762804, 'avg_acc': 49.988949353659976, 'loss': 8.683213233947754}


EP_train:2:  92%|| 25462/27626 [59:59<05:06,  7.07it/s]

{'epoch': 2, 'iter': 25460, 'avg_loss': 8.541131160577718, 'avg_acc': 49.988094536742466, 'loss': 8.170973777770996}


EP_train:2:  92%|| 25472/27626 [1:00:01<05:04,  7.07it/s]

{'epoch': 2, 'iter': 25470, 'avg_loss': 8.541130541294905, 'avg_acc': 49.98822189941502, 'loss': 8.553330421447754}


EP_train:2:  92%|| 25482/27626 [1:00:02<05:03,  7.06it/s]

{'epoch': 2, 'iter': 25480, 'avg_loss': 8.541011937198569, 'avg_acc': 49.98969820650681, 'loss': 8.998886108398438}


EP_train:2:  92%|| 25492/27626 [1:00:04<05:03,  7.04it/s]

{'epoch': 2, 'iter': 25490, 'avg_loss': 8.541105936191908, 'avg_acc': 49.99117335530187, 'loss': 8.531506538391113}


EP_train:2:  92%|| 25502/27626 [1:00:05<04:59,  7.08it/s]

{'epoch': 2, 'iter': 25500, 'avg_loss': 8.541082259781346, 'avg_acc': 49.99056409552566, 'loss': 10.071080207824707}


EP_train:2:  92%|| 25512/27626 [1:00:06<04:59,  7.05it/s]

{'epoch': 2, 'iter': 25510, 'avg_loss': 8.541044823374136, 'avg_acc': 49.98958782485987, 'loss': 7.7553863525390625}


EP_train:2:  92%|| 25522/27626 [1:00:08<04:57,  7.07it/s]

{'epoch': 2, 'iter': 25520, 'avg_loss': 8.540974601459215, 'avg_acc': 49.988857215626346, 'loss': 8.271409034729004}


EP_train:2:  92%|| 25532/27626 [1:00:09<04:56,  7.06it/s]

{'epoch': 2, 'iter': 25530, 'avg_loss': 8.540946051561848, 'avg_acc': 49.98861677960127, 'loss': 8.17149829864502}


EP_train:2:  92%|| 25542/27626 [1:00:11<04:55,  7.06it/s]

{'epoch': 2, 'iter': 25540, 'avg_loss': 8.54086672076557, 'avg_acc': 49.9891106456286, 'loss': 8.300100326538086}


EP_train:2:  92%|| 25552/27626 [1:00:12<04:52,  7.08it/s]

{'epoch': 2, 'iter': 25550, 'avg_loss': 8.540943477826643, 'avg_acc': 49.98948182067238, 'loss': 8.453899383544922}


EP_train:2:  93%|| 25562/27626 [1:00:14<04:53,  7.02it/s]

{'epoch': 2, 'iter': 25560, 'avg_loss': 8.540847633166761, 'avg_acc': 49.988752396228634, 'loss': 8.095380783081055}


EP_train:2:  93%|| 25572/27626 [1:00:15<04:50,  7.08it/s]

{'epoch': 2, 'iter': 25570, 'avg_loss': 8.540928529129454, 'avg_acc': 49.98839016855031, 'loss': 8.08106517791748}


EP_train:2:  93%|| 25582/27626 [1:00:16<04:50,  7.05it/s]

{'epoch': 2, 'iter': 25580, 'avg_loss': 8.54075913498043, 'avg_acc': 49.98717309722059, 'loss': 8.138203620910645}


EP_train:2:  93%|| 25592/27626 [1:00:18<04:50,  6.99it/s]

{'epoch': 2, 'iter': 25590, 'avg_loss': 8.540735015900443, 'avg_acc': 49.98754444922043, 'loss': 8.765518188476562}


EP_train:2:  93%|| 25602/27626 [1:00:19<04:48,  7.01it/s]

{'epoch': 2, 'iter': 25600, 'avg_loss': 8.540591682290142, 'avg_acc': 49.98620659349244, 'loss': 7.841766357421875}


EP_train:2:  93%|| 25612/27626 [1:00:21<04:47,  7.01it/s]

{'epoch': 2, 'iter': 25610, 'avg_loss': 8.540562127578692, 'avg_acc': 49.9865780328765, 'loss': 8.124655723571777}


EP_train:2:  93%|| 25622/27626 [1:00:22<04:44,  7.05it/s]

{'epoch': 2, 'iter': 25620, 'avg_loss': 8.540514196585931, 'avg_acc': 49.98768100386401, 'loss': 8.761116027832031}


EP_train:2:  93%|| 25632/27626 [1:00:23<04:43,  7.04it/s]

{'epoch': 2, 'iter': 25630, 'avg_loss': 8.540522363963433, 'avg_acc': 49.9879296554953, 'loss': 8.551655769348145}


EP_train:2:  93%|| 25642/27626 [1:00:25<04:41,  7.05it/s]

{'epoch': 2, 'iter': 25640, 'avg_loss': 8.540634965158318, 'avg_acc': 49.98842186342186, 'loss': 9.412982940673828}


EP_train:2:  93%|| 25652/27626 [1:00:26<04:40,  7.05it/s]

{'epoch': 2, 'iter': 25650, 'avg_loss': 8.540861192276955, 'avg_acc': 49.989644653229895, 'loss': 9.228635787963867}


EP_train:2:  93%|| 25662/27626 [1:00:28<04:37,  7.07it/s]

{'epoch': 2, 'iter': 25660, 'avg_loss': 8.540878170691752, 'avg_acc': 49.98794376680566, 'loss': 8.793035507202148}


EP_train:2:  93%|| 25672/27626 [1:00:29<04:37,  7.04it/s]

{'epoch': 2, 'iter': 25670, 'avg_loss': 8.540920551999655, 'avg_acc': 49.98819192863542, 'loss': 8.558395385742188}


EP_train:2:  93%|| 25682/27626 [1:00:31<04:35,  7.07it/s]

{'epoch': 2, 'iter': 25680, 'avg_loss': 8.540975546262183, 'avg_acc': 49.98965675012656, 'loss': 8.457602500915527}


EP_train:2:  93%|| 25692/27626 [1:00:32<04:36,  6.99it/s]

{'epoch': 2, 'iter': 25690, 'avg_loss': 8.54117428931167, 'avg_acc': 49.989539138219605, 'loss': 9.616031646728516}


EP_train:2:  93%|| 25702/27626 [1:00:33<04:34,  7.02it/s]

{'epoch': 2, 'iter': 25700, 'avg_loss': 8.541236044882726, 'avg_acc': 49.98869207423836, 'loss': 8.0975980758667}


EP_train:2:  93%|| 25712/27626 [1:00:35<04:30,  7.07it/s]

{'epoch': 2, 'iter': 25710, 'avg_loss': 8.541262837479298, 'avg_acc': 49.98893955894364, 'loss': 7.893409729003906}


EP_train:2:  93%|| 25722/27626 [1:00:36<04:30,  7.04it/s]

{'epoch': 2, 'iter': 25720, 'avg_loss': 8.541231950525779, 'avg_acc': 49.988457874888226, 'loss': 8.718037605285645}


EP_train:2:  93%|| 25732/27626 [1:00:38<04:27,  7.07it/s]

{'epoch': 2, 'iter': 25730, 'avg_loss': 8.541288806603507, 'avg_acc': 49.98749076988846, 'loss': 9.459940910339355}


EP_train:2:  93%|| 25742/27626 [1:00:39<04:26,  7.08it/s]

{'epoch': 2, 'iter': 25740, 'avg_loss': 8.541489594911049, 'avg_acc': 49.98798123616021, 'loss': 8.449734687805176}


EP_train:2:  93%|| 25752/27626 [1:00:40<04:25,  7.06it/s]

{'epoch': 2, 'iter': 25750, 'avg_loss': 8.541573390410361, 'avg_acc': 49.98701506737602, 'loss': 8.486467361450195}


EP_train:2:  93%|| 25762/27626 [1:00:42<04:23,  7.07it/s]

{'epoch': 2, 'iter': 25760, 'avg_loss': 8.541539658560183, 'avg_acc': 49.986049648693765, 'loss': 8.586031913757324}


EP_train:2:  93%|| 25772/27626 [1:00:43<04:24,  7.02it/s]

{'epoch': 2, 'iter': 25770, 'avg_loss': 8.54154285630221, 'avg_acc': 49.984842458577475, 'loss': 7.701635360717773}


EP_train:2:  93%|| 25782/27626 [1:00:45<04:23,  7.00it/s]

{'epoch': 2, 'iter': 25780, 'avg_loss': 8.541547135022054, 'avg_acc': 49.98460591133005, 'loss': 9.707490921020508}


EP_train:2:  93%|| 25792/27626 [1:00:46<04:22,  7.00it/s]

{'epoch': 2, 'iter': 25790, 'avg_loss': 8.541584782443065, 'avg_acc': 49.98461188011321, 'loss': 8.559053421020508}


EP_train:2:  93%|| 25802/27626 [1:00:47<04:17,  7.09it/s]

{'epoch': 2, 'iter': 25800, 'avg_loss': 8.541487513279334, 'avg_acc': 49.9858290376342, 'loss': 8.40580940246582}


EP_train:2:  93%|| 25812/27626 [1:00:49<04:19,  7.00it/s]

{'epoch': 2, 'iter': 25810, 'avg_loss': 8.541421569140526, 'avg_acc': 49.98559238309248, 'loss': 8.88584041595459}


EP_train:2:  93%|| 25822/27626 [1:00:50<04:14,  7.07it/s]

{'epoch': 2, 'iter': 25820, 'avg_loss': 8.541523688131564, 'avg_acc': 49.98620309050772, 'loss': 8.1238431930542}


EP_train:2:  94%|| 25832/27626 [1:00:52<04:14,  7.05it/s]

{'epoch': 2, 'iter': 25830, 'avg_loss': 8.541377802435278, 'avg_acc': 49.98354690101041, 'loss': 8.315740585327148}


EP_train:2:  94%|| 25842/27626 [1:00:53<04:12,  7.07it/s]

{'epoch': 2, 'iter': 25840, 'avg_loss': 8.54134191958732, 'avg_acc': 49.98319047250494, 'loss': 8.694708824157715}


EP_train:2:  94%|| 25852/27626 [1:00:55<04:12,  7.03it/s]

{'epoch': 2, 'iter': 25850, 'avg_loss': 8.541272244846875, 'avg_acc': 49.98428494062125, 'loss': 8.504053115844727}


EP_train:2:  94%|| 25862/27626 [1:00:56<04:10,  7.04it/s]

{'epoch': 2, 'iter': 25860, 'avg_loss': 8.541379189759551, 'avg_acc': 49.98646610726576, 'loss': 9.104884147644043}


EP_train:2:  94%|| 25872/27626 [1:00:57<04:10,  7.01it/s]

{'epoch': 2, 'iter': 25870, 'avg_loss': 8.541364217476326, 'avg_acc': 49.988766379343666, 'loss': 8.173843383789062}


EP_train:2:  94%|| 25882/27626 [1:00:59<04:08,  7.01it/s]

{'epoch': 2, 'iter': 25880, 'avg_loss': 8.54127606792486, 'avg_acc': 49.98925369962521, 'loss': 8.33768081665039}


EP_train:2:  94%|| 25892/27626 [1:01:00<04:10,  6.93it/s]

{'epoch': 2, 'iter': 25890, 'avg_loss': 8.541337951301244, 'avg_acc': 49.989619945154686, 'loss': 8.298369407653809}


EP_train:2:  94%|| 25902/27626 [1:01:02<04:05,  7.03it/s]

{'epoch': 2, 'iter': 25900, 'avg_loss': 8.541354727226816, 'avg_acc': 49.99034786301687, 'loss': 8.972254753112793}


EP_train:2:  94%|| 25912/27626 [1:01:03<04:02,  7.07it/s]

{'epoch': 2, 'iter': 25910, 'avg_loss': 8.541266944645722, 'avg_acc': 49.99131642931574, 'loss': 7.545388698577881}


EP_train:2:  94%|| 25922/27626 [1:01:05<04:01,  7.04it/s]

{'epoch': 2, 'iter': 25920, 'avg_loss': 8.541277275947843, 'avg_acc': 49.99156089657035, 'loss': 9.084863662719727}


EP_train:2:  94%|| 25932/27626 [1:01:06<04:02,  6.98it/s]

{'epoch': 2, 'iter': 25930, 'avg_loss': 8.541332135791329, 'avg_acc': 49.9910821025028, 'loss': 8.43997573852539}


EP_train:2:  94%|| 25942/27626 [1:01:07<03:59,  7.02it/s]

{'epoch': 2, 'iter': 25940, 'avg_loss': 8.541382272960107, 'avg_acc': 49.99289252534598, 'loss': 8.734918594360352}


EP_train:2:  94%|| 25952/27626 [1:01:09<03:58,  7.02it/s]

{'epoch': 2, 'iter': 25950, 'avg_loss': 8.5414135411527, 'avg_acc': 49.993497360410004, 'loss': 8.527288436889648}


EP_train:2:  94%|| 25962/27626 [1:01:10<03:58,  6.97it/s]

{'epoch': 2, 'iter': 25960, 'avg_loss': 8.541563301322263, 'avg_acc': 49.992657255113436, 'loss': 8.836241722106934}


EP_train:2:  94%|| 25972/27626 [1:01:12<03:55,  7.02it/s]

{'epoch': 2, 'iter': 25970, 'avg_loss': 8.541602528938446, 'avg_acc': 49.99398367409803, 'loss': 8.189557075500488}


EP_train:2:  94%|| 25982/27626 [1:01:13<03:54,  7.01it/s]

{'epoch': 2, 'iter': 25980, 'avg_loss': 8.54147932614884, 'avg_acc': 49.99446711058081, 'loss': 7.940934658050537}


EP_train:2:  94%|| 25992/27626 [1:01:14<03:51,  7.07it/s]

{'epoch': 2, 'iter': 25990, 'avg_loss': 8.541411872880795, 'avg_acc': 49.992906198299416, 'loss': 9.328607559204102}


EP_train:2:  94%|| 26002/27626 [1:01:16<03:49,  7.08it/s]

{'epoch': 2, 'iter': 26000, 'avg_loss': 8.541579409813103, 'avg_acc': 49.99266855120957, 'loss': 8.604010581970215}


EP_train:2:  94%|| 26012/27626 [1:01:17<03:48,  7.07it/s]

{'epoch': 2, 'iter': 26010, 'avg_loss': 8.54165575198248, 'avg_acc': 49.992190803890665, 'loss': 8.38241958618164}


EP_train:2:  94%|| 26022/27626 [1:01:19<03:46,  7.08it/s]

{'epoch': 2, 'iter': 26020, 'avg_loss': 8.541840651542822, 'avg_acc': 49.992073709696015, 'loss': 9.109641075134277}


EP_train:2:  94%|| 26032/27626 [1:01:20<03:45,  7.08it/s]

{'epoch': 2, 'iter': 26030, 'avg_loss': 8.541836899510495, 'avg_acc': 49.99147650877799, 'loss': 8.652711868286133}


EP_train:2:  94%|| 26042/27626 [1:01:21<03:44,  7.07it/s]

{'epoch': 2, 'iter': 26040, 'avg_loss': 8.541719410607243, 'avg_acc': 49.99171978802657, 'loss': 7.750870227813721}


EP_train:2:  94%|| 26052/27626 [1:01:23<03:42,  7.06it/s]

{'epoch': 2, 'iter': 26050, 'avg_loss': 8.541676569477362, 'avg_acc': 49.989923611377684, 'loss': 8.500567436218262}


EP_train:2:  94%|| 26062/27626 [1:01:24<03:40,  7.11it/s]

{'epoch': 2, 'iter': 26060, 'avg_loss': 8.541666030078723, 'avg_acc': 49.988728368059554, 'loss': 8.698382377624512}


EP_train:2:  94%|| 26072/27626 [1:01:26<03:39,  7.08it/s]

{'epoch': 2, 'iter': 26070, 'avg_loss': 8.541708727151065, 'avg_acc': 49.9900512063212, 'loss': 8.47748851776123}


EP_train:2:  94%|| 26082/27626 [1:01:27<03:37,  7.09it/s]

{'epoch': 2, 'iter': 26080, 'avg_loss': 8.54179136469971, 'avg_acc': 49.99041447797247, 'loss': 8.496688842773438}


EP_train:2:  94%|| 26092/27626 [1:01:28<03:36,  7.10it/s]

{'epoch': 2, 'iter': 26090, 'avg_loss': 8.541750644395663, 'avg_acc': 49.9876633705109, 'loss': 8.120814323425293}


EP_train:2:  94%|| 26102/27626 [1:01:30<03:35,  7.08it/s]

{'epoch': 2, 'iter': 26100, 'avg_loss': 8.541593904280763, 'avg_acc': 49.98623137044558, 'loss': 8.45751953125}


EP_train:2:  95%|| 26112/27626 [1:01:31<03:33,  7.09it/s]

{'epoch': 2, 'iter': 26110, 'avg_loss': 8.54167791556282, 'avg_acc': 49.98551855539811, 'loss': 8.986773490905762}


EP_train:2:  95%|| 26122/27626 [1:01:33<03:33,  7.06it/s]

{'epoch': 2, 'iter': 26120, 'avg_loss': 8.54177584267882, 'avg_acc': 49.98624191263734, 'loss': 9.028535842895508}


EP_train:2:  95%|| 26132/27626 [1:01:34<03:32,  7.03it/s]

{'epoch': 2, 'iter': 26130, 'avg_loss': 8.541893915834267, 'avg_acc': 49.98624717768168, 'loss': 9.232425689697266}


EP_train:2:  95%|| 26142/27626 [1:01:36<03:31,  7.01it/s]

{'epoch': 2, 'iter': 26140, 'avg_loss': 8.541969538017721, 'avg_acc': 49.98696970276577, 'loss': 8.819032669067383}


EP_train:2:  95%|| 26152/27626 [1:01:37<03:29,  7.03it/s]

{'epoch': 2, 'iter': 26150, 'avg_loss': 8.541995054029615, 'avg_acc': 49.98625769569041, 'loss': 7.94841194152832}


EP_train:2:  95%|| 26162/27626 [1:01:38<03:26,  7.10it/s]

{'epoch': 2, 'iter': 26160, 'avg_loss': 8.5419139769285, 'avg_acc': 49.98614349604373, 'loss': 8.143875122070312}


EP_train:2:  95%|| 26172/27626 [1:01:40<03:26,  7.03it/s]

{'epoch': 2, 'iter': 26170, 'avg_loss': 8.541807251425505, 'avg_acc': 49.985312941805816, 'loss': 8.04110050201416}


EP_train:2:  95%|| 26182/27626 [1:01:41<03:24,  7.06it/s]

{'epoch': 2, 'iter': 26180, 'avg_loss': 8.541759556716086, 'avg_acc': 49.98519919025247, 'loss': 8.20866584777832}


EP_train:2:  95%|| 26192/27626 [1:01:43<03:24,  7.02it/s]

{'epoch': 2, 'iter': 26190, 'avg_loss': 8.541860980105643, 'avg_acc': 49.98508552556222, 'loss': 7.94482421875}


EP_train:2:  95%|| 26202/27626 [1:01:44<03:21,  7.08it/s]

{'epoch': 2, 'iter': 26200, 'avg_loss': 8.541846591376844, 'avg_acc': 49.98592610969047, 'loss': 8.887280464172363}


EP_train:2:  95%|| 26212/27626 [1:01:45<03:18,  7.13it/s]

{'epoch': 2, 'iter': 26210, 'avg_loss': 8.541813940618217, 'avg_acc': 49.987481400938535, 'loss': 8.06716251373291}


EP_train:2:  95%|| 26222/27626 [1:01:47<03:19,  7.02it/s]

{'epoch': 2, 'iter': 26220, 'avg_loss': 8.541830736159334, 'avg_acc': 49.98689027878418, 'loss': 8.613617897033691}


EP_train:2:  95%|| 26232/27626 [1:01:48<03:19,  6.97it/s]

{'epoch': 2, 'iter': 26230, 'avg_loss': 8.54177897061273, 'avg_acc': 49.98808661507377, 'loss': 8.930059432983398}


EP_train:2:  95%|| 26242/27626 [1:01:50<03:16,  7.05it/s]

{'epoch': 2, 'iter': 26240, 'avg_loss': 8.541737899952539, 'avg_acc': 49.98701935901833, 'loss': 8.703909873962402}


EP_train:2:  95%|| 26252/27626 [1:01:51<03:13,  7.10it/s]

{'epoch': 2, 'iter': 26250, 'avg_loss': 8.541666743242283, 'avg_acc': 49.9867862176679, 'loss': 7.66221809387207}


EP_train:2:  95%|| 26262/27626 [1:01:53<03:14,  7.01it/s]

{'epoch': 2, 'iter': 26260, 'avg_loss': 8.541694500192023, 'avg_acc': 49.98738623814782, 'loss': 8.966954231262207}


EP_train:2:  95%|| 26272/27626 [1:01:54<03:12,  7.04it/s]

{'epoch': 2, 'iter': 26270, 'avg_loss': 8.541676609804105, 'avg_acc': 49.987509992006395, 'loss': 8.74028491973877}


EP_train:2:  95%|| 26282/27626 [1:01:55<03:10,  7.05it/s]

{'epoch': 2, 'iter': 26280, 'avg_loss': 8.541647602645353, 'avg_acc': 49.98703911571097, 'loss': 8.576369285583496}


EP_train:2:  95%|| 26292/27626 [1:01:57<03:08,  7.09it/s]

{'epoch': 2, 'iter': 26290, 'avg_loss': 8.541513416458884, 'avg_acc': 49.9872817694268, 'loss': 8.521612167358398}


EP_train:2:  95%|| 26302/27626 [1:01:58<03:06,  7.11it/s]

{'epoch': 2, 'iter': 26300, 'avg_loss': 8.541520736257475, 'avg_acc': 49.987048971522, 'loss': 7.7109198570251465}


EP_train:2:  95%|| 26312/27626 [1:02:00<03:06,  7.04it/s]

{'epoch': 2, 'iter': 26310, 'avg_loss': 8.541606969334264, 'avg_acc': 49.98634126411007, 'loss': 8.588318824768066}


EP_train:2:  95%|| 26322/27626 [1:02:01<03:04,  7.05it/s]

{'epoch': 2, 'iter': 26320, 'avg_loss': 8.541630511126082, 'avg_acc': 49.98527791497283, 'loss': 9.281343460083008}


EP_train:2:  95%|| 26332/27626 [1:02:02<03:03,  7.07it/s]

{'epoch': 2, 'iter': 26330, 'avg_loss': 8.541530726732944, 'avg_acc': 49.98469009912271, 'loss': 9.307987213134766}


EP_train:2:  95%|| 26342/27626 [1:02:04<03:02,  7.04it/s]

{'epoch': 2, 'iter': 26340, 'avg_loss': 8.541531848697481, 'avg_acc': 49.9844586386242, 'loss': 7.975474834442139}


EP_train:2:  95%|| 26352/27626 [1:02:05<03:00,  7.07it/s]

{'epoch': 2, 'iter': 26350, 'avg_loss': 8.541604437532563, 'avg_acc': 49.98482031042465, 'loss': 8.27925968170166}


EP_train:2:  95%|| 26362/27626 [1:02:07<02:58,  7.10it/s]

{'epoch': 2, 'iter': 26360, 'avg_loss': 8.541705219640992, 'avg_acc': 49.98494461515117, 'loss': 7.63166618347168}


EP_train:2:  95%|| 26372/27626 [1:02:08<02:57,  7.06it/s]

{'epoch': 2, 'iter': 26370, 'avg_loss': 8.541637454087686, 'avg_acc': 49.983172803458345, 'loss': 7.952117443084717}


EP_train:2:  95%|| 26382/27626 [1:02:09<02:55,  7.09it/s]

{'epoch': 2, 'iter': 26380, 'avg_loss': 8.54164158953162, 'avg_acc': 49.982231530267995, 'loss': 8.69134521484375}


EP_train:2:  96%|| 26392/27626 [1:02:11<02:54,  7.08it/s]

{'epoch': 2, 'iter': 26390, 'avg_loss': 8.541676337457607, 'avg_acc': 49.983777613580386, 'loss': 8.365341186523438}


EP_train:2:  96%|| 26402/27626 [1:02:12<02:52,  7.08it/s]

{'epoch': 2, 'iter': 26400, 'avg_loss': 8.54167453791444, 'avg_acc': 49.984493958562176, 'loss': 8.492341995239258}


EP_train:2:  96%|| 26412/27626 [1:02:14<02:54,  6.97it/s]

{'epoch': 2, 'iter': 26410, 'avg_loss': 8.541743718563579, 'avg_acc': 49.985091439173075, 'loss': 8.794921875}


EP_train:2:  96%|| 26422/27626 [1:02:15<02:50,  7.05it/s]

{'epoch': 2, 'iter': 26420, 'avg_loss': 8.541663609194966, 'avg_acc': 49.985215358994736, 'loss': 8.257664680480957}


EP_train:2:  96%|| 26432/27626 [1:02:16<02:48,  7.09it/s]

{'epoch': 2, 'iter': 26430, 'avg_loss': 8.54167630071605, 'avg_acc': 49.98581211456244, 'loss': 9.3237886428833}


EP_train:2:  96%|| 26442/27626 [1:02:18<02:47,  7.08it/s]

{'epoch': 2, 'iter': 26440, 'avg_loss': 8.541631649736242, 'avg_acc': 49.985344729775726, 'loss': 8.81718635559082}


EP_train:2:  96%|| 26452/27626 [1:02:19<02:48,  6.95it/s]

{'epoch': 2, 'iter': 26450, 'avg_loss': 8.541542244977858, 'avg_acc': 49.985586556273866, 'loss': 9.494277954101562}


EP_train:2:  96%|| 26462/27626 [1:02:21<02:44,  7.09it/s]

{'epoch': 2, 'iter': 26460, 'avg_loss': 8.541595158764919, 'avg_acc': 49.98842636332716, 'loss': 8.069937705993652}


EP_train:2:  96%|| 26472/27626 [1:02:22<02:42,  7.10it/s]

{'epoch': 2, 'iter': 26470, 'avg_loss': 8.54168472305891, 'avg_acc': 49.98925711155604, 'loss': 9.063129425048828}


EP_train:2:  96%|| 26482/27626 [1:02:24<02:42,  7.04it/s]

{'epoch': 2, 'iter': 26480, 'avg_loss': 8.541661279456573, 'avg_acc': 49.98796306785997, 'loss': 8.6681489944458}


EP_train:2:  96%|| 26492/27626 [1:02:25<02:39,  7.09it/s]

{'epoch': 2, 'iter': 26490, 'avg_loss': 8.541694601855351, 'avg_acc': 49.98702389490771, 'loss': 9.067516326904297}


EP_train:2:  96%|| 26502/27626 [1:02:26<02:40,  7.01it/s]

{'epoch': 2, 'iter': 26500, 'avg_loss': 8.541752533077595, 'avg_acc': 49.985731670503, 'loss': 8.24285888671875}


EP_train:2:  96%|| 26512/27626 [1:02:28<02:38,  7.02it/s]

{'epoch': 2, 'iter': 26510, 'avg_loss': 8.541661368901698, 'avg_acc': 49.98491192335257, 'loss': 8.807121276855469}


EP_train:2:  96%|| 26522/27626 [1:02:29<02:36,  7.04it/s]

{'epoch': 2, 'iter': 26520, 'avg_loss': 8.54152419213719, 'avg_acc': 49.98468195015271, 'loss': 8.201951026916504}


EP_train:2:  96%|| 26532/27626 [1:02:31<02:34,  7.09it/s]

{'epoch': 2, 'iter': 26530, 'avg_loss': 8.541608065037618, 'avg_acc': 49.984687723794806, 'loss': 7.958430767059326}


EP_train:2:  96%|| 26542/27626 [1:02:32<02:33,  7.07it/s]

{'epoch': 2, 'iter': 26540, 'avg_loss': 8.541574392181824, 'avg_acc': 49.98539994725142, 'loss': 8.689237594604492}


EP_train:2:  96%|| 26552/27626 [1:02:33<02:32,  7.06it/s]

{'epoch': 2, 'iter': 26550, 'avg_loss': 8.54153318124268, 'avg_acc': 49.98517005009228, 'loss': 8.988648414611816}


EP_train:2:  96%|| 26562/27626 [1:02:35<02:30,  7.08it/s]

{'epoch': 2, 'iter': 26560, 'avg_loss': 8.541527933467785, 'avg_acc': 49.98646982417831, 'loss': 8.420913696289062}


EP_train:2:  96%|| 26572/27626 [1:02:36<02:30,  6.98it/s]

{'epoch': 2, 'iter': 26570, 'avg_loss': 8.541384968659466, 'avg_acc': 49.987651010500166, 'loss': 8.193655014038086}


EP_train:2:  96%|| 26582/27626 [1:02:38<02:28,  7.05it/s]

{'epoch': 2, 'iter': 26580, 'avg_loss': 8.541422317566306, 'avg_acc': 49.98671513487077, 'loss': 8.065140724182129}


EP_train:2:  96%|| 26592/27626 [1:02:39<02:26,  7.04it/s]

{'epoch': 2, 'iter': 26590, 'avg_loss': 8.541345440715116, 'avg_acc': 49.985074837350986, 'loss': 7.975736618041992}


EP_train:2:  96%|| 26602/27626 [1:02:41<02:25,  7.05it/s]

{'epoch': 2, 'iter': 26600, 'avg_loss': 8.541350088900701, 'avg_acc': 49.98496297131687, 'loss': 8.38315486907959}


EP_train:2:  96%|| 26612/27626 [1:02:42<02:23,  7.07it/s]

{'epoch': 2, 'iter': 26610, 'avg_loss': 8.541385869996702, 'avg_acc': 49.9842640261546, 'loss': 8.003666877746582}


EP_train:2:  96%|| 26622/27626 [1:02:43<02:21,  7.08it/s]

{'epoch': 2, 'iter': 26620, 'avg_loss': 8.5413674641003, 'avg_acc': 49.985795988129674, 'loss': 8.247517585754395}


EP_train:2:  96%|| 26632/27626 [1:02:45<02:20,  7.08it/s]

{'epoch': 2, 'iter': 26630, 'avg_loss': 8.5414224473835, 'avg_acc': 49.98544928842327, 'loss': 8.027132987976074}


EP_train:2:  96%|| 26642/27626 [1:02:46<02:19,  7.06it/s]

{'epoch': 2, 'iter': 26640, 'avg_loss': 8.541394680274204, 'avg_acc': 49.985454750197064, 'loss': 8.540149688720703}


EP_train:2:  96%|| 26652/27626 [1:02:48<02:18,  7.03it/s]

{'epoch': 2, 'iter': 26650, 'avg_loss': 8.541418262138308, 'avg_acc': 49.98522569509587, 'loss': 8.64055061340332}


EP_train:2:  97%|| 26662/27626 [1:02:49<02:17,  7.03it/s]

{'epoch': 2, 'iter': 26660, 'avg_loss': 8.541317839368707, 'avg_acc': 49.98476238700724, 'loss': 8.69528865814209}


EP_train:2:  97%|| 26672/27626 [1:02:50<02:14,  7.07it/s]

{'epoch': 2, 'iter': 26670, 'avg_loss': 8.541374544821844, 'avg_acc': 49.985588279404595, 'loss': 9.042598724365234}


EP_train:2:  97%|| 26682/27626 [1:02:52<02:13,  7.05it/s]

{'epoch': 2, 'iter': 26680, 'avg_loss': 8.54151633838356, 'avg_acc': 49.98594505453319, 'loss': 8.628490447998047}


EP_train:2:  97%|| 26692/27626 [1:02:53<02:13,  7.01it/s]

{'epoch': 2, 'iter': 26690, 'avg_loss': 8.541476131142076, 'avg_acc': 49.98641864298827, 'loss': 8.614072799682617}


EP_train:2:  97%|| 26702/27626 [1:02:55<02:10,  7.09it/s]

{'epoch': 2, 'iter': 26700, 'avg_loss': 8.54139929113127, 'avg_acc': 49.98700891352384, 'loss': 8.44412899017334}


EP_train:2:  97%|| 26712/27626 [1:02:56<02:09,  7.08it/s]

{'epoch': 2, 'iter': 26710, 'avg_loss': 8.541268773136222, 'avg_acc': 49.98642881209988, 'loss': 9.125447273254395}


EP_train:2:  97%|| 26722/27626 [1:02:57<02:07,  7.11it/s]

{'epoch': 2, 'iter': 26720, 'avg_loss': 8.541286276885883, 'avg_acc': 49.987837281538866, 'loss': 8.372984886169434}


EP_train:2:  97%|| 26732/27626 [1:02:59<02:05,  7.11it/s]

{'epoch': 2, 'iter': 26730, 'avg_loss': 8.541221907651588, 'avg_acc': 49.98760802065018, 'loss': 8.112828254699707}


EP_train:2:  97%|| 26742/27626 [1:03:00<02:04,  7.09it/s]

{'epoch': 2, 'iter': 26740, 'avg_loss': 8.541317967557902, 'avg_acc': 49.98808010171646, 'loss': 10.040114402770996}


EP_train:2:  97%|| 26752/27626 [1:03:02<02:03,  7.05it/s]

{'epoch': 2, 'iter': 26750, 'avg_loss': 8.541220696064865, 'avg_acc': 49.9872668311465, 'loss': 7.235921382904053}


EP_train:2:  97%|| 26762/27626 [1:03:03<02:02,  7.06it/s]

{'epoch': 2, 'iter': 26760, 'avg_loss': 8.541266059390829, 'avg_acc': 49.986687717200404, 'loss': 8.5537691116333}


EP_train:2:  97%|| 26772/27626 [1:03:05<02:01,  7.05it/s]

{'epoch': 2, 'iter': 26770, 'avg_loss': 8.541216512695515, 'avg_acc': 49.98669268985096, 'loss': 8.233091354370117}


EP_train:2:  97%|| 26782/27626 [1:03:06<01:59,  7.05it/s]

{'epoch': 2, 'iter': 26780, 'avg_loss': 8.541280300959698, 'avg_acc': 49.98459728912288, 'loss': 9.01349925994873}


EP_train:2:  97%|| 26792/27626 [1:03:07<01:57,  7.07it/s]

{'epoch': 2, 'iter': 26790, 'avg_loss': 8.541257290151172, 'avg_acc': 49.98448639468478, 'loss': 8.094971656799316}


EP_train:2:  97%|| 26802/27626 [1:03:09<01:56,  7.07it/s]

{'epoch': 2, 'iter': 26800, 'avg_loss': 8.54123073131094, 'avg_acc': 49.982276780717136, 'loss': 8.302924156188965}


EP_train:2:  97%|| 26812/27626 [1:03:10<01:54,  7.09it/s]

{'epoch': 2, 'iter': 26810, 'avg_loss': 8.54125304312989, 'avg_acc': 49.98379862742904, 'loss': 9.275985717773438}


EP_train:2:  97%|| 26822/27626 [1:03:12<01:53,  7.10it/s]

{'epoch': 2, 'iter': 26820, 'avg_loss': 8.541258140812294, 'avg_acc': 49.983921181164014, 'loss': 7.867909908294678}


EP_train:2:  97%|| 26832/27626 [1:03:13<01:52,  7.06it/s]

{'epoch': 2, 'iter': 26830, 'avg_loss': 8.541213733929295, 'avg_acc': 49.98020014162722, 'loss': 8.57477855682373}


EP_train:2:  97%|| 26842/27626 [1:03:14<01:51,  7.01it/s]

{'epoch': 2, 'iter': 26840, 'avg_loss': 8.541267771838315, 'avg_acc': 49.98090607652472, 'loss': 7.769962310791016}


EP_train:2:  97%|| 26852/27626 [1:03:16<01:49,  7.06it/s]

{'epoch': 2, 'iter': 26850, 'avg_loss': 8.541325811149777, 'avg_acc': 49.98102957059327, 'loss': 8.554271697998047}


EP_train:2:  97%|| 26862/27626 [1:03:17<01:48,  7.05it/s]

{'epoch': 2, 'iter': 26860, 'avg_loss': 8.541322247589084, 'avg_acc': 49.98196735043371, 'loss': 8.80081844329834}


EP_train:2:  97%|| 26872/27626 [1:03:19<01:46,  7.08it/s]

{'epoch': 2, 'iter': 26870, 'avg_loss': 8.541246526867981, 'avg_acc': 49.97964813367571, 'loss': 7.954354763031006}


EP_train:2:  97%|| 26882/27626 [1:03:20<01:45,  7.08it/s]

{'epoch': 2, 'iter': 26880, 'avg_loss': 8.541302899334084, 'avg_acc': 49.97895818607939, 'loss': 8.592592239379883}


EP_train:2:  97%|| 26892/27626 [1:03:21<01:43,  7.07it/s]

{'epoch': 2, 'iter': 26890, 'avg_loss': 8.54136408130285, 'avg_acc': 49.97803633185824, 'loss': 9.08027458190918}


EP_train:2:  97%|| 26902/27626 [1:03:23<01:42,  7.07it/s]

{'epoch': 2, 'iter': 26900, 'avg_loss': 8.541327580444555, 'avg_acc': 49.978160663172375, 'loss': 8.026792526245117}


EP_train:2:  97%|| 26912/27626 [1:03:24<01:41,  7.06it/s]

{'epoch': 2, 'iter': 26910, 'avg_loss': 8.541262368339744, 'avg_acc': 49.9774720374568, 'loss': 8.844991683959961}


EP_train:2:  97%|| 26922/27626 [1:03:26<01:40,  7.01it/s]

{'epoch': 2, 'iter': 26920, 'avg_loss': 8.541456832480481, 'avg_acc': 49.97759648601463, 'loss': 8.732260704040527}


EP_train:2:  97%|| 26932/27626 [1:03:27<01:38,  7.05it/s]

{'epoch': 2, 'iter': 26930, 'avg_loss': 8.541433238030578, 'avg_acc': 49.97748876759125, 'loss': 8.513847351074219}


EP_train:2:  98%|| 26942/27626 [1:03:28<01:37,  7.03it/s]

{'epoch': 2, 'iter': 26940, 'avg_loss': 8.541396375107945, 'avg_acc': 49.97622118703834, 'loss': 8.343334197998047}


EP_train:2:  98%|| 26952/27626 [1:03:30<01:35,  7.06it/s]

{'epoch': 2, 'iter': 26950, 'avg_loss': 8.541375436577955, 'avg_acc': 49.976925717042036, 'loss': 9.263853073120117}


EP_train:2:  98%|| 26962/27626 [1:03:31<01:34,  7.04it/s]

{'epoch': 2, 'iter': 26960, 'avg_loss': 8.541439911771, 'avg_acc': 49.979136530544125, 'loss': 8.223627090454102}


EP_train:2:  98%|| 26972/27626 [1:03:33<01:32,  7.03it/s]

{'epoch': 2, 'iter': 26970, 'avg_loss': 8.541406863921432, 'avg_acc': 49.976942827481366, 'loss': 8.379570007324219}


EP_train:2:  98%|| 26982/27626 [1:03:34<01:30,  7.08it/s]

{'epoch': 2, 'iter': 26980, 'avg_loss': 8.54130892239138, 'avg_acc': 49.97521403950928, 'loss': 8.174860000610352}


EP_train:2:  98%|| 26992/27626 [1:03:36<01:29,  7.06it/s]

{'epoch': 2, 'iter': 26990, 'avg_loss': 8.541215594738544, 'avg_acc': 49.97302341521248, 'loss': 8.732463836669922}


EP_train:2:  98%|| 27002/27626 [1:03:37<01:28,  7.06it/s]

{'epoch': 2, 'iter': 27000, 'avg_loss': 8.541206186830783, 'avg_acc': 49.97372782489538, 'loss': 8.071234703063965}


EP_train:2:  98%|| 27012/27626 [1:03:38<01:26,  7.06it/s]

{'epoch': 2, 'iter': 27010, 'avg_loss': 8.541140328323575, 'avg_acc': 49.97408463218689, 'loss': 8.884795188903809}


EP_train:2:  98%|| 27022/27626 [1:03:40<01:25,  7.05it/s]

{'epoch': 2, 'iter': 27020, 'avg_loss': 8.541214026422205, 'avg_acc': 49.97328466748085, 'loss': 8.836529731750488}


EP_train:2:  98%|| 27032/27626 [1:03:41<01:24,  7.03it/s]

{'epoch': 2, 'iter': 27030, 'avg_loss': 8.541132938918738, 'avg_acc': 49.973641374717914, 'loss': 7.963742733001709}


EP_train:2:  98%|| 27042/27626 [1:03:43<01:22,  7.05it/s]

{'epoch': 2, 'iter': 27040, 'avg_loss': 8.541218398939085, 'avg_acc': 49.97480677489738, 'loss': 8.323058128356934}


EP_train:2:  98%|| 27052/27626 [1:03:44<01:21,  7.03it/s]

{'epoch': 2, 'iter': 27050, 'avg_loss': 8.541113066058966, 'avg_acc': 49.9737763853462, 'loss': 8.117467880249023}


EP_train:2:  98%|| 27062/27626 [1:03:45<01:19,  7.08it/s]

{'epoch': 2, 'iter': 27060, 'avg_loss': 8.541129516218698, 'avg_acc': 49.97343963637707, 'loss': 9.265685081481934}


EP_train:2:  98%|| 27072/27626 [1:03:47<01:18,  7.03it/s]

{'epoch': 2, 'iter': 27070, 'avg_loss': 8.541231663815136, 'avg_acc': 49.97448838240183, 'loss': 8.968393325805664}


EP_train:2:  98%|| 27082/27626 [1:03:48<01:16,  7.07it/s]

{'epoch': 2, 'iter': 27080, 'avg_loss': 8.54120244244204, 'avg_acc': 49.975767143015396, 'loss': 7.963144302368164}


EP_train:2:  98%|| 27092/27626 [1:03:50<01:15,  7.05it/s]

{'epoch': 2, 'iter': 27090, 'avg_loss': 8.54130942624984, 'avg_acc': 49.97543003211399, 'loss': 9.003813743591309}


EP_train:2:  98%|| 27102/27626 [1:03:51<01:14,  7.04it/s]

{'epoch': 2, 'iter': 27100, 'avg_loss': 8.541369502427662, 'avg_acc': 49.97394007601196, 'loss': 8.425796508789062}


EP_train:2:  98%|| 27112/27626 [1:03:53<01:15,  6.84it/s]

{'epoch': 2, 'iter': 27110, 'avg_loss': 8.541305860200216, 'avg_acc': 49.974295488915935, 'loss': 8.50970458984375}


EP_train:2:  98%|| 27122/27626 [1:03:54<01:11,  7.09it/s]

{'epoch': 2, 'iter': 27120, 'avg_loss': 8.541284934824132, 'avg_acc': 49.97315272298219, 'loss': 8.850510597229004}


EP_train:2:  98%|| 27132/27626 [1:03:55<01:09,  7.09it/s]

{'epoch': 2, 'iter': 27130, 'avg_loss': 8.54129703975243, 'avg_acc': 49.973277800302235, 'loss': 7.887999534606934}


EP_train:2:  98%|| 27142/27626 [1:03:57<01:09,  6.99it/s]

{'epoch': 2, 'iter': 27140, 'avg_loss': 8.541379776670702, 'avg_acc': 49.9729422276261, 'loss': 8.551939964294434}


EP_train:2:  98%|| 27152/27626 [1:03:58<01:07,  7.07it/s]

{'epoch': 2, 'iter': 27150, 'avg_loss': 8.54145109225389, 'avg_acc': 49.972837096239545, 'loss': 8.910079956054688}


EP_train:2:  98%|| 27162/27626 [1:04:00<01:05,  7.07it/s]

{'epoch': 2, 'iter': 27160, 'avg_loss': 8.541551097293196, 'avg_acc': 49.97399764368028, 'loss': 8.400064468383789}


EP_train:2:  98%|| 27172/27626 [1:04:01<01:03,  7.11it/s]

{'epoch': 2, 'iter': 27170, 'avg_loss': 8.541593943831716, 'avg_acc': 49.97492731220787, 'loss': 8.478922843933105}


EP_train:2:  98%|| 27182/27626 [1:04:02<01:03,  7.01it/s]

{'epoch': 2, 'iter': 27180, 'avg_loss': 8.541408644444957, 'avg_acc': 49.97608623670946, 'loss': 7.766073703765869}


EP_train:2:  98%|| 27192/27626 [1:04:04<01:01,  7.02it/s]

{'epoch': 2, 'iter': 27190, 'avg_loss': 8.541417143229225, 'avg_acc': 49.97598010371079, 'loss': 9.589010238647461}


EP_train:2:  98%|| 27202/27626 [1:04:05<00:59,  7.09it/s]

{'epoch': 2, 'iter': 27200, 'avg_loss': 8.541442477910389, 'avg_acc': 49.97575916326606, 'loss': 8.503751754760742}


EP_train:2:  99%|| 27212/27626 [1:04:07<00:58,  7.07it/s]

{'epoch': 2, 'iter': 27210, 'avg_loss': 8.541373458544214, 'avg_acc': 49.97576807173569, 'loss': 8.60457706451416}


EP_train:2:  99%|| 27222/27626 [1:04:08<00:57,  7.00it/s]

{'epoch': 2, 'iter': 27220, 'avg_loss': 8.541409132916012, 'avg_acc': 49.97543257044194, 'loss': 8.72039794921875}


EP_train:2:  99%|| 27232/27626 [1:04:10<00:56,  7.01it/s]

{'epoch': 2, 'iter': 27230, 'avg_loss': 8.54135421557719, 'avg_acc': 49.975441592302886, 'loss': 9.23399829864502}


EP_train:2:  99%|| 27242/27626 [1:04:11<00:54,  7.07it/s]

{'epoch': 2, 'iter': 27240, 'avg_loss': 8.541279388696742, 'avg_acc': 49.974991740391324, 'loss': 7.833870887756348}


EP_train:2:  99%|| 27252/27626 [1:04:12<00:52,  7.08it/s]

{'epoch': 2, 'iter': 27250, 'avg_loss': 8.541385178417071, 'avg_acc': 49.975115592088365, 'loss': 8.324234962463379}


EP_train:2:  99%|| 27262/27626 [1:04:14<00:51,  7.08it/s]

{'epoch': 2, 'iter': 27260, 'avg_loss': 8.541329714737758, 'avg_acc': 49.97512472029639, 'loss': 8.592117309570312}


EP_train:2:  99%|| 27272/27626 [1:04:15<00:50,  7.06it/s]

{'epoch': 2, 'iter': 27270, 'avg_loss': 8.541316374331513, 'avg_acc': 49.97559220417293, 'loss': 8.105459213256836}


EP_train:2:  99%|| 27282/27626 [1:04:17<00:48,  7.08it/s]

{'epoch': 2, 'iter': 27280, 'avg_loss': 8.541257666639602, 'avg_acc': 49.97537205381035, 'loss': 7.930004596710205}


EP_train:2:  99%|| 27292/27626 [1:04:18<00:47,  7.09it/s]

{'epoch': 2, 'iter': 27290, 'avg_loss': 8.541392895463474, 'avg_acc': 49.97480854494156, 'loss': 9.754812240600586}


EP_train:2:  99%|| 27302/27626 [1:04:19<00:46,  7.04it/s]

{'epoch': 2, 'iter': 27300, 'avg_loss': 8.54144564828027, 'avg_acc': 49.97390205486978, 'loss': 8.873360633850098}


EP_train:2:  99%|| 27312/27626 [1:04:21<00:45,  6.97it/s]

{'epoch': 2, 'iter': 27310, 'avg_loss': 8.541456646311833, 'avg_acc': 49.97379718794625, 'loss': 9.17463493347168}


EP_train:2:  99%|| 27322/27626 [1:04:22<00:43,  7.04it/s]

{'epoch': 2, 'iter': 27320, 'avg_loss': 8.541536567108265, 'avg_acc': 49.973234874272535, 'loss': 8.500253677368164}


EP_train:2:  99%|| 27332/27626 [1:04:24<00:41,  7.11it/s]

{'epoch': 2, 'iter': 27330, 'avg_loss': 8.541531922173357, 'avg_acc': 49.973702023343456, 'loss': 8.326921463012695}


EP_train:2:  99%|| 27342/27626 [1:04:25<00:40,  7.09it/s]

{'epoch': 2, 'iter': 27340, 'avg_loss': 8.541476801717417, 'avg_acc': 49.97405453348451, 'loss': 8.599653244018555}


EP_train:2:  99%|| 27352/27626 [1:04:26<00:39,  7.00it/s]

{'epoch': 2, 'iter': 27350, 'avg_loss': 8.541574002619805, 'avg_acc': 49.975892106321524, 'loss': 8.819634437561035}


EP_train:2:  99%|| 27362/27626 [1:04:28<00:37,  7.08it/s]

{'epoch': 2, 'iter': 27360, 'avg_loss': 8.54167300048599, 'avg_acc': 49.97590091736413, 'loss': 9.068411827087402}


EP_train:2:  99%|| 27372/27626 [1:04:29<00:35,  7.06it/s]

{'epoch': 2, 'iter': 27370, 'avg_loss': 8.541721189485035, 'avg_acc': 49.974996346498116, 'loss': 8.559931755065918}


EP_train:2:  99%|| 27382/27626 [1:04:31<00:34,  7.04it/s]

{'epoch': 2, 'iter': 27380, 'avg_loss': 8.541749915754137, 'avg_acc': 49.97705982250466, 'loss': 7.809206008911133}


EP_train:2:  99%|| 27392/27626 [1:04:32<00:33,  7.05it/s]

{'epoch': 2, 'iter': 27390, 'avg_loss': 8.541849054776149, 'avg_acc': 49.97820908327553, 'loss': 8.547417640686035}


EP_train:2:  99%|| 27402/27626 [1:04:34<00:31,  7.04it/s]

{'epoch': 2, 'iter': 27400, 'avg_loss': 8.54193306724493, 'avg_acc': 49.97730466041385, 'loss': 8.685559272766113}


EP_train:2:  99%|| 27412/27626 [1:04:35<00:30,  7.00it/s]

{'epoch': 2, 'iter': 27410, 'avg_loss': 8.54178409956341, 'avg_acc': 49.976856918755246, 'loss': 8.28121280670166}


EP_train:2:  99%|| 27422/27626 [1:04:36<00:28,  7.08it/s]

{'epoch': 2, 'iter': 27420, 'avg_loss': 8.541681631376296, 'avg_acc': 49.97583968491302, 'loss': 8.033102989196777}


EP_train:2:  99%|| 27432/27626 [1:04:38<00:27,  7.06it/s]

{'epoch': 2, 'iter': 27430, 'avg_loss': 8.541653989611294, 'avg_acc': 49.976873792424634, 'loss': 8.407511711120605}


EP_train:2:  99%|| 27442/27626 [1:04:39<00:26,  7.07it/s]

{'epoch': 2, 'iter': 27440, 'avg_loss': 8.541663149860126, 'avg_acc': 49.97733774279363, 'loss': 8.400086402893066}


EP_train:2:  99%|| 27452/27626 [1:04:41<00:24,  7.08it/s]

{'epoch': 2, 'iter': 27450, 'avg_loss': 8.541552777637927, 'avg_acc': 49.97802903355069, 'loss': 7.797774791717529}


EP_train:2:  99%|| 27462/27626 [1:04:42<00:23,  7.04it/s]

{'epoch': 2, 'iter': 27460, 'avg_loss': 8.541535972745796, 'avg_acc': 49.97894741633589, 'loss': 8.1929349899292}


EP_train:2:  99%|| 27472/27626 [1:04:43<00:21,  7.06it/s]

{'epoch': 2, 'iter': 27470, 'avg_loss': 8.541418559671616, 'avg_acc': 49.98032015580066, 'loss': 8.998080253601074}


EP_train:2:  99%|| 27482/27626 [1:04:45<00:20,  7.09it/s]

{'epoch': 2, 'iter': 27480, 'avg_loss': 8.541419166035235, 'avg_acc': 49.98055474691605, 'loss': 8.758302688598633}


EP_train:2: 100%|| 27492/27626 [1:04:46<00:19,  7.05it/s]

{'epoch': 2, 'iter': 27490, 'avg_loss': 8.54143854641263, 'avg_acc': 49.98158488232512, 'loss': 8.465428352355957}


EP_train:2: 100%|| 27502/27626 [1:04:48<00:17,  7.03it/s]

{'epoch': 2, 'iter': 27500, 'avg_loss': 8.541452907682796, 'avg_acc': 49.98215973964583, 'loss': 8.70016860961914}


EP_train:2: 100%|| 27512/27626 [1:04:49<00:16,  7.04it/s]

{'epoch': 2, 'iter': 27510, 'avg_loss': 8.541479966705557, 'avg_acc': 49.98341572461925, 'loss': 8.496519088745117}


EP_train:2: 100%|| 27522/27626 [1:04:50<00:14,  6.99it/s]

{'epoch': 2, 'iter': 27520, 'avg_loss': 8.541409119266804, 'avg_acc': 49.98342175066313, 'loss': 8.803315162658691}


EP_train:2: 100%|| 27532/27626 [1:04:52<00:13,  7.00it/s]

{'epoch': 2, 'iter': 27530, 'avg_loss': 8.541485196812934, 'avg_acc': 49.98297373869456, 'loss': 8.74749755859375}


EP_train:2: 100%|| 27542/27626 [1:04:53<00:11,  7.06it/s]

{'epoch': 2, 'iter': 27540, 'avg_loss': 8.541445230036137, 'avg_acc': 49.98434152717766, 'loss': 8.803873062133789}


EP_train:2: 100%|| 27552/27626 [1:04:55<00:10,  7.09it/s]

{'epoch': 2, 'iter': 27550, 'avg_loss': 8.541668386720001, 'avg_acc': 49.98298609850822, 'loss': 9.349188804626465}


EP_train:2: 100%|| 27562/27626 [1:04:56<00:09,  7.03it/s]

{'epoch': 2, 'iter': 27560, 'avg_loss': 8.541611947868322, 'avg_acc': 49.982311962555784, 'loss': 8.512935638427734}


EP_train:2: 100%|| 27572/27626 [1:04:58<00:07,  7.08it/s]

{'epoch': 2, 'iter': 27570, 'avg_loss': 8.541573680665167, 'avg_acc': 49.98288509665954, 'loss': 7.989294052124023}


EP_train:2: 100%|| 27582/27626 [1:04:59<00:06,  7.04it/s]

{'epoch': 2, 'iter': 27580, 'avg_loss': 8.541571524165608, 'avg_acc': 49.98357111779848, 'loss': 8.130964279174805}


EP_train:2: 100%|| 27592/27626 [1:05:00<00:04,  7.00it/s]

{'epoch': 2, 'iter': 27590, 'avg_loss': 8.54166533780994, 'avg_acc': 49.982784241238086, 'loss': 9.391541481018066}


EP_train:2: 100%|| 27602/27626 [1:05:02<00:03,  7.07it/s]

{'epoch': 2, 'iter': 27600, 'avg_loss': 8.541786401851692, 'avg_acc': 49.98267725807036, 'loss': 7.8033928871154785}


EP_train:2: 100%|| 27612/27626 [1:05:03<00:01,  7.07it/s]

{'epoch': 2, 'iter': 27610, 'avg_loss': 8.541708317278244, 'avg_acc': 49.98347578863496, 'loss': 7.881045341491699}


EP_train:2: 100%|| 27622/27626 [1:05:05<00:00,  7.10it/s]

{'epoch': 2, 'iter': 27620, 'avg_loss': 8.541745354396317, 'avg_acc': 49.983029216900185, 'loss': 8.99925708770752}


EP_train:2: 100%|| 27626/27626 [1:05:05<00:00,  7.07it/s]


EP2_train, avg_loss= 8.541765704607537 total_acc= 49.98252288145084
EP:2 Model Saved on: ./wikitext_trained.model.ep2


EP_train:3:   0%|| 2/27626 [00:00<1:13:59,  6.22it/s]

{'epoch': 3, 'iter': 0, 'avg_loss': 8.941363334655762, 'avg_acc': 62.5, 'loss': 8.941363334655762}


EP_train:3:   0%|| 12/27626 [00:01<1:05:17,  7.05it/s]

{'epoch': 3, 'iter': 10, 'avg_loss': 8.477878180417148, 'avg_acc': 51.42045454545454, 'loss': 8.993285179138184}


EP_train:3:   0%|| 22/27626 [00:03<1:05:38,  7.01it/s]

{'epoch': 3, 'iter': 20, 'avg_loss': 8.541464555831183, 'avg_acc': 50.89285714285714, 'loss': 8.094441413879395}


EP_train:3:   0%|| 32/27626 [00:04<1:05:49,  6.99it/s]

{'epoch': 3, 'iter': 30, 'avg_loss': 8.437312664524201, 'avg_acc': 50.60483870967742, 'loss': 8.309529304504395}


EP_train:3:   0%|| 42/27626 [00:05<1:04:58,  7.07it/s]

{'epoch': 3, 'iter': 40, 'avg_loss': 8.490761489402956, 'avg_acc': 50.762195121951216, 'loss': 8.388762474060059}


EP_train:3:   0%|| 52/27626 [00:07<1:04:44,  7.10it/s]

{'epoch': 3, 'iter': 50, 'avg_loss': 8.517038242489685, 'avg_acc': 50.98039215686274, 'loss': 8.730222702026367}


EP_train:3:   0%|| 62/27626 [00:08<1:05:47,  6.98it/s]

{'epoch': 3, 'iter': 60, 'avg_loss': 8.539500854054435, 'avg_acc': 50.665983606557376, 'loss': 9.089186668395996}


EP_train:3:   0%|| 72/27626 [00:10<1:05:45,  6.98it/s]

{'epoch': 3, 'iter': 70, 'avg_loss': 8.577010859905833, 'avg_acc': 50.220070422535215, 'loss': 8.042672157287598}


EP_train:3:   0%|| 82/27626 [00:11<1:05:09,  7.05it/s]

{'epoch': 3, 'iter': 80, 'avg_loss': 8.550261468063166, 'avg_acc': 50.0, 'loss': 8.056671142578125}


EP_train:3:   0%|| 92/27626 [00:13<1:04:56,  7.07it/s]

{'epoch': 3, 'iter': 90, 'avg_loss': 8.571532631968404, 'avg_acc': 50.583791208791204, 'loss': 8.939767837524414}


EP_train:3:   0%|| 102/27626 [00:14<1:05:41,  6.98it/s]

{'epoch': 3, 'iter': 100, 'avg_loss': 8.558538498264728, 'avg_acc': 50.433168316831676, 'loss': 8.856060028076172}


EP_train:3:   0%|| 112/27626 [00:15<1:06:05,  6.94it/s]

{'epoch': 3, 'iter': 110, 'avg_loss': 8.56597666267876, 'avg_acc': 50.39414414414415, 'loss': 8.223311424255371}


EP_train:3:   0%|| 122/27626 [00:17<1:05:43,  6.97it/s]

{'epoch': 3, 'iter': 120, 'avg_loss': 8.532765999313229, 'avg_acc': 50.56818181818182, 'loss': 7.786491394042969}


EP_train:3:   0%|| 132/27626 [00:18<1:05:17,  7.02it/s]

{'epoch': 3, 'iter': 130, 'avg_loss': 8.517921575153148, 'avg_acc': 50.64408396946565, 'loss': 7.93011474609375}


EP_train:3:   1%|| 142/27626 [00:20<1:06:07,  6.93it/s]

{'epoch': 3, 'iter': 140, 'avg_loss': 8.517948515871739, 'avg_acc': 50.797872340425535, 'loss': 7.892404079437256}


EP_train:3:   1%|| 152/27626 [00:21<1:04:44,  7.07it/s]

{'epoch': 3, 'iter': 150, 'avg_loss': 8.491728094239898, 'avg_acc': 50.86920529801324, 'loss': 7.710724830627441}


EP_train:3:   1%|| 162/27626 [00:23<1:04:53,  7.05it/s]

{'epoch': 3, 'iter': 160, 'avg_loss': 8.497326077881807, 'avg_acc': 51.048136645962735, 'loss': 9.334342956542969}


EP_train:3:   1%|| 172/27626 [00:24<1:04:23,  7.11it/s]

{'epoch': 3, 'iter': 170, 'avg_loss': 8.481247045840437, 'avg_acc': 51.005116959064324, 'loss': 7.59337854385376}


EP_train:3:   1%|| 182/27626 [00:25<1:04:31,  7.09it/s]

{'epoch': 3, 'iter': 180, 'avg_loss': 8.485659952321765, 'avg_acc': 50.604281767955804, 'loss': 9.289432525634766}


EP_train:3:   1%|| 192/27626 [00:27<1:04:29,  7.09it/s]

{'epoch': 3, 'iter': 190, 'avg_loss': 8.493443848574973, 'avg_acc': 50.37630890052356, 'loss': 8.931809425354004}


EP_train:3:   1%|| 202/27626 [00:28<1:05:23,  6.99it/s]

{'epoch': 3, 'iter': 200, 'avg_loss': 8.506226885971145, 'avg_acc': 50.310945273631845, 'loss': 8.506278038024902}


EP_train:3:   1%|| 212/27626 [00:30<1:04:45,  7.06it/s]

{'epoch': 3, 'iter': 210, 'avg_loss': 8.507910821110151, 'avg_acc': 50.23696682464455, 'loss': 9.11433219909668}


EP_train:3:   1%|| 222/27626 [00:31<1:04:42,  7.06it/s]

{'epoch': 3, 'iter': 220, 'avg_loss': 8.520697453442741, 'avg_acc': 50.18382352941176, 'loss': 8.249678611755371}


EP_train:3:   1%|| 232/27626 [00:32<1:04:47,  7.05it/s]

{'epoch': 3, 'iter': 230, 'avg_loss': 8.508071228539272, 'avg_acc': 50.324675324675326, 'loss': 7.184711933135986}


EP_train:3:   1%|| 242/27626 [00:34<1:04:17,  7.10it/s]

{'epoch': 3, 'iter': 240, 'avg_loss': 8.504671375781173, 'avg_acc': 50.11670124481328, 'loss': 8.064407348632812}


EP_train:3:   1%|| 252/27626 [00:35<1:04:53,  7.03it/s]

{'epoch': 3, 'iter': 250, 'avg_loss': 8.489852831183201, 'avg_acc': 50.31125498007968, 'loss': 8.004436492919922}


EP_train:3:   1%|| 262/27626 [00:37<1:04:08,  7.11it/s]

{'epoch': 3, 'iter': 260, 'avg_loss': 8.494994110531277, 'avg_acc': 50.275383141762454, 'loss': 8.51147747039795}


EP_train:3:   1%|| 272/27626 [00:38<1:04:28,  7.07it/s]

{'epoch': 3, 'iter': 270, 'avg_loss': 8.482706398981524, 'avg_acc': 50.13837638376384, 'loss': 7.652774810791016}


EP_train:3:   1%|| 282/27626 [00:39<1:04:29,  7.07it/s]

{'epoch': 3, 'iter': 280, 'avg_loss': 8.474427951188273, 'avg_acc': 50.10008896797154, 'loss': 8.606172561645508}


EP_train:3:   1%|| 292/27626 [00:41<1:04:56,  7.01it/s]

{'epoch': 3, 'iter': 290, 'avg_loss': 8.46891291444654, 'avg_acc': 50.1073883161512, 'loss': 8.158742904663086}


EP_train:3:   1%|| 302/27626 [00:42<1:04:58,  7.01it/s]

{'epoch': 3, 'iter': 300, 'avg_loss': 8.480252281771941, 'avg_acc': 50.10382059800664, 'loss': 9.456006050109863}


EP_train:3:   1%|| 312/27626 [00:44<1:04:26,  7.07it/s]

{'epoch': 3, 'iter': 310, 'avg_loss': 8.488297736913061, 'avg_acc': 50.14067524115756, 'loss': 9.176676750183105}


EP_train:3:   1%|| 322/27626 [00:45<1:04:24,  7.07it/s]

{'epoch': 3, 'iter': 320, 'avg_loss': 8.496825755942277, 'avg_acc': 50.019470404984425, 'loss': 7.641320705413818}


EP_train:3:   1%|| 332/27626 [00:47<1:04:50,  7.02it/s]

{'epoch': 3, 'iter': 330, 'avg_loss': 8.49815493551986, 'avg_acc': 50.13217522658611, 'loss': 7.904150485992432}


EP_train:3:   1%|| 342/27626 [00:48<1:04:20,  7.07it/s]

{'epoch': 3, 'iter': 340, 'avg_loss': 8.504325289180901, 'avg_acc': 50.2474340175953, 'loss': 8.3369722366333}


EP_train:3:   1%|| 352/27626 [00:49<1:04:28,  7.05it/s]

{'epoch': 3, 'iter': 350, 'avg_loss': 8.510742017686198, 'avg_acc': 50.275997150997156, 'loss': 8.538374900817871}


EP_train:3:   1%|| 362/27626 [00:51<1:04:20,  7.06it/s]

{'epoch': 3, 'iter': 360, 'avg_loss': 8.520416665275341, 'avg_acc': 50.441481994459835, 'loss': 8.61372184753418}


EP_train:3:   1%|| 372/27626 [00:52<1:04:04,  7.09it/s]

{'epoch': 3, 'iter': 370, 'avg_loss': 8.521390235006328, 'avg_acc': 50.438005390835585, 'loss': 8.867507934570312}


EP_train:3:   1%|| 382/27626 [00:54<1:04:44,  7.01it/s]

{'epoch': 3, 'iter': 380, 'avg_loss': 8.529115725689985, 'avg_acc': 50.37729658792651, 'loss': 8.6111478805542}


EP_train:3:   1%|| 392/27626 [00:55<1:04:17,  7.06it/s]

{'epoch': 3, 'iter': 390, 'avg_loss': 8.522019094823266, 'avg_acc': 50.4156010230179, 'loss': 7.720302104949951}


EP_train:3:   1%|| 402/27626 [00:56<1:04:21,  7.05it/s]

{'epoch': 3, 'iter': 400, 'avg_loss': 8.527858655648933, 'avg_acc': 50.39744389027432, 'loss': 8.447526931762695}


EP_train:3:   1%|| 412/27626 [00:58<1:04:29,  7.03it/s]

{'epoch': 3, 'iter': 410, 'avg_loss': 8.532084982470584, 'avg_acc': 50.36496350364964, 'loss': 8.996048927307129}


EP_train:3:   2%|| 422/27626 [00:59<1:04:33,  7.02it/s]

{'epoch': 3, 'iter': 420, 'avg_loss': 8.530486376438459, 'avg_acc': 50.37114014251781, 'loss': 8.637948036193848}


EP_train:3:   2%|| 432/27626 [01:01<1:04:15,  7.05it/s]

{'epoch': 3, 'iter': 430, 'avg_loss': 8.532181520749686, 'avg_acc': 50.41328306264501, 'loss': 9.115068435668945}


EP_train:3:   2%|| 442/27626 [01:02<1:04:04,  7.07it/s]

{'epoch': 3, 'iter': 440, 'avg_loss': 8.535005569458008, 'avg_acc': 50.47477324263039, 'loss': 8.333597183227539}


EP_train:3:   2%|| 452/27626 [01:04<1:04:19,  7.04it/s]

{'epoch': 3, 'iter': 450, 'avg_loss': 8.541834372374542, 'avg_acc': 50.51967849223947, 'loss': 9.58956241607666}


EP_train:3:   2%|| 462/27626 [01:05<1:04:19,  7.04it/s]

{'epoch': 3, 'iter': 460, 'avg_loss': 8.55272966248353, 'avg_acc': 50.515184381778745, 'loss': 9.078495979309082}


EP_train:3:   2%|| 472/27626 [01:06<1:04:49,  6.98it/s]

{'epoch': 3, 'iter': 470, 'avg_loss': 8.55723964180916, 'avg_acc': 50.424628450106155, 'loss': 8.27790355682373}


EP_train:3:   2%|| 482/27626 [01:08<1:03:59,  7.07it/s]

{'epoch': 3, 'iter': 480, 'avg_loss': 8.560969211951114, 'avg_acc': 50.40280665280665, 'loss': 8.74356746673584}


EP_train:3:   2%|| 492/27626 [01:09<1:04:06,  7.06it/s]

{'epoch': 3, 'iter': 490, 'avg_loss': 8.561125136684254, 'avg_acc': 50.311863543788185, 'loss': 8.069345474243164}


EP_train:3:   2%|| 502/27626 [01:11<1:03:59,  7.06it/s]

{'epoch': 3, 'iter': 500, 'avg_loss': 8.561601851990599, 'avg_acc': 50.249500998003995, 'loss': 9.004633903503418}


EP_train:3:   2%|| 512/27626 [01:12<1:03:37,  7.10it/s]

{'epoch': 3, 'iter': 510, 'avg_loss': 8.555228651386418, 'avg_acc': 50.207925636007836, 'loss': 9.095707893371582}


EP_train:3:   2%|| 522/27626 [01:13<1:04:35,  6.99it/s]

{'epoch': 3, 'iter': 520, 'avg_loss': 8.559258424389157, 'avg_acc': 50.071976967370446, 'loss': 8.529143333435059}


EP_train:3:   2%|| 532/27626 [01:15<1:03:51,  7.07it/s]

{'epoch': 3, 'iter': 530, 'avg_loss': 8.557444919076119, 'avg_acc': 50.04708097928437, 'loss': 8.68587875366211}


EP_train:3:   2%|| 542/27626 [01:16<1:03:41,  7.09it/s]

{'epoch': 3, 'iter': 540, 'avg_loss': 8.562103888464062, 'avg_acc': 50.086645101663585, 'loss': 8.129003524780273}


EP_train:3:   2%|| 552/27626 [01:18<1:04:33,  6.99it/s]

{'epoch': 3, 'iter': 550, 'avg_loss': 8.558913751868717, 'avg_acc': 50.045372050816695, 'loss': 8.727669715881348}


EP_train:3:   2%|| 562/27626 [01:19<1:04:09,  7.03it/s]

{'epoch': 3, 'iter': 560, 'avg_loss': 8.559106309146166, 'avg_acc': 49.95543672014261, 'loss': 8.448206901550293}


EP_train:3:   2%|| 572/27626 [01:20<1:03:59,  7.05it/s]

{'epoch': 3, 'iter': 570, 'avg_loss': 8.556563237085024, 'avg_acc': 49.99452714535902, 'loss': 9.356882095336914}


EP_train:3:   2%|| 582/27626 [01:22<1:03:58,  7.05it/s]

{'epoch': 3, 'iter': 580, 'avg_loss': 8.551149410963468, 'avg_acc': 49.96234939759036, 'loss': 8.381081581115723}


EP_train:3:   2%|| 592/27626 [01:23<1:03:36,  7.08it/s]

{'epoch': 3, 'iter': 590, 'avg_loss': 8.552233831531506, 'avg_acc': 49.978849407783414, 'loss': 8.184819221496582}


EP_train:3:   2%|| 602/27626 [01:25<1:03:36,  7.08it/s]

{'epoch': 3, 'iter': 600, 'avg_loss': 8.551979802015815, 'avg_acc': 49.963602329450914, 'loss': 8.332802772521973}


EP_train:3:   2%|| 612/27626 [01:26<1:03:16,  7.12it/s]

{'epoch': 3, 'iter': 610, 'avg_loss': 8.554477551955053, 'avg_acc': 49.95396890343699, 'loss': 8.422992706298828}


EP_train:3:   2%|| 622/27626 [01:28<1:04:11,  7.01it/s]

{'epoch': 3, 'iter': 620, 'avg_loss': 8.553065118006462, 'avg_acc': 49.92954911433172, 'loss': 8.875116348266602}


EP_train:3:   2%|| 632/27626 [01:29<1:03:49,  7.05it/s]

{'epoch': 3, 'iter': 630, 'avg_loss': 8.548816949176334, 'avg_acc': 49.99009508716323, 'loss': 7.794764518737793}


EP_train:3:   2%|| 642/27626 [01:30<1:03:31,  7.08it/s]

{'epoch': 3, 'iter': 640, 'avg_loss': 8.544348692187281, 'avg_acc': 49.9804992199688, 'loss': 8.281596183776855}


EP_train:3:   2%|| 652/27626 [01:32<1:03:39,  7.06it/s]

{'epoch': 3, 'iter': 650, 'avg_loss': 8.545745215291435, 'avg_acc': 49.93759600614439, 'loss': 8.30837631225586}


EP_train:3:   2%|| 662/27626 [01:33<1:03:56,  7.03it/s]

{'epoch': 3, 'iter': 660, 'avg_loss': 8.541685197429109, 'avg_acc': 49.89599092284418, 'loss': 8.325224876403809}


EP_train:3:   2%|| 672/27626 [01:35<1:03:35,  7.06it/s]

{'epoch': 3, 'iter': 670, 'avg_loss': 8.54210517587676, 'avg_acc': 49.89754098360656, 'loss': 8.60731029510498}


EP_train:3:   2%|| 682/27626 [01:36<1:03:33,  7.07it/s]

{'epoch': 3, 'iter': 680, 'avg_loss': 8.5355142496756, 'avg_acc': 49.926578560939795, 'loss': 7.417570114135742}


EP_train:3:   3%|| 692/27626 [01:37<1:03:29,  7.07it/s]

{'epoch': 3, 'iter': 690, 'avg_loss': 8.534600149532821, 'avg_acc': 49.92764109985528, 'loss': 8.060323715209961}


EP_train:3:   3%|| 702/27626 [01:39<1:03:49,  7.03it/s]

{'epoch': 3, 'iter': 700, 'avg_loss': 8.530634752863994, 'avg_acc': 49.88409415121255, 'loss': 8.734498977661133}


EP_train:3:   3%|| 712/27626 [01:40<1:03:28,  7.07it/s]

{'epoch': 3, 'iter': 710, 'avg_loss': 8.532563761484438, 'avg_acc': 49.854957805907176, 'loss': 8.550423622131348}


EP_train:3:   3%|| 722/27626 [01:42<1:03:16,  7.09it/s]

{'epoch': 3, 'iter': 720, 'avg_loss': 8.527392831159531, 'avg_acc': 49.84830097087379, 'loss': 8.721324920654297}


EP_train:3:   3%|| 732/27626 [01:43<1:02:59,  7.11it/s]

{'epoch': 3, 'iter': 730, 'avg_loss': 8.533295360700865, 'avg_acc': 49.9187756497948, 'loss': 9.143728256225586}


EP_train:3:   3%|| 742/27626 [01:44<1:03:24,  7.07it/s]

{'epoch': 3, 'iter': 740, 'avg_loss': 8.533076860322321, 'avg_acc': 49.95360998650472, 'loss': 8.433733940124512}


EP_train:3:   3%|| 752/27626 [01:46<1:03:32,  7.05it/s]

{'epoch': 3, 'iter': 750, 'avg_loss': 8.536030872207824, 'avg_acc': 49.95006657789614, 'loss': 8.786513328552246}


EP_train:3:   3%|| 762/27626 [01:47<1:06:54,  6.69it/s]

{'epoch': 3, 'iter': 760, 'avg_loss': 8.536492765654716, 'avg_acc': 49.94250985545335, 'loss': 8.778233528137207}


EP_train:3:   3%|| 772/27626 [01:49<1:03:20,  7.07it/s]

{'epoch': 3, 'iter': 770, 'avg_loss': 8.539478569797659, 'avg_acc': 49.88651102464332, 'loss': 9.497164726257324}


EP_train:3:   3%|| 782/27626 [01:50<1:03:18,  7.07it/s]

{'epoch': 3, 'iter': 780, 'avg_loss': 8.540122969355076, 'avg_acc': 49.8839628681178, 'loss': 9.017867088317871}


EP_train:3:   3%|| 792/27626 [01:52<1:03:10,  7.08it/s]

{'epoch': 3, 'iter': 790, 'avg_loss': 8.538139017734455, 'avg_acc': 49.8972819216182, 'loss': 7.639129161834717}


EP_train:3:   3%|| 802/27626 [01:53<1:03:06,  7.08it/s]

{'epoch': 3, 'iter': 800, 'avg_loss': 8.533223774251569, 'avg_acc': 49.9102684144819, 'loss': 9.06872844696045}


EP_train:3:   3%|| 812/27626 [01:54<1:03:06,  7.08it/s]

{'epoch': 3, 'iter': 810, 'avg_loss': 8.532037657962334, 'avg_acc': 49.9113748458693, 'loss': 8.760342597961426}


EP_train:3:   3%|| 822/27626 [01:56<1:03:30,  7.03it/s]

{'epoch': 3, 'iter': 820, 'avg_loss': 8.528517669649856, 'avg_acc': 49.885809987819734, 'loss': 7.833415985107422}


EP_train:3:   3%|| 832/27626 [01:57<1:03:08,  7.07it/s]

{'epoch': 3, 'iter': 830, 'avg_loss': 8.529009171771659, 'avg_acc': 49.90974729241877, 'loss': 8.480414390563965}


EP_train:3:   3%|| 842/27626 [01:59<1:03:15,  7.06it/s]

{'epoch': 3, 'iter': 840, 'avg_loss': 8.523583250011757, 'avg_acc': 49.92939952437575, 'loss': 7.255640506744385}


EP_train:3:   3%|| 852/27626 [02:00<1:03:33,  7.02it/s]

{'epoch': 3, 'iter': 850, 'avg_loss': 8.525748556565174, 'avg_acc': 49.886163337250295, 'loss': 8.265996932983398}


EP_train:3:   3%|| 862/27626 [02:01<1:03:41,  7.00it/s]

{'epoch': 3, 'iter': 860, 'avg_loss': 8.521424247551186, 'avg_acc': 49.952816492450644, 'loss': 8.679190635681152}


EP_train:3:   3%|| 872/27626 [02:03<1:03:46,  6.99it/s]

{'epoch': 3, 'iter': 870, 'avg_loss': 8.523216295187563, 'avg_acc': 49.95335820895522, 'loss': 8.29300594329834}


EP_train:3:   3%|| 882/27626 [02:04<1:03:22,  7.03it/s]

{'epoch': 3, 'iter': 880, 'avg_loss': 8.519746398817531, 'avg_acc': 49.99290578887628, 'loss': 8.390180587768555}


EP_train:3:   3%|| 892/27626 [02:06<1:03:42,  6.99it/s]

{'epoch': 3, 'iter': 890, 'avg_loss': 8.517679857217637, 'avg_acc': 49.98597081930416, 'loss': 8.08354377746582}


EP_train:3:   3%|| 902/27626 [02:07<1:02:43,  7.10it/s]

{'epoch': 3, 'iter': 900, 'avg_loss': 8.514432928273733, 'avg_acc': 49.96531631520533, 'loss': 8.742147445678711}


EP_train:3:   3%|| 912/27626 [02:09<1:02:51,  7.08it/s]

{'epoch': 3, 'iter': 910, 'avg_loss': 8.51656754637131, 'avg_acc': 49.94511525795829, 'loss': 8.968204498291016}


EP_train:3:   3%|| 922/27626 [02:10<1:03:01,  7.06it/s]

{'epoch': 3, 'iter': 920, 'avg_loss': 8.51924971260543, 'avg_acc': 49.95928338762215, 'loss': 8.385472297668457}


EP_train:3:   3%|| 932/27626 [02:11<1:02:57,  7.07it/s]

{'epoch': 3, 'iter': 930, 'avg_loss': 8.519092351870428, 'avg_acc': 49.96643394199785, 'loss': 8.547477722167969}


EP_train:3:   3%|| 942/27626 [02:13<1:02:52,  7.07it/s]

{'epoch': 3, 'iter': 940, 'avg_loss': 8.518262561145422, 'avg_acc': 49.98671625929862, 'loss': 8.352222442626953}


EP_train:3:   3%|| 952/27626 [02:14<1:02:42,  7.09it/s]

{'epoch': 3, 'iter': 950, 'avg_loss': 8.51942919982846, 'avg_acc': 50.03943217665615, 'loss': 8.279850006103516}


EP_train:3:   3%|| 962/27626 [02:16<1:02:42,  7.09it/s]

{'epoch': 3, 'iter': 960, 'avg_loss': 8.516958816242516, 'avg_acc': 50.01951092611863, 'loss': 7.523932933807373}


EP_train:3:   4%|| 972/27626 [02:17<1:02:58,  7.05it/s]

{'epoch': 3, 'iter': 970, 'avg_loss': 8.517508975263727, 'avg_acc': 50.064366632337794, 'loss': 8.994270324707031}


EP_train:3:   4%|| 982/27626 [02:18<1:02:42,  7.08it/s]

{'epoch': 3, 'iter': 980, 'avg_loss': 8.516798404378628, 'avg_acc': 50.09875127420999, 'loss': 8.44408130645752}


EP_train:3:   4%|| 992/27626 [02:20<1:02:46,  7.07it/s]

{'epoch': 3, 'iter': 990, 'avg_loss': 8.515149669377763, 'avg_acc': 50.037840565085766, 'loss': 8.075199127197266}


EP_train:3:   4%|| 1002/27626 [02:21<1:02:35,  7.09it/s]

{'epoch': 3, 'iter': 1000, 'avg_loss': 8.513816601032024, 'avg_acc': 50.031218781218776, 'loss': 7.0966410636901855}


EP_train:3:   4%|| 1012/27626 [02:23<1:02:53,  7.05it/s]

{'epoch': 3, 'iter': 1010, 'avg_loss': 8.512697348608816, 'avg_acc': 50.04945598417409, 'loss': 8.435521125793457}


EP_train:3:   4%|| 1022/27626 [02:24<1:03:39,  6.97it/s]

{'epoch': 3, 'iter': 1020, 'avg_loss': 8.509964949470543, 'avg_acc': 50.055093046033306, 'loss': 7.788137912750244}


EP_train:3:   4%|| 1032/27626 [02:25<1:03:10,  7.02it/s]

{'epoch': 3, 'iter': 1030, 'avg_loss': 8.511338148847814, 'avg_acc': 50.063651794374394, 'loss': 8.67132568359375}


EP_train:3:   4%|| 1042/27626 [02:27<1:02:39,  7.07it/s]

{'epoch': 3, 'iter': 1040, 'avg_loss': 8.509502592279176, 'avg_acc': 50.06604226705092, 'loss': 8.610882759094238}


EP_train:3:   4%|| 1052/27626 [02:28<1:02:58,  7.03it/s]

{'epoch': 3, 'iter': 1050, 'avg_loss': 8.507376601421527, 'avg_acc': 50.07730732635585, 'loss': 8.462212562561035}


EP_train:3:   4%|| 1062/27626 [02:30<1:02:28,  7.09it/s]

{'epoch': 3, 'iter': 1060, 'avg_loss': 8.50776514885675, 'avg_acc': 50.11486804901036, 'loss': 9.279905319213867}


EP_train:3:   4%|| 1072/27626 [02:31<1:02:59,  7.03it/s]

{'epoch': 3, 'iter': 1070, 'avg_loss': 8.507565335399297, 'avg_acc': 50.10212418300654, 'loss': 8.392870903015137}


EP_train:3:   4%|| 1082/27626 [02:32<1:02:42,  7.06it/s]

{'epoch': 3, 'iter': 1080, 'avg_loss': 8.504106981240414, 'avg_acc': 50.10117946345976, 'loss': 9.295519828796387}


EP_train:3:   4%|| 1092/27626 [02:34<1:03:06,  7.01it/s]

{'epoch': 3, 'iter': 1090, 'avg_loss': 8.504999695296249, 'avg_acc': 50.12030247479377, 'loss': 8.509031295776367}


EP_train:3:   4%|| 1102/27626 [02:35<1:02:48,  7.04it/s]

{'epoch': 3, 'iter': 1100, 'avg_loss': 8.507284788518033, 'avg_acc': 50.13623978201635, 'loss': 7.873735427856445}


EP_train:3:   4%|| 1112/27626 [02:37<1:03:11,  6.99it/s]

{'epoch': 3, 'iter': 1110, 'avg_loss': 8.50801070888873, 'avg_acc': 50.1518901890189, 'loss': 8.662361145019531}


EP_train:3:   4%|| 1122/27626 [02:38<1:02:43,  7.04it/s]

{'epoch': 3, 'iter': 1120, 'avg_loss': 8.50948378148619, 'avg_acc': 50.150535236396074, 'loss': 8.793768882751465}


EP_train:3:   4%|| 1132/27626 [02:40<1:03:18,  6.98it/s]

{'epoch': 3, 'iter': 1130, 'avg_loss': 8.510766687064335, 'avg_acc': 50.116047745358095, 'loss': 8.264403343200684}


EP_train:3:   4%|| 1142/27626 [02:41<1:02:41,  7.04it/s]

{'epoch': 3, 'iter': 1140, 'avg_loss': 8.506582659237433, 'avg_acc': 50.13694127957932, 'loss': 8.104104042053223}


EP_train:3:   4%|| 1152/27626 [02:42<1:03:24,  6.96it/s]

{'epoch': 3, 'iter': 1150, 'avg_loss': 8.507925378665211, 'avg_acc': 50.105886185925286, 'loss': 9.128520965576172}


EP_train:3:   4%|| 1162/27626 [02:44<1:02:37,  7.04it/s]

{'epoch': 3, 'iter': 1160, 'avg_loss': 8.507367895144414, 'avg_acc': 50.0968992248062, 'loss': 7.915379524230957}


EP_train:3:   4%|| 1172/27626 [02:45<1:02:32,  7.05it/s]

{'epoch': 3, 'iter': 1170, 'avg_loss': 8.506672278110234, 'avg_acc': 50.07472245943638, 'loss': 8.170433044433594}


EP_train:3:   4%|| 1182/27626 [02:47<1:02:59,  7.00it/s]

{'epoch': 3, 'iter': 1180, 'avg_loss': 8.505655694068437, 'avg_acc': 50.06879762912786, 'loss': 8.641936302185059}


EP_train:3:   4%|| 1192/27626 [02:48<1:03:10,  6.97it/s]

{'epoch': 3, 'iter': 1190, 'avg_loss': 8.50351906223121, 'avg_acc': 50.068219983207385, 'loss': 8.037185668945312}


EP_train:3:   4%|| 1202/27626 [02:50<1:02:39,  7.03it/s]

{'epoch': 3, 'iter': 1200, 'avg_loss': 8.503993860986409, 'avg_acc': 50.085865945045796, 'loss': 7.671738624572754}


EP_train:3:   4%|| 1212/27626 [02:51<1:02:04,  7.09it/s]

{'epoch': 3, 'iter': 1210, 'avg_loss': 8.504722746022962, 'avg_acc': 50.10322047894302, 'loss': 8.618587493896484}


EP_train:3:   4%|| 1222/27626 [02:52<1:02:19,  7.06it/s]

{'epoch': 3, 'iter': 1220, 'avg_loss': 8.50304770254874, 'avg_acc': 50.102375102375106, 'loss': 9.05764102935791}


EP_train:3:   4%|| 1232/27626 [02:54<1:02:21,  7.05it/s]

{'epoch': 3, 'iter': 1230, 'avg_loss': 8.504097902125986, 'avg_acc': 50.109159220146225, 'loss': 8.69060230255127}


EP_train:3:   4%|| 1242/27626 [02:55<1:02:14,  7.06it/s]

{'epoch': 3, 'iter': 1240, 'avg_loss': 8.506166275233438, 'avg_acc': 50.10324335213537, 'loss': 8.759564399719238}


EP_train:3:   5%|| 1252/27626 [02:57<1:02:02,  7.08it/s]

{'epoch': 3, 'iter': 1250, 'avg_loss': 8.501309466495407, 'avg_acc': 50.09242605915267, 'loss': 8.106633186340332}


EP_train:3:   5%|| 1262/27626 [02:58<1:02:13,  7.06it/s]

{'epoch': 3, 'iter': 1260, 'avg_loss': 8.502498228525377, 'avg_acc': 50.08673671689136, 'loss': 9.408716201782227}


EP_train:3:   5%|| 1272/27626 [02:59<1:01:58,  7.09it/s]

{'epoch': 3, 'iter': 1270, 'avg_loss': 8.504186719538751, 'avg_acc': 50.086054287962234, 'loss': 7.575106143951416}


EP_train:3:   5%|| 1282/27626 [03:01<1:02:31,  7.02it/s]

{'epoch': 3, 'iter': 1280, 'avg_loss': 8.503681323939613, 'avg_acc': 50.12929352068697, 'loss': 7.72205924987793}


EP_train:3:   5%|| 1292/27626 [03:02<1:02:12,  7.06it/s]

{'epoch': 3, 'iter': 1290, 'avg_loss': 8.503492720638663, 'avg_acc': 50.13071262587142, 'loss': 9.524563789367676}


EP_train:3:   5%|| 1302/27626 [03:04<1:01:35,  7.12it/s]

{'epoch': 3, 'iter': 1300, 'avg_loss': 8.503293164229046, 'avg_acc': 50.182551883166795, 'loss': 9.152963638305664}


EP_train:3:   5%|| 1312/27626 [03:05<1:03:04,  6.95it/s]

{'epoch': 3, 'iter': 1310, 'avg_loss': 8.504674326636971, 'avg_acc': 50.19546147978642, 'loss': 8.970236778259277}


EP_train:3:   5%|| 1322/27626 [03:06<1:01:55,  7.08it/s]

{'epoch': 3, 'iter': 1320, 'avg_loss': 8.505951333461072, 'avg_acc': 50.18925056775171, 'loss': 8.39805793762207}


EP_train:3:   5%|| 1332/27626 [03:08<1:01:51,  7.08it/s]

{'epoch': 3, 'iter': 1330, 'avg_loss': 8.507552844433745, 'avg_acc': 50.183132982719755, 'loss': 10.312355995178223}


EP_train:3:   5%|| 1342/27626 [03:09<1:01:55,  7.07it/s]

{'epoch': 3, 'iter': 1340, 'avg_loss': 8.507844374483863, 'avg_acc': 50.177106636838175, 'loss': 9.013016700744629}


EP_train:3:   5%|| 1352/27626 [03:11<1:02:02,  7.06it/s]

{'epoch': 3, 'iter': 1350, 'avg_loss': 8.50582189934065, 'avg_acc': 50.15729089563287, 'loss': 7.903041839599609}


EP_train:3:   5%|| 1362/27626 [03:12<1:01:57,  7.07it/s]

{'epoch': 3, 'iter': 1360, 'avg_loss': 8.507227817061716, 'avg_acc': 50.20435341660544, 'loss': 8.733124732971191}


EP_train:3:   5%|| 1372/27626 [03:14<1:02:12,  7.03it/s]

{'epoch': 3, 'iter': 1370, 'avg_loss': 8.507120142184757, 'avg_acc': 50.18234865061999, 'loss': 7.894230842590332}


EP_train:3:   5%|| 1382/27626 [03:15<1:02:36,  6.99it/s]

{'epoch': 3, 'iter': 1380, 'avg_loss': 8.50463368528395, 'avg_acc': 50.192342505430844, 'loss': 9.215749740600586}


EP_train:3:   5%|| 1392/27626 [03:16<1:01:54,  7.06it/s]

{'epoch': 3, 'iter': 1390, 'avg_loss': 8.50421794278428, 'avg_acc': 50.16400071890727, 'loss': 8.596095085144043}


EP_train:3:   5%|| 1402/27626 [03:18<1:01:34,  7.10it/s]

{'epoch': 3, 'iter': 1400, 'avg_loss': 8.505176114661621, 'avg_acc': 50.15613847251963, 'loss': 8.710733413696289}


EP_train:3:   5%|| 1412/27626 [03:19<1:02:13,  7.02it/s]

{'epoch': 3, 'iter': 1410, 'avg_loss': 8.506601762129515, 'avg_acc': 50.14838766832034, 'loss': 8.026915550231934}


EP_train:3:   5%|| 1422/27626 [03:21<1:01:47,  7.07it/s]

{'epoch': 3, 'iter': 1420, 'avg_loss': 8.506716743646416, 'avg_acc': 50.17153413089373, 'loss': 8.495532035827637}


EP_train:3:   5%|| 1432/27626 [03:22<1:02:39,  6.97it/s]

{'epoch': 3, 'iter': 1430, 'avg_loss': 8.506391077421496, 'avg_acc': 50.12884346610762, 'loss': 8.687880516052246}


EP_train:3:   5%|| 1442/27626 [03:23<1:02:21,  7.00it/s]

{'epoch': 3, 'iter': 1440, 'avg_loss': 8.503769953328648, 'avg_acc': 50.11493754337266, 'loss': 8.068849563598633}


EP_train:3:   5%|| 1452/27626 [03:25<1:01:59,  7.04it/s]

{'epoch': 3, 'iter': 1450, 'avg_loss': 8.5056911497425, 'avg_acc': 50.129221226740185, 'loss': 9.57005500793457}


EP_train:3:   5%|| 1462/27626 [03:26<1:01:55,  7.04it/s]

{'epoch': 3, 'iter': 1460, 'avg_loss': 8.505396044637797, 'avg_acc': 50.13475359342916, 'loss': 9.330765724182129}


EP_train:3:   5%|| 1472/27626 [03:28<1:02:01,  7.03it/s]

{'epoch': 3, 'iter': 1470, 'avg_loss': 8.508959051219076, 'avg_acc': 50.1274643099932, 'loss': 8.769182205200195}


EP_train:3:   5%|| 1482/27626 [03:29<1:01:31,  7.08it/s]

{'epoch': 3, 'iter': 1480, 'avg_loss': 8.508706878440595, 'avg_acc': 50.116053342336265, 'loss': 8.302935600280762}


EP_train:3:   5%|| 1492/27626 [03:30<1:01:40,  7.06it/s]

{'epoch': 3, 'iter': 1490, 'avg_loss': 8.511177279499854, 'avg_acc': 50.12156270959088, 'loss': 9.211042404174805}


EP_train:3:   5%|| 1502/27626 [03:32<1:01:31,  7.08it/s]

{'epoch': 3, 'iter': 1500, 'avg_loss': 8.512547236613477, 'avg_acc': 50.13949033977349, 'loss': 9.028620719909668}


EP_train:3:   5%|| 1512/27626 [03:33<1:01:32,  7.07it/s]

{'epoch': 3, 'iter': 1510, 'avg_loss': 8.515695193522344, 'avg_acc': 50.126158173395105, 'loss': 8.85854434967041}


EP_train:3:   6%|| 1522/27626 [03:35<1:01:38,  7.06it/s]

{'epoch': 3, 'iter': 1520, 'avg_loss': 8.517024385388316, 'avg_acc': 50.11300131492439, 'loss': 8.492301940917969}


EP_train:3:   6%|| 1532/27626 [03:36<1:01:43,  7.05it/s]

{'epoch': 3, 'iter': 1530, 'avg_loss': 8.515189187967412, 'avg_acc': 50.10409862834748, 'loss': 8.207715034484863}


EP_train:3:   6%|| 1542/27626 [03:38<1:01:37,  7.06it/s]

{'epoch': 3, 'iter': 1540, 'avg_loss': 8.514220603791868, 'avg_acc': 50.093283582089555, 'loss': 8.437600135803223}


EP_train:3:   6%|| 1552/27626 [03:39<1:01:30,  7.07it/s]

{'epoch': 3, 'iter': 1550, 'avg_loss': 8.51326843510129, 'avg_acc': 50.06447453255964, 'loss': 8.841706275939941}


EP_train:3:   6%|| 1562/27626 [03:40<1:02:05,  7.00it/s]

{'epoch': 3, 'iter': 1560, 'avg_loss': 8.5134015428493, 'avg_acc': 50.07407110826393, 'loss': 8.055034637451172}


EP_train:3:   6%|| 1572/27626 [03:42<1:01:05,  7.11it/s]

{'epoch': 3, 'iter': 1570, 'avg_loss': 8.514824340329817, 'avg_acc': 50.09349140674729, 'loss': 8.359574317932129}


EP_train:3:   6%|| 1582/27626 [03:43<1:01:37,  7.04it/s]

{'epoch': 3, 'iter': 1580, 'avg_loss': 8.51568225363551, 'avg_acc': 50.104759645793806, 'loss': 8.541189193725586}


EP_train:3:   6%|| 1592/27626 [03:45<1:01:29,  7.06it/s]

{'epoch': 3, 'iter': 1590, 'avg_loss': 8.514694674819765, 'avg_acc': 50.10213702074168, 'loss': 8.095891952514648}


EP_train:3:   6%|| 1602/27626 [03:46<1:01:26,  7.06it/s]

{'epoch': 3, 'iter': 1600, 'avg_loss': 8.51385586653405, 'avg_acc': 50.0878357276702, 'loss': 7.209710597991943}


EP_train:3:   6%|| 1612/27626 [03:47<1:01:41,  7.03it/s]

{'epoch': 3, 'iter': 1610, 'avg_loss': 8.514550172638849, 'avg_acc': 50.10474860335196, 'loss': 8.08702564239502}


EP_train:3:   6%|| 1622/27626 [03:49<1:01:16,  7.07it/s]

{'epoch': 3, 'iter': 1620, 'avg_loss': 8.514057183839302, 'avg_acc': 50.131091918568785, 'loss': 8.562723159790039}


EP_train:3:   6%|| 1632/27626 [03:50<1:01:33,  7.04it/s]

{'epoch': 3, 'iter': 1630, 'avg_loss': 8.512974383566439, 'avg_acc': 50.14178418148375, 'loss': 8.464789390563965}


EP_train:3:   6%|| 1642/27626 [03:52<1:01:20,  7.06it/s]

{'epoch': 3, 'iter': 1640, 'avg_loss': 8.513982226542044, 'avg_acc': 50.15044180377818, 'loss': 8.246941566467285}


EP_train:3:   6%|| 1652/27626 [03:53<1:01:47,  7.01it/s]

{'epoch': 3, 'iter': 1650, 'avg_loss': 8.512940649694562, 'avg_acc': 50.160887341005456, 'loss': 8.320099830627441}


EP_train:3:   6%|| 1662/27626 [03:55<1:01:47,  7.00it/s]

{'epoch': 3, 'iter': 1660, 'avg_loss': 8.514700145767367, 'avg_acc': 50.152393136664664, 'loss': 8.778287887573242}


EP_train:3:   6%|| 1672/27626 [03:56<1:00:46,  7.12it/s]

{'epoch': 3, 'iter': 1670, 'avg_loss': 8.51556147764287, 'avg_acc': 50.14774087372831, 'loss': 8.884403228759766}


EP_train:3:   6%|| 1682/27626 [03:57<1:01:06,  7.08it/s]

{'epoch': 3, 'iter': 1680, 'avg_loss': 8.513913515421127, 'avg_acc': 50.15243902439024, 'loss': 7.928175449371338}


EP_train:3:   6%|| 1692/27626 [03:59<1:00:53,  7.10it/s]

{'epoch': 3, 'iter': 1690, 'avg_loss': 8.513789900233236, 'avg_acc': 50.14044943820225, 'loss': 7.817606449127197}


EP_train:3:   6%|| 1702/27626 [04:00<1:00:56,  7.09it/s]

{'epoch': 3, 'iter': 1700, 'avg_loss': 8.515153840035287, 'avg_acc': 50.135949441505, 'loss': 9.39834976196289}


EP_train:3:   6%|| 1712/27626 [04:02<1:01:16,  7.05it/s]

{'epoch': 3, 'iter': 1710, 'avg_loss': 8.515148575580447, 'avg_acc': 50.13332846288721, 'loss': 8.450766563415527}


EP_train:3:   6%|| 1722/27626 [04:03<1:01:04,  7.07it/s]

{'epoch': 3, 'iter': 1720, 'avg_loss': 8.516238830728215, 'avg_acc': 50.11984311446833, 'loss': 8.871479034423828}


EP_train:3:   6%|| 1732/27626 [04:04<1:01:58,  6.96it/s]

{'epoch': 3, 'iter': 1730, 'avg_loss': 8.516935905784901, 'avg_acc': 50.08846042749856, 'loss': 8.751720428466797}


EP_train:3:   6%|| 1742/27626 [04:06<1:01:26,  7.02it/s]

{'epoch': 3, 'iter': 1740, 'avg_loss': 8.518796093911703, 'avg_acc': 50.091542217116604, 'loss': 8.749387741088867}


EP_train:3:   6%|| 1752/27626 [04:07<1:00:47,  7.09it/s]

{'epoch': 3, 'iter': 1750, 'avg_loss': 8.516332873067197, 'avg_acc': 50.11778983438035, 'loss': 8.720234870910645}


EP_train:3:   6%|| 1762/27626 [04:09<1:01:08,  7.05it/s]

{'epoch': 3, 'iter': 1760, 'avg_loss': 8.516515054599864, 'avg_acc': 50.10824815445769, 'loss': 8.395020484924316}


EP_train:3:   6%|| 1772/27626 [04:10<1:01:03,  7.06it/s]

{'epoch': 3, 'iter': 1770, 'avg_loss': 8.515537210267388, 'avg_acc': 50.112930547713155, 'loss': 8.020101547241211}


EP_train:3:   6%|| 1782/27626 [04:11<1:01:02,  7.06it/s]

{'epoch': 3, 'iter': 1780, 'avg_loss': 8.515494417568595, 'avg_acc': 50.12106962380685, 'loss': 8.387347221374512}


EP_train:3:   6%|| 1792/27626 [04:13<1:01:01,  7.05it/s]

{'epoch': 3, 'iter': 1790, 'avg_loss': 8.51737333395706, 'avg_acc': 50.10294528196538, 'loss': 8.850213050842285}


EP_train:3:   7%|| 1802/27626 [04:14<1:01:31,  7.00it/s]

{'epoch': 3, 'iter': 1800, 'avg_loss': 8.517194296240078, 'avg_acc': 50.09022765130483, 'loss': 8.285602569580078}


EP_train:3:   7%|| 1812/27626 [04:16<1:01:13,  7.03it/s]

{'epoch': 3, 'iter': 1810, 'avg_loss': 8.517966237007617, 'avg_acc': 50.10008282716731, 'loss': 7.782444953918457}


EP_train:3:   7%|| 1822/27626 [04:17<1:01:03,  7.04it/s]

{'epoch': 3, 'iter': 1820, 'avg_loss': 8.518354571697019, 'avg_acc': 50.101249313563976, 'loss': 8.646515846252441}


EP_train:3:   7%|| 1832/27626 [04:19<1:01:19,  7.01it/s]

{'epoch': 3, 'iter': 1830, 'avg_loss': 8.517602868186913, 'avg_acc': 50.102403058438014, 'loss': 8.55970573425293}


EP_train:3:   7%|| 1842/27626 [04:20<1:00:39,  7.08it/s]

{'epoch': 3, 'iter': 1840, 'avg_loss': 8.5178815400322, 'avg_acc': 50.11372895165671, 'loss': 8.464988708496094}


EP_train:3:   7%|| 1852/27626 [04:21<1:00:30,  7.10it/s]

{'epoch': 3, 'iter': 1850, 'avg_loss': 8.518915450231633, 'avg_acc': 50.09623176661264, 'loss': 8.513370513916016}


EP_train:3:   7%|| 1862/27626 [04:23<1:00:12,  7.13it/s]

{'epoch': 3, 'iter': 1860, 'avg_loss': 8.518328681291912, 'avg_acc': 50.06716818914562, 'loss': 9.216039657592773}


EP_train:3:   7%|| 1872/27626 [04:24<1:00:58,  7.04it/s]

{'epoch': 3, 'iter': 1870, 'avg_loss': 8.515822987324539, 'avg_acc': 50.085181721004815, 'loss': 7.93984842300415}


EP_train:3:   7%|| 1882/27626 [04:26<1:00:33,  7.09it/s]

{'epoch': 3, 'iter': 1880, 'avg_loss': 8.518390153330225, 'avg_acc': 50.06479266347688, 'loss': 9.136606216430664}


EP_train:3:   7%|| 1892/27626 [04:27<1:00:51,  7.05it/s]

{'epoch': 3, 'iter': 1890, 'avg_loss': 8.52092285978296, 'avg_acc': 50.03635642517187, 'loss': 9.165990829467773}


EP_train:3:   7%|| 1902/27626 [04:28<1:01:05,  7.02it/s]

{'epoch': 3, 'iter': 1900, 'avg_loss': 8.520195049715317, 'avg_acc': 50.06904260915308, 'loss': 9.010934829711914}


EP_train:3:   7%|| 1912/27626 [04:30<1:01:16,  6.99it/s]

{'epoch': 3, 'iter': 1910, 'avg_loss': 8.520806329778075, 'avg_acc': 50.05723443223443, 'loss': 8.449423789978027}


EP_train:3:   7%|| 1922/27626 [04:31<1:01:18,  6.99it/s]

{'epoch': 3, 'iter': 1920, 'avg_loss': 8.521125894235734, 'avg_acc': 50.058563248308175, 'loss': 8.631937026977539}


EP_train:3:   7%|| 1932/27626 [04:33<1:00:26,  7.09it/s]

{'epoch': 3, 'iter': 1930, 'avg_loss': 8.520938660183575, 'avg_acc': 50.048549974106685, 'loss': 9.268763542175293}


EP_train:3:   7%|| 1942/27626 [04:34<1:01:07,  7.00it/s]

{'epoch': 3, 'iter': 1940, 'avg_loss': 8.521837270364266, 'avg_acc': 50.05473982483256, 'loss': 8.754690170288086}


EP_train:3:   7%|| 1952/27626 [04:35<1:00:56,  7.02it/s]

{'epoch': 3, 'iter': 1950, 'avg_loss': 8.524245800573603, 'avg_acc': 50.068874935930296, 'loss': 7.672258377075195}


EP_train:3:   7%|| 1962/27626 [04:37<1:00:32,  7.07it/s]

{'epoch': 3, 'iter': 1960, 'avg_loss': 8.52266606404306, 'avg_acc': 50.052587965323816, 'loss': 8.857980728149414}


EP_train:3:   7%|| 1972/27626 [04:38<1:00:52,  7.02it/s]

{'epoch': 3, 'iter': 1970, 'avg_loss': 8.52144659744913, 'avg_acc': 50.076103500761036, 'loss': 8.964439392089844}


EP_train:3:   7%|| 1982/27626 [04:40<1:00:23,  7.08it/s]

{'epoch': 3, 'iter': 1980, 'avg_loss': 8.52124221730509, 'avg_acc': 50.08045179202423, 'loss': 7.925478458404541}


EP_train:3:   7%|| 1992/27626 [04:41<1:01:04,  6.99it/s]

{'epoch': 3, 'iter': 1990, 'avg_loss': 8.521040581148034, 'avg_acc': 50.05336514314415, 'loss': 9.11270523071289}


EP_train:3:   7%|| 2002/27626 [04:43<1:00:36,  7.05it/s]

{'epoch': 3, 'iter': 2000, 'avg_loss': 8.518563072065422, 'avg_acc': 50.060907046476764, 'loss': 7.648637771606445}


EP_train:3:   7%|| 2012/27626 [04:44<1:01:10,  6.98it/s]

{'epoch': 3, 'iter': 2010, 'avg_loss': 8.516206644590312, 'avg_acc': 50.077697662854305, 'loss': 7.3809123039245605}


EP_train:3:   7%|| 2022/27626 [04:45<1:00:08,  7.10it/s]

{'epoch': 3, 'iter': 2020, 'avg_loss': 8.515419428682398, 'avg_acc': 50.07112815437902, 'loss': 8.212096214294434}


EP_train:3:   7%|| 2032/27626 [04:47<1:00:26,  7.06it/s]

{'epoch': 3, 'iter': 2030, 'avg_loss': 8.516218816512092, 'avg_acc': 50.070777941900545, 'loss': 9.01107120513916}


EP_train:3:   7%|| 2042/27626 [04:48<1:00:06,  7.09it/s]

{'epoch': 3, 'iter': 2040, 'avg_loss': 8.516344452184653, 'avg_acc': 50.041340029397354, 'loss': 8.415884971618652}


EP_train:3:   7%|| 2052/27626 [04:50<1:00:00,  7.10it/s]

{'epoch': 3, 'iter': 2050, 'avg_loss': 8.51811499830457, 'avg_acc': 50.04723305704535, 'loss': 9.079744338989258}


EP_train:3:   7%|| 2062/27626 [04:51<1:01:00,  6.98it/s]

{'epoch': 3, 'iter': 2060, 'avg_loss': 8.518222102489592, 'avg_acc': 50.06519893255701, 'loss': 8.214410781860352}


EP_train:3:   8%|| 2072/27626 [04:52<1:00:35,  7.03it/s]

{'epoch': 3, 'iter': 2070, 'avg_loss': 8.5186183112995, 'avg_acc': 50.06337518107195, 'loss': 8.583780288696289}


EP_train:3:   8%|| 2082/27626 [04:54<1:00:41,  7.01it/s]

{'epoch': 3, 'iter': 2080, 'avg_loss': 8.51849998870989, 'avg_acc': 50.076585776069194, 'loss': 8.239869117736816}


EP_train:3:   8%|| 2092/27626 [04:55<1:00:26,  7.04it/s]

{'epoch': 3, 'iter': 2090, 'avg_loss': 8.518062826664694, 'avg_acc': 50.074725011956, 'loss': 8.27127742767334}


EP_train:3:   8%|| 2102/27626 [04:57<1:00:41,  7.01it/s]

{'epoch': 3, 'iter': 2100, 'avg_loss': 8.52081291588189, 'avg_acc': 50.08478105663969, 'loss': 8.832670211791992}


EP_train:3:   8%|| 2112/27626 [04:58<59:56,  7.09it/s]  

{'epoch': 3, 'iter': 2110, 'avg_loss': 8.521556551340122, 'avg_acc': 50.06513500710563, 'loss': 8.836289405822754}


EP_train:3:   8%|| 2122/27626 [04:59<1:00:08,  7.07it/s]

{'epoch': 3, 'iter': 2120, 'avg_loss': 8.522330780290085, 'avg_acc': 50.06777463460632, 'loss': 9.063124656677246}


EP_train:3:   8%|| 2132/27626 [05:01<59:51,  7.10it/s]

{'epoch': 3, 'iter': 2130, 'avg_loss': 8.522548433873979, 'avg_acc': 50.07478883153449, 'loss': 9.248841285705566}


EP_train:3:   8%|| 2142/27626 [05:02<1:00:29,  7.02it/s]

{'epoch': 3, 'iter': 2140, 'avg_loss': 8.522807633080587, 'avg_acc': 50.07443951424568, 'loss': 9.182510375976562}


EP_train:3:   8%|| 2152/27626 [05:04<1:00:16,  7.04it/s]

{'epoch': 3, 'iter': 2150, 'avg_loss': 8.523394448099886, 'avg_acc': 50.059565318456535, 'loss': 8.620626449584961}


EP_train:3:   8%|| 2162/27626 [05:05<1:00:06,  7.06it/s]

{'epoch': 3, 'iter': 2160, 'avg_loss': 8.521686421340949, 'avg_acc': 50.075196668209166, 'loss': 7.853969573974609}


EP_train:3:   8%|| 2172/27626 [05:07<59:50,  7.09it/s]

{'epoch': 3, 'iter': 2170, 'avg_loss': 8.521553450935395, 'avg_acc': 50.08492630124367, 'loss': 8.885024070739746}


EP_train:3:   8%|| 2182/27626 [05:08<59:59,  7.07it/s]  

{'epoch': 3, 'iter': 2180, 'avg_loss': 8.521235791984253, 'avg_acc': 50.09170105456213, 'loss': 8.233570098876953}


EP_train:3:   8%|| 2192/27626 [05:09<1:00:25,  7.02it/s]

{'epoch': 3, 'iter': 2190, 'avg_loss': 8.521493404001909, 'avg_acc': 50.069888178913736, 'loss': 8.355310440063477}


EP_train:3:   8%|| 2202/27626 [05:11<59:56,  7.07it/s]  

{'epoch': 3, 'iter': 2200, 'avg_loss': 8.520446693285223, 'avg_acc': 50.07099045888233, 'loss': 8.881561279296875}


EP_train:3:   8%|| 2212/27626 [05:12<1:00:04,  7.05it/s]

{'epoch': 3, 'iter': 2210, 'avg_loss': 8.519313463524206, 'avg_acc': 50.0650158299412, 'loss': 7.794270992279053}


EP_train:3:   8%|| 2222/27626 [05:14<59:41,  7.09it/s]  

{'epoch': 3, 'iter': 2220, 'avg_loss': 8.521590450764347, 'avg_acc': 50.060502026114364, 'loss': 9.18951416015625}


EP_train:3:   8%|| 2232/27626 [05:15<59:28,  7.12it/s]

{'epoch': 3, 'iter': 2230, 'avg_loss': 8.520999787902149, 'avg_acc': 50.04622366651725, 'loss': 8.05801773071289}


EP_train:3:   8%|| 2242/27626 [05:16<59:52,  7.07it/s]

{'epoch': 3, 'iter': 2240, 'avg_loss': 8.520954705306005, 'avg_acc': 50.0334672021419, 'loss': 8.223398208618164}


EP_train:3:   8%|| 2252/27626 [05:18<1:00:22,  7.00it/s]

{'epoch': 3, 'iter': 2250, 'avg_loss': 8.520368359132323, 'avg_acc': 50.03609506885829, 'loss': 9.300213813781738}


EP_train:3:   8%|| 2262/27626 [05:19<59:24,  7.12it/s]

{'epoch': 3, 'iter': 2260, 'avg_loss': 8.522003988099804, 'avg_acc': 50.053903140203445, 'loss': 8.988077163696289}


EP_train:3:   8%|| 2272/27626 [05:21<59:40,  7.08it/s]

{'epoch': 3, 'iter': 2270, 'avg_loss': 8.5219509902897, 'avg_acc': 50.05366578599736, 'loss': 9.285138130187988}


EP_train:3:   8%|| 2282/27626 [05:22<1:00:00,  7.04it/s]

{'epoch': 3, 'iter': 2280, 'avg_loss': 8.523923404171821, 'avg_acc': 50.046580447172296, 'loss': 9.156988143920898}


EP_train:3:   8%|| 2292/27626 [05:23<59:52,  7.05it/s]

{'epoch': 3, 'iter': 2290, 'avg_loss': 8.525053056136638, 'avg_acc': 50.04910519423832, 'loss': 7.955507278442383}


EP_train:3:   8%|| 2302/27626 [05:25<59:46,  7.06it/s]  

{'epoch': 3, 'iter': 2300, 'avg_loss': 8.524725045084383, 'avg_acc': 50.05432420686658, 'loss': 8.570261001586914}


EP_train:3:   8%|| 2312/27626 [05:26<1:00:13,  7.01it/s]

{'epoch': 3, 'iter': 2310, 'avg_loss': 8.523611509867015, 'avg_acc': 50.03515794028559, 'loss': 9.601611137390137}


EP_train:3:   8%|| 2322/27626 [05:28<59:54,  7.04it/s]

{'epoch': 3, 'iter': 2320, 'avg_loss': 8.524920578200156, 'avg_acc': 50.04173847479535, 'loss': 8.72627067565918}


EP_train:3:   8%|| 2332/27626 [05:29<59:48,  7.05it/s]

{'epoch': 3, 'iter': 2330, 'avg_loss': 8.524706885109946, 'avg_acc': 50.04424066924067, 'loss': 8.696639060974121}


EP_train:3:   8%|| 2342/27626 [05:30<59:43,  7.06it/s]  

{'epoch': 3, 'iter': 2340, 'avg_loss': 8.52505203476832, 'avg_acc': 50.045386586928664, 'loss': 8.369473457336426}


EP_train:3:   9%|| 2352/27626 [05:32<59:34,  7.07it/s]

{'epoch': 3, 'iter': 2350, 'avg_loss': 8.525701289986205, 'avg_acc': 50.05848575074437, 'loss': 8.689556121826172}


EP_train:3:   9%|| 2362/27626 [05:33<1:00:18,  6.98it/s]

{'epoch': 3, 'iter': 2360, 'avg_loss': 8.526216761230765, 'avg_acc': 50.051620076238876, 'loss': 8.084332466125488}


EP_train:3:   9%|| 2372/27626 [05:35<1:00:23,  6.97it/s]

{'epoch': 3, 'iter': 2370, 'avg_loss': 8.524916876724932, 'avg_acc': 50.050084352593835, 'loss': 8.720255851745605}


EP_train:3:   9%|| 2382/27626 [05:36<59:28,  7.07it/s]

{'epoch': 3, 'iter': 2380, 'avg_loss': 8.525358337256549, 'avg_acc': 50.03806173876523, 'loss': 8.23487377166748}


EP_train:3:   9%|| 2392/27626 [05:38<59:19,  7.09it/s]

{'epoch': 3, 'iter': 2390, 'avg_loss': 8.527014823899215, 'avg_acc': 50.0418235048097, 'loss': 8.26999568939209}


EP_train:3:   9%|| 2402/27626 [05:39<59:30,  7.07it/s]

{'epoch': 3, 'iter': 2400, 'avg_loss': 8.527631160667767, 'avg_acc': 50.02993544356518, 'loss': 8.935237884521484}


EP_train:3:   9%|| 2412/27626 [05:40<1:00:06,  6.99it/s]

{'epoch': 3, 'iter': 2410, 'avg_loss': 8.527806652279525, 'avg_acc': 50.044068851099134, 'loss': 8.645679473876953}


EP_train:3:   9%|| 2422/27626 [05:42<59:28,  7.06it/s]

{'epoch': 3, 'iter': 2420, 'avg_loss': 8.528841206181497, 'avg_acc': 50.05163155720776, 'loss': 8.776376724243164}


EP_train:3:   9%|| 2432/27626 [05:43<59:14,  7.09it/s]

{'epoch': 3, 'iter': 2430, 'avg_loss': 8.52840451888222, 'avg_acc': 50.06298848210613, 'loss': 7.516959190368652}


EP_train:3:   9%|| 2442/27626 [05:45<59:15,  7.08it/s]

{'epoch': 3, 'iter': 2440, 'avg_loss': 8.527720266167915, 'avg_acc': 50.05760958623515, 'loss': 8.36324691772461}


EP_train:3:   9%|| 2452/27626 [05:46<1:00:00,  6.99it/s]

{'epoch': 3, 'iter': 2450, 'avg_loss': 8.526584833605053, 'avg_acc': 50.053549571603426, 'loss': 8.2821683883667}


EP_train:3:   9%|| 2462/27626 [05:47<59:39,  7.03it/s]

{'epoch': 3, 'iter': 2460, 'avg_loss': 8.526046353014248, 'avg_acc': 50.05714140593255, 'loss': 7.901237964630127}


EP_train:3:   9%|| 2472/27626 [05:49<59:20,  7.06it/s]

{'epoch': 3, 'iter': 2470, 'avg_loss': 8.525923180126652, 'avg_acc': 50.056910157830835, 'loss': 8.253585815429688}


EP_train:3:   9%|| 2482/27626 [05:50<59:20,  7.06it/s]

{'epoch': 3, 'iter': 2480, 'avg_loss': 8.5268162397164, 'avg_acc': 50.066757355904876, 'loss': 8.910924911499023}


EP_train:3:   9%|| 2492/27626 [05:52<59:28,  7.04it/s]

{'epoch': 3, 'iter': 2490, 'avg_loss': 8.526217720203254, 'avg_acc': 50.04265355279004, 'loss': 8.188294410705566}


EP_train:3:   9%|| 2502/27626 [05:53<59:19,  7.06it/s]

{'epoch': 3, 'iter': 2500, 'avg_loss': 8.526228161918215, 'avg_acc': 50.046231507397046, 'loss': 9.642996788024902}


EP_train:3:   9%|| 2512/27626 [05:55<59:24,  7.05it/s]

{'epoch': 3, 'iter': 2510, 'avg_loss': 8.526006182841883, 'avg_acc': 50.056003584229394, 'loss': 8.134462356567383}


EP_train:3:   9%|| 2522/27626 [05:56<59:08,  7.07it/s]

{'epoch': 3, 'iter': 2520, 'avg_loss': 8.5267784488622, 'avg_acc': 50.043385561285206, 'loss': 8.030180931091309}


EP_train:3:   9%|| 2532/27626 [05:57<59:00,  7.09it/s]

{'epoch': 3, 'iter': 2530, 'avg_loss': 8.52622129737224, 'avg_acc': 50.06296918214145, 'loss': 8.534320831298828}


EP_train:3:   9%|| 2542/27626 [05:59<59:18,  7.05it/s]

{'epoch': 3, 'iter': 2540, 'avg_loss': 8.525369814933168, 'avg_acc': 50.068870523415974, 'loss': 8.19643783569336}


EP_train:3:   9%|| 2552/27626 [06:00<59:35,  7.01it/s]

{'epoch': 3, 'iter': 2550, 'avg_loss': 8.525211301704147, 'avg_acc': 50.07717561740493, 'loss': 8.420279502868652}


EP_train:3:   9%|| 2562/27626 [06:02<58:39,  7.12it/s]

{'epoch': 3, 'iter': 2560, 'avg_loss': 8.525507731439545, 'avg_acc': 50.07687426786411, 'loss': 8.551531791687012}


EP_train:3:   9%|| 2572/27626 [06:03<59:30,  7.02it/s]

{'epoch': 3, 'iter': 2570, 'avg_loss': 8.52463496700422, 'avg_acc': 50.08629910540646, 'loss': 8.43221378326416}


EP_train:3:   9%|| 2582/27626 [06:04<59:05,  7.06it/s]

{'epoch': 3, 'iter': 2580, 'avg_loss': 8.52486383873379, 'avg_acc': 50.09080782642387, 'loss': 8.694738388061523}


EP_train:3:   9%|| 2592/27626 [06:06<59:02,  7.07it/s]

{'epoch': 3, 'iter': 2590, 'avg_loss': 8.525903324324483, 'avg_acc': 50.085632960247004, 'loss': 9.682979583740234}


EP_train:3:   9%|| 2602/27626 [06:07<58:46,  7.10it/s]

{'epoch': 3, 'iter': 2600, 'avg_loss': 8.527438261288031, 'avg_acc': 50.10332564398309, 'loss': 9.18548583984375}


EP_train:3:   9%|| 2612/27626 [06:09<58:47,  7.09it/s]

{'epoch': 3, 'iter': 2610, 'avg_loss': 8.527157301740234, 'avg_acc': 50.10412677135198, 'loss': 7.986377239227295}


EP_train:3:   9%|| 2622/27626 [06:10<59:21,  7.02it/s]

{'epoch': 3, 'iter': 2620, 'avg_loss': 8.52569536251314, 'avg_acc': 50.10730637161389, 'loss': 8.207695960998535}


EP_train:3:  10%|| 2632/27626 [06:11<58:53,  7.07it/s]

{'epoch': 3, 'iter': 2630, 'avg_loss': 8.524811750852033, 'avg_acc': 50.09145762067655, 'loss': 9.083090782165527}


EP_train:3:  10%|| 2642/27626 [06:13<58:52,  7.07it/s]

{'epoch': 3, 'iter': 2640, 'avg_loss': 8.526295784340352, 'avg_acc': 50.089928057553955, 'loss': 9.38556957244873}


EP_train:3:  10%|| 2652/27626 [06:14<59:11,  7.03it/s]

{'epoch': 3, 'iter': 2650, 'avg_loss': 8.528309164115592, 'avg_acc': 50.10019803847605, 'loss': 9.079082489013672}


EP_train:3:  10%|| 2662/27626 [06:16<59:09,  7.03it/s]

{'epoch': 3, 'iter': 2660, 'avg_loss': 8.528688980197154, 'avg_acc': 50.102170236753096, 'loss': 8.571026802062988}


EP_train:3:  10%|| 2672/27626 [06:17<58:59,  7.05it/s]

{'epoch': 3, 'iter': 2670, 'avg_loss': 8.528478552098132, 'avg_acc': 50.10178771995507, 'loss': 8.700517654418945}


EP_train:3:  10%|| 2682/27626 [06:19<59:08,  7.03it/s]

{'epoch': 3, 'iter': 2680, 'avg_loss': 8.527953642333278, 'avg_acc': 50.100242446848185, 'loss': 8.842775344848633}


EP_train:3:  10%|| 2692/27626 [06:20<59:30,  6.98it/s]

{'epoch': 3, 'iter': 2690, 'avg_loss': 8.527549615361707, 'avg_acc': 50.087095875139354, 'loss': 9.19882583618164}


EP_train:3:  10%|| 2702/27626 [06:21<58:42,  7.08it/s]

{'epoch': 3, 'iter': 2700, 'avg_loss': 8.528048526265364, 'avg_acc': 50.072889670492415, 'loss': 8.148635864257812}


EP_train:3:  10%|| 2712/27626 [06:23<58:22,  7.11it/s]

{'epoch': 3, 'iter': 2710, 'avg_loss': 8.528215060767016, 'avg_acc': 50.07146809295463, 'loss': 8.724449157714844}


EP_train:3:  10%|| 2722/27626 [06:24<59:21,  6.99it/s]

{'epoch': 3, 'iter': 2720, 'avg_loss': 8.529244586940374, 'avg_acc': 50.062017640573316, 'loss': 8.576120376586914}


EP_train:3:  10%|| 2732/27626 [06:26<59:08,  7.02it/s]

{'epoch': 3, 'iter': 2730, 'avg_loss': 8.529542339105651, 'avg_acc': 50.04920358842915, 'loss': 8.453330039978027}


EP_train:3:  10%|| 2742/27626 [06:27<58:57,  7.03it/s]

{'epoch': 3, 'iter': 2740, 'avg_loss': 8.531023751641225, 'avg_acc': 50.06726559649762, 'loss': 9.223908424377441}


EP_train:3:  10%|| 2752/27626 [06:28<58:41,  7.06it/s]

{'epoch': 3, 'iter': 2750, 'avg_loss': 8.532010961731578, 'avg_acc': 50.08065249000363, 'loss': 8.262950897216797}


EP_train:3:  10%|| 2762/27626 [06:30<58:34,  7.08it/s]

{'epoch': 3, 'iter': 2760, 'avg_loss': 8.532324572310678, 'avg_acc': 50.08941506700471, 'loss': 8.60571575164795}


EP_train:3:  10%|| 2772/27626 [06:31<58:56,  7.03it/s]

{'epoch': 3, 'iter': 2770, 'avg_loss': 8.532771034302671, 'avg_acc': 50.09022013713461, 'loss': 8.60901165008545}


EP_train:3:  10%|| 2782/27626 [06:33<59:00,  7.02it/s]

{'epoch': 3, 'iter': 2780, 'avg_loss': 8.531956423190548, 'avg_acc': 50.09101941747572, 'loss': 8.506682395935059}


EP_train:3:  10%|| 2792/27626 [06:34<58:23,  7.09it/s]

{'epoch': 3, 'iter': 2790, 'avg_loss': 8.53310276487317, 'avg_acc': 50.099650662844866, 'loss': 8.342097282409668}


EP_train:3:  10%|| 2802/27626 [06:35<58:35,  7.06it/s]

{'epoch': 3, 'iter': 2800, 'avg_loss': 8.533438280453558, 'avg_acc': 50.08367547304534, 'loss': 8.279325485229492}


EP_train:3:  10%|| 2812/27626 [06:37<58:48,  7.03it/s]

{'epoch': 3, 'iter': 2810, 'avg_loss': 8.533765723636591, 'avg_acc': 50.07003735325507, 'loss': 7.867574214935303}


EP_train:3:  10%|| 2822/27626 [06:38<59:24,  6.96it/s]

{'epoch': 3, 'iter': 2820, 'avg_loss': 8.532588706038515, 'avg_acc': 50.07422013470401, 'loss': 7.368027687072754}


EP_train:3:  10%|| 2832/27626 [06:40<58:20,  7.08it/s]

{'epoch': 3, 'iter': 2830, 'avg_loss': 8.532481357765803, 'avg_acc': 50.08830801836807, 'loss': 8.639871597290039}


EP_train:3:  10%|| 2842/27626 [06:41<58:45,  7.03it/s]

{'epoch': 3, 'iter': 2840, 'avg_loss': 8.531149762820627, 'avg_acc': 50.074797606476594, 'loss': 7.682071685791016}


EP_train:3:  10%|| 2852/27626 [06:43<58:42,  7.03it/s]

{'epoch': 3, 'iter': 2850, 'avg_loss': 8.531084350852204, 'avg_acc': 50.05699754472115, 'loss': 8.417770385742188}


EP_train:3:  10%|| 2862/27626 [06:44<58:08,  7.10it/s]

{'epoch': 3, 'iter': 2860, 'avg_loss': 8.530115989737059, 'avg_acc': 50.06990562740301, 'loss': 8.49044132232666}


EP_train:3:  10%|| 2872/27626 [06:45<58:37,  7.04it/s]

{'epoch': 3, 'iter': 2870, 'avg_loss': 8.530306113384942, 'avg_acc': 50.058777429467085, 'loss': 9.348605155944824}


EP_train:3:  10%|| 2882/27626 [06:47<57:59,  7.11it/s]

{'epoch': 3, 'iter': 2880, 'avg_loss': 8.531058066340629, 'avg_acc': 50.061827490454704, 'loss': 9.000277519226074}


EP_train:3:  10%|| 2892/27626 [06:48<57:58,  7.11it/s]

{'epoch': 3, 'iter': 2890, 'avg_loss': 8.532061687516984, 'avg_acc': 50.0713420961605, 'loss': 8.619348526000977}


EP_train:3:  11%|| 2902/27626 [06:50<58:19,  7.06it/s]

{'epoch': 3, 'iter': 2900, 'avg_loss': 8.531235261441756, 'avg_acc': 50.067864529472594, 'loss': 9.258151054382324}


EP_train:3:  11%|| 2912/27626 [06:51<58:32,  7.04it/s]

{'epoch': 3, 'iter': 2910, 'avg_loss': 8.531647513798118, 'avg_acc': 50.080513569220194, 'loss': 8.93924617767334}


EP_train:3:  11%|| 2922/27626 [06:52<58:23,  7.05it/s]

{'epoch': 3, 'iter': 2920, 'avg_loss': 8.531165493550834, 'avg_acc': 50.0866569667922, 'loss': 8.295397758483887}


EP_train:3:  11%|| 2932/27626 [06:54<58:07,  7.08it/s]

{'epoch': 3, 'iter': 2930, 'avg_loss': 8.530324775828554, 'avg_acc': 50.09808938928694, 'loss': 8.583702087402344}


EP_train:3:  11%|| 2942/27626 [06:55<58:26,  7.04it/s]

{'epoch': 3, 'iter': 2940, 'avg_loss': 8.53057386841915, 'avg_acc': 50.109444066643995, 'loss': 9.0904541015625}


EP_train:3:  11%|| 2952/27626 [06:57<58:25,  7.04it/s]

{'epoch': 3, 'iter': 2950, 'avg_loss': 8.530159466632622, 'avg_acc': 50.097424601829886, 'loss': 8.661354064941406}


EP_train:3:  11%|| 2962/27626 [06:58<58:01,  7.08it/s]

{'epoch': 3, 'iter': 2960, 'avg_loss': 8.529668039826275, 'avg_acc': 50.11398176291794, 'loss': 8.972050666809082}


EP_train:3:  11%|| 2972/27626 [06:59<58:03,  7.08it/s]

{'epoch': 3, 'iter': 2970, 'avg_loss': 8.530353828136144, 'avg_acc': 50.10307977112084, 'loss': 9.68213176727295}


EP_train:3:  11%|| 2982/27626 [07:01<57:50,  7.10it/s]

{'epoch': 3, 'iter': 2980, 'avg_loss': 8.531329159926985, 'avg_acc': 50.09329922844683, 'loss': 8.381105422973633}


EP_train:3:  11%|| 2992/27626 [07:02<58:05,  7.07it/s]

{'epoch': 3, 'iter': 2990, 'avg_loss': 8.532069798179073, 'avg_acc': 50.09507689735875, 'loss': 8.306286811828613}


EP_train:3:  11%|| 3002/27626 [07:04<57:57,  7.08it/s]

{'epoch': 3, 'iter': 3000, 'avg_loss': 8.533245273686695, 'avg_acc': 50.10204931689437, 'loss': 8.505457878112793}


EP_train:3:  11%|| 3012/27626 [07:05<57:47,  7.10it/s]

{'epoch': 3, 'iter': 3010, 'avg_loss': 8.533914013821118, 'avg_acc': 50.104823978744605, 'loss': 9.806855201721191}


EP_train:3:  11%|| 3022/27626 [07:07<57:53,  7.08it/s]

{'epoch': 3, 'iter': 3020, 'avg_loss': 8.534189912113517, 'avg_acc': 50.10240814299901, 'loss': 7.672214508056641}


EP_train:3:  11%|| 3032/27626 [07:08<57:57,  7.07it/s]

{'epoch': 3, 'iter': 3030, 'avg_loss': 8.534624674579012, 'avg_acc': 50.09176014516661, 'loss': 8.353047370910645}


EP_train:3:  11%|| 3042/27626 [07:09<57:55,  7.07it/s]

{'epoch': 3, 'iter': 3040, 'avg_loss': 8.533861548365122, 'avg_acc': 50.0976241367971, 'loss': 8.869057655334473}


EP_train:3:  11%|| 3052/27626 [07:11<57:27,  7.13it/s]

{'epoch': 3, 'iter': 3050, 'avg_loss': 8.533329605313842, 'avg_acc': 50.103449688626675, 'loss': 7.905591011047363}


EP_train:3:  11%|| 3062/27626 [07:12<57:35,  7.11it/s]

{'epoch': 3, 'iter': 3060, 'avg_loss': 8.533582279706305, 'avg_acc': 50.096986278993796, 'loss': 9.135271072387695}


EP_train:3:  11%|| 3072/27626 [07:14<58:04,  7.05it/s]

{'epoch': 3, 'iter': 3070, 'avg_loss': 8.534146179868747, 'avg_acc': 50.099723217193095, 'loss': 8.441973686218262}


EP_train:3:  11%|| 3082/27626 [07:15<57:41,  7.09it/s]

{'epoch': 3, 'iter': 3080, 'avg_loss': 8.535021970272838, 'avg_acc': 50.10345666991236, 'loss': 8.580302238464355}


EP_train:3:  11%|| 3092/27626 [07:16<58:08,  7.03it/s]

{'epoch': 3, 'iter': 3090, 'avg_loss': 8.535558707503506, 'avg_acc': 50.12030896150114, 'loss': 8.383216857910156}


EP_train:3:  11%|| 3102/27626 [07:18<57:41,  7.09it/s]

{'epoch': 3, 'iter': 3100, 'avg_loss': 8.536275063895749, 'avg_acc': 50.13201386649469, 'loss': 8.554383277893066}


EP_train:3:  11%|| 3112/27626 [07:19<58:19,  7.00it/s]

{'epoch': 3, 'iter': 3110, 'avg_loss': 8.537693386789075, 'avg_acc': 50.12958052073289, 'loss': 8.41268539428711}


EP_train:3:  11%|| 3122/27626 [07:21<58:04,  7.03it/s]

{'epoch': 3, 'iter': 3120, 'avg_loss': 8.538789614237718, 'avg_acc': 50.13417173982698, 'loss': 9.077301025390625}


EP_train:3:  11%|| 3132/27626 [07:22<58:14,  7.01it/s]

{'epoch': 3, 'iter': 3130, 'avg_loss': 8.53876493716537, 'avg_acc': 50.13374321303098, 'loss': 8.143159866333008}


EP_train:3:  11%|| 3142/27626 [07:23<57:35,  7.09it/s]

{'epoch': 3, 'iter': 3140, 'avg_loss': 8.538530427762105, 'avg_acc': 50.12834288443171, 'loss': 8.359540939331055}


EP_train:3:  11%|| 3152/27626 [07:25<57:50,  7.05it/s]

{'epoch': 3, 'iter': 3150, 'avg_loss': 8.539194680894605, 'avg_acc': 50.123968581402735, 'loss': 7.95498514175415}


EP_train:3:  11%|| 3162/27626 [07:26<57:46,  7.06it/s]

{'epoch': 3, 'iter': 3160, 'avg_loss': 8.539425634198912, 'avg_acc': 50.12159917747548, 'loss': 8.299942016601562}


EP_train:3:  11%|| 3172/27626 [07:28<58:21,  6.98it/s]

{'epoch': 3, 'iter': 3170, 'avg_loss': 8.538905882527121, 'avg_acc': 50.12909965310628, 'loss': 8.55493450164795}


EP_train:3:  12%|| 3182/27626 [07:29<58:17,  6.99it/s]

{'epoch': 3, 'iter': 3180, 'avg_loss': 8.539483684370706, 'avg_acc': 50.125746620559575, 'loss': 9.71130657196045}


EP_train:3:  12%|| 3192/27626 [07:31<57:45,  7.05it/s]

{'epoch': 3, 'iter': 3190, 'avg_loss': 8.540253624427434, 'avg_acc': 50.13710435600125, 'loss': 8.62586784362793}


EP_train:3:  12%|| 3202/27626 [07:32<57:53,  7.03it/s]

{'epoch': 3, 'iter': 3200, 'avg_loss': 8.539591106836664, 'avg_acc': 50.137652296157455, 'loss': 8.215582847595215}


EP_train:3:  12%|| 3212/27626 [07:33<58:16,  6.98it/s]

{'epoch': 3, 'iter': 3210, 'avg_loss': 8.539880235037007, 'avg_acc': 50.13138430395515, 'loss': 8.853812217712402}


EP_train:3:  12%|| 3222/27626 [07:35<57:39,  7.06it/s]

{'epoch': 3, 'iter': 3220, 'avg_loss': 8.540979919060533, 'avg_acc': 50.123214840111764, 'loss': 8.449294090270996}


EP_train:3:  12%|| 3232/27626 [07:36<58:09,  6.99it/s]

{'epoch': 3, 'iter': 3230, 'avg_loss': 8.54146258611865, 'avg_acc': 50.13250541627979, 'loss': 8.548282623291016}


EP_train:3:  12%|| 3242/27626 [07:38<57:34,  7.06it/s]

{'epoch': 3, 'iter': 3240, 'avg_loss': 8.541883970476603, 'avg_acc': 50.12920394939834, 'loss': 8.3959379196167}


EP_train:3:  12%|| 3252/27626 [07:39<57:56,  7.01it/s]

{'epoch': 3, 'iter': 3250, 'avg_loss': 8.542594646388148, 'avg_acc': 50.11631036604122, 'loss': 8.11423110961914}


EP_train:3:  12%|| 3262/27626 [07:40<58:03,  6.99it/s]

{'epoch': 3, 'iter': 3260, 'avg_loss': 8.541564746928048, 'avg_acc': 50.11691199018706, 'loss': 8.62178897857666}


EP_train:3:  12%|| 3272/27626 [07:42<58:06,  6.99it/s]

{'epoch': 3, 'iter': 3270, 'avg_loss': 8.541566083278907, 'avg_acc': 50.13852797309691, 'loss': 8.480938911437988}


EP_train:3:  12%|| 3282/27626 [07:43<57:30,  7.05it/s]

{'epoch': 3, 'iter': 3280, 'avg_loss': 8.540991545522841, 'avg_acc': 50.13620085339835, 'loss': 9.119805335998535}


EP_train:3:  12%|| 3292/27626 [07:45<57:45,  7.02it/s]

{'epoch': 3, 'iter': 3290, 'avg_loss': 8.541195692823347, 'avg_acc': 50.14243391066545, 'loss': 8.74380111694336}


EP_train:3:  12%|| 3302/27626 [07:46<57:16,  7.08it/s]

{'epoch': 3, 'iter': 3300, 'avg_loss': 8.539980353459702, 'avg_acc': 50.13916237503787, 'loss': 8.216639518737793}


EP_train:3:  12%|| 3312/27626 [07:48<57:12,  7.08it/s]

{'epoch': 3, 'iter': 3310, 'avg_loss': 8.539176042257786, 'avg_acc': 50.14157354273633, 'loss': 8.12637996673584}


EP_train:3:  12%|| 3322/27626 [07:49<56:59,  7.11it/s]

{'epoch': 3, 'iter': 3320, 'avg_loss': 8.539387847529374, 'avg_acc': 50.13079644685335, 'loss': 8.65693187713623}


EP_train:3:  12%|| 3332/27626 [07:50<57:21,  7.06it/s]

{'epoch': 3, 'iter': 3330, 'avg_loss': 8.53869333685214, 'avg_acc': 50.13884719303513, 'loss': 8.683675765991211}


EP_train:3:  12%|| 3342/27626 [07:52<57:54,  6.99it/s]

{'epoch': 3, 'iter': 3340, 'avg_loss': 8.538764261264568, 'avg_acc': 50.13094881771924, 'loss': 8.434521675109863}


EP_train:3:  12%|| 3352/27626 [07:53<57:41,  7.01it/s]

{'epoch': 3, 'iter': 3350, 'avg_loss': 8.539074132635287, 'avg_acc': 50.12123246792003, 'loss': 8.358599662780762}


EP_train:3:  12%|| 3362/27626 [07:55<57:56,  6.98it/s]

{'epoch': 3, 'iter': 3360, 'avg_loss': 8.53878137080026, 'avg_acc': 50.11064415352574, 'loss': 7.173582077026367}


EP_train:3:  12%|| 3372/27626 [07:56<1:01:27,  6.58it/s]

{'epoch': 3, 'iter': 3370, 'avg_loss': 8.538257715999816, 'avg_acc': 50.09455651142094, 'loss': 8.144203186035156}


EP_train:3:  12%|| 3382/27626 [07:57<57:35,  7.02it/s]

{'epoch': 3, 'iter': 3380, 'avg_loss': 8.538105223243035, 'avg_acc': 50.1016711032239, 'loss': 8.62194538116455}


EP_train:3:  12%|| 3392/27626 [07:59<57:20,  7.04it/s]

{'epoch': 3, 'iter': 3390, 'avg_loss': 8.538706102683388, 'avg_acc': 50.113351518726034, 'loss': 8.348823547363281}


EP_train:3:  12%|| 3402/27626 [08:00<57:08,  7.07it/s]

{'epoch': 3, 'iter': 3400, 'avg_loss': 8.540432558028847, 'avg_acc': 50.11393707733019, 'loss': 8.878926277160645}


EP_train:3:  12%|| 3412/27626 [08:02<57:01,  7.08it/s]

{'epoch': 3, 'iter': 3410, 'avg_loss': 8.539729036008533, 'avg_acc': 50.120932277924354, 'loss': 8.04351806640625}


EP_train:3:  12%|| 3422/27626 [08:03<57:19,  7.04it/s]

{'epoch': 3, 'iter': 3420, 'avg_loss': 8.539107033189575, 'avg_acc': 50.11875182695118, 'loss': 7.72535514831543}


EP_train:3:  12%|| 3432/27626 [08:05<57:10,  7.05it/s]

{'epoch': 3, 'iter': 3430, 'avg_loss': 8.538805001402554, 'avg_acc': 50.111119207228214, 'loss': 7.395981788635254}


EP_train:3:  12%|| 3442/27626 [08:06<57:00,  7.07it/s]

{'epoch': 3, 'iter': 3440, 'avg_loss': 8.537985170071154, 'avg_acc': 50.12351060738157, 'loss': 8.013728141784668}


EP_train:3:  12%|| 3452/27626 [08:07<57:23,  7.02it/s]

{'epoch': 3, 'iter': 3450, 'avg_loss': 8.538510511315964, 'avg_acc': 50.12677484787018, 'loss': 8.85307502746582}


EP_train:3:  13%|| 3462/27626 [08:09<56:49,  7.09it/s]

{'epoch': 3, 'iter': 3460, 'avg_loss': 8.53834872905728, 'avg_acc': 50.12369979774631, 'loss': 8.934504508972168}


EP_train:3:  13%|| 3472/27626 [08:10<57:20,  7.02it/s]

{'epoch': 3, 'iter': 3470, 'avg_loss': 8.53796539578648, 'avg_acc': 50.11343993085566, 'loss': 8.098312377929688}


EP_train:3:  13%|| 3482/27626 [08:12<56:41,  7.10it/s]

{'epoch': 3, 'iter': 3480, 'avg_loss': 8.53695235119507, 'avg_acc': 50.115807239299045, 'loss': 7.810622692108154}


EP_train:3:  13%|| 3492/27626 [08:13<56:45,  7.09it/s]

{'epoch': 3, 'iter': 3490, 'avg_loss': 8.537143851378284, 'avg_acc': 50.109209395588664, 'loss': 9.020682334899902}


EP_train:3:  13%|| 3502/27626 [08:14<57:00,  7.05it/s]

{'epoch': 3, 'iter': 3500, 'avg_loss': 8.540157830909266, 'avg_acc': 50.108004855755496, 'loss': 9.989453315734863}


EP_train:3:  13%|| 3512/27626 [08:16<56:53,  7.06it/s]

{'epoch': 3, 'iter': 3510, 'avg_loss': 8.539617288557427, 'avg_acc': 50.108587297066364, 'loss': 7.574760437011719}


EP_train:3:  13%|| 3522/27626 [08:17<56:57,  7.05it/s]

{'epoch': 3, 'iter': 3520, 'avg_loss': 8.539871794092015, 'avg_acc': 50.11182902584493, 'loss': 8.814693450927734}


EP_train:3:  13%|| 3532/27626 [08:19<57:22,  7.00it/s]

{'epoch': 3, 'iter': 3530, 'avg_loss': 8.540274750777865, 'avg_acc': 50.115052393089776, 'loss': 8.25396728515625}


EP_train:3:  13%|| 3542/27626 [08:20<57:03,  7.03it/s]

{'epoch': 3, 'iter': 3540, 'avg_loss': 8.539869295005508, 'avg_acc': 50.12178763061282, 'loss': 8.664052963256836}


EP_train:3:  13%|| 3552/27626 [08:22<57:28,  6.98it/s]

{'epoch': 3, 'iter': 3550, 'avg_loss': 8.54034439619747, 'avg_acc': 50.130245001408056, 'loss': 8.478374481201172}


EP_train:3:  13%|| 3562/27626 [08:23<57:19,  7.00it/s]

{'epoch': 3, 'iter': 3560, 'avg_loss': 8.540067604303829, 'avg_acc': 50.11320556023588, 'loss': 8.491565704345703}


EP_train:3:  13%|| 3572/27626 [08:24<57:11,  7.01it/s]

{'epoch': 3, 'iter': 3570, 'avg_loss': 8.539490187945937, 'avg_acc': 50.12426491178942, 'loss': 8.319859504699707}


EP_train:3:  13%|| 3582/27626 [08:26<57:01,  7.03it/s]

{'epoch': 3, 'iter': 3580, 'avg_loss': 8.539422700871839, 'avg_acc': 50.121299916224515, 'loss': 8.37884521484375}


EP_train:3:  13%|| 3592/27626 [08:27<57:02,  7.02it/s]

{'epoch': 3, 'iter': 3590, 'avg_loss': 8.539865083197835, 'avg_acc': 50.12096212754108, 'loss': 8.170309066772461}


EP_train:3:  13%|| 3602/27626 [08:29<56:44,  7.06it/s]

{'epoch': 3, 'iter': 3600, 'avg_loss': 8.5404294863306, 'avg_acc': 50.1076089975007, 'loss': 8.84970760345459}


EP_train:3:  13%|| 3612/27626 [08:30<56:52,  7.04it/s]

{'epoch': 3, 'iter': 3610, 'avg_loss': 8.540012108457613, 'avg_acc': 50.10731099418444, 'loss': 7.70305871963501}


EP_train:3:  13%|| 3622/27626 [08:31<56:38,  7.06it/s]

{'epoch': 3, 'iter': 3620, 'avg_loss': 8.540355110589854, 'avg_acc': 50.09838442419221, 'loss': 9.177356719970703}


EP_train:3:  13%|| 3632/27626 [08:33<56:48,  7.04it/s]

{'epoch': 3, 'iter': 3630, 'avg_loss': 8.54139351956413, 'avg_acc': 50.10585926741944, 'loss': 8.938492774963379}


EP_train:3:  13%|| 3642/27626 [08:34<56:34,  7.06it/s]

{'epoch': 3, 'iter': 3640, 'avg_loss': 8.541636081424201, 'avg_acc': 50.111576489975285, 'loss': 8.3154296875}


EP_train:3:  13%|| 3652/27626 [08:36<56:41,  7.05it/s]

{'epoch': 3, 'iter': 3650, 'avg_loss': 8.541318938621787, 'avg_acc': 50.11212681457135, 'loss': 8.826150894165039}


EP_train:3:  13%|| 3662/27626 [08:37<56:47,  7.03it/s]

{'epoch': 3, 'iter': 3660, 'avg_loss': 8.540813865963797, 'avg_acc': 50.10584539743239, 'loss': 8.238499641418457}


EP_train:3:  13%|| 3672/27626 [08:39<56:23,  7.08it/s]

{'epoch': 3, 'iter': 3670, 'avg_loss': 8.540846067369877, 'avg_acc': 50.10044946880959, 'loss': 8.101439476013184}


EP_train:3:  13%|| 3682/27626 [08:40<56:24,  7.08it/s]

{'epoch': 3, 'iter': 3680, 'avg_loss': 8.540906476572912, 'avg_acc': 50.10611926107036, 'loss': 9.558794975280762}


EP_train:3:  13%|| 3692/27626 [08:41<56:33,  7.05it/s]

{'epoch': 3, 'iter': 3690, 'avg_loss': 8.540372733403208, 'avg_acc': 50.110065023028994, 'loss': 8.944840431213379}


EP_train:3:  13%|| 3702/27626 [08:43<56:19,  7.08it/s]

{'epoch': 3, 'iter': 3700, 'avg_loss': 8.54050222779248, 'avg_acc': 50.10554579843286, 'loss': 8.343976020812988}


EP_train:3:  13%|| 3712/27626 [08:44<56:31,  7.05it/s]

{'epoch': 3, 'iter': 3710, 'avg_loss': 8.540482157165231, 'avg_acc': 50.09347210994341, 'loss': 8.202788352966309}


EP_train:3:  13%|| 3722/27626 [08:46<56:45,  7.02it/s]

{'epoch': 3, 'iter': 3720, 'avg_loss': 8.540656349025276, 'avg_acc': 50.09909970438054, 'loss': 9.281998634338379}


EP_train:3:  14%|| 3732/27626 [08:47<56:37,  7.03it/s]

{'epoch': 3, 'iter': 3730, 'avg_loss': 8.540306065875575, 'avg_acc': 50.08375770570892, 'loss': 7.846396446228027}


EP_train:3:  14%|| 3742/27626 [08:48<56:33,  7.04it/s]

{'epoch': 3, 'iter': 3740, 'avg_loss': 8.540305881250418, 'avg_acc': 50.09021651964716, 'loss': 8.25025749206543}


EP_train:3:  14%|| 3752/27626 [08:50<56:08,  7.09it/s]

{'epoch': 3, 'iter': 3750, 'avg_loss': 8.540932655842964, 'avg_acc': 50.09080911756865, 'loss': 8.041946411132812}


EP_train:3:  14%|| 3762/27626 [08:51<55:58,  7.11it/s]

{'epoch': 3, 'iter': 3760, 'avg_loss': 8.540433349903516, 'avg_acc': 50.1013693166711, 'loss': 8.773098945617676}


EP_train:3:  14%|| 3772/27626 [08:53<56:01,  7.10it/s]

{'epoch': 3, 'iter': 3770, 'avg_loss': 8.539842872902874, 'avg_acc': 50.100271811190666, 'loss': 7.6088786125183105}


EP_train:3:  14%|| 3782/27626 [08:54<55:50,  7.12it/s]

{'epoch': 3, 'iter': 3780, 'avg_loss': 8.54106207632949, 'avg_acc': 50.11405712774398, 'loss': 9.1353120803833}


EP_train:3:  14%|| 3792/27626 [08:55<55:59,  7.09it/s]

{'epoch': 3, 'iter': 3790, 'avg_loss': 8.54146109467539, 'avg_acc': 50.11622922711686, 'loss': 8.686873435974121}


EP_train:3:  14%|| 3802/27626 [08:57<56:28,  7.03it/s]

{'epoch': 3, 'iter': 3800, 'avg_loss': 8.541672369769547, 'avg_acc': 50.13072217837411, 'loss': 8.345088958740234}


EP_train:3:  14%|| 3812/27626 [08:58<56:25,  7.03it/s]

{'epoch': 3, 'iter': 3810, 'avg_loss': 8.541111601521981, 'avg_acc': 50.14431907635791, 'loss': 8.353737831115723}


EP_train:3:  14%|| 3822/27626 [09:00<56:12,  7.06it/s]

{'epoch': 3, 'iter': 3820, 'avg_loss': 8.540379513373896, 'avg_acc': 50.14066998168018, 'loss': 7.8795013427734375}


EP_train:3:  14%|| 3832/27626 [09:01<56:10,  7.06it/s]

{'epoch': 3, 'iter': 3830, 'avg_loss': 8.540855127661697, 'avg_acc': 50.12154137300966, 'loss': 8.894020080566406}


EP_train:3:  14%|| 3842/27626 [09:03<55:51,  7.10it/s]

{'epoch': 3, 'iter': 3840, 'avg_loss': 8.540865520113801, 'avg_acc': 50.10983467846914, 'loss': 8.827940940856934}


EP_train:3:  14%|| 3852/27626 [09:04<55:59,  7.08it/s]

{'epoch': 3, 'iter': 3850, 'avg_loss': 8.540668474547555, 'avg_acc': 50.11360685536225, 'loss': 8.447598457336426}


EP_train:3:  14%|| 3862/27626 [09:05<55:52,  7.09it/s]

{'epoch': 3, 'iter': 3860, 'avg_loss': 8.541273051609927, 'avg_acc': 50.110075110075115, 'loss': 9.614213943481445}


EP_train:3:  14%|| 3872/27626 [09:07<55:47,  7.10it/s]

{'epoch': 3, 'iter': 3870, 'avg_loss': 8.541774221059118, 'avg_acc': 50.12190002583312, 'loss': 7.97934627532959}


EP_train:3:  14%|| 3882/27626 [09:08<55:59,  7.07it/s]

{'epoch': 3, 'iter': 3880, 'avg_loss': 8.54073541762134, 'avg_acc': 50.117559907240405, 'loss': 8.788496971130371}


EP_train:3:  14%|| 3892/27626 [09:10<55:42,  7.10it/s]

{'epoch': 3, 'iter': 3890, 'avg_loss': 8.540186336554589, 'avg_acc': 50.106817013621175, 'loss': 9.071045875549316}


EP_train:3:  14%|| 3902/27626 [09:11<56:17,  7.02it/s]

{'epoch': 3, 'iter': 3900, 'avg_loss': 8.539914032643466, 'avg_acc': 50.10413996411177, 'loss': 8.29604434967041}


EP_train:3:  14%|| 3912/27626 [09:12<56:34,  6.99it/s]

{'epoch': 3, 'iter': 3910, 'avg_loss': 8.539796661702729, 'avg_acc': 50.106270774737915, 'loss': 8.94852066040039}


EP_train:3:  14%|| 3922/27626 [09:14<55:47,  7.08it/s]

{'epoch': 3, 'iter': 3920, 'avg_loss': 8.540140950962526, 'avg_acc': 50.10440576383576, 'loss': 8.915515899658203}


EP_train:3:  14%|| 3932/27626 [09:15<55:49,  7.07it/s]

{'epoch': 3, 'iter': 3930, 'avg_loss': 8.540239348905258, 'avg_acc': 50.108114983464766, 'loss': 8.088269233703613}


EP_train:3:  14%|| 3942/27626 [09:17<55:38,  7.09it/s]

{'epoch': 3, 'iter': 3940, 'avg_loss': 8.540001818868177, 'avg_acc': 50.106254757675714, 'loss': 8.988463401794434}


EP_train:3:  14%|| 3952/27626 [09:18<55:53,  7.06it/s]

{'epoch': 3, 'iter': 3950, 'avg_loss': 8.540138465970173, 'avg_acc': 50.10677676537585, 'loss': 8.558358192443848}


EP_train:3:  14%|| 3962/27626 [09:19<56:10,  7.02it/s]

{'epoch': 3, 'iter': 3960, 'avg_loss': 8.539808591608145, 'avg_acc': 50.10808507952538, 'loss': 8.055657386779785}


EP_train:3:  14%|| 3972/27626 [09:21<55:34,  7.09it/s]

{'epoch': 3, 'iter': 3970, 'avg_loss': 8.538029052183928, 'avg_acc': 50.1023042054898, 'loss': 8.096418380737305}


EP_train:3:  14%|| 3982/27626 [09:22<56:36,  6.96it/s]

{'epoch': 3, 'iter': 3980, 'avg_loss': 8.538584884642596, 'avg_acc': 50.10204722431549, 'loss': 8.45865535736084}


EP_train:3:  14%|| 3992/27626 [09:24<56:11,  7.01it/s]

{'epoch': 3, 'iter': 3990, 'avg_loss': 8.538678100311616, 'avg_acc': 50.09865948383864, 'loss': 8.859766006469727}


EP_train:3:  14%|| 4002/27626 [09:25<55:45,  7.06it/s]

{'epoch': 3, 'iter': 4000, 'avg_loss': 8.537701878479735, 'avg_acc': 50.0984128967758, 'loss': 7.942954063415527}


EP_train:3:  15%|| 4012/27626 [09:27<55:53,  7.04it/s]

{'epoch': 3, 'iter': 4010, 'avg_loss': 8.538362949098383, 'avg_acc': 50.09894664672152, 'loss': 8.585309982299805}


EP_train:3:  15%|| 4022/27626 [09:28<55:27,  7.09it/s]

{'epoch': 3, 'iter': 4020, 'avg_loss': 8.538846307416307, 'avg_acc': 50.109580950012436, 'loss': 8.809098243713379}


EP_train:3:  15%|| 4032/27626 [09:29<55:31,  7.08it/s]

{'epoch': 3, 'iter': 4030, 'avg_loss': 8.538638894595215, 'avg_acc': 50.11861200694617, 'loss': 8.801414489746094}


EP_train:3:  15%|| 4042/27626 [09:31<55:21,  7.10it/s]

{'epoch': 3, 'iter': 4040, 'avg_loss': 8.538846325679629, 'avg_acc': 50.123731749566936, 'loss': 8.532925605773926}


EP_train:3:  15%|| 4052/27626 [09:32<56:09,  7.00it/s]

{'epoch': 3, 'iter': 4050, 'avg_loss': 8.539303758649936, 'avg_acc': 50.12496914342138, 'loss': 8.63632869720459}


EP_train:3:  15%|| 4062/27626 [09:34<55:28,  7.08it/s]

{'epoch': 3, 'iter': 4060, 'avg_loss': 8.539928027779796, 'avg_acc': 50.12620044324058, 'loss': 9.09524154663086}


EP_train:3:  15%|| 4072/27626 [09:35<56:20,  6.97it/s]

{'epoch': 3, 'iter': 4070, 'avg_loss': 8.540080550026758, 'avg_acc': 50.13279906656841, 'loss': 8.257777214050293}


EP_train:3:  15%|| 4082/27626 [09:36<55:55,  7.02it/s]

{'epoch': 3, 'iter': 4080, 'avg_loss': 8.539664331705831, 'avg_acc': 50.13170791472679, 'loss': 8.676777839660645}


EP_train:3:  15%|| 4092/27626 [09:38<55:48,  7.03it/s]

{'epoch': 3, 'iter': 4090, 'avg_loss': 8.539007238408109, 'avg_acc': 50.134441456856514, 'loss': 7.9065656661987305}


EP_train:3:  15%|| 4102/27626 [09:39<55:07,  7.11it/s]

{'epoch': 3, 'iter': 4100, 'avg_loss': 8.538452902237864, 'avg_acc': 50.14478176054621, 'loss': 8.803723335266113}


EP_train:3:  15%|| 4112/27626 [09:41<55:32,  7.06it/s]

{'epoch': 3, 'iter': 4110, 'avg_loss': 8.538799984267785, 'avg_acc': 50.15051082461688, 'loss': 8.649144172668457}


EP_train:3:  15%|| 4122/27626 [09:42<55:07,  7.11it/s]

{'epoch': 3, 'iter': 4120, 'avg_loss': 8.539599481585881, 'avg_acc': 50.15924532880369, 'loss': 8.531767845153809}


EP_train:3:  15%|| 4132/27626 [09:43<55:23,  7.07it/s]

{'epoch': 3, 'iter': 4130, 'avg_loss': 8.538893600261636, 'avg_acc': 50.158103364802706, 'loss': 7.76732873916626}


EP_train:3:  15%|| 4142/27626 [09:45<55:12,  7.09it/s]

{'epoch': 3, 'iter': 4140, 'avg_loss': 8.538298651666004, 'avg_acc': 50.159985510746196, 'loss': 8.443624496459961}


EP_train:3:  15%|| 4152/27626 [09:46<55:23,  7.06it/s]

{'epoch': 3, 'iter': 4150, 'avg_loss': 8.538055993596837, 'avg_acc': 50.174656709226696, 'loss': 8.056116104125977}


EP_train:3:  15%|| 4162/27626 [09:48<55:28,  7.05it/s]

{'epoch': 3, 'iter': 4160, 'avg_loss': 8.537639287193645, 'avg_acc': 50.171983898101416, 'loss': 8.405653953552246}


EP_train:3:  15%|| 4172/27626 [09:49<55:36,  7.03it/s]

{'epoch': 3, 'iter': 4170, 'avg_loss': 8.537477740907635, 'avg_acc': 50.17082234476145, 'loss': 8.189651489257812}


EP_train:3:  15%|| 4182/27626 [09:51<55:12,  7.08it/s]

{'epoch': 3, 'iter': 4180, 'avg_loss': 8.537604534976706, 'avg_acc': 50.165929203539825, 'loss': 8.566786766052246}


EP_train:3:  15%|| 4192/27626 [09:52<55:11,  7.08it/s]

{'epoch': 3, 'iter': 4190, 'avg_loss': 8.537823475451095, 'avg_acc': 50.16031376759723, 'loss': 8.758744239807129}


EP_train:3:  15%|| 4202/27626 [09:53<55:30,  7.03it/s]

{'epoch': 3, 'iter': 4200, 'avg_loss': 8.5381574020077, 'avg_acc': 50.16365151154487, 'loss': 8.318083763122559}


EP_train:3:  15%|| 4212/27626 [09:55<55:36,  7.02it/s]

{'epoch': 3, 'iter': 4210, 'avg_loss': 8.53763734255401, 'avg_acc': 50.16400498693897, 'loss': 8.326250076293945}


EP_train:3:  15%|| 4222/27626 [09:56<55:21,  7.05it/s]

{'epoch': 3, 'iter': 4220, 'avg_loss': 8.537937285182206, 'avg_acc': 50.17101990049751, 'loss': 8.692079544067383}


EP_train:3:  15%|| 4232/27626 [09:58<55:38,  7.01it/s]

{'epoch': 3, 'iter': 4230, 'avg_loss': 8.538016831274716, 'avg_acc': 50.171354289766015, 'loss': 8.427204132080078}


EP_train:3:  15%|| 4242/27626 [09:59<55:10,  7.06it/s]

{'epoch': 3, 'iter': 4240, 'avg_loss': 8.538013180537314, 'avg_acc': 50.16579226597501, 'loss': 7.81466007232666}


EP_train:3:  15%|| 4252/27626 [10:00<55:37,  7.00it/s]

{'epoch': 3, 'iter': 4250, 'avg_loss': 8.537646851127946, 'avg_acc': 50.16393201599624, 'loss': 8.306557655334473}


EP_train:3:  15%|| 4262/27626 [10:02<55:28,  7.02it/s]

{'epoch': 3, 'iter': 4260, 'avg_loss': 8.53704031933308, 'avg_acc': 50.163547289368694, 'loss': 8.003989219665527}


EP_train:3:  15%|| 4272/27626 [10:03<55:06,  7.06it/s]

{'epoch': 3, 'iter': 4270, 'avg_loss': 8.5371485584878, 'avg_acc': 50.17340786701007, 'loss': 8.452417373657227}


EP_train:3:  15%|| 4282/27626 [10:05<55:12,  7.05it/s]

{'epoch': 3, 'iter': 4280, 'avg_loss': 8.536535306612873, 'avg_acc': 50.16132328895117, 'loss': 8.498601913452148}


EP_train:3:  16%|| 4292/27626 [10:06<55:02,  7.06it/s]

{'epoch': 3, 'iter': 4290, 'avg_loss': 8.537156284952797, 'avg_acc': 50.152208109997666, 'loss': 8.709388732910156}


EP_train:3:  16%|| 4302/27626 [10:07<54:33,  7.12it/s]

{'epoch': 3, 'iter': 4300, 'avg_loss': 8.536645048913554, 'avg_acc': 50.14822134387352, 'loss': 8.432072639465332}


EP_train:3:  16%|| 4312/27626 [10:09<55:07,  7.05it/s]

{'epoch': 3, 'iter': 4310, 'avg_loss': 8.536829836046135, 'avg_acc': 50.158025980051036, 'loss': 8.770428657531738}


EP_train:3:  16%|| 4322/27626 [10:10<55:09,  7.04it/s]

{'epoch': 3, 'iter': 4320, 'avg_loss': 8.537003280884191, 'avg_acc': 50.161999537144176, 'loss': 8.26083755493164}


EP_train:3:  16%|| 4332/27626 [10:12<54:49,  7.08it/s]

{'epoch': 3, 'iter': 4330, 'avg_loss': 8.53728049017766, 'avg_acc': 50.16956245670746, 'loss': 9.094403266906738}


EP_train:3:  16%|| 4342/27626 [10:13<55:06,  7.04it/s]

{'epoch': 3, 'iter': 4340, 'avg_loss': 8.537001988218282, 'avg_acc': 50.169171849804194, 'loss': 8.189969062805176}


EP_train:3:  16%|| 4352/27626 [10:15<56:03,  6.92it/s]

{'epoch': 3, 'iter': 4350, 'avg_loss': 8.53743349385245, 'avg_acc': 50.16950126407722, 'loss': 7.845468044281006}


EP_train:3:  16%|| 4362/27626 [10:16<54:53,  7.06it/s]

{'epoch': 3, 'iter': 4360, 'avg_loss': 8.536558164584749, 'avg_acc': 50.174845218986476, 'loss': 7.689537048339844}


EP_train:3:  16%|| 4372/27626 [10:17<54:33,  7.10it/s]

{'epoch': 3, 'iter': 4370, 'avg_loss': 8.536687886734025, 'avg_acc': 50.183739418897275, 'loss': 8.120840072631836}


EP_train:3:  16%|| 4382/27626 [10:19<55:16,  7.01it/s]

{'epoch': 3, 'iter': 4380, 'avg_loss': 8.536066265145385, 'avg_acc': 50.171193791371834, 'loss': 8.202909469604492}


EP_train:3:  16%|| 4392/27626 [10:20<54:54,  7.05it/s]

{'epoch': 3, 'iter': 4390, 'avg_loss': 8.535876089498737, 'avg_acc': 50.17222728307903, 'loss': 8.51546859741211}


EP_train:3:  16%|| 4402/27626 [10:22<54:32,  7.10it/s]

{'epoch': 3, 'iter': 4400, 'avg_loss': 8.536004001242116, 'avg_acc': 50.16899568279937, 'loss': 8.742106437683105}


EP_train:3:  16%|| 4412/27626 [10:23<54:39,  7.08it/s]

{'epoch': 3, 'iter': 4410, 'avg_loss': 8.536426477814931, 'avg_acc': 50.16577873498073, 'loss': 7.903625011444092}


EP_train:3:  16%|| 4422/27626 [10:24<54:28,  7.10it/s]

{'epoch': 3, 'iter': 4420, 'avg_loss': 8.536903968414983, 'avg_acc': 50.155507803664335, 'loss': 8.792875289916992}


EP_train:3:  16%|| 4432/27626 [10:26<55:05,  7.02it/s]

{'epoch': 3, 'iter': 4430, 'avg_loss': 8.537130483034582, 'avg_acc': 50.156567366283014, 'loss': 9.829375267028809}


EP_train:3:  16%|| 4442/27626 [10:27<55:22,  6.98it/s]

{'epoch': 3, 'iter': 4440, 'avg_loss': 8.537232220427889, 'avg_acc': 50.15480747579374, 'loss': 8.899157524108887}


EP_train:3:  16%|| 4452/27626 [10:29<54:35,  7.08it/s]

{'epoch': 3, 'iter': 4450, 'avg_loss': 8.53734831112579, 'avg_acc': 50.15726802965625, 'loss': 8.957201957702637}


EP_train:3:  16%|| 4462/27626 [10:30<54:24,  7.10it/s]

{'epoch': 3, 'iter': 4460, 'avg_loss': 8.537127564611309, 'avg_acc': 50.15271239632369, 'loss': 8.37710189819336}


EP_train:3:  16%|| 4472/27626 [10:31<54:24,  7.09it/s]

{'epoch': 3, 'iter': 4470, 'avg_loss': 8.53696128917311, 'avg_acc': 50.144682397673904, 'loss': 9.42176342010498}


EP_train:3:  16%|| 4482/27626 [10:33<54:43,  7.05it/s]

{'epoch': 3, 'iter': 4480, 'avg_loss': 8.53665045429835, 'avg_acc': 50.14924124079446, 'loss': 8.06700325012207}


EP_train:3:  16%|| 4492/27626 [10:34<54:41,  7.05it/s]

{'epoch': 3, 'iter': 4490, 'avg_loss': 8.536249410386766, 'avg_acc': 50.147517256735696, 'loss': 8.019585609436035}


EP_train:3:  16%|| 4502/27626 [10:36<54:28,  7.08it/s]

{'epoch': 3, 'iter': 4500, 'avg_loss': 8.535953604255987, 'avg_acc': 50.15343812486114, 'loss': 8.591896057128906}


EP_train:3:  16%|| 4512/27626 [10:37<54:12,  7.11it/s]

{'epoch': 3, 'iter': 4510, 'avg_loss': 8.536048555553844, 'avg_acc': 50.153097982708935, 'loss': 8.385444641113281}


EP_train:3:  16%|| 4522/27626 [10:39<54:23,  7.08it/s]

{'epoch': 3, 'iter': 4520, 'avg_loss': 8.535248386203333, 'avg_acc': 50.14861203273612, 'loss': 7.719062805175781}


EP_train:3:  16%|| 4532/27626 [10:40<55:20,  6.95it/s]

{'epoch': 3, 'iter': 4530, 'avg_loss': 8.534981188461549, 'avg_acc': 50.142076804237476, 'loss': 8.932482719421387}


EP_train:3:  16%|| 4542/27626 [10:41<54:59,  7.00it/s]

{'epoch': 3, 'iter': 4540, 'avg_loss': 8.53483583622109, 'avg_acc': 50.141075754239154, 'loss': 8.39599895477295}


EP_train:3:  16%|| 4552/27626 [10:43<54:22,  7.07it/s]

{'epoch': 3, 'iter': 4550, 'avg_loss': 8.535531424763596, 'avg_acc': 50.13252581850143, 'loss': 8.761000633239746}


EP_train:3:  17%|| 4562/27626 [10:44<54:32,  7.05it/s]

{'epoch': 3, 'iter': 4560, 'avg_loss': 8.535512489594014, 'avg_acc': 50.12127274720456, 'loss': 8.768633842468262}


EP_train:3:  17%|| 4572/27626 [10:46<54:03,  7.11it/s]

{'epoch': 3, 'iter': 4570, 'avg_loss': 8.535586765104037, 'avg_acc': 50.12169109604025, 'loss': 9.108366966247559}


EP_train:3:  17%|| 4582/27626 [10:47<54:45,  7.01it/s]

{'epoch': 3, 'iter': 4580, 'avg_loss': 8.535892003281516, 'avg_acc': 50.1193789565597, 'loss': 8.626173973083496}


EP_train:3:  17%|| 4592/27626 [10:48<54:28,  7.05it/s]

{'epoch': 3, 'iter': 4590, 'avg_loss': 8.536151960040977, 'avg_acc': 50.12592572424308, 'loss': 9.10975456237793}


EP_train:3:  17%|| 4602/27626 [10:50<54:23,  7.05it/s]

{'epoch': 3, 'iter': 4600, 'avg_loss': 8.536151685758249, 'avg_acc': 50.118860030428166, 'loss': 8.150270462036133}


EP_train:3:  17%|| 4612/27626 [10:51<54:58,  6.98it/s]

{'epoch': 3, 'iter': 4610, 'avg_loss': 8.536240789694322, 'avg_acc': 50.122668618520926, 'loss': 7.951111793518066}


EP_train:3:  17%|| 4622/27626 [10:53<55:01,  6.97it/s]

{'epoch': 3, 'iter': 4620, 'avg_loss': 8.536210807621982, 'avg_acc': 50.12443194113828, 'loss': 8.434793472290039}


EP_train:3:  17%|| 4632/27626 [10:54<54:40,  7.01it/s]

{'epoch': 3, 'iter': 4630, 'avg_loss': 8.536023416109359, 'avg_acc': 50.121464046642195, 'loss': 8.44217300415039}


EP_train:3:  17%|| 4642/27626 [10:55<54:16,  7.06it/s]

{'epoch': 3, 'iter': 4640, 'avg_loss': 8.536376514611534, 'avg_acc': 50.11244882568412, 'loss': 8.008767127990723}


EP_train:3:  17%|| 4652/27626 [10:57<53:59,  7.09it/s]

{'epoch': 3, 'iter': 4650, 'avg_loss': 8.536557539794503, 'avg_acc': 50.11691034186197, 'loss': 8.39406681060791}


EP_train:3:  17%|| 4662/27626 [10:58<54:36,  7.01it/s]

{'epoch': 3, 'iter': 4660, 'avg_loss': 8.536368862545764, 'avg_acc': 50.120682257026395, 'loss': 8.015432357788086}


EP_train:3:  17%|| 4672/27626 [11:00<54:41,  7.00it/s]

{'epoch': 3, 'iter': 4670, 'avg_loss': 8.536088043503627, 'avg_acc': 50.11440269749519, 'loss': 8.030245780944824}


EP_train:3:  17%|| 4682/27626 [11:01<54:13,  7.05it/s]

{'epoch': 3, 'iter': 4680, 'avg_loss': 8.53601006783117, 'avg_acc': 50.11616107669301, 'loss': 8.140009880065918}


EP_train:3:  17%|| 4692/27626 [11:03<54:42,  6.99it/s]

{'epoch': 3, 'iter': 4690, 'avg_loss': 8.536595672769248, 'avg_acc': 50.11058409720742, 'loss': 8.731565475463867}


EP_train:3:  17%|| 4702/27626 [11:04<54:25,  7.02it/s]

{'epoch': 3, 'iter': 4700, 'avg_loss': 8.537020950767745, 'avg_acc': 50.10569559668156, 'loss': 8.19440746307373}


EP_train:3:  17%|| 4712/27626 [11:05<54:45,  6.97it/s]

{'epoch': 3, 'iter': 4710, 'avg_loss': 8.537006737092224, 'avg_acc': 50.108787943111864, 'loss': 8.18820858001709}


EP_train:3:  17%|| 4722/27626 [11:07<54:04,  7.06it/s]

{'epoch': 3, 'iter': 4720, 'avg_loss': 8.536645407091887, 'avg_acc': 50.095318788392284, 'loss': 8.886516571044922}


EP_train:3:  17%|| 4732/27626 [11:08<53:58,  7.07it/s]

{'epoch': 3, 'iter': 4730, 'avg_loss': 8.537012983038723, 'avg_acc': 50.10502536461636, 'loss': 8.363136291503906}


EP_train:3:  17%|| 4742/27626 [11:10<53:53,  7.08it/s]

{'epoch': 3, 'iter': 4740, 'avg_loss': 8.537715970099512, 'avg_acc': 50.108099557055475, 'loss': 9.023051261901855}


EP_train:3:  17%|| 4752/27626 [11:11<53:52,  7.08it/s]

{'epoch': 3, 'iter': 4750, 'avg_loss': 8.537614410587473, 'avg_acc': 50.101294464323296, 'loss': 8.084447860717773}


EP_train:3:  17%|| 4762/27626 [11:12<53:57,  7.06it/s]

{'epoch': 3, 'iter': 4760, 'avg_loss': 8.537960416553052, 'avg_acc': 50.10567632850241, 'loss': 8.40697193145752}


EP_train:3:  17%|| 4772/27626 [11:14<53:39,  7.10it/s]

{'epoch': 3, 'iter': 4770, 'avg_loss': 8.538028055705556, 'avg_acc': 50.10545483127227, 'loss': 8.39880084991455}


EP_train:3:  17%|| 4782/27626 [11:15<54:30,  6.98it/s]

{'epoch': 3, 'iter': 4780, 'avg_loss': 8.537753886195112, 'avg_acc': 50.107848776406605, 'loss': 8.853805541992188}


EP_train:3:  17%|| 4792/27626 [11:17<54:38,  6.96it/s]

{'epoch': 3, 'iter': 4790, 'avg_loss': 8.537538398359693, 'avg_acc': 50.10240555207681, 'loss': 8.71863842010498}


EP_train:3:  17%|| 4802/27626 [11:18<53:59,  7.05it/s]

{'epoch': 3, 'iter': 4800, 'avg_loss': 8.537366439789142, 'avg_acc': 50.106097687981666, 'loss': 9.659646987915039}


EP_train:3:  17%|| 4812/27626 [11:20<53:51,  7.06it/s]

{'epoch': 3, 'iter': 4810, 'avg_loss': 8.536935798543526, 'avg_acc': 50.10133028476408, 'loss': 7.989993572235107}


EP_train:3:  17%|| 4822/27626 [11:21<53:50,  7.06it/s]

{'epoch': 3, 'iter': 4820, 'avg_loss': 8.536800614285584, 'avg_acc': 50.0959344534329, 'loss': 8.657623291015625}


EP_train:3:  17%|| 4832/27626 [11:22<53:39,  7.08it/s]

{'epoch': 3, 'iter': 4830, 'avg_loss': 8.536330700808987, 'avg_acc': 50.09120782446699, 'loss': 8.662859916687012}


EP_train:3:  18%|| 4842/27626 [11:24<54:02,  7.03it/s]

{'epoch': 3, 'iter': 4840, 'avg_loss': 8.536121489365863, 'avg_acc': 50.09424705639331, 'loss': 7.828200340270996}


EP_train:3:  18%|| 4852/27626 [11:25<53:54,  7.04it/s]

{'epoch': 3, 'iter': 4850, 'avg_loss': 8.535463205639099, 'avg_acc': 50.092120181405896, 'loss': 7.985411643981934}


EP_train:3:  18%|| 4862/27626 [11:27<53:30,  7.09it/s]

{'epoch': 3, 'iter': 4860, 'avg_loss': 8.535269001044748, 'avg_acc': 50.09578790372351, 'loss': 8.304983139038086}


EP_train:3:  18%|| 4872/27626 [11:28<53:51,  7.04it/s]

{'epoch': 3, 'iter': 4870, 'avg_loss': 8.53583288545164, 'avg_acc': 50.09623280640526, 'loss': 8.874835968017578}


EP_train:3:  18%|| 4882/27626 [11:29<54:13,  6.99it/s]

{'epoch': 3, 'iter': 4880, 'avg_loss': 8.53582075242502, 'avg_acc': 50.085791845933215, 'loss': 9.39754581451416}


EP_train:3:  18%|| 4892/27626 [11:31<53:57,  7.02it/s]

{'epoch': 3, 'iter': 4890, 'avg_loss': 8.53586298886605, 'avg_acc': 50.092644653445106, 'loss': 8.985634803771973}


EP_train:3:  18%|| 4902/27626 [11:32<54:18,  6.97it/s]

{'epoch': 3, 'iter': 4900, 'avg_loss': 8.535263550133152, 'avg_acc': 50.09436849622526, 'loss': 8.666501998901367}


EP_train:3:  18%|| 4912/27626 [11:34<53:40,  7.05it/s]

{'epoch': 3, 'iter': 4910, 'avg_loss': 8.53503810196684, 'avg_acc': 50.098630625127264, 'loss': 8.337754249572754}


EP_train:3:  18%|| 4922/27626 [11:35<53:23,  7.09it/s]

{'epoch': 3, 'iter': 4920, 'avg_loss': 8.534816367043346, 'avg_acc': 50.09398496240601, 'loss': 8.424076080322266}


EP_train:3:  18%|| 4932/27626 [11:37<53:23,  7.08it/s]

{'epoch': 3, 'iter': 4930, 'avg_loss': 8.535068491075275, 'avg_acc': 50.10013181910363, 'loss': 9.105382919311523}


EP_train:3:  18%|| 4942/27626 [11:38<53:49,  7.02it/s]

{'epoch': 3, 'iter': 4940, 'avg_loss': 8.535306414233796, 'avg_acc': 50.09486945962356, 'loss': 8.432887077331543}


EP_train:3:  18%|| 4952/27626 [11:39<53:53,  7.01it/s]

{'epoch': 3, 'iter': 4950, 'avg_loss': 8.535917009158318, 'avg_acc': 50.092784286002825, 'loss': 9.89258098602295}


EP_train:3:  18%|| 4962/27626 [11:41<53:53,  7.01it/s]

{'epoch': 3, 'iter': 4960, 'avg_loss': 8.536383633170486, 'avg_acc': 50.09448699858899, 'loss': 9.198918342590332}


EP_train:3:  18%|| 4972/27626 [11:42<53:58,  7.00it/s]

{'epoch': 3, 'iter': 4970, 'avg_loss': 8.536169188527653, 'avg_acc': 50.09744015288674, 'loss': 7.674144744873047}


EP_train:3:  18%|| 4982/27626 [11:44<53:52,  7.00it/s]

{'epoch': 3, 'iter': 4980, 'avg_loss': 8.53590738081688, 'avg_acc': 50.095989761092156, 'loss': 8.713149070739746}


EP_train:3:  18%|| 4992/27626 [11:45<53:32,  7.05it/s]

{'epoch': 3, 'iter': 4990, 'avg_loss': 8.535636343332122, 'avg_acc': 50.09642356241234, 'loss': 8.22557544708252}


EP_train:3:  18%|| 5002/27626 [11:46<53:27,  7.05it/s]

{'epoch': 3, 'iter': 5000, 'avg_loss': 8.535673242548755, 'avg_acc': 50.09123175364927, 'loss': 9.573195457458496}


EP_train:3:  18%|| 5012/27626 [11:48<53:18,  7.07it/s]

{'epoch': 3, 'iter': 5010, 'avg_loss': 8.535941114065961, 'avg_acc': 50.09292057473558, 'loss': 9.183515548706055}


EP_train:3:  18%|| 5022/27626 [11:49<53:07,  7.09it/s]

{'epoch': 3, 'iter': 5020, 'avg_loss': 8.536123050111529, 'avg_acc': 50.0877564230233, 'loss': 9.000629425048828}


EP_train:3:  18%|| 5032/27626 [11:51<53:08,  7.09it/s]

{'epoch': 3, 'iter': 5030, 'avg_loss': 8.535678309158358, 'avg_acc': 50.08633969389783, 'loss': 8.42773151397705}


EP_train:3:  18%|| 5042/27626 [11:52<53:08,  7.08it/s]

{'epoch': 3, 'iter': 5040, 'avg_loss': 8.535153499554841, 'avg_acc': 50.08058916881571, 'loss': 8.479896545410156}


EP_train:3:  18%|| 5052/27626 [11:53<53:12,  7.07it/s]

{'epoch': 3, 'iter': 5050, 'avg_loss': 8.53554307444453, 'avg_acc': 50.0841417541081, 'loss': 8.736832618713379}


EP_train:3:  18%|| 5062/27626 [11:55<53:25,  7.04it/s]

{'epoch': 3, 'iter': 5060, 'avg_loss': 8.535288515450855, 'avg_acc': 50.09076763485477, 'loss': 8.287535667419434}


EP_train:3:  18%|| 5072/27626 [11:56<53:26,  7.03it/s]

{'epoch': 3, 'iter': 5070, 'avg_loss': 8.53550892180885, 'avg_acc': 50.090588641293635, 'loss': 8.838010787963867}


EP_train:3:  18%|| 5082/27626 [11:58<53:21,  7.04it/s]

{'epoch': 3, 'iter': 5080, 'avg_loss': 8.534859772813387, 'avg_acc': 50.095945679984254, 'loss': 8.38682746887207}


EP_train:3:  18%|| 5092/27626 [11:59<53:19,  7.04it/s]

{'epoch': 3, 'iter': 5090, 'avg_loss': 8.534486743842331, 'avg_acc': 50.104964643488515, 'loss': 8.410368919372559}


EP_train:3:  18%|| 5102/27626 [12:01<53:15,  7.05it/s]

{'epoch': 3, 'iter': 5100, 'avg_loss': 8.53391064131781, 'avg_acc': 50.101695745932176, 'loss': 8.251323699951172}


EP_train:3:  19%|| 5112/27626 [12:02<55:03,  6.82it/s]

{'epoch': 3, 'iter': 5110, 'avg_loss': 8.53357055233407, 'avg_acc': 50.08254255527294, 'loss': 8.176729202270508}


EP_train:3:  19%|| 5122/27626 [12:03<53:19,  7.03it/s]

{'epoch': 3, 'iter': 5120, 'avg_loss': 8.532802845020589, 'avg_acc': 50.081771138449525, 'loss': 8.15304183959961}


EP_train:3:  19%|| 5132/27626 [12:05<53:17,  7.04it/s]

{'epoch': 3, 'iter': 5130, 'avg_loss': 8.532485871490644, 'avg_acc': 50.081002728512956, 'loss': 9.503552436828613}


EP_train:3:  19%|| 5142/27626 [12:06<53:01,  7.07it/s]

{'epoch': 3, 'iter': 5140, 'avg_loss': 8.531911252004537, 'avg_acc': 50.080237307916754, 'loss': 8.00312614440918}


EP_train:3:  19%|| 5152/27626 [12:08<53:23,  7.02it/s]

{'epoch': 3, 'iter': 5150, 'avg_loss': 8.531244322388549, 'avg_acc': 50.08372160745487, 'loss': 7.959611415863037}


EP_train:3:  19%|| 5162/27626 [12:09<53:03,  7.06it/s]

{'epoch': 3, 'iter': 5160, 'avg_loss': 8.531476219456641, 'avg_acc': 50.08053187366789, 'loss': 8.242083549499512}


EP_train:3:  19%|| 5172/27626 [12:10<53:02,  7.05it/s]

{'epoch': 3, 'iter': 5170, 'avg_loss': 8.531605168897533, 'avg_acc': 50.07795880874105, 'loss': 9.281176567077637}


EP_train:3:  19%|| 5182/27626 [12:12<53:13,  7.03it/s]

{'epoch': 3, 'iter': 5180, 'avg_loss': 8.532189948658667, 'avg_acc': 50.07961783439491, 'loss': 8.33671760559082}


EP_train:3:  19%|| 5192/27626 [12:13<53:37,  6.97it/s]

{'epoch': 3, 'iter': 5190, 'avg_loss': 8.531906643071816, 'avg_acc': 50.077056443845116, 'loss': 9.400247573852539}


EP_train:3:  19%|| 5202/27626 [12:15<53:07,  7.03it/s]

{'epoch': 3, 'iter': 5200, 'avg_loss': 8.53209536370349, 'avg_acc': 50.0702989809652, 'loss': 8.828181266784668}


EP_train:3:  19%|| 5212/27626 [12:16<53:04,  7.04it/s]

{'epoch': 3, 'iter': 5210, 'avg_loss': 8.531556389149264, 'avg_acc': 50.06776530416427, 'loss': 8.103307723999023}


EP_train:3:  19%|| 5222/27626 [12:18<53:07,  7.03it/s]

{'epoch': 3, 'iter': 5220, 'avg_loss': 8.532023741855998, 'avg_acc': 50.07002968779927, 'loss': 8.17131519317627}


EP_train:3:  19%|| 5232/27626 [12:19<52:44,  7.08it/s]

{'epoch': 3, 'iter': 5230, 'avg_loss': 8.532085635105572, 'avg_acc': 50.0728828139935, 'loss': 7.602243423461914}


EP_train:3:  19%|| 5242/27626 [12:20<52:53,  7.05it/s]

{'epoch': 3, 'iter': 5240, 'avg_loss': 8.532248492096244, 'avg_acc': 50.081687655027665, 'loss': 9.383716583251953}


EP_train:3:  19%|| 5252/27626 [12:22<53:04,  7.03it/s]

{'epoch': 3, 'iter': 5250, 'avg_loss': 8.531600190353176, 'avg_acc': 50.09224433441249, 'loss': 7.848822593688965}


EP_train:3:  19%|| 5262/27626 [12:23<52:56,  7.04it/s]

{'epoch': 3, 'iter': 5260, 'avg_loss': 8.531363637427871, 'avg_acc': 50.0944449724387, 'loss': 9.773238182067871}


EP_train:3:  19%|| 5272/27626 [12:25<52:45,  7.06it/s]

{'epoch': 3, 'iter': 5270, 'avg_loss': 8.53060701915196, 'avg_acc': 50.096044393853155, 'loss': 8.062830924987793}


EP_train:3:  19%|| 5282/27626 [12:26<52:39,  7.07it/s]

{'epoch': 3, 'iter': 5280, 'avg_loss': 8.530239753708697, 'avg_acc': 50.09586252603674, 'loss': 8.545971870422363}


EP_train:3:  19%|| 5292/27626 [12:27<52:44,  7.06it/s]

{'epoch': 3, 'iter': 5290, 'avg_loss': 8.529701125574482, 'avg_acc': 50.0968625968626, 'loss': 7.577012062072754}


EP_train:3:  19%|| 5302/27626 [12:29<52:58,  7.02it/s]

{'epoch': 3, 'iter': 5300, 'avg_loss': 8.52967777514858, 'avg_acc': 50.093142803244675, 'loss': 8.40988540649414}


EP_train:3:  19%|| 5312/27626 [12:30<52:41,  7.06it/s]

{'epoch': 3, 'iter': 5310, 'avg_loss': 8.529696759116378, 'avg_acc': 50.09296742609678, 'loss': 8.080436706542969}


EP_train:3:  19%|| 5322/27626 [12:32<52:35,  7.07it/s]

{'epoch': 3, 'iter': 5320, 'avg_loss': 8.530314282128918, 'avg_acc': 50.08985623003195, 'loss': 8.457353591918945}


EP_train:3:  19%|| 5332/27626 [12:33<52:41,  7.05it/s]

{'epoch': 3, 'iter': 5330, 'avg_loss': 8.530890409952256, 'avg_acc': 50.09496342149691, 'loss': 8.542250633239746}


EP_train:3:  19%|| 5342/27626 [12:35<52:49,  7.03it/s]

{'epoch': 3, 'iter': 5340, 'avg_loss': 8.530837062592141, 'avg_acc': 50.094785620670294, 'loss': 8.47999095916748}


EP_train:3:  19%|| 5352/27626 [12:36<52:43,  7.04it/s]

{'epoch': 3, 'iter': 5350, 'avg_loss': 8.531005570831041, 'avg_acc': 50.09285647542515, 'loss': 9.134659767150879}


EP_train:3:  19%|| 5362/27626 [12:37<52:38,  7.05it/s]

{'epoch': 3, 'iter': 5360, 'avg_loss': 8.532119831383351, 'avg_acc': 50.08976869986943, 'loss': 8.491485595703125}


EP_train:3:  19%|| 5372/27626 [12:39<52:23,  7.08it/s]

{'epoch': 3, 'iter': 5370, 'avg_loss': 8.531492060954468, 'avg_acc': 50.09425619065351, 'loss': 7.894224166870117}


EP_train:3:  19%|| 5382/27626 [12:40<52:20,  7.08it/s]

{'epoch': 3, 'iter': 5380, 'avg_loss': 8.531390953542486, 'avg_acc': 50.0952425199777, 'loss': 8.130265235900879}


EP_train:3:  20%|| 5392/27626 [12:42<52:36,  7.04it/s]

{'epoch': 3, 'iter': 5390, 'avg_loss': 8.531740290582169, 'avg_acc': 50.092747171211286, 'loss': 8.303145408630371}


EP_train:3:  20%|| 5402/27626 [12:43<52:46,  7.02it/s]

{'epoch': 3, 'iter': 5400, 'avg_loss': 8.532126594530038, 'avg_acc': 50.09199685243474, 'loss': 9.180048942565918}


EP_train:3:  20%|| 5412/27626 [12:44<52:10,  7.10it/s]

{'epoch': 3, 'iter': 5410, 'avg_loss': 8.532491350235636, 'avg_acc': 50.09298188874515, 'loss': 9.074711799621582}


EP_train:3:  20%|| 5422/27626 [12:46<52:43,  7.02it/s]

{'epoch': 3, 'iter': 5420, 'avg_loss': 8.532144068176292, 'avg_acc': 50.09281036709095, 'loss': 8.573225021362305}


EP_train:3:  20%|| 5432/27626 [12:47<52:28,  7.05it/s]

{'epoch': 3, 'iter': 5430, 'avg_loss': 8.53188993486992, 'avg_acc': 50.089762474682374, 'loss': 7.574051380157471}


EP_train:3:  20%|| 5442/27626 [12:49<52:43,  7.01it/s]

{'epoch': 3, 'iter': 5440, 'avg_loss': 8.532275508638033, 'avg_acc': 50.08730012865282, 'loss': 8.825620651245117}


EP_train:3:  20%|| 5452/27626 [12:50<52:36,  7.02it/s]

{'epoch': 3, 'iter': 5450, 'avg_loss': 8.532255247522333, 'avg_acc': 50.08713997431664, 'loss': 8.81143856048584}


EP_train:3:  20%|| 5462/27626 [12:52<52:18,  7.06it/s]

{'epoch': 3, 'iter': 5460, 'avg_loss': 8.532731537940279, 'avg_acc': 50.084119208936094, 'loss': 8.01112174987793}


EP_train:3:  20%|| 5472/27626 [12:53<52:40,  7.01it/s]

{'epoch': 3, 'iter': 5470, 'avg_loss': 8.532704910877264, 'avg_acc': 50.08853500274173, 'loss': 8.58304214477539}


EP_train:3:  20%|| 5482/27626 [12:54<52:20,  7.05it/s]

{'epoch': 3, 'iter': 5480, 'avg_loss': 8.533283824882375, 'avg_acc': 50.097495894909684, 'loss': 8.537578582763672}


EP_train:3:  20%|| 5492/27626 [12:56<52:29,  7.03it/s]

{'epoch': 3, 'iter': 5490, 'avg_loss': 8.53333163491539, 'avg_acc': 50.09845656528865, 'loss': 7.9662861824035645}


EP_train:3:  20%|| 5502/27626 [12:57<52:42,  7.00it/s]

{'epoch': 3, 'iter': 5500, 'avg_loss': 8.53285971513078, 'avg_acc': 50.09827758589347, 'loss': 7.446641445159912}


EP_train:3:  20%|| 5512/27626 [12:59<52:04,  7.08it/s]

{'epoch': 3, 'iter': 5510, 'avg_loss': 8.533046452714881, 'avg_acc': 50.0935628742515, 'loss': 9.152730941772461}


EP_train:3:  20%|| 5522/27626 [13:00<52:31,  7.01it/s]

{'epoch': 3, 'iter': 5520, 'avg_loss': 8.533195127764596, 'avg_acc': 50.10244973736641, 'loss': 8.076440811157227}


EP_train:3:  20%|| 5532/27626 [13:01<51:57,  7.09it/s]

{'epoch': 3, 'iter': 5530, 'avg_loss': 8.533241499399239, 'avg_acc': 50.10282950641837, 'loss': 8.97860336303711}


EP_train:3:  20%|| 5542/27626 [13:03<52:10,  7.05it/s]

{'epoch': 3, 'iter': 5540, 'avg_loss': 8.532791756353015, 'avg_acc': 50.10489983757444, 'loss': 8.258306503295898}


EP_train:3:  20%|| 5552/27626 [13:04<52:14,  7.04it/s]

{'epoch': 3, 'iter': 5550, 'avg_loss': 8.533100007826349, 'avg_acc': 50.110903440821474, 'loss': 8.219679832458496}


EP_train:3:  20%|| 5562/27626 [13:06<52:01,  7.07it/s]

{'epoch': 3, 'iter': 5560, 'avg_loss': 8.533767394508482, 'avg_acc': 50.11070401007013, 'loss': 8.48702335357666}


EP_train:3:  20%|| 5572/27626 [13:07<52:31,  7.00it/s]

{'epoch': 3, 'iter': 5570, 'avg_loss': 8.534032422022767, 'avg_acc': 50.098725542990486, 'loss': 9.065800666809082}


EP_train:3:  20%|| 5582/27626 [13:09<52:16,  7.03it/s]

{'epoch': 3, 'iter': 5580, 'avg_loss': 8.533959008356808, 'avg_acc': 50.096308905214116, 'loss': 8.747450828552246}


EP_train:3:  20%|| 5592/27626 [13:10<51:46,  7.09it/s]

{'epoch': 3, 'iter': 5590, 'avg_loss': 8.533949868876485, 'avg_acc': 50.100049186192095, 'loss': 8.224809646606445}


EP_train:3:  20%|| 5602/27626 [13:11<52:18,  7.02it/s]

{'epoch': 3, 'iter': 5600, 'avg_loss': 8.533392115376545, 'avg_acc': 50.09987055882878, 'loss': 8.082151412963867}


EP_train:3:  20%|| 5612/27626 [13:13<52:38,  6.97it/s]

{'epoch': 3, 'iter': 5610, 'avg_loss': 8.533517776931847, 'avg_acc': 50.09690785956158, 'loss': 8.17504596710205}


EP_train:3:  20%|| 5622/27626 [13:14<52:07,  7.03it/s]

{'epoch': 3, 'iter': 5620, 'avg_loss': 8.533188147342115, 'avg_acc': 50.10173901441025, 'loss': 8.470232009887695}


EP_train:3:  20%|| 5632/27626 [13:16<51:57,  7.05it/s]

{'epoch': 3, 'iter': 5630, 'avg_loss': 8.533015826988592, 'avg_acc': 50.094343811045995, 'loss': 8.623482704162598}


EP_train:3:  20%|| 5642/27626 [13:17<51:52,  7.06it/s]

{'epoch': 3, 'iter': 5640, 'avg_loss': 8.532709913417774, 'avg_acc': 50.09362258464811, 'loss': 8.83613109588623}


EP_train:3:  20%|| 5652/27626 [13:18<52:39,  6.96it/s]

{'epoch': 3, 'iter': 5650, 'avg_loss': 8.532332264581754, 'avg_acc': 50.08571491771367, 'loss': 8.229141235351562}


EP_train:3:  20%|| 5662/27626 [13:20<51:55,  7.05it/s]

{'epoch': 3, 'iter': 5660, 'avg_loss': 8.532274221198927, 'avg_acc': 50.08777159512454, 'loss': 7.799276828765869}


EP_train:3:  21%|| 5672/27626 [13:21<52:05,  7.02it/s]

{'epoch': 3, 'iter': 5670, 'avg_loss': 8.531719346585422, 'avg_acc': 50.089821019220594, 'loss': 8.558016777038574}


EP_train:3:  21%|| 5682/27626 [13:23<52:07,  7.02it/s]

{'epoch': 3, 'iter': 5680, 'avg_loss': 8.532000270155907, 'avg_acc': 50.08911283224784, 'loss': 8.791997909545898}


EP_train:3:  21%|| 5692/27626 [13:24<51:56,  7.04it/s]

{'epoch': 3, 'iter': 5690, 'avg_loss': 8.531675967992536, 'avg_acc': 50.08456334563346, 'loss': 8.028470039367676}


EP_train:3:  21%|| 5702/27626 [13:26<52:33,  6.95it/s]

{'epoch': 3, 'iter': 5700, 'avg_loss': 8.53209327426088, 'avg_acc': 50.085511313804595, 'loss': 9.150419235229492}


EP_train:3:  21%|| 5712/27626 [13:27<51:41,  7.07it/s]

{'epoch': 3, 'iter': 5710, 'avg_loss': 8.53241976679697, 'avg_acc': 50.08481439327613, 'loss': 8.909305572509766}


EP_train:3:  21%|| 5722/27626 [13:28<51:48,  7.05it/s]

{'epoch': 3, 'iter': 5720, 'avg_loss': 8.531890419558449, 'avg_acc': 50.078111344170594, 'loss': 8.506282806396484}


EP_train:3:  21%|| 5732/27626 [13:30<51:50,  7.04it/s]

{'epoch': 3, 'iter': 5730, 'avg_loss': 8.531497101158896, 'avg_acc': 50.07306752748212, 'loss': 8.613691329956055}


EP_train:3:  21%|| 5742/27626 [13:31<51:56,  7.02it/s]

{'epoch': 3, 'iter': 5740, 'avg_loss': 8.530824114067425, 'avg_acc': 50.07729489635951, 'loss': 8.216078758239746}


EP_train:3:  21%|| 5752/27626 [13:33<51:49,  7.03it/s]

{'epoch': 3, 'iter': 5750, 'avg_loss': 8.531086549574843, 'avg_acc': 50.08531125021736, 'loss': 9.017040252685547}


EP_train:3:  21%|| 5762/27626 [13:34<51:23,  7.09it/s]

{'epoch': 3, 'iter': 5760, 'avg_loss': 8.530929633549281, 'avg_acc': 50.08841780940809, 'loss': 8.624772071838379}


EP_train:3:  21%|| 5772/27626 [13:35<51:18,  7.10it/s]

{'epoch': 3, 'iter': 5770, 'avg_loss': 8.531080911858123, 'avg_acc': 50.08555709582395, 'loss': 8.240839958190918}


EP_train:3:  21%|| 5782/27626 [13:37<51:34,  7.06it/s]

{'epoch': 3, 'iter': 5780, 'avg_loss': 8.530635272439964, 'avg_acc': 50.0864902266044, 'loss': 9.146449089050293}


EP_train:3:  21%|| 5792/27626 [13:38<51:51,  7.02it/s]

{'epoch': 3, 'iter': 5790, 'avg_loss': 8.53091804616049, 'avg_acc': 50.0841823519254, 'loss': 9.027738571166992}


EP_train:3:  21%|| 5802/27626 [13:40<51:17,  7.09it/s]

{'epoch': 3, 'iter': 5800, 'avg_loss': 8.5311846474166, 'avg_acc': 50.0818824340631, 'loss': 7.797804355621338}


EP_train:3:  21%|| 5812/27626 [13:41<51:59,  6.99it/s]

{'epoch': 3, 'iter': 5810, 'avg_loss': 8.5312832837424, 'avg_acc': 50.07475047324041, 'loss': 9.336981773376465}


EP_train:3:  21%|| 5822/27626 [13:42<51:29,  7.06it/s]

{'epoch': 3, 'iter': 5820, 'avg_loss': 8.5311265510293, 'avg_acc': 50.08160109946744, 'loss': 7.469879150390625}


EP_train:3:  21%|| 5832/27626 [13:44<51:23,  7.07it/s]

{'epoch': 3, 'iter': 5830, 'avg_loss': 8.531256178275449, 'avg_acc': 50.07824558394787, 'loss': 9.087867736816406}


EP_train:3:  21%|| 5842/27626 [13:45<51:31,  7.05it/s]

{'epoch': 3, 'iter': 5840, 'avg_loss': 8.531457629152365, 'avg_acc': 50.086671802773495, 'loss': 7.574695110321045}


EP_train:3:  21%|| 5852/27626 [13:47<51:18,  7.07it/s]

{'epoch': 3, 'iter': 5850, 'avg_loss': 8.531641607100363, 'avg_acc': 50.08972825158092, 'loss': 8.97094440460205}


EP_train:3:  21%|| 5862/27626 [13:48<51:10,  7.09it/s]

{'epoch': 3, 'iter': 5860, 'avg_loss': 8.531144948730573, 'avg_acc': 50.08317693226412, 'loss': 7.616949558258057}


EP_train:3:  21%|| 5872/27626 [13:50<51:20,  7.06it/s]

{'epoch': 3, 'iter': 5870, 'avg_loss': 8.531540990220975, 'avg_acc': 50.08782575370465, 'loss': 9.257493019104004}


EP_train:3:  21%|| 5882/27626 [13:51<51:55,  6.98it/s]

{'epoch': 3, 'iter': 5880, 'avg_loss': 8.531896629781063, 'avg_acc': 50.08927053222241, 'loss': 8.588425636291504}


EP_train:3:  21%|| 5892/27626 [13:52<51:05,  7.09it/s]

{'epoch': 3, 'iter': 5890, 'avg_loss': 8.53123366571691, 'avg_acc': 50.087527584450854, 'loss': 8.507499694824219}


EP_train:3:  21%|| 5902/27626 [13:54<51:13,  7.07it/s]

{'epoch': 3, 'iter': 5900, 'avg_loss': 8.53144699435096, 'avg_acc': 50.0921453990849, 'loss': 7.6760334968566895}


EP_train:3:  21%|| 5912/27626 [13:55<51:42,  7.00it/s]

{'epoch': 3, 'iter': 5910, 'avg_loss': 8.531170039143866, 'avg_acc': 50.08934613432583, 'loss': 9.129570007324219}


EP_train:3:  21%|| 5922/27626 [13:57<51:13,  7.06it/s]

{'epoch': 3, 'iter': 5920, 'avg_loss': 8.530509804253883, 'avg_acc': 50.08180628272252, 'loss': 8.348441123962402}


EP_train:3:  21%|| 5932/27626 [13:58<51:15,  7.05it/s]

{'epoch': 3, 'iter': 5930, 'avg_loss': 8.53037873538102, 'avg_acc': 50.079560782330134, 'loss': 8.9700288772583}


EP_train:3:  22%|| 5942/27626 [13:59<50:59,  7.09it/s]

{'epoch': 3, 'iter': 5940, 'avg_loss': 8.530408361868673, 'avg_acc': 50.08205689277899, 'loss': 8.972858428955078}


EP_train:3:  22%|| 5952/27626 [14:01<51:25,  7.02it/s]

{'epoch': 3, 'iter': 5950, 'avg_loss': 8.530496037441068, 'avg_acc': 50.09242144177449, 'loss': 8.225595474243164}


EP_train:3:  22%|| 5962/27626 [14:02<51:29,  7.01it/s]

{'epoch': 3, 'iter': 5960, 'avg_loss': 8.530905501431736, 'avg_acc': 50.09698456634793, 'loss': 8.534300804138184}


EP_train:3:  22%|| 5972/27626 [14:04<51:33,  7.00it/s]

{'epoch': 3, 'iter': 5970, 'avg_loss': 8.53114296415307, 'avg_acc': 50.108336124602246, 'loss': 8.4924898147583}


EP_train:3:  22%|| 5982/27626 [14:05<50:59,  7.07it/s]

{'epoch': 3, 'iter': 5980, 'avg_loss': 8.531086214687727, 'avg_acc': 50.1112899180739, 'loss': 8.523077011108398}


EP_train:3:  22%|| 5992/27626 [14:06<50:58,  7.07it/s]

{'epoch': 3, 'iter': 5990, 'avg_loss': 8.5301085222824, 'avg_acc': 50.11162577199132, 'loss': 7.689239501953125}


EP_train:3:  22%|| 6002/27626 [14:08<50:57,  7.07it/s]

{'epoch': 3, 'iter': 6000, 'avg_loss': 8.529955085804612, 'avg_acc': 50.10727378770204, 'loss': 8.017735481262207}


EP_train:3:  22%|| 6012/27626 [14:09<50:57,  7.07it/s]

{'epoch': 3, 'iter': 6010, 'avg_loss': 8.529789516878612, 'avg_acc': 50.10293628348028, 'loss': 8.501482963562012}


EP_train:3:  22%|| 6022/27626 [14:11<51:08,  7.04it/s]

{'epoch': 3, 'iter': 6020, 'avg_loss': 8.52977895214003, 'avg_acc': 50.10328433814981, 'loss': 7.9761810302734375}


EP_train:3:  22%|| 6032/27626 [14:12<50:55,  7.07it/s]

{'epoch': 3, 'iter': 6030, 'avg_loss': 8.529484529278603, 'avg_acc': 50.098967832863536, 'loss': 8.86551570892334}


EP_train:3:  22%|| 6042/27626 [14:14<50:56,  7.06it/s]

{'epoch': 3, 'iter': 6040, 'avg_loss': 8.529462631205378, 'avg_acc': 50.100873199801356, 'loss': 8.13374137878418}


EP_train:3:  22%|| 6052/27626 [14:15<50:50,  7.07it/s]

{'epoch': 3, 'iter': 6050, 'avg_loss': 8.529498447565732, 'avg_acc': 50.10483804329863, 'loss': 8.4632568359375}


EP_train:3:  22%|| 6062/27626 [14:16<51:18,  7.01it/s]

{'epoch': 3, 'iter': 6060, 'avg_loss': 8.529457104715492, 'avg_acc': 50.103633888797226, 'loss': 8.204840660095215}


EP_train:3:  22%|| 6072/27626 [14:18<51:14,  7.01it/s]

{'epoch': 3, 'iter': 6070, 'avg_loss': 8.529157715129648, 'avg_acc': 50.09934524789985, 'loss': 8.26105785369873}


EP_train:3:  22%|| 6082/27626 [14:19<50:43,  7.08it/s]

{'epoch': 3, 'iter': 6080, 'avg_loss': 8.528819577471635, 'avg_acc': 50.09558460779478, 'loss': 8.638385772705078}


EP_train:3:  22%|| 6092/27626 [14:21<50:57,  7.04it/s]

{'epoch': 3, 'iter': 6090, 'avg_loss': 8.528285749517154, 'avg_acc': 50.09696683631587, 'loss': 8.508004188537598}


EP_train:3:  22%|| 6102/27626 [14:22<50:49,  7.06it/s]

{'epoch': 3, 'iter': 6100, 'avg_loss': 8.528604413345004, 'avg_acc': 50.095783478118335, 'loss': 8.465592384338379}


EP_train:3:  22%|| 6112/27626 [14:23<50:52,  7.05it/s]

{'epoch': 3, 'iter': 6110, 'avg_loss': 8.528605372167183, 'avg_acc': 50.08949026345933, 'loss': 8.756258010864258}


EP_train:3:  22%|| 6122/27626 [14:25<50:24,  7.11it/s]

{'epoch': 3, 'iter': 6120, 'avg_loss': 8.5284239966541, 'avg_acc': 50.090875673909494, 'loss': 8.565617561340332}


EP_train:3:  22%|| 6132/27626 [14:26<50:54,  7.04it/s]

{'epoch': 3, 'iter': 6130, 'avg_loss': 8.528194403224504, 'avg_acc': 50.082572174196706, 'loss': 8.677842140197754}


EP_train:3:  22%|| 6142/27626 [14:28<51:04,  7.01it/s]

{'epoch': 3, 'iter': 6140, 'avg_loss': 8.528046134239958, 'avg_acc': 50.077857840742546, 'loss': 8.075772285461426}


EP_train:3:  22%|| 6152/27626 [14:29<51:03,  7.01it/s]

{'epoch': 3, 'iter': 6150, 'avg_loss': 8.528288318672484, 'avg_acc': 50.07823931068119, 'loss': 9.313103675842285}


EP_train:3:  22%|| 6162/27626 [14:31<50:53,  7.03it/s]

{'epoch': 3, 'iter': 6160, 'avg_loss': 8.528632626907331, 'avg_acc': 50.07101119948061, 'loss': 8.786364555358887}


EP_train:3:  22%|| 6172/27626 [14:32<50:33,  7.07it/s]

{'epoch': 3, 'iter': 6170, 'avg_loss': 8.529523868186802, 'avg_acc': 50.07241532976827, 'loss': 9.054423332214355}


EP_train:3:  22%|| 6182/27626 [14:33<50:45,  7.04it/s]

{'epoch': 3, 'iter': 6180, 'avg_loss': 8.529403327101933, 'avg_acc': 50.07128700857466, 'loss': 7.795662879943848}


EP_train:3:  22%|| 6192/27626 [14:35<50:40,  7.05it/s]

{'epoch': 3, 'iter': 6190, 'avg_loss': 8.52966690995431, 'avg_acc': 50.073695687288, 'loss': 8.075793266296387}


EP_train:3:  22%|| 6202/27626 [14:36<50:29,  7.07it/s]

{'epoch': 3, 'iter': 6200, 'avg_loss': 8.529950436652236, 'avg_acc': 50.08063215610385, 'loss': 8.144088745117188}


EP_train:3:  22%|| 6212/27626 [14:38<50:31,  7.06it/s]

{'epoch': 3, 'iter': 6210, 'avg_loss': 8.529913020467935, 'avg_acc': 50.081508613749804, 'loss': 7.633196830749512}


EP_train:3:  23%|| 6222/27626 [14:39<50:34,  7.05it/s]

{'epoch': 3, 'iter': 6220, 'avg_loss': 8.530147042080552, 'avg_acc': 50.08941488506671, 'loss': 8.579301834106445}


EP_train:3:  23%|| 6232/27626 [14:40<50:34,  7.05it/s]

{'epoch': 3, 'iter': 6230, 'avg_loss': 8.530463213102523, 'avg_acc': 50.083253089391754, 'loss': 9.252533912658691}


EP_train:3:  23%|| 6242/27626 [14:42<50:41,  7.03it/s]

{'epoch': 3, 'iter': 6240, 'avg_loss': 8.530433906077803, 'avg_acc': 50.086124018586766, 'loss': 8.34287166595459}


EP_train:3:  23%|| 6252/27626 [14:43<50:27,  7.06it/s]

{'epoch': 3, 'iter': 6250, 'avg_loss': 8.530530717273347, 'avg_acc': 50.08598624220125, 'loss': 8.365630149841309}


EP_train:3:  23%|| 6262/27626 [14:45<50:35,  7.04it/s]

{'epoch': 3, 'iter': 6260, 'avg_loss': 8.530385190734961, 'avg_acc': 50.09183836447851, 'loss': 9.191207885742188}


EP_train:3:  23%|| 6272/27626 [14:46<50:46,  7.01it/s]

{'epoch': 3, 'iter': 6270, 'avg_loss': 8.53020866566194, 'avg_acc': 50.09318689204274, 'loss': 7.482675075531006}


EP_train:3:  23%|| 6282/27626 [14:48<50:32,  7.04it/s]

{'epoch': 3, 'iter': 6280, 'avg_loss': 8.530709232803439, 'avg_acc': 50.082590351854805, 'loss': 9.896979331970215}


EP_train:3:  23%|| 6292/27626 [14:49<50:37,  7.02it/s]

{'epoch': 3, 'iter': 6290, 'avg_loss': 8.530810021270232, 'avg_acc': 50.077491654744875, 'loss': 8.949519157409668}


EP_train:3:  23%|| 6302/27626 [14:50<50:14,  7.07it/s]

{'epoch': 3, 'iter': 6300, 'avg_loss': 8.530302584199674, 'avg_acc': 50.08381606094271, 'loss': 8.697061538696289}


EP_train:3:  23%|| 6312/27626 [14:52<50:06,  7.09it/s]

{'epoch': 3, 'iter': 6310, 'avg_loss': 8.529589008971413, 'avg_acc': 50.08467358580256, 'loss': 8.193254470825195}


EP_train:3:  23%|| 6322/27626 [14:53<50:23,  7.05it/s]

{'epoch': 3, 'iter': 6320, 'avg_loss': 8.52965992309119, 'avg_acc': 50.08256209460529, 'loss': 8.761434555053711}


EP_train:3:  23%|| 6332/27626 [14:55<50:41,  7.00it/s]

{'epoch': 3, 'iter': 6330, 'avg_loss': 8.530245983173188, 'avg_acc': 50.08243168535776, 'loss': 9.003510475158691}


EP_train:3:  23%|| 6342/27626 [14:56<50:28,  7.03it/s]

{'epoch': 3, 'iter': 6340, 'avg_loss': 8.530836894259883, 'avg_acc': 50.085751458760456, 'loss': 8.806571006774902}


EP_train:3:  23%|| 6352/27626 [14:57<50:12,  7.06it/s]

{'epoch': 3, 'iter': 6350, 'avg_loss': 8.530845075381157, 'avg_acc': 50.08758463234136, 'loss': 7.531116962432861}


EP_train:3:  23%|| 6362/27626 [14:59<50:04,  7.08it/s]

{'epoch': 3, 'iter': 6360, 'avg_loss': 8.530556307707057, 'avg_acc': 50.08941204213174, 'loss': 8.89698314666748}


EP_train:3:  23%|| 6372/27626 [15:00<50:36,  7.00it/s]

{'epoch': 3, 'iter': 6370, 'avg_loss': 8.531695470688687, 'avg_acc': 50.08191414220688, 'loss': 9.04203987121582}


EP_train:3:  23%|| 6382/27626 [15:02<50:42,  6.98it/s]

{'epoch': 3, 'iter': 6380, 'avg_loss': 8.531497124799122, 'avg_acc': 50.083744710860366, 'loss': 8.306445121765137}


EP_train:3:  23%|| 6392/27626 [15:03<50:22,  7.02it/s]

{'epoch': 3, 'iter': 6390, 'avg_loss': 8.530700594732373, 'avg_acc': 50.08605851979346, 'loss': 6.98921537399292}


EP_train:3:  23%|| 6402/27626 [15:04<50:07,  7.06it/s]

{'epoch': 3, 'iter': 6400, 'avg_loss': 8.530692253461426, 'avg_acc': 50.088853304171224, 'loss': 8.247867584228516}


EP_train:3:  23%|| 6412/27626 [15:06<50:14,  7.04it/s]

{'epoch': 3, 'iter': 6410, 'avg_loss': 8.531175510687346, 'avg_acc': 50.08920215255031, 'loss': 9.547986030578613}


EP_train:3:  23%|| 6422/27626 [15:07<50:30,  7.00it/s]

{'epoch': 3, 'iter': 6420, 'avg_loss': 8.53167200994499, 'avg_acc': 50.09393007319732, 'loss': 8.492959022521973}


EP_train:3:  23%|| 6432/27626 [15:09<50:15,  7.03it/s]

{'epoch': 3, 'iter': 6430, 'avg_loss': 8.531812445332559, 'avg_acc': 50.08892473954284, 'loss': 8.95785140991211}


EP_train:3:  23%|| 6442/27626 [15:10<49:52,  7.08it/s]

{'epoch': 3, 'iter': 6440, 'avg_loss': 8.531960165328517, 'avg_acc': 50.08830150597733, 'loss': 9.390674591064453}


EP_train:3:  23%|| 6452/27626 [15:12<50:07,  7.04it/s]

{'epoch': 3, 'iter': 6450, 'avg_loss': 8.531440590060306, 'avg_acc': 50.09058673073942, 'loss': 8.525774002075195}


EP_train:3:  23%|| 6462/27626 [15:13<49:58,  7.06it/s]

{'epoch': 3, 'iter': 6460, 'avg_loss': 8.531529587537692, 'avg_acc': 50.08802816901409, 'loss': 8.121641159057617}


EP_train:3:  23%|| 6472/27626 [15:14<50:08,  7.03it/s]

{'epoch': 3, 'iter': 6470, 'avg_loss': 8.531253371830426, 'avg_acc': 50.088857981764804, 'loss': 8.784502983093262}


EP_train:3:  23%|| 6482/27626 [15:16<50:09,  7.03it/s]

{'epoch': 3, 'iter': 6480, 'avg_loss': 8.531368971937804, 'avg_acc': 50.082452553618275, 'loss': 8.708562850952148}


EP_train:3:  23%|| 6492/27626 [15:17<49:42,  7.09it/s]

{'epoch': 3, 'iter': 6490, 'avg_loss': 8.531958781594838, 'avg_acc': 50.08473270682483, 'loss': 9.301116943359375}


EP_train:3:  24%|| 6502/27626 [15:19<50:05,  7.03it/s]

{'epoch': 3, 'iter': 6500, 'avg_loss': 8.532048918544064, 'avg_acc': 50.08508306414397, 'loss': 8.2467679977417}


EP_train:3:  24%|| 6512/27626 [15:20<50:36,  6.95it/s]

{'epoch': 3, 'iter': 6510, 'avg_loss': 8.532358971046351, 'avg_acc': 50.0911918292121, 'loss': 8.477304458618164}


EP_train:3:  24%|| 6522/27626 [15:21<50:22,  6.98it/s]

{'epoch': 3, 'iter': 6520, 'avg_loss': 8.532199670814732, 'avg_acc': 50.09153120687011, 'loss': 8.998855590820312}


EP_train:3:  24%|| 6532/27626 [15:23<49:38,  7.08it/s]

{'epoch': 3, 'iter': 6530, 'avg_loss': 8.532193480471049, 'avg_acc': 50.09330500689022, 'loss': 8.247828483581543}


EP_train:3:  24%|| 6542/27626 [15:24<49:41,  7.07it/s]

{'epoch': 3, 'iter': 6540, 'avg_loss': 8.531382553294208, 'avg_acc': 50.09029582632625, 'loss': 7.8292436599731445}


EP_train:3:  24%|| 6552/27626 [15:26<49:47,  7.05it/s]

{'epoch': 3, 'iter': 6550, 'avg_loss': 8.530835122047463, 'avg_acc': 50.080140436574574, 'loss': 8.004873275756836}


EP_train:3:  24%|| 6562/27626 [15:27<49:39,  7.07it/s]

{'epoch': 3, 'iter': 6560, 'avg_loss': 8.530790949354737, 'avg_acc': 50.0828760859625, 'loss': 9.004659652709961}


EP_train:3:  24%|| 6572/27626 [15:29<50:03,  7.01it/s]

{'epoch': 3, 'iter': 6570, 'avg_loss': 8.530918698970485, 'avg_acc': 50.0889324303759, 'loss': 8.516040802001953}


EP_train:3:  24%|| 6582/27626 [15:30<49:36,  7.07it/s]

{'epoch': 3, 'iter': 6580, 'avg_loss': 8.530868582732046, 'avg_acc': 50.08689788785898, 'loss': 8.630812644958496}


EP_train:3:  24%|| 6592/27626 [15:31<49:50,  7.03it/s]

{'epoch': 3, 'iter': 6590, 'avg_loss': 8.530889929200029, 'avg_acc': 50.08866257017145, 'loss': 8.734411239624023}


EP_train:3:  24%|| 6602/27626 [15:33<50:11,  6.98it/s]

{'epoch': 3, 'iter': 6600, 'avg_loss': 8.530964324409682, 'avg_acc': 50.08663460081806, 'loss': 7.665053367614746}


EP_train:3:  24%|| 6612/27626 [15:34<49:50,  7.03it/s]

{'epoch': 3, 'iter': 6610, 'avg_loss': 8.531152108454556, 'avg_acc': 50.085558160641355, 'loss': 8.368499755859375}


EP_train:3:  24%|| 6622/27626 [15:36<49:35,  7.06it/s]

{'epoch': 3, 'iter': 6620, 'avg_loss': 8.531338544792863, 'avg_acc': 50.09486859990938, 'loss': 9.785104751586914}


EP_train:3:  24%|| 6632/27626 [15:37<49:43,  7.04it/s]

{'epoch': 3, 'iter': 6630, 'avg_loss': 8.531513829951084, 'avg_acc': 50.09896697330719, 'loss': 9.421927452087402}


EP_train:3:  24%|| 6642/27626 [15:38<49:57,  7.00it/s]

{'epoch': 3, 'iter': 6640, 'avg_loss': 8.531771591423757, 'avg_acc': 50.09975907242885, 'loss': 8.7256441116333}


EP_train:3:  24%|| 6652/27626 [15:40<49:45,  7.03it/s]

{'epoch': 3, 'iter': 6650, 'avg_loss': 8.531813647832713, 'avg_acc': 50.095850248082996, 'loss': 9.532943725585938}


EP_train:3:  24%|| 6662/27626 [15:41<49:32,  7.05it/s]

{'epoch': 3, 'iter': 6660, 'avg_loss': 8.532019400628833, 'avg_acc': 50.100397838162436, 'loss': 7.856316089630127}


EP_train:3:  24%|| 6672/27626 [15:43<49:24,  7.07it/s]

{'epoch': 3, 'iter': 6670, 'avg_loss': 8.531883447607353, 'avg_acc': 50.09368910208365, 'loss': 8.060127258300781}


EP_train:3:  24%|| 6682/27626 [15:44<49:44,  7.02it/s]

{'epoch': 3, 'iter': 6680, 'avg_loss': 8.531993782675528, 'avg_acc': 50.094016614279305, 'loss': 9.113273620605469}


EP_train:3:  24%|| 6692/27626 [15:46<49:51,  7.00it/s]

{'epoch': 3, 'iter': 6690, 'avg_loss': 8.531637100980271, 'avg_acc': 50.09294201165745, 'loss': 8.396595001220703}


EP_train:3:  24%|| 6702/27626 [15:47<49:27,  7.05it/s]

{'epoch': 3, 'iter': 6700, 'avg_loss': 8.531450109363687, 'avg_acc': 50.09047157140726, 'loss': 8.48747444152832}


EP_train:3:  24%|| 6712/27626 [15:48<49:30,  7.04it/s]

{'epoch': 3, 'iter': 6710, 'avg_loss': 8.531364045933948, 'avg_acc': 50.09126806735211, 'loss': 9.089325904846191}


EP_train:3:  24%|| 6722/27626 [15:50<48:59,  7.11it/s]

{'epoch': 3, 'iter': 6720, 'avg_loss': 8.531528378753254, 'avg_acc': 50.09485195655409, 'loss': 8.721134185791016}


EP_train:3:  24%|| 6732/27626 [15:51<49:11,  7.08it/s]

{'epoch': 3, 'iter': 6730, 'avg_loss': 8.531087831019152, 'avg_acc': 50.09888946664686, 'loss': 8.937262535095215}


EP_train:3:  24%|| 6742/27626 [15:53<49:20,  7.05it/s]

{'epoch': 3, 'iter': 6740, 'avg_loss': 8.53108162208651, 'avg_acc': 50.10476932205904, 'loss': 8.317169189453125}


EP_train:3:  24%|| 6752/27626 [15:54<49:28,  7.03it/s]

{'epoch': 3, 'iter': 6750, 'avg_loss': 8.53097055838066, 'avg_acc': 50.10831728632795, 'loss': 7.963760852813721}


EP_train:3:  24%|| 6762/27626 [15:55<49:13,  7.06it/s]

{'epoch': 3, 'iter': 6760, 'avg_loss': 8.53104752327736, 'avg_acc': 50.10307277030025, 'loss': 8.836756706237793}


EP_train:3:  25%|| 6772/27626 [15:57<49:23,  7.04it/s]

{'epoch': 3, 'iter': 6770, 'avg_loss': 8.531247795182727, 'avg_acc': 50.10892039580565, 'loss': 8.460370063781738}


EP_train:3:  25%|| 6782/27626 [15:58<49:47,  6.98it/s]

{'epoch': 3, 'iter': 6780, 'avg_loss': 8.531034669663734, 'avg_acc': 50.11014230939389, 'loss': 8.116055488586426}


EP_train:3:  25%|| 6792/27626 [16:00<49:01,  7.08it/s]

{'epoch': 3, 'iter': 6790, 'avg_loss': 8.530872212473026, 'avg_acc': 50.10813944927109, 'loss': 7.998549938201904}


EP_train:3:  25%|| 6802/27626 [16:01<49:42,  6.98it/s]

{'epoch': 3, 'iter': 6800, 'avg_loss': 8.53135951563394, 'avg_acc': 50.11119688281135, 'loss': 8.627022743225098}


EP_train:3:  25%|| 6812/27626 [16:03<49:03,  7.07it/s]

{'epoch': 3, 'iter': 6810, 'avg_loss': 8.53142156252303, 'avg_acc': 50.10552782263985, 'loss': 8.380355834960938}


EP_train:3:  25%|| 6822/27626 [16:04<49:24,  7.02it/s]

{'epoch': 3, 'iter': 6820, 'avg_loss': 8.531368755731497, 'avg_acc': 50.10308239261106, 'loss': 8.204712867736816}


EP_train:3:  25%|| 6832/27626 [16:05<49:09,  7.05it/s]

{'epoch': 3, 'iter': 6830, 'avg_loss': 8.530818593744616, 'avg_acc': 50.09744180939833, 'loss': 8.8176851272583}


EP_train:3:  25%|| 6842/27626 [16:07<48:57,  7.08it/s]

{'epoch': 3, 'iter': 6840, 'avg_loss': 8.530502751354167, 'avg_acc': 50.094101739511764, 'loss': 8.578749656677246}


EP_train:3:  25%|| 6852/27626 [16:08<49:08,  7.04it/s]

{'epoch': 3, 'iter': 6850, 'avg_loss': 8.530659986784677, 'avg_acc': 50.085297766749385, 'loss': 8.378521919250488}


EP_train:3:  25%|| 6862/27626 [16:10<49:11,  7.04it/s]

{'epoch': 3, 'iter': 6860, 'avg_loss': 8.530525001105545, 'avg_acc': 50.0897281737356, 'loss': 7.9478912353515625}


EP_train:3:  25%|| 6872/27626 [16:11<49:29,  6.99it/s]

{'epoch': 3, 'iter': 6870, 'avg_loss': 8.531027878689256, 'avg_acc': 50.0832302430505, 'loss': 8.778355598449707}


EP_train:3:  25%|| 6882/27626 [16:12<49:31,  6.98it/s]

{'epoch': 3, 'iter': 6880, 'avg_loss': 8.530536986318998, 'avg_acc': 50.08083854090975, 'loss': 8.280997276306152}


EP_train:3:  25%|| 6892/27626 [16:14<48:55,  7.06it/s]

{'epoch': 3, 'iter': 6890, 'avg_loss': 8.530304028081057, 'avg_acc': 50.07845378029313, 'loss': 7.80678129196167}


EP_train:3:  25%|| 6902/27626 [16:15<48:54,  7.06it/s]

{'epoch': 3, 'iter': 6900, 'avg_loss': 8.529963355429913, 'avg_acc': 50.082415591943196, 'loss': 8.088324546813965}


EP_train:3:  25%|| 6912/27626 [16:17<49:27,  6.98it/s]

{'epoch': 3, 'iter': 6910, 'avg_loss': 8.52964663771192, 'avg_acc': 50.083652872232676, 'loss': 8.908080101013184}


EP_train:3:  25%|| 6922/27626 [16:18<49:14,  7.01it/s]

{'epoch': 3, 'iter': 6920, 'avg_loss': 8.530277051499874, 'avg_acc': 50.08624115012281, 'loss': 8.372856140136719}


EP_train:3:  25%|| 6932/27626 [16:19<48:39,  7.09it/s]

{'epoch': 3, 'iter': 6930, 'avg_loss': 8.530926763710394, 'avg_acc': 50.080706247294756, 'loss': 7.985743045806885}


EP_train:3:  25%|| 6942/27626 [16:21<48:53,  7.05it/s]

{'epoch': 3, 'iter': 6940, 'avg_loss': 8.530921381499853, 'avg_acc': 50.08239086586948, 'loss': 9.392027854919434}


EP_train:3:  25%|| 6952/27626 [16:22<48:50,  7.05it/s]

{'epoch': 3, 'iter': 6950, 'avg_loss': 8.530748972153942, 'avg_acc': 50.080923608113935, 'loss': 8.1113862991333}


EP_train:3:  25%|| 6962/27626 [16:24<49:06,  7.01it/s]

{'epoch': 3, 'iter': 6960, 'avg_loss': 8.530413008099263, 'avg_acc': 50.09068380979744, 'loss': 7.660823345184326}


EP_train:3:  25%|| 6972/27626 [16:25<48:57,  7.03it/s]

{'epoch': 3, 'iter': 6970, 'avg_loss': 8.53033801572529, 'avg_acc': 50.086967436522734, 'loss': 8.038755416870117}


EP_train:3:  25%|| 6982/27626 [16:27<48:42,  7.06it/s]

{'epoch': 3, 'iter': 6980, 'avg_loss': 8.530748950156077, 'avg_acc': 50.08415699756482, 'loss': 8.601330757141113}


EP_train:3:  25%|| 6992/27626 [16:28<48:46,  7.05it/s]

{'epoch': 3, 'iter': 6990, 'avg_loss': 8.531037824380007, 'avg_acc': 50.08090759547991, 'loss': 8.920825958251953}


EP_train:3:  25%|| 7002/27626 [16:29<48:49,  7.04it/s]

{'epoch': 3, 'iter': 7000, 'avg_loss': 8.53070617427044, 'avg_acc': 50.07722111126982, 'loss': 8.563627243041992}


EP_train:3:  25%|| 7012/27626 [16:31<48:34,  7.07it/s]

{'epoch': 3, 'iter': 7010, 'avg_loss': 8.530015771390028, 'avg_acc': 50.0829054343175, 'loss': 8.017219543457031}


EP_train:3:  25%|| 7022/27626 [16:32<49:17,  6.97it/s]

{'epoch': 3, 'iter': 7020, 'avg_loss': 8.529728254520084, 'avg_acc': 50.08768337843612, 'loss': 7.883841514587402}


EP_train:3:  25%|| 7032/27626 [16:34<48:45,  7.04it/s]

{'epoch': 3, 'iter': 7030, 'avg_loss': 8.529940918624154, 'avg_acc': 50.082225145782964, 'loss': 8.741220474243164}


EP_train:3:  25%|| 7042/27626 [16:35<49:02,  7.00it/s]

{'epoch': 3, 'iter': 7040, 'avg_loss': 8.52980448044662, 'avg_acc': 50.08610282630308, 'loss': 8.022621154785156}


EP_train:3:  26%|| 7052/27626 [16:36<48:45,  7.03it/s]

{'epoch': 3, 'iter': 7050, 'avg_loss': 8.529707683500474, 'avg_acc': 50.08731031059425, 'loss': 8.90465259552002}


EP_train:3:  26%|| 7062/27626 [16:38<48:50,  7.02it/s]

{'epoch': 3, 'iter': 7060, 'avg_loss': 8.529805537746372, 'avg_acc': 50.08541637161875, 'loss': 9.106415748596191}


EP_train:3:  26%|| 7072/27626 [16:39<48:32,  7.06it/s]

{'epoch': 3, 'iter': 7070, 'avg_loss': 8.530012687013604, 'avg_acc': 50.08883114128129, 'loss': 8.329947471618652}


EP_train:3:  26%|| 7082/27626 [16:41<48:56,  7.00it/s]

{'epoch': 3, 'iter': 7080, 'avg_loss': 8.530160270863847, 'avg_acc': 50.094442875300096, 'loss': 8.978477478027344}


EP_train:3:  26%|| 7092/27626 [16:42<48:35,  7.04it/s]

{'epoch': 3, 'iter': 7090, 'avg_loss': 8.530690126132601, 'avg_acc': 50.093868988859114, 'loss': 9.220396995544434}


EP_train:3:  26%|| 7102/27626 [16:44<48:38,  7.03it/s]

{'epoch': 3, 'iter': 7100, 'avg_loss': 8.530873853108393, 'avg_acc': 50.098577665117595, 'loss': 8.928895950317383}


EP_train:3:  26%|| 7112/27626 [16:45<48:50,  7.00it/s]

{'epoch': 3, 'iter': 7110, 'avg_loss': 8.530721056630652, 'avg_acc': 50.09799957811841, 'loss': 7.84443998336792}


EP_train:3:  26%|| 7122/27626 [16:46<48:33,  7.04it/s]

{'epoch': 3, 'iter': 7120, 'avg_loss': 8.531045362062459, 'avg_acc': 50.09610658615363, 'loss': 8.123983383178711}


EP_train:3:  26%|| 7132/27626 [16:48<48:33,  7.04it/s]

{'epoch': 3, 'iter': 7130, 'avg_loss': 8.531055577020963, 'avg_acc': 50.09597181320993, 'loss': 8.95425033569336}


EP_train:3:  26%|| 7142/27626 [16:49<48:31,  7.04it/s]

{'epoch': 3, 'iter': 7140, 'avg_loss': 8.531083752904982, 'avg_acc': 50.10065116930402, 'loss': 8.734950065612793}


EP_train:3:  26%|| 7152/27626 [16:51<48:31,  7.03it/s]

{'epoch': 3, 'iter': 7150, 'avg_loss': 8.53058581571615, 'avg_acc': 50.103569430848836, 'loss': 8.968301773071289}


EP_train:3:  26%|| 7162/27626 [16:52<48:12,  7.07it/s]

{'epoch': 3, 'iter': 7160, 'avg_loss': 8.53060707222265, 'avg_acc': 50.107352325094254, 'loss': 8.56590461730957}


EP_train:3:  26%|| 7172/27626 [16:53<48:24,  7.04it/s]

{'epoch': 3, 'iter': 7170, 'avg_loss': 8.530675419270286, 'avg_acc': 50.105459489610936, 'loss': 8.736638069152832}


EP_train:3:  26%|| 7182/27626 [16:55<48:12,  7.07it/s]

{'epoch': 3, 'iter': 7180, 'avg_loss': 8.530811050246642, 'avg_acc': 50.10966439214594, 'loss': 8.652068138122559}


EP_train:3:  26%|| 7192/27626 [16:56<48:08,  7.08it/s]

{'epoch': 3, 'iter': 7190, 'avg_loss': 8.53069110640132, 'avg_acc': 50.10516617994716, 'loss': 8.49948787689209}


EP_train:3:  26%|| 7202/27626 [16:58<48:17,  7.05it/s]

{'epoch': 3, 'iter': 7200, 'avg_loss': 8.530148639263105, 'avg_acc': 50.11022774614637, 'loss': 8.519417762756348}


EP_train:3:  26%|| 7212/27626 [16:59<48:19,  7.04it/s]

{'epoch': 3, 'iter': 7210, 'avg_loss': 8.530194417760137, 'avg_acc': 50.10790805713493, 'loss': 9.021536827087402}


EP_train:3:  26%|| 7222/27626 [17:01<48:19,  7.04it/s]

{'epoch': 3, 'iter': 7220, 'avg_loss': 8.53021933942963, 'avg_acc': 50.10689308959978, 'loss': 8.539960861206055}


EP_train:3:  26%|| 7232/27626 [17:02<48:50,  6.96it/s]

{'epoch': 3, 'iter': 7230, 'avg_loss': 8.5302464879906, 'avg_acc': 50.10372009403955, 'loss': 8.72065544128418}


EP_train:3:  26%|| 7242/27626 [17:03<48:07,  7.06it/s]

{'epoch': 3, 'iter': 7240, 'avg_loss': 8.530724112820582, 'avg_acc': 50.10141900290015, 'loss': 8.223111152648926}


EP_train:3:  26%|| 7252/27626 [17:05<48:31,  7.00it/s]

{'epoch': 3, 'iter': 7250, 'avg_loss': 8.530367265124433, 'avg_acc': 50.10472693421597, 'loss': 8.107922554016113}


EP_train:3:  26%|| 7262/27626 [17:06<47:54,  7.08it/s]

{'epoch': 3, 'iter': 7260, 'avg_loss': 8.530029557523214, 'avg_acc': 50.10415232061699, 'loss': 8.191874504089355}


EP_train:3:  26%|| 7272/27626 [17:08<48:33,  6.99it/s]

{'epoch': 3, 'iter': 7270, 'avg_loss': 8.53036266494004, 'avg_acc': 50.10615802503094, 'loss': 7.974553108215332}


EP_train:3:  26%|| 7282/27626 [17:09<48:11,  7.04it/s]

{'epoch': 3, 'iter': 7280, 'avg_loss': 8.530171358983214, 'avg_acc': 50.1004326328801, 'loss': 8.128273963928223}


EP_train:3:  26%|| 7292/27626 [17:10<47:58,  7.06it/s]

{'epoch': 3, 'iter': 7290, 'avg_loss': 8.530083149127911, 'avg_acc': 50.09815183102455, 'loss': 9.562121391296387}


EP_train:3:  26%|| 7302/27626 [17:12<47:54,  7.07it/s]

{'epoch': 3, 'iter': 7300, 'avg_loss': 8.53008799095086, 'avg_acc': 50.097161347760576, 'loss': 8.644607543945312}


EP_train:3:  26%|| 7312/27626 [17:13<48:21,  7.00it/s]

{'epoch': 3, 'iter': 7310, 'avg_loss': 8.530286421806473, 'avg_acc': 50.10173026945698, 'loss': 8.412437438964844}


EP_train:3:  27%|| 7322/27626 [17:15<47:51,  7.07it/s]

{'epoch': 3, 'iter': 7320, 'avg_loss': 8.530321726885639, 'avg_acc': 50.10927468925011, 'loss': 8.935932159423828}


EP_train:3:  27%|| 7332/27626 [17:16<48:05,  7.03it/s]

{'epoch': 3, 'iter': 7330, 'avg_loss': 8.530338673714548, 'avg_acc': 50.10912563088256, 'loss': 8.547747611999512}


EP_train:3:  27%|| 7342/27626 [17:18<47:51,  7.06it/s]

{'epoch': 3, 'iter': 7340, 'avg_loss': 8.530309689985181, 'avg_acc': 50.10429437406347, 'loss': 8.895544052124023}


EP_train:3:  27%|| 7352/27626 [17:19<47:43,  7.08it/s]

{'epoch': 3, 'iter': 7350, 'avg_loss': 8.530877498262935, 'avg_acc': 50.1084036185553, 'loss': 9.410857200622559}


EP_train:3:  27%|| 7362/27626 [17:20<47:50,  7.06it/s]

{'epoch': 3, 'iter': 7360, 'avg_loss': 8.53103081439277, 'avg_acc': 50.10910542045918, 'loss': 8.543523788452148}


EP_train:3:  27%|| 7372/27626 [17:22<48:10,  7.01it/s]

{'epoch': 3, 'iter': 7370, 'avg_loss': 8.53081970198024, 'avg_acc': 50.11107719441053, 'loss': 8.144598007202148}


EP_train:3:  27%|| 7382/27626 [17:23<48:17,  6.99it/s]

{'epoch': 3, 'iter': 7380, 'avg_loss': 8.531249667357468, 'avg_acc': 50.103305785123965, 'loss': 9.998178482055664}


EP_train:3:  27%|| 7392/27626 [17:25<47:44,  7.06it/s]

{'epoch': 3, 'iter': 7390, 'avg_loss': 8.531152991025314, 'avg_acc': 50.09893789744284, 'loss': 9.029776573181152}


EP_train:3:  27%|| 7402/27626 [17:26<48:12,  6.99it/s]

{'epoch': 3, 'iter': 7400, 'avg_loss': 8.531315691421812, 'avg_acc': 50.10387109850021, 'loss': 8.45322322845459}


EP_train:3:  27%|| 7412/27626 [17:27<48:20,  6.97it/s]

{'epoch': 3, 'iter': 7410, 'avg_loss': 8.531162127359822, 'avg_acc': 50.10710430441237, 'loss': 8.430150032043457}


EP_train:3:  27%|| 7422/27626 [17:29<47:47,  7.04it/s]

{'epoch': 3, 'iter': 7420, 'avg_loss': 8.531070336163356, 'avg_acc': 50.106959978439555, 'loss': 8.735102653503418}


EP_train:3:  27%|| 7432/27626 [17:30<47:23,  7.10it/s]

{'epoch': 3, 'iter': 7430, 'avg_loss': 8.53107764008324, 'avg_acc': 50.102610684968376, 'loss': 9.42284870147705}


EP_train:3:  27%|| 7442/27626 [17:32<47:38,  7.06it/s]

{'epoch': 3, 'iter': 7440, 'avg_loss': 8.53082919694326, 'avg_acc': 50.104572638086275, 'loss': 8.501848220825195}


EP_train:3:  27%|| 7452/27626 [17:33<47:34,  7.07it/s]

{'epoch': 3, 'iter': 7450, 'avg_loss': 8.530873492490107, 'avg_acc': 50.108626358878006, 'loss': 8.573936462402344}


EP_train:3:  27%|| 7462/27626 [17:35<47:28,  7.08it/s]

{'epoch': 3, 'iter': 7460, 'avg_loss': 8.530744949241631, 'avg_acc': 50.109737300629945, 'loss': 8.031996726989746}


EP_train:3:  27%|| 7472/27626 [17:36<47:58,  7.00it/s]

{'epoch': 3, 'iter': 7470, 'avg_loss': 8.530350106162345, 'avg_acc': 50.1083355641815, 'loss': 7.976905345916748}


EP_train:3:  27%|| 7482/27626 [17:37<47:47,  7.03it/s]

{'epoch': 3, 'iter': 7480, 'avg_loss': 8.530733018838664, 'avg_acc': 50.10735530009357, 'loss': 9.0980224609375}


EP_train:3:  27%|| 7492/27626 [17:39<47:39,  7.04it/s]

{'epoch': 3, 'iter': 7490, 'avg_loss': 8.531039523212666, 'avg_acc': 50.107629154985986, 'loss': 8.5519380569458}


EP_train:3:  27%|| 7502/27626 [17:40<47:37,  7.04it/s]

{'epoch': 3, 'iter': 7500, 'avg_loss': 8.53117189450576, 'avg_acc': 50.108318890814566, 'loss': 8.451276779174805}


EP_train:3:  27%|| 7512/27626 [17:42<48:05,  6.97it/s]

{'epoch': 3, 'iter': 7510, 'avg_loss': 8.531590204806449, 'avg_acc': 50.11025495939288, 'loss': 9.054291725158691}


EP_train:3:  27%|| 7522/27626 [17:43<48:00,  6.98it/s]

{'epoch': 3, 'iter': 7520, 'avg_loss': 8.531864531638378, 'avg_acc': 50.108861853476924, 'loss': 8.668848991394043}


EP_train:3:  27%|| 7532/27626 [17:44<47:41,  7.02it/s]

{'epoch': 3, 'iter': 7530, 'avg_loss': 8.531751602108521, 'avg_acc': 50.104982738016204, 'loss': 7.774649143218994}


EP_train:3:  27%|| 7542/27626 [17:46<47:31,  7.04it/s]

{'epoch': 3, 'iter': 7540, 'avg_loss': 8.53175727215453, 'avg_acc': 50.10442912080626, 'loss': 8.868879318237305}


EP_train:3:  27%|| 7552/27626 [17:47<47:29,  7.04it/s]

{'epoch': 3, 'iter': 7550, 'avg_loss': 8.532337956358433, 'avg_acc': 50.104704674877496, 'loss': 9.389925003051758}


EP_train:3:  27%|| 7562/27626 [17:49<47:24,  7.05it/s]

{'epoch': 3, 'iter': 7560, 'avg_loss': 8.53198181196994, 'avg_acc': 50.10291297447428, 'loss': 8.143171310424805}


EP_train:3:  27%|| 7572/27626 [17:50<47:48,  6.99it/s]

{'epoch': 3, 'iter': 7570, 'avg_loss': 8.532270085054256, 'avg_acc': 50.10030048870691, 'loss': 8.669329643249512}


EP_train:3:  27%|| 7582/27626 [17:52<47:40,  7.01it/s]

{'epoch': 3, 'iter': 7580, 'avg_loss': 8.53263748171713, 'avg_acc': 50.10140482785912, 'loss': 8.64698600769043}


EP_train:3:  27%|| 7592/27626 [17:53<47:30,  7.03it/s]

{'epoch': 3, 'iter': 7590, 'avg_loss': 8.532606875404918, 'avg_acc': 50.10085957054407, 'loss': 8.54969310760498}


EP_train:3:  28%|| 7602/27626 [17:54<47:25,  7.04it/s]

{'epoch': 3, 'iter': 7600, 'avg_loss': 8.53286516156703, 'avg_acc': 50.09990461781345, 'loss': 8.431379318237305}


EP_train:3:  28%|| 7612/27626 [17:56<47:16,  7.06it/s]

{'epoch': 3, 'iter': 7610, 'avg_loss': 8.532727421825841, 'avg_acc': 50.0968992248062, 'loss': 8.256525993347168}


EP_train:3:  28%|| 7622/27626 [17:57<47:06,  7.08it/s]

{'epoch': 3, 'iter': 7620, 'avg_loss': 8.5327025805069, 'avg_acc': 50.093081616585756, 'loss': 8.754280090332031}


EP_train:3:  28%|| 7632/27626 [17:59<47:06,  7.07it/s]

{'epoch': 3, 'iter': 7630, 'avg_loss': 8.532387625607168, 'avg_acc': 50.088454986240336, 'loss': 8.167452812194824}


EP_train:3:  28%|| 7642/27626 [18:00<47:20,  7.04it/s]

{'epoch': 3, 'iter': 7640, 'avg_loss': 8.532348992764957, 'avg_acc': 50.08752126684989, 'loss': 8.199929237365723}


EP_train:3:  28%|| 7652/27626 [18:01<47:25,  7.02it/s]

{'epoch': 3, 'iter': 7650, 'avg_loss': 8.532405206976708, 'avg_acc': 50.085773101555354, 'loss': 8.50256633758545}


EP_train:3:  28%|| 7662/27626 [18:03<47:30,  7.00it/s]

{'epoch': 3, 'iter': 7660, 'avg_loss': 8.532598651337757, 'avg_acc': 50.08566114084323, 'loss': 8.487391471862793}


EP_train:3:  28%|| 7672/27626 [18:04<47:30,  7.00it/s]

{'epoch': 3, 'iter': 7670, 'avg_loss': 8.532314313280118, 'avg_acc': 50.08758636422891, 'loss': 8.243745803833008}


EP_train:3:  28%|| 7682/27626 [18:06<47:36,  6.98it/s]

{'epoch': 3, 'iter': 7680, 'avg_loss': 8.532008315790229, 'avg_acc': 50.090320270798074, 'loss': 8.34670352935791}


EP_train:3:  28%|| 7692/27626 [18:07<47:06,  7.05it/s]

{'epoch': 3, 'iter': 7690, 'avg_loss': 8.531677167416548, 'avg_acc': 50.09873553504096, 'loss': 8.743464469909668}


EP_train:3:  28%|| 7702/27626 [18:09<47:05,  7.05it/s]

{'epoch': 3, 'iter': 7700, 'avg_loss': 8.531423253310901, 'avg_acc': 50.101447863913776, 'loss': 8.130085945129395}


EP_train:3:  28%|| 7712/27626 [18:10<46:40,  7.11it/s]

{'epoch': 3, 'iter': 7710, 'avg_loss': 8.531621733121163, 'avg_acc': 50.10455842303203, 'loss': 9.07556438446045}


EP_train:3:  28%|| 7722/27626 [18:11<46:59,  7.06it/s]

{'epoch': 3, 'iter': 7720, 'avg_loss': 8.531496862938422, 'avg_acc': 50.103208781245954, 'loss': 8.832747459411621}


EP_train:3:  28%|| 7732/27626 [18:13<47:02,  7.05it/s]

{'epoch': 3, 'iter': 7730, 'avg_loss': 8.531479820892429, 'avg_acc': 50.10792588280947, 'loss': 8.316118240356445}


EP_train:3:  28%|| 7742/27626 [18:14<47:11,  7.02it/s]

{'epoch': 3, 'iter': 7740, 'avg_loss': 8.531683224083118, 'avg_acc': 50.10617168324506, 'loss': 8.56337833404541}


EP_train:3:  28%|| 7752/27626 [18:16<46:52,  7.07it/s]

{'epoch': 3, 'iter': 7750, 'avg_loss': 8.53174438262475, 'avg_acc': 50.09958392465488, 'loss': 8.355610847473145}


EP_train:3:  28%|| 7762/27626 [18:17<46:59,  7.04it/s]

{'epoch': 3, 'iter': 7760, 'avg_loss': 8.531356829771168, 'avg_acc': 50.09865030279603, 'loss': 8.363998413085938}


EP_train:3:  28%|| 7772/27626 [18:18<46:48,  7.07it/s]

{'epoch': 3, 'iter': 7770, 'avg_loss': 8.531016729433645, 'avg_acc': 50.09731694762579, 'loss': 8.831178665161133}


EP_train:3:  28%|| 7782/27626 [18:20<46:58,  7.04it/s]

{'epoch': 3, 'iter': 7780, 'avg_loss': 8.531010075348414, 'avg_acc': 50.09478216167588, 'loss': 8.633249282836914}


EP_train:3:  28%|| 7792/27626 [18:21<46:59,  7.03it/s]

{'epoch': 3, 'iter': 7790, 'avg_loss': 8.530605696113987, 'avg_acc': 50.093858298036196, 'loss': 7.352758884429932}


EP_train:3:  28%|| 7802/27626 [18:23<47:01,  7.03it/s]

{'epoch': 3, 'iter': 7800, 'avg_loss': 8.530265171242714, 'avg_acc': 50.0873285476221, 'loss': 8.023624420166016}


EP_train:3:  28%|| 7812/27626 [18:24<46:49,  7.05it/s]

{'epoch': 3, 'iter': 7810, 'avg_loss': 8.529972558320065, 'avg_acc': 50.082815900652925, 'loss': 8.975920677185059}


EP_train:3:  28%|| 7822/27626 [18:25<46:58,  7.03it/s]

{'epoch': 3, 'iter': 7820, 'avg_loss': 8.530054556867714, 'avg_acc': 50.0871052295103, 'loss': 8.485174179077148}


EP_train:3:  28%|| 7832/27626 [18:27<46:45,  7.06it/s]

{'epoch': 3, 'iter': 7830, 'avg_loss': 8.530320662219715, 'avg_acc': 50.09617226407867, 'loss': 8.637901306152344}


EP_train:3:  28%|| 7842/27626 [18:28<46:53,  7.03it/s]

{'epoch': 3, 'iter': 7840, 'avg_loss': 8.530302504923952, 'avg_acc': 50.1020278025762, 'loss': 7.363083839416504}


EP_train:3:  28%|| 7852/27626 [18:30<46:34,  7.08it/s]

{'epoch': 3, 'iter': 7850, 'avg_loss': 8.530011252132498, 'avg_acc': 50.098713539676474, 'loss': 8.200567245483398}


EP_train:3:  28%|| 7862/27626 [18:31<46:57,  7.01it/s]

{'epoch': 3, 'iter': 7860, 'avg_loss': 8.529955860070487, 'avg_acc': 50.104550947716575, 'loss': 8.906569480895996}


EP_train:3:  28%|| 7872/27626 [18:33<46:35,  7.07it/s]

{'epoch': 3, 'iter': 7870, 'avg_loss': 8.53025276684243, 'avg_acc': 50.10163892770932, 'loss': 9.09234619140625}


EP_train:3:  29%|| 7882/27626 [18:34<46:52,  7.02it/s]

{'epoch': 3, 'iter': 7880, 'avg_loss': 8.530383795927115, 'avg_acc': 50.104285623651826, 'loss': 8.178731918334961}


EP_train:3:  29%|| 7892/27626 [18:35<46:37,  7.05it/s]

{'epoch': 3, 'iter': 7890, 'avg_loss': 8.530445771379897, 'avg_acc': 50.10177734127487, 'loss': 8.889564514160156}


EP_train:3:  29%|| 7902/27626 [18:37<46:35,  7.05it/s]

{'epoch': 3, 'iter': 7900, 'avg_loss': 8.530503613935846, 'avg_acc': 50.095320212631314, 'loss': 8.164687156677246}


EP_train:3:  29%|| 7912/27626 [18:38<46:42,  7.04it/s]

{'epoch': 3, 'iter': 7910, 'avg_loss': 8.530710655282803, 'avg_acc': 50.10072999620782, 'loss': 8.925023078918457}


EP_train:3:  29%|| 7922/27626 [18:40<46:50,  7.01it/s]

{'epoch': 3, 'iter': 7920, 'avg_loss': 8.530587671080287, 'avg_acc': 50.09941926524428, 'loss': 8.992440223693848}


EP_train:3:  29%|| 7932/27626 [18:41<46:30,  7.06it/s]

{'epoch': 3, 'iter': 7930, 'avg_loss': 8.530027167912225, 'avg_acc': 50.0996879334258, 'loss': 7.580757141113281}


EP_train:3:  29%|| 7942/27626 [18:42<46:43,  7.02it/s]

{'epoch': 3, 'iter': 7940, 'avg_loss': 8.530298705245727, 'avg_acc': 50.09405301599295, 'loss': 8.58817195892334}


EP_train:3:  29%|| 7952/27626 [18:44<46:51,  7.00it/s]

{'epoch': 3, 'iter': 7950, 'avg_loss': 8.530270408795413, 'avg_acc': 50.095506854483716, 'loss': 7.970156669616699}


EP_train:3:  29%|| 7962/27626 [18:45<46:34,  7.04it/s]

{'epoch': 3, 'iter': 7960, 'avg_loss': 8.53058026139722, 'avg_acc': 50.093816731566385, 'loss': 8.240163803100586}


EP_train:3:  29%|| 7972/27626 [18:47<46:28,  7.05it/s]

{'epoch': 3, 'iter': 7970, 'avg_loss': 8.530637824958607, 'avg_acc': 50.098795634173875, 'loss': 8.251510620117188}


EP_train:3:  29%|| 7982/27626 [18:48<46:28,  7.04it/s]

{'epoch': 3, 'iter': 7980, 'avg_loss': 8.530500326251016, 'avg_acc': 50.101021175291315, 'loss': 8.57986831665039}


EP_train:3:  29%|| 7992/27626 [18:50<46:17,  7.07it/s]

{'epoch': 3, 'iter': 7990, 'avg_loss': 8.53042094774023, 'avg_acc': 50.10402327618571, 'loss': 8.565110206604004}


EP_train:3:  29%|| 8002/27626 [18:51<46:36,  7.02it/s]

{'epoch': 3, 'iter': 8000, 'avg_loss': 8.53038413857478, 'avg_acc': 50.10194038245219, 'loss': 7.92975378036499}


EP_train:3:  29%|| 8012/27626 [18:52<46:40,  7.00it/s]

{'epoch': 3, 'iter': 8010, 'avg_loss': 8.530573087898686, 'avg_acc': 50.09869242291849, 'loss': 8.377403259277344}


EP_train:3:  29%|| 8022/27626 [18:54<46:10,  7.08it/s]

{'epoch': 3, 'iter': 8020, 'avg_loss': 8.530719359428446, 'avg_acc': 50.089998129908984, 'loss': 8.613447189331055}


EP_train:3:  29%|| 8032/27626 [18:55<46:19,  7.05it/s]

{'epoch': 3, 'iter': 8030, 'avg_loss': 8.530531772090555, 'avg_acc': 50.08949694932138, 'loss': 8.042571067810059}


EP_train:3:  29%|| 8042/27626 [18:57<46:22,  7.04it/s]

{'epoch': 3, 'iter': 8040, 'avg_loss': 8.530214235032766, 'avg_acc': 50.082001616714344, 'loss': 7.884128570556641}


EP_train:3:  29%|| 8052/27626 [18:58<46:25,  7.03it/s]

{'epoch': 3, 'iter': 8050, 'avg_loss': 8.53052143481426, 'avg_acc': 50.08228791454478, 'loss': 8.795016288757324}


EP_train:3:  29%|| 8062/27626 [18:59<46:28,  7.02it/s]

{'epoch': 3, 'iter': 8060, 'avg_loss': 8.530377064962686, 'avg_acc': 50.07714613571517, 'loss': 8.948783874511719}


EP_train:3:  29%|| 8072/27626 [19:01<46:09,  7.06it/s]

{'epoch': 3, 'iter': 8070, 'avg_loss': 8.530038511748652, 'avg_acc': 50.070855532152144, 'loss': 8.197211265563965}


EP_train:3:  29%|| 8082/27626 [19:02<46:07,  7.06it/s]

{'epoch': 3, 'iter': 8080, 'avg_loss': 8.529829588905299, 'avg_acc': 50.07734191312956, 'loss': 8.016422271728516}


EP_train:3:  29%|| 8092/27626 [19:04<46:17,  7.03it/s]

{'epoch': 3, 'iter': 8090, 'avg_loss': 8.529698219777863, 'avg_acc': 50.075701396613525, 'loss': 8.406073570251465}


EP_train:3:  29%|| 8102/27626 [19:05<46:17,  7.03it/s]

{'epoch': 3, 'iter': 8100, 'avg_loss': 8.52932198396981, 'avg_acc': 50.0744506851006, 'loss': 9.045795440673828}


EP_train:3:  29%|| 8112/27626 [19:07<45:53,  7.09it/s]

{'epoch': 3, 'iter': 8110, 'avg_loss': 8.528567629609444, 'avg_acc': 50.07204721982493, 'loss': 8.087492942810059}


EP_train:3:  29%|| 8122/27626 [19:08<46:09,  7.04it/s]

{'epoch': 3, 'iter': 8120, 'avg_loss': 8.528580522026397, 'avg_acc': 50.070034478512504, 'loss': 9.491921424865723}


EP_train:3:  29%|| 8132/27626 [19:09<46:03,  7.05it/s]

{'epoch': 3, 'iter': 8130, 'avg_loss': 8.529050618663014, 'avg_acc': 50.06764235641372, 'loss': 8.717789649963379}


EP_train:3:  29%|| 8142/27626 [19:11<46:08,  7.04it/s]

{'epoch': 3, 'iter': 8140, 'avg_loss': 8.529090700827803, 'avg_acc': 50.0690947058101, 'loss': 8.539758682250977}


EP_train:3:  30%|| 8152/27626 [19:12<46:40,  6.95it/s]

{'epoch': 3, 'iter': 8150, 'avg_loss': 8.52874940434141, 'avg_acc': 50.07054349159612, 'loss': 7.943870544433594}


EP_train:3:  30%|| 8162/27626 [19:14<46:18,  7.00it/s]

{'epoch': 3, 'iter': 8160, 'avg_loss': 8.52887633471728, 'avg_acc': 50.06969121431197, 'loss': 8.352947235107422}


EP_train:3:  30%|| 8172/27626 [19:15<46:01,  7.04it/s]

{'epoch': 3, 'iter': 8170, 'avg_loss': 8.52893620899412, 'avg_acc': 50.0715181740301, 'loss': 8.477293014526367}


EP_train:3:  30%|| 8182/27626 [19:16<45:51,  7.07it/s]

{'epoch': 3, 'iter': 8180, 'avg_loss': 8.529174023889238, 'avg_acc': 50.0744866153282, 'loss': 9.374058723449707}


EP_train:3:  30%|| 8192/27626 [19:18<45:55,  7.05it/s]

{'epoch': 3, 'iter': 8190, 'avg_loss': 8.52895232787311, 'avg_acc': 50.07019899890124, 'loss': 8.495471954345703}


EP_train:3:  30%|| 8202/27626 [19:19<45:39,  7.09it/s]

{'epoch': 3, 'iter': 8200, 'avg_loss': 8.52871865369157, 'avg_acc': 50.0678270942568, 'loss': 8.489811897277832}


EP_train:3:  30%|| 8212/27626 [19:21<46:07,  7.01it/s]

{'epoch': 3, 'iter': 8210, 'avg_loss': 8.52902876975083, 'avg_acc': 50.06508037997808, 'loss': 8.876556396484375}


EP_train:3:  30%|| 8222/27626 [19:22<46:17,  6.99it/s]

{'epoch': 3, 'iter': 8220, 'avg_loss': 8.528559928073195, 'avg_acc': 50.067662084904505, 'loss': 8.131677627563477}


EP_train:3:  30%|| 8232/27626 [19:24<45:56,  7.04it/s]

{'epoch': 3, 'iter': 8230, 'avg_loss': 8.528130920091762, 'avg_acc': 50.066820556433, 'loss': 7.8941850662231445}


EP_train:3:  30%|| 8242/27626 [19:25<45:41,  7.07it/s]

{'epoch': 3, 'iter': 8240, 'avg_loss': 8.52830883756799, 'avg_acc': 50.06408506249242, 'loss': 8.234543800354004}


EP_train:3:  30%|| 8252/27626 [19:26<46:00,  7.02it/s]

{'epoch': 3, 'iter': 8250, 'avg_loss': 8.528223445900656, 'avg_acc': 50.067794812749966, 'loss': 9.27611255645752}


EP_train:3:  30%|| 8262/27626 [19:28<45:41,  7.06it/s]

{'epoch': 3, 'iter': 8260, 'avg_loss': 8.528188872028531, 'avg_acc': 50.0718738651495, 'loss': 8.29818344116211}


EP_train:3:  30%|| 8272/27626 [19:29<45:56,  7.02it/s]

{'epoch': 3, 'iter': 8270, 'avg_loss': 8.527398919615287, 'avg_acc': 50.06952000967235, 'loss': 7.745735168457031}


EP_train:3:  30%|| 8282/27626 [19:31<45:37,  7.07it/s]

{'epoch': 3, 'iter': 8280, 'avg_loss': 8.5275163514619, 'avg_acc': 50.069436058447046, 'loss': 8.159361839294434}


EP_train:3:  30%|| 8292/27626 [19:32<45:28,  7.09it/s]

{'epoch': 3, 'iter': 8290, 'avg_loss': 8.527870322234264, 'avg_acc': 50.067844650826196, 'loss': 8.855013847351074}


EP_train:3:  30%|| 8302/27626 [19:33<45:47,  7.03it/s]

{'epoch': 3, 'iter': 8300, 'avg_loss': 8.528353453515424, 'avg_acc': 50.07303336947355, 'loss': 9.463668823242188}


EP_train:3:  30%|| 8312/27626 [19:35<45:42,  7.04it/s]

{'epoch': 3, 'iter': 8310, 'avg_loss': 8.52877042497514, 'avg_acc': 50.07106545542052, 'loss': 8.402454376220703}


EP_train:3:  30%|| 8322/27626 [19:36<45:33,  7.06it/s]

{'epoch': 3, 'iter': 8320, 'avg_loss': 8.528715041061675, 'avg_acc': 50.07060449465208, 'loss': 8.043493270874023}


EP_train:3:  30%|| 8332/27626 [19:38<45:33,  7.06it/s]

{'epoch': 3, 'iter': 8330, 'avg_loss': 8.52868376647048, 'avg_acc': 50.07126995558756, 'loss': 8.625682830810547}


EP_train:3:  30%|| 8342/27626 [19:39<45:31,  7.06it/s]

{'epoch': 3, 'iter': 8340, 'avg_loss': 8.528776194663116, 'avg_acc': 50.068561923030806, 'loss': 8.386778831481934}


EP_train:3:  30%|| 8352/27626 [19:41<45:20,  7.09it/s]

{'epoch': 3, 'iter': 8350, 'avg_loss': 8.5285246603761, 'avg_acc': 50.068479822775714, 'loss': 8.68542194366455}


EP_train:3:  30%|| 8362/27626 [19:42<45:25,  7.07it/s]

{'epoch': 3, 'iter': 8360, 'avg_loss': 8.527918643264762, 'avg_acc': 50.065781605071166, 'loss': 9.476068496704102}


EP_train:3:  30%|| 8372/27626 [19:43<45:11,  7.10it/s]

{'epoch': 3, 'iter': 8370, 'avg_loss': 8.52811078606021, 'avg_acc': 50.068689523354436, 'loss': 8.546313285827637}


EP_train:3:  30%|| 8382/27626 [19:45<45:26,  7.06it/s]

{'epoch': 3, 'iter': 8380, 'avg_loss': 8.527769641992286, 'avg_acc': 50.069353299128984, 'loss': 7.9953813552856445}


EP_train:3:  30%|| 8392/27626 [19:46<45:25,  7.06it/s]

{'epoch': 3, 'iter': 8390, 'avg_loss': 8.527538723775704, 'avg_acc': 50.074484566797764, 'loss': 8.271723747253418}


EP_train:3:  30%|| 8402/27626 [19:48<45:29,  7.04it/s]

{'epoch': 3, 'iter': 8400, 'avg_loss': 8.527283095189636, 'avg_acc': 50.07662778240686, 'loss': 8.82298755645752}


EP_train:3:  30%|| 8412/27626 [19:49<45:27,  7.05it/s]

{'epoch': 3, 'iter': 8410, 'avg_loss': 8.527648159360902, 'avg_acc': 50.07542206634169, 'loss': 8.329955101013184}


EP_train:3:  30%|| 8422/27626 [19:50<45:23,  7.05it/s]

{'epoch': 3, 'iter': 8420, 'avg_loss': 8.527823809617036, 'avg_acc': 50.071250445315286, 'loss': 8.309446334838867}


EP_train:3:  31%|| 8432/27626 [19:52<45:16,  7.07it/s]

{'epoch': 3, 'iter': 8430, 'avg_loss': 8.527857832319865, 'avg_acc': 50.07264855888981, 'loss': 8.558845520019531}


EP_train:3:  31%|| 8442/27626 [19:53<45:25,  7.04it/s]

{'epoch': 3, 'iter': 8440, 'avg_loss': 8.52792513301313, 'avg_acc': 50.07182205899775, 'loss': 7.9157257080078125}


EP_train:3:  31%|| 8452/27626 [19:55<45:06,  7.08it/s]

{'epoch': 3, 'iter': 8450, 'avg_loss': 8.52779965738674, 'avg_acc': 50.072846408709026, 'loss': 8.68358325958252}


EP_train:3:  31%|| 8462/27626 [19:56<44:57,  7.10it/s]

{'epoch': 3, 'iter': 8460, 'avg_loss': 8.527759193458282, 'avg_acc': 50.07239097033448, 'loss': 8.872666358947754}


EP_train:3:  31%|| 8472/27626 [19:57<45:25,  7.03it/s]

{'epoch': 3, 'iter': 8470, 'avg_loss': 8.52788591413033, 'avg_acc': 50.06640302207531, 'loss': 8.550821304321289}


EP_train:3:  31%|| 8482/27626 [19:59<45:41,  6.98it/s]

{'epoch': 3, 'iter': 8480, 'avg_loss': 8.527948325541402, 'avg_acc': 50.07000943284991, 'loss': 8.891535758972168}


EP_train:3:  31%|| 8492/27626 [20:00<45:26,  7.02it/s]

{'epoch': 3, 'iter': 8490, 'avg_loss': 8.527876006013976, 'avg_acc': 50.06919090802025, 'loss': 9.162824630737305}


EP_train:3:  31%|| 8502/27626 [20:02<45:07,  7.06it/s]

{'epoch': 3, 'iter': 8500, 'avg_loss': 8.52737018195703, 'avg_acc': 50.0698447241501, 'loss': 8.298234939575195}


EP_train:3:  31%|| 8512/27626 [20:03<45:04,  7.07it/s]

{'epoch': 3, 'iter': 8510, 'avg_loss': 8.52778418746794, 'avg_acc': 50.068293972506176, 'loss': 8.067700386047363}


EP_train:3:  31%|| 8522/27626 [20:05<45:11,  7.05it/s]

{'epoch': 3, 'iter': 8520, 'avg_loss': 8.52788461641084, 'avg_acc': 50.07518190353245, 'loss': 8.839543342590332}


EP_train:3:  31%|| 8532/27626 [20:06<45:08,  7.05it/s]

{'epoch': 3, 'iter': 8530, 'avg_loss': 8.527975597298868, 'avg_acc': 50.072529597936935, 'loss': 8.967174530029297}


EP_train:3:  31%|| 8542/27626 [20:07<44:56,  7.08it/s]

{'epoch': 3, 'iter': 8540, 'avg_loss': 8.528071392560896, 'avg_acc': 50.077567029621825, 'loss': 8.468103408813477}


EP_train:3:  31%|| 8552/27626 [20:09<45:16,  7.02it/s]

{'epoch': 3, 'iter': 8550, 'avg_loss': 8.527902758173072, 'avg_acc': 50.07601450122793, 'loss': 8.652586936950684}


EP_train:3:  31%|| 8562/27626 [20:10<45:02,  7.06it/s]

{'epoch': 3, 'iter': 8560, 'avg_loss': 8.528330178422886, 'avg_acc': 50.075195654713234, 'loss': 8.747810363769531}


EP_train:3:  31%|| 8572/27626 [20:12<45:07,  7.04it/s]

{'epoch': 3, 'iter': 8570, 'avg_loss': 8.528142479111107, 'avg_acc': 50.07437871893594, 'loss': 8.544363975524902}


EP_train:3:  31%|| 8582/27626 [20:13<45:27,  6.98it/s]

{'epoch': 3, 'iter': 8580, 'avg_loss': 8.527723166231592, 'avg_acc': 50.071014450530235, 'loss': 8.794596672058105}


EP_train:3:  31%|| 8592/27626 [20:15<45:25,  6.98it/s]

{'epoch': 3, 'iter': 8590, 'avg_loss': 8.527347586121913, 'avg_acc': 50.070204283552556, 'loss': 8.444268226623535}


EP_train:3:  31%|| 8602/27626 [20:16<44:58,  7.05it/s]

{'epoch': 3, 'iter': 8600, 'avg_loss': 8.527226969303468, 'avg_acc': 50.073392628764104, 'loss': 8.497872352600098}


EP_train:3:  31%|| 8612/27626 [20:17<44:56,  7.05it/s]

{'epoch': 3, 'iter': 8610, 'avg_loss': 8.527153080590384, 'avg_acc': 50.074759029148765, 'loss': 8.182100296020508}


EP_train:3:  31%|| 8622/27626 [20:19<45:15,  7.00it/s]

{'epoch': 3, 'iter': 8620, 'avg_loss': 8.527075633332315, 'avg_acc': 50.0695974944902, 'loss': 8.340041160583496}


EP_train:3:  31%|| 8632/27626 [20:20<45:10,  7.01it/s]

{'epoch': 3, 'iter': 8630, 'avg_loss': 8.526878751784151, 'avg_acc': 50.070240991773844, 'loss': 7.840550422668457}


EP_train:3:  31%|| 8642/27626 [20:22<44:44,  7.07it/s]

{'epoch': 3, 'iter': 8640, 'avg_loss': 8.527349399369758, 'avg_acc': 50.07160629556764, 'loss': 8.717612266540527}


EP_train:3:  31%|| 8652/27626 [20:23<44:49,  7.05it/s]

{'epoch': 3, 'iter': 8650, 'avg_loss': 8.52734912551079, 'avg_acc': 50.074774592532656, 'loss': 8.057090759277344}


EP_train:3:  31%|| 8662/27626 [20:24<45:01,  7.02it/s]

{'epoch': 3, 'iter': 8660, 'avg_loss': 8.527189293464879, 'avg_acc': 50.077574760420276, 'loss': 8.547633171081543}


EP_train:3:  31%|| 8672/27626 [20:26<44:50,  7.05it/s]

{'epoch': 3, 'iter': 8670, 'avg_loss': 8.52719670867524, 'avg_acc': 50.0767645023642, 'loss': 9.102782249450684}


EP_train:3:  31%|| 8682/27626 [20:27<45:04,  7.01it/s]

{'epoch': 3, 'iter': 8680, 'avg_loss': 8.52731978015792, 'avg_acc': 50.07559612947817, 'loss': 8.85154914855957}


EP_train:3:  31%|| 8692/27626 [20:29<45:09,  6.99it/s]

{'epoch': 3, 'iter': 8690, 'avg_loss': 8.527440971450885, 'avg_acc': 50.07550914739386, 'loss': 8.197247505187988}


EP_train:3:  31%|| 8702/27626 [20:30<45:04,  7.00it/s]

{'epoch': 3, 'iter': 8700, 'avg_loss': 8.527469699896008, 'avg_acc': 50.074704057004936, 'loss': 7.7730560302734375}


EP_train:3:  32%|| 8712/27626 [20:32<44:37,  7.07it/s]

{'epoch': 3, 'iter': 8710, 'avg_loss': 8.527055307748931, 'avg_acc': 50.06744346228906, 'loss': 7.936506748199463}


EP_train:3:  32%|| 8722/27626 [20:33<44:40,  7.05it/s]

{'epoch': 3, 'iter': 8720, 'avg_loss': 8.526887886842147, 'avg_acc': 50.07059110193786, 'loss': 8.061616897583008}


EP_train:3:  32%|| 8732/27626 [20:34<44:41,  7.05it/s]

{'epoch': 3, 'iter': 8730, 'avg_loss': 8.527354965858779, 'avg_acc': 50.06585729011568, 'loss': 8.34468936920166}


EP_train:3:  32%|| 8742/27626 [20:36<44:59,  7.00it/s]

{'epoch': 3, 'iter': 8740, 'avg_loss': 8.527403093014412, 'avg_acc': 50.06721198947489, 'loss': 7.978930473327637}


EP_train:3:  32%|| 8752/27626 [20:37<44:33,  7.06it/s]

{'epoch': 3, 'iter': 8750, 'avg_loss': 8.527394240685238, 'avg_acc': 50.065706776368415, 'loss': 7.960270881652832}


EP_train:3:  32%|| 8762/27626 [20:39<44:41,  7.04it/s]

{'epoch': 3, 'iter': 8760, 'avg_loss': 8.526928775999837, 'avg_acc': 50.06491838831184, 'loss': 8.613056182861328}


EP_train:3:  32%|| 8772/27626 [20:40<44:33,  7.05it/s]

{'epoch': 3, 'iter': 8770, 'avg_loss': 8.526922287765379, 'avg_acc': 50.065913236803105, 'loss': 8.310898780822754}


EP_train:3:  32%|| 8782/27626 [20:41<44:30,  7.06it/s]

{'epoch': 3, 'iter': 8780, 'avg_loss': 8.526648610535032, 'avg_acc': 50.070108757544695, 'loss': 7.756692886352539}


EP_train:3:  32%|| 8792/27626 [20:43<44:30,  7.05it/s]

{'epoch': 3, 'iter': 8790, 'avg_loss': 8.525578913245507, 'avg_acc': 50.063985894664995, 'loss': 7.451693534851074}


EP_train:3:  32%|| 8802/27626 [20:44<44:53,  6.99it/s]

{'epoch': 3, 'iter': 8800, 'avg_loss': 8.52486955565548, 'avg_acc': 50.06284797182139, 'loss': 8.363685607910156}


EP_train:3:  32%|| 8812/27626 [20:46<44:21,  7.07it/s]

{'epoch': 3, 'iter': 8810, 'avg_loss': 8.525043018937666, 'avg_acc': 50.05781125865395, 'loss': 8.865358352661133}


EP_train:3:  32%|| 8822/27626 [20:47<44:28,  7.05it/s]

{'epoch': 3, 'iter': 8820, 'avg_loss': 8.525047019095313, 'avg_acc': 50.058808525110535, 'loss': 8.613937377929688}


EP_train:3:  32%|| 8832/27626 [20:48<44:17,  7.07it/s]

{'epoch': 3, 'iter': 8830, 'avg_loss': 8.525038398712006, 'avg_acc': 50.059095798890276, 'loss': 9.031076431274414}


EP_train:3:  32%|| 8842/27626 [20:50<44:34,  7.02it/s]

{'epoch': 3, 'iter': 8840, 'avg_loss': 8.525381869393875, 'avg_acc': 50.05726162198846, 'loss': 8.430547714233398}


EP_train:3:  32%|| 8852/27626 [20:51<44:31,  7.03it/s]

{'epoch': 3, 'iter': 8850, 'avg_loss': 8.525678227411369, 'avg_acc': 50.05790306180092, 'loss': 8.019942283630371}


EP_train:3:  32%|| 8862/27626 [20:53<44:29,  7.03it/s]

{'epoch': 3, 'iter': 8860, 'avg_loss': 8.52574826013341, 'avg_acc': 50.05360568784562, 'loss': 9.185307502746582}


EP_train:3:  32%|| 8872/27626 [20:54<44:11,  7.07it/s]

{'epoch': 3, 'iter': 8870, 'avg_loss': 8.52535164255446, 'avg_acc': 50.05706797429828, 'loss': 7.907757759094238}


EP_train:3:  32%|| 8882/27626 [20:56<44:41,  6.99it/s]

{'epoch': 3, 'iter': 8880, 'avg_loss': 8.525281312940573, 'avg_acc': 50.05454059227564, 'loss': 8.941753387451172}


EP_train:3:  32%|| 8892/27626 [20:57<44:36,  7.00it/s]

{'epoch': 3, 'iter': 8890, 'avg_loss': 8.525119366225878, 'avg_acc': 50.05447924867844, 'loss': 8.206243515014648}


EP_train:3:  32%|| 8902/27626 [20:58<44:08,  7.07it/s]

{'epoch': 3, 'iter': 8900, 'avg_loss': 8.524958579088379, 'avg_acc': 50.056173463655774, 'loss': 8.097281455993652}


EP_train:3:  32%|| 8912/27626 [21:00<44:21,  7.03it/s]

{'epoch': 3, 'iter': 8910, 'avg_loss': 8.524817445384922, 'avg_acc': 50.05786387610818, 'loss': 8.580279350280762}


EP_train:3:  32%|| 8922/27626 [21:01<44:03,  7.08it/s]

{'epoch': 3, 'iter': 8920, 'avg_loss': 8.524750540467899, 'avg_acc': 50.0563978253559, 'loss': 8.220891952514648}


EP_train:3:  32%|| 8932/27626 [21:03<44:16,  7.04it/s]

{'epoch': 3, 'iter': 8930, 'avg_loss': 8.52491031564933, 'avg_acc': 50.05773429627142, 'loss': 8.750215530395508}


EP_train:3:  32%|| 8942/27626 [21:04<44:25,  7.01it/s]

{'epoch': 3, 'iter': 8940, 'avg_loss': 8.52470144533815, 'avg_acc': 50.05941729113075, 'loss': 7.711906909942627}


EP_train:3:  32%|| 8952/27626 [21:05<44:26,  7.00it/s]

{'epoch': 3, 'iter': 8950, 'avg_loss': 8.524821604420893, 'avg_acc': 50.064238632555025, 'loss': 8.266305923461914}


EP_train:3:  32%|| 8962/27626 [21:07<44:14,  7.03it/s]

{'epoch': 3, 'iter': 8960, 'avg_loss': 8.525194922165072, 'avg_acc': 50.06033087825019, 'loss': 8.338789939880371}


EP_train:3:  32%|| 8972/27626 [21:08<44:08,  7.04it/s]

{'epoch': 3, 'iter': 8970, 'avg_loss': 8.525405083355357, 'avg_acc': 50.05782521458031, 'loss': 9.324112892150879}


EP_train:3:  33%|| 8982/27626 [21:10<44:07,  7.04it/s]

{'epoch': 3, 'iter': 8980, 'avg_loss': 8.525584139440044, 'avg_acc': 50.062632223583115, 'loss': 8.745970726013184}


EP_train:3:  33%|| 8992/27626 [21:11<44:01,  7.05it/s]

{'epoch': 3, 'iter': 8990, 'avg_loss': 8.525598619024263, 'avg_acc': 50.05804415526638, 'loss': 8.945230484008789}


EP_train:3:  33%|| 9002/27626 [21:13<43:50,  7.08it/s]

{'epoch': 3, 'iter': 9000, 'avg_loss': 8.525437595022876, 'avg_acc': 50.05971558715699, 'loss': 8.522896766662598}


EP_train:3:  33%|| 9012/27626 [21:14<43:45,  7.09it/s]

{'epoch': 3, 'iter': 9010, 'avg_loss': 8.525461120592768, 'avg_acc': 50.06103651093109, 'loss': 8.122657775878906}


EP_train:3:  33%|| 9022/27626 [21:15<44:17,  7.00it/s]

{'epoch': 3, 'iter': 9020, 'avg_loss': 8.52552598099687, 'avg_acc': 50.058890366921624, 'loss': 8.31378173828125}


EP_train:3:  33%|| 9032/27626 [21:17<44:11,  7.01it/s]

{'epoch': 3, 'iter': 9030, 'avg_loss': 8.525595696922052, 'avg_acc': 50.05778706677001, 'loss': 8.836138725280762}


EP_train:3:  33%|| 9042/27626 [21:18<44:18,  6.99it/s]

{'epoch': 3, 'iter': 9040, 'avg_loss': 8.525474693748542, 'avg_acc': 50.0556492644619, 'loss': 8.722578048706055}


EP_train:3:  33%|| 9052/27626 [21:20<43:40,  7.09it/s]

{'epoch': 3, 'iter': 9050, 'avg_loss': 8.525325224236513, 'avg_acc': 50.054551983206274, 'loss': 8.608477592468262}


EP_train:3:  33%|| 9062/27626 [21:21<44:06,  7.01it/s]

{'epoch': 3, 'iter': 9060, 'avg_loss': 8.525216824776052, 'avg_acc': 50.05138781591436, 'loss': 8.284136772155762}


EP_train:3:  33%|| 9072/27626 [21:22<43:44,  7.07it/s]

{'epoch': 3, 'iter': 9070, 'avg_loss': 8.525307378515548, 'avg_acc': 50.0513311652519, 'loss': 8.64136028289795}


EP_train:3:  33%|| 9082/27626 [21:24<44:14,  6.98it/s]

{'epoch': 3, 'iter': 9080, 'avg_loss': 8.5255243483914, 'avg_acc': 50.050930514260536, 'loss': 9.110271453857422}


EP_train:3:  33%|| 9092/27626 [21:25<44:05,  7.01it/s]

{'epoch': 3, 'iter': 9090, 'avg_loss': 8.525625601340383, 'avg_acc': 50.05293697063029, 'loss': 8.225211143493652}


EP_train:3:  33%|| 9102/27626 [21:27<43:39,  7.07it/s]

{'epoch': 3, 'iter': 9100, 'avg_loss': 8.525664539680863, 'avg_acc': 50.05425227996923, 'loss': 8.308545112609863}


EP_train:3:  33%|| 9112/27626 [21:28<43:32,  7.09it/s]

{'epoch': 3, 'iter': 9110, 'avg_loss': 8.525696315823998, 'avg_acc': 50.05796564592251, 'loss': 8.700621604919434}


EP_train:3:  33%|| 9122/27626 [21:30<44:03,  7.00it/s]

{'epoch': 3, 'iter': 9120, 'avg_loss': 8.52518672073192, 'avg_acc': 50.05618901436246, 'loss': 8.417375564575195}


EP_train:3:  33%|| 9132/27626 [21:31<43:54,  7.02it/s]

{'epoch': 3, 'iter': 9130, 'avg_loss': 8.525080244795227, 'avg_acc': 50.049624904172596, 'loss': 8.335562705993652}


EP_train:3:  33%|| 9142/27626 [21:32<43:46,  7.04it/s]

{'epoch': 3, 'iter': 9140, 'avg_loss': 8.52514327051029, 'avg_acc': 50.04922874958976, 'loss': 9.130146980285645}


EP_train:3:  33%|| 9152/27626 [21:34<43:47,  7.03it/s]

{'epoch': 3, 'iter': 9150, 'avg_loss': 8.524876502833983, 'avg_acc': 50.049857939023056, 'loss': 7.6578779220581055}


EP_train:3:  33%|| 9162/27626 [21:35<43:44,  7.04it/s]

{'epoch': 3, 'iter': 9160, 'avg_loss': 8.52500701078147, 'avg_acc': 50.04400447549394, 'loss': 8.522761344909668}


EP_train:3:  33%|| 9172/27626 [21:37<43:32,  7.06it/s]

{'epoch': 3, 'iter': 9170, 'avg_loss': 8.524706882006365, 'avg_acc': 50.045660233344236, 'loss': 8.702579498291016}


EP_train:3:  33%|| 9182/27626 [21:38<43:28,  7.07it/s]

{'epoch': 3, 'iter': 9180, 'avg_loss': 8.524435045672144, 'avg_acc': 50.04527012308028, 'loss': 8.07479476928711}


EP_train:3:  33%|| 9192/27626 [21:39<43:38,  7.04it/s]

{'epoch': 3, 'iter': 9190, 'avg_loss': 8.524058056116338, 'avg_acc': 50.048620933521924, 'loss': 8.023316383361816}


EP_train:3:  33%|| 9202/27626 [21:41<43:32,  7.05it/s]

{'epoch': 3, 'iter': 9200, 'avg_loss': 8.524003069448208, 'avg_acc': 50.048228453428976, 'loss': 8.812556266784668}


EP_train:3:  33%|| 9212/27626 [21:42<43:47,  7.01it/s]

{'epoch': 3, 'iter': 9210, 'avg_loss': 8.524226332057662, 'avg_acc': 50.04647975246988, 'loss': 9.133636474609375}


EP_train:3:  33%|| 9222/27626 [21:44<43:20,  7.08it/s]

{'epoch': 3, 'iter': 9220, 'avg_loss': 8.52426558013542, 'avg_acc': 50.0467682463941, 'loss': 8.498676300048828}


EP_train:3:  33%|| 9232/27626 [21:45<43:26,  7.06it/s]

{'epoch': 3, 'iter': 9230, 'avg_loss': 8.524386892751604, 'avg_acc': 50.04942584768715, 'loss': 8.271552085876465}


EP_train:3:  33%|| 9242/27626 [21:46<43:20,  7.07it/s]

{'epoch': 3, 'iter': 9240, 'avg_loss': 8.524068735234344, 'avg_acc': 50.05072502975868, 'loss': 8.808670043945312}


EP_train:3:  33%|| 9252/27626 [21:48<43:20,  7.06it/s]

{'epoch': 3, 'iter': 9250, 'avg_loss': 8.523847018328373, 'avg_acc': 50.053372608366665, 'loss': 9.27302360534668}


EP_train:3:  34%|| 9262/27626 [21:49<43:35,  7.02it/s]

{'epoch': 3, 'iter': 9260, 'avg_loss': 8.52403291014999, 'avg_acc': 50.055002159594, 'loss': 8.325865745544434}


EP_train:3:  34%|| 9272/27626 [21:51<43:40,  7.00it/s]

{'epoch': 3, 'iter': 9270, 'avg_loss': 8.52405829208978, 'avg_acc': 50.04820138064934, 'loss': 8.718036651611328}


EP_train:3:  34%|| 9282/27626 [21:52<43:37,  7.01it/s]

{'epoch': 3, 'iter': 9280, 'avg_loss': 8.524053239344575, 'avg_acc': 50.048149445102894, 'loss': 8.381440162658691}


EP_train:3:  34%|| 9292/27626 [21:54<44:01,  6.94it/s]

{'epoch': 3, 'iter': 9290, 'avg_loss': 8.523811411464989, 'avg_acc': 50.05482456140351, 'loss': 9.155251502990723}


EP_train:3:  34%|| 9302/27626 [21:55<43:37,  7.00it/s]

{'epoch': 3, 'iter': 9300, 'avg_loss': 8.524136562242827, 'avg_acc': 50.05106977744329, 'loss': 8.468631744384766}


EP_train:3:  34%|| 9312/27626 [21:56<43:24,  7.03it/s]

{'epoch': 3, 'iter': 9310, 'avg_loss': 8.524838677718604, 'avg_acc': 50.052693051229724, 'loss': 10.636048316955566}


EP_train:3:  34%|| 9322/27626 [21:58<43:57,  6.94it/s]

{'epoch': 3, 'iter': 9320, 'avg_loss': 8.525086441158724, 'avg_acc': 50.053642313056535, 'loss': 8.038004875183105}


EP_train:3:  34%|| 9332/27626 [21:59<43:34,  7.00it/s]

{'epoch': 3, 'iter': 9330, 'avg_loss': 8.525155516532363, 'avg_acc': 50.05425463508735, 'loss': 9.131185531616211}


EP_train:3:  34%|| 9342/27626 [22:01<43:15,  7.05it/s]

{'epoch': 3, 'iter': 9340, 'avg_loss': 8.525207873955871, 'avg_acc': 50.05352745958677, 'loss': 8.046188354492188}


EP_train:3:  34%|| 9352/27626 [22:02<43:11,  7.05it/s]

{'epoch': 3, 'iter': 9350, 'avg_loss': 8.52517699190747, 'avg_acc': 50.04778900652337, 'loss': 8.857276916503906}


EP_train:3:  34%|| 9362/27626 [22:04<42:58,  7.08it/s]

{'epoch': 3, 'iter': 9360, 'avg_loss': 8.525131986711973, 'avg_acc': 50.05107627390236, 'loss': 8.437787055969238}


EP_train:3:  34%|| 9372/27626 [22:05<43:05,  7.06it/s]

{'epoch': 3, 'iter': 9370, 'avg_loss': 8.525087055484738, 'avg_acc': 50.05068829367196, 'loss': 9.001619338989258}


EP_train:3:  34%|| 9382/27626 [22:06<42:55,  7.08it/s]

{'epoch': 3, 'iter': 9380, 'avg_loss': 8.525033035813053, 'avg_acc': 50.04963490033045, 'loss': 8.440888404846191}


EP_train:3:  34%|| 9392/27626 [22:08<43:22,  7.01it/s]

{'epoch': 3, 'iter': 9390, 'avg_loss': 8.525125914368433, 'avg_acc': 50.043925034607604, 'loss': 8.988221168518066}


EP_train:3:  34%|| 9402/27626 [22:09<43:17,  7.01it/s]

{'epoch': 3, 'iter': 9400, 'avg_loss': 8.525204417426819, 'avg_acc': 50.03855972768855, 'loss': 8.942740440368652}


EP_train:3:  34%|| 9412/27626 [22:11<42:52,  7.08it/s]

{'epoch': 3, 'iter': 9410, 'avg_loss': 8.52522222933183, 'avg_acc': 50.032541706513655, 'loss': 8.326338768005371}


EP_train:3:  34%|| 9422/27626 [22:12<43:26,  6.99it/s]

{'epoch': 3, 'iter': 9420, 'avg_loss': 8.525522832543516, 'avg_acc': 50.03383398789938, 'loss': 8.672968864440918}


EP_train:3:  34%|| 9432/27626 [22:13<43:07,  7.03it/s]

{'epoch': 3, 'iter': 9430, 'avg_loss': 8.525725832572988, 'avg_acc': 50.032804050471846, 'loss': 8.538559913635254}


EP_train:3:  34%|| 9442/27626 [22:15<43:02,  7.04it/s]

{'epoch': 3, 'iter': 9440, 'avg_loss': 8.525692607270322, 'avg_acc': 50.03475532252939, 'loss': 9.058706283569336}


EP_train:3:  34%|| 9452/27626 [22:16<44:06,  6.87it/s]

{'epoch': 3, 'iter': 9450, 'avg_loss': 8.52559406423604, 'avg_acc': 50.03769442387049, 'loss': 8.510847091674805}


EP_train:3:  34%|| 9462/27626 [22:18<42:48,  7.07it/s]

{'epoch': 3, 'iter': 9460, 'avg_loss': 8.525757502985359, 'avg_acc': 50.03897579537047, 'loss': 8.826021194458008}


EP_train:3:  34%|| 9472/27626 [22:19<43:10,  7.01it/s]

{'epoch': 3, 'iter': 9470, 'avg_loss': 8.525654547847118, 'avg_acc': 50.041904233977405, 'loss': 8.540698051452637}


EP_train:3:  34%|| 9482/27626 [22:21<43:26,  6.96it/s]

{'epoch': 3, 'iter': 9480, 'avg_loss': 8.525632227640239, 'avg_acc': 50.03955278979011, 'loss': 9.037959098815918}


EP_train:3:  34%|| 9492/27626 [22:22<42:43,  7.08it/s]

{'epoch': 3, 'iter': 9490, 'avg_loss': 8.525854404979205, 'avg_acc': 50.04082815298704, 'loss': 9.324806213378906}


EP_train:3:  34%|| 9502/27626 [22:23<42:43,  7.07it/s]

{'epoch': 3, 'iter': 9500, 'avg_loss': 8.525711719292914, 'avg_acc': 50.038153878539106, 'loss': 8.585691452026367}


EP_train:3:  34%|| 9512/27626 [22:25<42:48,  7.05it/s]

{'epoch': 3, 'iter': 9510, 'avg_loss': 8.525862363043824, 'avg_acc': 50.037785196088734, 'loss': 8.301687240600586}


EP_train:3:  34%|| 9522/27626 [22:26<42:36,  7.08it/s]

{'epoch': 3, 'iter': 9520, 'avg_loss': 8.525890885525273, 'avg_acc': 50.0357761789728, 'loss': 7.859257698059082}


EP_train:3:  35%|| 9532/27626 [22:28<43:01,  7.01it/s]

{'epoch': 3, 'iter': 9530, 'avg_loss': 8.525991360571611, 'avg_acc': 50.03475500996748, 'loss': 8.632774353027344}


EP_train:3:  35%|| 9542/27626 [22:29<42:32,  7.08it/s]

{'epoch': 3, 'iter': 9540, 'avg_loss': 8.526008578869122, 'avg_acc': 50.03406351535479, 'loss': 8.167577743530273}


EP_train:3:  35%|| 9552/27626 [22:30<42:48,  7.04it/s]

{'epoch': 3, 'iter': 9550, 'avg_loss': 8.526247064476424, 'avg_acc': 50.03042875091614, 'loss': 8.472620010375977}


EP_train:3:  35%|| 9562/27626 [22:32<43:03,  6.99it/s]

{'epoch': 3, 'iter': 9560, 'avg_loss': 8.526175073195347, 'avg_acc': 50.02778213575986, 'loss': 8.566831588745117}


EP_train:3:  35%|| 9572/27626 [22:33<42:52,  7.02it/s]

{'epoch': 3, 'iter': 9570, 'avg_loss': 8.526028651651101, 'avg_acc': 50.026120572562945, 'loss': 7.354804992675781}


EP_train:3:  35%|| 9582/27626 [22:35<42:47,  7.03it/s]

{'epoch': 3, 'iter': 9580, 'avg_loss': 8.526211734704441, 'avg_acc': 50.02511481056258, 'loss': 8.021628379821777}


EP_train:3:  35%|| 9592/27626 [22:36<42:53,  7.01it/s]

{'epoch': 3, 'iter': 9590, 'avg_loss': 8.526201070280992, 'avg_acc': 50.02736940882077, 'loss': 8.771305084228516}


EP_train:3:  35%|| 9602/27626 [22:37<43:05,  6.97it/s]

{'epoch': 3, 'iter': 9600, 'avg_loss': 8.526445021965568, 'avg_acc': 50.02571346734715, 'loss': 8.661806106567383}


EP_train:3:  35%|| 9612/27626 [22:39<42:33,  7.05it/s]

{'epoch': 3, 'iter': 9610, 'avg_loss': 8.526247973149854, 'avg_acc': 50.02503641660597, 'loss': 7.912758827209473}


EP_train:3:  35%|| 9622/27626 [22:40<42:30,  7.06it/s]

{'epoch': 3, 'iter': 9620, 'avg_loss': 8.52631807738636, 'avg_acc': 50.0256600145515, 'loss': 8.436432838439941}


EP_train:3:  35%|| 9632/27626 [22:42<42:42,  7.02it/s]

{'epoch': 3, 'iter': 9630, 'avg_loss': 8.526299452858419, 'avg_acc': 50.02530889834909, 'loss': 9.319498062133789}


EP_train:3:  35%|| 9642/27626 [22:43<42:22,  7.07it/s]

{'epoch': 3, 'iter': 9640, 'avg_loss': 8.526496612312215, 'avg_acc': 50.023013691525776, 'loss': 8.69336986541748}


EP_train:3:  35%|| 9652/27626 [22:45<42:39,  7.02it/s]

{'epoch': 3, 'iter': 9650, 'avg_loss': 8.52674795444636, 'avg_acc': 50.02007563983006, 'loss': 8.753829002380371}


EP_train:3:  35%|| 9662/27626 [22:46<42:56,  6.97it/s]

{'epoch': 3, 'iter': 9660, 'avg_loss': 8.526793343694347, 'avg_acc': 50.02490684194183, 'loss': 8.425511360168457}


EP_train:3:  35%|| 9672/27626 [22:47<42:25,  7.05it/s]

{'epoch': 3, 'iter': 9670, 'avg_loss': 8.526468583660108, 'avg_acc': 50.026173611829186, 'loss': 7.775473594665527}


EP_train:3:  35%|| 9682/27626 [22:49<42:25,  7.05it/s]

{'epoch': 3, 'iter': 9680, 'avg_loss': 8.526224759279875, 'avg_acc': 50.02291860345006, 'loss': 7.601057529449463}


EP_train:3:  35%|| 9692/27626 [22:50<42:30,  7.03it/s]

{'epoch': 3, 'iter': 9690, 'avg_loss': 8.52619310380406, 'avg_acc': 50.02289495408111, 'loss': 7.934024333953857}


EP_train:3:  35%|| 9702/27626 [22:52<42:43,  6.99it/s]

{'epoch': 3, 'iter': 9700, 'avg_loss': 8.52641945477061, 'avg_acc': 50.020616431295736, 'loss': 8.453167915344238}


EP_train:3:  35%|| 9712/27626 [22:53<42:28,  7.03it/s]

{'epoch': 3, 'iter': 9710, 'avg_loss': 8.526351208029343, 'avg_acc': 50.01705540109155, 'loss': 8.031630516052246}


EP_train:3:  35%|| 9722/27626 [22:54<42:03,  7.09it/s]

{'epoch': 3, 'iter': 9720, 'avg_loss': 8.526431692957498, 'avg_acc': 50.01575198024894, 'loss': 8.249890327453613}


EP_train:3:  35%|| 9732/27626 [22:56<42:22,  7.04it/s]

{'epoch': 3, 'iter': 9730, 'avg_loss': 8.526462593594113, 'avg_acc': 50.0150935155688, 'loss': 9.389060020446777}


EP_train:3:  35%|| 9742/27626 [22:57<42:10,  7.07it/s]

{'epoch': 3, 'iter': 9740, 'avg_loss': 8.526553997280246, 'avg_acc': 50.015719638640796, 'loss': 8.888701438903809}


EP_train:3:  35%|| 9752/27626 [22:59<42:45,  6.97it/s]

{'epoch': 3, 'iter': 9750, 'avg_loss': 8.5265622460748, 'avg_acc': 50.01762639729258, 'loss': 8.803754806518555}


EP_train:3:  35%|| 9762/27626 [23:00<42:13,  7.05it/s]

{'epoch': 3, 'iter': 9760, 'avg_loss': 8.526813389592796, 'avg_acc': 50.01824864255712, 'loss': 8.314668655395508}


EP_train:3:  35%|| 9772/27626 [23:02<42:39,  6.98it/s]

{'epoch': 3, 'iter': 9770, 'avg_loss': 8.526651953469889, 'avg_acc': 50.01311278272439, 'loss': 7.446381092071533}


EP_train:3:  35%|| 9782/27626 [23:03<42:53,  6.93it/s]

{'epoch': 3, 'iter': 9780, 'avg_loss': 8.526321061975494, 'avg_acc': 50.01853082506901, 'loss': 7.7160186767578125}


EP_train:3:  35%|| 9792/27626 [23:04<42:19,  7.02it/s]

{'epoch': 3, 'iter': 9790, 'avg_loss': 8.526213278384244, 'avg_acc': 50.02106526401797, 'loss': 7.927517414093018}


EP_train:3:  35%|| 9802/27626 [23:06<42:12,  7.04it/s]

{'epoch': 3, 'iter': 9800, 'avg_loss': 8.526268677107717, 'avg_acc': 50.0194495459647, 'loss': 8.211520195007324}


EP_train:3:  36%|| 9812/27626 [23:07<42:12,  7.03it/s]

{'epoch': 3, 'iter': 9810, 'avg_loss': 8.526312012371577, 'avg_acc': 50.02134084191214, 'loss': 8.448708534240723}


EP_train:3:  36%|| 9822/27626 [23:09<42:04,  7.05it/s]

{'epoch': 3, 'iter': 9820, 'avg_loss': 8.526406712886965, 'avg_acc': 50.02004632929437, 'loss': 8.598579406738281}


EP_train:3:  36%|| 9832/27626 [23:10<42:03,  7.05it/s]

{'epoch': 3, 'iter': 9830, 'avg_loss': 8.526144303403422, 'avg_acc': 50.02256891465772, 'loss': 8.263646125793457}


EP_train:3:  36%|| 9842/27626 [23:11<42:15,  7.01it/s]

{'epoch': 3, 'iter': 9840, 'avg_loss': 8.526083634377205, 'avg_acc': 50.022228432069916, 'loss': 8.681325912475586}


EP_train:3:  36%|| 9852/27626 [23:13<42:13,  7.02it/s]

{'epoch': 3, 'iter': 9850, 'avg_loss': 8.526154547702866, 'avg_acc': 50.01871637397218, 'loss': 9.26832103729248}


EP_train:3:  36%|| 9862/27626 [23:14<42:15,  7.01it/s]

{'epoch': 3, 'iter': 9860, 'avg_loss': 8.526140539849969, 'avg_acc': 50.019965013690296, 'loss': 8.935455322265625}


EP_train:3:  36%|| 9872/27626 [23:16<41:48,  7.08it/s]

{'epoch': 3, 'iter': 9870, 'avg_loss': 8.526152212681856, 'avg_acc': 50.016778948434805, 'loss': 8.584897994995117}


EP_train:3:  36%|| 9882/27626 [23:17<41:56,  7.05it/s]

{'epoch': 3, 'iter': 9880, 'avg_loss': 8.525975426371891, 'avg_acc': 50.01771075802044, 'loss': 7.722001552581787}


EP_train:3:  36%|| 9892/27626 [23:19<41:45,  7.08it/s]

{'epoch': 3, 'iter': 9890, 'avg_loss': 8.525685802290406, 'avg_acc': 50.01927257102417, 'loss': 7.970654010772705}


EP_train:3:  36%|| 9902/27626 [23:20<41:44,  7.08it/s]

{'epoch': 3, 'iter': 9900, 'avg_loss': 8.525440449980024, 'avg_acc': 50.02083122916877, 'loss': 8.235209465026855}


EP_train:3:  36%|| 9912/27626 [23:21<41:49,  7.06it/s]

{'epoch': 3, 'iter': 9910, 'avg_loss': 8.525255571591867, 'avg_acc': 50.0258551104833, 'loss': 9.415946006774902}


EP_train:3:  36%|| 9922/27626 [23:23<41:55,  7.04it/s]

{'epoch': 3, 'iter': 9920, 'avg_loss': 8.524842662492139, 'avg_acc': 50.024254107448854, 'loss': 8.199237823486328}


EP_train:3:  36%|| 9932/27626 [23:24<41:54,  7.04it/s]

{'epoch': 3, 'iter': 9930, 'avg_loss': 8.52498197757205, 'avg_acc': 50.0239150135938, 'loss': 8.466670989990234}


EP_train:3:  36%|| 9942/27626 [23:26<42:08,  7.00it/s]

{'epoch': 3, 'iter': 9940, 'avg_loss': 8.524793447497002, 'avg_acc': 50.02263353787345, 'loss': 8.65040111541748}


EP_train:3:  36%|| 9952/27626 [23:27<41:44,  7.06it/s]

{'epoch': 3, 'iter': 9950, 'avg_loss': 8.524868585250927, 'avg_acc': 50.02637925836599, 'loss': 9.21268081665039}


EP_train:3:  36%|| 9962/27626 [23:28<41:47,  7.04it/s]

{'epoch': 3, 'iter': 9960, 'avg_loss': 8.524881588624025, 'avg_acc': 50.02698022286919, 'loss': 8.729625701904297}


EP_train:3:  36%|| 9972/27626 [23:30<41:43,  7.05it/s]

{'epoch': 3, 'iter': 9970, 'avg_loss': 8.524962541870769, 'avg_acc': 50.02319225754689, 'loss': 8.365727424621582}


EP_train:3:  36%|| 9982/27626 [23:31<41:45,  7.04it/s]

{'epoch': 3, 'iter': 9980, 'avg_loss': 8.52527056241033, 'avg_acc': 50.02222973649935, 'loss': 8.237760543823242}


EP_train:3:  36%|| 9992/27626 [23:33<41:28,  7.08it/s]

{'epoch': 3, 'iter': 9990, 'avg_loss': 8.525521486851442, 'avg_acc': 50.021894705234715, 'loss': 9.047982215881348}


EP_train:3:  36%|| 10002/27626 [23:34<41:58,  7.00it/s]

{'epoch': 3, 'iter': 10000, 'avg_loss': 8.525621038236066, 'avg_acc': 50.02249775022498, 'loss': 9.301637649536133}


EP_train:3:  36%|| 10012/27626 [23:36<41:34,  7.06it/s]

{'epoch': 3, 'iter': 10010, 'avg_loss': 8.525509275383627, 'avg_acc': 50.02403606033363, 'loss': 9.143131256103516}


EP_train:3:  36%|| 10022/27626 [23:37<41:59,  6.99it/s]

{'epoch': 3, 'iter': 10020, 'avg_loss': 8.525677752249756, 'avg_acc': 50.01964624288993, 'loss': 8.669144630432129}


EP_train:3:  36%|| 10032/27626 [23:38<41:47,  7.02it/s]

{'epoch': 3, 'iter': 10030, 'avg_loss': 8.525797099038453, 'avg_acc': 50.014953643704516, 'loss': 9.265810012817383}


EP_train:3:  36%|| 10042/27626 [23:40<41:22,  7.08it/s]

{'epoch': 3, 'iter': 10040, 'avg_loss': 8.52585361013611, 'avg_acc': 50.01524997510208, 'loss': 8.597732543945312}


EP_train:3:  36%|| 10052/27626 [23:41<41:45,  7.02it/s]

{'epoch': 3, 'iter': 10050, 'avg_loss': 8.52617001872717, 'avg_acc': 50.015234802507216, 'loss': 9.033530235290527}


EP_train:3:  36%|| 10062/27626 [23:43<41:53,  6.99it/s]

{'epoch': 3, 'iter': 10060, 'avg_loss': 8.526215802152782, 'avg_acc': 50.01956813438028, 'loss': 8.223828315734863}


EP_train:3:  36%|| 10072/27626 [23:44<41:47,  7.00it/s]

{'epoch': 3, 'iter': 10070, 'avg_loss': 8.526029568563185, 'avg_acc': 50.019548704200176, 'loss': 8.646424293518066}


EP_train:3:  36%|| 10082/27626 [23:45<41:35,  7.03it/s]

{'epoch': 3, 'iter': 10080, 'avg_loss': 8.52585047935756, 'avg_acc': 50.01921932347982, 'loss': 8.270299911499023}


EP_train:3:  37%|| 10092/27626 [23:47<41:16,  7.08it/s]

{'epoch': 3, 'iter': 10090, 'avg_loss': 8.525867348938322, 'avg_acc': 50.01858091368546, 'loss': 8.888956069946289}


EP_train:3:  37%|| 10102/27626 [23:48<41:49,  6.98it/s]

{'epoch': 3, 'iter': 10100, 'avg_loss': 8.525673634769209, 'avg_acc': 50.020109395109394, 'loss': 8.149550437927246}


EP_train:3:  37%|| 10112/27626 [23:50<41:46,  6.99it/s]

{'epoch': 3, 'iter': 10110, 'avg_loss': 8.525935129552929, 'avg_acc': 50.019471367817225, 'loss': 8.44828987121582}


EP_train:3:  37%|| 10122/27626 [23:51<41:15,  7.07it/s]

{'epoch': 3, 'iter': 10120, 'avg_loss': 8.525543859148435, 'avg_acc': 50.02161347692916, 'loss': 8.073471069335938}


EP_train:3:  37%|| 10132/27626 [23:53<41:09,  7.08it/s]

{'epoch': 3, 'iter': 10130, 'avg_loss': 8.52506669117637, 'avg_acc': 50.02868670417531, 'loss': 7.585723400115967}


EP_train:3:  37%|| 10142/27626 [23:54<41:33,  7.01it/s]

{'epoch': 3, 'iter': 10140, 'avg_loss': 8.525055303245404, 'avg_acc': 50.03050734641554, 'loss': 9.0201416015625}


EP_train:3:  37%|| 10152/27626 [23:55<41:32,  7.01it/s]

{'epoch': 3, 'iter': 10150, 'avg_loss': 8.525447779929694, 'avg_acc': 50.030169441434346, 'loss': 8.453742980957031}


EP_train:3:  37%|| 10162/27626 [23:57<41:19,  7.04it/s]

{'epoch': 3, 'iter': 10160, 'avg_loss': 8.52535685112293, 'avg_acc': 50.0276793622675, 'loss': 8.011932373046875}


EP_train:3:  37%|| 10172/27626 [23:58<41:04,  7.08it/s]

{'epoch': 3, 'iter': 10170, 'avg_loss': 8.525515058213848, 'avg_acc': 50.02918837872382, 'loss': 9.221152305603027}


EP_train:3:  37%|| 10182/27626 [24:00<41:17,  7.04it/s]

{'epoch': 3, 'iter': 10180, 'avg_loss': 8.525507672574687, 'avg_acc': 50.02885276495432, 'loss': 8.423273086547852}


EP_train:3:  37%|| 10192/27626 [24:01<41:27,  7.01it/s]

{'epoch': 3, 'iter': 10190, 'avg_loss': 8.525084392466006, 'avg_acc': 50.0248380924345, 'loss': 7.406187057495117}


EP_train:3:  37%|| 10202/27626 [24:02<41:28,  7.00it/s]

{'epoch': 3, 'iter': 10200, 'avg_loss': 8.525230735905579, 'avg_acc': 50.026039113812374, 'loss': 8.174809455871582}


EP_train:3:  37%|| 10212/27626 [24:04<41:15,  7.04it/s]

{'epoch': 3, 'iter': 10210, 'avg_loss': 8.524819226050608, 'avg_acc': 50.02907403780237, 'loss': 8.599026679992676}


EP_train:3:  37%|| 10222/27626 [24:05<40:54,  7.09it/s]

{'epoch': 3, 'iter': 10220, 'avg_loss': 8.524784341692143, 'avg_acc': 50.03057430779767, 'loss': 8.843597412109375}


EP_train:3:  37%|| 10232/27626 [24:07<41:01,  7.07it/s]

{'epoch': 3, 'iter': 10230, 'avg_loss': 8.524611204347003, 'avg_acc': 50.02626820447659, 'loss': 7.351603031158447}


EP_train:3:  37%|| 10242/27626 [24:08<41:12,  7.03it/s]

{'epoch': 3, 'iter': 10240, 'avg_loss': 8.524981797643616, 'avg_acc': 50.02898886827458, 'loss': 8.753812789916992}


EP_train:3:  37%|| 10252/27626 [24:10<41:21,  7.00it/s]

{'epoch': 3, 'iter': 10250, 'avg_loss': 8.525394565849092, 'avg_acc': 50.02591210613598, 'loss': 9.023279190063477}


EP_train:3:  37%|| 10262/27626 [24:11<40:58,  7.06it/s]

{'epoch': 3, 'iter': 10260, 'avg_loss': 8.525690500411638, 'avg_acc': 50.02771416041322, 'loss': 9.077495574951172}


EP_train:3:  37%|| 10272/27626 [24:12<41:12,  7.02it/s]

{'epoch': 3, 'iter': 10270, 'avg_loss': 8.52592193255138, 'avg_acc': 50.02373186642002, 'loss': 8.396547317504883}


EP_train:3:  37%|| 10282/27626 [24:14<41:12,  7.01it/s]

{'epoch': 3, 'iter': 10280, 'avg_loss': 8.525902222803207, 'avg_acc': 50.02036523684467, 'loss': 8.418784141540527}


EP_train:3:  37%|| 10292/27626 [24:15<41:26,  6.97it/s]

{'epoch': 3, 'iter': 10290, 'avg_loss': 8.526010068393568, 'avg_acc': 50.02125643766397, 'loss': 9.334610939025879}


EP_train:3:  37%|| 10302/27626 [24:17<40:59,  7.04it/s]

{'epoch': 3, 'iter': 10300, 'avg_loss': 8.525947393693805, 'avg_acc': 50.01911222211436, 'loss': 7.95566987991333}


EP_train:3:  37%|| 10312/27626 [24:18<41:09,  7.01it/s]

{'epoch': 3, 'iter': 10310, 'avg_loss': 8.52629580519477, 'avg_acc': 50.019699835127525, 'loss': 8.730206489562988}


EP_train:3:  37%|| 10322/27626 [24:19<40:46,  7.07it/s]

{'epoch': 3, 'iter': 10320, 'avg_loss': 8.526441652360623, 'avg_acc': 50.015441817653326, 'loss': 8.549510955810547}


EP_train:3:  37%|| 10332/27626 [24:21<40:43,  7.08it/s]

{'epoch': 3, 'iter': 10330, 'avg_loss': 8.526546799563079, 'avg_acc': 50.0178467718517, 'loss': 8.92723560333252}


EP_train:3:  37%|| 10342/27626 [24:22<41:05,  7.01it/s]

{'epoch': 3, 'iter': 10340, 'avg_loss': 8.526358705368832, 'avg_acc': 50.0178295135867, 'loss': 8.656952857971191}


EP_train:3:  37%|| 10352/27626 [24:24<40:55,  7.03it/s]

{'epoch': 3, 'iter': 10350, 'avg_loss': 8.52631451917517, 'avg_acc': 50.01781228866776, 'loss': 8.419151306152344}


EP_train:3:  38%|| 10362/27626 [24:25<40:42,  7.07it/s]

{'epoch': 3, 'iter': 10360, 'avg_loss': 8.525941926256454, 'avg_acc': 50.01658864974423, 'loss': 9.342315673828125}


EP_train:3:  38%|| 10372/27626 [24:26<40:58,  7.02it/s]

{'epoch': 3, 'iter': 10370, 'avg_loss': 8.525807052485773, 'avg_acc': 50.017476617491084, 'loss': 8.008094787597656}


EP_train:3:  38%|| 10382/27626 [24:28<40:55,  7.02it/s]

{'epoch': 3, 'iter': 10380, 'avg_loss': 8.525599766117496, 'avg_acc': 50.015352567190064, 'loss': 8.312701225280762}


EP_train:3:  38%|| 10392/27626 [24:29<40:40,  7.06it/s]

{'epoch': 3, 'iter': 10390, 'avg_loss': 8.525556646243702, 'avg_acc': 50.01593927437206, 'loss': 8.246952056884766}


EP_train:3:  38%|| 10402/27626 [24:31<40:37,  7.07it/s]

{'epoch': 3, 'iter': 10400, 'avg_loss': 8.525719342077252, 'avg_acc': 50.01021536390732, 'loss': 8.757946968078613}


EP_train:3:  38%|| 10412/27626 [24:32<40:50,  7.03it/s]

{'epoch': 3, 'iter': 10410, 'avg_loss': 8.525616428577502, 'avg_acc': 50.01620881759678, 'loss': 7.811661243438721}


EP_train:3:  38%|| 10422/27626 [24:34<40:51,  7.02it/s]

{'epoch': 3, 'iter': 10420, 'avg_loss': 8.525401891827892, 'avg_acc': 50.017092889358025, 'loss': 8.178943634033203}


EP_train:3:  38%|| 10432/27626 [24:35<40:52,  7.01it/s]

{'epoch': 3, 'iter': 10430, 'avg_loss': 8.52553896172119, 'avg_acc': 50.019772792637326, 'loss': 8.4430513381958}


EP_train:3:  38%|| 10442/27626 [24:36<41:05,  6.97it/s]

{'epoch': 3, 'iter': 10440, 'avg_loss': 8.525551757688278, 'avg_acc': 50.020951058327746, 'loss': 8.457036972045898}


EP_train:3:  38%|| 10452/27626 [24:38<40:40,  7.04it/s]

{'epoch': 3, 'iter': 10450, 'avg_loss': 8.525594893716395, 'avg_acc': 50.02093101138647, 'loss': 9.130302429199219}


EP_train:3:  38%|| 10462/27626 [24:39<40:43,  7.02it/s]

{'epoch': 3, 'iter': 10460, 'avg_loss': 8.525900255891905, 'avg_acc': 50.02091100277221, 'loss': 8.22034740447998}


EP_train:3:  38%|| 10472/27626 [24:41<41:01,  6.97it/s]

{'epoch': 3, 'iter': 10470, 'avg_loss': 8.525997734060716, 'avg_acc': 50.01701126921974, 'loss': 8.527469635009766}


EP_train:3:  38%|| 10482/27626 [24:42<40:40,  7.02it/s]

{'epoch': 3, 'iter': 10480, 'avg_loss': 8.526199599643572, 'avg_acc': 50.01431161148745, 'loss': 7.807766914367676}


EP_train:3:  38%|| 10492/27626 [24:43<40:47,  7.00it/s]

{'epoch': 3, 'iter': 10490, 'avg_loss': 8.526155503300009, 'avg_acc': 50.01251072347726, 'loss': 8.602416038513184}


EP_train:3:  38%|| 10502/27626 [24:45<40:27,  7.05it/s]

{'epoch': 3, 'iter': 10500, 'avg_loss': 8.526078661105007, 'avg_acc': 50.01428435387106, 'loss': 9.241578102111816}


EP_train:3:  38%|| 10512/27626 [24:46<40:45,  7.00it/s]

{'epoch': 3, 'iter': 10510, 'avg_loss': 8.526209027224382, 'avg_acc': 50.01843307011702, 'loss': 8.56024169921875}


EP_train:3:  38%|| 10522/27626 [24:48<40:19,  7.07it/s]

{'epoch': 3, 'iter': 10520, 'avg_loss': 8.526300213747339, 'avg_acc': 50.02227687482178, 'loss': 9.18277645111084}


EP_train:3:  38%|| 10532/27626 [24:49<40:35,  7.02it/s]

{'epoch': 3, 'iter': 10530, 'avg_loss': 8.52621643152256, 'avg_acc': 50.02255246415345, 'loss': 8.478610038757324}


EP_train:3:  38%|| 10542/27626 [24:51<40:19,  7.06it/s]

{'epoch': 3, 'iter': 10540, 'avg_loss': 8.526301595633402, 'avg_acc': 50.021938146285926, 'loss': 8.593852043151855}


EP_train:3:  38%|| 10552/27626 [24:52<40:56,  6.95it/s]

{'epoch': 3, 'iter': 10550, 'avg_loss': 8.5257378000974, 'avg_acc': 50.02636006065776, 'loss': 8.563217163085938}


EP_train:3:  38%|| 10562/27626 [24:53<40:29,  7.02it/s]

{'epoch': 3, 'iter': 10560, 'avg_loss': 8.525813305449073, 'avg_acc': 50.02248840071963, 'loss': 9.15062141418457}


EP_train:3:  38%|| 10572/27626 [24:55<40:12,  7.07it/s]

{'epoch': 3, 'iter': 10570, 'avg_loss': 8.525934029094612, 'avg_acc': 50.02394522750922, 'loss': 9.14853572845459}


EP_train:3:  38%|| 10582/27626 [24:56<40:15,  7.05it/s]

{'epoch': 3, 'iter': 10580, 'avg_loss': 8.525701166314649, 'avg_acc': 50.027762026273514, 'loss': 7.756232738494873}


EP_train:3:  38%|| 10592/27626 [24:58<40:29,  7.01it/s]

{'epoch': 3, 'iter': 10590, 'avg_loss': 8.52550966342778, 'avg_acc': 50.02390000944198, 'loss': 7.995090007781982}


EP_train:3:  38%|| 10602/27626 [24:59<40:19,  7.04it/s]

{'epoch': 3, 'iter': 10600, 'avg_loss': 8.525466099672414, 'avg_acc': 50.02328789736817, 'loss': 9.03148078918457}


EP_train:3:  38%|| 10612/27626 [25:00<40:21,  7.03it/s]

{'epoch': 3, 'iter': 10610, 'avg_loss': 8.525507519353223, 'avg_acc': 50.021204410517385, 'loss': 8.480721473693848}


EP_train:3:  38%|| 10622/27626 [25:02<40:17,  7.03it/s]

{'epoch': 3, 'iter': 10620, 'avg_loss': 8.52551394812682, 'avg_acc': 50.02147867432445, 'loss': 8.616238594055176}


EP_train:3:  38%|| 10632/27626 [25:03<40:03,  7.07it/s]

{'epoch': 3, 'iter': 10630, 'avg_loss': 8.525708046000737, 'avg_acc': 50.01910685730411, 'loss': 9.214337348937988}


EP_train:3:  39%|| 10642/27626 [25:05<40:19,  7.02it/s]

{'epoch': 3, 'iter': 10640, 'avg_loss': 8.525513672467943, 'avg_acc': 50.01673949816746, 'loss': 8.122740745544434}


EP_train:3:  39%|| 10652/27626 [25:06<40:13,  7.03it/s]

{'epoch': 3, 'iter': 10650, 'avg_loss': 8.525595565903703, 'avg_acc': 50.016430382123744, 'loss': 8.837079048156738}


EP_train:3:  39%|| 10662/27626 [25:08<40:02,  7.06it/s]

{'epoch': 3, 'iter': 10660, 'avg_loss': 8.525627140121738, 'avg_acc': 50.013190601256916, 'loss': 8.907791137695312}


EP_train:3:  39%|| 10672/27626 [25:09<40:16,  7.02it/s]

{'epoch': 3, 'iter': 10670, 'avg_loss': 8.525572453049216, 'avg_acc': 50.016985287227065, 'loss': 8.411932945251465}


EP_train:3:  39%|| 10682/27626 [25:10<40:13,  7.02it/s]

{'epoch': 3, 'iter': 10680, 'avg_loss': 8.525664699623373, 'avg_acc': 50.01696938488905, 'loss': 8.902349472045898}


EP_train:3:  39%|| 10692/27626 [25:12<40:12,  7.02it/s]

{'epoch': 3, 'iter': 10690, 'avg_loss': 8.525497372475344, 'avg_acc': 50.0181227200449, 'loss': 8.7936429977417}


EP_train:3:  39%|| 10702/27626 [25:13<39:56,  7.06it/s]

{'epoch': 3, 'iter': 10700, 'avg_loss': 8.52585962440085, 'avg_acc': 50.014017381553124, 'loss': 8.280895233154297}


EP_train:3:  39%|| 10712/27626 [25:15<40:05,  7.03it/s]

{'epoch': 3, 'iter': 10710, 'avg_loss': 8.525904699129146, 'avg_acc': 50.01429605078891, 'loss': 8.075403213500977}


EP_train:3:  39%|| 10722/27626 [25:16<39:58,  7.05it/s]

{'epoch': 3, 'iter': 10720, 'avg_loss': 8.525784385036415, 'avg_acc': 50.01136787613096, 'loss': 8.987398147583008}


EP_train:3:  39%|| 10732/27626 [25:17<40:11,  7.01it/s]

{'epoch': 3, 'iter': 10730, 'avg_loss': 8.525546685079195, 'avg_acc': 50.01281334451589, 'loss': 8.114816665649414}


EP_train:3:  39%|| 10742/27626 [25:19<39:59,  7.04it/s]

{'epoch': 3, 'iter': 10740, 'avg_loss': 8.52537945703958, 'avg_acc': 50.012801415138256, 'loss': 8.092756271362305}


EP_train:3:  39%|| 10752/27626 [25:20<40:06,  7.01it/s]

{'epoch': 3, 'iter': 10750, 'avg_loss': 8.52559581338256, 'avg_acc': 50.01075481350572, 'loss': 9.133275985717773}


EP_train:3:  39%|| 10762/27626 [25:22<40:06,  7.01it/s]

{'epoch': 3, 'iter': 10760, 'avg_loss': 8.525491764372404, 'avg_acc': 50.00987361769352, 'loss': 8.412153244018555}


EP_train:3:  39%|| 10772/27626 [25:23<39:52,  7.04it/s]

{'epoch': 3, 'iter': 10770, 'avg_loss': 8.525821051926524, 'avg_acc': 50.013346021725, 'loss': 8.880202293395996}


EP_train:3:  39%|| 10782/27626 [25:25<40:08,  6.99it/s]

{'epoch': 3, 'iter': 10780, 'avg_loss': 8.525790098480629, 'avg_acc': 50.00956543919859, 'loss': 8.0145845413208}


EP_train:3:  39%|| 10792/27626 [25:26<39:43,  7.06it/s]

{'epoch': 3, 'iter': 10790, 'avg_loss': 8.52574432593282, 'avg_acc': 50.01245250671856, 'loss': 7.30215311050415}


EP_train:3:  39%|| 10802/27626 [25:27<39:53,  7.03it/s]

{'epoch': 3, 'iter': 10800, 'avg_loss': 8.525248702659992, 'avg_acc': 50.010705027312284, 'loss': 7.639774799346924}


EP_train:3:  39%|| 10812/27626 [25:29<39:34,  7.08it/s]

{'epoch': 3, 'iter': 10810, 'avg_loss': 8.525148895642284, 'avg_acc': 50.012140412542784, 'loss': 9.374740600585938}


EP_train:3:  39%|| 10822/27626 [25:30<40:11,  6.97it/s]

{'epoch': 3, 'iter': 10820, 'avg_loss': 8.524973324581012, 'avg_acc': 50.01010766102948, 'loss': 8.07846736907959}


EP_train:3:  39%|| 10832/27626 [25:32<39:55,  7.01it/s]

{'epoch': 3, 'iter': 10830, 'avg_loss': 8.525211662524582, 'avg_acc': 50.00894423414274, 'loss': 7.900782108306885}


EP_train:3:  39%|| 10842/27626 [25:33<39:45,  7.04it/s]

{'epoch': 3, 'iter': 10840, 'avg_loss': 8.525519980527458, 'avg_acc': 50.00605340835717, 'loss': 8.64199161529541}


EP_train:3:  39%|| 10852/27626 [25:34<39:51,  7.02it/s]

{'epoch': 3, 'iter': 10850, 'avg_loss': 8.525643492522805, 'avg_acc': 50.00748778914386, 'loss': 9.340985298156738}


EP_train:3:  39%|| 10862/27626 [25:36<39:56,  6.99it/s]

{'epoch': 3, 'iter': 10860, 'avg_loss': 8.525544859363979, 'avg_acc': 50.00891952858852, 'loss': 8.186047554016113}


EP_train:3:  39%|| 10872/27626 [25:37<39:26,  7.08it/s]

{'epoch': 3, 'iter': 10870, 'avg_loss': 8.525591740008606, 'avg_acc': 50.006324165210195, 'loss': 7.927718639373779}


EP_train:3:  39%|| 10882/27626 [25:39<39:44,  7.02it/s]

{'epoch': 3, 'iter': 10880, 'avg_loss': 8.525516633327454, 'avg_acc': 50.00287197867843, 'loss': 8.88182258605957}


EP_train:3:  39%|| 10892/27626 [25:40<39:33,  7.05it/s]

{'epoch': 3, 'iter': 10890, 'avg_loss': 8.525562613915175, 'avg_acc': 50.00516481498485, 'loss': 8.759385108947754}


EP_train:3:  39%|| 10902/27626 [25:42<39:24,  7.07it/s]

{'epoch': 3, 'iter': 10900, 'avg_loss': 8.525375978474923, 'avg_acc': 50.00802678653334, 'loss': 9.115285873413086}


EP_train:3:  39%|| 10912/27626 [25:43<39:40,  7.02it/s]

{'epoch': 3, 'iter': 10910, 'avg_loss': 8.525703335560031, 'avg_acc': 50.00601457244982, 'loss': 9.95138931274414}


EP_train:3:  40%|| 10922/27626 [25:44<39:34,  7.03it/s]

{'epoch': 3, 'iter': 10920, 'avg_loss': 8.525813411022432, 'avg_acc': 50.00515062723194, 'loss': 8.11298656463623}


EP_train:3:  40%|| 10932/27626 [25:46<39:40,  7.01it/s]

{'epoch': 3, 'iter': 10930, 'avg_loss': 8.525589455320222, 'avg_acc': 50.007147104565, 'loss': 7.944924354553223}


EP_train:3:  40%|| 10942/27626 [25:47<39:14,  7.09it/s]

{'epoch': 3, 'iter': 10940, 'avg_loss': 8.525845671079242, 'avg_acc': 50.007711817932545, 'loss': 10.077493667602539}


EP_train:3:  40%|| 10952/27626 [25:49<39:26,  7.05it/s]

{'epoch': 3, 'iter': 10950, 'avg_loss': 8.525893213324172, 'avg_acc': 50.00370970687609, 'loss': 8.354211807250977}


EP_train:3:  40%|| 10962/27626 [25:50<39:20,  7.06it/s]

{'epoch': 3, 'iter': 10960, 'avg_loss': 8.526384406580933, 'avg_acc': 50.00513183103732, 'loss': 9.961496353149414}


EP_train:3:  40%|| 10972/27626 [25:51<39:25,  7.04it/s]

{'epoch': 3, 'iter': 10970, 'avg_loss': 8.52672716886927, 'avg_acc': 50.00170905113481, 'loss': 9.65067195892334}


EP_train:3:  40%|| 10982/27626 [25:53<39:27,  7.03it/s]

{'epoch': 3, 'iter': 10980, 'avg_loss': 8.526735581223534, 'avg_acc': 50.00227665968491, 'loss': 8.735569953918457}


EP_train:3:  40%|| 10992/27626 [25:54<39:14,  7.06it/s]

{'epoch': 3, 'iter': 10990, 'avg_loss': 8.527045822048109, 'avg_acc': 50.0, 'loss': 8.359456062316895}


EP_train:3:  40%|| 11002/27626 [25:56<39:33,  7.00it/s]

{'epoch': 3, 'iter': 11000, 'avg_loss': 8.527110356524277, 'avg_acc': 49.99602308881011, 'loss': 8.84153938293457}


EP_train:3:  40%|| 11012/27626 [25:57<39:33,  7.00it/s]

{'epoch': 3, 'iter': 11010, 'avg_loss': 8.527354555638412, 'avg_acc': 49.992337208246305, 'loss': 8.849854469299316}


EP_train:3:  40%|| 11022/27626 [25:59<39:22,  7.03it/s]

{'epoch': 3, 'iter': 11020, 'avg_loss': 8.527374625822537, 'avg_acc': 49.99064286362399, 'loss': 9.848592758178711}


EP_train:3:  40%|| 11032/27626 [26:00<39:08,  7.07it/s]

{'epoch': 3, 'iter': 11030, 'avg_loss': 8.527490393787572, 'avg_acc': 49.99150122382377, 'loss': 7.997691631317139}


EP_train:3:  40%|| 11042/27626 [26:01<39:26,  7.01it/s]

{'epoch': 3, 'iter': 11040, 'avg_loss': 8.527274966466665, 'avg_acc': 49.992924101077804, 'loss': 8.772540092468262}


EP_train:3:  40%|| 11052/27626 [26:03<39:13,  7.04it/s]

{'epoch': 3, 'iter': 11050, 'avg_loss': 8.527286778854661, 'avg_acc': 49.99745498144964, 'loss': 8.172653198242188}


EP_train:3:  40%|| 11062/27626 [26:04<39:18,  7.02it/s]

{'epoch': 3, 'iter': 11060, 'avg_loss': 8.527096227715367, 'avg_acc': 49.996892233975224, 'loss': 9.102130889892578}


EP_train:3:  40%|| 11072/27626 [26:06<39:15,  7.03it/s]

{'epoch': 3, 'iter': 11070, 'avg_loss': 8.527045538470455, 'avg_acc': 49.99858865504471, 'loss': 8.184308052062988}


EP_train:3:  40%|| 11082/27626 [26:07<39:07,  7.05it/s]

{'epoch': 3, 'iter': 11080, 'avg_loss': 8.526817820560803, 'avg_acc': 49.99858992870679, 'loss': 8.335556983947754}


EP_train:3:  40%|| 11092/27626 [26:08<39:28,  6.98it/s]

{'epoch': 3, 'iter': 11090, 'avg_loss': 8.526780120525268, 'avg_acc': 49.999718240014424, 'loss': 8.283814430236816}


EP_train:3:  40%|| 11102/27626 [26:10<39:14,  7.02it/s]

{'epoch': 3, 'iter': 11100, 'avg_loss': 8.526579961595466, 'avg_acc': 49.99831096297631, 'loss': 8.82837200164795}


EP_train:3:  40%|| 11112/27626 [26:11<39:30,  6.97it/s]

{'epoch': 3, 'iter': 11110, 'avg_loss': 8.526614861294792, 'avg_acc': 49.99831248312483, 'loss': 7.930417537689209}


EP_train:3:  40%|| 11122/27626 [26:13<39:04,  7.04it/s]

{'epoch': 3, 'iter': 11120, 'avg_loss': 8.526875261886977, 'avg_acc': 49.99185100260768, 'loss': 9.198405265808105}


EP_train:3:  40%|| 11132/27626 [26:14<39:06,  7.03it/s]

{'epoch': 3, 'iter': 11130, 'avg_loss': 8.526684141343264, 'avg_acc': 49.99129682867667, 'loss': 8.206299781799316}


EP_train:3:  40%|| 11142/27626 [26:16<39:04,  7.03it/s]

{'epoch': 3, 'iter': 11140, 'avg_loss': 8.527036014680492, 'avg_acc': 49.99298761332017, 'loss': 8.455706596374512}


EP_train:3:  40%|| 11152/27626 [26:17<39:13,  7.00it/s]

{'epoch': 3, 'iter': 11150, 'avg_loss': 8.527014986879248, 'avg_acc': 49.99327414581652, 'loss': 8.085941314697266}


EP_train:3:  40%|| 11162/27626 [26:18<39:08,  7.01it/s]

{'epoch': 3, 'iter': 11160, 'avg_loss': 8.52702579783684, 'avg_acc': 49.996360093181615, 'loss': 8.271079063415527}


EP_train:3:  40%|| 11172/27626 [26:20<39:18,  6.98it/s]

{'epoch': 3, 'iter': 11170, 'avg_loss': 8.527419524977793, 'avg_acc': 49.99468489839764, 'loss': 9.109082221984863}


EP_train:3:  40%|| 11182/27626 [26:21<38:57,  7.03it/s]

{'epoch': 3, 'iter': 11180, 'avg_loss': 8.52739512448063, 'avg_acc': 49.99049727215812, 'loss': 8.079358100891113}


EP_train:3:  41%|| 11192/27626 [26:23<38:59,  7.02it/s]

{'epoch': 3, 'iter': 11190, 'avg_loss': 8.527440056959774, 'avg_acc': 49.988271825574124, 'loss': 9.398747444152832}


EP_train:3:  41%|| 11202/27626 [26:24<38:37,  7.09it/s]

{'epoch': 3, 'iter': 11200, 'avg_loss': 8.52719211476199, 'avg_acc': 49.99386215516472, 'loss': 7.805312633514404}


EP_train:3:  41%|| 11212/27626 [26:25<39:00,  7.01it/s]

{'epoch': 3, 'iter': 11210, 'avg_loss': 8.527182835429288, 'avg_acc': 49.988292748193736, 'loss': 8.556600570678711}


EP_train:3:  41%|| 11222/27626 [26:27<38:47,  7.05it/s]

{'epoch': 3, 'iter': 11220, 'avg_loss': 8.52703154226238, 'avg_acc': 49.986075216112646, 'loss': 9.047253608703613}


EP_train:3:  41%|| 11232/27626 [26:28<38:39,  7.07it/s]

{'epoch': 3, 'iter': 11230, 'avg_loss': 8.527159534274666, 'avg_acc': 49.98775710088149, 'loss': 8.376556396484375}


EP_train:3:  41%|| 11242/27626 [26:30<38:46,  7.04it/s]

{'epoch': 3, 'iter': 11240, 'avg_loss': 8.527263429599474, 'avg_acc': 49.985265990570234, 'loss': 9.048251152038574}


EP_train:3:  41%|| 11252/27626 [26:31<38:42,  7.05it/s]

{'epoch': 3, 'iter': 11250, 'avg_loss': 8.527307449261482, 'avg_acc': 49.98611234556928, 'loss': 8.146405220031738}


EP_train:3:  41%|| 11262/27626 [26:33<38:48,  7.03it/s]

{'epoch': 3, 'iter': 11260, 'avg_loss': 8.527257345972112, 'avg_acc': 49.98612467809253, 'loss': 8.908065795898438}


EP_train:3:  41%|| 11272/27626 [26:34<38:54,  7.01it/s]

{'epoch': 3, 'iter': 11270, 'avg_loss': 8.527366043015007, 'avg_acc': 49.98475068760536, 'loss': 9.374552726745605}


EP_train:3:  41%|| 11282/27626 [26:35<39:01,  6.98it/s]

{'epoch': 3, 'iter': 11280, 'avg_loss': 8.527098100412168, 'avg_acc': 49.98393316195372, 'loss': 8.441865921020508}


EP_train:3:  41%|| 11292/27626 [26:37<38:37,  7.05it/s]

{'epoch': 3, 'iter': 11290, 'avg_loss': 8.527162963638123, 'avg_acc': 49.981179700646535, 'loss': 8.495283126831055}


EP_train:3:  41%|| 11302/27626 [26:38<38:41,  7.03it/s]

{'epoch': 3, 'iter': 11300, 'avg_loss': 8.52725553044217, 'avg_acc': 49.98230245111052, 'loss': 9.77185344696045}


EP_train:3:  41%|| 11312/27626 [26:40<38:37,  7.04it/s]

{'epoch': 3, 'iter': 11310, 'avg_loss': 8.526981732582037, 'avg_acc': 49.98287065688268, 'loss': 7.066668510437012}


EP_train:3:  41%|| 11322/27626 [26:41<38:29,  7.06it/s]

{'epoch': 3, 'iter': 11320, 'avg_loss': 8.526781991938906, 'avg_acc': 49.979573359243886, 'loss': 8.904556274414062}


EP_train:3:  41%|| 11332/27626 [26:42<38:37,  7.03it/s]

{'epoch': 3, 'iter': 11330, 'avg_loss': 8.526584316358363, 'avg_acc': 49.976833465713526, 'loss': 8.289145469665527}


EP_train:3:  41%|| 11342/27626 [26:44<38:38,  7.02it/s]

{'epoch': 3, 'iter': 11340, 'avg_loss': 8.52659567070327, 'avg_acc': 49.974373952914206, 'loss': 8.429892539978027}


EP_train:3:  41%|| 11352/27626 [26:45<38:37,  7.02it/s]

{'epoch': 3, 'iter': 11350, 'avg_loss': 8.526822065574022, 'avg_acc': 49.97191877367633, 'loss': 8.42606258392334}


EP_train:3:  41%|| 11362/27626 [26:47<38:50,  6.98it/s]

{'epoch': 3, 'iter': 11360, 'avg_loss': 8.52697338187855, 'avg_acc': 49.97386893759352, 'loss': 7.948276042938232}


EP_train:3:  41%|| 11372/27626 [26:48<38:51,  6.97it/s]

{'epoch': 3, 'iter': 11370, 'avg_loss': 8.526816997875294, 'avg_acc': 49.97031923313693, 'loss': 7.825735569000244}


EP_train:3:  41%|| 11382/27626 [26:50<38:25,  7.05it/s]

{'epoch': 3, 'iter': 11380, 'avg_loss': 8.526890637805263, 'avg_acc': 49.97418943853791, 'loss': 8.005168914794922}


EP_train:3:  41%|| 11392/27626 [26:51<38:29,  7.03it/s]

{'epoch': 3, 'iter': 11390, 'avg_loss': 8.526628780088709, 'avg_acc': 49.97558379422351, 'loss': 7.8954877853393555}


EP_train:3:  41%|| 11402/27626 [26:52<38:15,  7.07it/s]

{'epoch': 3, 'iter': 11400, 'avg_loss': 8.526660660674368, 'avg_acc': 49.9731383211999, 'loss': 7.994179725646973}


EP_train:3:  41%|| 11412/27626 [26:54<38:33,  7.01it/s]

{'epoch': 3, 'iter': 11410, 'avg_loss': 8.526790354841275, 'avg_acc': 49.96823240732626, 'loss': 8.750542640686035}


EP_train:3:  41%|| 11422/27626 [26:55<38:13,  7.06it/s]

{'epoch': 3, 'iter': 11420, 'avg_loss': 8.526690768168484, 'avg_acc': 49.9679866036249, 'loss': 8.39903736114502}


EP_train:3:  41%|| 11432/27626 [26:57<38:09,  7.07it/s]

{'epoch': 3, 'iter': 11430, 'avg_loss': 8.526474897634763, 'avg_acc': 49.967467850581755, 'loss': 7.394881248474121}


EP_train:3:  41%|| 11442/27626 [26:58<38:13,  7.06it/s]

{'epoch': 3, 'iter': 11440, 'avg_loss': 8.52610264815564, 'avg_acc': 49.9623066165545, 'loss': 8.242461204528809}


EP_train:3:  41%|| 11452/27626 [26:59<38:31,  7.00it/s]

{'epoch': 3, 'iter': 11450, 'avg_loss': 8.526343767137655, 'avg_acc': 49.96261243559515, 'loss': 7.859652042388916}


EP_train:3:  41%|| 11462/27626 [27:01<38:43,  6.96it/s]

{'epoch': 3, 'iter': 11460, 'avg_loss': 8.526226972453976, 'avg_acc': 49.9620997295175, 'loss': 8.22414779663086}


EP_train:3:  42%|| 11472/27626 [27:02<38:33,  6.98it/s]

{'epoch': 3, 'iter': 11470, 'avg_loss': 8.526334851156625, 'avg_acc': 49.95968093453055, 'loss': 8.23873233795166}


EP_train:3:  42%|| 11482/27626 [27:04<38:31,  6.98it/s]

{'epoch': 3, 'iter': 11480, 'avg_loss': 8.526096811403372, 'avg_acc': 49.95971605260866, 'loss': 8.764591217041016}


EP_train:3:  42%|| 11492/27626 [27:05<38:16,  7.02it/s]

{'epoch': 3, 'iter': 11490, 'avg_loss': 8.525996936995288, 'avg_acc': 49.959751109564, 'loss': 8.526918411254883}


EP_train:3:  42%|| 11502/27626 [27:07<38:26,  6.99it/s]

{'epoch': 3, 'iter': 11500, 'avg_loss': 8.52631640755584, 'avg_acc': 49.954351795496045, 'loss': 8.692913055419922}


EP_train:3:  42%|| 11512/27626 [27:08<37:55,  7.08it/s]

{'epoch': 3, 'iter': 11510, 'avg_loss': 8.526344848232155, 'avg_acc': 49.954662931109375, 'loss': 9.062488555908203}


EP_train:3:  42%|| 11522/27626 [27:09<37:59,  7.07it/s]

{'epoch': 3, 'iter': 11520, 'avg_loss': 8.526377002912081, 'avg_acc': 49.95253233226282, 'loss': 8.278508186340332}


EP_train:3:  42%|| 11532/27626 [27:11<38:16,  7.01it/s]

{'epoch': 3, 'iter': 11530, 'avg_loss': 8.526078491154697, 'avg_acc': 49.953115514699505, 'loss': 8.374069213867188}


EP_train:3:  42%|| 11542/27626 [27:12<38:21,  6.99it/s]

{'epoch': 3, 'iter': 11540, 'avg_loss': 8.525839818659135, 'avg_acc': 49.95288536521965, 'loss': 8.475215911865234}


EP_train:3:  42%|| 11552/27626 [27:14<38:08,  7.02it/s]

{'epoch': 3, 'iter': 11550, 'avg_loss': 8.525850597619701, 'avg_acc': 49.948597524023896, 'loss': 9.660191535949707}


EP_train:3:  42%|| 11562/27626 [27:15<37:58,  7.05it/s]

{'epoch': 3, 'iter': 11560, 'avg_loss': 8.525663317121154, 'avg_acc': 49.95134503935646, 'loss': 9.040818214416504}


EP_train:3:  42%|| 11572/27626 [27:16<38:02,  7.03it/s]

{'epoch': 3, 'iter': 11570, 'avg_loss': 8.525609578353595, 'avg_acc': 49.94949658629332, 'loss': 8.787510871887207}


EP_train:3:  42%|| 11582/27626 [27:18<37:54,  7.05it/s]

{'epoch': 3, 'iter': 11580, 'avg_loss': 8.525783723525336, 'avg_acc': 49.9492703566186, 'loss': 8.668408393859863}


EP_train:3:  42%|| 11592/27626 [27:19<37:52,  7.05it/s]

{'epoch': 3, 'iter': 11590, 'avg_loss': 8.525878101127057, 'avg_acc': 49.95012294021223, 'loss': 9.065797805786133}


EP_train:3:  42%|| 11602/27626 [27:21<37:57,  7.03it/s]

{'epoch': 3, 'iter': 11600, 'avg_loss': 8.525797309839318, 'avg_acc': 49.952051547280405, 'loss': 7.724991798400879}


EP_train:3:  42%|| 11612/27626 [27:22<37:55,  7.04it/s]

{'epoch': 3, 'iter': 11610, 'avg_loss': 8.525686878493135, 'avg_acc': 49.95612996296615, 'loss': 8.844545364379883}


EP_train:3:  42%|| 11622/27626 [27:24<38:45,  6.88it/s]

{'epoch': 3, 'iter': 11620, 'avg_loss': 8.525723622737905, 'avg_acc': 49.9532097065657, 'loss': 8.952088356018066}


EP_train:3:  42%|| 11632/27626 [27:25<37:53,  7.03it/s]

{'epoch': 3, 'iter': 11630, 'avg_loss': 8.525426938172803, 'avg_acc': 49.95486200670622, 'loss': 8.779942512512207}


EP_train:3:  42%|| 11642/27626 [27:26<38:01,  7.00it/s]

{'epoch': 3, 'iter': 11640, 'avg_loss': 8.525183611970496, 'avg_acc': 49.94845803625118, 'loss': 8.232192039489746}


EP_train:3:  42%|| 11652/27626 [27:28<38:02,  7.00it/s]

{'epoch': 3, 'iter': 11650, 'avg_loss': 8.524866941445643, 'avg_acc': 49.94769762252167, 'loss': 8.708511352539062}


EP_train:3:  42%|| 11662/27626 [27:29<37:46,  7.04it/s]

{'epoch': 3, 'iter': 11660, 'avg_loss': 8.525107692418851, 'avg_acc': 49.94881442414887, 'loss': 8.99074935913086}


EP_train:3:  42%|| 11672/27626 [27:31<37:42,  7.05it/s]

{'epoch': 3, 'iter': 11670, 'avg_loss': 8.525246321557471, 'avg_acc': 49.948322765829836, 'loss': 9.170157432556152}


EP_train:3:  42%|| 11682/27626 [27:32<37:38,  7.06it/s]

{'epoch': 3, 'iter': 11680, 'avg_loss': 8.52534391414465, 'avg_acc': 49.94649430699427, 'loss': 8.49803638458252}


EP_train:3:  42%|| 11692/27626 [27:33<37:34,  7.07it/s]

{'epoch': 3, 'iter': 11690, 'avg_loss': 8.52522711720808, 'avg_acc': 49.94440167650329, 'loss': 8.873239517211914}


EP_train:3:  42%|| 11702/27626 [27:35<37:40,  7.05it/s]

{'epoch': 3, 'iter': 11700, 'avg_loss': 8.524982735128772, 'avg_acc': 49.94391504999573, 'loss': 8.186400413513184}


EP_train:3:  42%|| 11712/27626 [27:36<37:31,  7.07it/s]

{'epoch': 3, 'iter': 11710, 'avg_loss': 8.524482106645369, 'avg_acc': 49.94689821535309, 'loss': 8.102045059204102}


EP_train:3:  42%|| 11722/27626 [27:38<37:47,  7.01it/s]

{'epoch': 3, 'iter': 11720, 'avg_loss': 8.524254949617056, 'avg_acc': 49.946143673747976, 'loss': 8.668122291564941}


EP_train:3:  42%|| 11732/27626 [27:39<37:53,  6.99it/s]

{'epoch': 3, 'iter': 11730, 'avg_loss': 8.524339271175029, 'avg_acc': 49.94432486574035, 'loss': 8.902701377868652}


EP_train:3:  43%|| 11742/27626 [27:41<37:43,  7.02it/s]

{'epoch': 3, 'iter': 11740, 'avg_loss': 8.524072603720048, 'avg_acc': 49.94463844646964, 'loss': 8.145752906799316}


EP_train:3:  43%|| 11752/27626 [27:42<37:37,  7.03it/s]

{'epoch': 3, 'iter': 11750, 'avg_loss': 8.524107730923262, 'avg_acc': 49.94734490681644, 'loss': 9.06635570526123}


EP_train:3:  43%|| 11762/27626 [27:43<37:50,  6.99it/s]

{'epoch': 3, 'iter': 11760, 'avg_loss': 8.524036983802665, 'avg_acc': 49.949249638636175, 'loss': 8.15060806274414}


EP_train:3:  43%|| 11772/27626 [27:45<37:46,  6.99it/s]

{'epoch': 3, 'iter': 11770, 'avg_loss': 8.52432399861076, 'avg_acc': 49.94610695777759, 'loss': 8.49621295928955}


EP_train:3:  43%|| 11782/27626 [27:46<37:26,  7.05it/s]

{'epoch': 3, 'iter': 11780, 'avg_loss': 8.524407257542789, 'avg_acc': 49.94296961208725, 'loss': 9.190775871276855}


EP_train:3:  43%|| 11792/27626 [27:48<37:22,  7.06it/s]

{'epoch': 3, 'iter': 11790, 'avg_loss': 8.524462657993496, 'avg_acc': 49.94540327368332, 'loss': 8.650959014892578}


EP_train:3:  43%|| 11802/27626 [27:49<37:23,  7.05it/s]

{'epoch': 3, 'iter': 11800, 'avg_loss': 8.524423249807715, 'avg_acc': 49.944919922040505, 'loss': 8.554176330566406}


EP_train:3:  43%|| 11812/27626 [27:50<37:49,  6.97it/s]

{'epoch': 3, 'iter': 11810, 'avg_loss': 8.524367300443267, 'avg_acc': 49.94708322749979, 'loss': 8.034605979919434}


EP_train:3:  43%|| 11822/27626 [27:52<37:54,  6.95it/s]

{'epoch': 3, 'iter': 11820, 'avg_loss': 8.524206414983897, 'avg_acc': 49.94554183233229, 'loss': 8.618544578552246}


EP_train:3:  43%|| 11832/27626 [27:53<37:14,  7.07it/s]

{'epoch': 3, 'iter': 11830, 'avg_loss': 8.524293025059265, 'avg_acc': 49.94902163806948, 'loss': 9.231864929199219}


EP_train:3:  43%|| 11842/27626 [27:55<37:07,  7.09it/s]

{'epoch': 3, 'iter': 11840, 'avg_loss': 8.524192605567496, 'avg_acc': 49.949592517523854, 'loss': 8.032124519348145}


EP_train:3:  43%|| 11852/27626 [27:56<37:16,  7.05it/s]

{'epoch': 3, 'iter': 11850, 'avg_loss': 8.524337776803073, 'avg_acc': 49.948580288583244, 'loss': 8.874371528625488}


EP_train:3:  43%|| 11862/27626 [27:58<37:10,  7.07it/s]

{'epoch': 3, 'iter': 11860, 'avg_loss': 8.524454729260144, 'avg_acc': 49.9499409830537, 'loss': 8.964991569519043}


EP_train:3:  43%|| 11872/27626 [27:59<37:09,  7.07it/s]

{'epoch': 3, 'iter': 11870, 'avg_loss': 8.524381014483527, 'avg_acc': 49.95287886445961, 'loss': 7.632718086242676}


EP_train:3:  43%|| 11882/27626 [28:00<37:08,  7.07it/s]

{'epoch': 3, 'iter': 11880, 'avg_loss': 8.524552306345482, 'avg_acc': 49.95291852537665, 'loss': 7.794033050537109}


EP_train:3:  43%|| 11892/27626 [28:02<37:13,  7.05it/s]

{'epoch': 3, 'iter': 11890, 'avg_loss': 8.524531042993885, 'avg_acc': 49.95216970818266, 'loss': 9.167946815490723}


EP_train:3:  43%|| 11902/27626 [28:03<37:26,  7.00it/s]

{'epoch': 3, 'iter': 11900, 'avg_loss': 8.52466405394618, 'avg_acc': 49.94958406856567, 'loss': 8.565730094909668}


EP_train:3:  43%|| 11912/27626 [28:05<37:17,  7.02it/s]

{'epoch': 3, 'iter': 11910, 'avg_loss': 8.52463204557835, 'avg_acc': 49.95093820837881, 'loss': 8.427135467529297}


EP_train:3:  43%|| 11922/27626 [28:06<37:16,  7.02it/s]

{'epoch': 3, 'iter': 11920, 'avg_loss': 8.524690093014947, 'avg_acc': 49.94966865195873, 'loss': 9.063375473022461}


EP_train:3:  43%|| 11932/27626 [28:07<36:57,  7.08it/s]

{'epoch': 3, 'iter': 11930, 'avg_loss': 8.524915758057318, 'avg_acc': 49.948663146425275, 'loss': 7.83500337600708}


EP_train:3:  43%|| 11942/27626 [28:09<36:52,  7.09it/s]

{'epoch': 3, 'iter': 11940, 'avg_loss': 8.524815754019508, 'avg_acc': 49.94608910476509, 'loss': 8.381546974182129}


EP_train:3:  43%|| 11952/27626 [28:10<36:56,  7.07it/s]

{'epoch': 3, 'iter': 11950, 'avg_loss': 8.524632891255257, 'avg_acc': 49.94116601121245, 'loss': 7.4221696853637695}


EP_train:3:  43%|| 11962/27626 [28:12<37:08,  7.03it/s]

{'epoch': 3, 'iter': 11960, 'avg_loss': 8.524097201477554, 'avg_acc': 49.93651241534989, 'loss': 8.010497093200684}


EP_train:3:  43%|| 11972/27626 [28:13<37:04,  7.04it/s]

{'epoch': 3, 'iter': 11970, 'avg_loss': 8.524039904303086, 'avg_acc': 49.936565449837104, 'loss': 8.653818130493164}


EP_train:3:  43%|| 11982/27626 [28:15<37:01,  7.04it/s]

{'epoch': 3, 'iter': 11980, 'avg_loss': 8.524341257453692, 'avg_acc': 49.93766171438111, 'loss': 9.367225646972656}


EP_train:3:  43%|| 11992/27626 [28:16<37:14,  7.00it/s]

{'epoch': 3, 'iter': 11990, 'avg_loss': 8.524500219868258, 'avg_acc': 49.93458635643399, 'loss': 9.13594913482666}


EP_train:3:  43%|| 12002/27626 [28:17<37:02,  7.03it/s]

{'epoch': 3, 'iter': 12000, 'avg_loss': 8.524563677479213, 'avg_acc': 49.93568244312974, 'loss': 8.246204376220703}


EP_train:3:  43%|| 12012/27626 [28:19<37:03,  7.02it/s]

{'epoch': 3, 'iter': 12010, 'avg_loss': 8.524491524618934, 'avg_acc': 49.93859795187745, 'loss': 8.833151817321777}


EP_train:3:  44%|| 12022/27626 [28:20<36:51,  7.05it/s]

{'epoch': 3, 'iter': 12020, 'avg_loss': 8.524261375243508, 'avg_acc': 49.93163006405457, 'loss': 7.5426764488220215}


EP_train:3:  44%|| 12032/27626 [28:22<36:52,  7.05it/s]

{'epoch': 3, 'iter': 12030, 'avg_loss': 8.524407214411337, 'avg_acc': 49.93116740088105, 'loss': 9.113714218139648}


EP_train:3:  44%|| 12042/27626 [28:23<36:47,  7.06it/s]

{'epoch': 3, 'iter': 12040, 'avg_loss': 8.524343481848737, 'avg_acc': 49.930705506187195, 'loss': 8.89710807800293}


EP_train:3:  44%|| 12052/27626 [28:24<36:41,  7.07it/s]

{'epoch': 3, 'iter': 12050, 'avg_loss': 8.524483185686538, 'avg_acc': 49.93180026553813, 'loss': 8.575247764587402}


EP_train:3:  44%|| 12062/27626 [28:26<36:57,  7.02it/s]

{'epoch': 3, 'iter': 12060, 'avg_loss': 8.524430812803748, 'avg_acc': 49.931079512478235, 'loss': 7.613795280456543}


EP_train:3:  44%|| 12072/27626 [28:27<36:41,  7.07it/s]

{'epoch': 3, 'iter': 12070, 'avg_loss': 8.52463277839368, 'avg_acc': 49.93268991798525, 'loss': 7.824845314025879}


EP_train:3:  44%|| 12082/27626 [28:29<36:46,  7.04it/s]

{'epoch': 3, 'iter': 12080, 'avg_loss': 8.524766506772593, 'avg_acc': 49.935591010677925, 'loss': 8.812065124511719}


EP_train:3:  44%|| 12092/27626 [28:30<37:07,  6.97it/s]

{'epoch': 3, 'iter': 12090, 'avg_loss': 8.524745251994188, 'avg_acc': 49.93538582416674, 'loss': 8.757810592651367}


EP_train:3:  44%|| 12102/27626 [28:32<36:32,  7.08it/s]

{'epoch': 3, 'iter': 12100, 'avg_loss': 8.524332987612983, 'avg_acc': 49.932856788695155, 'loss': 8.686159133911133}


EP_train:3:  44%|| 12112/27626 [28:33<36:46,  7.03it/s]

{'epoch': 3, 'iter': 12110, 'avg_loss': 8.52395456148881, 'avg_acc': 49.933944348113286, 'loss': 8.019973754882812}


EP_train:3:  44%|| 12122/27626 [28:34<36:40,  7.04it/s]

{'epoch': 3, 'iter': 12120, 'avg_loss': 8.524021978199693, 'avg_acc': 49.93580356406237, 'loss': 9.423043251037598}


EP_train:3:  44%|| 12132/27626 [28:36<36:47,  7.02it/s]

{'epoch': 3, 'iter': 12130, 'avg_loss': 8.524336314105051, 'avg_acc': 49.9353412744209, 'loss': 9.449565887451172}


EP_train:3:  44%|| 12142/27626 [28:37<36:42,  7.03it/s]

{'epoch': 3, 'iter': 12140, 'avg_loss': 8.524471916699564, 'avg_acc': 49.93333539247179, 'loss': 8.091126441955566}


EP_train:3:  44%|| 12152/27626 [28:39<36:40,  7.03it/s]

{'epoch': 3, 'iter': 12150, 'avg_loss': 8.524201844613863, 'avg_acc': 49.93467615834088, 'loss': 8.370777130126953}


EP_train:3:  44%|| 12162/27626 [28:40<36:36,  7.04it/s]

{'epoch': 3, 'iter': 12160, 'avg_loss': 8.524053895368592, 'avg_acc': 49.930875339199076, 'loss': 8.94332218170166}


EP_train:3:  44%|| 12172/27626 [28:41<36:33,  7.05it/s]

{'epoch': 3, 'iter': 12170, 'avg_loss': 8.523799423399247, 'avg_acc': 49.929134828691154, 'loss': 8.525402069091797}


EP_train:3:  44%|| 12182/27626 [28:43<36:48,  6.99it/s]

{'epoch': 3, 'iter': 12180, 'avg_loss': 8.5242167848171, 'avg_acc': 49.92970609966341, 'loss': 8.089166641235352}


EP_train:3:  44%|| 12192/27626 [28:44<36:24,  7.06it/s]

{'epoch': 3, 'iter': 12190, 'avg_loss': 8.52447537790711, 'avg_acc': 49.92873841358379, 'loss': 8.12398910522461}


EP_train:3:  44%|| 12202/27626 [28:46<36:42,  7.00it/s]

{'epoch': 3, 'iter': 12200, 'avg_loss': 8.52446320490528, 'avg_acc': 49.92674780755676, 'loss': 8.596296310424805}


EP_train:3:  44%|| 12212/27626 [28:47<36:42,  7.00it/s]

{'epoch': 3, 'iter': 12210, 'avg_loss': 8.524049447293695, 'avg_acc': 49.925272295471295, 'loss': 7.350843906402588}


EP_train:3:  44%|| 12222/27626 [28:49<36:38,  7.01it/s]

{'epoch': 3, 'iter': 12220, 'avg_loss': 8.523923912060825, 'avg_acc': 49.92686768676868, 'loss': 8.292262077331543}


EP_train:3:  44%|| 12232/27626 [28:50<36:45,  6.98it/s]

{'epoch': 3, 'iter': 12230, 'avg_loss': 8.523984010782335, 'avg_acc': 49.92590548606001, 'loss': 8.068586349487305}


EP_train:3:  44%|| 12242/27626 [28:51<36:31,  7.02it/s]

{'epoch': 3, 'iter': 12240, 'avg_loss': 8.523962571840837, 'avg_acc': 49.92698717425047, 'loss': 9.124245643615723}


EP_train:3:  44%|| 12252/27626 [28:53<36:18,  7.06it/s]

{'epoch': 3, 'iter': 12250, 'avg_loss': 8.523882739758571, 'avg_acc': 49.928067096563545, 'loss': 8.251754760742188}


EP_train:3:  44%|| 12262/27626 [28:54<36:53,  6.94it/s]

{'epoch': 3, 'iter': 12260, 'avg_loss': 8.523869117120478, 'avg_acc': 49.9304196231955, 'loss': 8.930041313171387}


EP_train:3:  44%|| 12272/27626 [28:56<36:29,  7.01it/s]

{'epoch': 3, 'iter': 12270, 'avg_loss': 8.52386468294809, 'avg_acc': 49.92742034064053, 'loss': 8.252535820007324}


EP_train:3:  44%|| 12282/27626 [28:57<36:27,  7.02it/s]

{'epoch': 3, 'iter': 12280, 'avg_loss': 8.523933811380328, 'avg_acc': 49.92900618842114, 'loss': 8.255544662475586}


EP_train:3:  44%|| 12292/27626 [28:58<36:28,  7.01it/s]

{'epoch': 3, 'iter': 12290, 'avg_loss': 8.523971333339617, 'avg_acc': 49.927538442762994, 'loss': 8.478092193603516}


EP_train:3:  45%|| 12302/27626 [29:00<36:13,  7.05it/s]

{'epoch': 3, 'iter': 12300, 'avg_loss': 8.523908869210961, 'avg_acc': 49.92708926103569, 'loss': 8.983455657958984}


EP_train:3:  45%|| 12312/27626 [29:01<36:21,  7.02it/s]

{'epoch': 3, 'iter': 12310, 'avg_loss': 8.524038736501257, 'avg_acc': 49.92943302737389, 'loss': 9.779611587524414}


EP_train:3:  45%|| 12322/27626 [29:03<36:29,  6.99it/s]

{'epoch': 3, 'iter': 12320, 'avg_loss': 8.524360573840367, 'avg_acc': 49.93278751724698, 'loss': 7.99912691116333}


EP_train:3:  45%|| 12332/27626 [29:04<36:01,  7.07it/s]

{'epoch': 3, 'iter': 12330, 'avg_loss': 8.524283351616678, 'avg_acc': 49.93360230313843, 'loss': 7.99830961227417}


EP_train:3:  45%|| 12342/27626 [29:06<36:07,  7.05it/s]

{'epoch': 3, 'iter': 12340, 'avg_loss': 8.52461689587457, 'avg_acc': 49.93593509440078, 'loss': 8.455988883972168}


EP_train:3:  45%|| 12352/27626 [29:07<36:14,  7.02it/s]

{'epoch': 3, 'iter': 12350, 'avg_loss': 8.524619587885978, 'avg_acc': 49.9344688689175, 'loss': 8.548986434936523}


EP_train:3:  45%|| 12362/27626 [29:08<36:22,  6.99it/s]

{'epoch': 3, 'iter': 12360, 'avg_loss': 8.52471742081455, 'avg_acc': 49.93325782703665, 'loss': 8.228297233581543}


EP_train:3:  45%|| 12372/27626 [29:10<36:14,  7.01it/s]

{'epoch': 3, 'iter': 12370, 'avg_loss': 8.524368369428027, 'avg_acc': 49.93103831541508, 'loss': 7.9502692222595215}


EP_train:3:  45%|| 12382/27626 [29:11<36:04,  7.04it/s]

{'epoch': 3, 'iter': 12380, 'avg_loss': 8.524161607515696, 'avg_acc': 49.93134641789839, 'loss': 8.300713539123535}


EP_train:3:  45%|| 12392/27626 [29:13<36:10,  7.02it/s]

{'epoch': 3, 'iter': 12390, 'avg_loss': 8.523949549760188, 'avg_acc': 49.93064522637398, 'loss': 7.715865612030029}


EP_train:3:  45%|| 12402/27626 [29:14<36:02,  7.04it/s]

{'epoch': 3, 'iter': 12400, 'avg_loss': 8.523658574026406, 'avg_acc': 49.93498508184824, 'loss': 8.86580753326416}


EP_train:3:  45%|| 12412/27626 [29:15<35:54,  7.06it/s]

{'epoch': 3, 'iter': 12410, 'avg_loss': 8.523850150289299, 'avg_acc': 49.93428208846991, 'loss': 8.535664558410645}


EP_train:3:  45%|| 12422/27626 [29:17<35:58,  7.04it/s]

{'epoch': 3, 'iter': 12420, 'avg_loss': 8.523832889294606, 'avg_acc': 49.93483817728041, 'loss': 8.098962783813477}


EP_train:3:  45%|| 12432/27626 [29:18<35:55,  7.05it/s]

{'epoch': 3, 'iter': 12430, 'avg_loss': 8.523771506688417, 'avg_acc': 49.93338227013112, 'loss': 7.8480939865112305}


EP_train:3:  45%|| 12442/27626 [29:20<36:12,  6.99it/s]

{'epoch': 3, 'iter': 12440, 'avg_loss': 8.523581110849497, 'avg_acc': 49.93243107467246, 'loss': 8.301661491394043}


EP_train:3:  45%|| 12452/27626 [29:21<36:24,  6.95it/s]

{'epoch': 3, 'iter': 12450, 'avg_loss': 8.52340568951187, 'avg_acc': 49.928720584691995, 'loss': 8.543854713439941}


EP_train:3:  45%|| 12462/27626 [29:23<35:46,  7.07it/s]

{'epoch': 3, 'iter': 12460, 'avg_loss': 8.523526810822924, 'avg_acc': 49.92852700425327, 'loss': 8.576865196228027}


EP_train:3:  45%|| 12472/27626 [29:24<35:54,  7.03it/s]

{'epoch': 3, 'iter': 12470, 'avg_loss': 8.523494704514706, 'avg_acc': 49.929586641007134, 'loss': 8.768174171447754}


EP_train:3:  45%|| 12482/27626 [29:25<35:48,  7.05it/s]

{'epoch': 3, 'iter': 12480, 'avg_loss': 8.523503512815065, 'avg_acc': 49.93039419918276, 'loss': 8.044631004333496}


EP_train:3:  45%|| 12492/27626 [29:27<35:43,  7.06it/s]

{'epoch': 3, 'iter': 12490, 'avg_loss': 8.52356382729904, 'avg_acc': 49.93720478744696, 'loss': 8.091779708862305}


EP_train:3:  45%|| 12502/27626 [29:28<35:33,  7.09it/s]

{'epoch': 3, 'iter': 12500, 'avg_loss': 8.523603621468277, 'avg_acc': 49.93600511959043, 'loss': 8.843578338623047}


EP_train:3:  45%|| 12512/27626 [29:30<35:42,  7.05it/s]

{'epoch': 3, 'iter': 12510, 'avg_loss': 8.523861397256974, 'avg_acc': 49.93505714970826, 'loss': 9.229474067687988}


EP_train:3:  45%|| 12522/27626 [29:31<35:42,  7.05it/s]

{'epoch': 3, 'iter': 12520, 'avg_loss': 8.523952718303185, 'avg_acc': 49.933861113329606, 'loss': 7.673638343811035}


EP_train:3:  45%|| 12532/27626 [29:32<35:47,  7.03it/s]

{'epoch': 3, 'iter': 12530, 'avg_loss': 8.523439918014555, 'avg_acc': 49.93366451201022, 'loss': 7.304166793823242}


EP_train:3:  45%|| 12542/27626 [29:34<36:07,  6.96it/s]

{'epoch': 3, 'iter': 12540, 'avg_loss': 8.523386606597109, 'avg_acc': 49.93421577226697, 'loss': 8.742270469665527}


EP_train:3:  45%|| 12552/27626 [29:35<35:59,  6.98it/s]

{'epoch': 3, 'iter': 12550, 'avg_loss': 8.52323119732101, 'avg_acc': 49.93601107481476, 'loss': 9.41646671295166}


EP_train:3:  45%|| 12562/27626 [29:37<35:39,  7.04it/s]

{'epoch': 3, 'iter': 12560, 'avg_loss': 8.523095958409971, 'avg_acc': 49.936310803279994, 'loss': 8.448223114013672}


EP_train:3:  46%|| 12572/27626 [29:38<35:25,  7.08it/s]

{'epoch': 3, 'iter': 12570, 'avg_loss': 8.523254923827702, 'avg_acc': 49.93362699864768, 'loss': 7.939154624938965}


EP_train:3:  46%|| 12582/27626 [29:40<35:33,  7.05it/s]

{'epoch': 3, 'iter': 12580, 'avg_loss': 8.523070022626063, 'avg_acc': 49.93343136475638, 'loss': 8.854050636291504}


EP_train:3:  46%|| 12592/27626 [29:41<35:19,  7.09it/s]

{'epoch': 3, 'iter': 12590, 'avg_loss': 8.523274114858866, 'avg_acc': 49.93125049638631, 'loss': 8.708475112915039}


EP_train:3:  46%|| 12602/27626 [29:42<35:21,  7.08it/s]

{'epoch': 3, 'iter': 12600, 'avg_loss': 8.52332947123818, 'avg_acc': 49.927337116101896, 'loss': 9.021588325500488}


EP_train:3:  46%|| 12612/27626 [29:44<35:36,  7.03it/s]

{'epoch': 3, 'iter': 12610, 'avg_loss': 8.523320114231632, 'avg_acc': 49.92590793751487, 'loss': 8.233501434326172}


EP_train:3:  46%|| 12622/27626 [29:45<35:29,  7.05it/s]

{'epoch': 3, 'iter': 12620, 'avg_loss': 8.523562410706887, 'avg_acc': 49.92522383329372, 'loss': 9.153380393981934}


EP_train:3:  46%|| 12632/27626 [29:47<35:26,  7.05it/s]

{'epoch': 3, 'iter': 12630, 'avg_loss': 8.52328115354835, 'avg_acc': 49.92181933338611, 'loss': 8.446475982666016}


EP_train:3:  46%|| 12642/27626 [29:48<35:20,  7.06it/s]

{'epoch': 3, 'iter': 12640, 'avg_loss': 8.523390792968595, 'avg_acc': 49.91916185428368, 'loss': 8.559001922607422}


EP_train:3:  46%|| 12652/27626 [29:49<35:25,  7.04it/s]

{'epoch': 3, 'iter': 12650, 'avg_loss': 8.52344353386898, 'avg_acc': 49.915520512212474, 'loss': 8.445666313171387}


EP_train:3:  46%|| 12662/27626 [29:51<35:25,  7.04it/s]

{'epoch': 3, 'iter': 12660, 'avg_loss': 8.523542594066042, 'avg_acc': 49.911884922202034, 'loss': 8.080182075500488}


EP_train:3:  46%|| 12672/27626 [29:52<35:43,  6.98it/s]

{'epoch': 3, 'iter': 12670, 'avg_loss': 8.523530193741607, 'avg_acc': 49.91294096756373, 'loss': 8.338086128234863}


EP_train:3:  46%|| 12682/27626 [29:54<35:30,  7.01it/s]

{'epoch': 3, 'iter': 12680, 'avg_loss': 8.523362842703005, 'avg_acc': 49.91276318902295, 'loss': 7.95875358581543}


EP_train:3:  46%|| 12692/27626 [29:55<35:19,  7.05it/s]

{'epoch': 3, 'iter': 12690, 'avg_loss': 8.523128544689682, 'avg_acc': 49.91627925301395, 'loss': 7.783872127532959}


EP_train:3:  46%|| 12702/27626 [29:57<35:14,  7.06it/s]

{'epoch': 3, 'iter': 12700, 'avg_loss': 8.52306554899583, 'avg_acc': 49.915360995197226, 'loss': 9.015222549438477}


EP_train:3:  46%|| 12712/27626 [29:58<35:25,  7.02it/s]

{'epoch': 3, 'iter': 12710, 'avg_loss': 8.522852597083952, 'avg_acc': 49.91223153174416, 'loss': 8.418195724487305}


EP_train:3:  46%|| 12722/27626 [29:59<35:45,  6.95it/s]

{'epoch': 3, 'iter': 12720, 'avg_loss': 8.522760578731576, 'avg_acc': 49.90959830202028, 'loss': 8.795363426208496}


EP_train:3:  46%|| 12732/27626 [30:01<35:20,  7.02it/s]

{'epoch': 3, 'iter': 12730, 'avg_loss': 8.522844626723563, 'avg_acc': 49.91286034090017, 'loss': 8.519405364990234}


EP_train:3:  46%|| 12742/27626 [30:02<35:22,  7.01it/s]

{'epoch': 3, 'iter': 12740, 'avg_loss': 8.522971886348223, 'avg_acc': 49.913174005180124, 'loss': 9.592312812805176}


EP_train:3:  46%|| 12752/27626 [30:04<35:14,  7.04it/s]

{'epoch': 3, 'iter': 12750, 'avg_loss': 8.523223369339009, 'avg_acc': 49.91275194102423, 'loss': 9.40878963470459}


EP_train:3:  46%|| 12762/27626 [30:05<35:07,  7.05it/s]

{'epoch': 3, 'iter': 12760, 'avg_loss': 8.523233965379058, 'avg_acc': 49.91159587806598, 'loss': 8.866920471191406}


EP_train:3:  46%|| 12772/27626 [30:06<35:18,  7.01it/s]

{'epoch': 3, 'iter': 12770, 'avg_loss': 8.523530094599856, 'avg_acc': 49.90848406546081, 'loss': 8.443877220153809}


EP_train:3:  46%|| 12782/27626 [30:08<35:07,  7.04it/s]

{'epoch': 3, 'iter': 12780, 'avg_loss': 8.52383908762095, 'avg_acc': 49.906110632970815, 'loss': 8.362275123596191}


EP_train:3:  46%|| 12792/27626 [30:09<35:06,  7.04it/s]

{'epoch': 3, 'iter': 12790, 'avg_loss': 8.523761181693054, 'avg_acc': 49.90984872175749, 'loss': 8.601762771606445}


EP_train:3:  46%|| 12802/27626 [30:11<35:15,  7.01it/s]

{'epoch': 3, 'iter': 12800, 'avg_loss': 8.52373945645062, 'avg_acc': 49.9133368486837, 'loss': 8.337648391723633}


EP_train:3:  46%|| 12812/27626 [30:12<35:13,  7.01it/s]

{'epoch': 3, 'iter': 12810, 'avg_loss': 8.523540741963126, 'avg_acc': 49.91145304816173, 'loss': 8.955338478088379}


EP_train:3:  46%|| 12822/27626 [30:14<35:04,  7.03it/s]

{'epoch': 3, 'iter': 12820, 'avg_loss': 8.523654325379754, 'avg_acc': 49.911278371421886, 'loss': 8.872695922851562}


EP_train:3:  46%|| 12832/27626 [30:15<34:56,  7.06it/s]

{'epoch': 3, 'iter': 12830, 'avg_loss': 8.523571238979697, 'avg_acc': 49.91353947470969, 'loss': 8.190142631530762}


EP_train:3:  46%|| 12842/27626 [30:16<35:05,  7.02it/s]

{'epoch': 3, 'iter': 12840, 'avg_loss': 8.52353549194841, 'avg_acc': 49.9131200841056, 'loss': 8.244211196899414}


EP_train:3:  47%|| 12852/27626 [30:18<34:51,  7.06it/s]

{'epoch': 3, 'iter': 12850, 'avg_loss': 8.523498646879593, 'avg_acc': 49.91683526573807, 'loss': 9.267433166503906}


EP_train:3:  47%|| 12862/27626 [30:19<34:49,  7.07it/s]

{'epoch': 3, 'iter': 12860, 'avg_loss': 8.52398781349681, 'avg_acc': 49.92175958323614, 'loss': 9.468087196350098}


EP_train:3:  47%|| 12872/27626 [30:21<34:47,  7.07it/s]

{'epoch': 3, 'iter': 12870, 'avg_loss': 8.524136353183687, 'avg_acc': 49.918906844845004, 'loss': 8.340211868286133}


EP_train:3:  47%|| 12882/27626 [30:22<34:55,  7.04it/s]

{'epoch': 3, 'iter': 12880, 'avg_loss': 8.524414349875736, 'avg_acc': 49.91969761664467, 'loss': 8.37302303314209}


EP_train:3:  47%|| 12892/27626 [30:23<35:00,  7.02it/s]

{'epoch': 3, 'iter': 12890, 'avg_loss': 8.524278397871205, 'avg_acc': 49.91733573811186, 'loss': 8.903246879577637}


EP_train:3:  47%|| 12902/27626 [30:25<35:02,  7.00it/s]

{'epoch': 3, 'iter': 12900, 'avg_loss': 8.524306226743208, 'avg_acc': 49.9164308968297, 'loss': 8.88701057434082}


EP_train:3:  47%|| 12912/27626 [30:26<34:41,  7.07it/s]

{'epoch': 3, 'iter': 12910, 'avg_loss': 8.524405743905701, 'avg_acc': 49.916011540546826, 'loss': 8.364188194274902}


EP_train:3:  47%|| 12922/27626 [30:28<34:51,  7.03it/s]

{'epoch': 3, 'iter': 12920, 'avg_loss': 8.524413937210769, 'avg_acc': 49.91148130949617, 'loss': 9.040072441101074}


EP_train:3:  47%|| 12932/27626 [30:29<34:30,  7.10it/s]

{'epoch': 3, 'iter': 12930, 'avg_loss': 8.524337747867529, 'avg_acc': 49.914691439177176, 'loss': 7.841925621032715}


EP_train:3:  47%|| 12942/27626 [30:31<34:52,  7.02it/s]

{'epoch': 3, 'iter': 12940, 'avg_loss': 8.524205196070511, 'avg_acc': 49.91451587976199, 'loss': 8.154062271118164}


EP_train:3:  47%|| 12952/27626 [30:32<35:01,  6.98it/s]

{'epoch': 3, 'iter': 12950, 'avg_loss': 8.524037985035502, 'avg_acc': 49.91385800324299, 'loss': 7.881576061248779}


EP_train:3:  47%|| 12962/27626 [30:33<34:56,  6.99it/s]

{'epoch': 3, 'iter': 12960, 'avg_loss': 8.52353972850064, 'avg_acc': 49.91127227837358, 'loss': 8.033642768859863}


EP_train:3:  47%|| 12972/27626 [30:35<34:53,  7.00it/s]

{'epoch': 3, 'iter': 12970, 'avg_loss': 8.523355439747595, 'avg_acc': 49.91423174774497, 'loss': 8.933712005615234}


EP_train:3:  47%|| 12982/27626 [30:36<34:39,  7.04it/s]

{'epoch': 3, 'iter': 12980, 'avg_loss': 8.523650138222576, 'avg_acc': 49.91983475849318, 'loss': 8.438212394714355}


EP_train:3:  47%|| 12992/27626 [30:38<34:50,  7.00it/s]

{'epoch': 3, 'iter': 12990, 'avg_loss': 8.523768807712838, 'avg_acc': 49.92615079670541, 'loss': 9.123457908630371}


EP_train:3:  47%|| 13002/27626 [30:39<34:36,  7.04it/s]

{'epoch': 3, 'iter': 13000, 'avg_loss': 8.52383858834255, 'avg_acc': 49.928851626797936, 'loss': 8.64937973022461}


EP_train:3:  47%|| 13012/27626 [30:40<34:33,  7.05it/s]

{'epoch': 3, 'iter': 13010, 'avg_loss': 8.523667107408304, 'avg_acc': 49.93058757974022, 'loss': 9.519713401794434}


EP_train:3:  47%|| 13022/27626 [30:42<34:26,  7.07it/s]

{'epoch': 3, 'iter': 13020, 'avg_loss': 8.523836967115825, 'avg_acc': 49.93400084478919, 'loss': 8.745707511901855}


EP_train:3:  47%|| 13032/27626 [30:43<34:30,  7.05it/s]

{'epoch': 3, 'iter': 13030, 'avg_loss': 8.523901773042484, 'avg_acc': 49.935969994628195, 'loss': 9.094770431518555}


EP_train:3:  47%|| 13042/27626 [30:45<34:55,  6.96it/s]

{'epoch': 3, 'iter': 13040, 'avg_loss': 8.523842757688454, 'avg_acc': 49.93721723794188, 'loss': 9.184415817260742}


EP_train:3:  47%|| 13052/27626 [30:46<34:21,  7.07it/s]

{'epoch': 3, 'iter': 13050, 'avg_loss': 8.523752930538652, 'avg_acc': 49.93606811738564, 'loss': 8.878100395202637}


EP_train:3:  47%|| 13062/27626 [30:47<34:19,  7.07it/s]

{'epoch': 3, 'iter': 13060, 'avg_loss': 8.523775602188385, 'avg_acc': 49.93396370875124, 'loss': 8.53304386138916}


EP_train:3:  47%|| 13072/27626 [30:49<34:22,  7.06it/s]

{'epoch': 3, 'iter': 13070, 'avg_loss': 8.523911131831904, 'avg_acc': 49.93401422997476, 'loss': 8.624083518981934}


EP_train:3:  47%|| 13082/27626 [30:50<34:34,  7.01it/s]

{'epoch': 3, 'iter': 13080, 'avg_loss': 8.523793660940978, 'avg_acc': 49.93430357006345, 'loss': 8.198749542236328}


EP_train:3:  47%|| 13092/27626 [30:52<34:27,  7.03it/s]

{'epoch': 3, 'iter': 13090, 'avg_loss': 8.523611331166325, 'avg_acc': 49.936740890688256, 'loss': 8.663274765014648}


EP_train:3:  47%|| 13102/27626 [30:53<34:18,  7.06it/s]

{'epoch': 3, 'iter': 13100, 'avg_loss': 8.523380995943258, 'avg_acc': 49.9360735821693, 'loss': 8.5647611618042}


EP_train:3:  47%|| 13112/27626 [30:55<34:47,  6.95it/s]

{'epoch': 3, 'iter': 13110, 'avg_loss': 8.523382173723983, 'avg_acc': 49.9377907863626, 'loss': 8.42385196685791}


EP_train:3:  47%|| 13122/27626 [30:56<34:43,  6.96it/s]

{'epoch': 3, 'iter': 13120, 'avg_loss': 8.523715399358432, 'avg_acc': 49.93378934532429, 'loss': 9.120360374450684}


EP_train:3:  48%|| 13132/27626 [30:57<34:09,  7.07it/s]

{'epoch': 3, 'iter': 13130, 'avg_loss': 8.5237898194375, 'avg_acc': 49.93074594471099, 'loss': 8.519940376281738}


EP_train:3:  48%|| 13142/27626 [30:59<34:28,  7.00it/s]

{'epoch': 3, 'iter': 13140, 'avg_loss': 8.523973731088924, 'avg_acc': 49.929847424092536, 'loss': 8.821343421936035}


EP_train:3:  48%|| 13152/27626 [31:00<34:13,  7.05it/s]

{'epoch': 3, 'iter': 13150, 'avg_loss': 8.523817559302366, 'avg_acc': 49.929187894456696, 'loss': 8.595401763916016}


EP_train:3:  48%|| 13162/27626 [31:02<34:27,  7.00it/s]

{'epoch': 3, 'iter': 13160, 'avg_loss': 8.523951254991692, 'avg_acc': 49.9287668110326, 'loss': 9.302404403686523}


EP_train:3:  48%|| 13172/27626 [31:03<34:20,  7.02it/s]

{'epoch': 3, 'iter': 13170, 'avg_loss': 8.523721977189284, 'avg_acc': 49.927634575962344, 'loss': 8.031458854675293}


EP_train:3:  48%|| 13182/27626 [31:05<34:12,  7.04it/s]

{'epoch': 3, 'iter': 13180, 'avg_loss': 8.523692262072311, 'avg_acc': 49.92721530991579, 'loss': 8.560070037841797}


EP_train:3:  48%|| 13192/27626 [31:06<34:01,  7.07it/s]

{'epoch': 3, 'iter': 13190, 'avg_loss': 8.523810857458045, 'avg_acc': 49.92869191115154, 'loss': 9.174561500549316}


EP_train:3:  48%|| 13202/27626 [31:07<34:04,  7.06it/s]

{'epoch': 3, 'iter': 13200, 'avg_loss': 8.523829368468208, 'avg_acc': 49.92732558139535, 'loss': 8.97305965423584}


EP_train:3:  48%|| 13212/27626 [31:09<34:17,  7.01it/s]

{'epoch': 3, 'iter': 13210, 'avg_loss': 8.52370742856035, 'avg_acc': 49.923595867080465, 'loss': 8.047688484191895}


EP_train:3:  48%|| 13222/27626 [31:10<34:20,  6.99it/s]

{'epoch': 3, 'iter': 13220, 'avg_loss': 8.523600404142261, 'avg_acc': 49.9227081915135, 'loss': 8.63415813446045}


EP_train:3:  48%|| 13232/27626 [31:12<33:58,  7.06it/s]

{'epoch': 3, 'iter': 13230, 'avg_loss': 8.523504477619053, 'avg_acc': 49.926073237094705, 'loss': 7.910262107849121}


EP_train:3:  48%|| 13242/27626 [31:13<34:00,  7.05it/s]

{'epoch': 3, 'iter': 13240, 'avg_loss': 8.523581924219522, 'avg_acc': 49.92494902197719, 'loss': 8.572648048400879}


EP_train:3:  48%|| 13252/27626 [31:14<34:13,  7.00it/s]

{'epoch': 3, 'iter': 13250, 'avg_loss': 8.523639203017545, 'avg_acc': 49.923119009886044, 'loss': 8.365255355834961}


EP_train:3:  48%|| 13262/27626 [31:16<33:57,  7.05it/s]

{'epoch': 3, 'iter': 13260, 'avg_loss': 8.52359156622605, 'avg_acc': 49.92011349068698, 'loss': 7.644697189331055}


EP_train:3:  48%|| 13272/27626 [31:17<34:06,  7.01it/s]

{'epoch': 3, 'iter': 13270, 'avg_loss': 8.523569380524703, 'avg_acc': 49.92040916283626, 'loss': 8.55403995513916}


EP_train:3:  48%|| 13282/27626 [31:19<33:48,  7.07it/s]

{'epoch': 3, 'iter': 13280, 'avg_loss': 8.523424739031967, 'avg_acc': 49.91741021007454, 'loss': 8.643414497375488}


EP_train:3:  48%|| 13292/27626 [31:20<33:47,  7.07it/s]

{'epoch': 3, 'iter': 13290, 'avg_loss': 8.523484266061706, 'avg_acc': 49.920764050861486, 'loss': 9.480345726013184}


EP_train:3:  48%|| 13302/27626 [31:22<33:50,  7.05it/s]

{'epoch': 3, 'iter': 13300, 'avg_loss': 8.523160718632914, 'avg_acc': 49.921058567025035, 'loss': 7.778360843658447}


EP_train:3:  48%|| 13312/27626 [31:23<33:41,  7.08it/s]

{'epoch': 3, 'iter': 13310, 'avg_loss': 8.523304226611435, 'avg_acc': 49.922761250093906, 'loss': 9.053934097290039}


EP_train:3:  48%|| 13322/27626 [31:24<33:46,  7.06it/s]

{'epoch': 3, 'iter': 13320, 'avg_loss': 8.523194817986255, 'avg_acc': 49.92188086479994, 'loss': 8.483438491821289}


EP_train:3:  48%|| 13332/27626 [31:26<33:43,  7.06it/s]

{'epoch': 3, 'iter': 13330, 'avg_loss': 8.52324502339877, 'avg_acc': 49.91959530417823, 'loss': 8.227867126464844}


EP_train:3:  48%|| 13342/27626 [31:27<33:45,  7.05it/s]

{'epoch': 3, 'iter': 13340, 'avg_loss': 8.523236995175472, 'avg_acc': 49.91918709242186, 'loss': 8.483842849731445}


EP_train:3:  48%|| 13352/27626 [31:29<33:44,  7.05it/s]

{'epoch': 3, 'iter': 13350, 'avg_loss': 8.523392613805035, 'avg_acc': 49.9180772975807, 'loss': 8.454499244689941}


EP_train:3:  48%|| 13362/27626 [31:30<34:04,  6.98it/s]

{'epoch': 3, 'iter': 13360, 'avg_loss': 8.523488184495847, 'avg_acc': 49.91884028141606, 'loss': 8.80515193939209}


EP_train:3:  48%|| 13372/27626 [31:31<33:52,  7.01it/s]

{'epoch': 3, 'iter': 13370, 'avg_loss': 8.523682235111233, 'avg_acc': 49.92217298631366, 'loss': 9.313348770141602}


EP_train:3:  48%|| 13382/27626 [31:33<33:36,  7.06it/s]

{'epoch': 3, 'iter': 13380, 'avg_loss': 8.52373185309939, 'avg_acc': 49.92059636798445, 'loss': 8.231304168701172}


EP_train:3:  48%|| 13392/27626 [31:34<33:38,  7.05it/s]

{'epoch': 3, 'iter': 13390, 'avg_loss': 8.52388261429413, 'avg_acc': 49.920422298558734, 'loss': 8.682929992675781}


EP_train:3:  49%|| 13402/27626 [31:36<33:36,  7.05it/s]

{'epoch': 3, 'iter': 13400, 'avg_loss': 8.523851427887102, 'avg_acc': 49.921880829788826, 'loss': 7.955354690551758}


EP_train:3:  49%|| 13412/27626 [31:37<33:48,  7.01it/s]

{'epoch': 3, 'iter': 13410, 'avg_loss': 8.523844731519814, 'avg_acc': 49.922638132875996, 'loss': 8.459711074829102}


EP_train:3:  49%|| 13422/27626 [31:39<33:27,  7.07it/s]

{'epoch': 3, 'iter': 13420, 'avg_loss': 8.523628236106802, 'avg_acc': 49.92572274793234, 'loss': 8.398876190185547}


EP_train:3:  49%|| 13432/27626 [31:40<33:35,  7.04it/s]

{'epoch': 3, 'iter': 13430, 'avg_loss': 8.523491817649322, 'avg_acc': 49.9250800387164, 'loss': 8.418436050415039}


EP_train:3:  49%|| 13442/27626 [31:41<33:34,  7.04it/s]

{'epoch': 3, 'iter': 13440, 'avg_loss': 8.52332861087709, 'avg_acc': 49.92467078342385, 'loss': 8.243762016296387}


EP_train:3:  49%|| 13452/27626 [31:43<33:29,  7.05it/s]

{'epoch': 3, 'iter': 13450, 'avg_loss': 8.523349206604113, 'avg_acc': 49.91961564195971, 'loss': 8.267329216003418}


EP_train:3:  49%|| 13462/27626 [31:44<33:33,  7.03it/s]

{'epoch': 3, 'iter': 13460, 'avg_loss': 8.523143617831069, 'avg_acc': 49.923621944877794, 'loss': 8.14985179901123}


EP_train:3:  49%|| 13472/27626 [31:46<33:21,  7.07it/s]

{'epoch': 3, 'iter': 13470, 'avg_loss': 8.523091903641498, 'avg_acc': 49.92135884492614, 'loss': 9.323955535888672}


EP_train:3:  49%|| 13482/27626 [31:47<33:23,  7.06it/s]

{'epoch': 3, 'iter': 13480, 'avg_loss': 8.523332149902338, 'avg_acc': 49.92141717973444, 'loss': 8.277449607849121}


EP_train:3:  49%|| 13492/27626 [31:48<33:28,  7.04it/s]

{'epoch': 3, 'iter': 13490, 'avg_loss': 8.523510503376594, 'avg_acc': 49.9233285153065, 'loss': 8.795501708984375}


EP_train:3:  49%|| 13502/27626 [31:50<33:42,  6.98it/s]

{'epoch': 3, 'iter': 13500, 'avg_loss': 8.523608283107965, 'avg_acc': 49.92222798311236, 'loss': 8.653820037841797}


EP_train:3:  49%|| 13512/27626 [31:51<33:43,  6.98it/s]

{'epoch': 3, 'iter': 13510, 'avg_loss': 8.523573890461858, 'avg_acc': 49.9236733032344, 'loss': 7.614822864532471}


EP_train:3:  49%|| 13522/27626 [31:53<33:34,  7.00it/s]

{'epoch': 3, 'iter': 13520, 'avg_loss': 8.523340504847107, 'avg_acc': 49.92650321721766, 'loss': 8.457205772399902}


EP_train:3:  49%|| 13532/27626 [31:54<33:41,  6.97it/s]

{'epoch': 3, 'iter': 13530, 'avg_loss': 8.523535814476281, 'avg_acc': 49.92586468110265, 'loss': 8.624858856201172}


EP_train:3:  49%|| 13542/27626 [31:56<33:13,  7.06it/s]

{'epoch': 3, 'iter': 13540, 'avg_loss': 8.52342925636202, 'avg_acc': 49.92661177165645, 'loss': 8.371582984924316}


EP_train:3:  49%|| 13552/27626 [31:57<33:24,  7.02it/s]

{'epoch': 3, 'iter': 13550, 'avg_loss': 8.523140342409219, 'avg_acc': 49.928510811010256, 'loss': 7.366851806640625}


EP_train:3:  49%|| 13562/27626 [31:58<33:08,  7.07it/s]

{'epoch': 3, 'iter': 13560, 'avg_loss': 8.522913708042301, 'avg_acc': 49.933172332423865, 'loss': 8.543566703796387}


EP_train:3:  49%|| 13572/27626 [32:00<33:11,  7.06it/s]

{'epoch': 3, 'iter': 13570, 'avg_loss': 8.522780033019092, 'avg_acc': 49.932991304988576, 'loss': 7.745131492614746}


EP_train:3:  49%|| 13582/27626 [32:01<33:12,  7.05it/s]

{'epoch': 3, 'iter': 13580, 'avg_loss': 8.522549181375629, 'avg_acc': 49.935571754657246, 'loss': 8.761946678161621}


EP_train:3:  49%|| 13592/27626 [32:03<33:25,  7.00it/s]

{'epoch': 3, 'iter': 13590, 'avg_loss': 8.52242578059209, 'avg_acc': 49.93883820175115, 'loss': 8.845708847045898}


EP_train:3:  49%|| 13602/27626 [32:04<33:12,  7.04it/s]

{'epoch': 3, 'iter': 13600, 'avg_loss': 8.522656184664088, 'avg_acc': 49.937504595250346, 'loss': 9.156209945678711}


EP_train:3:  49%|| 13612/27626 [32:05<33:17,  7.02it/s]

{'epoch': 3, 'iter': 13610, 'avg_loss': 8.522404836317364, 'avg_acc': 49.93341782381897, 'loss': 8.831454277038574}


EP_train:3:  49%|| 13622/27626 [32:07<33:13,  7.03it/s]

{'epoch': 3, 'iter': 13620, 'avg_loss': 8.522475607742972, 'avg_acc': 49.93392555612657, 'loss': 8.3265380859375}


EP_train:3:  49%|| 13632/27626 [32:08<33:15,  7.01it/s]

{'epoch': 3, 'iter': 13630, 'avg_loss': 8.52250815203968, 'avg_acc': 49.93466180030812, 'loss': 8.299967765808105}


EP_train:3:  49%|| 13642/27626 [32:10<33:02,  7.05it/s]

{'epoch': 3, 'iter': 13640, 'avg_loss': 8.522713034779224, 'avg_acc': 49.9326478997141, 'loss': 8.703899383544922}


EP_train:3:  49%|| 13652/27626 [32:11<33:19,  6.99it/s]

{'epoch': 3, 'iter': 13650, 'avg_loss': 8.522675925423815, 'avg_acc': 49.93544428979562, 'loss': 8.281533241271973}


EP_train:3:  49%|| 13662/27626 [32:13<33:05,  7.03it/s]

{'epoch': 3, 'iter': 13660, 'avg_loss': 8.522869993692504, 'avg_acc': 49.929315203865016, 'loss': 8.497596740722656}


EP_train:3:  49%|| 13672/27626 [32:14<33:03,  7.04it/s]

{'epoch': 3, 'iter': 13670, 'avg_loss': 8.523221242018865, 'avg_acc': 49.9309670104601, 'loss': 9.019396781921387}


EP_train:3:  50%|| 13682/27626 [32:15<33:14,  6.99it/s]

{'epoch': 3, 'iter': 13680, 'avg_loss': 8.523445647951892, 'avg_acc': 49.92919011768146, 'loss': 9.038810729980469}


EP_train:3:  50%|| 13692/27626 [32:17<32:57,  7.05it/s]

{'epoch': 3, 'iter': 13690, 'avg_loss': 8.52319093468365, 'avg_acc': 49.92741582061208, 'loss': 7.8581318855285645}


EP_train:3:  50%|| 13702/27626 [32:18<32:57,  7.04it/s]

{'epoch': 3, 'iter': 13700, 'avg_loss': 8.523124245655518, 'avg_acc': 49.92541602802715, 'loss': 8.459146499633789}


EP_train:3:  50%|| 13712/27626 [32:20<33:06,  7.01it/s]

{'epoch': 3, 'iter': 13710, 'avg_loss': 8.523049372637631, 'avg_acc': 49.92341915250529, 'loss': 8.26211166381836}


EP_train:3:  50%|| 13722/27626 [32:21<32:54,  7.04it/s]

{'epoch': 3, 'iter': 13720, 'avg_loss': 8.522967723757192, 'avg_acc': 49.92370271846075, 'loss': 8.764324188232422}


EP_train:3:  50%|| 13732/27626 [32:22<32:47,  7.06it/s]

{'epoch': 3, 'iter': 13730, 'avg_loss': 8.523327946636858, 'avg_acc': 49.92580656907727, 'loss': 8.11526870727539}


EP_train:3:  50%|| 13742/27626 [32:24<32:44,  7.07it/s]

{'epoch': 3, 'iter': 13740, 'avg_loss': 8.523380223948765, 'avg_acc': 49.92017502365185, 'loss': 8.590495109558105}


EP_train:3:  50%|| 13752/27626 [32:25<32:50,  7.04it/s]

{'epoch': 3, 'iter': 13750, 'avg_loss': 8.523666545301166, 'avg_acc': 49.91796051196277, 'loss': 9.271690368652344}


EP_train:3:  50%|| 13762/27626 [32:27<32:44,  7.06it/s]

{'epoch': 3, 'iter': 13760, 'avg_loss': 8.523461624197068, 'avg_acc': 49.91915558462321, 'loss': 9.038702011108398}


EP_train:3:  50%|| 13772/27626 [32:28<32:36,  7.08it/s]

{'epoch': 3, 'iter': 13770, 'avg_loss': 8.523601484094412, 'avg_acc': 49.92012199549779, 'loss': 8.88744831085205}


EP_train:3:  50%|| 13782/27626 [32:29<32:37,  7.07it/s]

{'epoch': 3, 'iter': 13780, 'avg_loss': 8.524127051200725, 'avg_acc': 49.91949967346346, 'loss': 9.415392875671387}


EP_train:3:  50%|| 13792/27626 [32:31<33:20,  6.91it/s]

{'epoch': 3, 'iter': 13790, 'avg_loss': 8.524087653375503, 'avg_acc': 49.92431658327895, 'loss': 7.748508930206299}


EP_train:3:  50%|| 13802/27626 [32:32<32:58,  6.99it/s]

{'epoch': 3, 'iter': 13800, 'avg_loss': 8.524180031230454, 'avg_acc': 49.92006919788421, 'loss': 7.703146457672119}


EP_train:3:  50%|| 13812/27626 [32:34<32:42,  7.04it/s]

{'epoch': 3, 'iter': 13810, 'avg_loss': 8.524028593880514, 'avg_acc': 49.918769459126786, 'loss': 8.707279205322266}


EP_train:3:  50%|| 13822/27626 [32:35<32:34,  7.06it/s]

{'epoch': 3, 'iter': 13820, 'avg_loss': 8.52388064518531, 'avg_acc': 49.91724549598437, 'loss': 8.461294174194336}


EP_train:3:  50%|| 13832/27626 [32:37<32:34,  7.06it/s]

{'epoch': 3, 'iter': 13830, 'avg_loss': 8.524020898661897, 'avg_acc': 49.91594967825898, 'loss': 8.204353332519531}


EP_train:3:  50%|| 13842/27626 [32:38<32:34,  7.05it/s]

{'epoch': 3, 'iter': 13840, 'avg_loss': 8.523868477763955, 'avg_acc': 49.91623618235677, 'loss': 8.274230003356934}


EP_train:3:  50%|| 13852/27626 [32:39<32:46,  7.00it/s]

{'epoch': 3, 'iter': 13850, 'avg_loss': 8.523888928426679, 'avg_acc': 49.919229658508414, 'loss': 8.677251815795898}


EP_train:3:  50%|| 13862/27626 [32:41<32:40,  7.02it/s]

{'epoch': 3, 'iter': 13860, 'avg_loss': 8.523880484162467, 'avg_acc': 49.92131700454513, 'loss': 8.852886199951172}


EP_train:3:  50%|| 13872/27626 [32:42<32:49,  6.98it/s]

{'epoch': 3, 'iter': 13870, 'avg_loss': 8.523720728655778, 'avg_acc': 49.922950760579624, 'loss': 8.435832977294922}


EP_train:3:  50%|| 13882/27626 [32:44<32:31,  7.04it/s]

{'epoch': 3, 'iter': 13880, 'avg_loss': 8.523609985751143, 'avg_acc': 49.91917909372523, 'loss': 8.14438533782959}


EP_train:3:  50%|| 13892/27626 [32:45<32:44,  6.99it/s]

{'epoch': 3, 'iter': 13890, 'avg_loss': 8.523492993807983, 'avg_acc': 49.9253113526744, 'loss': 8.920597076416016}


EP_train:3:  50%|| 13902/27626 [32:47<32:45,  6.98it/s]

{'epoch': 3, 'iter': 13900, 'avg_loss': 8.523362558699796, 'avg_acc': 49.92761312135818, 'loss': 8.328414916992188}


EP_train:3:  50%|| 13912/27626 [32:48<32:26,  7.05it/s]

{'epoch': 3, 'iter': 13910, 'avg_loss': 8.523184648964477, 'avg_acc': 49.92272302494429, 'loss': 7.581256866455078}


EP_train:3:  50%|| 13922/27626 [32:49<32:28,  7.03it/s]

{'epoch': 3, 'iter': 13920, 'avg_loss': 8.52354139251002, 'avg_acc': 49.922105093024925, 'loss': 8.098665237426758}


EP_train:3:  50%|| 13932/27626 [32:51<32:29,  7.03it/s]

{'epoch': 3, 'iter': 13930, 'avg_loss': 8.523590443353541, 'avg_acc': 49.9259744454813, 'loss': 9.271586418151855}


EP_train:3:  50%|| 13942/27626 [32:52<32:29,  7.02it/s]

{'epoch': 3, 'iter': 13940, 'avg_loss': 8.523296929014592, 'avg_acc': 49.92849329316405, 'loss': 8.045360565185547}


EP_train:3:  51%|| 13952/27626 [32:54<32:09,  7.09it/s]

{'epoch': 3, 'iter': 13950, 'avg_loss': 8.52311762515195, 'avg_acc': 49.92787255393878, 'loss': 8.57254409790039}


EP_train:3:  51%|| 13962/27626 [32:55<32:23,  7.03it/s]

{'epoch': 3, 'iter': 13960, 'avg_loss': 8.52299119090549, 'avg_acc': 49.92434281211948, 'loss': 7.947493076324463}


EP_train:3:  51%|| 13972/27626 [32:56<32:47,  6.94it/s]

{'epoch': 3, 'iter': 13970, 'avg_loss': 8.52297614399104, 'avg_acc': 49.923725932288306, 'loss': 8.583377838134766}


EP_train:3:  51%|| 13982/27626 [32:58<32:37,  6.97it/s]

{'epoch': 3, 'iter': 13980, 'avg_loss': 8.5232228778154, 'avg_acc': 49.92534511122238, 'loss': 8.07447624206543}


EP_train:3:  51%|| 13992/27626 [32:59<32:00,  7.10it/s]

{'epoch': 3, 'iter': 13990, 'avg_loss': 8.523357152887721, 'avg_acc': 49.92495175469945, 'loss': 9.285983085632324}


EP_train:3:  51%|| 14002/27626 [33:01<32:11,  7.05it/s]

{'epoch': 3, 'iter': 14000, 'avg_loss': 8.523440134337132, 'avg_acc': 49.92545175344618, 'loss': 8.616820335388184}


EP_train:3:  51%|| 14012/27626 [33:02<32:13,  7.04it/s]

{'epoch': 3, 'iter': 14010, 'avg_loss': 8.523580522491628, 'avg_acc': 49.92662015559203, 'loss': 8.718781471252441}


EP_train:3:  51%|| 14022/27626 [33:04<32:12,  7.04it/s]

{'epoch': 3, 'iter': 14020, 'avg_loss': 8.52348682994501, 'avg_acc': 49.924666571571215, 'loss': 9.184295654296875}


EP_train:3:  51%|| 14032/27626 [33:05<32:15,  7.02it/s]

{'epoch': 3, 'iter': 14030, 'avg_loss': 8.523626811318639, 'avg_acc': 49.925165704511436, 'loss': 8.1065092086792}


EP_train:3:  51%|| 14042/27626 [33:06<32:07,  7.05it/s]

{'epoch': 3, 'iter': 14040, 'avg_loss': 8.52340792791175, 'avg_acc': 49.923883626522326, 'loss': 8.033345222473145}


EP_train:3:  51%|| 14052/27626 [33:08<32:24,  6.98it/s]

{'epoch': 3, 'iter': 14050, 'avg_loss': 8.523524406329397, 'avg_acc': 49.92883068820724, 'loss': 9.316673278808594}


EP_train:3:  51%|| 14062/27626 [33:09<32:17,  7.00it/s]

{'epoch': 3, 'iter': 14060, 'avg_loss': 8.523561787074563, 'avg_acc': 49.93110376217908, 'loss': 9.040213584899902}


EP_train:3:  51%|| 14072/27626 [33:11<32:23,  6.97it/s]

{'epoch': 3, 'iter': 14070, 'avg_loss': 8.523466849155954, 'avg_acc': 49.9313748134461, 'loss': 8.178446769714355}


EP_train:3:  51%|| 14082/27626 [33:12<32:01,  7.05it/s]

{'epoch': 3, 'iter': 14080, 'avg_loss': 8.523447786184478, 'avg_acc': 49.934974433633975, 'loss': 8.851455688476562}


EP_train:3:  51%|| 14092/27626 [33:13<32:09,  7.02it/s]

{'epoch': 3, 'iter': 14090, 'avg_loss': 8.52358742046539, 'avg_acc': 49.93213753459655, 'loss': 9.283483505249023}


EP_train:3:  51%|| 14102/27626 [33:15<31:53,  7.07it/s]

{'epoch': 3, 'iter': 14100, 'avg_loss': 8.523595729049035, 'avg_acc': 49.93285050705624, 'loss': 9.555778503417969}


EP_train:3:  51%|| 14112/27626 [33:16<31:54,  7.06it/s]

{'epoch': 3, 'iter': 14110, 'avg_loss': 8.523481999237037, 'avg_acc': 49.931126426192336, 'loss': 8.034480094909668}


EP_train:3:  51%|| 14122/27626 [33:18<32:01,  7.03it/s]

{'epoch': 3, 'iter': 14120, 'avg_loss': 8.523432201336602, 'avg_acc': 49.93515862899228, 'loss': 8.621346473693848}


EP_train:3:  51%|| 14132/27626 [33:19<32:08,  7.00it/s]

{'epoch': 3, 'iter': 14130, 'avg_loss': 8.523430481946408, 'avg_acc': 49.935867949897386, 'loss': 8.584832191467285}


EP_train:3:  51%|| 14142/27626 [33:21<31:51,  7.05it/s]

{'epoch': 3, 'iter': 14140, 'avg_loss': 8.523506019250497, 'avg_acc': 49.937239233434696, 'loss': 8.061114311218262}


EP_train:3:  51%|| 14152/27626 [33:22<31:54,  7.04it/s]

{'epoch': 3, 'iter': 14150, 'avg_loss': 8.52353607185417, 'avg_acc': 49.93662108684899, 'loss': 9.250251770019531}


EP_train:3:  51%|| 14162/27626 [33:23<32:03,  7.00it/s]

{'epoch': 3, 'iter': 14160, 'avg_loss': 8.52325657594087, 'avg_acc': 49.93865193136078, 'loss': 8.304414749145508}


EP_train:3:  51%|| 14172/27626 [33:25<31:48,  7.05it/s]

{'epoch': 3, 'iter': 14170, 'avg_loss': 8.52304330036001, 'avg_acc': 49.941341472020326, 'loss': 8.253205299377441}


EP_train:3:  51%|| 14182/27626 [33:26<31:53,  7.03it/s]

{'epoch': 3, 'iter': 14180, 'avg_loss': 8.522913296075913, 'avg_acc': 49.94160320146675, 'loss': 8.511639595031738}


EP_train:3:  51%|| 14192/27626 [33:28<31:45,  7.05it/s]

{'epoch': 3, 'iter': 14190, 'avg_loss': 8.522684938097326, 'avg_acc': 49.94010288210838, 'loss': 7.440017223358154}


EP_train:3:  51%|| 14202/27626 [33:29<31:39,  7.07it/s]

{'epoch': 3, 'iter': 14200, 'avg_loss': 8.522542728587663, 'avg_acc': 49.939704950355605, 'loss': 8.415966987609863}


EP_train:3:  51%|| 14212/27626 [33:30<31:41,  7.06it/s]

{'epoch': 3, 'iter': 14210, 'avg_loss': 8.522472432124255, 'avg_acc': 49.93820807824924, 'loss': 9.033709526062012}


EP_train:3:  51%|| 14222/27626 [33:32<31:58,  6.99it/s]

{'epoch': 3, 'iter': 14220, 'avg_loss': 8.522183616472677, 'avg_acc': 49.9408884747908, 'loss': 8.942484855651855}


EP_train:3:  52%|| 14232/27626 [33:33<31:41,  7.05it/s]

{'epoch': 3, 'iter': 14230, 'avg_loss': 8.522061130065843, 'avg_acc': 49.94356510434966, 'loss': 8.251084327697754}


EP_train:3:  52%|| 14242/27626 [33:35<31:33,  7.07it/s]

{'epoch': 3, 'iter': 14240, 'avg_loss': 8.522202446390242, 'avg_acc': 49.945799101186715, 'loss': 8.223442077636719}


EP_train:3:  52%|| 14252/27626 [33:36<31:58,  6.97it/s]

{'epoch': 3, 'iter': 14250, 'avg_loss': 8.52235962862584, 'avg_acc': 49.94430215423479, 'loss': 8.959615707397461}


EP_train:3:  52%|| 14262/27626 [33:38<31:41,  7.03it/s]

{'epoch': 3, 'iter': 14260, 'avg_loss': 8.522415792312605, 'avg_acc': 49.94456033938714, 'loss': 8.483144760131836}


EP_train:3:  52%|| 14272/27626 [33:39<31:48,  7.00it/s]

{'epoch': 3, 'iter': 14270, 'avg_loss': 8.522366599690463, 'avg_acc': 49.94481816270759, 'loss': 8.638623237609863}


EP_train:3:  52%|| 14282/27626 [33:40<31:30,  7.06it/s]

{'epoch': 3, 'iter': 14280, 'avg_loss': 8.522388712947972, 'avg_acc': 49.94573209159022, 'loss': 7.5720038414001465}


EP_train:3:  52%|| 14292/27626 [33:42<31:24,  7.08it/s]

{'epoch': 3, 'iter': 14290, 'avg_loss': 8.522404264648001, 'avg_acc': 49.945551395983486, 'loss': 8.603521347045898}


EP_train:3:  52%|| 14302/27626 [33:43<31:18,  7.09it/s]

{'epoch': 3, 'iter': 14300, 'avg_loss': 8.522406599736865, 'avg_acc': 49.94886721208307, 'loss': 8.29751205444336}


EP_train:3:  52%|| 14312/27626 [33:45<31:30,  7.04it/s]

{'epoch': 3, 'iter': 14310, 'avg_loss': 8.522869974618422, 'avg_acc': 49.94890294179303, 'loss': 8.565415382385254}


EP_train:3:  52%|| 14322/27626 [33:46<31:19,  7.08it/s]

{'epoch': 3, 'iter': 14320, 'avg_loss': 8.522796183822457, 'avg_acc': 49.94828398854828, 'loss': 8.231406211853027}


EP_train:3:  52%|| 14332/27626 [33:47<31:27,  7.04it/s]

{'epoch': 3, 'iter': 14330, 'avg_loss': 8.522697587246489, 'avg_acc': 49.944831135301094, 'loss': 9.032962799072266}


EP_train:3:  52%|| 14342/27626 [33:49<31:32,  7.02it/s]

{'epoch': 3, 'iter': 14340, 'avg_loss': 8.522590521996046, 'avg_acc': 49.94552332473328, 'loss': 7.840402126312256}


EP_train:3:  52%|| 14352/27626 [33:50<31:42,  6.98it/s]

{'epoch': 3, 'iter': 14350, 'avg_loss': 8.522743386878725, 'avg_acc': 49.94381924604557, 'loss': 8.754626274108887}


EP_train:3:  52%|| 14362/27626 [33:52<31:16,  7.07it/s]

{'epoch': 3, 'iter': 14360, 'avg_loss': 8.522739775341357, 'avg_acc': 49.946469605180695, 'loss': 8.166704177856445}


EP_train:3:  52%|| 14372/27626 [33:53<31:35,  6.99it/s]

{'epoch': 3, 'iter': 14370, 'avg_loss': 8.52288783466747, 'avg_acc': 49.94302762507829, 'loss': 8.603253364562988}


EP_train:3:  52%|| 14382/27626 [33:54<31:12,  7.07it/s]

{'epoch': 3, 'iter': 14380, 'avg_loss': 8.522692990778848, 'avg_acc': 49.94524024754885, 'loss': 7.727672100067139}


EP_train:3:  52%|| 14392/27626 [33:56<31:24,  7.02it/s]

{'epoch': 3, 'iter': 14390, 'avg_loss': 8.5225634704777, 'avg_acc': 49.94223820443332, 'loss': 8.63151741027832}


EP_train:3:  52%|| 14402/27626 [33:57<31:21,  7.03it/s]

{'epoch': 3, 'iter': 14400, 'avg_loss': 8.5224989445108, 'avg_acc': 49.943146309284074, 'loss': 8.034234046936035}


EP_train:3:  52%|| 14412/27626 [33:59<31:32,  6.98it/s]

{'epoch': 3, 'iter': 14410, 'avg_loss': 8.522537177775902, 'avg_acc': 49.94708902921379, 'loss': 8.34666633605957}


EP_train:3:  52%|| 14422/27626 [34:00<31:26,  7.00it/s]

{'epoch': 3, 'iter': 14420, 'avg_loss': 8.522459181128308, 'avg_acc': 49.94647562582345, 'loss': 8.161192893981934}


EP_train:3:  52%|| 14432/27626 [34:02<31:16,  7.03it/s]

{'epoch': 3, 'iter': 14430, 'avg_loss': 8.52266702823138, 'avg_acc': 49.94413069087381, 'loss': 9.173285484313965}


EP_train:3:  52%|| 14442/27626 [34:03<31:10,  7.05it/s]

{'epoch': 3, 'iter': 14440, 'avg_loss': 8.522645710165271, 'avg_acc': 49.94157260577523, 'loss': 8.295804023742676}


EP_train:3:  52%|| 14452/27626 [34:04<31:09,  7.05it/s]

{'epoch': 3, 'iter': 14450, 'avg_loss': 8.522653717539663, 'avg_acc': 49.93901806103384, 'loss': 8.802597999572754}


EP_train:3:  52%|| 14462/27626 [34:06<31:07,  7.05it/s]

{'epoch': 3, 'iter': 14460, 'avg_loss': 8.522598118086622, 'avg_acc': 49.94057292026831, 'loss': 7.8197503089904785}


EP_train:3:  52%|| 14472/27626 [34:07<31:05,  7.05it/s]

{'epoch': 3, 'iter': 14470, 'avg_loss': 8.522740505699067, 'avg_acc': 49.93953424089558, 'loss': 8.993602752685547}


EP_train:3:  52%|| 14482/27626 [34:09<31:00,  7.07it/s]

{'epoch': 3, 'iter': 14480, 'avg_loss': 8.522808150752507, 'avg_acc': 49.93892859609143, 'loss': 7.982637405395508}


EP_train:3:  52%|| 14492/27626 [34:10<30:57,  7.07it/s]

{'epoch': 3, 'iter': 14490, 'avg_loss': 8.522593474669467, 'avg_acc': 49.94026464702229, 'loss': 7.578928470611572}


EP_train:3:  52%|| 14502/27626 [34:11<30:56,  7.07it/s]

{'epoch': 3, 'iter': 14500, 'avg_loss': 8.522591136404666, 'avg_acc': 49.941598855251364, 'loss': 8.579901695251465}


EP_train:3:  53%|| 14512/27626 [34:13<30:56,  7.07it/s]

{'epoch': 3, 'iter': 14510, 'avg_loss': 8.522217306046784, 'avg_acc': 49.94206980911033, 'loss': 7.717295169830322}


EP_train:3:  53%|| 14522/27626 [34:14<31:13,  7.00it/s]

{'epoch': 3, 'iter': 14520, 'avg_loss': 8.522261621176922, 'avg_acc': 49.94017285310929, 'loss': 8.90006160736084}


EP_train:3:  53%|| 14532/27626 [34:16<31:00,  7.04it/s]

{'epoch': 3, 'iter': 14530, 'avg_loss': 8.522245796029825, 'avg_acc': 49.93999896772417, 'loss': 8.54889965057373}


EP_train:3:  53%|| 14542/27626 [34:17<30:55,  7.05it/s]

{'epoch': 3, 'iter': 14540, 'avg_loss': 8.522351148867818, 'avg_acc': 49.94261914586342, 'loss': 8.872215270996094}


EP_train:3:  53%|| 14552/27626 [34:19<31:10,  6.99it/s]

{'epoch': 3, 'iter': 14550, 'avg_loss': 8.522417797582534, 'avg_acc': 49.94502096075871, 'loss': 8.592069625854492}


EP_train:3:  53%|| 14562/27626 [34:20<30:52,  7.05it/s]

{'epoch': 3, 'iter': 14560, 'avg_loss': 8.522461325329301, 'avg_acc': 49.94570256163725, 'loss': 8.92592716217041}


EP_train:3:  53%|| 14572/27626 [34:21<30:53,  7.04it/s]

{'epoch': 3, 'iter': 14570, 'avg_loss': 8.522431315541898, 'avg_acc': 49.94531089149681, 'loss': 8.101106643676758}


EP_train:3:  53%|| 14582/27626 [34:23<30:56,  7.03it/s]

{'epoch': 3, 'iter': 14580, 'avg_loss': 8.52265472109937, 'avg_acc': 49.94449111857897, 'loss': 8.890758514404297}


EP_train:3:  53%|| 14592/27626 [34:24<31:12,  6.96it/s]

{'epoch': 3, 'iter': 14590, 'avg_loss': 8.522500357432767, 'avg_acc': 49.94345829620999, 'loss': 7.576179504394531}


EP_train:3:  53%|| 14602/27626 [34:26<30:52,  7.03it/s]

{'epoch': 3, 'iter': 14600, 'avg_loss': 8.52229800820963, 'avg_acc': 49.94542325868091, 'loss': 7.792890548706055}


EP_train:3:  53%|| 14612/27626 [34:27<31:15,  6.94it/s]

{'epoch': 3, 'iter': 14610, 'avg_loss': 8.522000668891017, 'avg_acc': 49.94567449182123, 'loss': 8.286093711853027}


EP_train:3:  53%|| 14622/27626 [34:28<31:00,  6.99it/s]

{'epoch': 3, 'iter': 14620, 'avg_loss': 8.52177610107734, 'avg_acc': 49.947207783325354, 'loss': 8.694071769714355}


EP_train:3:  53%|| 14632/27626 [34:30<30:44,  7.04it/s]

{'epoch': 3, 'iter': 14630, 'avg_loss': 8.521873163106331, 'avg_acc': 49.94809821611646, 'loss': 8.763611793518066}


EP_train:3:  53%|| 14642/27626 [34:31<30:52,  7.01it/s]

{'epoch': 3, 'iter': 14640, 'avg_loss': 8.521964377925725, 'avg_acc': 49.9470664572092, 'loss': 8.395634651184082}


EP_train:3:  53%|| 14652/27626 [34:33<30:35,  7.07it/s]

{'epoch': 3, 'iter': 14650, 'avg_loss': 8.522130210752268, 'avg_acc': 49.94688929083339, 'loss': 9.771598815917969}


EP_train:3:  53%|| 14662/27626 [34:34<30:46,  7.02it/s]

{'epoch': 3, 'iter': 14660, 'avg_loss': 8.522097996739767, 'avg_acc': 49.945433462928854, 'loss': 8.270977973937988}


EP_train:3:  53%|| 14672/27626 [34:36<30:41,  7.03it/s]

{'epoch': 3, 'iter': 14670, 'avg_loss': 8.522127101468646, 'avg_acc': 49.943340603912475, 'loss': 8.40917682647705}


EP_train:3:  53%|| 14682/27626 [34:37<30:33,  7.06it/s]

{'epoch': 3, 'iter': 14680, 'avg_loss': 8.521985773672874, 'avg_acc': 49.9401862952115, 'loss': 8.488961219787598}


EP_train:3:  53%|| 14692/27626 [34:38<30:42,  7.02it/s]

{'epoch': 3, 'iter': 14690, 'avg_loss': 8.522082814786637, 'avg_acc': 49.94107787080526, 'loss': 10.024374008178711}


EP_train:3:  53%|| 14702/27626 [34:40<30:43,  7.01it/s]

{'epoch': 3, 'iter': 14700, 'avg_loss': 8.522350782667258, 'avg_acc': 49.94281851574723, 'loss': 8.517032623291016}


EP_train:3:  53%|| 14712/27626 [34:41<30:28,  7.06it/s]

{'epoch': 3, 'iter': 14710, 'avg_loss': 8.522418763358623, 'avg_acc': 49.94710590714431, 'loss': 8.85187816619873}


EP_train:3:  53%|| 14722/27626 [34:43<30:35,  7.03it/s]

{'epoch': 3, 'iter': 14720, 'avg_loss': 8.522320677927837, 'avg_acc': 49.95244888254874, 'loss': 8.633325576782227}


EP_train:3:  53%|| 14732/27626 [34:44<30:37,  7.02it/s]

{'epoch': 3, 'iter': 14730, 'avg_loss': 8.522477871676859, 'avg_acc': 49.95184474916842, 'loss': 8.956951141357422}


EP_train:3:  53%|| 14742/27626 [34:45<30:35,  7.02it/s]

{'epoch': 3, 'iter': 14740, 'avg_loss': 8.522601573990155, 'avg_acc': 49.949333491622006, 'loss': 8.284658432006836}


EP_train:3:  53%|| 14752/27626 [34:47<30:14,  7.09it/s]

{'epoch': 3, 'iter': 14750, 'avg_loss': 8.522479274831992, 'avg_acc': 49.94534268863128, 'loss': 7.819311141967773}


EP_train:3:  53%|| 14762/27626 [34:48<30:20,  7.06it/s]

{'epoch': 3, 'iter': 14760, 'avg_loss': 8.522515620717543, 'avg_acc': 49.94326265158187, 'loss': 8.177689552307129}


EP_train:3:  53%|| 14772/27626 [34:50<30:25,  7.04it/s]

{'epoch': 3, 'iter': 14770, 'avg_loss': 8.52254538048291, 'avg_acc': 49.94499356847877, 'loss': 7.851651191711426}


EP_train:3:  54%|| 14782/27626 [34:51<30:38,  6.99it/s]

{'epoch': 3, 'iter': 14780, 'avg_loss': 8.52262472349884, 'avg_acc': 49.94799066368987, 'loss': 8.545510292053223}


EP_train:3:  54%|| 14792/27626 [34:53<30:23,  7.04it/s]

{'epoch': 3, 'iter': 14790, 'avg_loss': 8.522791223395114, 'avg_acc': 49.94844838077209, 'loss': 7.796803951263428}


EP_train:3:  54%|| 14802/27626 [34:54<30:23,  7.03it/s]

{'epoch': 3, 'iter': 14800, 'avg_loss': 8.522694041830295, 'avg_acc': 49.95080568880481, 'loss': 8.255256652832031}


EP_train:3:  54%|| 14812/27626 [34:55<30:29,  7.00it/s]

{'epoch': 3, 'iter': 14810, 'avg_loss': 8.522767424832944, 'avg_acc': 49.95126088717845, 'loss': 8.137402534484863}


EP_train:3:  54%|| 14822/27626 [34:57<30:10,  7.07it/s]

{'epoch': 3, 'iter': 14820, 'avg_loss': 8.522614077770166, 'avg_acc': 49.95129377235004, 'loss': 8.165339469909668}


EP_train:3:  54%|| 14832/27626 [34:58<30:04,  7.09it/s]

{'epoch': 3, 'iter': 14830, 'avg_loss': 8.522408535118636, 'avg_acc': 49.94964095475693, 'loss': 8.44262409210205}


EP_train:3:  54%|| 14842/27626 [35:00<30:26,  7.00it/s]

{'epoch': 3, 'iter': 14840, 'avg_loss': 8.522424599944593, 'avg_acc': 49.94988545246277, 'loss': 8.914325714111328}


EP_train:3:  54%|| 14852/27626 [35:01<30:02,  7.08it/s]

{'epoch': 3, 'iter': 14850, 'avg_loss': 8.522348043453203, 'avg_acc': 49.94970877381994, 'loss': 7.509659767150879}


EP_train:3:  54%|| 14862/27626 [35:02<30:10,  7.05it/s]

{'epoch': 3, 'iter': 14860, 'avg_loss': 8.522356829807853, 'avg_acc': 49.95121458852029, 'loss': 8.560608863830566}


EP_train:3:  54%|| 14872/27626 [35:04<30:08,  7.05it/s]

{'epoch': 3, 'iter': 14870, 'avg_loss': 8.52235876342715, 'avg_acc': 49.951457534799275, 'loss': 8.386564254760742}


EP_train:3:  54%|| 14882/27626 [35:05<30:06,  7.05it/s]

{'epoch': 3, 'iter': 14880, 'avg_loss': 8.52214619921273, 'avg_acc': 49.9510701565755, 'loss': 8.693340301513672}


EP_train:3:  54%|| 14892/27626 [35:07<30:11,  7.03it/s]

{'epoch': 3, 'iter': 14890, 'avg_loss': 8.522264337549279, 'avg_acc': 49.95341145658451, 'loss': 8.190255165100098}


EP_train:3:  54%|| 14902/27626 [35:08<30:00,  7.07it/s]

{'epoch': 3, 'iter': 14900, 'avg_loss': 8.522186826758798, 'avg_acc': 49.95407187437085, 'loss': 7.436845302581787}


EP_train:3:  54%|| 14912/27626 [35:09<30:06,  7.04it/s]

{'epoch': 3, 'iter': 14910, 'avg_loss': 8.522191056494629, 'avg_acc': 49.95200690765207, 'loss': 8.00495433807373}


EP_train:3:  54%|| 14922/27626 [35:11<29:49,  7.10it/s]

{'epoch': 3, 'iter': 14920, 'avg_loss': 8.522080669086058, 'avg_acc': 49.95162019971852, 'loss': 8.040186882019043}


EP_train:3:  54%|| 14932/27626 [35:12<30:02,  7.04it/s]

{'epoch': 3, 'iter': 14930, 'avg_loss': 8.522154338776119, 'avg_acc': 49.95144330587368, 'loss': 8.748555183410645}


EP_train:3:  54%|| 14942/27626 [35:14<29:55,  7.07it/s]

{'epoch': 3, 'iter': 14940, 'avg_loss': 8.522176937552375, 'avg_acc': 49.951057492805035, 'loss': 8.3349027633667}


EP_train:3:  54%|| 14952/27626 [35:15<29:57,  7.05it/s]

{'epoch': 3, 'iter': 14950, 'avg_loss': 8.52235937765965, 'avg_acc': 49.95046317972042, 'loss': 9.029867172241211}


EP_train:3:  54%|| 14962/27626 [35:17<30:10,  7.00it/s]

{'epoch': 3, 'iter': 14960, 'avg_loss': 8.522548822562696, 'avg_acc': 49.94882527905888, 'loss': 7.41437292098999}


EP_train:3:  54%|| 14972/27626 [35:18<30:18,  6.96it/s]

{'epoch': 3, 'iter': 14970, 'avg_loss': 8.52263355920807, 'avg_acc': 49.94906819851713, 'loss': 8.57713794708252}


EP_train:3:  54%|| 14982/27626 [35:19<30:08,  6.99it/s]

{'epoch': 3, 'iter': 14980, 'avg_loss': 8.522635064259699, 'avg_acc': 49.95264835458247, 'loss': 7.936880111694336}


EP_train:3:  54%|| 14992/27626 [35:21<30:00,  7.02it/s]

{'epoch': 3, 'iter': 14990, 'avg_loss': 8.52267184132501, 'avg_acc': 49.95288839970649, 'loss': 8.19037914276123}


EP_train:3:  54%|| 15002/27626 [35:22<29:51,  7.05it/s]

{'epoch': 3, 'iter': 15000, 'avg_loss': 8.522573818279007, 'avg_acc': 49.95458636090927, 'loss': 8.331966400146484}


EP_train:3:  54%|| 15012/27626 [35:24<29:52,  7.04it/s]

{'epoch': 3, 'iter': 15010, 'avg_loss': 8.522335543844386, 'avg_acc': 49.95773932449537, 'loss': 8.167580604553223}


EP_train:3:  54%|| 15022/27626 [35:25<29:47,  7.05it/s]

{'epoch': 3, 'iter': 15020, 'avg_loss': 8.522390361600754, 'avg_acc': 49.95755941681645, 'loss': 8.90578842163086}


EP_train:3:  54%|| 15032/27626 [35:26<29:56,  7.01it/s]

{'epoch': 3, 'iter': 15030, 'avg_loss': 8.522570059325007, 'avg_acc': 49.95550861552791, 'loss': 9.41309928894043}


EP_train:3:  54%|| 15042/27626 [35:28<29:38,  7.07it/s]

{'epoch': 3, 'iter': 15040, 'avg_loss': 8.522645861265087, 'avg_acc': 49.95761585000997, 'loss': 8.74417781829834}


EP_train:3:  54%|| 15052/27626 [35:29<29:53,  7.01it/s]

{'epoch': 3, 'iter': 15050, 'avg_loss': 8.522521924405437, 'avg_acc': 49.95868214736562, 'loss': 8.079008102416992}


EP_train:3:  55%|| 15062/27626 [35:31<29:53,  7.01it/s]

{'epoch': 3, 'iter': 15060, 'avg_loss': 8.522257605600101, 'avg_acc': 49.960784476462386, 'loss': 8.612408638000488}


EP_train:3:  55%|| 15072/27626 [35:32<29:42,  7.04it/s]

{'epoch': 3, 'iter': 15070, 'avg_loss': 8.522316381741627, 'avg_acc': 49.96163990445226, 'loss': 8.551677703857422}


EP_train:3:  55%|| 15082/27626 [35:34<29:37,  7.06it/s]

{'epoch': 3, 'iter': 15080, 'avg_loss': 8.522330072559908, 'avg_acc': 49.960836482991844, 'loss': 8.412898063659668}


EP_train:3:  55%|| 15092/27626 [35:35<30:03,  6.95it/s]

{'epoch': 3, 'iter': 15090, 'avg_loss': 8.52234799426969, 'avg_acc': 49.96231197402425, 'loss': 9.299819946289062}


EP_train:3:  55%|| 15102/27626 [35:36<29:27,  7.09it/s]

{'epoch': 3, 'iter': 15100, 'avg_loss': 8.522398510481946, 'avg_acc': 49.96171611151579, 'loss': 8.611369132995605}


EP_train:3:  55%|| 15112/27626 [35:38<29:32,  7.06it/s]

{'epoch': 3, 'iter': 15110, 'avg_loss': 8.52217175360826, 'avg_acc': 49.96153464363709, 'loss': 7.9301605224609375}


EP_train:3:  55%|| 15122/27626 [35:39<29:29,  7.07it/s]

{'epoch': 3, 'iter': 15120, 'avg_loss': 8.522283336171933, 'avg_acc': 49.96032008465048, 'loss': 8.300468444824219}


EP_train:3:  55%|| 15132/27626 [35:41<29:21,  7.09it/s]

{'epoch': 3, 'iter': 15130, 'avg_loss': 8.522379564392804, 'avg_acc': 49.96034630890225, 'loss': 9.124832153320312}


EP_train:3:  55%|| 15142/27626 [35:42<29:28,  7.06it/s]

{'epoch': 3, 'iter': 15140, 'avg_loss': 8.522322095406512, 'avg_acc': 49.96181725117231, 'loss': 8.21555233001709}


EP_train:3:  55%|| 15152/27626 [35:43<29:30,  7.04it/s]

{'epoch': 3, 'iter': 15150, 'avg_loss': 8.52227368149283, 'avg_acc': 49.959779882516, 'loss': 7.771114349365234}


EP_train:3:  55%|| 15162/27626 [35:45<29:25,  7.06it/s]

{'epoch': 3, 'iter': 15160, 'avg_loss': 8.522309207985453, 'avg_acc': 49.960424774091415, 'loss': 8.21393871307373}


EP_train:3:  55%|| 15172/27626 [35:46<29:31,  7.03it/s]

{'epoch': 3, 'iter': 15170, 'avg_loss': 8.522224497460366, 'avg_acc': 49.96127480060642, 'loss': 7.4734930992126465}


EP_train:3:  55%|| 15182/27626 [35:48<29:24,  7.05it/s]

{'epoch': 3, 'iter': 15180, 'avg_loss': 8.522046535070546, 'avg_acc': 49.96047691192938, 'loss': 8.847673416137695}


EP_train:3:  55%|| 15192/27626 [35:49<29:23,  7.05it/s]

{'epoch': 3, 'iter': 15190, 'avg_loss': 8.522215286414593, 'avg_acc': 49.95906293199921, 'loss': 9.335882186889648}


EP_train:3:  55%|| 15202/27626 [35:51<29:12,  7.09it/s]

{'epoch': 3, 'iter': 15200, 'avg_loss': 8.522275000557713, 'avg_acc': 49.957445233866196, 'loss': 8.790602684020996}


EP_train:3:  55%|| 15212/27626 [35:52<29:12,  7.09it/s]

{'epoch': 3, 'iter': 15210, 'avg_loss': 8.522299025971135, 'avg_acc': 49.95952764446782, 'loss': 8.465158462524414}


EP_train:3:  55%|| 15222/27626 [35:53<29:06,  7.10it/s]

{'epoch': 3, 'iter': 15220, 'avg_loss': 8.522311544145785, 'avg_acc': 49.96222324420209, 'loss': 9.559221267700195}


EP_train:3:  55%|| 15232/27626 [35:55<29:18,  7.05it/s]

{'epoch': 3, 'iter': 15230, 'avg_loss': 8.522239144856036, 'avg_acc': 49.96429978333662, 'loss': 7.860807418823242}


EP_train:3:  55%|| 15242/27626 [35:56<29:20,  7.03it/s]

{'epoch': 3, 'iter': 15240, 'avg_loss': 8.522145244175896, 'avg_acc': 49.966168558493536, 'loss': 7.795935153961182}


EP_train:3:  55%|| 15252/27626 [35:58<29:12,  7.06it/s]

{'epoch': 3, 'iter': 15250, 'avg_loss': 8.522075819004622, 'avg_acc': 49.961477935873056, 'loss': 8.240915298461914}


EP_train:3:  55%|| 15262/27626 [35:59<29:05,  7.08it/s]

{'epoch': 3, 'iter': 15260, 'avg_loss': 8.521907360961027, 'avg_acc': 49.96191271869471, 'loss': 8.477531433105469}


EP_train:3:  55%|| 15272/27626 [36:00<29:13,  7.05it/s]

{'epoch': 3, 'iter': 15270, 'avg_loss': 8.521970642361627, 'avg_acc': 49.964597930718355, 'loss': 8.4806547164917}


EP_train:3:  55%|| 15282/27626 [36:02<29:21,  7.01it/s]

{'epoch': 3, 'iter': 15280, 'avg_loss': 8.522021958859334, 'avg_acc': 49.96625711668084, 'loss': 8.31824779510498}


EP_train:3:  55%|| 15292/27626 [36:03<29:10,  7.05it/s]

{'epoch': 3, 'iter': 15290, 'avg_loss': 8.52200994129217, 'avg_acc': 49.96546170950232, 'loss': 8.437518119812012}


EP_train:3:  55%|| 15302/27626 [36:05<29:04,  7.06it/s]

{'epoch': 3, 'iter': 15300, 'avg_loss': 8.521934210852885, 'avg_acc': 49.96487157702111, 'loss': 8.330071449279785}


EP_train:3:  55%|| 15312/27626 [36:06<29:19,  7.00it/s]

{'epoch': 3, 'iter': 15310, 'avg_loss': 8.521976617134557, 'avg_acc': 49.96632323166351, 'loss': 8.833657264709473}


EP_train:3:  55%|| 15322/27626 [36:07<29:02,  7.06it/s]

{'epoch': 3, 'iter': 15320, 'avg_loss': 8.522018376035374, 'avg_acc': 49.966957117681616, 'loss': 7.6055588722229}


EP_train:3:  55%|| 15332/27626 [36:09<29:11,  7.02it/s]

{'epoch': 3, 'iter': 15330, 'avg_loss': 8.52198285420777, 'avg_acc': 49.96575565846977, 'loss': 8.233644485473633}


EP_train:3:  56%|| 15342/27626 [36:10<28:58,  7.06it/s]

{'epoch': 3, 'iter': 15340, 'avg_loss': 8.52210208096985, 'avg_acc': 49.96618538556808, 'loss': 8.99445629119873}


EP_train:3:  56%|| 15352/27626 [36:12<28:57,  7.06it/s]

{'epoch': 3, 'iter': 15350, 'avg_loss': 8.522069015873193, 'avg_acc': 49.966614552797864, 'loss': 7.866279125213623}


EP_train:3:  56%|| 15362/27626 [36:13<29:06,  7.02it/s]

{'epoch': 3, 'iter': 15360, 'avg_loss': 8.522079733245176, 'avg_acc': 49.96928097129093, 'loss': 9.435585975646973}


EP_train:3:  56%|| 15372/27626 [36:15<29:04,  7.03it/s]

{'epoch': 3, 'iter': 15370, 'avg_loss': 8.522079162208021, 'avg_acc': 49.968487736646935, 'loss': 7.876180648803711}


EP_train:3:  56%|| 15382/27626 [36:16<28:51,  7.07it/s]

{'epoch': 3, 'iter': 15380, 'avg_loss': 8.52233822427423, 'avg_acc': 49.970539951888696, 'loss': 8.598731994628906}


EP_train:3:  56%|| 15392/27626 [36:17<28:51,  7.07it/s]

{'epoch': 3, 'iter': 15390, 'avg_loss': 8.5223546895311, 'avg_acc': 49.970559092976416, 'loss': 8.877962112426758}


EP_train:3:  56%|| 15402/27626 [36:19<28:59,  7.03it/s]

{'epoch': 3, 'iter': 15400, 'avg_loss': 8.522519561998427, 'avg_acc': 49.96875202908902, 'loss': 8.918745040893555}


EP_train:3:  56%|| 15412/27626 [36:20<29:10,  6.98it/s]

{'epoch': 3, 'iter': 15410, 'avg_loss': 8.522325411289069, 'avg_acc': 49.96694731036273, 'loss': 8.792914390563965}


EP_train:3:  56%|| 15422/27626 [36:22<28:57,  7.03it/s]

{'epoch': 3, 'iter': 15420, 'avg_loss': 8.522354242364285, 'avg_acc': 49.96757668114908, 'loss': 8.362824440002441}


EP_train:3:  56%|| 15432/27626 [36:23<28:59,  7.01it/s]

{'epoch': 3, 'iter': 15430, 'avg_loss': 8.52254167683705, 'avg_acc': 49.968002721793795, 'loss': 8.898176193237305}


EP_train:3:  56%|| 15442/27626 [36:24<28:44,  7.06it/s]

{'epoch': 3, 'iter': 15440, 'avg_loss': 8.522616755907558, 'avg_acc': 49.96660676122013, 'loss': 7.984411716461182}


EP_train:3:  56%|| 15452/27626 [36:26<28:34,  7.10it/s]

{'epoch': 3, 'iter': 15450, 'avg_loss': 8.52256604937101, 'avg_acc': 49.967437382693674, 'loss': 8.773938179016113}


EP_train:3:  56%|| 15462/27626 [36:27<28:39,  7.07it/s]

{'epoch': 3, 'iter': 15460, 'avg_loss': 8.522451362174385, 'avg_acc': 49.97089450876398, 'loss': 8.538042068481445}


EP_train:3:  56%|| 15472/27626 [36:29<28:43,  7.05it/s]

{'epoch': 3, 'iter': 15470, 'avg_loss': 8.52243657436054, 'avg_acc': 49.97454915648633, 'loss': 8.432840347290039}


EP_train:3:  56%|| 15482/27626 [36:30<28:35,  7.08it/s]

{'epoch': 3, 'iter': 15480, 'avg_loss': 8.522275051954496, 'avg_acc': 49.97456559653769, 'loss': 7.865800857543945}


EP_train:3:  56%|| 15492/27626 [36:32<28:52,  7.00it/s]

{'epoch': 3, 'iter': 15490, 'avg_loss': 8.52234438730636, 'avg_acc': 49.97337163514299, 'loss': 9.082030296325684}


EP_train:3:  56%|| 15502/27626 [36:33<28:53,  7.00it/s]

{'epoch': 3, 'iter': 15500, 'avg_loss': 8.52260268588934, 'avg_acc': 49.971372814657116, 'loss': 8.33437442779541}


EP_train:3:  56%|| 15512/27626 [36:34<28:53,  6.99it/s]

{'epoch': 3, 'iter': 15510, 'avg_loss': 8.522603842623575, 'avg_acc': 49.97139127071111, 'loss': 8.071842193603516}


EP_train:3:  56%|| 15522/27626 [36:36<28:41,  7.03it/s]

{'epoch': 3, 'iter': 15520, 'avg_loss': 8.52267033165474, 'avg_acc': 49.97100702274338, 'loss': 8.350577354431152}


EP_train:3:  56%|| 15532/27626 [36:37<28:33,  7.06it/s]

{'epoch': 3, 'iter': 15530, 'avg_loss': 8.522914873518246, 'avg_acc': 49.97122690103664, 'loss': 8.528883934020996}


EP_train:3:  56%|| 15542/27626 [36:39<28:34,  7.05it/s]

{'epoch': 3, 'iter': 15540, 'avg_loss': 8.522881982194725, 'avg_acc': 49.97506595457178, 'loss': 8.958645820617676}


EP_train:3:  56%|| 15552/27626 [36:40<28:41,  7.01it/s]

{'epoch': 3, 'iter': 15550, 'avg_loss': 8.523150536233793, 'avg_acc': 49.973474374638286, 'loss': 9.425209045410156}


EP_train:3:  56%|| 15562/27626 [36:41<28:38,  7.02it/s]

{'epoch': 3, 'iter': 15560, 'avg_loss': 8.52308322975625, 'avg_acc': 49.97389306599833, 'loss': 7.970546245574951}


EP_train:3:  56%|| 15572/27626 [36:43<28:40,  7.00it/s]

{'epoch': 3, 'iter': 15570, 'avg_loss': 8.522833985175618, 'avg_acc': 49.97330775158949, 'loss': 8.611211776733398}


EP_train:3:  56%|| 15582/27626 [36:44<29:03,  6.91it/s]

{'epoch': 3, 'iter': 15580, 'avg_loss': 8.5227397980888, 'avg_acc': 49.97492940119376, 'loss': 8.425271034240723}


EP_train:3:  56%|| 15592/27626 [36:46<28:46,  6.97it/s]

{'epoch': 3, 'iter': 15590, 'avg_loss': 8.522780129131302, 'avg_acc': 49.97614809826182, 'loss': 8.566020965576172}


EP_train:3:  56%|| 15602/27626 [36:47<28:32,  7.02it/s]

{'epoch': 3, 'iter': 15600, 'avg_loss': 8.522673643274908, 'avg_acc': 49.97696461765271, 'loss': 8.241281509399414}


EP_train:3:  57%|| 15612/27626 [36:49<28:35,  7.00it/s]

{'epoch': 3, 'iter': 15610, 'avg_loss': 8.522806205722633, 'avg_acc': 49.979781884568574, 'loss': 8.519868850708008}


EP_train:3:  57%|| 15622/27626 [36:50<28:46,  6.95it/s]

{'epoch': 3, 'iter': 15620, 'avg_loss': 8.522940122182998, 'avg_acc': 49.983795851738044, 'loss': 8.464818954467773}


EP_train:3:  57%|| 15632/27626 [36:51<28:39,  6.98it/s]

{'epoch': 3, 'iter': 15630, 'avg_loss': 8.522968835332223, 'avg_acc': 49.983206448723685, 'loss': 8.088207244873047}


EP_train:3:  57%|| 15642/27626 [36:53<28:26,  7.02it/s]

{'epoch': 3, 'iter': 15640, 'avg_loss': 8.522620013650211, 'avg_acc': 49.983616776420945, 'loss': 8.170910835266113}


EP_train:3:  57%|| 15652/27626 [36:54<28:39,  6.97it/s]

{'epoch': 3, 'iter': 15650, 'avg_loss': 8.522555052133484, 'avg_acc': 49.986422592805575, 'loss': 8.404024124145508}


EP_train:3:  57%|| 15662/27626 [36:56<28:33,  6.98it/s]

{'epoch': 3, 'iter': 15660, 'avg_loss': 8.522553911063568, 'avg_acc': 49.98523402081604, 'loss': 8.50804328918457}


EP_train:3:  57%|| 15672/27626 [36:57<28:30,  6.99it/s]

{'epoch': 3, 'iter': 15670, 'avg_loss': 8.522902006637722, 'avg_acc': 49.984246378661226, 'loss': 8.498054504394531}


EP_train:3:  57%|| 15682/27626 [36:59<28:46,  6.92it/s]

{'epoch': 3, 'iter': 15680, 'avg_loss': 8.522897461255695, 'avg_acc': 49.98445571073273, 'loss': 8.610154151916504}


EP_train:3:  57%|| 15692/27626 [37:00<28:36,  6.95it/s]

{'epoch': 3, 'iter': 15690, 'avg_loss': 8.523000715765182, 'avg_acc': 49.98566056975336, 'loss': 8.922079086303711}


EP_train:3:  57%|| 15702/27626 [37:01<28:37,  6.94it/s]

{'epoch': 3, 'iter': 15700, 'avg_loss': 8.523046357594785, 'avg_acc': 49.98746098974588, 'loss': 8.233582496643066}


EP_train:3:  57%|| 15712/27626 [37:03<28:24,  6.99it/s]

{'epoch': 3, 'iter': 15710, 'avg_loss': 8.522889603543241, 'avg_acc': 49.98925911781554, 'loss': 8.623859405517578}


EP_train:3:  57%|| 15722/27626 [37:04<28:28,  6.97it/s]

{'epoch': 3, 'iter': 15720, 'avg_loss': 8.522791759331303, 'avg_acc': 49.98787449907767, 'loss': 8.064838409423828}


EP_train:3:  57%|| 15732/27626 [37:06<28:02,  7.07it/s]

{'epoch': 3, 'iter': 15730, 'avg_loss': 8.52289629218977, 'avg_acc': 49.98708759773695, 'loss': 8.558478355407715}


EP_train:3:  57%|| 15742/27626 [37:07<28:08,  7.04it/s]

{'epoch': 3, 'iter': 15740, 'avg_loss': 8.52319357236287, 'avg_acc': 49.98689727463312, 'loss': 9.579325675964355}


EP_train:3:  57%|| 15752/27626 [37:08<27:50,  7.11it/s]

{'epoch': 3, 'iter': 15750, 'avg_loss': 8.523183512661952, 'avg_acc': 49.9855167925846, 'loss': 8.793447494506836}


EP_train:3:  57%|| 15762/27626 [37:10<27:56,  7.08it/s]

{'epoch': 3, 'iter': 15760, 'avg_loss': 8.523216222538332, 'avg_acc': 49.98631907873866, 'loss': 8.814037322998047}


EP_train:3:  57%|| 15772/27626 [37:11<28:00,  7.05it/s]

{'epoch': 3, 'iter': 15770, 'avg_loss': 8.523048318004601, 'avg_acc': 49.98434626846744, 'loss': 8.03493595123291}


EP_train:3:  57%|| 15782/27626 [37:13<28:04,  7.03it/s]

{'epoch': 3, 'iter': 15780, 'avg_loss': 8.522911632614711, 'avg_acc': 49.986336417210566, 'loss': 8.291051864624023}


EP_train:3:  57%|| 15792/27626 [37:14<27:54,  7.07it/s]

{'epoch': 3, 'iter': 15790, 'avg_loss': 8.522801018932865, 'avg_acc': 49.987532455195996, 'loss': 8.149724006652832}


EP_train:3:  57%|| 15802/27626 [37:16<28:08,  7.00it/s]

{'epoch': 3, 'iter': 15800, 'avg_loss': 8.52329923605498, 'avg_acc': 49.98457376115436, 'loss': 9.450238227844238}


EP_train:3:  57%|| 15812/27626 [37:17<27:55,  7.05it/s]

{'epoch': 3, 'iter': 15810, 'avg_loss': 8.523725640592067, 'avg_acc': 49.986164695465185, 'loss': 8.445732116699219}


EP_train:3:  57%|| 15822/27626 [37:18<28:10,  6.98it/s]

{'epoch': 3, 'iter': 15820, 'avg_loss': 8.52379403308116, 'avg_acc': 49.98538335124202, 'loss': 9.060356140136719}


EP_train:3:  57%|| 15832/27626 [37:20<27:45,  7.08it/s]

{'epoch': 3, 'iter': 15830, 'avg_loss': 8.523729293648062, 'avg_acc': 49.98578737919273, 'loss': 8.487948417663574}


EP_train:3:  57%|| 15842/27626 [37:21<27:41,  7.09it/s]

{'epoch': 3, 'iter': 15840, 'avg_loss': 8.523535025856514, 'avg_acc': 49.984020895145505, 'loss': 6.891946792602539}


EP_train:3:  57%|| 15852/27626 [37:23<27:57,  7.02it/s]

{'epoch': 3, 'iter': 15850, 'avg_loss': 8.523519488916993, 'avg_acc': 49.98659390574727, 'loss': 8.453503608703613}


EP_train:3:  57%|| 15862/27626 [37:24<27:58,  7.01it/s]

{'epoch': 3, 'iter': 15860, 'avg_loss': 8.523313258351282, 'avg_acc': 49.986799382132276, 'loss': 8.089439392089844}


EP_train:3:  57%|| 15872/27626 [37:25<27:52,  7.03it/s]

{'epoch': 3, 'iter': 15870, 'avg_loss': 8.52320393926517, 'avg_acc': 49.98542939953374, 'loss': 8.65116024017334}


EP_train:3:  57%|| 15882/27626 [37:27<27:36,  7.09it/s]

{'epoch': 3, 'iter': 15880, 'avg_loss': 8.523144170173687, 'avg_acc': 49.988193438700335, 'loss': 8.587913513183594}


EP_train:3:  58%|| 15892/27626 [37:28<27:38,  7.07it/s]

{'epoch': 3, 'iter': 15890, 'avg_loss': 8.523048706077493, 'avg_acc': 49.990167390346734, 'loss': 9.05382251739502}


EP_train:3:  58%|| 15902/27626 [37:30<27:51,  7.01it/s]

{'epoch': 3, 'iter': 15900, 'avg_loss': 8.52297763168928, 'avg_acc': 49.988797874347526, 'loss': 8.780227661132812}


EP_train:3:  58%|| 15912/27626 [37:31<27:29,  7.10it/s]

{'epoch': 3, 'iter': 15910, 'avg_loss': 8.523136953204396, 'avg_acc': 49.98743007981899, 'loss': 8.864703178405762}


EP_train:3:  58%|| 15922/27626 [37:32<27:32,  7.08it/s]

{'epoch': 3, 'iter': 15920, 'avg_loss': 8.523040366827203, 'avg_acc': 49.98488631367376, 'loss': 9.074892044067383}


EP_train:3:  58%|| 15932/27626 [37:34<27:37,  7.06it/s]

{'epoch': 3, 'iter': 15930, 'avg_loss': 8.523147797885345, 'avg_acc': 49.986268909672965, 'loss': 9.144465446472168}


EP_train:3:  58%|| 15942/27626 [37:35<27:30,  7.08it/s]

{'epoch': 3, 'iter': 15940, 'avg_loss': 8.523101082156844, 'avg_acc': 49.98451320494323, 'loss': 8.465303421020508}


EP_train:3:  58%|| 15952/27626 [37:37<27:40,  7.03it/s]

{'epoch': 3, 'iter': 15950, 'avg_loss': 8.523098638218986, 'avg_acc': 49.98413108895994, 'loss': 8.130630493164062}


EP_train:3:  58%|| 15962/27626 [37:38<27:48,  6.99it/s]

{'epoch': 3, 'iter': 15960, 'avg_loss': 8.522957059302042, 'avg_acc': 49.98237892362634, 'loss': 8.080557823181152}


EP_train:3:  58%|| 15972/27626 [37:40<27:31,  7.06it/s]

{'epoch': 3, 'iter': 15970, 'avg_loss': 8.522662849508674, 'avg_acc': 49.978476613862625, 'loss': 8.596407890319824}


EP_train:3:  58%|| 15982/27626 [37:41<27:33,  7.04it/s]

{'epoch': 3, 'iter': 15980, 'avg_loss': 8.522749051844237, 'avg_acc': 49.977316813716286, 'loss': 9.114126205444336}


EP_train:3:  58%|| 15992/27626 [37:42<27:33,  7.04it/s]

{'epoch': 3, 'iter': 15990, 'avg_loss': 8.522976210369162, 'avg_acc': 49.97908980051279, 'loss': 9.371477127075195}


EP_train:3:  58%|| 16002/27626 [37:44<27:16,  7.10it/s]

{'epoch': 3, 'iter': 16000, 'avg_loss': 8.522858842090773, 'avg_acc': 49.97871226798325, 'loss': 8.645515441894531}


EP_train:3:  58%|| 16012/27626 [37:45<27:19,  7.09it/s]

{'epoch': 3, 'iter': 16010, 'avg_loss': 8.523028325382324, 'avg_acc': 49.97950627693461, 'loss': 8.805976867675781}


EP_train:3:  58%|| 16022/27626 [37:47<27:14,  7.10it/s]

{'epoch': 3, 'iter': 16020, 'avg_loss': 8.523073597411509, 'avg_acc': 49.97873884276886, 'loss': 9.23629093170166}


EP_train:3:  58%|| 16032/27626 [37:48<27:19,  7.07it/s]

{'epoch': 3, 'iter': 16030, 'avg_loss': 8.52298862627296, 'avg_acc': 49.978167300854594, 'loss': 8.243350982666016}


EP_train:3:  58%|| 16042/27626 [37:49<27:21,  7.06it/s]

{'epoch': 3, 'iter': 16040, 'avg_loss': 8.522836279478181, 'avg_acc': 49.97779128483262, 'loss': 8.059730529785156}


EP_train:3:  58%|| 16052/27626 [37:51<27:32,  7.00it/s]

{'epoch': 3, 'iter': 16050, 'avg_loss': 8.522668806297906, 'avg_acc': 49.979557348451806, 'loss': 7.854214668273926}


EP_train:3:  58%|| 16062/27626 [37:52<27:19,  7.05it/s]

{'epoch': 3, 'iter': 16060, 'avg_loss': 8.522872450491105, 'avg_acc': 49.979959217981445, 'loss': 9.4585542678833}


EP_train:3:  58%|| 16072/27626 [37:54<27:20,  7.04it/s]

{'epoch': 3, 'iter': 16070, 'avg_loss': 8.52295127320576, 'avg_acc': 49.9788049903553, 'loss': 7.9506144523620605}


EP_train:3:  58%|| 16082/27626 [37:55<27:15,  7.06it/s]

{'epoch': 3, 'iter': 16080, 'avg_loss': 8.523107710513864, 'avg_acc': 49.97959548535539, 'loss': 8.292367935180664}


EP_train:3:  58%|| 16092/27626 [37:57<27:07,  7.09it/s]

{'epoch': 3, 'iter': 16090, 'avg_loss': 8.523359963095409, 'avg_acc': 49.97999658194022, 'loss': 9.16720962524414}


EP_train:3:  58%|| 16102/27626 [37:58<26:58,  7.12it/s]

{'epoch': 3, 'iter': 16100, 'avg_loss': 8.523212118760657, 'avg_acc': 49.97962083100428, 'loss': 9.16066837310791}


EP_train:3:  58%|| 16112/27626 [37:59<27:12,  7.05it/s]

{'epoch': 3, 'iter': 16110, 'avg_loss': 8.523249012554803, 'avg_acc': 49.98176711563528, 'loss': 8.287409782409668}


EP_train:3:  58%|| 16122/27626 [38:01<27:04,  7.08it/s]

{'epoch': 3, 'iter': 16120, 'avg_loss': 8.523163105312925, 'avg_acc': 49.9815845791204, 'loss': 7.554603099822998}


EP_train:3:  58%|| 16132/27626 [38:02<27:12,  7.04it/s]

{'epoch': 3, 'iter': 16130, 'avg_loss': 8.523276023002769, 'avg_acc': 49.97907755253859, 'loss': 8.650101661682129}


EP_train:3:  58%|| 16142/27626 [38:04<27:15,  7.02it/s]

{'epoch': 3, 'iter': 16140, 'avg_loss': 8.523236868483568, 'avg_acc': 49.97696084505297, 'loss': 8.595856666564941}


EP_train:3:  58%|| 16152/27626 [38:05<27:15,  7.02it/s]

{'epoch': 3, 'iter': 16150, 'avg_loss': 8.523291319311607, 'avg_acc': 49.97871648814315, 'loss': 7.682339191436768}


EP_train:3:  59%|| 16162/27626 [38:06<27:14,  7.02it/s]

{'epoch': 3, 'iter': 16160, 'avg_loss': 8.523110596335957, 'avg_acc': 49.97892302456531, 'loss': 8.13215160369873}


EP_train:3:  59%|| 16172/27626 [38:08<27:11,  7.02it/s]

{'epoch': 3, 'iter': 16170, 'avg_loss': 8.52304674101616, 'avg_acc': 49.97912930554696, 'loss': 8.421292304992676}


EP_train:3:  59%|| 16182/27626 [38:09<27:08,  7.03it/s]

{'epoch': 3, 'iter': 16180, 'avg_loss': 8.522948123565762, 'avg_acc': 49.980494098016194, 'loss': 8.83859920501709}


EP_train:3:  59%|| 16192/27626 [38:11<27:00,  7.05it/s]

{'epoch': 3, 'iter': 16190, 'avg_loss': 8.522864060451688, 'avg_acc': 49.98417330615774, 'loss': 8.973061561584473}


EP_train:3:  59%|| 16202/27626 [38:12<27:07,  7.02it/s]

{'epoch': 3, 'iter': 16200, 'avg_loss': 8.522784564415765, 'avg_acc': 49.983990185791, 'loss': 8.656554222106934}


EP_train:3:  59%|| 16212/27626 [38:13<27:07,  7.01it/s]

{'epoch': 3, 'iter': 16210, 'avg_loss': 8.52283599520662, 'avg_acc': 49.98284343963975, 'loss': 8.233231544494629}


EP_train:3:  59%|| 16222/27626 [38:15<27:09,  7.00it/s]

{'epoch': 3, 'iter': 16220, 'avg_loss': 8.522731906792568, 'avg_acc': 49.981890758892796, 'loss': 8.203920364379883}


EP_train:3:  59%|| 16232/27626 [38:16<27:11,  6.98it/s]

{'epoch': 3, 'iter': 16230, 'avg_loss': 8.522597661469915, 'avg_acc': 49.980554186433366, 'loss': 7.978784084320068}


EP_train:3:  59%|| 16242/27626 [38:18<27:03,  7.01it/s]

{'epoch': 3, 'iter': 16240, 'avg_loss': 8.522660184087119, 'avg_acc': 49.98095098823964, 'loss': 8.065279960632324}


EP_train:3:  59%|| 16252/27626 [38:19<26:58,  7.03it/s]

{'epoch': 3, 'iter': 16250, 'avg_loss': 8.52264498784046, 'avg_acc': 49.97980893483478, 'loss': 8.228536605834961}


EP_train:3:  59%|| 16262/27626 [38:21<27:09,  6.98it/s]

{'epoch': 3, 'iter': 16260, 'avg_loss': 8.522419654738748, 'avg_acc': 49.97905264128897, 'loss': 8.403290748596191}


EP_train:3:  59%|| 16272/27626 [38:22<26:45,  7.07it/s]

{'epoch': 3, 'iter': 16270, 'avg_loss': 8.522606124055018, 'avg_acc': 49.977336979902894, 'loss': 9.778314590454102}


EP_train:3:  59%|| 16282/27626 [38:23<26:47,  7.06it/s]

{'epoch': 3, 'iter': 16280, 'avg_loss': 8.522901500181517, 'avg_acc': 49.97696701676801, 'loss': 8.502901077270508}


EP_train:3:  59%|| 16292/27626 [38:25<26:52,  7.03it/s]

{'epoch': 3, 'iter': 16290, 'avg_loss': 8.522930934177301, 'avg_acc': 49.974871094469336, 'loss': 7.723991870880127}


EP_train:3:  59%|| 16302/27626 [38:26<26:56,  7.00it/s]

{'epoch': 3, 'iter': 16300, 'avg_loss': 8.522476700447607, 'avg_acc': 49.97373627384823, 'loss': 8.388490676879883}


EP_train:3:  59%|| 16312/27626 [38:28<26:49,  7.03it/s]

{'epoch': 3, 'iter': 16310, 'avg_loss': 8.522483220245697, 'avg_acc': 49.97183649071179, 'loss': 8.32590389251709}


EP_train:3:  59%|| 16322/27626 [38:29<26:50,  7.02it/s]

{'epoch': 3, 'iter': 16320, 'avg_loss': 8.522423094141812, 'avg_acc': 49.973576986704245, 'loss': 8.684256553649902}


EP_train:3:  59%|| 16332/27626 [38:31<26:41,  7.05it/s]

{'epoch': 3, 'iter': 16330, 'avg_loss': 8.522421035652144, 'avg_acc': 49.97340181250382, 'loss': 8.190498352050781}


EP_train:3:  59%|| 16342/27626 [38:32<26:46,  7.02it/s]

{'epoch': 3, 'iter': 16340, 'avg_loss': 8.522391758848672, 'avg_acc': 49.97150572180405, 'loss': 8.963865280151367}


EP_train:3:  59%|| 16352/27626 [38:33<26:54,  6.98it/s]

{'epoch': 3, 'iter': 16350, 'avg_loss': 8.522420386569332, 'avg_acc': 49.96942083053024, 'loss': 8.185834884643555}


EP_train:3:  59%|| 16362/27626 [38:35<26:29,  7.09it/s]

{'epoch': 3, 'iter': 16360, 'avg_loss': 8.522465959197698, 'avg_acc': 49.970203532791395, 'loss': 8.354182243347168}


EP_train:3:  59%|| 16372/27626 [38:36<26:42,  7.02it/s]

{'epoch': 3, 'iter': 16370, 'avg_loss': 8.52220376236482, 'avg_acc': 49.96945818825973, 'loss': 8.218952178955078}


EP_train:3:  59%|| 16382/27626 [38:38<26:33,  7.06it/s]

{'epoch': 3, 'iter': 16380, 'avg_loss': 8.521867866910833, 'avg_acc': 49.96814144435627, 'loss': 9.119314193725586}


EP_train:3:  59%|| 16392/27626 [38:39<27:31,  6.80it/s]

{'epoch': 3, 'iter': 16390, 'avg_loss': 8.522035386242196, 'avg_acc': 49.96854218778598, 'loss': 8.805375099182129}


EP_train:3:  59%|| 16402/27626 [38:40<26:46,  6.99it/s]

{'epoch': 3, 'iter': 16400, 'avg_loss': 8.522495512627703, 'avg_acc': 49.969704591183465, 'loss': 9.07874584197998}


EP_train:3:  59%|| 16412/27626 [38:42<26:46,  6.98it/s]

{'epoch': 3, 'iter': 16410, 'avg_loss': 8.522645358040945, 'avg_acc': 49.97124642008409, 'loss': 8.486631393432617}


EP_train:3:  59%|| 16422/27626 [38:43<26:36,  7.02it/s]

{'epoch': 3, 'iter': 16420, 'avg_loss': 8.52262556636372, 'avg_acc': 49.97202515072164, 'loss': 8.720596313476562}


EP_train:3:  59%|| 16432/27626 [38:45<26:38,  7.00it/s]

{'epoch': 3, 'iter': 16430, 'avg_loss': 8.522559968489723, 'avg_acc': 49.9733735013085, 'loss': 8.39950180053711}


EP_train:3:  60%|| 16442/27626 [38:46<26:29,  7.03it/s]

{'epoch': 3, 'iter': 16440, 'avg_loss': 8.522425539511978, 'avg_acc': 49.973389696490486, 'loss': 8.881513595581055}


EP_train:3:  60%|| 16452/27626 [38:48<26:25,  7.05it/s]

{'epoch': 3, 'iter': 16450, 'avg_loss': 8.522372142890761, 'avg_acc': 49.975115494498816, 'loss': 8.10995101928711}


EP_train:3:  60%|| 16462/27626 [38:49<26:23,  7.05it/s]

{'epoch': 3, 'iter': 16460, 'avg_loss': 8.522517395123815, 'avg_acc': 49.975700139724196, 'loss': 7.8453168869018555}


EP_train:3:  60%|| 16472/27626 [38:50<26:23,  7.04it/s]

{'epoch': 3, 'iter': 16470, 'avg_loss': 8.522443229387413, 'avg_acc': 49.977232712039346, 'loss': 8.553461074829102}


EP_train:3:  60%|| 16482/27626 [38:52<26:29,  7.01it/s]

{'epoch': 3, 'iter': 16480, 'avg_loss': 8.522444473988989, 'avg_acc': 49.97857381226867, 'loss': 8.759374618530273}


EP_train:3:  60%|| 16492/27626 [38:53<26:31,  6.99it/s]

{'epoch': 3, 'iter': 16490, 'avg_loss': 8.522409773864709, 'avg_acc': 49.97934479413013, 'loss': 8.0072603225708}


EP_train:3:  60%|| 16502/27626 [38:55<26:18,  7.05it/s]

{'epoch': 3, 'iter': 16500, 'avg_loss': 8.522412204488134, 'avg_acc': 49.978599781831406, 'loss': 8.701889991760254}


EP_train:3:  60%|| 16512/27626 [38:56<26:22,  7.02it/s]

{'epoch': 3, 'iter': 16510, 'avg_loss': 8.522301105704662, 'avg_acc': 49.978044939737146, 'loss': 9.019835472106934}


EP_train:3:  60%|| 16522/27626 [38:57<26:17,  7.04it/s]

{'epoch': 3, 'iter': 16520, 'avg_loss': 8.522502622955916, 'avg_acc': 49.97484262453847, 'loss': 9.63005542755127}


EP_train:3:  60%|| 16532/27626 [38:59<26:24,  7.00it/s]

{'epoch': 3, 'iter': 16530, 'avg_loss': 8.522433581856157, 'avg_acc': 49.97429072651382, 'loss': 8.898393630981445}


EP_train:3:  60%|| 16542/27626 [39:00<26:25,  6.99it/s]

{'epoch': 3, 'iter': 16540, 'avg_loss': 8.522446409461503, 'avg_acc': 49.97487304274228, 'loss': 8.777814865112305}


EP_train:3:  60%|| 16552/27626 [39:02<26:14,  7.03it/s]

{'epoch': 3, 'iter': 16550, 'avg_loss': 8.52247972328795, 'avg_acc': 49.97677632771434, 'loss': 7.828705787658691}


EP_train:3:  60%|| 16562/27626 [39:03<26:18,  7.01it/s]

{'epoch': 3, 'iter': 16560, 'avg_loss': 8.522496622172065, 'avg_acc': 49.975846869150416, 'loss': 8.675336837768555}


EP_train:3:  60%|| 16572/27626 [39:05<26:22,  6.99it/s]

{'epoch': 3, 'iter': 16570, 'avg_loss': 8.522557601781381, 'avg_acc': 49.976427192082554, 'loss': 8.347757339477539}


EP_train:3:  60%|| 16582/27626 [39:06<26:20,  6.99it/s]

{'epoch': 3, 'iter': 16580, 'avg_loss': 8.522676700778163, 'avg_acc': 49.97342590917315, 'loss': 9.217449188232422}


EP_train:3:  60%|| 16592/27626 [39:07<26:17,  6.99it/s]

{'epoch': 3, 'iter': 16590, 'avg_loss': 8.52281448254928, 'avg_acc': 49.9723117955518, 'loss': 8.304679870605469}


EP_train:3:  60%|| 16602/27626 [39:09<26:12,  7.01it/s]

{'epoch': 3, 'iter': 16600, 'avg_loss': 8.522807843411847, 'avg_acc': 49.974963857598944, 'loss': 8.944863319396973}


EP_train:3:  60%|| 16612/27626 [39:10<26:12,  7.00it/s]

{'epoch': 3, 'iter': 16610, 'avg_loss': 8.522701109046286, 'avg_acc': 49.972721389440736, 'loss': 8.823780059814453}


EP_train:3:  60%|| 16622/27626 [39:12<26:01,  7.05it/s]

{'epoch': 3, 'iter': 16620, 'avg_loss': 8.522851676618455, 'avg_acc': 49.97104566512244, 'loss': 8.294068336486816}


EP_train:3:  60%|| 16632/27626 [39:13<26:00,  7.04it/s]

{'epoch': 3, 'iter': 16630, 'avg_loss': 8.522985651878562, 'avg_acc': 49.971250977090975, 'loss': 9.03678035736084}


EP_train:3:  60%|| 16642/27626 [39:14<26:09,  7.00it/s]

{'epoch': 3, 'iter': 16640, 'avg_loss': 8.523108805839879, 'avg_acc': 49.97145604230515, 'loss': 8.066601753234863}


EP_train:3:  60%|| 16652/27626 [39:16<25:59,  7.04it/s]

{'epoch': 3, 'iter': 16650, 'avg_loss': 8.523222248043487, 'avg_acc': 49.97053480271455, 'loss': 8.425418853759766}


EP_train:3:  60%|| 16662/27626 [39:17<26:01,  7.02it/s]

{'epoch': 3, 'iter': 16660, 'avg_loss': 8.523308255493436, 'avg_acc': 49.97111517916091, 'loss': 8.576090812683105}


EP_train:3:  60%|| 16672/27626 [39:19<26:06,  6.99it/s]

{'epoch': 3, 'iter': 16670, 'avg_loss': 8.523287512103238, 'avg_acc': 49.97057015176054, 'loss': 8.544721603393555}


EP_train:3:  60%|| 16682/27626 [39:20<26:13,  6.95it/s]

{'epoch': 3, 'iter': 16680, 'avg_loss': 8.523460610865774, 'avg_acc': 49.96871440561117, 'loss': 9.507766723632812}


EP_train:3:  60%|| 16692/27626 [39:22<26:11,  6.96it/s]

{'epoch': 3, 'iter': 16690, 'avg_loss': 8.523435335620398, 'avg_acc': 49.96892037625067, 'loss': 8.998241424560547}


EP_train:3:  60%|| 16702/27626 [39:23<25:49,  7.05it/s]

{'epoch': 3, 'iter': 16700, 'avg_loss': 8.52362781755582, 'avg_acc': 49.96744206933717, 'loss': 8.998944282531738}


EP_train:3:  60%|| 16712/27626 [39:24<25:45,  7.06it/s]

{'epoch': 3, 'iter': 16710, 'avg_loss': 8.523711558811028, 'avg_acc': 49.968957572856205, 'loss': 8.33843994140625}


EP_train:3:  61%|| 16722/27626 [39:26<25:49,  7.04it/s]

{'epoch': 3, 'iter': 16720, 'avg_loss': 8.523682994510589, 'avg_acc': 49.968228574846, 'loss': 8.273503303527832}


EP_train:3:  61%|| 16732/27626 [39:27<25:44,  7.05it/s]

{'epoch': 3, 'iter': 16730, 'avg_loss': 8.523755736405448, 'avg_acc': 49.96936823859901, 'loss': 8.508471488952637}


EP_train:3:  61%|| 16742/27626 [39:29<25:52,  7.01it/s]

{'epoch': 3, 'iter': 16740, 'avg_loss': 8.523881909554024, 'avg_acc': 49.96826653127053, 'loss': 8.38787841796875}


EP_train:3:  61%|| 16752/27626 [39:30<25:48,  7.02it/s]

{'epoch': 3, 'iter': 16750, 'avg_loss': 8.523781873298669, 'avg_acc': 49.968285475494, 'loss': 8.354016304016113}


EP_train:3:  61%|| 16762/27626 [39:32<25:37,  7.06it/s]

{'epoch': 3, 'iter': 16760, 'avg_loss': 8.52369095045729, 'avg_acc': 49.9703552890639, 'loss': 7.992939472198486}


EP_train:3:  61%|| 16772/27626 [39:33<25:49,  7.00it/s]

{'epoch': 3, 'iter': 16770, 'avg_loss': 8.523740397647728, 'avg_acc': 49.972981634965116, 'loss': 8.790223121643066}


EP_train:3:  61%|| 16782/27626 [39:34<25:57,  6.96it/s]

{'epoch': 3, 'iter': 16780, 'avg_loss': 8.52376462665774, 'avg_acc': 49.97243906799356, 'loss': 8.212077140808105}


EP_train:3:  61%|| 16792/27626 [39:36<25:43,  7.02it/s]

{'epoch': 3, 'iter': 16790, 'avg_loss': 8.523869437278542, 'avg_acc': 49.97189714728128, 'loss': 8.027703285217285}


EP_train:3:  61%|| 16802/27626 [39:37<25:38,  7.04it/s]

{'epoch': 3, 'iter': 16800, 'avg_loss': 8.523782742590388, 'avg_acc': 49.97098387000774, 'loss': 8.089905738830566}


EP_train:3:  61%|| 16812/27626 [39:39<25:43,  7.01it/s]

{'epoch': 3, 'iter': 16810, 'avg_loss': 8.523903083449548, 'avg_acc': 49.97378948307656, 'loss': 8.121581077575684}


EP_train:3:  61%|| 16822/27626 [39:40<25:36,  7.03it/s]

{'epoch': 3, 'iter': 16820, 'avg_loss': 8.523876054570009, 'avg_acc': 49.973805065097196, 'loss': 8.767867088317871}


EP_train:3:  61%|| 16832/27626 [39:41<25:29,  7.06it/s]

{'epoch': 3, 'iter': 16830, 'avg_loss': 8.523757030645033, 'avg_acc': 49.97029291188878, 'loss': 7.928059101104736}


EP_train:3:  61%|| 16842/27626 [39:43<25:30,  7.04it/s]

{'epoch': 3, 'iter': 16840, 'avg_loss': 8.523789946055102, 'avg_acc': 49.969753874473014, 'loss': 8.035487174987793}


EP_train:3:  61%|| 16852/27626 [39:44<25:34,  7.02it/s]

{'epoch': 3, 'iter': 16850, 'avg_loss': 8.523672952092257, 'avg_acc': 49.96884457895674, 'loss': 7.093142032623291}


EP_train:3:  61%|| 16862/27626 [39:46<25:41,  6.98it/s]

{'epoch': 3, 'iter': 16860, 'avg_loss': 8.523424371918795, 'avg_acc': 49.96923373465393, 'loss': 7.720134258270264}


EP_train:3:  61%|| 16872/27626 [39:47<25:20,  7.07it/s]

{'epoch': 3, 'iter': 16870, 'avg_loss': 8.523441469243267, 'avg_acc': 49.96739967992413, 'loss': 8.678857803344727}


EP_train:3:  61%|| 16882/27626 [39:49<25:32,  7.01it/s]

{'epoch': 3, 'iter': 16880, 'avg_loss': 8.523509064272083, 'avg_acc': 49.97056602097032, 'loss': 9.016273498535156}


EP_train:3:  61%|| 16892/27626 [39:50<25:24,  7.04it/s]

{'epoch': 3, 'iter': 16890, 'avg_loss': 8.52358669184797, 'avg_acc': 49.96984340773193, 'loss': 8.58687686920166}


EP_train:3:  61%|| 16902/27626 [39:51<25:31,  7.00it/s]

{'epoch': 3, 'iter': 16900, 'avg_loss': 8.523461216194772, 'avg_acc': 49.969121649606535, 'loss': 7.475400924682617}


EP_train:3:  61%|| 16912/27626 [39:53<25:23,  7.03it/s]

{'epoch': 3, 'iter': 16910, 'avg_loss': 8.52345501786537, 'avg_acc': 49.96655283543256, 'loss': 8.362052917480469}


EP_train:3:  61%|| 16922/27626 [39:54<25:22,  7.03it/s]

{'epoch': 3, 'iter': 16920, 'avg_loss': 8.523525786761956, 'avg_acc': 49.969158146681636, 'loss': 9.114469528198242}


EP_train:3:  61%|| 16932/27626 [39:56<25:28,  7.00it/s]

{'epoch': 3, 'iter': 16930, 'avg_loss': 8.523593205370911, 'avg_acc': 49.96954550823933, 'loss': 8.322992324829102}


EP_train:3:  61%|| 16942/27626 [39:57<25:31,  6.98it/s]

{'epoch': 3, 'iter': 16940, 'avg_loss': 8.523582182909493, 'avg_acc': 49.972145977215035, 'loss': 7.919614315032959}


EP_train:3:  61%|| 16952/27626 [39:58<25:20,  7.02it/s]

{'epoch': 3, 'iter': 16950, 'avg_loss': 8.523465202323216, 'avg_acc': 49.970318860244234, 'loss': 8.665274620056152}


EP_train:3:  61%|| 16962/27626 [40:00<25:20,  7.01it/s]

{'epoch': 3, 'iter': 16960, 'avg_loss': 8.523372044913872, 'avg_acc': 49.969783621248745, 'loss': 8.557741165161133}


EP_train:3:  61%|| 16972/27626 [40:01<25:07,  7.07it/s]

{'epoch': 3, 'iter': 16970, 'avg_loss': 8.523332588351346, 'avg_acc': 49.969249013022214, 'loss': 7.86218786239624}


EP_train:3:  61%|| 16982/27626 [40:03<25:14,  7.03it/s]

{'epoch': 3, 'iter': 16980, 'avg_loss': 8.523324647378558, 'avg_acc': 49.9690830928685, 'loss': 8.016592979431152}


EP_train:3:  62%|| 16992/27626 [40:04<25:26,  6.97it/s]

{'epoch': 3, 'iter': 16990, 'avg_loss': 8.52334243585711, 'avg_acc': 49.96671031722676, 'loss': 8.277420043945312}


EP_train:3:  62%|| 17002/27626 [40:06<25:23,  6.97it/s]

{'epoch': 3, 'iter': 17000, 'avg_loss': 8.523144432617828, 'avg_acc': 49.96672989824128, 'loss': 8.974631309509277}


EP_train:3:  62%|| 17012/27626 [40:07<25:18,  6.99it/s]

{'epoch': 3, 'iter': 17010, 'avg_loss': 8.523069520692095, 'avg_acc': 49.96656575157251, 'loss': 8.458418846130371}


EP_train:3:  62%|| 17022/27626 [40:08<25:09,  7.02it/s]

{'epoch': 3, 'iter': 17020, 'avg_loss': 8.522993076058436, 'avg_acc': 49.96970653898126, 'loss': 9.204154014587402}


EP_train:3:  62%|| 17032/27626 [40:10<25:16,  6.98it/s]

{'epoch': 3, 'iter': 17030, 'avg_loss': 8.523071403211118, 'avg_acc': 49.96990781516059, 'loss': 8.861836433410645}


EP_train:3:  62%|| 17042/27626 [40:11<25:13,  6.99it/s]

{'epoch': 3, 'iter': 17040, 'avg_loss': 8.5230362974809, 'avg_acc': 49.96717475500264, 'loss': 9.003747940063477}


EP_train:3:  62%|| 17052/27626 [40:13<24:54,  7.08it/s]

{'epoch': 3, 'iter': 17050, 'avg_loss': 8.522983074495851, 'avg_acc': 49.96737727992493, 'loss': 8.911994934082031}


EP_train:3:  62%|| 17062/27626 [40:14<24:58,  7.05it/s]

{'epoch': 3, 'iter': 17060, 'avg_loss': 8.522838912516907, 'avg_acc': 49.96519840572065, 'loss': 8.804941177368164}


EP_train:3:  62%|| 17072/27626 [40:16<24:56,  7.05it/s]

{'epoch': 3, 'iter': 17070, 'avg_loss': 8.52288230096342, 'avg_acc': 49.965584910081425, 'loss': 9.590152740478516}


EP_train:3:  62%|| 17082/27626 [40:17<25:03,  7.01it/s]

{'epoch': 3, 'iter': 17080, 'avg_loss': 8.52279537038567, 'avg_acc': 49.96523915461624, 'loss': 7.874227046966553}


EP_train:3:  62%|| 17092/27626 [40:18<25:22,  6.92it/s]

{'epoch': 3, 'iter': 17090, 'avg_loss': 8.522778212239455, 'avg_acc': 49.965076648528466, 'loss': 8.742517471313477}


EP_train:3:  62%|| 17102/27626 [40:20<25:06,  6.99it/s]

{'epoch': 3, 'iter': 17100, 'avg_loss': 8.522714212201338, 'avg_acc': 49.965279808198346, 'loss': 8.515580177307129}


EP_train:3:  62%|| 17112/27626 [40:21<24:53,  7.04it/s]

{'epoch': 3, 'iter': 17110, 'avg_loss': 8.522638187800819, 'avg_acc': 49.96548273040734, 'loss': 8.110055923461914}


EP_train:3:  62%|| 17122/27626 [40:23<25:01,  7.00it/s]

{'epoch': 3, 'iter': 17120, 'avg_loss': 8.522654287768166, 'avg_acc': 49.96331259856317, 'loss': 8.599480628967285}


EP_train:3:  62%|| 17132/27626 [40:24<24:49,  7.04it/s]

{'epoch': 3, 'iter': 17130, 'avg_loss': 8.522535286631102, 'avg_acc': 49.96278676084292, 'loss': 8.980912208557129}


EP_train:3:  62%|| 17142/27626 [40:25<24:50,  7.03it/s]

{'epoch': 3, 'iter': 17140, 'avg_loss': 8.522469069491635, 'avg_acc': 49.961896913832334, 'loss': 8.426864624023438}


EP_train:3:  62%|| 17152/27626 [40:27<25:01,  6.98it/s]

{'epoch': 3, 'iter': 17150, 'avg_loss': 8.522503502896754, 'avg_acc': 49.96283015567605, 'loss': 8.123662948608398}


EP_train:3:  62%|| 17162/27626 [40:28<24:56,  6.99it/s]

{'epoch': 3, 'iter': 17160, 'avg_loss': 8.522374273089774, 'avg_acc': 49.96394440883398, 'loss': 8.741148948669434}


EP_train:3:  62%|| 17172/27626 [40:30<24:42,  7.05it/s]

{'epoch': 3, 'iter': 17170, 'avg_loss': 8.522202533109652, 'avg_acc': 49.96432939258052, 'loss': 8.241369247436523}


EP_train:3:  62%|| 17182/27626 [40:31<24:54,  6.99it/s]

{'epoch': 3, 'iter': 17180, 'avg_loss': 8.522330970940674, 'avg_acc': 49.965259589080965, 'loss': 9.433627128601074}


EP_train:3:  62%|| 17192/27626 [40:33<24:50,  7.00it/s]

{'epoch': 3, 'iter': 17190, 'avg_loss': 8.522495152528537, 'avg_acc': 49.96564335989762, 'loss': 8.214338302612305}


EP_train:3:  62%|| 17202/27626 [40:34<24:47,  7.01it/s]

{'epoch': 3, 'iter': 17200, 'avg_loss': 8.522698818642791, 'avg_acc': 49.96711673739899, 'loss': 8.765037536621094}


EP_train:3:  62%|| 17212/27626 [40:35<24:46,  7.01it/s]

{'epoch': 3, 'iter': 17210, 'avg_loss': 8.522494490728034, 'avg_acc': 49.96295973505316, 'loss': 8.951851844787598}


EP_train:3:  62%|| 17222/27626 [40:37<24:43,  7.02it/s]

{'epoch': 3, 'iter': 17220, 'avg_loss': 8.522469116222235, 'avg_acc': 49.96243685035712, 'loss': 8.640119552612305}


EP_train:3:  62%|| 17232/27626 [40:38<24:44,  7.00it/s]

{'epoch': 3, 'iter': 17230, 'avg_loss': 8.522525127734776, 'avg_acc': 49.96137049503802, 'loss': 8.536063194274902}


EP_train:3:  62%|| 17242/27626 [40:40<24:47,  6.98it/s]

{'epoch': 3, 'iter': 17240, 'avg_loss': 8.52265875602666, 'avg_acc': 49.962480424569335, 'loss': 8.10571575164795}


EP_train:3:  62%|| 17252/27626 [40:41<24:39,  7.01it/s]

{'epoch': 3, 'iter': 17250, 'avg_loss': 8.522782826436083, 'avg_acc': 49.96449481189496, 'loss': 8.493282318115234}


EP_train:3:  62%|| 17262/27626 [40:42<24:23,  7.08it/s]

{'epoch': 3, 'iter': 17260, 'avg_loss': 8.52279470563729, 'avg_acc': 49.964334337523894, 'loss': 8.365938186645508}


EP_train:3:  63%|| 17272/27626 [40:44<24:24,  7.07it/s]

{'epoch': 3, 'iter': 17270, 'avg_loss': 8.522738609708224, 'avg_acc': 49.96345029239766, 'loss': 8.324563026428223}


EP_train:3:  63%|| 17282/27626 [40:45<24:49,  6.94it/s]

{'epoch': 3, 'iter': 17280, 'avg_loss': 8.522810904021258, 'avg_acc': 49.961843932642786, 'loss': 7.967395305633545}


EP_train:3:  63%|| 17292/27626 [40:47<24:28,  7.04it/s]

{'epoch': 3, 'iter': 17290, 'avg_loss': 8.52279480276758, 'avg_acc': 49.958974321901565, 'loss': 8.425407409667969}


EP_train:3:  63%|| 17302/27626 [40:48<24:41,  6.97it/s]

{'epoch': 3, 'iter': 17300, 'avg_loss': 8.523010323663769, 'avg_acc': 49.95755303161667, 'loss': 10.173316955566406}


EP_train:3:  63%|| 17312/27626 [40:50<24:51,  6.92it/s]

{'epoch': 3, 'iter': 17310, 'avg_loss': 8.522846281834914, 'avg_acc': 49.955050257061984, 'loss': 8.160859107971191}


EP_train:3:  63%|| 17322/27626 [40:51<24:32,  7.00it/s]

{'epoch': 3, 'iter': 17320, 'avg_loss': 8.522665363186453, 'avg_acc': 49.95507620807113, 'loss': 7.638108730316162}


EP_train:3:  63%|| 17332/27626 [40:52<24:26,  7.02it/s]

{'epoch': 3, 'iter': 17330, 'avg_loss': 8.522882991953871, 'avg_acc': 49.95402025272633, 'loss': 8.985458374023438}


EP_train:3:  63%|| 17342/27626 [40:54<24:24,  7.02it/s]

{'epoch': 3, 'iter': 17340, 'avg_loss': 8.522714074318896, 'avg_acc': 49.95296551525287, 'loss': 8.547760009765625}


EP_train:3:  63%|| 17352/27626 [40:55<24:20,  7.04it/s]

{'epoch': 3, 'iter': 17350, 'avg_loss': 8.522666612335684, 'avg_acc': 49.95281251801049, 'loss': 8.427529335021973}


EP_train:3:  63%|| 17362/27626 [40:57<24:21,  7.02it/s]

{'epoch': 3, 'iter': 17360, 'avg_loss': 8.522545161207978, 'avg_acc': 49.95409970623812, 'loss': 8.4627103805542}


EP_train:3:  63%|| 17372/27626 [40:58<24:11,  7.06it/s]

{'epoch': 3, 'iter': 17370, 'avg_loss': 8.522783254054975, 'avg_acc': 49.95430602728686, 'loss': 8.944175720214844}


EP_train:3:  63%|| 17382/27626 [41:00<24:05,  7.09it/s]

{'epoch': 3, 'iter': 17380, 'avg_loss': 8.52268639813015, 'avg_acc': 49.956310051205335, 'loss': 8.498016357421875}


EP_train:3:  63%|| 17392/27626 [41:01<24:43,  6.90it/s]

{'epoch': 3, 'iter': 17390, 'avg_loss': 8.522759505668878, 'avg_acc': 49.95795238916681, 'loss': 8.474135398864746}


EP_train:3:  63%|| 17402/27626 [41:02<24:24,  6.98it/s]

{'epoch': 3, 'iter': 17400, 'avg_loss': 8.522659795420326, 'avg_acc': 49.9631845870927, 'loss': 8.49661922454834}


EP_train:3:  63%|| 17412/27626 [41:04<24:18,  7.00it/s]

{'epoch': 3, 'iter': 17410, 'avg_loss': 8.522761460236833, 'avg_acc': 49.96284676354029, 'loss': 8.450848579406738}


EP_train:3:  63%|| 17422/27626 [41:05<24:12,  7.02it/s]

{'epoch': 3, 'iter': 17420, 'avg_loss': 8.522501420238441, 'avg_acc': 49.96555880833477, 'loss': 7.51193380355835}


EP_train:3:  63%|| 17432/27626 [41:07<24:04,  7.06it/s]

{'epoch': 3, 'iter': 17430, 'avg_loss': 8.52252143314719, 'avg_acc': 49.965040732029145, 'loss': 8.205429077148438}


EP_train:3:  63%|| 17442/27626 [41:08<24:10,  7.02it/s]

{'epoch': 3, 'iter': 17440, 'avg_loss': 8.522523360681346, 'avg_acc': 49.96559830284961, 'loss': 7.981906414031982}


EP_train:3:  63%|| 17452/27626 [41:09<24:07,  7.03it/s]

{'epoch': 3, 'iter': 17450, 'avg_loss': 8.522610904029513, 'avg_acc': 49.964901724829524, 'loss': 9.133342742919922}


EP_train:3:  63%|| 17462/27626 [41:11<24:07,  7.02it/s]

{'epoch': 3, 'iter': 17460, 'avg_loss': 8.522680861127519, 'avg_acc': 49.965995647442874, 'loss': 8.32137680053711}


EP_train:3:  63%|| 17472/27626 [41:12<24:00,  7.05it/s]

{'epoch': 3, 'iter': 17470, 'avg_loss': 8.522562882505989, 'avg_acc': 49.96476303588804, 'loss': 8.070205688476562}


EP_train:3:  63%|| 17482/27626 [41:14<24:13,  6.98it/s]

{'epoch': 3, 'iter': 17480, 'avg_loss': 8.522370765386638, 'avg_acc': 49.9633530690464, 'loss': 7.676088333129883}


EP_train:3:  63%|| 17492/27626 [41:15<24:05,  7.01it/s]

{'epoch': 3, 'iter': 17490, 'avg_loss': 8.522404948268308, 'avg_acc': 49.96319535761249, 'loss': 9.203761100769043}


EP_train:3:  63%|| 17502/27626 [41:17<23:55,  7.05it/s]

{'epoch': 3, 'iter': 17500, 'avg_loss': 8.522412081948893, 'avg_acc': 49.963216387634986, 'loss': 9.381428718566895}


EP_train:3:  63%|| 17512/27626 [41:18<24:02,  7.01it/s]

{'epoch': 3, 'iter': 17510, 'avg_loss': 8.522610489845984, 'avg_acc': 49.96680657872194, 'loss': 8.683959007263184}


EP_train:3:  63%|| 17522/27626 [41:19<24:08,  6.98it/s]

{'epoch': 3, 'iter': 17520, 'avg_loss': 8.52260565348163, 'avg_acc': 49.96753895325609, 'loss': 8.676826477050781}


EP_train:3:  63%|| 17532/27626 [41:21<24:00,  7.01it/s]

{'epoch': 3, 'iter': 17530, 'avg_loss': 8.522379456743607, 'avg_acc': 49.968448747932236, 'loss': 7.909017562866211}


EP_train:3:  63%|| 17542/27626 [41:22<23:57,  7.01it/s]

{'epoch': 3, 'iter': 17540, 'avg_loss': 8.522269392086947, 'avg_acc': 49.96935750527336, 'loss': 8.739283561706543}


EP_train:3:  64%|| 17552/27626 [41:24<24:09,  6.95it/s]

{'epoch': 3, 'iter': 17550, 'avg_loss': 8.52243134523211, 'avg_acc': 49.97151159478093, 'loss': 8.334583282470703}


EP_train:3:  64%|| 17562/27626 [41:25<23:49,  7.04it/s]

{'epoch': 3, 'iter': 17560, 'avg_loss': 8.522424764509903, 'avg_acc': 49.97562069358237, 'loss': 8.330179214477539}


EP_train:3:  64%|| 17572/27626 [41:27<23:50,  7.03it/s]

{'epoch': 3, 'iter': 17570, 'avg_loss': 8.522542633320203, 'avg_acc': 49.976168117921574, 'loss': 8.651178359985352}


EP_train:3:  64%|| 17582/27626 [41:28<23:56,  6.99it/s]

{'epoch': 3, 'iter': 17580, 'avg_loss': 8.522611703314503, 'avg_acc': 49.97458193504351, 'loss': 7.611464500427246}


EP_train:3:  64%|| 17592/27626 [41:29<23:42,  7.06it/s]

{'epoch': 3, 'iter': 17590, 'avg_loss': 8.52247528690739, 'avg_acc': 49.97317520322893, 'loss': 7.380979061126709}


EP_train:3:  64%|| 17602/27626 [41:31<23:44,  7.03it/s]

{'epoch': 3, 'iter': 17600, 'avg_loss': 8.522481879756691, 'avg_acc': 49.97443327083688, 'loss': 9.106986045837402}


EP_train:3:  64%|| 17612/27626 [41:32<23:41,  7.05it/s]

{'epoch': 3, 'iter': 17610, 'avg_loss': 8.52243998850653, 'avg_acc': 49.973738004656184, 'loss': 8.14486026763916}


EP_train:3:  64%|| 17622/27626 [41:34<23:42,  7.03it/s]

{'epoch': 3, 'iter': 17620, 'avg_loss': 8.522299650991203, 'avg_acc': 49.973220872822196, 'loss': 8.03575325012207}


EP_train:3:  64%|| 17632/27626 [41:35<23:38,  7.04it/s]

{'epoch': 3, 'iter': 17630, 'avg_loss': 8.522099485819787, 'avg_acc': 49.97075463671942, 'loss': 7.7545928955078125}


EP_train:3:  64%|| 17642/27626 [41:36<23:36,  7.05it/s]

{'epoch': 3, 'iter': 17640, 'avg_loss': 8.522044110275182, 'avg_acc': 49.9718340797007, 'loss': 7.842211723327637}


EP_train:3:  64%|| 17652/27626 [41:38<23:37,  7.04it/s]

{'epoch': 3, 'iter': 17650, 'avg_loss': 8.521786443221792, 'avg_acc': 49.97255821199932, 'loss': 8.02640438079834}


EP_train:3:  64%|| 17662/27626 [41:39<23:46,  6.98it/s]

{'epoch': 3, 'iter': 17660, 'avg_loss': 8.52176140024427, 'avg_acc': 49.97257375007077, 'loss': 9.088834762573242}


EP_train:3:  64%|| 17672/27626 [41:41<23:40,  7.01it/s]

{'epoch': 3, 'iter': 17670, 'avg_loss': 8.521812927270272, 'avg_acc': 49.97082083639862, 'loss': 8.82044506072998}


EP_train:3:  64%|| 17682/27626 [41:42<23:35,  7.03it/s]

{'epoch': 3, 'iter': 17680, 'avg_loss': 8.52189986141186, 'avg_acc': 49.97083733951699, 'loss': 7.601028919219971}


EP_train:3:  64%|| 17692/27626 [41:44<23:40,  6.99it/s]

{'epoch': 3, 'iter': 17690, 'avg_loss': 8.521787144116109, 'avg_acc': 49.97173704143349, 'loss': 8.101325988769531}


EP_train:3:  64%|| 17702/27626 [41:45<23:36,  7.01it/s]

{'epoch': 3, 'iter': 17700, 'avg_loss': 8.521994909737233, 'avg_acc': 49.971046833512226, 'loss': 8.408349990844727}


EP_train:3:  64%|| 17712/27626 [41:46<23:36,  7.00it/s]

{'epoch': 3, 'iter': 17710, 'avg_loss': 8.521995360924922, 'avg_acc': 49.97247473321664, 'loss': 8.653002738952637}


EP_train:3:  64%|| 17722/27626 [41:48<23:25,  7.05it/s]

{'epoch': 3, 'iter': 17720, 'avg_loss': 8.521967976190565, 'avg_acc': 49.97354833248688, 'loss': 9.17265796661377}


EP_train:3:  64%|| 17732/27626 [41:49<23:32,  7.01it/s]

{'epoch': 3, 'iter': 17730, 'avg_loss': 8.521991171494507, 'avg_acc': 49.974973210760815, 'loss': 8.858290672302246}


EP_train:3:  64%|| 17742/27626 [41:51<23:23,  7.04it/s]

{'epoch': 3, 'iter': 17740, 'avg_loss': 8.521835166688607, 'avg_acc': 49.974987317513104, 'loss': 8.13318920135498}


EP_train:3:  64%|| 17752/27626 [41:52<23:39,  6.95it/s]

{'epoch': 3, 'iter': 17750, 'avg_loss': 8.52176229514966, 'avg_acc': 49.97658582615064, 'loss': 9.30827808380127}


EP_train:3:  64%|| 17762/27626 [41:53<23:32,  6.98it/s]

{'epoch': 3, 'iter': 17760, 'avg_loss': 8.521736589156523, 'avg_acc': 49.97466358876189, 'loss': 9.20484733581543}


EP_train:3:  64%|| 17772/27626 [41:55<23:21,  7.03it/s]

{'epoch': 3, 'iter': 17770, 'avg_loss': 8.521944241762283, 'avg_acc': 49.97573293568173, 'loss': 8.45153522491455}


EP_train:3:  64%|| 17782/27626 [41:56<23:20,  7.03it/s]

{'epoch': 3, 'iter': 17780, 'avg_loss': 8.521731051328137, 'avg_acc': 49.97750407738597, 'loss': 7.775710105895996}


EP_train:3:  64%|| 17792/27626 [41:58<23:11,  7.07it/s]

{'epoch': 3, 'iter': 17790, 'avg_loss': 8.521743977252779, 'avg_acc': 49.981029734135234, 'loss': 8.602880477905273}


EP_train:3:  64%|| 17802/27626 [41:59<23:18,  7.03it/s]

{'epoch': 3, 'iter': 17800, 'avg_loss': 8.52170047204981, 'avg_acc': 49.98226925453626, 'loss': 8.683537483215332}


EP_train:3:  64%|| 17812/27626 [42:01<23:12,  7.05it/s]

{'epoch': 3, 'iter': 17810, 'avg_loss': 8.52161495082269, 'avg_acc': 49.982805569591825, 'loss': 8.976393699645996}


EP_train:3:  65%|| 17822/27626 [42:02<23:15,  7.03it/s]

{'epoch': 3, 'iter': 17820, 'avg_loss': 8.521646939225151, 'avg_acc': 49.98263986308288, 'loss': 9.611191749572754}


EP_train:3:  65%|| 17832/27626 [42:03<23:01,  7.09it/s]

{'epoch': 3, 'iter': 17830, 'avg_loss': 8.521570426604397, 'avg_acc': 49.980546520105435, 'loss': 8.710391998291016}


EP_train:3:  65%|| 17842/27626 [42:05<23:08,  7.05it/s]

{'epoch': 3, 'iter': 17840, 'avg_loss': 8.521436386355356, 'avg_acc': 49.980732582254355, 'loss': 7.615150451660156}


EP_train:3:  65%|| 17852/27626 [42:06<23:12,  7.02it/s]

{'epoch': 3, 'iter': 17850, 'avg_loss': 8.521378026850838, 'avg_acc': 49.978817713293374, 'loss': 8.491636276245117}


EP_train:3:  65%|| 17862/27626 [42:08<23:14,  7.00it/s]

{'epoch': 3, 'iter': 17860, 'avg_loss': 8.52127623580737, 'avg_acc': 49.97655506410615, 'loss': 8.71313190460205}


EP_train:3:  65%|| 17872/27626 [42:09<23:14,  6.99it/s]

{'epoch': 3, 'iter': 17870, 'avg_loss': 8.521351371826185, 'avg_acc': 49.97412008281574, 'loss': 8.962329864501953}


EP_train:3:  65%|| 17882/27626 [42:10<23:09,  7.01it/s]

{'epoch': 3, 'iter': 17880, 'avg_loss': 8.521496437772075, 'avg_acc': 49.975008388792574, 'loss': 8.979671478271484}


EP_train:3:  65%|| 17892/27626 [42:12<22:54,  7.08it/s]

{'epoch': 3, 'iter': 17890, 'avg_loss': 8.52175021052967, 'avg_acc': 49.97537169526577, 'loss': 9.233817100524902}


EP_train:3:  65%|| 17902/27626 [42:13<23:05,  7.02it/s]

{'epoch': 3, 'iter': 17900, 'avg_loss': 8.522003638758338, 'avg_acc': 49.97486173956762, 'loss': 9.213095664978027}


EP_train:3:  65%|| 17912/27626 [42:15<22:57,  7.05it/s]

{'epoch': 3, 'iter': 17910, 'avg_loss': 8.521887194550581, 'avg_acc': 49.975573669811844, 'loss': 8.475130081176758}


EP_train:3:  65%|| 17922/27626 [42:16<22:46,  7.10it/s]

{'epoch': 3, 'iter': 17920, 'avg_loss': 8.521870540645292, 'avg_acc': 49.974541041236535, 'loss': 7.607522487640381}


EP_train:3:  65%|| 17932/27626 [42:18<22:53,  7.06it/s]

{'epoch': 3, 'iter': 17930, 'avg_loss': 8.521924397994734, 'avg_acc': 49.97629803134237, 'loss': 8.676776885986328}


EP_train:3:  65%|| 17942/27626 [42:19<22:58,  7.02it/s]

{'epoch': 3, 'iter': 17940, 'avg_loss': 8.521992931463775, 'avg_acc': 49.97526615015885, 'loss': 8.768381118774414}


EP_train:3:  65%|| 17952/27626 [42:20<22:54,  7.04it/s]

{'epoch': 3, 'iter': 17950, 'avg_loss': 8.522155258158948, 'avg_acc': 49.974583588658014, 'loss': 8.601956367492676}


EP_train:3:  65%|| 17962/27626 [42:22<22:51,  7.05it/s]

{'epoch': 3, 'iter': 17960, 'avg_loss': 8.522239047443872, 'avg_acc': 49.97529369188798, 'loss': 7.9676055908203125}


EP_train:3:  65%|| 17972/27626 [42:23<22:54,  7.02it/s]

{'epoch': 3, 'iter': 17970, 'avg_loss': 8.522121386750634, 'avg_acc': 49.97600300484113, 'loss': 7.894357681274414}


EP_train:3:  65%|| 17982/27626 [42:25<22:38,  7.10it/s]

{'epoch': 3, 'iter': 17980, 'avg_loss': 8.522050718160369, 'avg_acc': 49.97427840498304, 'loss': 8.632128715515137}


EP_train:3:  65%|| 17992/27626 [42:26<22:40,  7.08it/s]

{'epoch': 3, 'iter': 17990, 'avg_loss': 8.522062268187435, 'avg_acc': 49.97255572230559, 'loss': 8.741035461425781}


EP_train:3:  65%|| 18002/27626 [42:27<22:56,  6.99it/s]

{'epoch': 3, 'iter': 18000, 'avg_loss': 8.521858194272893, 'avg_acc': 49.975001388811734, 'loss': 8.174787521362305}


EP_train:3:  65%|| 18012/27626 [42:29<22:42,  7.06it/s]

{'epoch': 3, 'iter': 18010, 'avg_loss': 8.521720518840027, 'avg_acc': 49.974321248126145, 'loss': 8.504469871520996}


EP_train:3:  65%|| 18022/27626 [42:30<22:49,  7.01it/s]

{'epoch': 3, 'iter': 18020, 'avg_loss': 8.521841702520248, 'avg_acc': 49.972254591865045, 'loss': 8.846146583557129}


EP_train:3:  65%|| 18032/27626 [42:32<22:44,  7.03it/s]

{'epoch': 3, 'iter': 18030, 'avg_loss': 8.52182017165514, 'avg_acc': 49.97226997947978, 'loss': 8.642539978027344}


EP_train:3:  65%|| 18042/27626 [42:33<22:33,  7.08it/s]

{'epoch': 3, 'iter': 18040, 'avg_loss': 8.521957529323949, 'avg_acc': 49.976096114406076, 'loss': 8.58171272277832}


EP_train:3:  65%|| 18052/27626 [42:34<22:45,  7.01it/s]

{'epoch': 3, 'iter': 18050, 'avg_loss': 8.52197857068441, 'avg_acc': 49.975936236219596, 'loss': 9.47879695892334}


EP_train:3:  65%|| 18062/27626 [42:36<22:32,  7.07it/s]

{'epoch': 3, 'iter': 18060, 'avg_loss': 8.522072070527134, 'avg_acc': 49.975776535075575, 'loss': 8.642657279968262}


EP_train:3:  65%|| 18072/27626 [42:37<22:33,  7.06it/s]

{'epoch': 3, 'iter': 18070, 'avg_loss': 8.521990219404566, 'avg_acc': 49.976308726689176, 'loss': 8.122010231018066}


EP_train:3:  65%|| 18082/27626 [42:39<22:26,  7.09it/s]

{'epoch': 3, 'iter': 18080, 'avg_loss': 8.522051131921126, 'avg_acc': 49.97822299651568, 'loss': 9.322394371032715}


EP_train:3:  65%|| 18092/27626 [42:40<22:50,  6.96it/s]

{'epoch': 3, 'iter': 18090, 'avg_loss': 8.52203316335608, 'avg_acc': 49.97668039356586, 'loss': 8.50990104675293}


EP_train:3:  66%|| 18102/27626 [42:42<22:27,  7.07it/s]

{'epoch': 3, 'iter': 18100, 'avg_loss': 8.522125653091514, 'avg_acc': 49.97807441577813, 'loss': 9.706137657165527}


EP_train:3:  66%|| 18112/27626 [42:43<22:28,  7.06it/s]

{'epoch': 3, 'iter': 18110, 'avg_loss': 8.522089490647753, 'avg_acc': 49.97912180442825, 'loss': 8.326321601867676}


EP_train:3:  66%|| 18122/27626 [42:44<22:34,  7.02it/s]

{'epoch': 3, 'iter': 18120, 'avg_loss': 8.521981371441765, 'avg_acc': 49.97827106671817, 'loss': 7.540973663330078}


EP_train:3:  66%|| 18132/27626 [42:46<22:21,  7.08it/s]

{'epoch': 3, 'iter': 18130, 'avg_loss': 8.521940402691117, 'avg_acc': 49.9774212674425, 'loss': 9.22327709197998}


EP_train:3:  66%|| 18142/27626 [42:47<22:30,  7.02it/s]

{'epoch': 3, 'iter': 18140, 'avg_loss': 8.52212158215952, 'avg_acc': 49.97743371368723, 'loss': 11.121122360229492}


EP_train:3:  66%|| 18152/27626 [42:49<22:37,  6.98it/s]

{'epoch': 3, 'iter': 18150, 'avg_loss': 8.522154345892526, 'avg_acc': 49.978479147154424, 'loss': 8.980866432189941}


EP_train:3:  66%|| 18162/27626 [42:50<22:29,  7.01it/s]

{'epoch': 3, 'iter': 18160, 'avg_loss': 8.522051453452548, 'avg_acc': 49.97969550134905, 'loss': 8.22275447845459}


EP_train:3:  66%|| 18172/27626 [42:51<22:22,  7.04it/s]

{'epoch': 3, 'iter': 18170, 'avg_loss': 8.521987743943455, 'avg_acc': 49.978330856859834, 'loss': 8.846643447875977}


EP_train:3:  66%|| 18182/27626 [42:53<22:28,  7.00it/s]

{'epoch': 3, 'iter': 18180, 'avg_loss': 8.522017456072732, 'avg_acc': 49.97885842362906, 'loss': 8.783988952636719}


EP_train:3:  66%|| 18192/27626 [42:54<22:12,  7.08it/s]

{'epoch': 3, 'iter': 18190, 'avg_loss': 8.522225247831305, 'avg_acc': 49.980072563355506, 'loss': 8.252476692199707}


EP_train:3:  66%|| 18202/27626 [42:56<22:21,  7.02it/s]

{'epoch': 3, 'iter': 18200, 'avg_loss': 8.522258740127443, 'avg_acc': 49.979568430306024, 'loss': 8.301315307617188}


EP_train:3:  66%|| 18212/27626 [42:57<22:21,  7.02it/s]

{'epoch': 3, 'iter': 18210, 'avg_loss': 8.522285915011532, 'avg_acc': 49.976662456756905, 'loss': 9.237086296081543}


EP_train:3:  66%|| 18222/27626 [42:59<22:13,  7.05it/s]

{'epoch': 3, 'iter': 18220, 'avg_loss': 8.522254106706782, 'avg_acc': 49.97598924318095, 'loss': 9.08923053741455}


EP_train:3:  66%|| 18232/27626 [43:00<22:27,  6.97it/s]

{'epoch': 3, 'iter': 18230, 'avg_loss': 8.522199242671213, 'avg_acc': 49.97514535680983, 'loss': 8.684932708740234}


EP_train:3:  66%|| 18242/27626 [43:01<22:08,  7.06it/s]

{'epoch': 3, 'iter': 18240, 'avg_loss': 8.522282202223806, 'avg_acc': 49.97652952140782, 'loss': 8.753726959228516}


EP_train:3:  66%|| 18252/27626 [43:03<22:03,  7.08it/s]

{'epoch': 3, 'iter': 18250, 'avg_loss': 8.522407795669517, 'avg_acc': 49.97534381677716, 'loss': 8.710562705993652}


EP_train:3:  66%|| 18262/27626 [43:04<22:04,  7.07it/s]

{'epoch': 3, 'iter': 18260, 'avg_loss': 8.522546843653094, 'avg_acc': 49.97450167022616, 'loss': 8.703696250915527}


EP_train:3:  66%|| 18272/27626 [43:06<21:57,  7.10it/s]

{'epoch': 3, 'iter': 18270, 'avg_loss': 8.52233639515573, 'avg_acc': 49.972463193038145, 'loss': 8.356575965881348}


EP_train:3:  66%|| 18282/27626 [43:07<21:58,  7.09it/s]

{'epoch': 3, 'iter': 18280, 'avg_loss': 8.522189786023224, 'avg_acc': 49.97042694600952, 'loss': 9.358792304992676}


EP_train:3:  66%|| 18292/27626 [43:08<22:08,  7.02it/s]

{'epoch': 3, 'iter': 18290, 'avg_loss': 8.522146901140113, 'avg_acc': 49.97044311409983, 'loss': 8.324145317077637}


EP_train:3:  66%|| 18302/27626 [43:10<22:09,  7.01it/s]

{'epoch': 3, 'iter': 18300, 'avg_loss': 8.522249198273865, 'avg_acc': 49.971142287306705, 'loss': 8.357884407043457}


EP_train:3:  66%|| 18312/27626 [43:11<22:12,  6.99it/s]

{'epoch': 3, 'iter': 18310, 'avg_loss': 8.52243942909997, 'avg_acc': 49.967915460652065, 'loss': 8.650150299072266}


EP_train:3:  66%|| 18322/27626 [43:13<21:58,  7.06it/s]

{'epoch': 3, 'iter': 18320, 'avg_loss': 8.522612551285263, 'avg_acc': 49.96776240379892, 'loss': 8.091890335083008}


EP_train:3:  66%|| 18332/27626 [43:14<21:54,  7.07it/s]

{'epoch': 3, 'iter': 18330, 'avg_loss': 8.522722121925389, 'avg_acc': 49.970507610059464, 'loss': 9.779953956604004}


EP_train:3:  66%|| 18342/27626 [43:16<21:56,  7.05it/s]

{'epoch': 3, 'iter': 18340, 'avg_loss': 8.522900113934135, 'avg_acc': 49.970012540210455, 'loss': 8.516528129577637}


EP_train:3:  66%|| 18352/27626 [43:17<21:49,  7.08it/s]

{'epoch': 3, 'iter': 18350, 'avg_loss': 8.52281504712581, 'avg_acc': 49.969858590812485, 'loss': 8.829962730407715}


EP_train:3:  66%|| 18362/27626 [43:18<21:52,  7.06it/s]

{'epoch': 3, 'iter': 18360, 'avg_loss': 8.522686250134106, 'avg_acc': 49.97106639071946, 'loss': 8.214859008789062}


EP_train:3:  67%|| 18372/27626 [43:20<21:46,  7.08it/s]

{'epoch': 3, 'iter': 18370, 'avg_loss': 8.522813363454473, 'avg_acc': 49.97142235044363, 'loss': 8.558883666992188}


EP_train:3:  67%|| 18382/27626 [43:21<21:57,  7.01it/s]

{'epoch': 3, 'iter': 18380, 'avg_loss': 8.52288566975059, 'avg_acc': 49.97194793536804, 'loss': 8.293020248413086}


EP_train:3:  67%|| 18392/27626 [43:23<21:59,  7.00it/s]

{'epoch': 3, 'iter': 18390, 'avg_loss': 8.523168128178874, 'avg_acc': 49.97162334837692, 'loss': 8.30371379852295}


EP_train:3:  67%|| 18402/27626 [43:24<21:54,  7.02it/s]

{'epoch': 3, 'iter': 18400, 'avg_loss': 8.52317455489832, 'avg_acc': 49.97061980327156, 'loss': 8.483925819396973}


EP_train:3:  67%|| 18412/27626 [43:25<21:50,  7.03it/s]

{'epoch': 3, 'iter': 18410, 'avg_loss': 8.52312318272907, 'avg_acc': 49.971484438650805, 'loss': 8.592901229858398}


EP_train:3:  67%|| 18422/27626 [43:27<21:37,  7.10it/s]

{'epoch': 3, 'iter': 18420, 'avg_loss': 8.52321846594763, 'avg_acc': 49.97166956191303, 'loss': 7.924355983734131}


EP_train:3:  67%|| 18432/27626 [43:28<21:39,  7.08it/s]

{'epoch': 3, 'iter': 18430, 'avg_loss': 8.523099581538535, 'avg_acc': 49.97151538169389, 'loss': 7.863842010498047}


EP_train:3:  67%|| 18442/27626 [43:30<21:38,  7.08it/s]

{'epoch': 3, 'iter': 18440, 'avg_loss': 8.523115014003366, 'avg_acc': 49.97085299061873, 'loss': 7.893538951873779}


EP_train:3:  67%|| 18452/27626 [43:31<21:40,  7.06it/s]

{'epoch': 3, 'iter': 18450, 'avg_loss': 8.523039430029366, 'avg_acc': 49.97154625765541, 'loss': 8.470112800598145}


EP_train:3:  67%|| 18462/27626 [43:32<21:38,  7.05it/s]

{'epoch': 3, 'iter': 18460, 'avg_loss': 8.523022038998219, 'avg_acc': 49.971561670548724, 'loss': 8.38120174407959}


EP_train:3:  67%|| 18472/27626 [43:34<21:42,  7.03it/s]

{'epoch': 3, 'iter': 18470, 'avg_loss': 8.52281961706423, 'avg_acc': 49.97293053976504, 'loss': 6.978668689727783}


EP_train:3:  67%|| 18482/27626 [43:35<21:45,  7.01it/s]

{'epoch': 3, 'iter': 18480, 'avg_loss': 8.522616402358889, 'avg_acc': 49.97226881662248, 'loss': 9.027641296386719}


EP_train:3:  67%|| 18492/27626 [43:37<21:35,  7.05it/s]

{'epoch': 3, 'iter': 18490, 'avg_loss': 8.522698725522496, 'avg_acc': 49.97295981828998, 'loss': 8.710211753845215}


EP_train:3:  67%|| 18502/27626 [43:38<21:38,  7.03it/s]

{'epoch': 3, 'iter': 18500, 'avg_loss': 8.522698338898664, 'avg_acc': 49.969596238041184, 'loss': 8.138449668884277}


EP_train:3:  67%|| 18512/27626 [43:40<21:32,  7.05it/s]

{'epoch': 3, 'iter': 18510, 'avg_loss': 8.522901376663759, 'avg_acc': 49.96910620712009, 'loss': 9.127044677734375}


EP_train:3:  67%|| 18522/27626 [43:41<21:24,  7.09it/s]

{'epoch': 3, 'iter': 18520, 'avg_loss': 8.523015035124965, 'avg_acc': 49.968954160142545, 'loss': 8.967680931091309}


EP_train:3:  67%|| 18532/27626 [43:42<21:37,  7.01it/s]

{'epoch': 3, 'iter': 18530, 'avg_loss': 8.523029086465142, 'avg_acc': 49.968633640926015, 'loss': 8.844913482666016}


EP_train:3:  67%|| 18542/27626 [43:44<21:20,  7.09it/s]

{'epoch': 3, 'iter': 18540, 'avg_loss': 8.522825780806498, 'avg_acc': 49.96780783129281, 'loss': 8.822036743164062}


EP_train:3:  67%|| 18552/27626 [43:45<21:30,  7.03it/s]

{'epoch': 3, 'iter': 18550, 'avg_loss': 8.523040506886149, 'avg_acc': 49.967825184626165, 'loss': 9.184274673461914}


EP_train:3:  67%|| 18562/27626 [43:47<22:27,  6.73it/s]

{'epoch': 3, 'iter': 18560, 'avg_loss': 8.522916446208157, 'avg_acc': 49.96548542643177, 'loss': 8.503467559814453}


EP_train:3:  67%|| 18572/27626 [43:48<21:21,  7.06it/s]

{'epoch': 3, 'iter': 18570, 'avg_loss': 8.522833179781788, 'avg_acc': 49.96651365031501, 'loss': 8.711234092712402}


EP_train:3:  67%|| 18582/27626 [43:49<21:14,  7.10it/s]

{'epoch': 3, 'iter': 18580, 'avg_loss': 8.522922141194876, 'avg_acc': 49.96838168021097, 'loss': 8.520525932312012}


EP_train:3:  67%|| 18592/27626 [43:51<21:16,  7.08it/s]

{'epoch': 3, 'iter': 18590, 'avg_loss': 8.522764847460829, 'avg_acc': 49.9677263191867, 'loss': 8.409276008605957}


EP_train:3:  67%|| 18602/27626 [43:52<21:25,  7.02it/s]

{'epoch': 3, 'iter': 18600, 'avg_loss': 8.522687192873496, 'avg_acc': 49.968751680017206, 'loss': 9.215973854064941}


EP_train:3:  67%|| 18612/27626 [43:54<21:25,  7.01it/s]

{'epoch': 3, 'iter': 18610, 'avg_loss': 8.522705276228535, 'avg_acc': 49.96960802751062, 'loss': 8.518781661987305}


EP_train:3:  67%|| 18622/27626 [43:55<21:18,  7.04it/s]

{'epoch': 3, 'iter': 18620, 'avg_loss': 8.522667513057053, 'avg_acc': 49.96794613608292, 'loss': 8.557639122009277}


EP_train:3:  67%|| 18632/27626 [43:57<21:25,  7.00it/s]

{'epoch': 3, 'iter': 18630, 'avg_loss': 8.522585256069885, 'avg_acc': 49.96611829746122, 'loss': 8.546284675598145}


EP_train:3:  67%|| 18642/27626 [43:58<21:17,  7.03it/s]

{'epoch': 3, 'iter': 18640, 'avg_loss': 8.522640935200926, 'avg_acc': 49.96714232069095, 'loss': 8.682433128356934}


EP_train:3:  68%|| 18652/27626 [43:59<21:11,  7.06it/s]

{'epoch': 3, 'iter': 18650, 'avg_loss': 8.522575459744386, 'avg_acc': 49.969003002519976, 'loss': 8.757887840270996}


EP_train:3:  68%|| 18662/27626 [44:01<21:16,  7.02it/s]

{'epoch': 3, 'iter': 18660, 'avg_loss': 8.522824677781694, 'avg_acc': 49.968852151546, 'loss': 8.700517654418945}


EP_train:3:  68%|| 18672/27626 [44:02<21:01,  7.10it/s]

{'epoch': 3, 'iter': 18670, 'avg_loss': 8.522861294202164, 'avg_acc': 49.969036205880776, 'loss': 8.8612699508667}


EP_train:3:  68%|| 18682/27626 [44:04<21:14,  7.02it/s]

{'epoch': 3, 'iter': 18680, 'avg_loss': 8.522970885167597, 'avg_acc': 49.97072560355441, 'loss': 8.3162841796875}


EP_train:3:  68%|| 18692/27626 [44:05<21:10,  7.03it/s]

{'epoch': 3, 'iter': 18690, 'avg_loss': 8.523103064438624, 'avg_acc': 49.97191161521588, 'loss': 8.502985954284668}


EP_train:3:  68%|| 18702/27626 [44:06<21:09,  7.03it/s]

{'epoch': 3, 'iter': 18700, 'avg_loss': 8.523053322875635, 'avg_acc': 49.97326346184696, 'loss': 8.409859657287598}


EP_train:3:  68%|| 18712/27626 [44:08<21:07,  7.03it/s]

{'epoch': 3, 'iter': 18710, 'avg_loss': 8.52303787175074, 'avg_acc': 49.972943722943725, 'loss': 8.610494613647461}


EP_train:3:  68%|| 18722/27626 [44:09<21:04,  7.04it/s]

{'epoch': 3, 'iter': 18720, 'avg_loss': 8.523098149072169, 'avg_acc': 49.9719566262486, 'loss': 8.18046760559082}


EP_train:3:  68%|| 18732/27626 [44:11<20:59,  7.06it/s]

{'epoch': 3, 'iter': 18730, 'avg_loss': 8.522908606994337, 'avg_acc': 49.97147109070525, 'loss': 8.28908920288086}


EP_train:3:  68%|| 18742/27626 [44:12<21:02,  7.04it/s]

{'epoch': 3, 'iter': 18740, 'avg_loss': 8.522747406537935, 'avg_acc': 49.972486793660956, 'loss': 7.759849548339844}


EP_train:3:  68%|| 18752/27626 [44:14<21:05,  7.01it/s]

{'epoch': 3, 'iter': 18750, 'avg_loss': 8.522941555940502, 'avg_acc': 49.97066823102767, 'loss': 8.672853469848633}


EP_train:3:  68%|| 18762/27626 [44:15<20:53,  7.07it/s]

{'epoch': 3, 'iter': 18760, 'avg_loss': 8.523149310865799, 'avg_acc': 49.968351900218536, 'loss': 8.793585777282715}


EP_train:3:  68%|| 18772/27626 [44:16<20:47,  7.09it/s]

{'epoch': 3, 'iter': 18770, 'avg_loss': 8.523034536351432, 'avg_acc': 49.96870172073944, 'loss': 9.15930461883545}


EP_train:3:  68%|| 18782/27626 [44:18<20:49,  7.08it/s]

{'epoch': 3, 'iter': 18780, 'avg_loss': 8.522885663803505, 'avg_acc': 49.970382301261914, 'loss': 8.053398132324219}


EP_train:3:  68%|| 18792/27626 [44:19<20:44,  7.10it/s]

{'epoch': 3, 'iter': 18790, 'avg_loss': 8.52262146754222, 'avg_acc': 49.969067638763235, 'loss': 8.618925094604492}


EP_train:3:  68%|| 18802/27626 [44:21<20:54,  7.03it/s]

{'epoch': 3, 'iter': 18800, 'avg_loss': 8.522472501124456, 'avg_acc': 49.97041380777618, 'loss': 7.913576126098633}


EP_train:3:  68%|| 18812/27626 [44:22<20:48,  7.06it/s]

{'epoch': 3, 'iter': 18810, 'avg_loss': 8.52255528935544, 'avg_acc': 49.97009728350433, 'loss': 8.492278099060059}


EP_train:3:  68%|| 18822/27626 [44:23<20:43,  7.08it/s]

{'epoch': 3, 'iter': 18820, 'avg_loss': 8.522571788496181, 'avg_acc': 49.97177355082089, 'loss': 8.038715362548828}


EP_train:3:  68%|| 18832/27626 [44:25<20:43,  7.07it/s]

{'epoch': 3, 'iter': 18830, 'avg_loss': 8.52263687400277, 'avg_acc': 49.972784238755246, 'loss': 10.079438209533691}


EP_train:3:  68%|| 18842/27626 [44:26<21:01,  6.96it/s]

{'epoch': 3, 'iter': 18840, 'avg_loss': 8.522659195538102, 'avg_acc': 49.97595005572953, 'loss': 7.939520359039307}


EP_train:3:  68%|| 18852/27626 [44:28<20:55,  6.99it/s]

{'epoch': 3, 'iter': 18850, 'avg_loss': 8.522532725408787, 'avg_acc': 49.97612858734284, 'loss': 8.793011665344238}


EP_train:3:  68%|| 18862/27626 [44:29<20:34,  7.10it/s]

{'epoch': 3, 'iter': 18860, 'avg_loss': 8.52265326879821, 'avg_acc': 49.97547850060972, 'loss': 8.588237762451172}


EP_train:3:  68%|| 18872/27626 [44:30<20:47,  7.02it/s]

{'epoch': 3, 'iter': 18870, 'avg_loss': 8.522773244335342, 'avg_acc': 49.97714747496158, 'loss': 9.249780654907227}


EP_train:3:  68%|| 18882/27626 [44:32<20:43,  7.03it/s]

{'epoch': 3, 'iter': 18880, 'avg_loss': 8.52292281993775, 'avg_acc': 49.97600100630263, 'loss': 7.901430130004883}


EP_train:3:  68%|| 18892/27626 [44:33<20:36,  7.06it/s]

{'epoch': 3, 'iter': 18890, 'avg_loss': 8.522810523805683, 'avg_acc': 49.975517442168226, 'loss': 8.50581169128418}


EP_train:3:  68%|| 18902/27626 [44:35<20:38,  7.04it/s]

{'epoch': 3, 'iter': 18900, 'avg_loss': 8.52283775442511, 'avg_acc': 49.97635707105444, 'loss': 8.352189064025879}


EP_train:3:  68%|| 18912/27626 [44:36<20:43,  7.01it/s]

{'epoch': 3, 'iter': 18910, 'avg_loss': 8.52329795930798, 'avg_acc': 49.977195811961295, 'loss': 9.191823959350586}


EP_train:3:  68%|| 18922/27626 [44:38<20:53,  6.94it/s]

{'epoch': 3, 'iter': 18920, 'avg_loss': 8.523279021969621, 'avg_acc': 49.97737302468157, 'loss': 8.496785163879395}


EP_train:3:  69%|| 18932/27626 [44:39<20:38,  7.02it/s]

{'epoch': 3, 'iter': 18930, 'avg_loss': 8.523331760938266, 'avg_acc': 49.97771512334266, 'loss': 8.625190734863281}


EP_train:3:  69%|| 18942/27626 [44:40<20:32,  7.05it/s]

{'epoch': 3, 'iter': 18940, 'avg_loss': 8.523241646381337, 'avg_acc': 49.97723193073227, 'loss': 8.684599876403809}


EP_train:3:  69%|| 18952/27626 [44:42<20:30,  7.05it/s]

{'epoch': 3, 'iter': 18950, 'avg_loss': 8.52318466642552, 'avg_acc': 49.977738641760325, 'loss': 8.168737411499023}


EP_train:3:  69%|| 18962/27626 [44:43<20:33,  7.03it/s]

{'epoch': 3, 'iter': 18960, 'avg_loss': 8.52318752257735, 'avg_acc': 49.9793985021887, 'loss': 8.634349822998047}


EP_train:3:  69%|| 18972/27626 [44:45<20:28,  7.04it/s]

{'epoch': 3, 'iter': 18970, 'avg_loss': 8.523143464126091, 'avg_acc': 49.979574086764, 'loss': 7.9047064781188965}


EP_train:3:  69%|| 18982/27626 [44:46<20:24,  7.06it/s]

{'epoch': 3, 'iter': 18980, 'avg_loss': 8.523027827544896, 'avg_acc': 49.97925557136084, 'loss': 8.982484817504883}


EP_train:3:  69%|| 18992/27626 [44:47<20:23,  7.06it/s]

{'epoch': 3, 'iter': 18990, 'avg_loss': 8.522989682968957, 'avg_acc': 49.980089252803964, 'loss': 9.531301498413086}


EP_train:3:  69%|| 19002/27626 [44:49<20:32,  7.00it/s]

{'epoch': 3, 'iter': 19000, 'avg_loss': 8.523037682720927, 'avg_acc': 49.97829061628335, 'loss': 8.10013484954834}


EP_train:3:  69%|| 19012/27626 [44:50<20:28,  7.01it/s]

{'epoch': 3, 'iter': 19010, 'avg_loss': 8.523189127435815, 'avg_acc': 49.979617063805165, 'loss': 8.691977500915527}


EP_train:3:  69%|| 19022/27626 [44:52<20:23,  7.03it/s]

{'epoch': 3, 'iter': 19020, 'avg_loss': 8.52315938982275, 'avg_acc': 49.97814915093844, 'loss': 8.824788093566895}


EP_train:3:  69%|| 19032/27626 [44:53<20:13,  7.08it/s]

{'epoch': 3, 'iter': 19030, 'avg_loss': 8.522935358375918, 'avg_acc': 49.97848904419106, 'loss': 7.739874839782715}


EP_train:3:  69%|| 19042/27626 [44:55<20:12,  7.08it/s]

{'epoch': 3, 'iter': 19040, 'avg_loss': 8.523336486702211, 'avg_acc': 49.97718738511633, 'loss': 8.268155097961426}


EP_train:3:  69%|| 19052/27626 [44:56<20:10,  7.08it/s]

{'epoch': 3, 'iter': 19050, 'avg_loss': 8.523238345510558, 'avg_acc': 49.979003726838485, 'loss': 9.136305809020996}


EP_train:3:  69%|| 19062/27626 [44:57<20:09,  7.08it/s]

{'epoch': 3, 'iter': 19060, 'avg_loss': 8.523174473611563, 'avg_acc': 49.977867110854625, 'loss': 7.755627632141113}


EP_train:3:  69%|| 19072/27626 [44:59<20:10,  7.07it/s]

{'epoch': 3, 'iter': 19070, 'avg_loss': 8.523096395610269, 'avg_acc': 49.976731686854386, 'loss': 8.698728561401367}


EP_train:3:  69%|| 19082/27626 [45:00<20:16,  7.02it/s]

{'epoch': 3, 'iter': 19080, 'avg_loss': 8.523110493343648, 'avg_acc': 49.97625255489754, 'loss': 8.383487701416016}


EP_train:3:  69%|| 19092/27626 [45:02<20:12,  7.04it/s]

{'epoch': 3, 'iter': 19090, 'avg_loss': 8.523117102895286, 'avg_acc': 49.97822927033681, 'loss': 8.684568405151367}


EP_train:3:  69%|| 19102/27626 [45:03<20:14,  7.02it/s]

{'epoch': 3, 'iter': 19100, 'avg_loss': 8.523117905642673, 'avg_acc': 49.975295796031624, 'loss': 8.602078437805176}


EP_train:3:  69%|| 19112/27626 [45:04<20:14,  7.01it/s]

{'epoch': 3, 'iter': 19110, 'avg_loss': 8.52300865013554, 'avg_acc': 49.976943906650625, 'loss': 9.120392799377441}


EP_train:3:  69%|| 19122/27626 [45:06<20:07,  7.04it/s]

{'epoch': 3, 'iter': 19120, 'avg_loss': 8.522983152393417, 'avg_acc': 49.9766290988965, 'loss': 7.586918830871582}


EP_train:3:  69%|| 19132/27626 [45:07<20:07,  7.03it/s]

{'epoch': 3, 'iter': 19130, 'avg_loss': 8.522959388808482, 'avg_acc': 49.975007840677435, 'loss': 7.8879194259643555}


EP_train:3:  69%|| 19142/27626 [45:09<20:01,  7.06it/s]

{'epoch': 3, 'iter': 19140, 'avg_loss': 8.52285917220921, 'avg_acc': 49.97371480068962, 'loss': 8.377523422241211}


EP_train:3:  69%|| 19152/27626 [45:10<20:02,  7.05it/s]

{'epoch': 3, 'iter': 19150, 'avg_loss': 8.522824649991058, 'avg_acc': 49.97372852592554, 'loss': 8.38301944732666}


EP_train:3:  69%|| 19162/27626 [45:12<20:06,  7.02it/s]

{'epoch': 3, 'iter': 19160, 'avg_loss': 8.52304034354785, 'avg_acc': 49.97423151192527, 'loss': 8.86616325378418}


EP_train:3:  69%|| 19172/27626 [45:13<20:01,  7.03it/s]

{'epoch': 3, 'iter': 19170, 'avg_loss': 8.522968928657608, 'avg_acc': 49.973266913567365, 'loss': 8.126566886901855}


EP_train:3:  69%|| 19182/27626 [45:14<20:08,  6.99it/s]

{'epoch': 3, 'iter': 19180, 'avg_loss': 8.52291482362886, 'avg_acc': 49.97051118294145, 'loss': 9.176129341125488}


EP_train:3:  69%|| 19192/27626 [45:16<19:59,  7.03it/s]

{'epoch': 3, 'iter': 19190, 'avg_loss': 8.522809962944057, 'avg_acc': 49.97020087541035, 'loss': 8.17514419555664}


EP_train:3:  70%|| 19202/27626 [45:17<20:06,  6.98it/s]

{'epoch': 3, 'iter': 19200, 'avg_loss': 8.522725202013978, 'avg_acc': 49.97086740273944, 'loss': 8.253334045410156}


EP_train:3:  70%|| 19212/27626 [45:19<20:01,  7.00it/s]

{'epoch': 3, 'iter': 19210, 'avg_loss': 8.522815469191093, 'avg_acc': 49.97348524282963, 'loss': 8.294053077697754}


EP_train:3:  70%|| 19222/27626 [45:20<19:47,  7.08it/s]

{'epoch': 3, 'iter': 19220, 'avg_loss': 8.522689646919401, 'avg_acc': 49.97431195047084, 'loss': 8.581221580505371}


EP_train:3:  70%|| 19232/27626 [45:21<19:51,  7.05it/s]

{'epoch': 3, 'iter': 19230, 'avg_loss': 8.522640096410427, 'avg_acc': 49.97367531589621, 'loss': 8.394920349121094}


EP_train:3:  70%|| 19242/27626 [45:23<19:54,  7.02it/s]

{'epoch': 3, 'iter': 19240, 'avg_loss': 8.522499993860917, 'avg_acc': 49.97109037991788, 'loss': 7.536647319793701}


EP_train:3:  70%|| 19252/27626 [45:24<19:47,  7.05it/s]

{'epoch': 3, 'iter': 19250, 'avg_loss': 8.522395932292339, 'avg_acc': 49.970456080203626, 'loss': 8.864013671875}


EP_train:3:  70%|| 19262/27626 [45:26<19:50,  7.03it/s]

{'epoch': 3, 'iter': 19260, 'avg_loss': 8.52235341521415, 'avg_acc': 49.969497949223815, 'loss': 8.45486068725586}


EP_train:3:  70%|| 19272/27626 [45:27<19:43,  7.06it/s]

{'epoch': 3, 'iter': 19270, 'avg_loss': 8.52233005558521, 'avg_acc': 49.97048674173629, 'loss': 8.229887962341309}


EP_train:3:  70%|| 19282/27626 [45:29<19:47,  7.03it/s]

{'epoch': 3, 'iter': 19280, 'avg_loss': 8.522283115474016, 'avg_acc': 49.971960738550905, 'loss': 8.156611442565918}


EP_train:3:  70%|| 19292/27626 [45:30<19:39,  7.07it/s]

{'epoch': 3, 'iter': 19290, 'avg_loss': 8.52215650580817, 'avg_acc': 49.9693833912187, 'loss': 8.278645515441895}


EP_train:3:  70%|| 19302/27626 [45:31<19:42,  7.04it/s]

{'epoch': 3, 'iter': 19300, 'avg_loss': 8.522147612376298, 'avg_acc': 49.969561162634065, 'loss': 8.663960456848145}


EP_train:3:  70%|| 19312/27626 [45:33<19:39,  7.05it/s]

{'epoch': 3, 'iter': 19310, 'avg_loss': 8.522087774261424, 'avg_acc': 49.969576925068615, 'loss': 7.746263027191162}


EP_train:3:  70%|| 19322/27626 [45:34<19:36,  7.06it/s]

{'epoch': 3, 'iter': 19320, 'avg_loss': 8.521969956829954, 'avg_acc': 49.96829874230112, 'loss': 8.565001487731934}


EP_train:3:  70%|| 19332/27626 [45:36<19:40,  7.03it/s]

{'epoch': 3, 'iter': 19330, 'avg_loss': 8.522049449306742, 'avg_acc': 49.96669856706844, 'loss': 9.000850677490234}


EP_train:3:  70%|| 19342/27626 [45:37<19:34,  7.05it/s]

{'epoch': 3, 'iter': 19340, 'avg_loss': 8.522085529406484, 'avg_acc': 49.966069489685125, 'loss': 9.075584411621094}


EP_train:3:  70%|| 19352/27626 [45:38<19:40,  7.01it/s]

{'epoch': 3, 'iter': 19350, 'avg_loss': 8.522116135518425, 'avg_acc': 49.96786341791122, 'loss': 8.721953392028809}


EP_train:3:  70%|| 19362/27626 [45:40<19:49,  6.95it/s]

{'epoch': 3, 'iter': 19360, 'avg_loss': 8.522114034957811, 'avg_acc': 49.96642735395899, 'loss': 8.940422058105469}


EP_train:3:  70%|| 19372/27626 [45:41<19:41,  6.99it/s]

{'epoch': 3, 'iter': 19370, 'avg_loss': 8.522212029069587, 'avg_acc': 49.96515409632957, 'loss': 8.39681625366211}


EP_train:3:  70%|| 19382/27626 [45:43<19:38,  7.00it/s]

{'epoch': 3, 'iter': 19380, 'avg_loss': 8.522108185519485, 'avg_acc': 49.96323719106341, 'loss': 7.406879901885986}


EP_train:3:  70%|| 19392/27626 [45:44<19:25,  7.06it/s]

{'epoch': 3, 'iter': 19390, 'avg_loss': 8.521956474672468, 'avg_acc': 49.96422309318756, 'loss': 8.530590057373047}


EP_train:3:  70%|| 19402/27626 [45:46<19:32,  7.01it/s]

{'epoch': 3, 'iter': 19400, 'avg_loss': 8.52185840332891, 'avg_acc': 49.96617442399876, 'loss': 8.35142993927002}


EP_train:3:  70%|| 19412/27626 [45:47<19:24,  7.06it/s]

{'epoch': 3, 'iter': 19410, 'avg_loss': 8.52172316213239, 'avg_acc': 49.96844572664983, 'loss': 7.962886810302734}


EP_train:3:  70%|| 19422/27626 [45:48<19:22,  7.06it/s]

{'epoch': 3, 'iter': 19420, 'avg_loss': 8.521606642224812, 'avg_acc': 49.970231965398284, 'loss': 8.677360534667969}


EP_train:3:  70%|| 19432/27626 [45:50<19:41,  6.94it/s]

{'epoch': 3, 'iter': 19430, 'avg_loss': 8.521644816237393, 'avg_acc': 49.96992563429571, 'loss': 8.31363582611084}


EP_train:3:  70%|| 19442/27626 [45:51<19:25,  7.02it/s]

{'epoch': 3, 'iter': 19440, 'avg_loss': 8.521723120627437, 'avg_acc': 49.97219150249473, 'loss': 8.087396621704102}


EP_train:3:  70%|| 19452/27626 [45:53<19:18,  7.06it/s]

{'epoch': 3, 'iter': 19450, 'avg_loss': 8.521858183364747, 'avg_acc': 49.97011721762377, 'loss': 8.96017837524414}


EP_train:3:  70%|| 19462/27626 [45:54<19:23,  7.02it/s]

{'epoch': 3, 'iter': 19460, 'avg_loss': 8.521889107205642, 'avg_acc': 49.969971995272594, 'loss': 8.342612266540527}


EP_train:3:  70%|| 19472/27626 [45:55<19:28,  6.98it/s]

{'epoch': 3, 'iter': 19470, 'avg_loss': 8.52210594108871, 'avg_acc': 49.9701479122798, 'loss': 8.155900955200195}


EP_train:3:  71%|| 19482/27626 [45:57<19:18,  7.03it/s]

{'epoch': 3, 'iter': 19480, 'avg_loss': 8.522335036531206, 'avg_acc': 49.96984241055387, 'loss': 8.805593490600586}


EP_train:3:  71%|| 19492/27626 [45:58<19:09,  7.08it/s]

{'epoch': 3, 'iter': 19490, 'avg_loss': 8.522374502881398, 'avg_acc': 49.96793391821866, 'loss': 8.613471984863281}


EP_train:3:  71%|| 19502/27626 [46:00<19:13,  7.04it/s]

{'epoch': 3, 'iter': 19500, 'avg_loss': 8.522255583290173, 'avg_acc': 49.969072098866725, 'loss': 8.31665325164795}


EP_train:3:  71%|| 19512/27626 [46:01<19:20,  6.99it/s]

{'epoch': 3, 'iter': 19510, 'avg_loss': 8.522084887381745, 'avg_acc': 49.971330275229356, 'loss': 8.796234130859375}


EP_train:3:  71%|| 19522/27626 [46:03<19:10,  7.04it/s]

{'epoch': 3, 'iter': 19520, 'avg_loss': 8.522109403527667, 'avg_acc': 49.97262563393269, 'loss': 8.642297744750977}


EP_train:3:  71%|| 19532/27626 [46:04<19:12,  7.03it/s]

{'epoch': 3, 'iter': 19530, 'avg_loss': 8.522222019060985, 'avg_acc': 49.97471967641186, 'loss': 7.983426094055176}


EP_train:3:  71%|| 19542/27626 [46:05<19:09,  7.03it/s]

{'epoch': 3, 'iter': 19540, 'avg_loss': 8.522471673693227, 'avg_acc': 49.97473261347935, 'loss': 8.932260513305664}


EP_train:3:  71%|| 19552/27626 [46:07<19:10,  7.02it/s]

{'epoch': 3, 'iter': 19550, 'avg_loss': 8.522521841685172, 'avg_acc': 49.97410618382692, 'loss': 8.337898254394531}


EP_train:3:  71%|| 19562/27626 [46:08<19:18,  6.96it/s]

{'epoch': 3, 'iter': 19560, 'avg_loss': 8.522552056885663, 'avg_acc': 49.97459869127346, 'loss': 8.589698791503906}


EP_train:3:  71%|| 19572/27626 [46:10<19:00,  7.06it/s]

{'epoch': 3, 'iter': 19570, 'avg_loss': 8.5226007637167, 'avg_acc': 49.97604874559297, 'loss': 8.489251136779785}


EP_train:3:  71%|| 19582/27626 [46:11<19:06,  7.01it/s]

{'epoch': 3, 'iter': 19580, 'avg_loss': 8.522656074772511, 'avg_acc': 49.97334788825903, 'loss': 8.807266235351562}


EP_train:3:  71%|| 19592/27626 [46:12<18:56,  7.07it/s]

{'epoch': 3, 'iter': 19590, 'avg_loss': 8.522656288076874, 'avg_acc': 49.97208539635547, 'loss': 8.553301811218262}


EP_train:3:  71%|| 19602/27626 [46:14<19:04,  7.01it/s]

{'epoch': 3, 'iter': 19600, 'avg_loss': 8.522698289040102, 'avg_acc': 49.97496938931687, 'loss': 8.335504531860352}


EP_train:3:  71%|| 19612/27626 [46:15<18:56,  7.05it/s]

{'epoch': 3, 'iter': 19610, 'avg_loss': 8.522606149307476, 'avg_acc': 49.97577889959717, 'loss': 8.547060012817383}


EP_train:3:  71%|| 19622/27626 [46:17<19:10,  6.96it/s]

{'epoch': 3, 'iter': 19620, 'avg_loss': 8.522457372363958, 'avg_acc': 49.97435783089547, 'loss': 7.602595806121826}


EP_train:3:  71%|| 19632/27626 [46:18<18:59,  7.01it/s]

{'epoch': 3, 'iter': 19630, 'avg_loss': 8.522559221020039, 'avg_acc': 49.97453007997555, 'loss': 8.758231163024902}


EP_train:3:  71%|| 19642/27626 [46:20<18:55,  7.03it/s]

{'epoch': 3, 'iter': 19640, 'avg_loss': 8.522698836124851, 'avg_acc': 49.97406572985082, 'loss': 8.272452354431152}


EP_train:3:  71%|| 19652/27626 [46:21<19:02,  6.98it/s]

{'epoch': 3, 'iter': 19650, 'avg_loss': 8.522638450378158, 'avg_acc': 49.975192102183094, 'loss': 9.264657020568848}


EP_train:3:  71%|| 19662/27626 [46:22<18:59,  6.99it/s]

{'epoch': 3, 'iter': 19660, 'avg_loss': 8.52286771757606, 'avg_acc': 49.97711204923452, 'loss': 8.509063720703125}


EP_train:3:  71%|| 19672/27626 [46:24<19:00,  6.98it/s]

{'epoch': 3, 'iter': 19670, 'avg_loss': 8.522930730262459, 'avg_acc': 49.97601164150272, 'loss': 8.814614295959473}


EP_train:3:  71%|| 19682/27626 [46:25<18:45,  7.06it/s]

{'epoch': 3, 'iter': 19680, 'avg_loss': 8.522974336663976, 'avg_acc': 49.97443600426807, 'loss': 8.63099479675293}


EP_train:3:  71%|| 19692/27626 [46:27<18:54,  6.99it/s]

{'epoch': 3, 'iter': 19690, 'avg_loss': 8.52301593144315, 'avg_acc': 49.97317937128638, 'loss': 7.792827606201172}


EP_train:3:  71%|| 19702/27626 [46:28<18:45,  7.04it/s]

{'epoch': 3, 'iter': 19700, 'avg_loss': 8.523038153369791, 'avg_acc': 49.97319298512765, 'loss': 8.891390800476074}


EP_train:3:  71%|| 19712/27626 [46:30<18:51,  6.99it/s]

{'epoch': 3, 'iter': 19710, 'avg_loss': 8.52295181236095, 'avg_acc': 49.97304804423926, 'loss': 7.923118591308594}


EP_train:3:  71%|| 19722/27626 [46:31<18:40,  7.05it/s]

{'epoch': 3, 'iter': 19720, 'avg_loss': 8.52289869811785, 'avg_acc': 49.97163556614776, 'loss': 8.234725952148438}


EP_train:3:  71%|| 19732/27626 [46:32<18:50,  6.98it/s]

{'epoch': 3, 'iter': 19730, 'avg_loss': 8.522930119670923, 'avg_acc': 49.97228346257159, 'loss': 8.795170783996582}


EP_train:3:  71%|| 19742/27626 [46:34<18:41,  7.03it/s]

{'epoch': 3, 'iter': 19740, 'avg_loss': 8.522918451847467, 'avg_acc': 49.970872802796215, 'loss': 7.949118137359619}


EP_train:3:  71%|| 19752/27626 [46:35<18:41,  7.02it/s]

{'epoch': 3, 'iter': 19750, 'avg_loss': 8.522884945828162, 'avg_acc': 49.9723115285302, 'loss': 8.141819953918457}


EP_train:3:  72%|| 19762/27626 [46:37<18:39,  7.03it/s]

{'epoch': 3, 'iter': 19760, 'avg_loss': 8.52267928092346, 'avg_acc': 49.971534841354185, 'loss': 8.205991744995117}


EP_train:3:  72%|| 19772/27626 [46:38<18:38,  7.02it/s]

{'epoch': 3, 'iter': 19770, 'avg_loss': 8.52254080590309, 'avg_acc': 49.97265565727581, 'loss': 8.028057098388672}


EP_train:3:  72%|| 19782/27626 [46:39<18:32,  7.05it/s]

{'epoch': 3, 'iter': 19780, 'avg_loss': 8.522404114473614, 'avg_acc': 49.97014180274, 'loss': 8.596287727355957}


EP_train:3:  72%|| 19792/27626 [46:41<18:32,  7.04it/s]

{'epoch': 3, 'iter': 19790, 'avg_loss': 8.522147119018152, 'avg_acc': 49.971735890051036, 'loss': 8.432723045349121}


EP_train:3:  72%|| 19802/27626 [46:42<18:30,  7.05it/s]

{'epoch': 3, 'iter': 19800, 'avg_loss': 8.522206110169469, 'avg_acc': 49.968593757891014, 'loss': 8.25645923614502}


EP_train:3:  72%|| 19812/27626 [46:44<18:34,  7.01it/s]

{'epoch': 3, 'iter': 19810, 'avg_loss': 8.521861832468376, 'avg_acc': 49.96860961082227, 'loss': 8.293604850769043}


EP_train:3:  72%|| 19822/27626 [46:45<18:38,  6.98it/s]

{'epoch': 3, 'iter': 19820, 'avg_loss': 8.521912984990921, 'avg_acc': 49.96736415922506, 'loss': 8.56028938293457}


EP_train:3:  72%|| 19832/27626 [46:47<18:35,  6.99it/s]

{'epoch': 3, 'iter': 19830, 'avg_loss': 8.52197439932293, 'avg_acc': 49.9678533608996, 'loss': 8.351943016052246}


EP_train:3:  72%|| 19842/27626 [46:48<18:26,  7.04it/s]

{'epoch': 3, 'iter': 19840, 'avg_loss': 8.522028777154846, 'avg_acc': 49.96928708230432, 'loss': 8.809975624084473}


EP_train:3:  72%|| 19852/27626 [46:49<18:25,  7.03it/s]

{'epoch': 3, 'iter': 19850, 'avg_loss': 8.522024255214209, 'avg_acc': 49.971191627625814, 'loss': 9.0462064743042}


EP_train:3:  72%|| 19862/27626 [46:51<18:20,  7.06it/s]

{'epoch': 3, 'iter': 19860, 'avg_loss': 8.521939124281232, 'avg_acc': 49.97293691153517, 'loss': 7.777061462402344}


EP_train:3:  72%|| 19872/27626 [46:52<18:24,  7.02it/s]

{'epoch': 3, 'iter': 19870, 'avg_loss': 8.521828882635813, 'avg_acc': 49.973579588344826, 'loss': 7.778902053833008}


EP_train:3:  72%|| 19882/27626 [46:54<18:24,  7.01it/s]

{'epoch': 3, 'iter': 19880, 'avg_loss': 8.521767542807247, 'avg_acc': 49.97485035963985, 'loss': 9.237493515014648}


EP_train:3:  72%|| 19892/27626 [46:55<18:23,  7.01it/s]

{'epoch': 3, 'iter': 19890, 'avg_loss': 8.521695463553458, 'avg_acc': 49.97517721582626, 'loss': 8.554859161376953}


EP_train:3:  72%|| 19902/27626 [46:57<18:18,  7.03it/s]

{'epoch': 3, 'iter': 19900, 'avg_loss': 8.521904361576874, 'avg_acc': 49.97597482538566, 'loss': 8.403451919555664}


EP_train:3:  72%|| 19912/27626 [46:58<18:15,  7.04it/s]

{'epoch': 3, 'iter': 19910, 'avg_loss': 8.5218896686471, 'avg_acc': 49.97692858219075, 'loss': 9.839118003845215}


EP_train:3:  72%|| 19922/27626 [46:59<18:30,  6.94it/s]

{'epoch': 3, 'iter': 19920, 'avg_loss': 8.521734580296624, 'avg_acc': 49.97552833693087, 'loss': 8.622099876403809}


EP_train:3:  72%|| 19932/27626 [47:01<18:14,  7.03it/s]

{'epoch': 3, 'iter': 19930, 'avg_loss': 8.521646376590615, 'avg_acc': 49.97569740605087, 'loss': 9.094758987426758}


EP_train:3:  72%|| 19942/27626 [47:02<18:14,  7.02it/s]

{'epoch': 3, 'iter': 19940, 'avg_loss': 8.521783920917652, 'avg_acc': 49.9761797302041, 'loss': 8.714897155761719}


EP_train:3:  72%|| 19952/27626 [47:04<18:24,  6.95it/s]

{'epoch': 3, 'iter': 19950, 'avg_loss': 8.521867889956683, 'avg_acc': 49.97556513457972, 'loss': 8.709964752197266}


EP_train:3:  72%|| 19962/27626 [47:05<18:04,  7.07it/s]

{'epoch': 3, 'iter': 19960, 'avg_loss': 8.521919124190097, 'avg_acc': 49.974951154751764, 'loss': 9.613639831542969}


EP_train:3:  72%|| 19972/27626 [47:06<18:11,  7.01it/s]

{'epoch': 3, 'iter': 19970, 'avg_loss': 8.521872042511301, 'avg_acc': 49.9744942666867, 'loss': 8.578261375427246}


EP_train:3:  72%|| 19982/27626 [47:08<18:11,  7.01it/s]

{'epoch': 3, 'iter': 19980, 'avg_loss': 8.521948328677327, 'avg_acc': 49.97528902457334, 'loss': 8.492416381835938}


EP_train:3:  72%|| 19992/27626 [47:09<18:05,  7.03it/s]

{'epoch': 3, 'iter': 19990, 'avg_loss': 8.521962648775942, 'avg_acc': 49.975457705967685, 'loss': 8.65623950958252}


EP_train:3:  72%|| 20002/27626 [47:11<18:11,  6.98it/s]

{'epoch': 3, 'iter': 20000, 'avg_loss': 8.521872687927456, 'avg_acc': 49.97406379681016, 'loss': 8.33436107635498}


EP_train:3:  72%|| 20012/27626 [47:12<18:07,  7.00it/s]

{'epoch': 3, 'iter': 20010, 'avg_loss': 8.521955520088875, 'avg_acc': 49.97407675778322, 'loss': 8.847111701965332}


EP_train:3:  72%|| 20022/27626 [47:14<17:59,  7.04it/s]

{'epoch': 3, 'iter': 20020, 'avg_loss': 8.52200573409189, 'avg_acc': 49.97049972528845, 'loss': 7.7837677001953125}


EP_train:3:  73%|| 20032/27626 [47:15<18:01,  7.02it/s]

{'epoch': 3, 'iter': 20030, 'avg_loss': 8.522015011463525, 'avg_acc': 49.96895437072538, 'loss': 9.132135391235352}


EP_train:3:  73%|| 20042/27626 [47:16<18:00,  7.02it/s]

{'epoch': 3, 'iter': 20040, 'avg_loss': 8.521900267820147, 'avg_acc': 49.96850207075495, 'loss': 7.81259822845459}


EP_train:3:  73%|| 20052/27626 [47:18<17:59,  7.02it/s]

{'epoch': 3, 'iter': 20050, 'avg_loss': 8.522002979083748, 'avg_acc': 49.97007630542117, 'loss': 7.539959907531738}


EP_train:3:  73%|| 20062/27626 [47:19<17:56,  7.03it/s]

{'epoch': 3, 'iter': 20060, 'avg_loss': 8.521968511301228, 'avg_acc': 49.9697796720004, 'loss': 8.738096237182617}


EP_train:3:  73%|| 20072/27626 [47:21<17:54,  7.03it/s]

{'epoch': 3, 'iter': 20070, 'avg_loss': 8.521882623331612, 'avg_acc': 49.96932763688904, 'loss': 9.348536491394043}


EP_train:3:  73%|| 20082/27626 [47:22<17:56,  7.01it/s]

{'epoch': 3, 'iter': 20080, 'avg_loss': 8.52203097157516, 'avg_acc': 49.968564812509335, 'loss': 8.444692611694336}


EP_train:3:  73%|| 20092/27626 [47:23<17:58,  6.98it/s]

{'epoch': 3, 'iter': 20090, 'avg_loss': 8.521833791448456, 'avg_acc': 49.96904708575979, 'loss': 8.376093864440918}


EP_train:3:  73%|| 20102/27626 [47:25<17:55,  7.00it/s]

{'epoch': 3, 'iter': 20100, 'avg_loss': 8.521899745521376, 'avg_acc': 49.967041440724344, 'loss': 8.798445701599121}


EP_train:3:  73%|| 20112/27626 [47:26<17:58,  6.97it/s]

{'epoch': 3, 'iter': 20110, 'avg_loss': 8.521798962642443, 'avg_acc': 49.96799015464174, 'loss': 8.659433364868164}


EP_train:3:  73%|| 20122/27626 [47:28<17:53,  6.99it/s]

{'epoch': 3, 'iter': 20120, 'avg_loss': 8.521777142066439, 'avg_acc': 49.96754013220019, 'loss': 8.243049621582031}


EP_train:3:  73%|| 20132/27626 [47:29<17:48,  7.01it/s]

{'epoch': 3, 'iter': 20130, 'avg_loss': 8.521848536847594, 'avg_acc': 49.96786672296458, 'loss': 8.831725120544434}


EP_train:3:  73%|| 20142/27626 [47:31<17:44,  7.03it/s]

{'epoch': 3, 'iter': 20140, 'avg_loss': 8.521851903614165, 'avg_acc': 49.96726205252966, 'loss': 8.073447227478027}


EP_train:3:  73%|| 20152/27626 [47:32<17:46,  7.01it/s]

{'epoch': 3, 'iter': 20150, 'avg_loss': 8.521946180133593, 'avg_acc': 49.96650290308173, 'loss': 8.905729293823242}


EP_train:3:  73%|| 20162/27626 [47:33<17:40,  7.04it/s]

{'epoch': 3, 'iter': 20160, 'avg_loss': 8.521892418801551, 'avg_acc': 49.96884455136154, 'loss': 7.450872421264648}


EP_train:3:  73%|| 20172/27626 [47:35<17:47,  6.99it/s]

{'epoch': 3, 'iter': 20170, 'avg_loss': 8.521689140402469, 'avg_acc': 49.96746566853403, 'loss': 7.975350379943848}


EP_train:3:  73%|| 20182/27626 [47:36<17:47,  6.97it/s]

{'epoch': 3, 'iter': 20180, 'avg_loss': 8.52171306703202, 'avg_acc': 49.966707546702345, 'loss': 8.387495994567871}


EP_train:3:  73%|| 20192/27626 [47:38<17:36,  7.04it/s]

{'epoch': 3, 'iter': 20190, 'avg_loss': 8.521869641782436, 'avg_acc': 49.96533108810856, 'loss': 8.994922637939453}


EP_train:3:  73%|| 20202/27626 [47:39<17:30,  7.06it/s]

{'epoch': 3, 'iter': 20200, 'avg_loss': 8.521816260861522, 'avg_acc': 49.96674050789564, 'loss': 9.034480094909668}


EP_train:3:  73%|| 20212/27626 [47:41<17:36,  7.02it/s]

{'epoch': 3, 'iter': 20210, 'avg_loss': 8.521745355330609, 'avg_acc': 49.966602345257535, 'loss': 8.757281303405762}


EP_train:3:  73%|| 20222/27626 [47:42<17:31,  7.04it/s]

{'epoch': 3, 'iter': 20220, 'avg_loss': 8.521695907400916, 'avg_acc': 49.96584615004204, 'loss': 8.42577075958252}


EP_train:3:  73%|| 20232/27626 [47:43<17:26,  7.07it/s]

{'epoch': 3, 'iter': 20230, 'avg_loss': 8.52166848601982, 'avg_acc': 49.96509070238743, 'loss': 8.580700874328613}


EP_train:3:  73%|| 20242/27626 [47:45<17:26,  7.05it/s]

{'epoch': 3, 'iter': 20240, 'avg_loss': 8.52155398794811, 'avg_acc': 49.96557111802776, 'loss': 8.998882293701172}


EP_train:3:  73%|| 20252/27626 [47:46<17:23,  7.06it/s]

{'epoch': 3, 'iter': 20250, 'avg_loss': 8.521461681662462, 'avg_acc': 49.96759419287937, 'loss': 7.317689895629883}


EP_train:3:  73%|| 20262/27626 [47:48<17:29,  7.02it/s]

{'epoch': 3, 'iter': 20260, 'avg_loss': 8.521400294071606, 'avg_acc': 49.97146611717092, 'loss': 8.050405502319336}


EP_train:3:  73%|| 20272/27626 [47:49<17:29,  7.01it/s]

{'epoch': 3, 'iter': 20270, 'avg_loss': 8.521495630977409, 'avg_acc': 49.97410093236643, 'loss': 9.704795837402344}


EP_train:3:  73%|| 20282/27626 [47:50<17:32,  6.98it/s]

{'epoch': 3, 'iter': 20280, 'avg_loss': 8.521521872866584, 'avg_acc': 49.97041565997732, 'loss': 9.015190124511719}


EP_train:3:  73%|| 20292/27626 [47:52<17:31,  6.98it/s]

{'epoch': 3, 'iter': 20290, 'avg_loss': 8.52158944748133, 'avg_acc': 49.968736139175, 'loss': 9.119196891784668}


EP_train:3:  73%|| 20302/27626 [47:53<17:31,  6.97it/s]

{'epoch': 3, 'iter': 20300, 'avg_loss': 8.521856538920936, 'avg_acc': 49.969675139155704, 'loss': 8.064993858337402}


EP_train:3:  74%|| 20312/27626 [47:55<17:19,  7.03it/s]

{'epoch': 3, 'iter': 20310, 'avg_loss': 8.52184446908236, 'avg_acc': 49.97169021712373, 'loss': 9.255109786987305}


EP_train:3:  74%|| 20322/27626 [47:56<17:20,  7.02it/s]

{'epoch': 3, 'iter': 20320, 'avg_loss': 8.521795137291917, 'avg_acc': 49.97616382067812, 'loss': 9.03607177734375}


EP_train:3:  74%|| 20332/27626 [47:58<17:18,  7.02it/s]

{'epoch': 3, 'iter': 20330, 'avg_loss': 8.521618898710788, 'avg_acc': 49.97832743101667, 'loss': 8.32292652130127}


EP_train:3:  74%|| 20342/27626 [47:59<17:15,  7.04it/s]

{'epoch': 3, 'iter': 20340, 'avg_loss': 8.52145021652485, 'avg_acc': 49.97910623863134, 'loss': 7.616389274597168}


EP_train:3:  74%|| 20352/27626 [48:00<17:24,  6.96it/s]

{'epoch': 3, 'iter': 20350, 'avg_loss': 8.521375079476202, 'avg_acc': 49.978962950223575, 'loss': 8.590585708618164}


EP_train:3:  74%|| 20362/27626 [48:02<17:17,  7.00it/s]

{'epoch': 3, 'iter': 20360, 'avg_loss': 8.521582353455244, 'avg_acc': 49.98050807917097, 'loss': 9.701343536376953}


EP_train:3:  74%|| 20372/27626 [48:03<17:18,  6.99it/s]

{'epoch': 3, 'iter': 20370, 'avg_loss': 8.521558103958863, 'avg_acc': 49.97990403023906, 'loss': 8.376461029052734}


EP_train:3:  74%|| 20382/27626 [48:05<17:19,  6.97it/s]

{'epoch': 3, 'iter': 20380, 'avg_loss': 8.521459408769383, 'avg_acc': 49.98114052303616, 'loss': 8.22394847869873}


EP_train:3:  74%|| 20392/27626 [48:06<17:16,  6.98it/s]

{'epoch': 3, 'iter': 20390, 'avg_loss': 8.521476894589311, 'avg_acc': 49.98160953361777, 'loss': 9.160235404968262}


EP_train:3:  74%|| 20402/27626 [48:07<17:14,  6.98it/s]

{'epoch': 3, 'iter': 20400, 'avg_loss': 8.521403992610214, 'avg_acc': 49.98406940836234, 'loss': 8.722050666809082}


EP_train:3:  74%|| 20412/27626 [48:09<17:09,  7.00it/s]

{'epoch': 3, 'iter': 20410, 'avg_loss': 8.521445656247938, 'avg_acc': 49.98254617608153, 'loss': 8.295263290405273}


EP_train:3:  74%|| 20422/27626 [48:10<17:01,  7.05it/s]

{'epoch': 3, 'iter': 20420, 'avg_loss': 8.521641840284914, 'avg_acc': 49.98286078056902, 'loss': 9.058131217956543}


EP_train:3:  74%|| 20432/27626 [48:12<17:00,  7.05it/s]

{'epoch': 3, 'iter': 20430, 'avg_loss': 8.521667084244168, 'avg_acc': 49.983328030933386, 'loss': 8.736433982849121}


EP_train:3:  74%|| 20442/27626 [48:13<17:09,  6.98it/s]

{'epoch': 3, 'iter': 20440, 'avg_loss': 8.521597000526288, 'avg_acc': 49.98578225135756, 'loss': 7.714991569519043}


EP_train:3:  74%|| 20452/27626 [48:15<17:07,  6.98it/s]

{'epoch': 3, 'iter': 20450, 'avg_loss': 8.521576568273733, 'avg_acc': 49.98471957361498, 'loss': 8.154175758361816}


EP_train:3:  74%|| 20462/27626 [48:16<17:03,  7.00it/s]

{'epoch': 3, 'iter': 20460, 'avg_loss': 8.521607865127125, 'avg_acc': 49.98640706710327, 'loss': 9.33502197265625}


EP_train:3:  74%|| 20472/27626 [48:17<16:55,  7.04it/s]

{'epoch': 3, 'iter': 20470, 'avg_loss': 8.52163986826402, 'avg_acc': 49.98488715744224, 'loss': 8.731624603271484}


EP_train:3:  74%|| 20482/27626 [48:19<16:59,  7.01it/s]

{'epoch': 3, 'iter': 20480, 'avg_loss': 8.521789787698145, 'avg_acc': 49.986420340803676, 'loss': 8.593501091003418}


EP_train:3:  74%|| 20492/27626 [48:20<16:56,  7.02it/s]

{'epoch': 3, 'iter': 20490, 'avg_loss': 8.521901208027629, 'avg_acc': 49.98566443804597, 'loss': 9.057805061340332}


EP_train:3:  74%|| 20502/27626 [48:22<16:58,  6.99it/s]

{'epoch': 3, 'iter': 20500, 'avg_loss': 8.521956517355516, 'avg_acc': 49.986890883371544, 'loss': 8.374801635742188}


EP_train:3:  74%|| 20512/27626 [48:23<16:50,  7.04it/s]

{'epoch': 3, 'iter': 20510, 'avg_loss': 8.521884523580217, 'avg_acc': 49.98796377553508, 'loss': 7.72361946105957}


EP_train:3:  74%|| 20522/27626 [48:25<16:48,  7.04it/s]

{'epoch': 3, 'iter': 20520, 'avg_loss': 8.522031329713435, 'avg_acc': 49.986751376638566, 'loss': 7.771928787231445}


EP_train:3:  74%|| 20532/27626 [48:26<16:53,  7.00it/s]

{'epoch': 3, 'iter': 20530, 'avg_loss': 8.522055894701092, 'avg_acc': 49.98508353221957, 'loss': 8.179305076599121}


EP_train:3:  74%|| 20542/27626 [48:27<16:53,  6.99it/s]

{'epoch': 3, 'iter': 20540, 'avg_loss': 8.52212728499252, 'avg_acc': 49.98524292877659, 'loss': 9.227789878845215}


EP_train:3:  74%|| 20552/27626 [48:29<16:56,  6.96it/s]

{'epoch': 3, 'iter': 20550, 'avg_loss': 8.522264189699504, 'avg_acc': 49.98570629166464, 'loss': 9.21215534210205}


EP_train:3:  74%|| 20562/27626 [48:30<16:46,  7.02it/s]

{'epoch': 3, 'iter': 20560, 'avg_loss': 8.522278721048108, 'avg_acc': 49.98677715091679, 'loss': 8.88684368133545}


EP_train:3:  74%|| 20572/27626 [48:32<16:39,  7.06it/s]

{'epoch': 3, 'iter': 20570, 'avg_loss': 8.522221283655128, 'avg_acc': 49.987087404598704, 'loss': 8.485564231872559}


EP_train:3:  75%|| 20582/27626 [48:33<16:52,  6.96it/s]

{'epoch': 3, 'iter': 20580, 'avg_loss': 8.522410143557574, 'avg_acc': 49.98846023030951, 'loss': 8.896233558654785}


EP_train:3:  75%|| 20592/27626 [48:34<16:40,  7.03it/s]

{'epoch': 3, 'iter': 20590, 'avg_loss': 8.522619862871633, 'avg_acc': 49.99119761060658, 'loss': 8.319392204284668}


EP_train:3:  75%|| 20602/27626 [48:36<16:41,  7.01it/s]

{'epoch': 3, 'iter': 20600, 'avg_loss': 8.522684522788678, 'avg_acc': 49.98922989175283, 'loss': 8.09648609161377}


EP_train:3:  75%|| 20612/27626 [48:37<16:39,  7.02it/s]

{'epoch': 3, 'iter': 20610, 'avg_loss': 8.522662229849361, 'avg_acc': 49.98908349910242, 'loss': 8.176125526428223}


EP_train:3:  75%|| 20622/27626 [48:39<16:38,  7.02it/s]

{'epoch': 3, 'iter': 20620, 'avg_loss': 8.522660198686172, 'avg_acc': 49.98787643664226, 'loss': 9.32370376586914}


EP_train:3:  75%|| 20632/27626 [48:40<16:31,  7.05it/s]

{'epoch': 3, 'iter': 20630, 'avg_loss': 8.522752192714837, 'avg_acc': 49.987427899762494, 'loss': 8.41101360321045}


EP_train:3:  75%|| 20642/27626 [48:42<16:38,  6.99it/s]

{'epoch': 3, 'iter': 20640, 'avg_loss': 8.522683746817847, 'avg_acc': 49.98713119519403, 'loss': 8.085244178771973}


EP_train:3:  75%|| 20652/27626 [48:43<16:32,  7.02it/s]

{'epoch': 3, 'iter': 20650, 'avg_loss': 8.522545073062512, 'avg_acc': 49.98335431698223, 'loss': 8.056733131408691}


EP_train:3:  75%|| 20662/27626 [48:44<16:35,  6.99it/s]

{'epoch': 3, 'iter': 20660, 'avg_loss': 8.522408802657822, 'avg_acc': 49.985933643095684, 'loss': 8.049129486083984}


EP_train:3:  75%|| 20672/27626 [48:46<16:28,  7.03it/s]

{'epoch': 3, 'iter': 20670, 'avg_loss': 8.522212956987556, 'avg_acc': 49.9860916259494, 'loss': 7.845053672790527}


EP_train:3:  75%|| 20682/27626 [48:47<16:28,  7.02it/s]

{'epoch': 3, 'iter': 20680, 'avg_loss': 8.5222210505502, 'avg_acc': 49.98700498041681, 'loss': 8.24083137512207}


EP_train:3:  75%|| 20692/27626 [48:49<16:24,  7.04it/s]

{'epoch': 3, 'iter': 20690, 'avg_loss': 8.522477750186848, 'avg_acc': 49.98610506983713, 'loss': 8.71884536743164}


EP_train:3:  75%|| 20702/27626 [48:50<16:30,  6.99it/s]

{'epoch': 3, 'iter': 20700, 'avg_loss': 8.522456770328835, 'avg_acc': 49.986111782039515, 'loss': 7.847805500030518}


EP_train:3:  75%|| 20712/27626 [48:51<16:23,  7.03it/s]

{'epoch': 3, 'iter': 20710, 'avg_loss': 8.522397148793925, 'avg_acc': 49.98566582975231, 'loss': 8.435495376586914}


EP_train:3:  75%|| 20722/27626 [48:53<16:28,  6.98it/s]

{'epoch': 3, 'iter': 20720, 'avg_loss': 8.522510714365486, 'avg_acc': 49.984164615607355, 'loss': 8.435288429260254}


EP_train:3:  75%|| 20732/27626 [48:54<16:41,  6.88it/s]

{'epoch': 3, 'iter': 20730, 'avg_loss': 8.522720985408917, 'avg_acc': 49.98628262023057, 'loss': 9.290573120117188}


EP_train:3:  75%|| 20742/27626 [48:56<16:25,  6.99it/s]

{'epoch': 3, 'iter': 20740, 'avg_loss': 8.522738117668439, 'avg_acc': 49.98598789836556, 'loss': 8.764403343200684}


EP_train:3:  75%|| 20752/27626 [48:57<16:24,  6.98it/s]

{'epoch': 3, 'iter': 20750, 'avg_loss': 8.522648076908622, 'avg_acc': 49.983735723579585, 'loss': 8.295934677124023}


EP_train:3:  75%|| 20762/27626 [48:59<16:14,  7.05it/s]

{'epoch': 3, 'iter': 20760, 'avg_loss': 8.522771840540868, 'avg_acc': 49.98238885410144, 'loss': 9.129434585571289}


EP_train:3:  75%|| 20772/27626 [49:00<16:17,  7.01it/s]

{'epoch': 3, 'iter': 20770, 'avg_loss': 8.522773234295883, 'avg_acc': 49.985105435462906, 'loss': 8.716580390930176}


EP_train:3:  75%|| 20782/27626 [49:01<16:11,  7.04it/s]

{'epoch': 3, 'iter': 20780, 'avg_loss': 8.52294532326049, 'avg_acc': 49.984210336364946, 'loss': 8.581720352172852}


EP_train:3:  75%|| 20792/27626 [49:03<16:14,  7.02it/s]

{'epoch': 3, 'iter': 20790, 'avg_loss': 8.52295929728164, 'avg_acc': 49.983616709153, 'loss': 8.68819522857666}


EP_train:3:  75%|| 20802/27626 [49:04<16:11,  7.02it/s]

{'epoch': 3, 'iter': 20800, 'avg_loss': 8.522799636256172, 'avg_acc': 49.98092038844286, 'loss': 7.909931182861328}


EP_train:3:  75%|| 20812/27626 [49:06<16:07,  7.04it/s]

{'epoch': 3, 'iter': 20810, 'avg_loss': 8.522860265397927, 'avg_acc': 49.97987842967661, 'loss': 8.736578941345215}


EP_train:3:  75%|| 20822/27626 [49:07<16:08,  7.03it/s]

{'epoch': 3, 'iter': 20820, 'avg_loss': 8.52281136100754, 'avg_acc': 49.9786873829307, 'loss': 8.20024299621582}


EP_train:3:  75%|| 20832/27626 [49:09<16:06,  7.03it/s]

{'epoch': 3, 'iter': 20830, 'avg_loss': 8.522835661414879, 'avg_acc': 49.9785475973309, 'loss': 9.040741920471191}


EP_train:3:  75%|| 20842/27626 [49:10<15:57,  7.08it/s]

{'epoch': 3, 'iter': 20840, 'avg_loss': 8.522717779047984, 'avg_acc': 49.97810805623531, 'loss': 8.754922866821289}


EP_train:3:  75%|| 20852/27626 [49:11<16:04,  7.02it/s]

{'epoch': 3, 'iter': 20850, 'avg_loss': 8.52279291715069, 'avg_acc': 49.97781880964941, 'loss': 9.205121040344238}


EP_train:3:  76%|| 20862/27626 [49:13<16:00,  7.04it/s]

{'epoch': 3, 'iter': 20860, 'avg_loss': 8.522800506495612, 'avg_acc': 49.977529840371986, 'loss': 8.855395317077637}


EP_train:3:  76%|| 20872/27626 [49:14<16:03,  7.01it/s]

{'epoch': 3, 'iter': 20870, 'avg_loss': 8.522723227683645, 'avg_acc': 49.97649250155718, 'loss': 8.519560813903809}


EP_train:3:  76%|| 20882/27626 [49:16<16:03,  7.00it/s]

{'epoch': 3, 'iter': 20880, 'avg_loss': 8.522827201279853, 'avg_acc': 49.9756058138978, 'loss': 8.635108947753906}


EP_train:3:  76%|| 20892/27626 [49:17<16:02,  7.00it/s]

{'epoch': 3, 'iter': 20890, 'avg_loss': 8.522819487384687, 'avg_acc': 49.9747199751089, 'loss': 7.726258754730225}


EP_train:3:  76%|| 20902/27626 [49:19<16:00,  7.00it/s]

{'epoch': 3, 'iter': 20900, 'avg_loss': 8.522758491460994, 'avg_acc': 49.9763767283862, 'loss': 8.274691581726074}


EP_train:3:  76%|| 20912/27626 [49:20<16:00,  6.99it/s]

{'epoch': 3, 'iter': 20910, 'avg_loss': 8.522749885653761, 'avg_acc': 49.975939696810286, 'loss': 8.945635795593262}


EP_train:3:  76%|| 20922/27626 [49:21<15:57,  7.00it/s]

{'epoch': 3, 'iter': 20920, 'avg_loss': 8.522750313745494, 'avg_acc': 49.975503083026624, 'loss': 8.831953048706055}


EP_train:3:  76%|| 20932/27626 [49:23<16:01,  6.96it/s]

{'epoch': 3, 'iter': 20930, 'avg_loss': 8.522841743331456, 'avg_acc': 49.97342458554297, 'loss': 8.061427116394043}


EP_train:3:  76%|| 20942/27626 [49:24<15:47,  7.05it/s]

{'epoch': 3, 'iter': 20940, 'avg_loss': 8.522994090627163, 'avg_acc': 49.974481877656274, 'loss': 9.276591300964355}


EP_train:3:  76%|| 20952/27626 [49:26<15:46,  7.05it/s]

{'epoch': 3, 'iter': 20950, 'avg_loss': 8.523069213110475, 'avg_acc': 49.974494057562886, 'loss': 9.983513832092285}


EP_train:3:  76%|| 20962/27626 [49:27<15:46,  7.04it/s]

{'epoch': 3, 'iter': 20960, 'avg_loss': 8.52311537301152, 'avg_acc': 49.974655312246554, 'loss': 8.511470794677734}


EP_train:3:  76%|| 20972/27626 [49:28<15:48,  7.01it/s]

{'epoch': 3, 'iter': 20970, 'avg_loss': 8.523060756739012, 'avg_acc': 49.97422035191455, 'loss': 8.515140533447266}


EP_train:3:  76%|| 20982/27626 [49:30<15:52,  6.98it/s]

{'epoch': 3, 'iter': 20980, 'avg_loss': 8.523022998737815, 'avg_acc': 49.97378580620562, 'loss': 8.75605297088623}


EP_train:3:  76%|| 20992/27626 [49:31<15:47,  7.00it/s]

{'epoch': 3, 'iter': 20990, 'avg_loss': 8.52297096709947, 'avg_acc': 49.97528702777381, 'loss': 8.375096321105957}


EP_train:3:  76%|| 21002/27626 [49:33<15:50,  6.97it/s]

{'epoch': 3, 'iter': 21000, 'avg_loss': 8.523087054659914, 'avg_acc': 49.97976286843483, 'loss': 8.58830738067627}


EP_train:3:  76%|| 21012/27626 [49:34<15:45,  6.99it/s]

{'epoch': 3, 'iter': 21010, 'avg_loss': 8.523077688543513, 'avg_acc': 49.98066488981962, 'loss': 8.194205284118652}


EP_train:3:  76%|| 21022/27626 [49:36<15:42,  7.00it/s]

{'epoch': 3, 'iter': 21020, 'avg_loss': 8.523195129078045, 'avg_acc': 49.97918747918748, 'loss': 8.860020637512207}


EP_train:3:  76%|| 21032/27626 [49:37<15:42,  7.00it/s]

{'epoch': 3, 'iter': 21030, 'avg_loss': 8.522993517445745, 'avg_acc': 49.98008891636156, 'loss': 7.826014041900635}


EP_train:3:  76%|| 21042/27626 [49:38<15:38,  7.01it/s]

{'epoch': 3, 'iter': 21040, 'avg_loss': 8.52285937015154, 'avg_acc': 49.97920726201226, 'loss': 7.750147342681885}


EP_train:3:  76%|| 21052/27626 [49:40<15:30,  7.06it/s]

{'epoch': 3, 'iter': 21050, 'avg_loss': 8.522791935742811, 'avg_acc': 49.97862334330911, 'loss': 8.647000312805176}


EP_train:3:  76%|| 21062/27626 [49:41<15:31,  7.04it/s]

{'epoch': 3, 'iter': 21060, 'avg_loss': 8.522877244798554, 'avg_acc': 49.97818835762784, 'loss': 8.266335487365723}


EP_train:3:  76%|| 21072/27626 [49:43<15:35,  7.00it/s]

{'epoch': 3, 'iter': 21070, 'avg_loss': 8.522727655191074, 'avg_acc': 49.97849532532865, 'loss': 9.074735641479492}


EP_train:3:  76%|| 21082/27626 [49:44<15:39,  6.96it/s]

{'epoch': 3, 'iter': 21080, 'avg_loss': 8.522798126039387, 'avg_acc': 49.97983966605, 'loss': 8.626107215881348}


EP_train:3:  76%|| 21092/27626 [49:46<15:33,  7.00it/s]

{'epoch': 3, 'iter': 21090, 'avg_loss': 8.522786574707133, 'avg_acc': 49.97999739225262, 'loss': 8.91016960144043}


EP_train:3:  76%|| 21102/27626 [49:47<15:31,  7.00it/s]

{'epoch': 3, 'iter': 21100, 'avg_loss': 8.522887638804868, 'avg_acc': 49.97748921852045, 'loss': 8.660148620605469}


EP_train:3:  76%|| 21112/27626 [49:48<15:26,  7.03it/s]

{'epoch': 3, 'iter': 21110, 'avg_loss': 8.522859561506008, 'avg_acc': 49.97616763772441, 'loss': 8.618122100830078}


EP_train:3:  76%|| 21122/27626 [49:50<15:21,  7.06it/s]

{'epoch': 3, 'iter': 21120, 'avg_loss': 8.522848795300254, 'avg_acc': 49.9803217177217, 'loss': 8.762651443481445}


EP_train:3:  76%|| 21132/27626 [49:51<15:27,  7.01it/s]

{'epoch': 3, 'iter': 21130, 'avg_loss': 8.523045128414383, 'avg_acc': 49.97870427334249, 'loss': 8.146217346191406}


EP_train:3:  77%|| 21142/27626 [49:53<15:27,  6.99it/s]

{'epoch': 3, 'iter': 21140, 'avg_loss': 8.523044180280241, 'avg_acc': 49.9798968828343, 'loss': 7.896352291107178}


EP_train:3:  77%|| 21152/27626 [49:54<15:18,  7.05it/s]

{'epoch': 3, 'iter': 21150, 'avg_loss': 8.52302477082588, 'avg_acc': 49.9784289158905, 'loss': 8.050049781799316}


EP_train:3:  77%|| 21162/27626 [49:55<15:22,  7.01it/s]

{'epoch': 3, 'iter': 21160, 'avg_loss': 8.522989696999149, 'avg_acc': 49.97725769103539, 'loss': 8.052583694458008}


EP_train:3:  77%|| 21172/27626 [49:57<15:26,  6.97it/s]

{'epoch': 3, 'iter': 21170, 'avg_loss': 8.523015833673544, 'avg_acc': 49.97741604081054, 'loss': 8.365760803222656}


EP_train:3:  77%|| 21182/27626 [49:58<15:20,  7.00it/s]

{'epoch': 3, 'iter': 21180, 'avg_loss': 8.523054554151647, 'avg_acc': 49.97550871063689, 'loss': 8.025403022766113}


EP_train:3:  77%|| 21192/27626 [50:00<15:15,  7.03it/s]

{'epoch': 3, 'iter': 21190, 'avg_loss': 8.522857820040935, 'avg_acc': 49.97537279977349, 'loss': 8.59699821472168}


EP_train:3:  77%|| 21202/27626 [50:01<15:18,  7.00it/s]

{'epoch': 3, 'iter': 21200, 'avg_loss': 8.522633359756208, 'avg_acc': 49.97744799773596, 'loss': 8.23643970489502}


EP_train:3:  77%|| 21212/27626 [50:03<15:17,  6.99it/s]

{'epoch': 3, 'iter': 21210, 'avg_loss': 8.522660569689195, 'avg_acc': 49.9761326670124, 'loss': 9.05759334564209}


EP_train:3:  77%|| 21222/27626 [50:04<15:09,  7.04it/s]

{'epoch': 3, 'iter': 21220, 'avg_loss': 8.522638112468098, 'avg_acc': 49.97967814900335, 'loss': 8.12685489654541}


EP_train:3:  77%|| 21232/27626 [50:05<15:09,  7.03it/s]

{'epoch': 3, 'iter': 21230, 'avg_loss': 8.522766911872877, 'avg_acc': 49.98012929207291, 'loss': 9.106593132019043}


EP_train:3:  77%|| 21242/27626 [50:07<15:07,  7.03it/s]

{'epoch': 3, 'iter': 21240, 'avg_loss': 8.522983582633447, 'avg_acc': 49.9786674356198, 'loss': 7.920616626739502}


EP_train:3:  77%|| 21252/27626 [50:08<15:10,  7.00it/s]

{'epoch': 3, 'iter': 21250, 'avg_loss': 8.522907219720242, 'avg_acc': 49.978383370194344, 'loss': 8.045317649841309}


EP_train:3:  77%|| 21262/27626 [50:10<15:06,  7.02it/s]

{'epoch': 3, 'iter': 21260, 'avg_loss': 8.522767866326715, 'avg_acc': 49.98001034758478, 'loss': 8.174715995788574}


EP_train:3:  77%|| 21272/27626 [50:11<15:08,  6.99it/s]

{'epoch': 3, 'iter': 21270, 'avg_loss': 8.522583708754828, 'avg_acc': 49.97913826336327, 'loss': 8.747232437133789}


EP_train:3:  77%|| 21282/27626 [50:13<15:00,  7.04it/s]

{'epoch': 3, 'iter': 21280, 'avg_loss': 8.522573698475018, 'avg_acc': 49.97958860015977, 'loss': 8.495220184326172}


EP_train:3:  77%|| 21292/27626 [50:14<15:04,  7.00it/s]

{'epoch': 3, 'iter': 21290, 'avg_loss': 8.522698738034002, 'avg_acc': 49.983561129115586, 'loss': 9.73907470703125}


EP_train:3:  77%|| 21302/27626 [50:15<15:00,  7.02it/s]

{'epoch': 3, 'iter': 21300, 'avg_loss': 8.522641948072973, 'avg_acc': 49.98298201962349, 'loss': 8.077078819274902}


EP_train:3:  77%|| 21312/27626 [50:17<15:00,  7.01it/s]

{'epoch': 3, 'iter': 21310, 'avg_loss': 8.522659740176811, 'avg_acc': 49.982843367275116, 'loss': 8.147480010986328}


EP_train:3:  77%|| 21322/27626 [50:18<14:58,  7.02it/s]

{'epoch': 3, 'iter': 21320, 'avg_loss': 8.522677691913971, 'avg_acc': 49.98387739787064, 'loss': 8.649858474731445}


EP_train:3:  77%|| 21332/27626 [50:20<14:58,  7.00it/s]

{'epoch': 3, 'iter': 21330, 'avg_loss': 8.522726388281841, 'avg_acc': 49.98344545497164, 'loss': 8.686270713806152}


EP_train:3:  77%|| 21342/27626 [50:21<14:55,  7.02it/s]

{'epoch': 3, 'iter': 21340, 'avg_loss': 8.52270610019342, 'avg_acc': 49.983453212126896, 'loss': 8.124738693237305}


EP_train:3:  77%|| 21352/27626 [50:22<14:52,  7.03it/s]

{'epoch': 3, 'iter': 21350, 'avg_loss': 8.522706158786159, 'avg_acc': 49.98258278300782, 'loss': 8.143087387084961}


EP_train:3:  77%|| 21362/27626 [50:24<14:58,  6.97it/s]

{'epoch': 3, 'iter': 21360, 'avg_loss': 8.522692695672323, 'avg_acc': 49.98185946350826, 'loss': 8.329015731811523}


EP_train:3:  77%|| 21372/27626 [50:25<14:51,  7.02it/s]

{'epoch': 3, 'iter': 21370, 'avg_loss': 8.522622384797884, 'avg_acc': 49.9820141780918, 'loss': 8.395621299743652}


EP_train:3:  77%|| 21382/27626 [50:27<14:50,  7.01it/s]

{'epoch': 3, 'iter': 21380, 'avg_loss': 8.522760624491038, 'avg_acc': 49.98114564332819, 'loss': 8.746039390563965}


EP_train:3:  77%|| 21392/27626 [50:28<14:55,  6.96it/s]

{'epoch': 3, 'iter': 21390, 'avg_loss': 8.522892572006969, 'avg_acc': 49.98057009957459, 'loss': 9.137847900390625}


EP_train:3:  77%|| 21402/27626 [50:30<14:39,  7.07it/s]

{'epoch': 3, 'iter': 21400, 'avg_loss': 8.522822558269908, 'avg_acc': 49.979995093687215, 'loss': 8.523895263671875}


EP_train:3:  78%|| 21412/27626 [50:31<14:41,  7.05it/s]

{'epoch': 3, 'iter': 21410, 'avg_loss': 8.522783575440593, 'avg_acc': 49.98117206109009, 'loss': 8.412710189819336}


EP_train:3:  78%|| 21422/27626 [50:32<14:38,  7.06it/s]

{'epoch': 3, 'iter': 21420, 'avg_loss': 8.52285134338775, 'avg_acc': 49.98045142617058, 'loss': 8.23812484741211}


EP_train:3:  78%|| 21432/27626 [50:34<14:38,  7.05it/s]

{'epoch': 3, 'iter': 21430, 'avg_loss': 8.522935615618938, 'avg_acc': 49.981335448649155, 'loss': 8.396719932556152}


EP_train:3:  78%|| 21442/27626 [50:35<14:39,  7.03it/s]

{'epoch': 3, 'iter': 21440, 'avg_loss': 8.52322858042984, 'avg_acc': 49.98236439531738, 'loss': 9.416555404663086}


EP_train:3:  78%|| 21452/27626 [50:37<14:43,  6.99it/s]

{'epoch': 3, 'iter': 21450, 'avg_loss': 8.52314476699886, 'avg_acc': 49.9833923826395, 'loss': 8.530993461608887}


EP_train:3:  78%|| 21462/27626 [50:38<14:30,  7.08it/s]

{'epoch': 3, 'iter': 21460, 'avg_loss': 8.522880521004367, 'avg_acc': 49.98281766926052, 'loss': 7.525359153747559}


EP_train:3:  78%|| 21472/27626 [50:39<14:34,  7.04it/s]

{'epoch': 3, 'iter': 21470, 'avg_loss': 8.522792069003966, 'avg_acc': 49.98442666852965, 'loss': 8.066068649291992}


EP_train:3:  78%|| 21482/27626 [50:41<14:35,  7.02it/s]

{'epoch': 3, 'iter': 21480, 'avg_loss': 8.522655851974939, 'avg_acc': 49.98341557655603, 'loss': 7.570387840270996}


EP_train:3:  78%|| 21492/27626 [50:42<14:36,  7.00it/s]

{'epoch': 3, 'iter': 21490, 'avg_loss': 8.52265895325466, 'avg_acc': 49.98080591875669, 'loss': 9.449893951416016}


EP_train:3:  78%|| 21502/27626 [50:44<14:29,  7.05it/s]

{'epoch': 3, 'iter': 21500, 'avg_loss': 8.52263784982965, 'avg_acc': 49.98081484582112, 'loss': 8.632570266723633}


EP_train:3:  78%|| 21512/27626 [50:45<14:26,  7.06it/s]

{'epoch': 3, 'iter': 21510, 'avg_loss': 8.522512388187252, 'avg_acc': 49.98372925479987, 'loss': 7.933205604553223}


EP_train:3:  78%|| 21522/27626 [50:47<14:20,  7.09it/s]

{'epoch': 3, 'iter': 21520, 'avg_loss': 8.522588677368866, 'avg_acc': 49.981849124111335, 'loss': 8.344265937805176}


EP_train:3:  78%|| 21532/27626 [50:48<14:29,  7.01it/s]

{'epoch': 3, 'iter': 21530, 'avg_loss': 8.522501281498036, 'avg_acc': 49.98200269379035, 'loss': 8.842742919921875}


EP_train:3:  78%|| 21542/27626 [50:49<14:24,  7.04it/s]

{'epoch': 3, 'iter': 21540, 'avg_loss': 8.522441774538912, 'avg_acc': 49.98259133744952, 'loss': 8.68608283996582}


EP_train:3:  78%|| 21552/27626 [50:51<14:18,  7.07it/s]

{'epoch': 3, 'iter': 21550, 'avg_loss': 8.522432758174116, 'avg_acc': 49.98390445918982, 'loss': 8.900312423706055}


EP_train:3:  78%|| 21562/27626 [50:52<14:11,  7.12it/s]

{'epoch': 3, 'iter': 21560, 'avg_loss': 8.522579251257477, 'avg_acc': 49.985216362877416, 'loss': 8.855576515197754}


EP_train:3:  78%|| 21572/27626 [50:54<14:21,  7.03it/s]

{'epoch': 3, 'iter': 21570, 'avg_loss': 8.522590276335185, 'avg_acc': 49.984498864215844, 'loss': 8.362075805664062}


EP_train:3:  78%|| 21582/27626 [50:55<14:15,  7.06it/s]

{'epoch': 3, 'iter': 21580, 'avg_loss': 8.522550552858325, 'avg_acc': 49.98566447337936, 'loss': 7.822198390960693}


EP_train:3:  78%|| 21592/27626 [50:56<14:25,  6.97it/s]

{'epoch': 3, 'iter': 21590, 'avg_loss': 8.522591797442319, 'avg_acc': 49.98538164049835, 'loss': 8.1793851852417}


EP_train:3:  78%|| 21602/27626 [50:58<14:12,  7.06it/s]

{'epoch': 3, 'iter': 21600, 'avg_loss': 8.522653188348274, 'avg_acc': 49.984665061802694, 'loss': 8.697362899780273}


EP_train:3:  78%|| 21612/27626 [50:59<14:18,  7.00it/s]

{'epoch': 3, 'iter': 21610, 'avg_loss': 8.522640269024723, 'avg_acc': 49.98510596455509, 'loss': 8.711443901062012}


EP_train:3:  78%|| 21622/27626 [51:01<14:09,  7.07it/s]

{'epoch': 3, 'iter': 21620, 'avg_loss': 8.522539626834714, 'avg_acc': 49.98641367189307, 'loss': 7.520608901977539}


EP_train:3:  78%|| 21632/27626 [51:02<14:20,  6.96it/s]

{'epoch': 3, 'iter': 21630, 'avg_loss': 8.522443771472526, 'avg_acc': 49.98844251305996, 'loss': 7.9134135246276855}


EP_train:3:  78%|| 21642/27626 [51:04<14:12,  7.02it/s]

{'epoch': 3, 'iter': 21640, 'avg_loss': 8.522437919866828, 'avg_acc': 49.9884478536112, 'loss': 7.858109951019287}


EP_train:3:  78%|| 21652/27626 [51:05<14:04,  7.08it/s]

{'epoch': 3, 'iter': 21650, 'avg_loss': 8.522411759359656, 'avg_acc': 49.98874185949841, 'loss': 8.88716983795166}


EP_train:3:  78%|| 21662/27626 [51:06<14:02,  7.08it/s]

{'epoch': 3, 'iter': 21660, 'avg_loss': 8.522381213699637, 'avg_acc': 49.9883142514196, 'loss': 9.326355934143066}


EP_train:3:  78%|| 21672/27626 [51:08<13:57,  7.11it/s]

{'epoch': 3, 'iter': 21670, 'avg_loss': 8.522312995751692, 'avg_acc': 49.99062687462508, 'loss': 7.880604267120361}


EP_train:3:  78%|| 21682/27626 [51:09<14:03,  7.05it/s]

{'epoch': 3, 'iter': 21680, 'avg_loss': 8.52236791350053, 'avg_acc': 49.991784281167845, 'loss': 9.084555625915527}


EP_train:3:  79%|| 21692/27626 [51:11<14:00,  7.06it/s]

{'epoch': 3, 'iter': 21690, 'avg_loss': 8.52244526914231, 'avg_acc': 49.989627034253836, 'loss': 7.629191875457764}


EP_train:3:  79%|| 21702/27626 [51:12<14:00,  7.05it/s]

{'epoch': 3, 'iter': 21700, 'avg_loss': 8.522460925261012, 'avg_acc': 49.99063983226579, 'loss': 8.661067008972168}


EP_train:3:  79%|| 21712/27626 [51:13<14:02,  7.02it/s]

{'epoch': 3, 'iter': 21710, 'avg_loss': 8.522562716503705, 'avg_acc': 49.9928031873244, 'loss': 8.572784423828125}


EP_train:3:  79%|| 21722/27626 [51:15<14:06,  6.97it/s]

{'epoch': 3, 'iter': 21720, 'avg_loss': 8.522694857836944, 'avg_acc': 49.99309424059666, 'loss': 9.028463363647461}


EP_train:3:  79%|| 21732/27626 [51:16<13:57,  7.04it/s]

{'epoch': 3, 'iter': 21730, 'avg_loss': 8.522627601862272, 'avg_acc': 49.99338502599973, 'loss': 8.47032642364502}


EP_train:3:  79%|| 21742/27626 [51:18<13:55,  7.04it/s]

{'epoch': 3, 'iter': 21740, 'avg_loss': 8.52239228876264, 'avg_acc': 49.994106756818915, 'loss': 8.76281452178955}


EP_train:3:  79%|| 21752/27626 [51:19<13:51,  7.06it/s]

{'epoch': 3, 'iter': 21750, 'avg_loss': 8.522337186123321, 'avg_acc': 49.996264539561395, 'loss': 8.38490104675293}


EP_train:3:  79%|| 21762/27626 [51:20<13:56,  7.01it/s]

{'epoch': 3, 'iter': 21760, 'avg_loss': 8.522405993570864, 'avg_acc': 49.995979045080645, 'loss': 9.184475898742676}


EP_train:3:  79%|| 21772/27626 [51:22<13:45,  7.09it/s]

{'epoch': 3, 'iter': 21770, 'avg_loss': 8.522435350721658, 'avg_acc': 49.9961244315833, 'loss': 8.292580604553223}


EP_train:3:  79%|| 21782/27626 [51:23<13:47,  7.07it/s]

{'epoch': 3, 'iter': 21780, 'avg_loss': 8.52235440878187, 'avg_acc': 49.99655663192691, 'loss': 8.060218811035156}


EP_train:3:  79%|| 21792/27626 [51:25<13:41,  7.10it/s]

{'epoch': 3, 'iter': 21790, 'avg_loss': 8.52227958710097, 'avg_acc': 49.99842251388188, 'loss': 8.743401527404785}


EP_train:3:  79%|| 21802/27626 [51:26<13:48,  7.03it/s]

{'epoch': 3, 'iter': 21800, 'avg_loss': 8.522063362090435, 'avg_acc': 49.99842323746617, 'loss': 8.763671875}


EP_train:3:  79%|| 21812/27626 [51:28<13:50,  7.00it/s]

{'epoch': 3, 'iter': 21810, 'avg_loss': 8.522249930269272, 'avg_acc': 49.998710513043875, 'loss': 8.840596199035645}


EP_train:3:  79%|| 21822/27626 [51:29<13:42,  7.06it/s]

{'epoch': 3, 'iter': 21820, 'avg_loss': 8.522271897144789, 'avg_acc': 49.9979950506393, 'loss': 8.233806610107422}


EP_train:3:  79%|| 21832/27626 [51:30<13:40,  7.06it/s]

{'epoch': 3, 'iter': 21830, 'avg_loss': 8.522342877459861, 'avg_acc': 49.99728024369017, 'loss': 9.113615989685059}


EP_train:3:  79%|| 21842/27626 [51:32<13:34,  7.10it/s]

{'epoch': 3, 'iter': 21840, 'avg_loss': 8.522330020516627, 'avg_acc': 49.99413373929765, 'loss': 8.353985786437988}


EP_train:3:  79%|| 21852/27626 [51:33<13:34,  7.09it/s]

{'epoch': 3, 'iter': 21850, 'avg_loss': 8.52229343862273, 'avg_acc': 49.993850395862886, 'loss': 8.855134010314941}


EP_train:3:  79%|| 21862/27626 [51:35<13:39,  7.04it/s]

{'epoch': 3, 'iter': 21860, 'avg_loss': 8.522218552497762, 'avg_acc': 49.996140386990525, 'loss': 8.530834197998047}


EP_train:3:  79%|| 21872/27626 [51:36<13:41,  7.00it/s]

{'epoch': 3, 'iter': 21870, 'avg_loss': 8.522224987416017, 'avg_acc': 49.994856202276985, 'loss': 8.303400039672852}


EP_train:3:  79%|| 21882/27626 [51:37<13:32,  7.07it/s]

{'epoch': 3, 'iter': 21880, 'avg_loss': 8.522149150590892, 'avg_acc': 49.99371600932316, 'loss': 8.51941967010498}


EP_train:3:  79%|| 21892/27626 [51:39<13:33,  7.05it/s]

{'epoch': 3, 'iter': 21890, 'avg_loss': 8.52226064785778, 'avg_acc': 49.99143483623406, 'loss': 7.677679061889648}


EP_train:3:  79%|| 21902/27626 [51:40<13:39,  6.98it/s]

{'epoch': 3, 'iter': 21900, 'avg_loss': 8.522212787380143, 'avg_acc': 49.98958380895849, 'loss': 8.625755310058594}


EP_train:3:  79%|| 21912/27626 [51:42<13:30,  7.05it/s]

{'epoch': 3, 'iter': 21910, 'avg_loss': 8.522275848709885, 'avg_acc': 49.990301674957784, 'loss': 10.062053680419922}


EP_train:3:  79%|| 21922/27626 [51:43<13:24,  7.09it/s]

{'epoch': 3, 'iter': 21920, 'avg_loss': 8.522517627741124, 'avg_acc': 49.99087632863464, 'loss': 8.597078323364258}


EP_train:3:  79%|| 21932/27626 [51:45<13:28,  7.05it/s]

{'epoch': 3, 'iter': 21930, 'avg_loss': 8.522771023262445, 'avg_acc': 49.990310519356164, 'loss': 8.741559982299805}


EP_train:3:  79%|| 21942/27626 [51:46<13:28,  7.03it/s]

{'epoch': 3, 'iter': 21940, 'avg_loss': 8.522668704540617, 'avg_acc': 49.98974522583291, 'loss': 7.694521427154541}


EP_train:3:  79%|| 21952/27626 [51:47<13:29,  7.01it/s]

{'epoch': 3, 'iter': 21950, 'avg_loss': 8.522622755603374, 'avg_acc': 49.989749897498974, 'loss': 9.007299423217773}


EP_train:3:  79%|| 21962/27626 [51:49<13:27,  7.02it/s]

{'epoch': 3, 'iter': 21960, 'avg_loss': 8.522574090430018, 'avg_acc': 49.990323755748825, 'loss': 8.485239028930664}


EP_train:3:  80%|| 21972/27626 [51:50<13:18,  7.08it/s]

{'epoch': 3, 'iter': 21970, 'avg_loss': 8.522549236799033, 'avg_acc': 49.98762573392199, 'loss': 8.66537094116211}


EP_train:3:  80%|| 21982/27626 [51:52<13:22,  7.03it/s]

{'epoch': 3, 'iter': 21980, 'avg_loss': 8.52268415484197, 'avg_acc': 49.98791569992266, 'loss': 9.452644348144531}


EP_train:3:  80%|| 21992/27626 [51:53<13:21,  7.03it/s]

{'epoch': 3, 'iter': 21990, 'avg_loss': 8.522898585304905, 'avg_acc': 49.988063298622166, 'loss': 9.490808486938477}


EP_train:3:  80%|| 22002/27626 [51:54<13:14,  7.08it/s]

{'epoch': 3, 'iter': 22000, 'avg_loss': 8.5228000879667, 'avg_acc': 49.98636425617018, 'loss': 8.578386306762695}


EP_train:3:  80%|| 22012/27626 [51:56<13:13,  7.07it/s]

{'epoch': 3, 'iter': 22010, 'avg_loss': 8.52270804865737, 'avg_acc': 49.98693834900732, 'loss': 8.274653434753418}


EP_train:3:  80%|| 22022/27626 [51:57<13:12,  7.07it/s]

{'epoch': 3, 'iter': 22020, 'avg_loss': 8.522766874865178, 'avg_acc': 49.98751192043958, 'loss': 9.056600570678711}


EP_train:3:  80%|| 22032/27626 [51:59<13:10,  7.08it/s]

{'epoch': 3, 'iter': 22030, 'avg_loss': 8.522678525041698, 'avg_acc': 49.984964368389996, 'loss': 8.018994331359863}


EP_train:3:  80%|| 22042/27626 [52:00<13:08,  7.08it/s]

{'epoch': 3, 'iter': 22040, 'avg_loss': 8.522804265316285, 'avg_acc': 49.98596365863618, 'loss': 9.164835929870605}


EP_train:3:  80%|| 22052/27626 [52:01<13:12,  7.03it/s]

{'epoch': 3, 'iter': 22050, 'avg_loss': 8.52280110684897, 'avg_acc': 49.98384427010113, 'loss': 8.395005226135254}


EP_train:3:  80%|| 22062/27626 [52:03<13:12,  7.02it/s]

{'epoch': 3, 'iter': 22060, 'avg_loss': 8.522664215400349, 'avg_acc': 49.98470150945107, 'loss': 7.5630645751953125}


EP_train:3:  80%|| 22072/27626 [52:04<13:10,  7.03it/s]

{'epoch': 3, 'iter': 22070, 'avg_loss': 8.52275347120928, 'avg_acc': 49.983434144352316, 'loss': 8.954007148742676}


EP_train:3:  80%|| 22082/27626 [52:06<13:12,  7.00it/s]

{'epoch': 3, 'iter': 22080, 'avg_loss': 8.52281410131251, 'avg_acc': 49.98117725646483, 'loss': 8.821643829345703}


EP_train:3:  80%|| 22092/27626 [52:07<13:05,  7.05it/s]

{'epoch': 3, 'iter': 22090, 'avg_loss': 8.522797351392677, 'avg_acc': 49.9817516183061, 'loss': 8.14425277709961}


EP_train:3:  80%|| 22102/27626 [52:09<13:07,  7.01it/s]

{'epoch': 3, 'iter': 22100, 'avg_loss': 8.5228392136625, 'avg_acc': 49.98317383828786, 'loss': 9.06103801727295}


EP_train:3:  80%|| 22112/27626 [52:10<13:02,  7.05it/s]

{'epoch': 3, 'iter': 22110, 'avg_loss': 8.523107946218259, 'avg_acc': 49.983605445253495, 'loss': 9.706744194030762}


EP_train:3:  80%|| 22122/27626 [52:11<12:56,  7.09it/s]

{'epoch': 3, 'iter': 22120, 'avg_loss': 8.522982423028843, 'avg_acc': 49.983471588083724, 'loss': 8.1513671875}


EP_train:3:  80%|| 22132/27626 [52:13<13:01,  7.03it/s]

{'epoch': 3, 'iter': 22130, 'avg_loss': 8.52299791886089, 'avg_acc': 49.98220821472143, 'loss': 8.520477294921875}


EP_train:3:  80%|| 22142/27626 [52:14<12:55,  7.08it/s]

{'epoch': 3, 'iter': 22140, 'avg_loss': 8.523075304845017, 'avg_acc': 49.981792827785554, 'loss': 8.771913528442383}


EP_train:3:  80%|| 22152/27626 [52:16<12:58,  7.03it/s]

{'epoch': 3, 'iter': 22150, 'avg_loss': 8.522896654271879, 'avg_acc': 49.982083201661325, 'loss': 7.857624053955078}


EP_train:3:  80%|| 22162/27626 [52:17<12:59,  7.01it/s]

{'epoch': 3, 'iter': 22160, 'avg_loss': 8.52289635609765, 'avg_acc': 49.98166824601778, 'loss': 8.32089614868164}


EP_train:3:  80%|| 22172/27626 [52:18<12:58,  7.01it/s]

{'epoch': 3, 'iter': 22170, 'avg_loss': 8.523087707070632, 'avg_acc': 49.98111271480763, 'loss': 8.677938461303711}


EP_train:3:  80%|| 22182/27626 [52:20<12:53,  7.03it/s]

{'epoch': 3, 'iter': 22180, 'avg_loss': 8.523134169632941, 'avg_acc': 49.98154388891394, 'loss': 8.388128280639648}


EP_train:3:  80%|| 22192/27626 [52:21<12:56,  7.00it/s]

{'epoch': 3, 'iter': 22190, 'avg_loss': 8.522996348680627, 'avg_acc': 49.97958068586364, 'loss': 8.826349258422852}


EP_train:3:  80%|| 22202/27626 [52:23<12:43,  7.10it/s]

{'epoch': 3, 'iter': 22200, 'avg_loss': 8.52288847911208, 'avg_acc': 49.97790077023557, 'loss': 8.062539100646973}


EP_train:3:  80%|| 22212/27626 [52:24<12:45,  7.08it/s]

{'epoch': 3, 'iter': 22210, 'avg_loss': 8.522886295094473, 'avg_acc': 49.977207239656025, 'loss': 9.283287048339844}


EP_train:3:  80%|| 22222/27626 [52:25<12:46,  7.05it/s]

{'epoch': 3, 'iter': 22220, 'avg_loss': 8.522685415357127, 'avg_acc': 49.97792066063633, 'loss': 8.189667701721191}


EP_train:3:  80%|| 22232/27626 [52:27<12:42,  7.08it/s]

{'epoch': 3, 'iter': 22230, 'avg_loss': 8.522701301628759, 'avg_acc': 49.976806036615535, 'loss': 9.031037330627441}


EP_train:3:  81%|| 22242/27626 [52:28<12:42,  7.06it/s]

{'epoch': 3, 'iter': 22240, 'avg_loss': 8.522608604819638, 'avg_acc': 49.9769569713592, 'loss': 7.41607141494751}


EP_train:3:  81%|| 22252/27626 [52:30<12:44,  7.03it/s]

{'epoch': 3, 'iter': 22250, 'avg_loss': 8.52266460947763, 'avg_acc': 49.97752909981574, 'loss': 7.932712078094482}


EP_train:3:  81%|| 22262/27626 [52:31<12:43,  7.02it/s]

{'epoch': 3, 'iter': 22260, 'avg_loss': 8.522621565583549, 'avg_acc': 49.975854633664255, 'loss': 8.472196578979492}


EP_train:3:  81%|| 22272/27626 [52:33<12:40,  7.04it/s]

{'epoch': 3, 'iter': 22270, 'avg_loss': 8.522622392793245, 'avg_acc': 49.976567060302635, 'loss': 9.172208786010742}


EP_train:3:  81%|| 22282/27626 [52:34<12:32,  7.10it/s]

{'epoch': 3, 'iter': 22280, 'avg_loss': 8.522757129712847, 'avg_acc': 49.97685808536421, 'loss': 9.071125030517578}


EP_train:3:  81%|| 22292/27626 [52:35<12:38,  7.03it/s]

{'epoch': 3, 'iter': 22290, 'avg_loss': 8.522890774485273, 'avg_acc': 49.976447893768785, 'loss': 8.869317054748535}


EP_train:3:  81%|| 22302/27626 [52:37<12:38,  7.01it/s]

{'epoch': 3, 'iter': 22300, 'avg_loss': 8.522788150060027, 'avg_acc': 49.97533742881485, 'loss': 8.457053184509277}


EP_train:3:  81%|| 22312/27626 [52:38<12:35,  7.04it/s]

{'epoch': 3, 'iter': 22310, 'avg_loss': 8.522725783300167, 'avg_acc': 49.97576867912689, 'loss': 7.348991870880127}


EP_train:3:  81%|| 22322/27626 [52:40<12:29,  7.07it/s]

{'epoch': 3, 'iter': 22320, 'avg_loss': 8.522570835380304, 'avg_acc': 49.97437950808655, 'loss': 7.524807929992676}


EP_train:3:  81%|| 22332/27626 [52:41<12:31,  7.05it/s]

{'epoch': 3, 'iter': 22330, 'avg_loss': 8.522543134096141, 'avg_acc': 49.97523062110967, 'loss': 8.458514213562012}


EP_train:3:  81%|| 22342/27626 [52:42<12:38,  6.97it/s]

{'epoch': 3, 'iter': 22340, 'avg_loss': 8.52249335429485, 'avg_acc': 49.97664048162571, 'loss': 8.89290714263916}


EP_train:3:  81%|| 22352/27626 [52:44<12:36,  6.97it/s]

{'epoch': 3, 'iter': 22350, 'avg_loss': 8.522428059672878, 'avg_acc': 49.97427408169657, 'loss': 9.079230308532715}


EP_train:3:  81%|| 22362/27626 [52:45<12:28,  7.03it/s]

{'epoch': 3, 'iter': 22360, 'avg_loss': 8.522293697203812, 'avg_acc': 49.974006082017794, 'loss': 8.4622220993042}


EP_train:3:  81%|| 22372/27626 [52:47<12:32,  6.98it/s]

{'epoch': 3, 'iter': 22370, 'avg_loss': 8.522447917272864, 'avg_acc': 49.97527490948103, 'loss': 8.537354469299316}


EP_train:3:  81%|| 22382/27626 [52:48<12:24,  7.04it/s]

{'epoch': 3, 'iter': 22380, 'avg_loss': 8.522420383544727, 'avg_acc': 49.97472744738841, 'loss': 8.17361831665039}


EP_train:3:  81%|| 22392/27626 [52:50<12:21,  7.06it/s]

{'epoch': 3, 'iter': 22390, 'avg_loss': 8.522350132478673, 'avg_acc': 49.97557612433567, 'loss': 7.794358730316162}


EP_train:3:  81%|| 22402/27626 [52:51<12:25,  7.00it/s]

{'epoch': 3, 'iter': 22400, 'avg_loss': 8.522464674352817, 'avg_acc': 49.97865608678184, 'loss': 8.883522987365723}


EP_train:3:  81%|| 22412/27626 [52:52<12:27,  6.98it/s]

{'epoch': 3, 'iter': 22410, 'avg_loss': 8.522633026841016, 'avg_acc': 49.980896657891215, 'loss': 8.547524452209473}


EP_train:3:  81%|| 22422/27626 [52:54<12:23,  7.00it/s]

{'epoch': 3, 'iter': 22420, 'avg_loss': 8.522658789148828, 'avg_acc': 49.98132331296553, 'loss': 8.947675704956055}


EP_train:3:  81%|| 22432/27626 [52:55<12:25,  6.97it/s]

{'epoch': 3, 'iter': 22430, 'avg_loss': 8.522639917373912, 'avg_acc': 49.982724800499305, 'loss': 8.35957145690918}


EP_train:3:  81%|| 22442/27626 [52:57<12:21,  6.99it/s]

{'epoch': 3, 'iter': 22440, 'avg_loss': 8.522586638774152, 'avg_acc': 49.98161846620025, 'loss': 8.230408668518066}


EP_train:3:  81%|| 22452/27626 [52:58<12:13,  7.05it/s]

{'epoch': 3, 'iter': 22450, 'avg_loss': 8.52255721385535, 'avg_acc': 49.98274018974656, 'loss': 8.25897216796875}


EP_train:3:  81%|| 22462/27626 [52:59<12:14,  7.03it/s]

{'epoch': 3, 'iter': 22460, 'avg_loss': 8.522452319869428, 'avg_acc': 49.97968701304483, 'loss': 7.915877819061279}


EP_train:3:  81%|| 22472/27626 [53:01<12:16,  7.00it/s]

{'epoch': 3, 'iter': 22470, 'avg_loss': 8.522311029073038, 'avg_acc': 49.981643006541766, 'loss': 7.680934429168701}


EP_train:3:  81%|| 22482/27626 [53:02<12:12,  7.02it/s]

{'epoch': 3, 'iter': 22480, 'avg_loss': 8.522209675412475, 'avg_acc': 49.98165117210088, 'loss': 8.85988712310791}


EP_train:3:  81%|| 22492/27626 [53:04<12:05,  7.07it/s]

{'epoch': 3, 'iter': 22490, 'avg_loss': 8.52214583413067, 'avg_acc': 49.980686719132095, 'loss': 8.809844017028809}


EP_train:3:  81%|| 22502/27626 [53:05<12:06,  7.05it/s]

{'epoch': 3, 'iter': 22500, 'avg_loss': 8.52212667151571, 'avg_acc': 49.98041753699836, 'loss': 8.340309143066406}


EP_train:3:  81%|| 22512/27626 [53:07<12:06,  7.03it/s]

{'epoch': 3, 'iter': 22510, 'avg_loss': 8.522091267962674, 'avg_acc': 49.978621562791524, 'loss': 8.535506248474121}


EP_train:3:  82%|| 22522/27626 [53:08<12:08,  7.00it/s]

{'epoch': 3, 'iter': 22520, 'avg_loss': 8.522080955684919, 'avg_acc': 49.97738222103814, 'loss': 8.35884952545166}


EP_train:3:  82%|| 22532/27626 [53:09<12:09,  6.99it/s]

{'epoch': 3, 'iter': 22530, 'avg_loss': 8.52223108057218, 'avg_acc': 49.97919533087746, 'loss': 9.371308326721191}


EP_train:3:  82%|| 22542/27626 [53:11<12:04,  7.02it/s]

{'epoch': 3, 'iter': 22540, 'avg_loss': 8.522375627585985, 'avg_acc': 49.97989774189256, 'loss': 9.964241027832031}


EP_train:3:  82%|| 22552/27626 [53:12<12:01,  7.03it/s]

{'epoch': 3, 'iter': 22550, 'avg_loss': 8.522332205169137, 'avg_acc': 49.97990665602413, 'loss': 8.088993072509766}


EP_train:3:  82%|| 22562/27626 [53:14<11:57,  7.06it/s]

{'epoch': 3, 'iter': 22560, 'avg_loss': 8.522162030475643, 'avg_acc': 49.983101369620144, 'loss': 8.203458786010742}


EP_train:3:  82%|| 22572/27626 [53:15<11:58,  7.03it/s]

{'epoch': 3, 'iter': 22570, 'avg_loss': 8.522116776885047, 'avg_acc': 49.98241659651765, 'loss': 8.350326538085938}


EP_train:3:  82%|| 22582/27626 [53:16<11:57,  7.03it/s]

{'epoch': 3, 'iter': 22580, 'avg_loss': 8.52203727926747, 'avg_acc': 49.982147601966254, 'loss': 8.23611831665039}


EP_train:3:  82%|| 22592/27626 [53:18<11:50,  7.08it/s]

{'epoch': 3, 'iter': 22590, 'avg_loss': 8.521978419666393, 'avg_acc': 49.981048869018636, 'loss': 8.52503490447998}


EP_train:3:  82%|| 22602/27626 [53:19<11:48,  7.09it/s]

{'epoch': 3, 'iter': 22600, 'avg_loss': 8.521996530430132, 'avg_acc': 49.98368435024999, 'loss': 8.18830680847168}


EP_train:3:  82%|| 22612/27626 [53:21<11:51,  7.05it/s]

{'epoch': 3, 'iter': 22610, 'avg_loss': 8.521952188841135, 'avg_acc': 49.982171288311, 'loss': 8.246076583862305}


EP_train:3:  82%|| 22622/27626 [53:22<11:49,  7.06it/s]

{'epoch': 3, 'iter': 22620, 'avg_loss': 8.521889815126668, 'avg_acc': 49.98231731576853, 'loss': 8.311239242553711}


EP_train:3:  82%|| 22632/27626 [53:24<11:50,  7.03it/s]

{'epoch': 3, 'iter': 22630, 'avg_loss': 8.52180546362798, 'avg_acc': 49.980944279969954, 'loss': 8.982806205749512}


EP_train:3:  82%|| 22642/27626 [53:25<11:45,  7.07it/s]

{'epoch': 3, 'iter': 22640, 'avg_loss': 8.521717470677137, 'avg_acc': 49.98026257674131, 'loss': 8.417510032653809}


EP_train:3:  82%|| 22652/27626 [53:26<11:47,  7.03it/s]

{'epoch': 3, 'iter': 22650, 'avg_loss': 8.521800175886124, 'avg_acc': 49.979167586420026, 'loss': 8.710888862609863}


EP_train:3:  82%|| 22662/27626 [53:28<11:40,  7.08it/s]

{'epoch': 3, 'iter': 22660, 'avg_loss': 8.521784950599924, 'avg_acc': 49.97890097524381, 'loss': 8.166297912597656}


EP_train:3:  82%|| 22672/27626 [53:29<11:38,  7.10it/s]

{'epoch': 3, 'iter': 22670, 'avg_loss': 8.521789640297325, 'avg_acc': 49.97891028185788, 'loss': 9.131950378417969}


EP_train:3:  82%|| 22682/27626 [53:31<11:42,  7.04it/s]

{'epoch': 3, 'iter': 22680, 'avg_loss': 8.521843507194124, 'avg_acc': 49.978506238701996, 'loss': 8.01371955871582}


EP_train:3:  82%|| 22692/27626 [53:32<11:39,  7.06it/s]

{'epoch': 3, 'iter': 22690, 'avg_loss': 8.521848129903564, 'avg_acc': 49.978928870477276, 'loss': 8.739374160766602}


EP_train:3:  82%|| 22702/27626 [53:33<11:39,  7.04it/s]

{'epoch': 3, 'iter': 22700, 'avg_loss': 8.521746997061104, 'avg_acc': 49.97893815250429, 'loss': 8.635730743408203}


EP_train:3:  82%|| 22712/27626 [53:35<11:39,  7.02it/s]

{'epoch': 3, 'iter': 22710, 'avg_loss': 8.521663229603323, 'avg_acc': 49.9800482145216, 'loss': 7.685174942016602}


EP_train:3:  82%|| 22722/27626 [53:36<11:37,  7.03it/s]

{'epoch': 3, 'iter': 22720, 'avg_loss': 8.521597386497799, 'avg_acc': 49.98046960961226, 'loss': 8.381604194641113}


EP_train:3:  82%|| 22732/27626 [53:38<11:34,  7.05it/s]

{'epoch': 3, 'iter': 22730, 'avg_loss': 8.521534756261932, 'avg_acc': 49.98047820157494, 'loss': 8.462545394897461}


EP_train:3:  82%|| 22742/27626 [53:39<11:30,  7.07it/s]

{'epoch': 3, 'iter': 22740, 'avg_loss': 8.521494663242384, 'avg_acc': 49.98076161998153, 'loss': 8.445390701293945}


EP_train:3:  82%|| 22752/27626 [53:41<11:38,  6.98it/s]

{'epoch': 3, 'iter': 22750, 'avg_loss': 8.521277786406793, 'avg_acc': 49.9804953628412, 'loss': 7.980808258056641}


EP_train:3:  82%|| 22762/27626 [53:42<11:30,  7.05it/s]

{'epoch': 3, 'iter': 22760, 'avg_loss': 8.521085077898924, 'avg_acc': 49.98105311717411, 'loss': 8.131780624389648}


EP_train:3:  82%|| 22772/27626 [53:43<11:27,  7.06it/s]

{'epoch': 3, 'iter': 22770, 'avg_loss': 8.521014926085007, 'avg_acc': 49.979551842255496, 'loss': 9.027835845947266}


EP_train:3:  82%|| 22782/27626 [53:45<11:28,  7.03it/s]

{'epoch': 3, 'iter': 22780, 'avg_loss': 8.521240161240852, 'avg_acc': 49.97832623677626, 'loss': 9.501303672790527}


EP_train:3:  83%|| 22792/27626 [53:46<11:26,  7.04it/s]

{'epoch': 3, 'iter': 22790, 'avg_loss': 8.52140094638577, 'avg_acc': 49.97984401737528, 'loss': 8.304179191589355}


EP_train:3:  83%|| 22802/27626 [53:48<11:25,  7.04it/s]

{'epoch': 3, 'iter': 22800, 'avg_loss': 8.521329474822455, 'avg_acc': 49.979167580369285, 'loss': 8.18498420715332}


EP_train:3:  83%|| 22812/27626 [53:49<11:24,  7.03it/s]

{'epoch': 3, 'iter': 22810, 'avg_loss': 8.52133860031405, 'avg_acc': 49.980272675463596, 'loss': 8.857955932617188}


EP_train:3:  83%|| 22822/27626 [53:50<11:21,  7.05it/s]

{'epoch': 3, 'iter': 22820, 'avg_loss': 8.521210087071239, 'avg_acc': 49.981376802068276, 'loss': 8.57027530670166}


EP_train:3:  83%|| 22832/27626 [53:52<11:20,  7.05it/s]

{'epoch': 3, 'iter': 22830, 'avg_loss': 8.521055276925113, 'avg_acc': 49.98111120844466, 'loss': 8.906906127929688}


EP_train:3:  83%|| 22842/27626 [53:53<11:16,  7.07it/s]

{'epoch': 3, 'iter': 22840, 'avg_loss': 8.52095980920003, 'avg_acc': 49.982624447265884, 'loss': 7.4210004806518555}


EP_train:3:  83%|| 22852/27626 [53:55<11:19,  7.03it/s]

{'epoch': 3, 'iter': 22850, 'avg_loss': 8.520856621980009, 'avg_acc': 49.98386285064111, 'loss': 8.5952730178833}


EP_train:3:  83%|| 22862/27626 [53:56<11:19,  7.02it/s]

{'epoch': 3, 'iter': 22860, 'avg_loss': 8.520777711258813, 'avg_acc': 49.98414330081798, 'loss': 7.297656536102295}


EP_train:3:  83%|| 22872/27626 [53:58<11:12,  7.07it/s]

{'epoch': 3, 'iter': 22870, 'avg_loss': 8.520824221464705, 'avg_acc': 49.985653228979935, 'loss': 8.62388801574707}


EP_train:3:  83%|| 22882/27626 [53:59<11:13,  7.04it/s]

{'epoch': 3, 'iter': 22880, 'avg_loss': 8.520919580069055, 'avg_acc': 49.98661553253791, 'loss': 9.3117036819458}


EP_train:3:  83%|| 22892/27626 [54:00<11:19,  6.97it/s]

{'epoch': 3, 'iter': 22890, 'avg_loss': 8.520896874708969, 'avg_acc': 49.98907867720938, 'loss': 9.108358383178711}


EP_train:3:  83%|| 22902/27626 [54:02<11:11,  7.03it/s]

{'epoch': 3, 'iter': 22900, 'avg_loss': 8.520961969587242, 'avg_acc': 49.989629273830836, 'loss': 8.91138744354248}


EP_train:3:  83%|| 22912/27626 [54:03<11:05,  7.08it/s]

{'epoch': 3, 'iter': 22910, 'avg_loss': 8.520947212915045, 'avg_acc': 49.989361005630485, 'loss': 8.130965232849121}


EP_train:3:  83%|| 22922/27626 [54:05<11:06,  7.06it/s]

{'epoch': 3, 'iter': 22920, 'avg_loss': 8.520988124779842, 'avg_acc': 49.989365647223075, 'loss': 8.358713150024414}


EP_train:3:  83%|| 22932/27626 [54:06<11:06,  7.04it/s]

{'epoch': 3, 'iter': 22930, 'avg_loss': 8.520838195792797, 'avg_acc': 49.988280057564, 'loss': 9.040999412536621}


EP_train:3:  83%|| 22942/27626 [54:07<11:03,  7.06it/s]

{'epoch': 3, 'iter': 22940, 'avg_loss': 8.520774034212701, 'avg_acc': 49.9862418813478, 'loss': 8.366296768188477}


EP_train:3:  83%|| 22952/27626 [54:09<11:06,  7.02it/s]

{'epoch': 3, 'iter': 22950, 'avg_loss': 8.520680946543568, 'avg_acc': 49.98665635484293, 'loss': 8.712769508361816}


EP_train:3:  83%|| 22962/27626 [54:10<11:05,  7.01it/s]

{'epoch': 3, 'iter': 22960, 'avg_loss': 8.52069652870211, 'avg_acc': 49.98815927006663, 'loss': 8.165169715881348}


EP_train:3:  83%|| 22972/27626 [54:12<11:03,  7.02it/s]

{'epoch': 3, 'iter': 22970, 'avg_loss': 8.520659074666664, 'avg_acc': 49.98666797266118, 'loss': 8.336281776428223}


EP_train:3:  83%|| 22982/27626 [54:13<10:59,  7.04it/s]

{'epoch': 3, 'iter': 22980, 'avg_loss': 8.520691075911591, 'avg_acc': 49.98612984639485, 'loss': 7.409201145172119}


EP_train:3:  83%|| 22992/27626 [54:15<10:59,  7.02it/s]

{'epoch': 3, 'iter': 22990, 'avg_loss': 8.520698853550353, 'avg_acc': 49.987766952285675, 'loss': 8.533655166625977}


EP_train:3:  83%|| 23002/27626 [54:16<10:54,  7.07it/s]

{'epoch': 3, 'iter': 23000, 'avg_loss': 8.520817672588024, 'avg_acc': 49.98940263466805, 'loss': 8.003301620483398}


EP_train:3:  83%|| 23012/27626 [54:17<10:55,  7.04it/s]

{'epoch': 3, 'iter': 23010, 'avg_loss': 8.520822626003401, 'avg_acc': 49.98954304463083, 'loss': 9.038336753845215}


EP_train:3:  83%|| 23022/27626 [54:19<10:56,  7.01it/s]

{'epoch': 3, 'iter': 23020, 'avg_loss': 8.520839878388166, 'avg_acc': 49.989276095738674, 'loss': 8.955366134643555}


EP_train:3:  83%|| 23032/27626 [54:20<10:49,  7.08it/s]

{'epoch': 3, 'iter': 23030, 'avg_loss': 8.520754327869701, 'avg_acc': 49.9895521253962, 'loss': 8.004822731018066}


EP_train:3:  83%|| 23042/27626 [54:22<10:49,  7.05it/s]

{'epoch': 3, 'iter': 23040, 'avg_loss': 8.520660763135371, 'avg_acc': 49.98738661516427, 'loss': 9.653666496276855}


EP_train:3:  83%|| 23052/27626 [54:23<10:43,  7.11it/s]

{'epoch': 3, 'iter': 23050, 'avg_loss': 8.520623820478924, 'avg_acc': 49.98590082859746, 'loss': 8.218169212341309}


EP_train:3:  83%|| 23062/27626 [54:24<10:46,  7.06it/s]

{'epoch': 3, 'iter': 23060, 'avg_loss': 8.520574229148487, 'avg_acc': 49.985906942456964, 'loss': 8.772134780883789}


EP_train:3:  84%|| 23072/27626 [54:26<10:49,  7.02it/s]

{'epoch': 3, 'iter': 23070, 'avg_loss': 8.52063240152792, 'avg_acc': 49.98645485674656, 'loss': 8.529987335205078}


EP_train:3:  84%|| 23082/27626 [54:27<10:49,  6.99it/s]

{'epoch': 3, 'iter': 23080, 'avg_loss': 8.520654255387006, 'avg_acc': 49.988085438239246, 'loss': 8.745418548583984}


EP_train:3:  84%|| 23092/27626 [54:29<10:43,  7.05it/s]

{'epoch': 3, 'iter': 23090, 'avg_loss': 8.520596765768719, 'avg_acc': 49.98917327097138, 'loss': 7.50388240814209}


EP_train:3:  84%|| 23102/27626 [54:30<10:44,  7.02it/s]

{'epoch': 3, 'iter': 23100, 'avg_loss': 8.520355962911244, 'avg_acc': 49.98809575343059, 'loss': 7.927239894866943}


EP_train:3:  84%|| 23112/27626 [54:31<10:44,  7.00it/s]

{'epoch': 3, 'iter': 23110, 'avg_loss': 8.520278365231052, 'avg_acc': 49.988641772316214, 'loss': 8.693635940551758}


EP_train:3:  84%|| 23122/27626 [54:33<10:38,  7.05it/s]

{'epoch': 3, 'iter': 23120, 'avg_loss': 8.520283414183137, 'avg_acc': 49.98743025820683, 'loss': 8.743307113647461}


EP_train:3:  84%|| 23132/27626 [54:34<10:36,  7.06it/s]

{'epoch': 3, 'iter': 23130, 'avg_loss': 8.520517296428466, 'avg_acc': 49.986625091868056, 'loss': 9.554341316223145}


EP_train:3:  84%|| 23142/27626 [54:36<10:33,  7.07it/s]

{'epoch': 3, 'iter': 23140, 'avg_loss': 8.520608474388071, 'avg_acc': 49.987981288621924, 'loss': 9.26154613494873}


EP_train:3:  84%|| 23152/27626 [54:37<10:36,  7.03it/s]

{'epoch': 3, 'iter': 23150, 'avg_loss': 8.520577001713773, 'avg_acc': 49.98650166299512, 'loss': 7.082810878753662}


EP_train:3:  84%|| 23162/27626 [54:39<10:36,  7.02it/s]

{'epoch': 3, 'iter': 23160, 'avg_loss': 8.520524763987886, 'avg_acc': 49.986372565951385, 'loss': 8.541268348693848}


EP_train:3:  84%|| 23172/27626 [54:40<10:31,  7.05it/s]

{'epoch': 3, 'iter': 23170, 'avg_loss': 8.52057805645168, 'avg_acc': 49.989210651245095, 'loss': 8.235633850097656}


EP_train:3:  84%|| 23182/27626 [54:41<10:31,  7.04it/s]

{'epoch': 3, 'iter': 23180, 'avg_loss': 8.52052577676037, 'avg_acc': 49.98867607092015, 'loss': 8.012619972229004}


EP_train:3:  84%|| 23192/27626 [54:43<10:27,  7.07it/s]

{'epoch': 3, 'iter': 23190, 'avg_loss': 8.520636561472982, 'avg_acc': 49.9893547065672, 'loss': 9.259723663330078}


EP_train:3:  84%|| 23202/27626 [54:44<10:27,  7.05it/s]

{'epoch': 3, 'iter': 23200, 'avg_loss': 8.520532812117954, 'avg_acc': 49.98841644756691, 'loss': 8.341771125793457}


EP_train:3:  84%|| 23212/27626 [54:46<10:23,  7.08it/s]

{'epoch': 3, 'iter': 23210, 'avg_loss': 8.520464517181608, 'avg_acc': 49.98626728706217, 'loss': 8.225119590759277}


EP_train:3:  84%|| 23222/27626 [54:47<10:22,  7.07it/s]

{'epoch': 3, 'iter': 23220, 'avg_loss': 8.52032434607162, 'avg_acc': 49.98667693036475, 'loss': 8.171504020690918}


EP_train:3:  84%|| 23232/27626 [54:48<10:20,  7.08it/s]

{'epoch': 3, 'iter': 23230, 'avg_loss': 8.520304521985391, 'avg_acc': 49.98722073952908, 'loss': 8.118561744689941}


EP_train:3:  84%|| 23242/27626 [54:50<10:24,  7.02it/s]

{'epoch': 3, 'iter': 23240, 'avg_loss': 8.52018011164621, 'avg_acc': 49.9888397659309, 'loss': 8.413655281066895}


EP_train:3:  84%|| 23252/27626 [54:51<10:24,  7.01it/s]

{'epoch': 3, 'iter': 23250, 'avg_loss': 8.520162123956197, 'avg_acc': 49.986828523504364, 'loss': 7.687311172485352}


EP_train:3:  84%|| 23262/27626 [54:53<10:20,  7.03it/s]

{'epoch': 3, 'iter': 23260, 'avg_loss': 8.520260650382541, 'avg_acc': 49.987774601263915, 'loss': 8.669013023376465}


EP_train:3:  84%|| 23272/27626 [54:54<10:15,  7.07it/s]

{'epoch': 3, 'iter': 23270, 'avg_loss': 8.520199153334278, 'avg_acc': 49.98831700399639, 'loss': 8.872320175170898}


EP_train:3:  84%|| 23282/27626 [54:56<10:22,  6.98it/s]

{'epoch': 3, 'iter': 23280, 'avg_loss': 8.520357710077747, 'avg_acc': 49.98966431854302, 'loss': 9.564340591430664}


EP_train:3:  84%|| 23292/27626 [54:57<10:16,  7.03it/s]

{'epoch': 3, 'iter': 23290, 'avg_loss': 8.520276294782937, 'avg_acc': 49.99007127216522, 'loss': 8.63815975189209}


EP_train:3:  84%|| 23302/27626 [54:58<10:13,  7.04it/s]

{'epoch': 3, 'iter': 23300, 'avg_loss': 8.52031755975365, 'avg_acc': 49.98967318999185, 'loss': 8.841501235961914}


EP_train:3:  84%|| 23312/27626 [55:00<10:12,  7.05it/s]

{'epoch': 3, 'iter': 23310, 'avg_loss': 8.520208087095725, 'avg_acc': 49.98900733559264, 'loss': 9.417037010192871}


EP_train:3:  84%|| 23322/27626 [55:01<10:12,  7.03it/s]

{'epoch': 3, 'iter': 23320, 'avg_loss': 8.520329616096477, 'avg_acc': 49.99088804082158, 'loss': 8.873411178588867}


EP_train:3:  84%|| 23332/27626 [55:03<10:14,  6.99it/s]

{'epoch': 3, 'iter': 23330, 'avg_loss': 8.520158129607832, 'avg_acc': 49.989150700784364, 'loss': 8.19848346710205}


EP_train:3:  84%|| 23342/27626 [55:04<10:08,  7.03it/s]

{'epoch': 3, 'iter': 23340, 'avg_loss': 8.520050285234431, 'avg_acc': 49.989021464375995, 'loss': 8.150999069213867}


EP_train:3:  85%|| 23352/27626 [55:05<10:10,  7.00it/s]

{'epoch': 3, 'iter': 23350, 'avg_loss': 8.51999348255762, 'avg_acc': 49.98835702967753, 'loss': 8.554770469665527}


EP_train:3:  85%|| 23362/27626 [55:07<10:06,  7.04it/s]

{'epoch': 3, 'iter': 23360, 'avg_loss': 8.519994313094026, 'avg_acc': 49.99010102307264, 'loss': 8.073015213012695}


EP_train:3:  85%|| 23372/27626 [55:08<10:02,  7.06it/s]

{'epoch': 3, 'iter': 23370, 'avg_loss': 8.519888463933764, 'avg_acc': 49.99023897137478, 'loss': 9.509897232055664}


EP_train:3:  85%|| 23382/27626 [55:10<10:09,  6.97it/s]

{'epoch': 3, 'iter': 23380, 'avg_loss': 8.519906394011878, 'avg_acc': 49.98890659082161, 'loss': 8.675482749938965}


EP_train:3:  85%|| 23392/27626 [55:11<09:57,  7.08it/s]

{'epoch': 3, 'iter': 23390, 'avg_loss': 8.520014969995067, 'avg_acc': 49.988644136633745, 'loss': 9.099729537963867}


EP_train:3:  85%|| 23402/27626 [55:13<09:57,  7.07it/s]

{'epoch': 3, 'iter': 23400, 'avg_loss': 8.520113045800155, 'avg_acc': 49.98998440237597, 'loss': 8.364829063415527}


EP_train:3:  85%|| 23412/27626 [55:14<09:57,  7.06it/s]

{'epoch': 3, 'iter': 23410, 'avg_loss': 8.520076867705209, 'avg_acc': 49.99092307035154, 'loss': 8.132272720336914}


EP_train:3:  85%|| 23422/27626 [55:15<09:59,  7.02it/s]

{'epoch': 3, 'iter': 23420, 'avg_loss': 8.51995586321807, 'avg_acc': 49.99172750950002, 'loss': 7.893164157867432}


EP_train:3:  85%|| 23432/27626 [55:17<09:59,  6.99it/s]

{'epoch': 3, 'iter': 23430, 'avg_loss': 8.520097288695265, 'avg_acc': 49.99293137296743, 'loss': 9.17172622680664}


EP_train:3:  85%|| 23442/27626 [55:18<09:54,  7.04it/s]

{'epoch': 3, 'iter': 23440, 'avg_loss': 8.520320493060927, 'avg_acc': 49.993201015315044, 'loss': 8.644957542419434}


EP_train:3:  85%|| 23452/27626 [55:20<09:50,  7.07it/s]

{'epoch': 3, 'iter': 23450, 'avg_loss': 8.520459884580765, 'avg_acc': 49.9937369408554, 'loss': 9.293272972106934}


EP_train:3:  85%|| 23462/27626 [55:21<09:49,  7.06it/s]

{'epoch': 3, 'iter': 23460, 'avg_loss': 8.52045546723908, 'avg_acc': 49.99600400664933, 'loss': 8.536883354187012}


EP_train:3:  85%|| 23472/27626 [55:22<09:54,  6.98it/s]

{'epoch': 3, 'iter': 23470, 'avg_loss': 8.520515126650947, 'avg_acc': 49.994008563759536, 'loss': 8.262679100036621}


EP_train:3:  85%|| 23482/27626 [55:24<09:46,  7.06it/s]

{'epoch': 3, 'iter': 23480, 'avg_loss': 8.520484926956364, 'avg_acc': 49.99414420169499, 'loss': 8.270584106445312}


EP_train:3:  85%|| 23492/27626 [55:25<09:42,  7.09it/s]

{'epoch': 3, 'iter': 23490, 'avg_loss': 8.520499084072041, 'avg_acc': 49.992550338427485, 'loss': 8.862715721130371}


EP_train:3:  85%|| 23502/27626 [55:27<09:49,  7.00it/s]

{'epoch': 3, 'iter': 23500, 'avg_loss': 8.520468542714925, 'avg_acc': 49.99255350836135, 'loss': 7.4480462074279785}


EP_train:3:  85%|| 23512/27626 [55:28<09:48,  7.00it/s]

{'epoch': 3, 'iter': 23510, 'avg_loss': 8.52055483800166, 'avg_acc': 49.99282250861299, 'loss': 8.597185134887695}


EP_train:3:  85%|| 23522/27626 [55:30<09:50,  6.96it/s]

{'epoch': 3, 'iter': 23520, 'avg_loss': 8.52053826528617, 'avg_acc': 49.993224140130096, 'loss': 7.714395999908447}


EP_train:3:  85%|| 23532/27626 [55:31<09:41,  7.04it/s]

{'epoch': 3, 'iter': 23530, 'avg_loss': 8.520506766319558, 'avg_acc': 49.99375823381922, 'loss': 8.708566665649414}


EP_train:3:  85%|| 23542/27626 [55:32<09:41,  7.02it/s]

{'epoch': 3, 'iter': 23540, 'avg_loss': 8.52055135319548, 'avg_acc': 49.99415912663013, 'loss': 8.024829864501953}


EP_train:3:  85%|| 23552/27626 [55:34<09:39,  7.03it/s]

{'epoch': 3, 'iter': 23550, 'avg_loss': 8.5206652839879, 'avg_acc': 49.99389622521337, 'loss': 9.708334922790527}


EP_train:3:  85%|| 23562/27626 [55:35<09:35,  7.06it/s]

{'epoch': 3, 'iter': 23560, 'avg_loss': 8.520718193232296, 'avg_acc': 49.995225160222404, 'loss': 8.310003280639648}


EP_train:3:  85%|| 23572/27626 [55:37<09:35,  7.04it/s]

{'epoch': 3, 'iter': 23570, 'avg_loss': 8.520832964988715, 'avg_acc': 49.99695070213398, 'loss': 8.704562187194824}


EP_train:3:  85%|| 23582/27626 [55:38<09:37,  7.00it/s]

{'epoch': 3, 'iter': 23580, 'avg_loss': 8.520758780605652, 'avg_acc': 49.998277214706754, 'loss': 7.792354583740234}


EP_train:3:  85%|| 23592/27626 [55:39<09:37,  6.99it/s]

{'epoch': 3, 'iter': 23590, 'avg_loss': 8.520633426437614, 'avg_acc': 49.99960260268747, 'loss': 8.374403953552246}


EP_train:3:  85%|| 23602/27626 [55:41<09:32,  7.03it/s]

{'epoch': 3, 'iter': 23600, 'avg_loss': 8.520608486560869, 'avg_acc': 50.00052963857463, 'loss': 8.110345840454102}


EP_train:3:  85%|| 23612/27626 [55:42<09:30,  7.03it/s]

{'epoch': 3, 'iter': 23610, 'avg_loss': 8.520618173472334, 'avg_acc': 50.000397060692045, 'loss': 8.46002197265625}


EP_train:3:  86%|| 23622/27626 [55:44<09:27,  7.06it/s]

{'epoch': 3, 'iter': 23620, 'avg_loss': 8.520507921994295, 'avg_acc': 49.999338512340714, 'loss': 7.753820419311523}


EP_train:3:  86%|| 23632/27626 [55:45<09:26,  7.05it/s]

{'epoch': 3, 'iter': 23630, 'avg_loss': 8.520393317766082, 'avg_acc': 49.99960327535864, 'loss': 8.559836387634277}


EP_train:3:  86%|| 23642/27626 [55:47<09:26,  7.03it/s]

{'epoch': 3, 'iter': 23640, 'avg_loss': 8.52045445417794, 'avg_acc': 49.998017215853814, 'loss': 9.03321361541748}


EP_train:3:  86%|| 23652/27626 [55:48<09:24,  7.04it/s]

{'epoch': 3, 'iter': 23650, 'avg_loss': 8.520377803113938, 'avg_acc': 49.99881083252294, 'loss': 8.343711853027344}


EP_train:3:  86%|| 23662/27626 [55:49<09:19,  7.08it/s]

{'epoch': 3, 'iter': 23660, 'avg_loss': 8.520491645461199, 'avg_acc': 49.99867926123156, 'loss': 9.004621505737305}


EP_train:3:  86%|| 23672/27626 [55:51<09:21,  7.04it/s]

{'epoch': 3, 'iter': 23670, 'avg_loss': 8.520523890338618, 'avg_acc': 49.99894385535043, 'loss': 8.69376277923584}


EP_train:3:  86%|| 23682/27626 [55:52<09:20,  7.04it/s]

{'epoch': 3, 'iter': 23680, 'avg_loss': 8.520494265963281, 'avg_acc': 49.998944301338625, 'loss': 7.982099533081055}


EP_train:3:  86%|| 23692/27626 [55:54<09:22,  7.00it/s]

{'epoch': 3, 'iter': 23690, 'avg_loss': 8.520321873904814, 'avg_acc': 49.99828521379427, 'loss': 7.87255859375}


EP_train:3:  86%|| 23702/27626 [55:55<09:18,  7.03it/s]

{'epoch': 3, 'iter': 23700, 'avg_loss': 8.520461466849481, 'avg_acc': 49.99815408632547, 'loss': 8.174138069152832}


EP_train:3:  86%|| 23712/27626 [55:56<09:14,  7.06it/s]

{'epoch': 3, 'iter': 23710, 'avg_loss': 8.520378081360448, 'avg_acc': 49.99736409261524, 'loss': 7.78363037109375}


EP_train:3:  86%|| 23722/27626 [55:58<09:12,  7.07it/s]

{'epoch': 3, 'iter': 23720, 'avg_loss': 8.52036013365405, 'avg_acc': 49.997101724210616, 'loss': 8.413233757019043}


EP_train:3:  86%|| 23732/27626 [55:59<09:11,  7.06it/s]

{'epoch': 3, 'iter': 23730, 'avg_loss': 8.520348822722408, 'avg_acc': 49.9969712612195, 'loss': 8.233525276184082}


EP_train:3:  86%|| 23742/27626 [56:01<09:11,  7.05it/s]

{'epoch': 3, 'iter': 23740, 'avg_loss': 8.52025650075912, 'avg_acc': 49.995787877511475, 'loss': 8.715010643005371}


EP_train:3:  86%|| 23752/27626 [56:02<09:07,  7.07it/s]

{'epoch': 3, 'iter': 23750, 'avg_loss': 8.520238372322023, 'avg_acc': 49.99618437118437, 'loss': 8.318400382995605}


EP_train:3:  86%|| 23762/27626 [56:04<09:09,  7.03it/s]

{'epoch': 3, 'iter': 23760, 'avg_loss': 8.52018473145436, 'avg_acc': 49.99723812129119, 'loss': 8.335480690002441}


EP_train:3:  86%|| 23772/27626 [56:05<09:11,  6.99it/s]

{'epoch': 3, 'iter': 23770, 'avg_loss': 8.52014089851304, 'avg_acc': 49.99973707458668, 'loss': 7.6916608810424805}


EP_train:3:  86%|| 23782/27626 [56:06<09:11,  6.97it/s]

{'epoch': 3, 'iter': 23780, 'avg_loss': 8.52020267048494, 'avg_acc': 49.99842311088684, 'loss': 8.461162567138672}


EP_train:3:  86%|| 23792/27626 [56:08<09:08,  6.99it/s]

{'epoch': 3, 'iter': 23790, 'avg_loss': 8.520267785089043, 'avg_acc': 49.99868647807995, 'loss': 8.360282897949219}


EP_train:3:  86%|| 23802/27626 [56:09<09:08,  6.98it/s]

{'epoch': 3, 'iter': 23800, 'avg_loss': 8.520168264334625, 'avg_acc': 49.99776795092643, 'loss': 8.277779579162598}


EP_train:3:  86%|| 23812/27626 [56:11<09:00,  7.06it/s]

{'epoch': 3, 'iter': 23810, 'avg_loss': 8.5199629374621, 'avg_acc': 49.99685019528789, 'loss': 7.959583282470703}


EP_train:3:  86%|| 23822/27626 [56:12<08:59,  7.05it/s]

{'epoch': 3, 'iter': 23820, 'avg_loss': 8.519973846182685, 'avg_acc': 49.995539649888755, 'loss': 8.7556734085083}


EP_train:3:  86%|| 23832/27626 [56:13<08:58,  7.04it/s]

{'epoch': 3, 'iter': 23830, 'avg_loss': 8.519870686384, 'avg_acc': 49.99409907263648, 'loss': 8.490873336791992}


EP_train:3:  86%|| 23842/27626 [56:15<08:57,  7.05it/s]

{'epoch': 3, 'iter': 23840, 'avg_loss': 8.519903678041212, 'avg_acc': 49.99528123820309, 'loss': 8.417426109313965}


EP_train:3:  86%|| 23852/27626 [56:16<08:55,  7.04it/s]

{'epoch': 3, 'iter': 23850, 'avg_loss': 8.51984764266017, 'avg_acc': 49.993186868475114, 'loss': 8.714627265930176}


EP_train:3:  86%|| 23862/27626 [56:18<08:55,  7.03it/s]

{'epoch': 3, 'iter': 23860, 'avg_loss': 8.519844570342165, 'avg_acc': 49.992796823268094, 'loss': 9.226211547851562}


EP_train:3:  86%|| 23872/27626 [56:19<08:54,  7.02it/s]

{'epoch': 3, 'iter': 23870, 'avg_loss': 8.519792112865275, 'avg_acc': 49.99384713669306, 'loss': 9.190298080444336}


EP_train:3:  86%|| 23882/27626 [56:21<08:52,  7.03it/s]

{'epoch': 3, 'iter': 23880, 'avg_loss': 8.519797463966352, 'avg_acc': 49.99267199866002, 'loss': 7.934071063995361}


EP_train:3:  86%|| 23892/27626 [56:22<08:51,  7.03it/s]

{'epoch': 3, 'iter': 23890, 'avg_loss': 8.519789535290458, 'avg_acc': 49.99175944916496, 'loss': 8.767054557800293}


EP_train:3:  87%|| 23902/27626 [56:23<08:52,  7.00it/s]

{'epoch': 3, 'iter': 23900, 'avg_loss': 8.51979708469072, 'avg_acc': 49.99019392494038, 'loss': 8.162534713745117}


EP_train:3:  87%|| 23912/27626 [56:25<08:48,  7.03it/s]

{'epoch': 3, 'iter': 23910, 'avg_loss': 8.519844528044448, 'avg_acc': 49.98784555225628, 'loss': 8.696586608886719}


EP_train:3:  87%|| 23922/27626 [56:26<08:44,  7.06it/s]

{'epoch': 3, 'iter': 23920, 'avg_loss': 8.51975147226105, 'avg_acc': 49.989418293549605, 'loss': 8.495121955871582}


EP_train:3:  87%|| 23932/27626 [56:28<08:47,  7.01it/s]

{'epoch': 3, 'iter': 23930, 'avg_loss': 8.519806695917946, 'avg_acc': 49.988116877690025, 'loss': 7.946262836456299}


EP_train:3:  87%|| 23942/27626 [56:29<08:42,  7.05it/s]

{'epoch': 3, 'iter': 23940, 'avg_loss': 8.519997279878337, 'avg_acc': 49.98603337371037, 'loss': 8.52745532989502}


EP_train:3:  87%|| 23952/27626 [56:30<08:39,  7.07it/s]

{'epoch': 3, 'iter': 23950, 'avg_loss': 8.519967631195529, 'avg_acc': 49.98590873032441, 'loss': 8.666487693786621}


EP_train:3:  87%|| 23962/27626 [56:32<08:38,  7.07it/s]

{'epoch': 3, 'iter': 23960, 'avg_loss': 8.519856879490355, 'avg_acc': 49.984349568048074, 'loss': 7.822015762329102}


EP_train:3:  87%|| 23972/27626 [56:33<08:42,  6.99it/s]

{'epoch': 3, 'iter': 23970, 'avg_loss': 8.519943758402105, 'avg_acc': 49.9838346335155, 'loss': 8.680685043334961}


EP_train:3:  87%|| 23982/27626 [56:35<08:36,  7.06it/s]

{'epoch': 3, 'iter': 23980, 'avg_loss': 8.520005224027555, 'avg_acc': 49.98540511238063, 'loss': 8.802696228027344}


EP_train:3:  87%|| 23992/27626 [56:36<08:39,  6.99it/s]

{'epoch': 3, 'iter': 23990, 'avg_loss': 8.520060880967971, 'avg_acc': 49.98449939560668, 'loss': 8.574769020080566}


EP_train:3:  87%|| 24002/27626 [56:38<08:40,  6.96it/s]

{'epoch': 3, 'iter': 24000, 'avg_loss': 8.520042471684821, 'avg_acc': 49.98437565101454, 'loss': 8.447293281555176}


EP_train:3:  87%|| 24012/27626 [56:39<08:33,  7.04it/s]

{'epoch': 3, 'iter': 24010, 'avg_loss': 8.520121185679582, 'avg_acc': 49.98256007663154, 'loss': 8.623099327087402}


EP_train:3:  87%|| 24022/27626 [56:40<08:29,  7.07it/s]

{'epoch': 3, 'iter': 24020, 'avg_loss': 8.520239981856207, 'avg_acc': 49.98113629740644, 'loss': 9.688321113586426}


EP_train:3:  87%|| 24032/27626 [56:42<08:34,  6.99it/s]

{'epoch': 3, 'iter': 24030, 'avg_loss': 8.520197992145055, 'avg_acc': 49.98140422787233, 'loss': 8.235395431518555}


EP_train:3:  87%|| 24042/27626 [56:43<08:29,  7.04it/s]

{'epoch': 3, 'iter': 24040, 'avg_loss': 8.520128415520558, 'avg_acc': 49.981801921717064, 'loss': 7.896571159362793}


EP_train:3:  87%|| 24052/27626 [56:45<08:27,  7.04it/s]

{'epoch': 3, 'iter': 24050, 'avg_loss': 8.52010252793848, 'avg_acc': 49.984408132717974, 'loss': 8.535896301269531}


EP_train:3:  87%|| 24062/27626 [56:46<08:28,  7.01it/s]

{'epoch': 3, 'iter': 24060, 'avg_loss': 8.520093211043253, 'avg_acc': 49.98584327334691, 'loss': 8.408799171447754}


EP_train:3:  87%|| 24072/27626 [56:47<08:23,  7.05it/s]

{'epoch': 3, 'iter': 24070, 'avg_loss': 8.520113550023481, 'avg_acc': 49.98442108761581, 'loss': 9.221343040466309}


EP_train:3:  87%|| 24082/27626 [56:49<08:25,  7.02it/s]

{'epoch': 3, 'iter': 24080, 'avg_loss': 8.520209539046018, 'avg_acc': 49.98494663842863, 'loss': 9.01116943359375}


EP_train:3:  87%|| 24092/27626 [56:50<08:21,  7.05it/s]

{'epoch': 3, 'iter': 24090, 'avg_loss': 8.520071769709764, 'avg_acc': 49.98378543854551, 'loss': 8.165386199951172}


EP_train:3:  87%|| 24102/27626 [56:52<08:17,  7.08it/s]

{'epoch': 3, 'iter': 24100, 'avg_loss': 8.520055153207569, 'avg_acc': 49.98418115430895, 'loss': 7.723476886749268}


EP_train:3:  87%|| 24112/27626 [56:53<08:19,  7.03it/s]

{'epoch': 3, 'iter': 24110, 'avg_loss': 8.520006340179215, 'avg_acc': 49.98328045290531, 'loss': 9.06596851348877}


EP_train:3:  87%|| 24122/27626 [56:55<08:16,  7.06it/s]

{'epoch': 3, 'iter': 24120, 'avg_loss': 8.519959718343566, 'avg_acc': 49.98108494672692, 'loss': 8.660627365112305}


EP_train:3:  87%|| 24132/27626 [56:56<08:18,  7.01it/s]

{'epoch': 3, 'iter': 24130, 'avg_loss': 8.520013741505121, 'avg_acc': 49.979927271973814, 'loss': 8.678915977478027}


EP_train:3:  87%|| 24142/27626 [56:57<08:18,  6.98it/s]

{'epoch': 3, 'iter': 24140, 'avg_loss': 8.51998661872115, 'avg_acc': 49.97851166066029, 'loss': 8.358759880065918}


EP_train:3:  87%|| 24152/27626 [56:59<08:15,  7.01it/s]

{'epoch': 3, 'iter': 24150, 'avg_loss': 8.520049197407944, 'avg_acc': 49.97645025050723, 'loss': 8.087931632995605}


EP_train:3:  87%|| 24162/27626 [57:00<08:10,  7.06it/s]

{'epoch': 3, 'iter': 24160, 'avg_loss': 8.519980762658207, 'avg_acc': 49.97633065684368, 'loss': 8.493001937866211}


EP_train:3:  87%|| 24172/27626 [57:02<08:12,  7.02it/s]

{'epoch': 3, 'iter': 24170, 'avg_loss': 8.520085751761284, 'avg_acc': 49.97530615200033, 'loss': 8.865398406982422}


EP_train:3:  88%|| 24182/27626 [57:03<08:08,  7.06it/s]

{'epoch': 3, 'iter': 24180, 'avg_loss': 8.520113600054163, 'avg_acc': 49.97454096191225, 'loss': 8.433454513549805}


EP_train:3:  88%|| 24192/27626 [57:04<08:11,  6.99it/s]

{'epoch': 3, 'iter': 24190, 'avg_loss': 8.52011220536282, 'avg_acc': 49.976360009921045, 'loss': 8.339754104614258}


EP_train:3:  88%|| 24202/27626 [57:06<08:07,  7.03it/s]

{'epoch': 3, 'iter': 24200, 'avg_loss': 8.52012254338556, 'avg_acc': 49.97624065121276, 'loss': 8.553579330444336}


EP_train:3:  88%|| 24212/27626 [57:07<08:02,  7.07it/s]

{'epoch': 3, 'iter': 24210, 'avg_loss': 8.520128384481787, 'avg_acc': 49.97444343480236, 'loss': 7.8774871826171875}


EP_train:3:  88%|| 24222/27626 [57:09<08:02,  7.05it/s]

{'epoch': 3, 'iter': 24220, 'avg_loss': 8.52022465332253, 'avg_acc': 49.9752281078403, 'loss': 8.614078521728516}


EP_train:3:  88%|| 24232/27626 [57:10<08:01,  7.04it/s]

{'epoch': 3, 'iter': 24230, 'avg_loss': 8.520419156958221, 'avg_acc': 49.97472246296067, 'loss': 9.208390235900879}


EP_train:3:  88%|| 24242/27626 [57:12<08:03,  7.00it/s]

{'epoch': 3, 'iter': 24240, 'avg_loss': 8.520314544170073, 'avg_acc': 49.97550637349944, 'loss': 8.601773262023926}


EP_train:3:  88%|| 24252/27626 [57:13<07:58,  7.05it/s]

{'epoch': 3, 'iter': 24250, 'avg_loss': 8.520394207359535, 'avg_acc': 49.97577419487856, 'loss': 8.036267280578613}


EP_train:3:  88%|| 24262/27626 [57:14<08:01,  6.99it/s]

{'epoch': 3, 'iter': 24260, 'avg_loss': 8.520330186285449, 'avg_acc': 49.97423848975722, 'loss': 8.850187301635742}


EP_train:3:  88%|| 24272/27626 [57:16<08:04,  6.93it/s]

{'epoch': 3, 'iter': 24270, 'avg_loss': 8.520264924763602, 'avg_acc': 49.97244654113963, 'loss': 7.83055305480957}


EP_train:3:  88%|| 24282/27626 [57:17<07:52,  7.07it/s]

{'epoch': 3, 'iter': 24280, 'avg_loss': 8.520289785214594, 'avg_acc': 49.972715291791935, 'loss': 9.73847484588623}


EP_train:3:  88%|| 24292/27626 [57:19<07:53,  7.04it/s]

{'epoch': 3, 'iter': 24290, 'avg_loss': 8.52025815700622, 'avg_acc': 49.97272652422708, 'loss': 8.469883918762207}


EP_train:3:  88%|| 24302/27626 [57:20<07:52,  7.03it/s]

{'epoch': 3, 'iter': 24300, 'avg_loss': 8.520227328449957, 'avg_acc': 49.97389510719724, 'loss': 9.181281089782715}


EP_train:3:  88%|| 24312/27626 [57:21<07:52,  7.01it/s]

{'epoch': 3, 'iter': 24310, 'avg_loss': 8.5204359047269, 'avg_acc': 49.97416293036074, 'loss': 9.635988235473633}


EP_train:3:  88%|| 24322/27626 [57:23<07:53,  6.98it/s]

{'epoch': 3, 'iter': 24320, 'avg_loss': 8.520480214723566, 'avg_acc': 49.97532996176144, 'loss': 8.92431926727295}


EP_train:3:  88%|| 24332/27626 [57:24<07:47,  7.04it/s]

{'epoch': 3, 'iter': 24330, 'avg_loss': 8.520488405692133, 'avg_acc': 49.97662447083967, 'loss': 8.921021461486816}


EP_train:3:  88%|| 24342/27626 [57:26<07:46,  7.04it/s]

{'epoch': 3, 'iter': 24340, 'avg_loss': 8.520480387438383, 'avg_acc': 49.97804630048067, 'loss': 8.677556991577148}


EP_train:3:  88%|| 24352/27626 [57:27<07:49,  6.98it/s]

{'epoch': 3, 'iter': 24350, 'avg_loss': 8.520530823402927, 'avg_acc': 49.97831197897417, 'loss': 8.323138236999512}


EP_train:3:  88%|| 24362/27626 [57:29<07:43,  7.05it/s]

{'epoch': 3, 'iter': 24360, 'avg_loss': 8.520543893008664, 'avg_acc': 49.97883399696236, 'loss': 9.368840217590332}


EP_train:3:  88%|| 24372/27626 [57:30<07:41,  7.05it/s]

{'epoch': 3, 'iter': 24370, 'avg_loss': 8.520560140325255, 'avg_acc': 49.97781687251241, 'loss': 7.998557090759277}


EP_train:3:  88%|| 24382/27626 [57:31<07:41,  7.03it/s]

{'epoch': 3, 'iter': 24380, 'avg_loss': 8.520509944938615, 'avg_acc': 49.9771851031541, 'loss': 8.081328392028809}


EP_train:3:  88%|| 24392/27626 [57:33<07:39,  7.04it/s]

{'epoch': 3, 'iter': 24390, 'avg_loss': 8.520570739423759, 'avg_acc': 49.97681009388709, 'loss': 8.714406967163086}


EP_train:3:  88%|| 24402/27626 [57:34<07:41,  6.99it/s]

{'epoch': 3, 'iter': 24400, 'avg_loss': 8.520597501913205, 'avg_acc': 49.97605118642679, 'loss': 8.700516700744629}


EP_train:3:  88%|| 24412/27626 [57:36<07:37,  7.02it/s]

{'epoch': 3, 'iter': 24410, 'avg_loss': 8.520459435689425, 'avg_acc': 49.97593298103314, 'loss': 8.441701889038086}


EP_train:3:  88%|| 24422/27626 [57:37<07:35,  7.03it/s]

{'epoch': 3, 'iter': 24420, 'avg_loss': 8.52032535383964, 'avg_acc': 49.975814872445845, 'loss': 8.421422958374023}


EP_train:3:  88%|| 24432/27626 [57:38<07:34,  7.03it/s]

{'epoch': 3, 'iter': 24430, 'avg_loss': 8.52027897200257, 'avg_acc': 49.97569686054602, 'loss': 8.219482421875}


EP_train:3:  88%|| 24442/27626 [57:40<07:30,  7.06it/s]

{'epoch': 3, 'iter': 24440, 'avg_loss': 8.520075295951926, 'avg_acc': 49.97583466306616, 'loss': 8.439481735229492}


EP_train:3:  89%|| 24452/27626 [57:41<07:28,  7.07it/s]

{'epoch': 3, 'iter': 24450, 'avg_loss': 8.520002827128137, 'avg_acc': 49.9754611263343, 'loss': 8.730010032653809}


EP_train:3:  89%|| 24462/27626 [57:43<07:26,  7.09it/s]

{'epoch': 3, 'iter': 24460, 'avg_loss': 8.520309972254275, 'avg_acc': 49.97713196516905, 'loss': 9.066985130310059}


EP_train:3:  89%|| 24472/27626 [57:44<07:27,  7.05it/s]

{'epoch': 3, 'iter': 24470, 'avg_loss': 8.520503604169967, 'avg_acc': 49.976375097053655, 'loss': 8.64404296875}


EP_train:3:  89%|| 24482/27626 [57:46<07:28,  7.01it/s]

{'epoch': 3, 'iter': 24480, 'avg_loss': 8.520617722228122, 'avg_acc': 49.9761294473265, 'loss': 8.772839546203613}


EP_train:3:  89%|| 24492/27626 [57:47<07:27,  7.00it/s]

{'epoch': 3, 'iter': 24490, 'avg_loss': 8.520604677166439, 'avg_acc': 49.97575640031032, 'loss': 8.597018241882324}


EP_train:3:  89%|| 24502/27626 [57:48<07:29,  6.95it/s]

{'epoch': 3, 'iter': 24500, 'avg_loss': 8.520549784309596, 'avg_acc': 49.97551120362434, 'loss': 8.677809715270996}


EP_train:3:  89%|| 24512/27626 [57:50<07:22,  7.03it/s]

{'epoch': 3, 'iter': 24510, 'avg_loss': 8.520604338670642, 'avg_acc': 49.97654114479214, 'loss': 8.786578178405762}


EP_train:3:  89%|| 24522/27626 [57:51<07:23,  7.01it/s]

{'epoch': 3, 'iter': 24520, 'avg_loss': 8.52055594209483, 'avg_acc': 49.97667815341952, 'loss': 7.709746360778809}


EP_train:3:  89%|| 24532/27626 [57:53<07:22,  6.99it/s]

{'epoch': 3, 'iter': 24530, 'avg_loss': 8.520671986750212, 'avg_acc': 49.97694244017774, 'loss': 9.018376350402832}


EP_train:3:  89%|| 24542/27626 [57:54<07:19,  7.01it/s]

{'epoch': 3, 'iter': 24540, 'avg_loss': 8.520861768930438, 'avg_acc': 49.97746118740068, 'loss': 10.074984550476074}


EP_train:3:  89%|| 24552/27626 [57:56<07:20,  6.98it/s]

{'epoch': 3, 'iter': 24550, 'avg_loss': 8.520880953473455, 'avg_acc': 49.97607022117225, 'loss': 8.682327270507812}


EP_train:3:  89%|| 24562/27626 [57:57<07:15,  7.03it/s]

{'epoch': 3, 'iter': 24560, 'avg_loss': 8.520805246438496, 'avg_acc': 49.97620719840397, 'loss': 7.978564262390137}


EP_train:3:  89%|| 24572/27626 [57:58<07:16,  7.00it/s]

{'epoch': 3, 'iter': 24570, 'avg_loss': 8.520791328160502, 'avg_acc': 49.97481787473037, 'loss': 8.147695541381836}


EP_train:3:  89%|| 24582/27626 [58:00<07:13,  7.03it/s]

{'epoch': 3, 'iter': 24580, 'avg_loss': 8.520824074177769, 'avg_acc': 49.9724126357756, 'loss': 8.486613273620605}


EP_train:3:  89%|| 24592/27626 [58:01<07:13,  7.00it/s]

{'epoch': 3, 'iter': 24590, 'avg_loss': 8.520824471445344, 'avg_acc': 49.97204261721768, 'loss': 8.393623352050781}


EP_train:3:  89%|| 24602/27626 [58:03<07:07,  7.07it/s]

{'epoch': 3, 'iter': 24600, 'avg_loss': 8.52065576552802, 'avg_acc': 49.973070200398354, 'loss': 8.022045135498047}


EP_train:3:  89%|| 24612/27626 [58:04<07:10,  6.99it/s]

{'epoch': 3, 'iter': 24610, 'avg_loss': 8.520543266120617, 'avg_acc': 49.972319288123195, 'loss': 7.943578720092773}


EP_train:3:  89%|| 24622/27626 [58:05<07:05,  7.05it/s]

{'epoch': 3, 'iter': 24620, 'avg_loss': 8.52062731644048, 'avg_acc': 49.97347284838146, 'loss': 8.247790336608887}


EP_train:3:  89%|| 24632/27626 [58:07<07:05,  7.03it/s]

{'epoch': 3, 'iter': 24630, 'avg_loss': 8.520802851092254, 'avg_acc': 49.97411798140555, 'loss': 8.880732536315918}


EP_train:3:  89%|| 24642/27626 [58:08<07:01,  7.09it/s]

{'epoch': 3, 'iter': 24640, 'avg_loss': 8.520718723123112, 'avg_acc': 49.97171888316221, 'loss': 7.812628269195557}


EP_train:3:  89%|| 24652/27626 [58:10<07:02,  7.05it/s]

{'epoch': 3, 'iter': 24650, 'avg_loss': 8.520671594409372, 'avg_acc': 49.970969737536, 'loss': 8.064225196838379}


EP_train:3:  89%|| 24662/27626 [58:11<07:00,  7.06it/s]

{'epoch': 3, 'iter': 24660, 'avg_loss': 8.520573370948112, 'avg_acc': 49.97047463606504, 'loss': 8.142125129699707}


EP_train:3:  89%|| 24672/27626 [58:13<07:00,  7.02it/s]

{'epoch': 3, 'iter': 24670, 'avg_loss': 8.520641865706512, 'avg_acc': 49.97086660451542, 'loss': 8.528397560119629}


EP_train:3:  89%|| 24682/27626 [58:14<07:00,  7.00it/s]

{'epoch': 3, 'iter': 24680, 'avg_loss': 8.520562966510072, 'avg_acc': 49.97189133341437, 'loss': 8.325481414794922}


EP_train:3:  89%|| 24692/27626 [58:15<06:57,  7.03it/s]

{'epoch': 3, 'iter': 24690, 'avg_loss': 8.520745976138949, 'avg_acc': 49.97114333157831, 'loss': 9.173834800720215}


EP_train:3:  89%|| 24702/27626 [58:17<06:54,  7.05it/s]

{'epoch': 3, 'iter': 24700, 'avg_loss': 8.520747320212157, 'avg_acc': 49.96963685680741, 'loss': 8.306283950805664}


EP_train:3:  89%|| 24712/27626 [58:18<06:53,  7.05it/s]

{'epoch': 3, 'iter': 24710, 'avg_loss': 8.520799265787375, 'avg_acc': 49.96825806321071, 'loss': 8.710122108459473}


EP_train:3:  89%|| 24722/27626 [58:20<06:50,  7.08it/s]

{'epoch': 3, 'iter': 24720, 'avg_loss': 8.520706096748082, 'avg_acc': 49.9675124388172, 'loss': 7.611063003540039}


EP_train:3:  90%|| 24732/27626 [58:21<06:51,  7.03it/s]

{'epoch': 3, 'iter': 24730, 'avg_loss': 8.52059792477393, 'avg_acc': 49.96651469815212, 'loss': 7.657721996307373}


EP_train:3:  90%|| 24742/27626 [58:22<06:47,  7.07it/s]

{'epoch': 3, 'iter': 24740, 'avg_loss': 8.520371958295991, 'avg_acc': 49.9657703811487, 'loss': 7.4221110343933105}


EP_train:3:  90%|| 24752/27626 [58:24<06:54,  6.93it/s]

{'epoch': 3, 'iter': 24750, 'avg_loss': 8.520325443276608, 'avg_acc': 49.96679427093855, 'loss': 7.967689514160156}


EP_train:3:  90%|| 24762/27626 [58:25<06:49,  6.99it/s]

{'epoch': 3, 'iter': 24760, 'avg_loss': 8.52036639572485, 'avg_acc': 49.96529320302088, 'loss': 9.164153099060059}


EP_train:3:  90%|| 24772/27626 [58:27<06:46,  7.02it/s]

{'epoch': 3, 'iter': 24770, 'avg_loss': 8.520356155472662, 'avg_acc': 49.964550280570016, 'loss': 8.8842191696167}


EP_train:3:  90%|| 24782/27626 [58:28<06:44,  7.04it/s]

{'epoch': 3, 'iter': 24780, 'avg_loss': 8.52025830530667, 'avg_acc': 49.96544731851015, 'loss': 7.653280735015869}


EP_train:3:  90%|| 24792/27626 [58:29<06:41,  7.06it/s]

{'epoch': 3, 'iter': 24790, 'avg_loss': 8.520104645769111, 'avg_acc': 49.964578879432054, 'loss': 7.64343786239624}


EP_train:3:  90%|| 24802/27626 [58:31<06:41,  7.04it/s]

{'epoch': 3, 'iter': 24800, 'avg_loss': 8.520022116333799, 'avg_acc': 49.96522317648482, 'loss': 7.6863250732421875}


EP_train:3:  90%|| 24812/27626 [58:32<06:39,  7.04it/s]

{'epoch': 3, 'iter': 24810, 'avg_loss': 8.519945652413496, 'avg_acc': 49.96624481076942, 'loss': 7.937195301055908}


EP_train:3:  90%|| 24822/27626 [58:34<06:39,  7.03it/s]

{'epoch': 3, 'iter': 24820, 'avg_loss': 8.520032270895735, 'avg_acc': 49.96713972039805, 'loss': 9.817352294921875}


EP_train:3:  90%|| 24832/27626 [58:35<06:35,  7.06it/s]

{'epoch': 3, 'iter': 24830, 'avg_loss': 8.519999140417248, 'avg_acc': 49.9656427449559, 'loss': 8.942730903625488}


EP_train:3:  90%|| 24842/27626 [58:37<06:36,  7.03it/s]

{'epoch': 3, 'iter': 24840, 'avg_loss': 8.520038231804792, 'avg_acc': 49.96565657582223, 'loss': 8.924598693847656}


EP_train:3:  90%|| 24852/27626 [58:38<06:33,  7.05it/s]

{'epoch': 3, 'iter': 24850, 'avg_loss': 8.52003973910259, 'avg_acc': 49.96516739769024, 'loss': 8.780641555786133}


EP_train:3:  90%|| 24862/27626 [58:39<06:34,  7.01it/s]

{'epoch': 3, 'iter': 24860, 'avg_loss': 8.520003874166402, 'avg_acc': 49.96656409637585, 'loss': 7.772378444671631}


EP_train:3:  90%|| 24872/27626 [58:41<06:30,  7.04it/s]

{'epoch': 3, 'iter': 24870, 'avg_loss': 8.519986075441606, 'avg_acc': 49.966577540106954, 'loss': 8.558171272277832}


EP_train:3:  90%|| 24882/27626 [58:42<06:27,  7.08it/s]

{'epoch': 3, 'iter': 24880, 'avg_loss': 8.519969721861166, 'avg_acc': 49.96633977734014, 'loss': 8.181960105895996}


EP_train:3:  90%|| 24892/27626 [58:44<06:28,  7.04it/s]

{'epoch': 3, 'iter': 24890, 'avg_loss': 8.519868565958198, 'avg_acc': 49.968110963802175, 'loss': 8.262419700622559}


EP_train:3:  90%|| 24902/27626 [58:45<06:27,  7.03it/s]

{'epoch': 3, 'iter': 24900, 'avg_loss': 8.519912009746463, 'avg_acc': 49.96599032167383, 'loss': 8.377655982971191}


EP_train:3:  90%|| 24912/27626 [58:46<06:24,  7.06it/s]

{'epoch': 3, 'iter': 24910, 'avg_loss': 8.520001407667086, 'avg_acc': 49.96500040142909, 'loss': 8.974077224731445}


EP_train:3:  90%|| 24922/27626 [58:48<06:25,  7.02it/s]

{'epoch': 3, 'iter': 24920, 'avg_loss': 8.520017879454398, 'avg_acc': 49.96488904939609, 'loss': 7.860035419464111}


EP_train:3:  90%|| 24932/27626 [58:49<06:22,  7.04it/s]

{'epoch': 3, 'iter': 24930, 'avg_loss': 8.519918927465085, 'avg_acc': 49.964401748826766, 'loss': 8.300494194030762}


EP_train:3:  90%|| 24942/27626 [58:51<06:26,  6.95it/s]

{'epoch': 3, 'iter': 24940, 'avg_loss': 8.5199867263983, 'avg_acc': 49.963163064833005, 'loss': 8.630616188049316}


EP_train:3:  90%|| 24952/27626 [58:52<06:20,  7.04it/s]

{'epoch': 3, 'iter': 24950, 'avg_loss': 8.519960384082998, 'avg_acc': 49.96468077431766, 'loss': 9.644742012023926}


EP_train:3:  90%|| 24962/27626 [58:54<06:17,  7.06it/s]

{'epoch': 3, 'iter': 24960, 'avg_loss': 8.519983229444776, 'avg_acc': 49.96331777573014, 'loss': 8.185787200927734}


EP_train:3:  90%|| 24972/27626 [58:55<06:18,  7.02it/s]

{'epoch': 3, 'iter': 24970, 'avg_loss': 8.519876656794471, 'avg_acc': 49.96483420768091, 'loss': 8.799826622009277}


EP_train:3:  90%|| 24982/27626 [58:56<06:16,  7.03it/s]

{'epoch': 3, 'iter': 24980, 'avg_loss': 8.519965782648836, 'avg_acc': 49.96672471078019, 'loss': 8.50658893585205}


EP_train:3:  90%|| 24992/27626 [58:58<06:16,  7.00it/s]

{'epoch': 3, 'iter': 24990, 'avg_loss': 8.519992582156236, 'avg_acc': 49.96548757552719, 'loss': 8.61467170715332}


EP_train:3:  91%|| 25002/27626 [58:59<06:14,  7.01it/s]

{'epoch': 3, 'iter': 25000, 'avg_loss': 8.520000901585945, 'avg_acc': 49.964376424943005, 'loss': 7.810464859008789}


EP_train:3:  91%|| 25012/27626 [59:01<06:11,  7.04it/s]

{'epoch': 3, 'iter': 25010, 'avg_loss': 8.519925534675144, 'avg_acc': 49.9660149534205, 'loss': 8.350711822509766}


EP_train:3:  91%|| 25022/27626 [59:02<06:08,  7.06it/s]

{'epoch': 3, 'iter': 25020, 'avg_loss': 8.519921597119101, 'avg_acc': 49.96452979497222, 'loss': 8.002815246582031}


EP_train:3:  91%|| 25032/27626 [59:03<06:08,  7.04it/s]

{'epoch': 3, 'iter': 25030, 'avg_loss': 8.519787576785218, 'avg_acc': 49.96491850105868, 'loss': 8.895551681518555}


EP_train:3:  91%|| 25042/27626 [59:05<06:09,  6.99it/s]

{'epoch': 3, 'iter': 25040, 'avg_loss': 8.519716341538073, 'avg_acc': 49.96418373866858, 'loss': 7.746565818786621}


EP_train:3:  91%|| 25052/27626 [59:06<06:06,  7.02it/s]

{'epoch': 3, 'iter': 25050, 'avg_loss': 8.519659727225308, 'avg_acc': 49.9655702367171, 'loss': 8.443427085876465}


EP_train:3:  91%|| 25062/27626 [59:08<06:05,  7.02it/s]

{'epoch': 3, 'iter': 25060, 'avg_loss': 8.519781349025042, 'avg_acc': 49.96433701767687, 'loss': 8.461583137512207}


EP_train:3:  91%|| 25072/27626 [59:09<06:04,  7.02it/s]

{'epoch': 3, 'iter': 25070, 'avg_loss': 8.519647741388122, 'avg_acc': 49.963728012444655, 'loss': 8.314393997192383}


EP_train:3:  91%|| 25082/27626 [59:11<05:59,  7.07it/s]

{'epoch': 3, 'iter': 25080, 'avg_loss': 8.519599812361884, 'avg_acc': 49.96212272237949, 'loss': 9.343877792358398}


EP_train:3:  91%|| 25092/27626 [59:12<05:59,  7.05it/s]

{'epoch': 3, 'iter': 25090, 'avg_loss': 8.519627561432763, 'avg_acc': 49.96238691164162, 'loss': 8.62214183807373}


EP_train:3:  91%|| 25102/27626 [59:13<06:02,  6.96it/s]

{'epoch': 3, 'iter': 25100, 'avg_loss': 8.51966636733018, 'avg_acc': 49.96240189633879, 'loss': 9.153757095336914}


EP_train:3:  91%|| 25112/27626 [59:15<05:56,  7.04it/s]

{'epoch': 3, 'iter': 25110, 'avg_loss': 8.519844691370091, 'avg_acc': 49.963661343634264, 'loss': 8.675591468811035}


EP_train:3:  91%|| 25122/27626 [59:16<05:57,  7.00it/s]

{'epoch': 3, 'iter': 25120, 'avg_loss': 8.519867301751253, 'avg_acc': 49.96093905497393, 'loss': 9.206070899963379}


EP_train:3:  91%|| 25132/27626 [59:18<05:55,  7.01it/s]

{'epoch': 3, 'iter': 25130, 'avg_loss': 8.519848258822348, 'avg_acc': 49.960084159006804, 'loss': 7.890620231628418}


EP_train:3:  91%|| 25142/27626 [59:19<05:51,  7.06it/s]

{'epoch': 3, 'iter': 25140, 'avg_loss': 8.520011256178393, 'avg_acc': 49.958981345213, 'loss': 8.650997161865234}


EP_train:3:  91%|| 25152/27626 [59:20<05:49,  7.08it/s]

{'epoch': 3, 'iter': 25150, 'avg_loss': 8.52008679350026, 'avg_acc': 49.95912190370164, 'loss': 8.461601257324219}


EP_train:3:  91%|| 25162/27626 [59:22<05:49,  7.05it/s]

{'epoch': 3, 'iter': 25160, 'avg_loss': 8.520029191310178, 'avg_acc': 49.959013950160966, 'loss': 7.6637420654296875}


EP_train:3:  91%|| 25172/27626 [59:23<05:47,  7.07it/s]

{'epoch': 3, 'iter': 25170, 'avg_loss': 8.520049685199773, 'avg_acc': 49.95890608239641, 'loss': 8.935685157775879}


EP_train:3:  91%|| 25182/27626 [59:25<05:51,  6.95it/s]

{'epoch': 3, 'iter': 25180, 'avg_loss': 8.51992276184402, 'avg_acc': 49.95792958977007, 'loss': 8.905997276306152}


EP_train:3:  91%|| 25192/27626 [59:26<05:48,  6.98it/s]

{'epoch': 3, 'iter': 25190, 'avg_loss': 8.519990884260643, 'avg_acc': 49.95745008137827, 'loss': 8.822985649108887}


EP_train:3:  91%|| 25202/27626 [59:28<05:44,  7.03it/s]

{'epoch': 3, 'iter': 25200, 'avg_loss': 8.520079416763423, 'avg_acc': 49.95585492639181, 'loss': 7.687148094177246}


EP_train:3:  91%|| 25212/27626 [59:29<05:44,  7.02it/s]

{'epoch': 3, 'iter': 25210, 'avg_loss': 8.52014252491791, 'avg_acc': 49.954756852167705, 'loss': 8.271068572998047}


EP_train:3:  91%|| 25222/27626 [59:30<05:42,  7.01it/s]

{'epoch': 3, 'iter': 25220, 'avg_loss': 8.520046015308717, 'avg_acc': 49.95514650489671, 'loss': 8.187368392944336}


EP_train:3:  91%|| 25232/27626 [59:32<05:39,  7.05it/s]

{'epoch': 3, 'iter': 25230, 'avg_loss': 8.520091232129564, 'avg_acc': 49.956155126629945, 'loss': 8.915596008300781}


EP_train:3:  91%|| 25242/27626 [59:33<05:38,  7.04it/s]

{'epoch': 3, 'iter': 25240, 'avg_loss': 8.520312597698627, 'avg_acc': 49.95753436868587, 'loss': 8.343768119812012}


EP_train:3:  91%|| 25252/27626 [59:35<05:36,  7.06it/s]

{'epoch': 3, 'iter': 25250, 'avg_loss': 8.520366465038396, 'avg_acc': 49.95755118609164, 'loss': 9.107690811157227}


EP_train:3:  91%|| 25262/27626 [59:36<05:34,  7.06it/s]

{'epoch': 3, 'iter': 25260, 'avg_loss': 8.520389062379282, 'avg_acc': 49.95917620046713, 'loss': 8.350112915039062}


EP_train:3:  91%|| 25272/27626 [59:37<05:34,  7.05it/s]

{'epoch': 3, 'iter': 25270, 'avg_loss': 8.520372792372607, 'avg_acc': 49.959810652526606, 'loss': 8.226333618164062}


EP_train:3:  92%|| 25282/27626 [59:39<05:34,  7.01it/s]

{'epoch': 3, 'iter': 25280, 'avg_loss': 8.520369455428739, 'avg_acc': 49.95957932834935, 'loss': 9.52082633972168}


EP_train:3:  92%|| 25292/27626 [59:40<05:30,  7.06it/s]

{'epoch': 3, 'iter': 25290, 'avg_loss': 8.520312120366656, 'avg_acc': 49.96033668103278, 'loss': 8.916808128356934}


EP_train:3:  92%|| 25302/27626 [59:42<05:32,  6.99it/s]

{'epoch': 3, 'iter': 25300, 'avg_loss': 8.520265708303816, 'avg_acc': 49.96072289632821, 'loss': 8.136757850646973}


EP_train:3:  92%|| 25312/27626 [59:43<05:30,  7.01it/s]

{'epoch': 3, 'iter': 25310, 'avg_loss': 8.520134126177837, 'avg_acc': 49.95938030895658, 'loss': 7.504887104034424}


EP_train:3:  92%|| 25322/27626 [59:45<05:28,  7.01it/s]

{'epoch': 3, 'iter': 25320, 'avg_loss': 8.520101586787755, 'avg_acc': 49.959026104814185, 'loss': 8.01058578491211}


EP_train:3:  92%|| 25332/27626 [59:46<05:24,  7.07it/s]

{'epoch': 3, 'iter': 25330, 'avg_loss': 8.519967924955118, 'avg_acc': 49.95830208045478, 'loss': 7.934171199798584}


EP_train:3:  92%|| 25342/27626 [59:47<05:25,  7.02it/s]

{'epoch': 3, 'iter': 25340, 'avg_loss': 8.519943044187615, 'avg_acc': 49.960784894045226, 'loss': 7.874935150146484}


EP_train:3:  92%|| 25352/27626 [59:49<05:21,  7.08it/s]

{'epoch': 3, 'iter': 25350, 'avg_loss': 8.519948159443134, 'avg_acc': 49.95932113131632, 'loss': 8.503443717956543}


EP_train:3:  92%|| 25362/27626 [59:50<05:21,  7.04it/s]

{'epoch': 3, 'iter': 25360, 'avg_loss': 8.519832657882127, 'avg_acc': 49.95995327471314, 'loss': 7.83603048324585}


EP_train:3:  92%|| 25372/27626 [59:52<05:20,  7.04it/s]

{'epoch': 3, 'iter': 25370, 'avg_loss': 8.519846628810887, 'avg_acc': 49.959722714910725, 'loss': 8.280815124511719}


EP_train:3:  92%|| 25382/27626 [59:53<05:17,  7.06it/s]

{'epoch': 3, 'iter': 25380, 'avg_loss': 8.519654247778714, 'avg_acc': 49.958384224419845, 'loss': 7.455481052398682}


EP_train:3:  92%|| 25392/27626 [59:54<05:17,  7.04it/s]

{'epoch': 3, 'iter': 25390, 'avg_loss': 8.519607381875892, 'avg_acc': 49.95704678823205, 'loss': 8.526066780090332}


EP_train:3:  92%|| 25402/27626 [59:56<05:18,  6.99it/s]

{'epoch': 3, 'iter': 25400, 'avg_loss': 8.519639978089307, 'avg_acc': 49.95755580488957, 'loss': 9.391708374023438}


EP_train:3:  92%|| 25412/27626 [59:57<05:15,  7.01it/s]

{'epoch': 3, 'iter': 25410, 'avg_loss': 8.519613509542015, 'avg_acc': 49.957326551493445, 'loss': 8.31123161315918}


EP_train:3:  92%|| 25422/27626 [59:59<05:12,  7.05it/s]

{'epoch': 3, 'iter': 25420, 'avg_loss': 8.519477321201133, 'avg_acc': 49.957712127768374, 'loss': 8.348408699035645}


EP_train:3:  92%|| 25432/27626 [1:00:00<05:11,  7.04it/s]

{'epoch': 3, 'iter': 25430, 'avg_loss': 8.519546037539474, 'avg_acc': 49.95871180842279, 'loss': 8.678445816040039}


EP_train:3:  92%|| 25442/27626 [1:00:02<05:12,  7.00it/s]

{'epoch': 3, 'iter': 25440, 'avg_loss': 8.51950785664004, 'avg_acc': 49.95700837231241, 'loss': 8.800030708312988}


EP_train:3:  92%|| 25452/27626 [1:00:03<05:08,  7.06it/s]

{'epoch': 3, 'iter': 25450, 'avg_loss': 8.519501216078755, 'avg_acc': 49.9572708341519, 'loss': 8.951705932617188}


EP_train:3:  92%|| 25462/27626 [1:00:04<05:06,  7.06it/s]

{'epoch': 3, 'iter': 25460, 'avg_loss': 8.519414327697197, 'avg_acc': 49.95728761635442, 'loss': 7.829774379730225}


EP_train:3:  92%|| 25472/27626 [1:00:06<05:05,  7.05it/s]

{'epoch': 3, 'iter': 25470, 'avg_loss': 8.519449836237776, 'avg_acc': 49.95632287699737, 'loss': 9.109115600585938}


EP_train:3:  92%|| 25482/27626 [1:00:07<05:03,  7.05it/s]

{'epoch': 3, 'iter': 25480, 'avg_loss': 8.519336306689967, 'avg_acc': 49.95658529885013, 'loss': 9.193892478942871}


EP_train:3:  92%|| 25492/27626 [1:00:09<05:05,  6.99it/s]

{'epoch': 3, 'iter': 25490, 'avg_loss': 8.519412518576443, 'avg_acc': 49.956357145659254, 'loss': 7.9832444190979}


EP_train:3:  92%|| 25502/27626 [1:00:10<05:06,  6.94it/s]

{'epoch': 3, 'iter': 25500, 'avg_loss': 8.519405960938514, 'avg_acc': 49.957232069330615, 'loss': 9.138910293579102}


EP_train:3:  92%|| 25512/27626 [1:00:12<05:01,  7.02it/s]

{'epoch': 3, 'iter': 25510, 'avg_loss': 8.519574382146175, 'avg_acc': 49.95884128415193, 'loss': 8.865238189697266}


EP_train:3:  92%|| 25522/27626 [1:00:13<04:58,  7.05it/s]

{'epoch': 3, 'iter': 25520, 'avg_loss': 8.51956200383869, 'avg_acc': 49.958367618823715, 'loss': 8.867127418518066}


EP_train:3:  92%|| 25532/27626 [1:00:14<04:55,  7.08it/s]

{'epoch': 3, 'iter': 25530, 'avg_loss': 8.519527477885818, 'avg_acc': 49.957282323449924, 'loss': 8.45592975616455}


EP_train:3:  92%|| 25542/27626 [1:00:16<04:58,  6.99it/s]

{'epoch': 3, 'iter': 25540, 'avg_loss': 8.519531564674065, 'avg_acc': 49.95791081007008, 'loss': 9.175722122192383}


EP_train:3:  92%|| 25552/27626 [1:00:17<04:58,  6.95it/s]

{'epoch': 3, 'iter': 25550, 'avg_loss': 8.51952928330566, 'avg_acc': 49.95780497827874, 'loss': 8.628716468811035}


EP_train:3:  93%|| 25562/27626 [1:00:19<04:53,  7.03it/s]

{'epoch': 3, 'iter': 25560, 'avg_loss': 8.519451917829118, 'avg_acc': 49.956598920230036, 'loss': 8.244476318359375}


EP_train:3:  93%|| 25572/27626 [1:00:20<04:53,  7.00it/s]

{'epoch': 3, 'iter': 25570, 'avg_loss': 8.519320044387543, 'avg_acc': 49.95514938797857, 'loss': 8.512166976928711}


EP_train:3:  93%|| 25582/27626 [1:00:21<04:51,  7.00it/s]

{'epoch': 3, 'iter': 25580, 'avg_loss': 8.519225407511637, 'avg_acc': 49.95589988663461, 'loss': 8.244553565979004}


EP_train:3:  93%|| 25592/27626 [1:00:23<04:50,  7.01it/s]

{'epoch': 3, 'iter': 25590, 'avg_loss': 8.519202211212383, 'avg_acc': 49.95652768551444, 'loss': 8.321333885192871}


EP_train:3:  93%|| 25602/27626 [1:00:24<04:48,  7.01it/s]

{'epoch': 3, 'iter': 25600, 'avg_loss': 8.519095095446005, 'avg_acc': 49.95520194523652, 'loss': 8.26887321472168}


EP_train:3:  93%|| 25612/27626 [1:00:26<04:46,  7.04it/s]

{'epoch': 3, 'iter': 25610, 'avg_loss': 8.519049949235646, 'avg_acc': 49.953023115067744, 'loss': 8.213714599609375}


EP_train:3:  93%|| 25622/27626 [1:00:27<04:45,  7.02it/s]

{'epoch': 3, 'iter': 25620, 'avg_loss': 8.51900804335477, 'avg_acc': 49.95255356933765, 'loss': 8.36633586883545}


EP_train:3:  93%|| 25632/27626 [1:00:29<04:46,  6.96it/s]

{'epoch': 3, 'iter': 25630, 'avg_loss': 8.519005395782793, 'avg_acc': 49.95366938472942, 'loss': 7.512178897857666}


EP_train:3:  93%|| 25642/27626 [1:00:30<04:40,  7.06it/s]

{'epoch': 3, 'iter': 25640, 'avg_loss': 8.519079871847339, 'avg_acc': 49.95319995319995, 'loss': 8.679765701293945}


EP_train:3:  93%|| 25652/27626 [1:00:31<04:40,  7.04it/s]

{'epoch': 3, 'iter': 25650, 'avg_loss': 8.519302603450324, 'avg_acc': 49.95285271529375, 'loss': 9.423556327819824}


EP_train:3:  93%|| 25662/27626 [1:00:33<04:40,  7.00it/s]

{'epoch': 3, 'iter': 25660, 'avg_loss': 8.519291675423876, 'avg_acc': 49.95335820895522, 'loss': 8.714361190795898}


EP_train:3:  93%|| 25672/27626 [1:00:34<04:37,  7.04it/s]

{'epoch': 3, 'iter': 25670, 'avg_loss': 8.519286175055766, 'avg_acc': 49.95179385298586, 'loss': 9.289679527282715}


EP_train:3:  93%|| 25682/27626 [1:00:36<04:38,  6.99it/s]

{'epoch': 3, 'iter': 25680, 'avg_loss': 8.519273886274528, 'avg_acc': 49.95229936528951, 'loss': 8.411637306213379}


EP_train:3:  93%|| 25692/27626 [1:00:37<04:37,  6.97it/s]

{'epoch': 3, 'iter': 25690, 'avg_loss': 8.51949782896968, 'avg_acc': 49.95231793234985, 'loss': 9.747719764709473}


EP_train:3:  93%|| 25702/27626 [1:00:38<04:32,  7.05it/s]

{'epoch': 3, 'iter': 25700, 'avg_loss': 8.519629709757996, 'avg_acc': 49.951485350764564, 'loss': 8.124493598937988}


EP_train:3:  93%|| 25712/27626 [1:00:40<04:31,  7.04it/s]

{'epoch': 3, 'iter': 25710, 'avg_loss': 8.519640292630239, 'avg_acc': 49.95150421998367, 'loss': 8.622629165649414}


EP_train:3:  93%|| 25722/27626 [1:00:41<04:30,  7.03it/s]

{'epoch': 3, 'iter': 25720, 'avg_loss': 8.51963951101867, 'avg_acc': 49.95176606663815, 'loss': 8.13360595703125}


EP_train:3:  93%|| 25732/27626 [1:00:43<04:29,  7.04it/s]

{'epoch': 3, 'iter': 25730, 'avg_loss': 8.51977469767207, 'avg_acc': 49.95057032373402, 'loss': 9.315176010131836}


EP_train:3:  93%|| 25742/27626 [1:00:44<04:29,  6.99it/s]

{'epoch': 3, 'iter': 25740, 'avg_loss': 8.519852555458986, 'avg_acc': 49.95022532147158, 'loss': 8.631098747253418}


EP_train:3:  93%|| 25752/27626 [1:00:46<04:27,  7.01it/s]

{'epoch': 3, 'iter': 25750, 'avg_loss': 8.519859995528483, 'avg_acc': 49.94866704205662, 'loss': 8.915303230285645}


EP_train:3:  93%|| 25762/27626 [1:00:47<04:25,  7.03it/s]

{'epoch': 3, 'iter': 25760, 'avg_loss': 8.51985145117721, 'avg_acc': 49.94844435386825, 'loss': 7.920112133026123}


EP_train:3:  93%|| 25772/27626 [1:00:48<04:23,  7.03it/s]

{'epoch': 3, 'iter': 25770, 'avg_loss': 8.519861442385626, 'avg_acc': 49.94834309883202, 'loss': 8.149127006530762}


EP_train:3:  93%|| 25782/27626 [1:00:50<04:21,  7.06it/s]

{'epoch': 3, 'iter': 25780, 'avg_loss': 8.519827580906034, 'avg_acc': 49.94654493619332, 'loss': 9.727624893188477}


EP_train:3:  93%|| 25792/27626 [1:00:51<04:20,  7.05it/s]

{'epoch': 3, 'iter': 25790, 'avg_loss': 8.519804348752782, 'avg_acc': 49.94462700166725, 'loss': 7.718865394592285}


EP_train:3:  93%|| 25802/27626 [1:00:53<04:19,  7.02it/s]

{'epoch': 3, 'iter': 25800, 'avg_loss': 8.519838352535293, 'avg_acc': 49.946101895275376, 'loss': 9.213934898376465}


EP_train:3:  93%|| 25812/27626 [1:00:54<04:18,  7.02it/s]

{'epoch': 3, 'iter': 25810, 'avg_loss': 8.519873746311294, 'avg_acc': 49.94466990817868, 'loss': 8.734061241149902}


EP_train:3:  93%|| 25822/27626 [1:00:55<04:15,  7.05it/s]

{'epoch': 3, 'iter': 25820, 'avg_loss': 8.519945563781013, 'avg_acc': 49.94517543859649, 'loss': 8.73580551147461}


EP_train:3:  94%|| 25832/27626 [1:00:57<04:14,  7.06it/s]

{'epoch': 3, 'iter': 25830, 'avg_loss': 8.519728882674311, 'avg_acc': 49.94422883357206, 'loss': 8.557772636413574}


EP_train:3:  94%|| 25842/27626 [1:00:58<04:13,  7.03it/s]

{'epoch': 3, 'iter': 25840, 'avg_loss': 8.519692219123444, 'avg_acc': 49.94437134785806, 'loss': 8.283486366271973}


EP_train:3:  94%|| 25852/27626 [1:01:00<04:13,  7.00it/s]

{'epoch': 3, 'iter': 25850, 'avg_loss': 8.519601450423513, 'avg_acc': 49.94608525782368, 'loss': 8.584601402282715}


EP_train:3:  94%|| 25862/27626 [1:01:01<04:09,  7.06it/s]

{'epoch': 3, 'iter': 25860, 'avg_loss': 8.519619314729843, 'avg_acc': 49.94683113568694, 'loss': 8.369087219238281}


EP_train:3:  94%|| 25872/27626 [1:01:03<04:09,  7.04it/s]

{'epoch': 3, 'iter': 25870, 'avg_loss': 8.519585459054744, 'avg_acc': 49.94769722855707, 'loss': 8.934075355529785}


EP_train:3:  94%|| 25882/27626 [1:01:04<04:06,  7.07it/s]

{'epoch': 3, 'iter': 25880, 'avg_loss': 8.519508156165298, 'avg_acc': 49.948200417294544, 'loss': 8.880226135253906}


EP_train:3:  94%|| 25892/27626 [1:01:05<04:08,  6.99it/s]

{'epoch': 3, 'iter': 25890, 'avg_loss': 8.519584194634348, 'avg_acc': 49.94822042408559, 'loss': 9.113000869750977}


EP_train:3:  94%|| 25902/27626 [1:01:07<04:08,  6.95it/s]

{'epoch': 3, 'iter': 25900, 'avg_loss': 8.519456888091117, 'avg_acc': 49.95005019111231, 'loss': 8.30048656463623}


EP_train:3:  94%|| 25912/27626 [1:01:08<04:04,  7.00it/s]

{'epoch': 3, 'iter': 25910, 'avg_loss': 8.519353851118725, 'avg_acc': 49.951516730346185, 'loss': 8.089436531066895}


EP_train:3:  94%|| 25922/27626 [1:01:10<04:03,  6.99it/s]

{'epoch': 3, 'iter': 25920, 'avg_loss': 8.51938324604481, 'avg_acc': 49.95201766907141, 'loss': 9.740756034851074}


EP_train:3:  94%|| 25932/27626 [1:01:11<04:05,  6.90it/s]

{'epoch': 3, 'iter': 25930, 'avg_loss': 8.519345930330523, 'avg_acc': 49.95155412440708, 'loss': 8.284378051757812}


EP_train:3:  94%|| 25942/27626 [1:01:13<04:00,  7.00it/s]

{'epoch': 3, 'iter': 25940, 'avg_loss': 8.519391282586987, 'avg_acc': 49.95109093712656, 'loss': 8.701163291931152}


EP_train:3:  94%|| 25952/27626 [1:01:14<03:58,  7.02it/s]

{'epoch': 3, 'iter': 25950, 'avg_loss': 8.519423690321247, 'avg_acc': 49.95255481484336, 'loss': 8.655657768249512}


EP_train:3:  94%|| 25962/27626 [1:01:15<03:56,  7.04it/s]

{'epoch': 3, 'iter': 25960, 'avg_loss': 8.51952555840732, 'avg_acc': 49.95173048033589, 'loss': 9.075909614562988}


EP_train:3:  94%|| 25972/27626 [1:01:17<03:54,  7.05it/s]

{'epoch': 3, 'iter': 25970, 'avg_loss': 8.519645326170867, 'avg_acc': 49.9519897193023, 'loss': 8.430776596069336}


EP_train:3:  94%|| 25982/27626 [1:01:18<03:52,  7.07it/s]

{'epoch': 3, 'iter': 25980, 'avg_loss': 8.519624756406744, 'avg_acc': 49.952609599322585, 'loss': 7.755376815795898}


EP_train:3:  94%|| 25992/27626 [1:01:20<03:55,  6.95it/s]

{'epoch': 3, 'iter': 25990, 'avg_loss': 8.519560862961658, 'avg_acc': 49.954070639836864, 'loss': 8.568182945251465}


EP_train:3:  94%|| 26002/27626 [1:01:21<03:51,  7.02it/s]

{'epoch': 3, 'iter': 26000, 'avg_loss': 8.519717084229494, 'avg_acc': 49.95324699050037, 'loss': 7.578743934631348}


EP_train:3:  94%|| 26012/27626 [1:01:22<03:48,  7.06it/s]

{'epoch': 3, 'iter': 26010, 'avg_loss': 8.519725700805337, 'avg_acc': 49.953745530736995, 'loss': 8.629764556884766}


EP_train:3:  94%|| 26022/27626 [1:01:24<03:48,  7.03it/s]

{'epoch': 3, 'iter': 26020, 'avg_loss': 8.519917765906504, 'avg_acc': 49.95556473617463, 'loss': 9.457265853881836}


EP_train:3:  94%|| 26032/27626 [1:01:25<03:47,  7.00it/s]

{'epoch': 3, 'iter': 26030, 'avg_loss': 8.520002284732701, 'avg_acc': 49.95666224885713, 'loss': 9.208375930786133}


EP_train:3:  94%|| 26042/27626 [1:01:27<03:44,  7.05it/s]

{'epoch': 3, 'iter': 26040, 'avg_loss': 8.5200179479331, 'avg_acc': 49.95751891248416, 'loss': 8.759293556213379}


EP_train:3:  94%|| 26052/27626 [1:01:28<03:44,  7.01it/s]

{'epoch': 3, 'iter': 26050, 'avg_loss': 8.519973229192313, 'avg_acc': 49.95777513339219, 'loss': 8.102246284484863}


EP_train:3:  94%|| 26062/27626 [1:01:30<03:42,  7.04it/s]

{'epoch': 3, 'iter': 26060, 'avg_loss': 8.519860010530234, 'avg_acc': 49.957431602778094, 'loss': 8.615212440490723}


EP_train:3:  94%|| 26072/27626 [1:01:31<03:41,  7.00it/s]

{'epoch': 3, 'iter': 26070, 'avg_loss': 8.519863257917152, 'avg_acc': 49.95816712055541, 'loss': 8.556903839111328}


EP_train:3:  94%|| 26082/27626 [1:01:32<03:40,  7.02it/s]

{'epoch': 3, 'iter': 26080, 'avg_loss': 8.51999830233393, 'avg_acc': 49.95578677964802, 'loss': 9.89533805847168}


EP_train:3:  94%|| 26092/27626 [1:01:34<03:36,  7.07it/s]

{'epoch': 3, 'iter': 26090, 'avg_loss': 8.520040394570763, 'avg_acc': 49.956043271626235, 'loss': 8.14892292022705}


EP_train:3:  94%|| 26102/27626 [1:01:35<03:37,  7.00it/s]

{'epoch': 3, 'iter': 26100, 'avg_loss': 8.519996631538715, 'avg_acc': 49.957377111988045, 'loss': 7.920424938201904}


EP_train:3:  95%|| 26112/27626 [1:01:37<03:35,  7.01it/s]

{'epoch': 3, 'iter': 26110, 'avg_loss': 8.519936519508994, 'avg_acc': 49.95631630347363, 'loss': 8.698293685913086}


EP_train:3:  95%|| 26122/27626 [1:01:38<03:33,  7.04it/s]

{'epoch': 3, 'iter': 26120, 'avg_loss': 8.520013780215265, 'avg_acc': 49.9566919336932, 'loss': 8.97768497467041}


EP_train:3:  95%|| 26132/27626 [1:01:39<03:33,  7.01it/s]

{'epoch': 3, 'iter': 26130, 'avg_loss': 8.52020152229825, 'avg_acc': 49.955273430025635, 'loss': 9.300924301147461}


EP_train:3:  95%|| 26142/27626 [1:01:41<03:31,  7.03it/s]

{'epoch': 3, 'iter': 26140, 'avg_loss': 8.520307027574459, 'avg_acc': 49.95409509965189, 'loss': 8.662607192993164}


EP_train:3:  95%|| 26152/27626 [1:01:42<03:30,  7.02it/s]

{'epoch': 3, 'iter': 26150, 'avg_loss': 8.520336305327929, 'avg_acc': 49.95315666704906, 'loss': 9.293501853942871}


EP_train:3:  95%|| 26162/27626 [1:01:44<03:27,  7.05it/s]

{'epoch': 3, 'iter': 26160, 'avg_loss': 8.520224986384939, 'avg_acc': 49.953771835938994, 'loss': 8.274127960205078}


EP_train:3:  95%|| 26172/27626 [1:01:45<03:25,  7.07it/s]

{'epoch': 3, 'iter': 26170, 'avg_loss': 8.52004284594695, 'avg_acc': 49.953908906805246, 'loss': 8.513375282287598}


EP_train:3:  95%|| 26182/27626 [1:01:47<03:23,  7.10it/s]

{'epoch': 3, 'iter': 26180, 'avg_loss': 8.519996512624187, 'avg_acc': 49.9540458729613, 'loss': 7.937434673309326}


EP_train:3:  95%|| 26192/27626 [1:01:48<03:23,  7.06it/s]

{'epoch': 3, 'iter': 26190, 'avg_loss': 8.520093962542767, 'avg_acc': 49.95370547134512, 'loss': 8.512951850891113}


EP_train:3:  95%|| 26202/27626 [1:01:49<03:22,  7.02it/s]

{'epoch': 3, 'iter': 26200, 'avg_loss': 8.520084617310893, 'avg_acc': 49.95229189725583, 'loss': 8.726882934570312}


EP_train:3:  95%|| 26212/27626 [1:01:51<03:21,  7.03it/s]

{'epoch': 3, 'iter': 26210, 'avg_loss': 8.520041345945096, 'avg_acc': 49.95099862653085, 'loss': 7.657134532928467}


EP_train:3:  95%|| 26222/27626 [1:01:52<03:19,  7.04it/s]

{'epoch': 3, 'iter': 26220, 'avg_loss': 8.519976605631058, 'avg_acc': 49.95030223866367, 'loss': 7.927833080291748}


EP_train:3:  95%|| 26232/27626 [1:01:54<03:17,  7.06it/s]

{'epoch': 3, 'iter': 26230, 'avg_loss': 8.519870400960489, 'avg_acc': 49.95115512180245, 'loss': 7.955192565917969}


EP_train:3:  95%|| 26242/27626 [1:01:55<03:15,  7.09it/s]

{'epoch': 3, 'iter': 26240, 'avg_loss': 8.519804333555667, 'avg_acc': 49.95141191265577, 'loss': 8.832331657409668}


EP_train:3:  95%|| 26252/27626 [1:01:56<03:15,  7.04it/s]

{'epoch': 3, 'iter': 26250, 'avg_loss': 8.519779771801394, 'avg_acc': 49.951192335530074, 'loss': 7.997276782989502}


EP_train:3:  95%|| 26262/27626 [1:01:58<03:13,  7.04it/s]

{'epoch': 3, 'iter': 26260, 'avg_loss': 8.519662073627156, 'avg_acc': 49.95156791439778, 'loss': 8.014077186584473}


EP_train:3:  95%|| 26272/27626 [1:01:59<03:13,  7.00it/s]

{'epoch': 3, 'iter': 26270, 'avg_loss': 8.519643926270176, 'avg_acc': 49.95158634996765, 'loss': 9.67652416229248}


EP_train:3:  95%|| 26282/27626 [1:02:01<03:11,  7.00it/s]

{'epoch': 3, 'iter': 26280, 'avg_loss': 8.519626214711897, 'avg_acc': 49.951248049922, 'loss': 8.072381019592285}


EP_train:3:  95%|| 26292/27626 [1:02:02<03:09,  7.03it/s]

{'epoch': 3, 'iter': 26290, 'avg_loss': 8.519610781363285, 'avg_acc': 49.95150431706668, 'loss': 8.808216094970703}


EP_train:3:  95%|| 26302/27626 [1:02:03<03:08,  7.04it/s]

{'epoch': 3, 'iter': 26300, 'avg_loss': 8.519648449379503, 'avg_acc': 49.949740504163344, 'loss': 8.23712158203125}


EP_train:3:  95%|| 26312/27626 [1:02:05<03:06,  7.03it/s]

{'epoch': 3, 'iter': 26310, 'avg_loss': 8.51982790714006, 'avg_acc': 49.9492845197826, 'loss': 8.348013877868652}


EP_train:3:  95%|| 26322/27626 [1:02:06<03:05,  7.04it/s]

{'epoch': 3, 'iter': 26320, 'avg_loss': 8.51979863111482, 'avg_acc': 49.94704798449907, 'loss': 9.081116676330566}


EP_train:3:  95%|| 26332/27626 [1:02:08<03:03,  7.04it/s]

{'epoch': 3, 'iter': 26330, 'avg_loss': 8.519722398009874, 'avg_acc': 49.948373590064946, 'loss': 8.942115783691406}


EP_train:3:  95%|| 26342/27626 [1:02:09<03:02,  7.05it/s]

{'epoch': 3, 'iter': 26340, 'avg_loss': 8.519738085924061, 'avg_acc': 49.948630462017384, 'loss': 8.052270889282227}


EP_train:3:  95%|| 26352/27626 [1:02:11<03:01,  7.03it/s]

{'epoch': 3, 'iter': 26350, 'avg_loss': 8.519893922451475, 'avg_acc': 49.94912432165762, 'loss': 9.672139167785645}


EP_train:3:  95%|| 26362/27626 [1:02:12<03:00,  7.02it/s]

{'epoch': 3, 'iter': 26360, 'avg_loss': 8.520087360173553, 'avg_acc': 49.94914362125868, 'loss': 8.33434009552002}


EP_train:3:  95%|| 26372/27626 [1:02:13<02:58,  7.04it/s]

{'epoch': 3, 'iter': 26370, 'avg_loss': 8.520092321488237, 'avg_acc': 49.95022941867961, 'loss': 8.427855491638184}


EP_train:3:  95%|| 26382/27626 [1:02:15<02:59,  6.93it/s]

{'epoch': 3, 'iter': 26380, 'avg_loss': 8.520116212789972, 'avg_acc': 49.94989291535575, 'loss': 8.899796485900879}


EP_train:3:  96%|| 26392/27626 [1:02:16<02:56,  6.99it/s]

{'epoch': 3, 'iter': 26390, 'avg_loss': 8.520117685354057, 'avg_acc': 49.94920143230647, 'loss': 8.452322959899902}


EP_train:3:  96%|| 26402/27626 [1:02:18<02:54,  7.02it/s]

{'epoch': 3, 'iter': 26400, 'avg_loss': 8.52017199843676, 'avg_acc': 49.94969414037347, 'loss': 8.5578031539917}


EP_train:3:  96%|| 26412/27626 [1:02:19<02:53,  7.02it/s]

{'epoch': 3, 'iter': 26410, 'avg_loss': 8.52018649239302, 'avg_acc': 49.95207962591344, 'loss': 8.13662052154541}


EP_train:3:  96%|| 26422/27626 [1:02:21<02:49,  7.10it/s]

{'epoch': 3, 'iter': 26420, 'avg_loss': 8.520289471073553, 'avg_acc': 49.95162465463079, 'loss': 8.474844932556152}


EP_train:3:  96%|| 26432/27626 [1:02:22<02:51,  6.98it/s]

{'epoch': 3, 'iter': 26430, 'avg_loss': 8.520271046316532, 'avg_acc': 49.95093356286179, 'loss': 8.378252983093262}


EP_train:3:  96%|| 26442/27626 [1:02:23<02:48,  7.02it/s]

{'epoch': 3, 'iter': 26440, 'avg_loss': 8.520264593791431, 'avg_acc': 49.95130668280322, 'loss': 8.290287017822266}


EP_train:3:  96%|| 26452/27626 [1:02:25<02:45,  7.08it/s]

{'epoch': 3, 'iter': 26450, 'avg_loss': 8.520207815803763, 'avg_acc': 49.95238837851121, 'loss': 9.557892799377441}


EP_train:3:  96%|| 26462/27626 [1:02:26<02:44,  7.06it/s]

{'epoch': 3, 'iter': 26460, 'avg_loss': 8.520297900803536, 'avg_acc': 49.95157968330751, 'loss': 7.791725158691406}


EP_train:3:  96%|| 26472/27626 [1:02:28<02:43,  7.05it/s]

{'epoch': 3, 'iter': 26470, 'avg_loss': 8.520345261575125, 'avg_acc': 49.95100770654678, 'loss': 8.58856201171875}


EP_train:3:  96%|| 26482/27626 [1:02:29<02:43,  7.02it/s]

{'epoch': 3, 'iter': 26480, 'avg_loss': 8.520285859163252, 'avg_acc': 49.95020014349911, 'loss': 7.981229305267334}


EP_train:3:  96%|| 26492/27626 [1:02:30<02:41,  7.00it/s]

{'epoch': 3, 'iter': 26490, 'avg_loss': 8.52022852938504, 'avg_acc': 49.949157260956554, 'loss': 7.949830532073975}


EP_train:3:  96%|| 26502/27626 [1:02:32<02:40,  7.00it/s]

{'epoch': 3, 'iter': 26500, 'avg_loss': 8.5202272497282, 'avg_acc': 49.947997245386965, 'loss': 8.061761856079102}


EP_train:3:  96%|| 26512/27626 [1:02:33<02:39,  6.99it/s]

{'epoch': 3, 'iter': 26510, 'avg_loss': 8.52013289607829, 'avg_acc': 49.949313492512545, 'loss': 8.46216106414795}


EP_train:3:  96%|| 26522/27626 [1:02:35<02:35,  7.08it/s]

{'epoch': 3, 'iter': 26520, 'avg_loss': 8.520125552066279, 'avg_acc': 49.94791863051921, 'loss': 8.418295860290527}


EP_train:3:  96%|| 26532/27626 [1:02:36<02:34,  7.08it/s]

{'epoch': 3, 'iter': 26530, 'avg_loss': 8.52029352054033, 'avg_acc': 49.94734932720214, 'loss': 8.306060791015625}


EP_train:3:  96%|| 26542/27626 [1:02:38<02:33,  7.04it/s]

{'epoch': 3, 'iter': 26540, 'avg_loss': 8.52041598046528, 'avg_acc': 49.94748690704947, 'loss': 8.456138610839844}


EP_train:3:  96%|| 26552/27626 [1:02:39<02:32,  7.04it/s]

{'epoch': 3, 'iter': 26550, 'avg_loss': 8.52041230480629, 'avg_acc': 49.947035893186694, 'loss': 9.124702453613281}


EP_train:3:  96%|| 26562/27626 [1:02:40<02:31,  7.05it/s]

{'epoch': 3, 'iter': 26560, 'avg_loss': 8.520332936542966, 'avg_acc': 49.948114717066375, 'loss': 8.397018432617188}


EP_train:3:  96%|| 26572/27626 [1:02:42<02:30,  7.00it/s]

{'epoch': 3, 'iter': 26570, 'avg_loss': 8.520251306788142, 'avg_acc': 49.948134244100714, 'loss': 7.944298267364502}


EP_train:3:  96%|| 26582/27626 [1:02:43<02:30,  6.96it/s]

{'epoch': 3, 'iter': 26580, 'avg_loss': 8.520283312467647, 'avg_acc': 49.94968210375833, 'loss': 8.16749095916748}


EP_train:3:  96%|| 26592/27626 [1:02:45<02:26,  7.04it/s]

{'epoch': 3, 'iter': 26590, 'avg_loss': 8.520318461990515, 'avg_acc': 49.948760858937234, 'loss': 9.378355979919434}


EP_train:3:  96%|| 26602/27626 [1:02:46<02:25,  7.03it/s]

{'epoch': 3, 'iter': 26600, 'avg_loss': 8.520443864318823, 'avg_acc': 49.95030731927371, 'loss': 8.015189170837402}


EP_train:3:  96%|| 26612/27626 [1:02:47<02:22,  7.09it/s]

{'epoch': 3, 'iter': 26610, 'avg_loss': 8.520466401205987, 'avg_acc': 49.951852617338695, 'loss': 8.360021591186523}


EP_train:3:  96%|| 26622/27626 [1:02:49<02:21,  7.10it/s]

{'epoch': 3, 'iter': 26620, 'avg_loss': 8.520420429587547, 'avg_acc': 49.95269242327486, 'loss': 8.555167198181152}


EP_train:3:  96%|| 26632/27626 [1:02:50<02:21,  7.04it/s]

{'epoch': 3, 'iter': 26630, 'avg_loss': 8.520441159614373, 'avg_acc': 49.95130205399722, 'loss': 7.890339374542236}


EP_train:3:  96%|| 26642/27626 [1:02:52<02:20,  7.02it/s]

{'epoch': 3, 'iter': 26640, 'avg_loss': 8.520403887916714, 'avg_acc': 49.95085113171427, 'loss': 7.875095844268799}


EP_train:3:  96%|| 26652/27626 [1:02:53<02:18,  7.05it/s]

{'epoch': 3, 'iter': 26650, 'avg_loss': 8.520397141263263, 'avg_acc': 49.95040054782184, 'loss': 8.129938125610352}


EP_train:3:  97%|| 26662/27626 [1:02:54<02:16,  7.05it/s]

{'epoch': 3, 'iter': 26660, 'avg_loss': 8.520340864837475, 'avg_acc': 49.948426540639886, 'loss': 8.605347633361816}


EP_train:3:  97%|| 26672/27626 [1:02:56<02:16,  7.00it/s]

{'epoch': 3, 'iter': 26670, 'avg_loss': 8.520393927567257, 'avg_acc': 49.9493832252259, 'loss': 9.274288177490234}


EP_train:3:  97%|| 26682/27626 [1:02:57<02:15,  6.98it/s]

{'epoch': 3, 'iter': 26680, 'avg_loss': 8.520453723607082, 'avg_acc': 49.948465199955024, 'loss': 8.346024513244629}


EP_train:3:  97%|| 26692/27626 [1:02:59<02:13,  7.01it/s]

{'epoch': 3, 'iter': 26690, 'avg_loss': 8.52037699468835, 'avg_acc': 49.949069911206024, 'loss': 9.138490676879883}


EP_train:3:  97%|| 26702/27626 [1:03:00<02:11,  7.04it/s]

{'epoch': 3, 'iter': 26700, 'avg_loss': 8.520313441148435, 'avg_acc': 49.949323059061456, 'loss': 8.287003517150879}


EP_train:3:  97%|| 26712/27626 [1:03:02<02:09,  7.04it/s]

{'epoch': 3, 'iter': 26710, 'avg_loss': 8.52023425962151, 'avg_acc': 49.9498100033694, 'loss': 9.739243507385254}


EP_train:3:  97%|| 26722/27626 [1:03:03<02:08,  7.06it/s]

{'epoch': 3, 'iter': 26720, 'avg_loss': 8.520297637559853, 'avg_acc': 49.949828786347815, 'loss': 8.205856323242188}


EP_train:3:  97%|| 26732/27626 [1:03:04<02:08,  6.96it/s]

{'epoch': 3, 'iter': 26730, 'avg_loss': 8.520345204874818, 'avg_acc': 49.94891231154839, 'loss': 8.932632446289062}


EP_train:3:  97%|| 26742/27626 [1:03:06<02:06,  6.99it/s]

{'epoch': 3, 'iter': 26740, 'avg_loss': 8.520404671807773, 'avg_acc': 49.94963258666467, 'loss': 9.14631462097168}


EP_train:3:  97%|| 26752/27626 [1:03:07<02:05,  6.96it/s]

{'epoch': 3, 'iter': 26750, 'avg_loss': 8.520303695234226, 'avg_acc': 49.950352323277635, 'loss': 8.727917671203613}


EP_train:3:  97%|| 26762/27626 [1:03:09<02:02,  7.05it/s]

{'epoch': 3, 'iter': 26760, 'avg_loss': 8.520352685938395, 'avg_acc': 49.95107152199096, 'loss': 8.419694900512695}


EP_train:3:  97%|| 26772/27626 [1:03:10<02:01,  7.05it/s]

{'epoch': 3, 'iter': 26770, 'avg_loss': 8.520348804148707, 'avg_acc': 49.95073960629039, 'loss': 8.164027214050293}


EP_train:3:  97%|| 26782/27626 [1:03:12<01:59,  7.05it/s]

{'epoch': 3, 'iter': 26780, 'avg_loss': 8.520336138374628, 'avg_acc': 49.95040793846384, 'loss': 8.55453109741211}


EP_train:3:  97%|| 26792/27626 [1:03:13<01:58,  7.06it/s]

{'epoch': 3, 'iter': 26790, 'avg_loss': 8.520384357791702, 'avg_acc': 49.95229274756448, 'loss': 8.022051811218262}


EP_train:3:  97%|| 26802/27626 [1:03:14<01:57,  7.03it/s]

{'epoch': 3, 'iter': 26800, 'avg_loss': 8.5203769230219, 'avg_acc': 49.951960747733295, 'loss': 8.686359405517578}


EP_train:3:  97%|| 26812/27626 [1:03:16<01:55,  7.03it/s]

{'epoch': 3, 'iter': 26810, 'avg_loss': 8.520348660917703, 'avg_acc': 49.95314423184514, 'loss': 7.919070243835449}


EP_train:3:  97%|| 26822/27626 [1:03:17<01:54,  7.01it/s]

{'epoch': 3, 'iter': 26820, 'avg_loss': 8.520252483918059, 'avg_acc': 49.954676372991315, 'loss': 7.373476505279541}


EP_train:3:  97%|| 26832/27626 [1:03:19<01:53,  7.02it/s]

{'epoch': 3, 'iter': 26830, 'avg_loss': 8.520249309694766, 'avg_acc': 49.95678972084529, 'loss': 9.132290840148926}


EP_train:3:  97%|| 26842/27626 [1:03:20<01:52,  7.00it/s]

{'epoch': 3, 'iter': 26840, 'avg_loss': 8.520318923837351, 'avg_acc': 49.95762080399389, 'loss': 7.557520389556885}


EP_train:3:  97%|| 26852/27626 [1:03:21<01:50,  7.03it/s]

{'epoch': 3, 'iter': 26850, 'avg_loss': 8.520324208049072, 'avg_acc': 49.95530892704183, 'loss': 8.21267318725586}


EP_train:3:  97%|| 26862/27626 [1:03:23<01:48,  7.02it/s]

{'epoch': 3, 'iter': 26860, 'avg_loss': 8.520234295698861, 'avg_acc': 49.954976545921596, 'loss': 7.962325096130371}


EP_train:3:  97%|| 26872/27626 [1:03:24<01:46,  7.07it/s]

{'epoch': 3, 'iter': 26870, 'avg_loss': 8.520215384051875, 'avg_acc': 49.95336515202263, 'loss': 7.825370788574219}


EP_train:3:  97%|| 26882/27626 [1:03:26<01:46,  7.02it/s]

{'epoch': 3, 'iter': 26880, 'avg_loss': 8.520292964144293, 'avg_acc': 49.9555913098471, 'loss': 8.75534439086914}


EP_train:3:  97%|| 26892/27626 [1:03:27<01:44,  7.03it/s]

{'epoch': 3, 'iter': 26890, 'avg_loss': 8.520392964201983, 'avg_acc': 49.95665371313823, 'loss': 9.071783065795898}


EP_train:3:  97%|| 26902/27626 [1:03:29<01:43,  7.03it/s]

{'epoch': 3, 'iter': 26900, 'avg_loss': 8.520323969680515, 'avg_acc': 49.957018326456264, 'loss': 7.574987888336182}


EP_train:3:  97%|| 26912/27626 [1:03:30<01:41,  7.04it/s]

{'epoch': 3, 'iter': 26910, 'avg_loss': 8.520214899129096, 'avg_acc': 49.95622143361451, 'loss': 8.17529582977295}


EP_train:3:  97%|| 26922/27626 [1:03:31<01:41,  6.94it/s]

{'epoch': 3, 'iter': 26920, 'avg_loss': 8.520356909542636, 'avg_acc': 49.9565859366294, 'loss': 8.844393730163574}


EP_train:3:  97%|| 26932/27626 [1:03:33<01:39,  6.99it/s]

{'epoch': 3, 'iter': 26930, 'avg_loss': 8.520471651560925, 'avg_acc': 49.95660205710891, 'loss': 9.850122451782227}


EP_train:3:  98%|| 26942/27626 [1:03:34<01:38,  6.98it/s]

{'epoch': 3, 'iter': 26940, 'avg_loss': 8.520558040491887, 'avg_acc': 49.95743012508815, 'loss': 8.97093677520752}


EP_train:3:  98%|| 26952/27626 [1:03:36<01:36,  6.98it/s]

{'epoch': 3, 'iter': 26950, 'avg_loss': 8.52053859067456, 'avg_acc': 49.956750213350155, 'loss': 8.926132202148438}


EP_train:3:  98%|| 26962/27626 [1:03:37<01:35,  6.99it/s]

{'epoch': 3, 'iter': 26960, 'avg_loss': 8.52055486359877, 'avg_acc': 49.95722988761545, 'loss': 8.822882652282715}


EP_train:3:  98%|| 26972/27626 [1:03:38<01:32,  7.04it/s]

{'epoch': 3, 'iter': 26970, 'avg_loss': 8.520539696889912, 'avg_acc': 49.95747747580735, 'loss': 8.606075286865234}


EP_train:3:  98%|| 26982/27626 [1:03:40<01:31,  7.04it/s]

{'epoch': 3, 'iter': 26980, 'avg_loss': 8.520517903576744, 'avg_acc': 49.957145769245024, 'loss': 8.495855331420898}


EP_train:3:  98%|| 26992/27626 [1:03:41<01:30,  7.01it/s]

{'epoch': 3, 'iter': 26990, 'avg_loss': 8.520351650547838, 'avg_acc': 49.958087881145566, 'loss': 8.719555854797363}


EP_train:3:  98%|| 27002/27626 [1:03:43<01:29,  7.00it/s]

{'epoch': 3, 'iter': 27000, 'avg_loss': 8.520359533115041, 'avg_acc': 49.95694603903559, 'loss': 7.6679816246032715}


EP_train:3:  98%|| 27012/27626 [1:03:44<01:27,  7.02it/s]

{'epoch': 3, 'iter': 27010, 'avg_loss': 8.520258951079448, 'avg_acc': 49.957771833697386, 'loss': 8.178492546081543}


EP_train:3:  98%|| 27022/27626 [1:03:46<01:25,  7.03it/s]

{'epoch': 3, 'iter': 27020, 'avg_loss': 8.520298024126692, 'avg_acc': 49.95605269975204, 'loss': 8.968096733093262}


EP_train:3:  98%|| 27032/27626 [1:03:47<01:24,  7.02it/s]

{'epoch': 3, 'iter': 27030, 'avg_loss': 8.520155470906897, 'avg_acc': 49.95653138988569, 'loss': 8.472229957580566}


EP_train:3:  98%|| 27042/27626 [1:03:48<01:23,  7.02it/s]

{'epoch': 3, 'iter': 27040, 'avg_loss': 8.520123019619968, 'avg_acc': 49.95816537849932, 'loss': 7.164916515350342}


EP_train:3:  98%|| 27052/27626 [1:03:50<01:21,  7.05it/s]

{'epoch': 3, 'iter': 27050, 'avg_loss': 8.520124463046846, 'avg_acc': 49.959105023843854, 'loss': 8.69505500793457}


EP_train:3:  98%|| 27062/27626 [1:03:51<01:20,  6.99it/s]

{'epoch': 3, 'iter': 27060, 'avg_loss': 8.520132239159784, 'avg_acc': 49.95761889804516, 'loss': 9.064309120178223}


EP_train:3:  98%|| 27072/27626 [1:03:53<01:19,  7.01it/s]

{'epoch': 3, 'iter': 27070, 'avg_loss': 8.520266216580056, 'avg_acc': 49.95867348823464, 'loss': 8.29357624053955}


EP_train:3:  98%|| 27082/27626 [1:03:54<01:16,  7.07it/s]

{'epoch': 3, 'iter': 27080, 'avg_loss': 8.520266569663061, 'avg_acc': 49.95868874856911, 'loss': 7.764366149902344}


EP_train:3:  98%|| 27092/27626 [1:03:55<01:15,  7.08it/s]

{'epoch': 3, 'iter': 27090, 'avg_loss': 8.520240854888716, 'avg_acc': 49.95743512605662, 'loss': 8.469559669494629}


EP_train:3:  98%|| 27102/27626 [1:03:57<01:14,  6.99it/s]

{'epoch': 3, 'iter': 27100, 'avg_loss': 8.520390343566666, 'avg_acc': 49.95825799785985, 'loss': 9.375624656677246}


EP_train:3:  98%|| 27112/27626 [1:03:58<01:13,  7.02it/s]

{'epoch': 3, 'iter': 27110, 'avg_loss': 8.52037394059535, 'avg_acc': 49.95884972889233, 'loss': 9.042550086975098}


EP_train:3:  98%|| 27122/27626 [1:04:00<01:11,  7.04it/s]

{'epoch': 3, 'iter': 27120, 'avg_loss': 8.520370962503653, 'avg_acc': 49.959786696655726, 'loss': 8.75312614440918}


EP_train:3:  98%|| 27132/27626 [1:04:01<01:09,  7.08it/s]

{'epoch': 3, 'iter': 27130, 'avg_loss': 8.520393378436422, 'avg_acc': 49.960147064243856, 'loss': 8.527287483215332}


EP_train:3:  98%|| 27142/27626 [1:04:03<01:08,  7.03it/s]

{'epoch': 3, 'iter': 27140, 'avg_loss': 8.5204306616498, 'avg_acc': 49.95947091116761, 'loss': 9.051003456115723}


EP_train:3:  98%|| 27152/27626 [1:04:04<01:07,  7.06it/s]

{'epoch': 3, 'iter': 27150, 'avg_loss': 8.520488413687477, 'avg_acc': 49.95948583845899, 'loss': 8.539691925048828}


EP_train:3:  98%|| 27162/27626 [1:04:05<01:06,  7.03it/s]

{'epoch': 3, 'iter': 27160, 'avg_loss': 8.520515269934, 'avg_acc': 49.95961580943265, 'loss': 8.571961402893066}


EP_train:3:  98%|| 27172/27626 [1:04:07<01:04,  7.03it/s]

{'epoch': 3, 'iter': 27170, 'avg_loss': 8.5205532108863, 'avg_acc': 49.959745684737406, 'loss': 9.301562309265137}


EP_train:3:  98%|| 27182/27626 [1:04:08<01:03,  7.04it/s]

{'epoch': 3, 'iter': 27180, 'avg_loss': 8.520454763598643, 'avg_acc': 49.958610794304846, 'loss': 8.157183647155762}


EP_train:3:  98%|| 27192/27626 [1:04:10<01:01,  7.07it/s]

{'epoch': 3, 'iter': 27190, 'avg_loss': 8.520422933556816, 'avg_acc': 49.95977529329558, 'loss': 9.12646770477295}


EP_train:3:  98%|| 27202/27626 [1:04:11<01:00,  7.01it/s]

{'epoch': 3, 'iter': 27200, 'avg_loss': 8.520446431824821, 'avg_acc': 49.95875611190765, 'loss': 8.647737503051758}


EP_train:3:  99%|| 27212/27626 [1:04:12<00:58,  7.04it/s]

{'epoch': 3, 'iter': 27210, 'avg_loss': 8.520398270417493, 'avg_acc': 49.959804858329356, 'loss': 8.944084167480469}


EP_train:3:  99%|| 27222/27626 [1:04:14<00:57,  7.05it/s]

{'epoch': 3, 'iter': 27220, 'avg_loss': 8.520397169795562, 'avg_acc': 49.95878641490026, 'loss': 7.4274821281433105}


EP_train:3:  99%|| 27232/27626 [1:04:15<00:55,  7.05it/s]

{'epoch': 3, 'iter': 27230, 'avg_loss': 8.520427688377856, 'avg_acc': 49.95926058536227, 'loss': 8.651673316955566}


EP_train:3:  99%|| 27242/27626 [1:04:17<00:54,  7.02it/s]

{'epoch': 3, 'iter': 27240, 'avg_loss': 8.52036240590896, 'avg_acc': 49.95984912448148, 'loss': 7.54144811630249}


EP_train:3:  99%|| 27252/27626 [1:04:18<00:52,  7.06it/s]

{'epoch': 3, 'iter': 27250, 'avg_loss': 8.520379189338025, 'avg_acc': 49.960666581042894, 'loss': 8.713563919067383}


EP_train:3:  99%|| 27262/27626 [1:04:20<00:51,  7.06it/s]

{'epoch': 3, 'iter': 27260, 'avg_loss': 8.52042509007754, 'avg_acc': 49.96102490737684, 'loss': 8.000262260437012}


EP_train:3:  99%|| 27272/27626 [1:04:21<00:50,  6.99it/s]

{'epoch': 3, 'iter': 27270, 'avg_loss': 8.520462803433059, 'avg_acc': 49.96000788383264, 'loss': 8.135071754455566}


EP_train:3:  99%|| 27282/27626 [1:04:22<00:49,  7.01it/s]

{'epoch': 3, 'iter': 27280, 'avg_loss': 8.520334610111059, 'avg_acc': 49.958304314357974, 'loss': 8.839664459228516}


EP_train:3:  99%|| 27292/27626 [1:04:24<00:47,  7.04it/s]

{'epoch': 3, 'iter': 27290, 'avg_loss': 8.520505637318132, 'avg_acc': 49.95717452640064, 'loss': 9.709151268005371}


EP_train:3:  99%|| 27302/27626 [1:04:25<00:46,  7.01it/s]

{'epoch': 3, 'iter': 27300, 'avg_loss': 8.52040620438935, 'avg_acc': 49.958334859528954, 'loss': 8.207422256469727}


EP_train:3:  99%|| 27312/27626 [1:04:27<00:44,  7.05it/s]

{'epoch': 3, 'iter': 27310, 'avg_loss': 8.520499351438332, 'avg_acc': 49.95766357877778, 'loss': 8.766170501708984}


EP_train:3:  99%|| 27322/27626 [1:04:28<00:43,  7.06it/s]

{'epoch': 3, 'iter': 27320, 'avg_loss': 8.520398453010532, 'avg_acc': 49.95802221734198, 'loss': 7.4999213218688965}


EP_train:3:  99%|| 27332/27626 [1:04:29<00:41,  7.09it/s]

{'epoch': 3, 'iter': 27330, 'avg_loss': 8.52040802660882, 'avg_acc': 49.957923237349526, 'loss': 9.018994331359863}


EP_train:3:  99%|| 27342/27626 [1:04:31<00:40,  7.05it/s]

{'epoch': 3, 'iter': 27340, 'avg_loss': 8.520387706269657, 'avg_acc': 49.95873870743572, 'loss': 8.780830383300781}


EP_train:3:  99%|| 27352/27626 [1:04:32<00:38,  7.09it/s]

{'epoch': 3, 'iter': 27350, 'avg_loss': 8.520365706501632, 'avg_acc': 49.958068260758296, 'loss': 8.682574272155762}


EP_train:3:  99%|| 27362/27626 [1:04:34<00:37,  7.05it/s]

{'epoch': 3, 'iter': 27360, 'avg_loss': 8.520512093264154, 'avg_acc': 49.95739830415555, 'loss': 8.99840259552002}


EP_train:3:  99%|| 27372/27626 [1:04:35<00:35,  7.06it/s]

{'epoch': 3, 'iter': 27370, 'avg_loss': 8.520575067932608, 'avg_acc': 49.95855558803113, 'loss': 8.457930564880371}


EP_train:3:  99%|| 27382/27626 [1:04:37<00:34,  7.02it/s]

{'epoch': 3, 'iter': 27380, 'avg_loss': 8.520577605222316, 'avg_acc': 49.95811420327965, 'loss': 8.144987106323242}


EP_train:3:  99%|| 27392/27626 [1:04:38<00:33,  7.01it/s]

{'epoch': 3, 'iter': 27390, 'avg_loss': 8.520566578471236, 'avg_acc': 49.957787229381914, 'loss': 9.210474014282227}


EP_train:3:  99%|| 27402/27626 [1:04:39<00:31,  7.08it/s]

{'epoch': 3, 'iter': 27400, 'avg_loss': 8.520547536179853, 'avg_acc': 49.95734644720996, 'loss': 7.858846187591553}


EP_train:3:  99%|| 27412/27626 [1:04:41<00:30,  7.02it/s]

{'epoch': 3, 'iter': 27410, 'avg_loss': 8.520397497003653, 'avg_acc': 49.95793203458466, 'loss': 8.459150314331055}


EP_train:3:  99%|| 27422/27626 [1:04:42<00:29,  7.02it/s]

{'epoch': 3, 'iter': 27420, 'avg_loss': 8.520338113387684, 'avg_acc': 49.956807738594506, 'loss': 8.073498725891113}


EP_train:3:  99%|| 27432/27626 [1:04:44<00:27,  7.02it/s]

{'epoch': 3, 'iter': 27430, 'avg_loss': 8.520374804762344, 'avg_acc': 49.95659563996938, 'loss': 8.847798347473145}


EP_train:3:  99%|| 27442/27626 [1:04:45<00:26,  7.02it/s]

{'epoch': 3, 'iter': 27440, 'avg_loss': 8.520237703095033, 'avg_acc': 49.95695309937685, 'loss': 8.554450988769531}


EP_train:3:  99%|| 27452/27626 [1:04:46<00:24,  6.98it/s]

{'epoch': 3, 'iter': 27450, 'avg_loss': 8.520153373400591, 'avg_acc': 49.95560271028378, 'loss': 8.094061851501465}


EP_train:3:  99%|| 27462/27626 [1:04:48<00:23,  7.02it/s]

{'epoch': 3, 'iter': 27460, 'avg_loss': 8.5202533343663, 'avg_acc': 49.956870652925964, 'loss': 8.468072891235352}


EP_train:3:  99%|| 27472/27626 [1:04:49<00:22,  6.95it/s]

{'epoch': 3, 'iter': 27470, 'avg_loss': 8.520205532301153, 'avg_acc': 49.95461122638419, 'loss': 8.943193435668945}


EP_train:3:  99%|| 27482/27626 [1:04:51<00:20,  7.08it/s]

{'epoch': 3, 'iter': 27480, 'avg_loss': 8.520111121515715, 'avg_acc': 49.95417288308286, 'loss': 8.166126251220703}


EP_train:3: 100%|| 27492/27626 [1:04:52<00:18,  7.06it/s]

{'epoch': 3, 'iter': 27490, 'avg_loss': 8.520190094918016, 'avg_acc': 49.955212615037645, 'loss': 8.637008666992188}


EP_train:3: 100%|| 27502/27626 [1:04:54<00:17,  7.02it/s]

{'epoch': 3, 'iter': 27500, 'avg_loss': 8.520165701180535, 'avg_acc': 49.95647885531435, 'loss': 8.064187049865723}


EP_train:3: 100%|| 27512/27626 [1:04:55<00:16,  7.06it/s]

{'epoch': 3, 'iter': 27510, 'avg_loss': 8.520184987018848, 'avg_acc': 49.953768492602954, 'loss': 8.987098693847656}


EP_train:3: 100%|| 27522/27626 [1:04:56<00:14,  7.04it/s]

{'epoch': 3, 'iter': 27520, 'avg_loss': 8.52016225981637, 'avg_acc': 49.95537498637404, 'loss': 7.839845657348633}


EP_train:3: 100%|| 27532/27626 [1:04:58<00:13,  7.00it/s]

{'epoch': 3, 'iter': 27530, 'avg_loss': 8.520335727371716, 'avg_acc': 49.955504703788456, 'loss': 9.111483573913574}


EP_train:3: 100%|| 27542/27626 [1:04:59<00:11,  7.05it/s]

{'epoch': 3, 'iter': 27540, 'avg_loss': 8.520286073595008, 'avg_acc': 49.95449965505973, 'loss': 9.109097480773926}


EP_train:3: 100%|| 27552/27626 [1:05:01<00:10,  7.00it/s]

{'epoch': 3, 'iter': 27550, 'avg_loss': 8.520356241386601, 'avg_acc': 49.95315505789264, 'loss': 8.170730590820312}


EP_train:3: 100%|| 27562/27626 [1:05:02<00:09,  7.04it/s]

{'epoch': 3, 'iter': 27560, 'avg_loss': 8.520345041391218, 'avg_acc': 49.95362559413664, 'loss': 8.758811950683594}


EP_train:3: 100%|| 27572/27626 [1:05:03<00:07,  7.04it/s]

{'epoch': 3, 'iter': 27570, 'avg_loss': 8.52034281840336, 'avg_acc': 49.953982445322985, 'loss': 8.233256340026855}


EP_train:3: 100%|| 27582/27626 [1:05:05<00:06,  7.07it/s]

{'epoch': 3, 'iter': 27580, 'avg_loss': 8.520465896229968, 'avg_acc': 49.95354591929227, 'loss': 9.2253999710083}


EP_train:3: 100%|| 27592/27626 [1:05:06<00:04,  7.04it/s]

{'epoch': 3, 'iter': 27590, 'avg_loss': 8.520515966586501, 'avg_acc': 49.95231687869233, 'loss': 8.964864730834961}


EP_train:3: 100%|| 27602/27626 [1:05:08<00:03,  7.03it/s]

{'epoch': 3, 'iter': 27600, 'avg_loss': 8.520544863581627, 'avg_acc': 49.951881272417666, 'loss': 8.037757873535156}


EP_train:3: 100%|| 27612/27626 [1:05:09<00:01,  7.08it/s]

{'epoch': 3, 'iter': 27610, 'avg_loss': 8.520520740518842, 'avg_acc': 49.951219622614175, 'loss': 8.486552238464355}


EP_train:3: 100%|| 27622/27626 [1:05:11<00:00,  7.04it/s]

{'epoch': 3, 'iter': 27620, 'avg_loss': 8.520562444821339, 'avg_acc': 49.95214239165852, 'loss': 8.05826473236084}


EP_train:3: 100%|| 27626/27626 [1:05:11<00:00,  7.06it/s]


EP3_train, avg_loss= 8.52055757806759 total_acc= 49.951754103163644
EP:3 Model Saved on: ./wikitext_trained.model.ep3


EP_train:4:   0%|| 2/27626 [00:00<1:18:54,  5.83it/s]

{'epoch': 4, 'iter': 0, 'avg_loss': 7.910165786743164, 'avg_acc': 53.125, 'loss': 7.910165786743164}


EP_train:4:   0%|| 12/27626 [00:01<1:08:30,  6.72it/s]

{'epoch': 4, 'iter': 10, 'avg_loss': 8.49369816346602, 'avg_acc': 51.13636363636363, 'loss': 8.503808975219727}


EP_train:4:   0%|| 22/27626 [00:03<1:06:03,  6.96it/s]

{'epoch': 4, 'iter': 20, 'avg_loss': 8.503043946765718, 'avg_acc': 51.19047619047619, 'loss': 8.315486907958984}


EP_train:4:   0%|| 32/27626 [00:04<1:05:05,  7.06it/s]

{'epoch': 4, 'iter': 30, 'avg_loss': 8.383176403660928, 'avg_acc': 50.1008064516129, 'loss': 8.608194351196289}


EP_train:4:   0%|| 42/27626 [00:06<1:05:38,  7.00it/s]

{'epoch': 4, 'iter': 40, 'avg_loss': 8.342480171017531, 'avg_acc': 48.93292682926829, 'loss': 7.562194347381592}


EP_train:4:   0%|| 52/27626 [00:07<1:05:26,  7.02it/s]

{'epoch': 4, 'iter': 50, 'avg_loss': 8.437648268306956, 'avg_acc': 47.7328431372549, 'loss': 9.18620777130127}


EP_train:4:   0%|| 62/27626 [00:08<1:05:52,  6.97it/s]

{'epoch': 4, 'iter': 60, 'avg_loss': 8.413764468959121, 'avg_acc': 48.61680327868852, 'loss': 8.073352813720703}


EP_train:4:   0%|| 72/27626 [00:10<1:07:00,  6.85it/s]

{'epoch': 4, 'iter': 70, 'avg_loss': 8.455742191260969, 'avg_acc': 49.33978873239437, 'loss': 8.379798889160156}


EP_train:4:   0%|| 82/27626 [00:11<1:06:30,  6.90it/s]

{'epoch': 4, 'iter': 80, 'avg_loss': 8.466298856852967, 'avg_acc': 49.074074074074076, 'loss': 7.901116371154785}


EP_train:4:   0%|| 92/27626 [00:13<1:06:01,  6.95it/s]

{'epoch': 4, 'iter': 90, 'avg_loss': 8.489626302823915, 'avg_acc': 49.175824175824175, 'loss': 7.961612224578857}


EP_train:4:   0%|| 102/27626 [00:14<1:06:12,  6.93it/s]

{'epoch': 4, 'iter': 100, 'avg_loss': 8.50845768900201, 'avg_acc': 49.16460396039604, 'loss': 8.903255462646484}


EP_train:4:   0%|| 112/27626 [00:16<1:05:53,  6.96it/s]

{'epoch': 4, 'iter': 110, 'avg_loss': 8.500521956263361, 'avg_acc': 49.26801801801802, 'loss': 8.10400676727295}


EP_train:4:   0%|| 122/27626 [00:17<1:05:15,  7.02it/s]

{'epoch': 4, 'iter': 120, 'avg_loss': 8.488641691601966, 'avg_acc': 49.070247933884296, 'loss': 8.755224227905273}


EP_train:4:   0%|| 132/27626 [00:18<1:05:42,  6.97it/s]

{'epoch': 4, 'iter': 130, 'avg_loss': 8.480988986619556, 'avg_acc': 49.37977099236641, 'loss': 8.327213287353516}


EP_train:4:   1%|| 142/27626 [00:20<1:06:06,  6.93it/s]

{'epoch': 4, 'iter': 140, 'avg_loss': 8.48666271588481, 'avg_acc': 49.71187943262411, 'loss': 8.668318748474121}


EP_train:4:   1%|| 152/27626 [00:21<1:05:06,  7.03it/s]

{'epoch': 4, 'iter': 150, 'avg_loss': 8.4709937967212, 'avg_acc': 49.71026490066225, 'loss': 8.084236145019531}


EP_train:4:   1%|| 162/27626 [00:23<1:05:13,  7.02it/s]

{'epoch': 4, 'iter': 160, 'avg_loss': 8.485198693245835, 'avg_acc': 49.70885093167702, 'loss': 10.207932472229004}


EP_train:4:   1%|| 172/27626 [00:24<1:04:44,  7.07it/s]

{'epoch': 4, 'iter': 170, 'avg_loss': 8.46868514178092, 'avg_acc': 49.68932748538012, 'loss': 8.209806442260742}


EP_train:4:   1%|| 182/27626 [00:26<1:05:13,  7.01it/s]

{'epoch': 4, 'iter': 180, 'avg_loss': 8.468233477344828, 'avg_acc': 49.87914364640884, 'loss': 8.752913475036621}


EP_train:4:   1%|| 192/27626 [00:27<1:04:48,  7.05it/s]

{'epoch': 4, 'iter': 190, 'avg_loss': 8.470970802906296, 'avg_acc': 50.049083769633505, 'loss': 9.194550514221191}


EP_train:4:   1%|| 202/27626 [00:28<1:04:43,  7.06it/s]

{'epoch': 4, 'iter': 200, 'avg_loss': 8.476433917657653, 'avg_acc': 49.93781094527363, 'loss': 8.747023582458496}


EP_train:4:   1%|| 212/27626 [00:30<1:04:51,  7.04it/s]

{'epoch': 4, 'iter': 210, 'avg_loss': 8.474314291895283, 'avg_acc': 49.822274881516584, 'loss': 7.865743637084961}


EP_train:4:   1%|| 222/27626 [00:31<1:04:40,  7.06it/s]

{'epoch': 4, 'iter': 220, 'avg_loss': 8.475249739254222, 'avg_acc': 49.886877828054295, 'loss': 8.232370376586914}


EP_train:4:   1%|| 232/27626 [00:33<1:04:41,  7.06it/s]

{'epoch': 4, 'iter': 230, 'avg_loss': 8.46392990706803, 'avg_acc': 49.86471861471862, 'loss': 7.778448581695557}


EP_train:4:   1%|| 242/27626 [00:34<1:04:42,  7.05it/s]

{'epoch': 4, 'iter': 240, 'avg_loss': 8.475380141705399, 'avg_acc': 49.96109958506224, 'loss': 8.642017364501953}


EP_train:4:   1%|| 252/27626 [00:35<1:04:43,  7.05it/s]

{'epoch': 4, 'iter': 250, 'avg_loss': 8.465623306563176, 'avg_acc': 49.71364541832669, 'loss': 8.627732276916504}


EP_train:4:   1%|| 262/27626 [00:37<1:04:51,  7.03it/s]

{'epoch': 4, 'iter': 260, 'avg_loss': 8.485028948363674, 'avg_acc': 49.724616858237546, 'loss': 8.679141998291016}


EP_train:4:   1%|| 272/27626 [00:38<1:04:55,  7.02it/s]

{'epoch': 4, 'iter': 270, 'avg_loss': 8.482435367204166, 'avg_acc': 49.654059040590404, 'loss': 8.05502700805664}


EP_train:4:   1%|| 282/27626 [00:40<1:04:32,  7.06it/s]

{'epoch': 4, 'iter': 280, 'avg_loss': 8.466017948775105, 'avg_acc': 49.57740213523132, 'loss': 8.572941780090332}


EP_train:4:   1%|| 292/27626 [00:41<1:04:28,  7.07it/s]

{'epoch': 4, 'iter': 290, 'avg_loss': 8.459933677489815, 'avg_acc': 49.463058419243985, 'loss': 8.484923362731934}


EP_train:4:   1%|| 302/27626 [00:43<1:05:12,  6.98it/s]

{'epoch': 4, 'iter': 300, 'avg_loss': 8.462979400672786, 'avg_acc': 49.55357142857143, 'loss': 8.603096008300781}


EP_train:4:   1%|| 312/27626 [00:44<1:04:43,  7.03it/s]

{'epoch': 4, 'iter': 310, 'avg_loss': 8.469611091245792, 'avg_acc': 49.61816720257235, 'loss': 9.279364585876465}


EP_train:4:   1%|| 322/27626 [00:45<1:04:53,  7.01it/s]

{'epoch': 4, 'iter': 320, 'avg_loss': 8.480042267439893, 'avg_acc': 49.63006230529595, 'loss': 7.720632553100586}


EP_train:4:   1%|| 332/27626 [00:47<1:04:43,  7.03it/s]

{'epoch': 4, 'iter': 330, 'avg_loss': 8.47982436004362, 'avg_acc': 49.56570996978852, 'loss': 8.686416625976562}


EP_train:4:   1%|| 342/27626 [00:48<1:04:26,  7.06it/s]

{'epoch': 4, 'iter': 340, 'avg_loss': 8.479207985561963, 'avg_acc': 49.46847507331378, 'loss': 9.082343101501465}


EP_train:4:   1%|| 352/27626 [00:50<1:04:52,  7.01it/s]

{'epoch': 4, 'iter': 350, 'avg_loss': 8.484592520613276, 'avg_acc': 49.358974358974365, 'loss': 8.643901824951172}


EP_train:4:   1%|| 362/27626 [00:51<1:04:41,  7.02it/s]

{'epoch': 4, 'iter': 360, 'avg_loss': 8.489175199471683, 'avg_acc': 49.29882271468144, 'loss': 8.15239429473877}


EP_train:4:   1%|| 372/27626 [00:52<1:04:47,  7.01it/s]

{'epoch': 4, 'iter': 370, 'avg_loss': 8.492850822901147, 'avg_acc': 49.25033692722372, 'loss': 8.46995735168457}


EP_train:4:   1%|| 382/27626 [00:54<1:04:14,  7.07it/s]

{'epoch': 4, 'iter': 380, 'avg_loss': 8.499546112350904, 'avg_acc': 49.08136482939632, 'loss': 8.530346870422363}


EP_train:4:   1%|| 392/27626 [00:55<1:04:08,  7.08it/s]

{'epoch': 4, 'iter': 390, 'avg_loss': 8.50346764030359, 'avg_acc': 49.080882352941174, 'loss': 8.66533374786377}


EP_train:4:   1%|| 402/27626 [00:57<1:04:55,  6.99it/s]

{'epoch': 4, 'iter': 400, 'avg_loss': 8.50575153369856, 'avg_acc': 49.00249376558604, 'loss': 8.803068161010742}


EP_train:4:   1%|| 412/27626 [00:58<1:04:50,  7.00it/s]

{'epoch': 4, 'iter': 410, 'avg_loss': 8.509929817088329, 'avg_acc': 48.9051094890511, 'loss': 8.764122009277344}


EP_train:4:   2%|| 422/27626 [01:00<1:04:19,  7.05it/s]

{'epoch': 4, 'iter': 420, 'avg_loss': 8.507532644158587, 'avg_acc': 48.90142517814727, 'loss': 8.555185317993164}


EP_train:4:   2%|| 432/27626 [01:01<1:04:00,  7.08it/s]

{'epoch': 4, 'iter': 430, 'avg_loss': 8.499702872920203, 'avg_acc': 48.992169373549885, 'loss': 7.874989032745361}


EP_train:4:   2%|| 442/27626 [01:02<1:04:17,  7.05it/s]

{'epoch': 4, 'iter': 440, 'avg_loss': 8.504076145673825, 'avg_acc': 48.97250566893424, 'loss': 8.264248847961426}


EP_train:4:   2%|| 452/27626 [01:04<1:04:26,  7.03it/s]

{'epoch': 4, 'iter': 450, 'avg_loss': 8.510809258716863, 'avg_acc': 49.029933481152995, 'loss': 9.19738483428955}


EP_train:4:   2%|| 462/27626 [01:05<1:04:17,  7.04it/s]

{'epoch': 4, 'iter': 460, 'avg_loss': 8.518882448399145, 'avg_acc': 49.05775488069414, 'loss': 9.16038703918457}


EP_train:4:   2%|| 472/27626 [01:07<1:04:01,  7.07it/s]

{'epoch': 4, 'iter': 470, 'avg_loss': 8.520525562788524, 'avg_acc': 49.19718683651805, 'loss': 8.265323638916016}


EP_train:4:   2%|| 482/27626 [01:08<1:04:34,  7.01it/s]

{'epoch': 4, 'iter': 480, 'avg_loss': 8.517942783490536, 'avg_acc': 49.20738045738046, 'loss': 7.768011093139648}


EP_train:4:   2%|| 492/27626 [01:09<1:04:15,  7.04it/s]

{'epoch': 4, 'iter': 490, 'avg_loss': 8.513482962994857, 'avg_acc': 49.29989816700611, 'loss': 8.152078628540039}


EP_train:4:   2%|| 502/27626 [01:11<1:04:08,  7.05it/s]

{'epoch': 4, 'iter': 500, 'avg_loss': 8.513812877936754, 'avg_acc': 49.288922155688624, 'loss': 8.4763765335083}


EP_train:4:   2%|| 512/27626 [01:12<1:04:45,  6.98it/s]

{'epoch': 4, 'iter': 510, 'avg_loss': 8.511733597272062, 'avg_acc': 49.33953033268102, 'loss': 8.724234580993652}


EP_train:4:   2%|| 522/27626 [01:14<1:04:03,  7.05it/s]

{'epoch': 4, 'iter': 520, 'avg_loss': 8.51290518857696, 'avg_acc': 49.382197696737045, 'loss': 8.362265586853027}


EP_train:4:   2%|| 532/27626 [01:15<1:04:23,  7.01it/s]

{'epoch': 4, 'iter': 530, 'avg_loss': 8.512544520606204, 'avg_acc': 49.41737288135593, 'loss': 8.753262519836426}


EP_train:4:   2%|| 542/27626 [01:17<1:04:31,  7.00it/s]

{'epoch': 4, 'iter': 540, 'avg_loss': 8.517851066236796, 'avg_acc': 49.410813308687615, 'loss': 8.460992813110352}


EP_train:4:   2%|| 552/27626 [01:18<1:04:09,  7.03it/s]

{'epoch': 4, 'iter': 550, 'avg_loss': 8.515190912894026, 'avg_acc': 49.38747731397459, 'loss': 9.091567993164062}


EP_train:4:   2%|| 562/27626 [01:19<1:03:59,  7.05it/s]

{'epoch': 4, 'iter': 560, 'avg_loss': 8.515386946061078, 'avg_acc': 49.44295900178253, 'loss': 8.806008338928223}


EP_train:4:   2%|| 572/27626 [01:21<1:04:09,  7.03it/s]

{'epoch': 4, 'iter': 570, 'avg_loss': 8.51346982548408, 'avg_acc': 49.48007880910683, 'loss': 8.370193481445312}


EP_train:4:   2%|| 582/27626 [01:22<1:04:14,  7.02it/s]

{'epoch': 4, 'iter': 580, 'avg_loss': 8.514979743300945, 'avg_acc': 49.53205679862307, 'loss': 7.640249729156494}


EP_train:4:   2%|| 592/27626 [01:24<1:04:26,  6.99it/s]

{'epoch': 4, 'iter': 590, 'avg_loss': 8.51571810507734, 'avg_acc': 49.48181049069374, 'loss': 8.649225234985352}


EP_train:4:   2%|| 602/27626 [01:25<1:04:23,  6.99it/s]

{'epoch': 4, 'iter': 600, 'avg_loss': 8.511874544045295, 'avg_acc': 49.44883527454243, 'loss': 7.9210734367370605}


EP_train:4:   2%|| 612/27626 [01:26<1:03:56,  7.04it/s]

{'epoch': 4, 'iter': 610, 'avg_loss': 8.514340260220042, 'avg_acc': 49.52945990180033, 'loss': 7.907834053039551}


EP_train:4:   2%|| 622/27626 [01:28<1:03:36,  7.08it/s]

{'epoch': 4, 'iter': 620, 'avg_loss': 8.511814718661102, 'avg_acc': 49.52697262479871, 'loss': 8.600422859191895}


EP_train:4:   2%|| 632/27626 [01:29<1:04:08,  7.01it/s]

{'epoch': 4, 'iter': 630, 'avg_loss': 8.508532062385425, 'avg_acc': 49.52951664025357, 'loss': 8.557341575622559}


EP_train:4:   2%|| 642/27626 [01:31<1:03:58,  7.03it/s]

{'epoch': 4, 'iter': 640, 'avg_loss': 8.50545266489157, 'avg_acc': 49.54660686427457, 'loss': 8.312501907348633}


EP_train:4:   2%|| 652/27626 [01:32<1:03:57,  7.03it/s]

{'epoch': 4, 'iter': 650, 'avg_loss': 8.51085273314914, 'avg_acc': 49.57277265745007, 'loss': 8.546736717224121}


EP_train:4:   2%|| 662/27626 [01:34<1:03:54,  7.03it/s]

{'epoch': 4, 'iter': 660, 'avg_loss': 8.507345084162957, 'avg_acc': 49.602874432677766, 'loss': 7.332386016845703}


EP_train:4:   2%|| 672/27626 [01:35<1:03:37,  7.06it/s]

{'epoch': 4, 'iter': 670, 'avg_loss': 8.504370510311666, 'avg_acc': 49.59016393442623, 'loss': 7.931960105895996}


EP_train:4:   2%|| 682/27626 [01:36<1:04:12,  6.99it/s]

{'epoch': 4, 'iter': 680, 'avg_loss': 8.499872726490844, 'avg_acc': 49.669603524229075, 'loss': 8.358165740966797}


EP_train:4:   3%|| 692/27626 [01:38<1:03:39,  7.05it/s]

{'epoch': 4, 'iter': 690, 'avg_loss': 8.500495709834672, 'avg_acc': 49.687952243125906, 'loss': 8.346593856811523}


EP_train:4:   3%|| 702/27626 [01:39<1:03:40,  7.05it/s]

{'epoch': 4, 'iter': 700, 'avg_loss': 8.499902774195869, 'avg_acc': 49.72360912981455, 'loss': 8.845246315002441}


EP_train:4:   3%|| 712/27626 [01:41<1:04:32,  6.95it/s]

{'epoch': 4, 'iter': 710, 'avg_loss': 8.501909698615094, 'avg_acc': 49.81100562587905, 'loss': 8.290830612182617}


EP_train:4:   3%|| 722/27626 [01:42<1:03:40,  7.04it/s]

{'epoch': 4, 'iter': 720, 'avg_loss': 8.494415769299257, 'avg_acc': 49.75294729542302, 'loss': 8.569767951965332}


EP_train:4:   3%|| 732/27626 [01:43<1:03:43,  7.03it/s]

{'epoch': 4, 'iter': 730, 'avg_loss': 8.496812459708238, 'avg_acc': 49.81617647058824, 'loss': 9.131758689880371}


EP_train:4:   3%|| 742/27626 [01:45<1:04:00,  7.00it/s]

{'epoch': 4, 'iter': 740, 'avg_loss': 8.496850330980845, 'avg_acc': 49.814439946018894, 'loss': 8.326863288879395}


EP_train:4:   3%|| 752/27626 [01:46<1:03:58,  7.00it/s]

{'epoch': 4, 'iter': 750, 'avg_loss': 8.500530710233036, 'avg_acc': 49.77113848202397, 'loss': 9.078780174255371}


EP_train:4:   3%|| 762/27626 [01:48<1:03:29,  7.05it/s]

{'epoch': 4, 'iter': 760, 'avg_loss': 8.497877163579993, 'avg_acc': 49.85627463863337, 'loss': 8.857802391052246}


EP_train:4:   3%|| 772/27626 [01:49<1:03:34,  7.04it/s]

{'epoch': 4, 'iter': 770, 'avg_loss': 8.50072448943221, 'avg_acc': 49.80544747081712, 'loss': 10.076802253723145}


EP_train:4:   3%|| 782/27626 [01:51<1:03:17,  7.07it/s]

{'epoch': 4, 'iter': 780, 'avg_loss': 8.498809867570708, 'avg_acc': 49.807938540332906, 'loss': 8.148439407348633}


EP_train:4:   3%|| 792/27626 [01:52<1:03:21,  7.06it/s]

{'epoch': 4, 'iter': 790, 'avg_loss': 8.499526025070402, 'avg_acc': 49.74715549936789, 'loss': 7.842896938323975}


EP_train:4:   3%|| 802/27626 [01:53<1:03:53,  7.00it/s]

{'epoch': 4, 'iter': 800, 'avg_loss': 8.496333858047086, 'avg_acc': 49.70739700374532, 'loss': 8.679899215698242}


EP_train:4:   3%|| 812/27626 [01:55<1:03:26,  7.04it/s]

{'epoch': 4, 'iter': 810, 'avg_loss': 8.495321900571172, 'avg_acc': 49.69173859432799, 'loss': 8.354642868041992}


EP_train:4:   3%|| 822/27626 [01:56<1:03:41,  7.01it/s]

{'epoch': 4, 'iter': 820, 'avg_loss': 8.49266827324276, 'avg_acc': 49.62317295980512, 'loss': 8.416563034057617}


EP_train:4:   3%|| 832/27626 [01:58<1:03:26,  7.04it/s]

{'epoch': 4, 'iter': 830, 'avg_loss': 8.494098611113253, 'avg_acc': 49.6690734055355, 'loss': 9.300704956054688}


EP_train:4:   3%|| 842/27626 [01:59<1:03:21,  7.05it/s]

{'epoch': 4, 'iter': 840, 'avg_loss': 8.489728431497545, 'avg_acc': 49.6358501783591, 'loss': 7.78019380569458}


EP_train:4:   3%|| 852/27626 [02:00<1:03:16,  7.05it/s]

{'epoch': 4, 'iter': 850, 'avg_loss': 8.490326627581156, 'avg_acc': 49.62544065804935, 'loss': 8.678080558776855}


EP_train:4:   3%|| 862/27626 [02:02<1:03:07,  7.07it/s]

{'epoch': 4, 'iter': 860, 'avg_loss': 8.488738715717878, 'avg_acc': 49.63704994192799, 'loss': 7.77786111831665}


EP_train:4:   3%|| 872/27626 [02:03<1:03:27,  7.03it/s]

{'epoch': 4, 'iter': 870, 'avg_loss': 8.489935506498773, 'avg_acc': 49.6591561423651, 'loss': 7.667261123657227}


EP_train:4:   3%|| 882/27626 [02:05<1:05:56,  6.76it/s]

{'epoch': 4, 'iter': 880, 'avg_loss': 8.487726214795323, 'avg_acc': 49.64174233825199, 'loss': 8.28380012512207}


EP_train:4:   3%|| 892/27626 [02:06<1:03:22,  7.03it/s]

{'epoch': 4, 'iter': 890, 'avg_loss': 8.489535635703072, 'avg_acc': 49.617704826038164, 'loss': 8.459428787231445}


EP_train:4:   3%|| 902/27626 [02:08<1:03:21,  7.03it/s]

{'epoch': 4, 'iter': 900, 'avg_loss': 8.488373575411678, 'avg_acc': 49.663568257491676, 'loss': 8.482791900634766}


EP_train:4:   3%|| 912/27626 [02:09<1:02:58,  7.07it/s]

{'epoch': 4, 'iter': 910, 'avg_loss': 8.490786678050405, 'avg_acc': 49.69813391877058, 'loss': 8.522483825683594}


EP_train:4:   3%|| 922/27626 [02:10<1:03:05,  7.06it/s]

{'epoch': 4, 'iter': 920, 'avg_loss': 8.493692054292925, 'avg_acc': 49.69801845819761, 'loss': 8.209786415100098}


EP_train:4:   3%|| 932/27626 [02:12<1:02:56,  7.07it/s]

{'epoch': 4, 'iter': 930, 'avg_loss': 8.493378789289169, 'avg_acc': 49.71804511278196, 'loss': 8.985176086425781}


EP_train:4:   3%|| 942/27626 [02:13<1:03:12,  7.04it/s]

{'epoch': 4, 'iter': 940, 'avg_loss': 8.494375781729168, 'avg_acc': 49.67122741764081, 'loss': 9.108743667602539}


EP_train:4:   3%|| 952/27626 [02:15<1:02:54,  7.07it/s]

{'epoch': 4, 'iter': 950, 'avg_loss': 8.496560945621173, 'avg_acc': 49.684542586750794, 'loss': 8.654356956481934}


EP_train:4:   3%|| 962/27626 [02:16<1:02:58,  7.06it/s]

{'epoch': 4, 'iter': 960, 'avg_loss': 8.494121170440897, 'avg_acc': 49.66506243496358, 'loss': 8.020207405090332}


EP_train:4:   4%|| 972/27626 [02:17<1:03:22,  7.01it/s]

{'epoch': 4, 'iter': 970, 'avg_loss': 8.492542693101782, 'avg_acc': 49.62667353244078, 'loss': 8.461366653442383}


EP_train:4:   4%|| 982/27626 [02:19<1:03:18,  7.01it/s]

{'epoch': 4, 'iter': 980, 'avg_loss': 8.491310421479952, 'avg_acc': 49.6177370030581, 'loss': 8.050037384033203}


EP_train:4:   4%|| 992/27626 [02:20<1:03:53,  6.95it/s]

{'epoch': 4, 'iter': 990, 'avg_loss': 8.490032287466779, 'avg_acc': 49.6468213925328, 'loss': 8.725566864013672}


EP_train:4:   4%|| 1002/27626 [02:22<1:02:55,  7.05it/s]

{'epoch': 4, 'iter': 1000, 'avg_loss': 8.489083128613787, 'avg_acc': 49.62849650349651, 'loss': 7.734867572784424}


EP_train:4:   4%|| 1012/27626 [02:23<1:03:25,  6.99it/s]

{'epoch': 4, 'iter': 1010, 'avg_loss': 8.489867655153208, 'avg_acc': 49.5919881305638, 'loss': 8.792008399963379}


EP_train:4:   4%|| 1022/27626 [02:25<1:02:55,  7.05it/s]

{'epoch': 4, 'iter': 1020, 'avg_loss': 8.488158592164341, 'avg_acc': 49.60210577864839, 'loss': 8.730008125305176}


EP_train:4:   4%|| 1032/27626 [02:26<1:03:08,  7.02it/s]

{'epoch': 4, 'iter': 1030, 'avg_loss': 8.487091983902467, 'avg_acc': 49.61505819592629, 'loss': 8.598675727844238}


EP_train:4:   4%|| 1042/27626 [02:27<1:03:09,  7.02it/s]

{'epoch': 4, 'iter': 1040, 'avg_loss': 8.485993942312044, 'avg_acc': 49.63076368876081, 'loss': 8.151908874511719}


EP_train:4:   4%|| 1052/27626 [02:29<1:03:13,  7.00it/s]

{'epoch': 4, 'iter': 1050, 'avg_loss': 8.487989151171567, 'avg_acc': 49.62535680304472, 'loss': 8.858160018920898}


EP_train:4:   4%|| 1062/27626 [02:30<1:02:27,  7.09it/s]

{'epoch': 4, 'iter': 1060, 'avg_loss': 8.488223418775984, 'avg_acc': 49.643614514608856, 'loss': 7.85473108291626}


EP_train:4:   4%|| 1072/27626 [02:32<1:03:05,  7.01it/s]

{'epoch': 4, 'iter': 1070, 'avg_loss': 8.488031109508013, 'avg_acc': 49.65569561157797, 'loss': 8.469456672668457}


EP_train:4:   4%|| 1082/27626 [02:33<1:03:07,  7.01it/s]

{'epoch': 4, 'iter': 1080, 'avg_loss': 8.484778117074887, 'avg_acc': 49.638644773358, 'loss': 9.455892562866211}


EP_train:4:   4%|| 1092/27626 [02:34<1:02:51,  7.04it/s]

{'epoch': 4, 'iter': 1090, 'avg_loss': 8.485430712660554, 'avg_acc': 49.62190650779102, 'loss': 8.513932228088379}


EP_train:4:   4%|| 1102/27626 [02:36<1:02:57,  7.02it/s]

{'epoch': 4, 'iter': 1100, 'avg_loss': 8.488451176833067, 'avg_acc': 49.59128065395095, 'loss': 8.332388877868652}


EP_train:4:   4%|| 1112/27626 [02:37<1:03:00,  7.01it/s]

{'epoch': 4, 'iter': 1110, 'avg_loss': 8.490507826732152, 'avg_acc': 49.56964446444644, 'loss': 9.019356727600098}


EP_train:4:   4%|| 1122/27626 [02:39<1:02:35,  7.06it/s]

{'epoch': 4, 'iter': 1120, 'avg_loss': 8.4949130208018, 'avg_acc': 49.592997323818025, 'loss': 9.778153419494629}


EP_train:4:   4%|| 1132/27626 [02:40<1:02:38,  7.05it/s]

{'epoch': 4, 'iter': 1130, 'avg_loss': 8.494469824773248, 'avg_acc': 49.582780725022104, 'loss': 7.4386887550354}


EP_train:4:   4%|| 1142/27626 [02:42<1:02:20,  7.08it/s]

{'epoch': 4, 'iter': 1140, 'avg_loss': 8.491087736736569, 'avg_acc': 49.60834794040315, 'loss': 7.948886394500732}


EP_train:4:   4%|| 1152/27626 [02:43<1:03:01,  7.00it/s]

{'epoch': 4, 'iter': 1150, 'avg_loss': 8.492814035440507, 'avg_acc': 49.66062119895743, 'loss': 9.218437194824219}


EP_train:4:   4%|| 1162/27626 [02:44<1:02:59,  7.00it/s]

{'epoch': 4, 'iter': 1160, 'avg_loss': 8.493371531020763, 'avg_acc': 49.66085271317829, 'loss': 8.398185729980469}


EP_train:4:   4%|| 1172/27626 [02:46<1:02:42,  7.03it/s]

{'epoch': 4, 'iter': 1170, 'avg_loss': 8.493580639820442, 'avg_acc': 49.69043552519214, 'loss': 8.374019622802734}


EP_train:4:   4%|| 1182/27626 [02:47<1:02:29,  7.05it/s]

{'epoch': 4, 'iter': 1180, 'avg_loss': 8.494133138131732, 'avg_acc': 49.68776460626588, 'loss': 8.987336158752441}


EP_train:4:   4%|| 1192/27626 [02:49<1:02:41,  7.03it/s]

{'epoch': 4, 'iter': 1190, 'avg_loss': 8.492580950310208, 'avg_acc': 49.67464315701092, 'loss': 8.460360527038574}


EP_train:4:   4%|| 1202/27626 [02:50<1:02:20,  7.06it/s]

{'epoch': 4, 'iter': 1200, 'avg_loss': 8.494899662408503, 'avg_acc': 49.68776019983347, 'loss': 9.170917510986328}


EP_train:4:   4%|| 1212/27626 [02:51<1:02:50,  7.01it/s]

{'epoch': 4, 'iter': 1210, 'avg_loss': 8.495129624247847, 'avg_acc': 49.71098265895954, 'loss': 7.886620998382568}


EP_train:4:   4%|| 1222/27626 [02:53<1:03:09,  6.97it/s]

{'epoch': 4, 'iter': 1220, 'avg_loss': 8.493825494608462, 'avg_acc': 49.70567158067158, 'loss': 8.504297256469727}


EP_train:4:   4%|| 1232/27626 [02:54<1:02:12,  7.07it/s]

{'epoch': 4, 'iter': 1230, 'avg_loss': 8.496167797271456, 'avg_acc': 49.71567831031681, 'loss': 8.571972846984863}


EP_train:4:   4%|| 1242/27626 [02:56<1:02:27,  7.04it/s]

{'epoch': 4, 'iter': 1240, 'avg_loss': 8.494787424058323, 'avg_acc': 49.76329572925061, 'loss': 8.07442855834961}


EP_train:4:   5%|| 1252/27626 [02:57<1:02:21,  7.05it/s]

{'epoch': 4, 'iter': 1250, 'avg_loss': 8.49302829731759, 'avg_acc': 49.76518784972022, 'loss': 8.42817211151123}


EP_train:4:   5%|| 1262/27626 [02:59<1:02:56,  6.98it/s]

{'epoch': 4, 'iter': 1260, 'avg_loss': 8.493982521907192, 'avg_acc': 49.79678826328311, 'loss': 9.411252975463867}


EP_train:4:   5%|| 1272/27626 [03:00<1:02:16,  7.05it/s]

{'epoch': 4, 'iter': 1270, 'avg_loss': 8.496312994923205, 'avg_acc': 49.786093627065306, 'loss': 9.511711120605469}


EP_train:4:   5%|| 1282/27626 [03:01<1:02:23,  7.04it/s]

{'epoch': 4, 'iter': 1280, 'avg_loss': 8.496792980881988, 'avg_acc': 49.82679547228728, 'loss': 8.847731590270996}


EP_train:4:   5%|| 1292/27626 [03:03<1:02:20,  7.04it/s]

{'epoch': 4, 'iter': 1290, 'avg_loss': 8.496739567018682, 'avg_acc': 49.8208752904725, 'loss': 8.5823335647583}


EP_train:4:   5%|| 1302/27626 [03:04<1:02:47,  6.99it/s]

{'epoch': 4, 'iter': 1300, 'avg_loss': 8.495914946694635, 'avg_acc': 49.81985011529593, 'loss': 8.600262641906738}


EP_train:4:   5%|| 1312/27626 [03:06<1:02:22,  7.03it/s]

{'epoch': 4, 'iter': 1310, 'avg_loss': 8.497496583096222, 'avg_acc': 49.82122425629291, 'loss': 8.634912490844727}


EP_train:4:   5%|| 1322/27626 [03:07<1:02:01,  7.07it/s]

{'epoch': 4, 'iter': 1320, 'avg_loss': 8.496736061203038, 'avg_acc': 49.82257759273278, 'loss': 8.048587799072266}


EP_train:4:   5%|| 1332/27626 [03:08<1:02:48,  6.98it/s]

{'epoch': 4, 'iter': 1330, 'avg_loss': 8.499304062078807, 'avg_acc': 49.84034560480842, 'loss': 10.091705322265625}


EP_train:4:   5%|| 1342/27626 [03:10<1:02:21,  7.03it/s]

{'epoch': 4, 'iter': 1340, 'avg_loss': 8.50026052092368, 'avg_acc': 49.84386651752424, 'loss': 8.793021202087402}


EP_train:4:   5%|| 1352/27626 [03:11<1:02:33,  7.00it/s]

{'epoch': 4, 'iter': 1350, 'avg_loss': 8.499515257086426, 'avg_acc': 49.861213915618066, 'loss': 7.761140823364258}


EP_train:4:   5%|| 1362/27626 [03:13<1:02:19,  7.02it/s]

{'epoch': 4, 'iter': 1360, 'avg_loss': 8.499171618588438, 'avg_acc': 49.87371418074945, 'loss': 8.041414260864258}


EP_train:4:   5%|| 1372/27626 [03:14<1:01:58,  7.06it/s]

{'epoch': 4, 'iter': 1370, 'avg_loss': 8.499166702198165, 'avg_acc': 49.85867979576951, 'loss': 7.936908721923828}


EP_train:4:   5%|| 1382/27626 [03:16<1:01:55,  7.06it/s]

{'epoch': 4, 'iter': 1380, 'avg_loss': 8.495918935144578, 'avg_acc': 49.83707458363505, 'loss': 8.87078857421875}


EP_train:4:   5%|| 1392/27626 [03:17<1:01:58,  7.06it/s]

{'epoch': 4, 'iter': 1390, 'avg_loss': 8.495247185787308, 'avg_acc': 49.83375269590223, 'loss': 8.64760684967041}


EP_train:4:   5%|| 1402/27626 [03:18<1:02:12,  7.03it/s]

{'epoch': 4, 'iter': 1400, 'avg_loss': 8.496533574588293, 'avg_acc': 49.88624197002141, 'loss': 8.379744529724121}


EP_train:4:   5%|| 1412/27626 [03:20<1:02:18,  7.01it/s]

{'epoch': 4, 'iter': 1410, 'avg_loss': 8.496910373341692, 'avg_acc': 49.86268603827073, 'loss': 8.154577255249023}


EP_train:4:   5%|| 1422/27626 [03:21<1:02:09,  7.03it/s]

{'epoch': 4, 'iter': 1420, 'avg_loss': 8.497853130458696, 'avg_acc': 49.84385995777622, 'loss': 8.914311408996582}


EP_train:4:   5%|| 1432/27626 [03:23<1:02:10,  7.02it/s]

{'epoch': 4, 'iter': 1430, 'avg_loss': 8.49674059576625, 'avg_acc': 49.85368623340322, 'loss': 8.311613082885742}


EP_train:4:   5%|| 1442/27626 [03:24<1:02:24,  6.99it/s]

{'epoch': 4, 'iter': 1440, 'avg_loss': 8.493677221016284, 'avg_acc': 49.87638792505204, 'loss': 7.955507278442383}


EP_train:4:   5%|| 1452/27626 [03:25<1:02:00,  7.04it/s]

{'epoch': 4, 'iter': 1450, 'avg_loss': 8.49353034935024, 'avg_acc': 49.862164024810475, 'loss': 8.50300121307373}


EP_train:4:   5%|| 1462/27626 [03:27<1:01:59,  7.03it/s]

{'epoch': 4, 'iter': 1460, 'avg_loss': 8.495934119867512, 'avg_acc': 49.85669062286105, 'loss': 9.206058502197266}


EP_train:4:   5%|| 1472/27626 [03:28<1:02:27,  6.98it/s]

{'epoch': 4, 'iter': 1470, 'avg_loss': 8.498840186484083, 'avg_acc': 49.874660095173354, 'loss': 8.40241527557373}


EP_train:4:   5%|| 1482/27626 [03:30<1:02:08,  7.01it/s]

{'epoch': 4, 'iter': 1480, 'avg_loss': 8.499523927841857, 'avg_acc': 49.860735989196485, 'loss': 8.395832061767578}


EP_train:4:   5%|| 1492/27626 [03:31<1:02:04,  7.02it/s]

{'epoch': 4, 'iter': 1490, 'avg_loss': 8.50009314347081, 'avg_acc': 49.84071093226023, 'loss': 8.4513578414917}


EP_train:4:   5%|| 1502/27626 [03:33<1:01:49,  7.04it/s]

{'epoch': 4, 'iter': 1500, 'avg_loss': 8.500528957270369, 'avg_acc': 49.86675549633578, 'loss': 8.562762260437012}


EP_train:4:   5%|| 1512/27626 [03:34<1:01:46,  7.05it/s]

{'epoch': 4, 'iter': 1510, 'avg_loss': 8.503232302056647, 'avg_acc': 49.85936465916612, 'loss': 8.66376781463623}


EP_train:4:   6%|| 1522/27626 [03:35<1:01:37,  7.06it/s]

{'epoch': 4, 'iter': 1520, 'avg_loss': 8.504400977486455, 'avg_acc': 49.86850756081525, 'loss': 8.1886568069458}


EP_train:4:   6%|| 1532/27626 [03:37<1:02:11,  6.99it/s]

{'epoch': 4, 'iter': 1530, 'avg_loss': 8.504169309318339, 'avg_acc': 49.853037230568255, 'loss': 8.234545707702637}


EP_train:4:   6%|| 1542/27626 [03:38<1:01:43,  7.04it/s]

{'epoch': 4, 'iter': 1540, 'avg_loss': 8.50371698339909, 'avg_acc': 49.88643737832576, 'loss': 7.7478227615356445}


EP_train:4:   6%|| 1552/27626 [03:40<1:02:10,  6.99it/s]

{'epoch': 4, 'iter': 1550, 'avg_loss': 8.501995196732915, 'avg_acc': 49.87709542230819, 'loss': 8.776612281799316}


EP_train:4:   6%|| 1562/27626 [03:41<1:01:49,  7.03it/s]

{'epoch': 4, 'iter': 1560, 'avg_loss': 8.502757152481617, 'avg_acc': 49.8959000640615, 'loss': 7.995871067047119}


EP_train:4:   6%|| 1572/27626 [03:42<1:01:34,  7.05it/s]

{'epoch': 4, 'iter': 1570, 'avg_loss': 8.502583458194453, 'avg_acc': 49.868714194780395, 'loss': 8.175219535827637}


EP_train:4:   6%|| 1582/27626 [03:44<1:01:19,  7.08it/s]

{'epoch': 4, 'iter': 1580, 'avg_loss': 8.503111331693587, 'avg_acc': 49.86361480075902, 'loss': 8.119317054748535}


EP_train:4:   6%|| 1592/27626 [03:45<1:01:33,  7.05it/s]

{'epoch': 4, 'iter': 1590, 'avg_loss': 8.50204211731964, 'avg_acc': 49.8585795097423, 'loss': 8.712190628051758}


EP_train:4:   6%|| 1602/27626 [03:47<1:01:33,  7.05it/s]

{'epoch': 4, 'iter': 1600, 'avg_loss': 8.501121454876262, 'avg_acc': 49.85360712054966, 'loss': 7.904636859893799}


EP_train:4:   6%|| 1612/27626 [03:48<1:02:09,  6.97it/s]

{'epoch': 4, 'iter': 1610, 'avg_loss': 8.50077441728759, 'avg_acc': 49.86421477343265, 'loss': 8.591917991638184}


EP_train:4:   6%|| 1622/27626 [03:50<1:02:04,  6.98it/s]

{'epoch': 4, 'iter': 1620, 'avg_loss': 8.501932282127179, 'avg_acc': 49.9055367057372, 'loss': 8.455814361572266}


EP_train:4:   6%|| 1632/27626 [03:51<1:01:43,  7.02it/s]

{'epoch': 4, 'iter': 1630, 'avg_loss': 8.500637343001468, 'avg_acc': 49.91377988963826, 'loss': 8.492609024047852}


EP_train:4:   6%|| 1642/27626 [03:52<1:02:07,  6.97it/s]

{'epoch': 4, 'iter': 1640, 'avg_loss': 8.501701686238459, 'avg_acc': 49.952391834247415, 'loss': 8.773905754089355}


EP_train:4:   6%|| 1652/27626 [03:54<1:01:46,  7.01it/s]

{'epoch': 4, 'iter': 1650, 'avg_loss': 8.499624350517175, 'avg_acc': 49.9507874015748, 'loss': 7.878639221191406}


EP_train:4:   6%|| 1662/27626 [03:55<1:01:48,  7.00it/s]

{'epoch': 4, 'iter': 1660, 'avg_loss': 8.501929058072655, 'avg_acc': 49.94355809753161, 'loss': 8.11941909790039}


EP_train:4:   6%|| 1672/27626 [03:57<1:01:20,  7.05it/s]

{'epoch': 4, 'iter': 1670, 'avg_loss': 8.502645773919024, 'avg_acc': 49.91958408138839, 'loss': 8.186702728271484}


EP_train:4:   6%|| 1682/27626 [03:58<1:01:26,  7.04it/s]

{'epoch': 4, 'iter': 1680, 'avg_loss': 8.500921160603193, 'avg_acc': 49.892177275431294, 'loss': 8.57938289642334}


EP_train:4:   6%|| 1692/27626 [03:59<1:01:14,  7.06it/s]

{'epoch': 4, 'iter': 1690, 'avg_loss': 8.501724383310762, 'avg_acc': 49.909447072738025, 'loss': 8.859325408935547}


EP_train:4:   6%|| 1702/27626 [04:01<1:02:23,  6.93it/s]

{'epoch': 4, 'iter': 1700, 'avg_loss': 8.504538206126254, 'avg_acc': 49.88977072310406, 'loss': 9.473166465759277}


EP_train:4:   6%|| 1712/27626 [04:02<1:01:33,  7.02it/s]

{'epoch': 4, 'iter': 1710, 'avg_loss': 8.505321850768311, 'avg_acc': 49.899547048509646, 'loss': 8.108916282653809}


EP_train:4:   6%|| 1722/27626 [04:04<1:01:41,  7.00it/s]

{'epoch': 4, 'iter': 1720, 'avg_loss': 8.505034880607646, 'avg_acc': 49.89468332364904, 'loss': 7.610215187072754}


EP_train:4:   6%|| 1732/27626 [04:05<1:01:04,  7.07it/s]

{'epoch': 4, 'iter': 1730, 'avg_loss': 8.505858956837779, 'avg_acc': 49.85918544194107, 'loss': 8.77636432647705}


EP_train:4:   6%|| 1742/27626 [04:07<1:01:20,  7.03it/s]

{'epoch': 4, 'iter': 1740, 'avg_loss': 8.508646406845276, 'avg_acc': 49.8869184376795, 'loss': 9.097662925720215}


EP_train:4:   6%|| 1752/27626 [04:08<1:01:27,  7.02it/s]

{'epoch': 4, 'iter': 1750, 'avg_loss': 8.50625885288624, 'avg_acc': 49.85543974871502, 'loss': 8.698171615600586}


EP_train:4:   6%|| 1762/27626 [04:09<1:01:33,  7.00it/s]

{'epoch': 4, 'iter': 1760, 'avg_loss': 8.50470258809165, 'avg_acc': 49.88110448608745, 'loss': 8.412686347961426}


EP_train:4:   6%|| 1772/27626 [04:11<1:01:26,  7.01it/s]

{'epoch': 4, 'iter': 1770, 'avg_loss': 8.504692116139099, 'avg_acc': 49.87471767363072, 'loss': 8.243163108825684}


EP_train:4:   6%|| 1782/27626 [04:12<1:01:02,  7.06it/s]

{'epoch': 4, 'iter': 1780, 'avg_loss': 8.50464100224593, 'avg_acc': 49.87542111173498, 'loss': 8.347664833068848}


EP_train:4:   6%|| 1792/27626 [04:14<1:01:15,  7.03it/s]

{'epoch': 4, 'iter': 1790, 'avg_loss': 8.505778848891948, 'avg_acc': 49.87960636515913, 'loss': 9.460721969604492}


EP_train:4:   7%|| 1802/27626 [04:15<1:01:11,  7.03it/s]

{'epoch': 4, 'iter': 1800, 'avg_loss': 8.505745978040341, 'avg_acc': 49.88895058300944, 'loss': 8.922025680541992}


EP_train:4:   7%|| 1812/27626 [04:17<1:01:32,  6.99it/s]

{'epoch': 4, 'iter': 1810, 'avg_loss': 8.505350844890357, 'avg_acc': 49.89128934290447, 'loss': 7.981232166290283}


EP_train:4:   7%|| 1822/27626 [04:18<1:01:18,  7.02it/s]

{'epoch': 4, 'iter': 1820, 'avg_loss': 8.50660302362751, 'avg_acc': 49.9176276771005, 'loss': 8.368963241577148}


EP_train:4:   7%|| 1832/27626 [04:19<1:01:01,  7.04it/s]

{'epoch': 4, 'iter': 1830, 'avg_loss': 8.506279069305704, 'avg_acc': 49.91807755324959, 'loss': 8.332878112792969}


EP_train:4:   7%|| 1842/27626 [04:21<1:00:51,  7.06it/s]

{'epoch': 4, 'iter': 1840, 'avg_loss': 8.507591274495619, 'avg_acc': 49.93040467137425, 'loss': 8.732992172241211}


EP_train:4:   7%|| 1852/27626 [04:22<1:01:07,  7.03it/s]

{'epoch': 4, 'iter': 1850, 'avg_loss': 8.508270540603235, 'avg_acc': 49.935845488924905, 'loss': 8.662858009338379}


EP_train:4:   7%|| 1862/27626 [04:24<1:00:58,  7.04it/s]

{'epoch': 4, 'iter': 1860, 'avg_loss': 8.508413453437768, 'avg_acc': 49.95130306286942, 'loss': 8.4762544631958}


EP_train:4:   7%|| 1872/27626 [04:25<1:01:17,  7.00it/s]

{'epoch': 4, 'iter': 1870, 'avg_loss': 8.506676150413199, 'avg_acc': 49.93486103687867, 'loss': 8.885504722595215}


EP_train:4:   7%|| 1882/27626 [04:26<1:01:23,  6.99it/s]

{'epoch': 4, 'iter': 1880, 'avg_loss': 8.508116148685534, 'avg_acc': 49.908625730994146, 'loss': 7.842850685119629}


EP_train:4:   7%|| 1892/27626 [04:28<1:01:19,  6.99it/s]

{'epoch': 4, 'iter': 1890, 'avg_loss': 8.511638221760895, 'avg_acc': 49.922329455314646, 'loss': 8.39251708984375}


EP_train:4:   7%|| 1902/27626 [04:29<1:01:16,  7.00it/s]

{'epoch': 4, 'iter': 1900, 'avg_loss': 8.51268553219614, 'avg_acc': 49.92766964755392, 'loss': 9.394503593444824}


EP_train:4:   7%|| 1912/27626 [04:31<1:01:11,  7.00it/s]

{'epoch': 4, 'iter': 1910, 'avg_loss': 8.514839035375527, 'avg_acc': 49.92314233385662, 'loss': 8.102901458740234}


EP_train:4:   7%|| 1922/27626 [04:32<1:00:45,  7.05it/s]

{'epoch': 4, 'iter': 1920, 'avg_loss': 8.515605783040545, 'avg_acc': 49.917035398230084, 'loss': 8.64680004119873}


EP_train:4:   7%|| 1932/27626 [04:34<1:01:19,  6.98it/s]

{'epoch': 4, 'iter': 1930, 'avg_loss': 8.515961645799, 'avg_acc': 49.88995339202486, 'loss': 9.364805221557617}


EP_train:4:   7%|| 1942/27626 [04:35<1:01:11,  7.00it/s]

{'epoch': 4, 'iter': 1940, 'avg_loss': 8.516723972813853, 'avg_acc': 49.888910355486864, 'loss': 9.668722152709961}


EP_train:4:   7%|| 1952/27626 [04:36<1:01:14,  6.99it/s]

{'epoch': 4, 'iter': 1950, 'avg_loss': 8.51863262434607, 'avg_acc': 49.89909021014864, 'loss': 8.199329376220703}


EP_train:4:   7%|| 1962/27626 [04:38<1:00:34,  7.06it/s]

{'epoch': 4, 'iter': 1960, 'avg_loss': 8.517013487798836, 'avg_acc': 49.91075981642019, 'loss': 7.95339298248291}


EP_train:4:   7%|| 1972/27626 [04:39<1:01:04,  7.00it/s]

{'epoch': 4, 'iter': 1970, 'avg_loss': 8.517308398468854, 'avg_acc': 49.93499492643328, 'loss': 9.525355339050293}


EP_train:4:   7%|| 1982/27626 [04:41<1:01:10,  6.99it/s]

{'epoch': 4, 'iter': 1980, 'avg_loss': 8.516805243937432, 'avg_acc': 49.951097930338214, 'loss': 8.149995803833008}


EP_train:4:   7%|| 1992/27626 [04:42<1:00:31,  7.06it/s]

{'epoch': 4, 'iter': 1990, 'avg_loss': 8.516014492134543, 'avg_acc': 49.91367403314917, 'loss': 8.747309684753418}


EP_train:4:   7%|| 2002/27626 [04:43<1:00:39,  7.04it/s]

{'epoch': 4, 'iter': 2000, 'avg_loss': 8.513729400243955, 'avg_acc': 49.90473513243378, 'loss': 7.379587173461914}


EP_train:4:   7%|| 2012/27626 [04:45<1:01:14,  6.97it/s]

{'epoch': 4, 'iter': 2010, 'avg_loss': 8.5125336061361, 'avg_acc': 49.903654898060665, 'loss': 8.013752937316895}


EP_train:4:   7%|| 2022/27626 [04:46<1:00:27,  7.06it/s]

{'epoch': 4, 'iter': 2020, 'avg_loss': 8.511708351127703, 'avg_acc': 49.87629886194953, 'loss': 9.091861724853516}


EP_train:4:   7%|| 2032/27626 [04:48<1:00:49,  7.01it/s]

{'epoch': 4, 'iter': 2030, 'avg_loss': 8.512446815071876, 'avg_acc': 49.88152387986214, 'loss': 8.454219818115234}


EP_train:4:   7%|| 2042/27626 [04:49<1:00:15,  7.08it/s]

{'epoch': 4, 'iter': 2040, 'avg_loss': 8.512390199798629, 'avg_acc': 49.89129103380696, 'loss': 8.533339500427246}


EP_train:4:   7%|| 2052/27626 [04:51<1:00:45,  7.02it/s]

{'epoch': 4, 'iter': 2050, 'avg_loss': 8.512142123738943, 'avg_acc': 49.905533885909314, 'loss': 8.308137893676758}


EP_train:4:   7%|| 2062/27626 [04:52<1:01:12,  6.96it/s]

{'epoch': 4, 'iter': 2060, 'avg_loss': 8.512450709382287, 'avg_acc': 49.902959728287236, 'loss': 8.285684585571289}


EP_train:4:   8%|| 2072/27626 [04:53<1:01:11,  6.96it/s]

{'epoch': 4, 'iter': 2070, 'avg_loss': 8.511897553803552, 'avg_acc': 49.88532110091743, 'loss': 8.457016944885254}


EP_train:4:   8%|| 2082/27626 [04:55<1:00:28,  7.04it/s]

{'epoch': 4, 'iter': 2080, 'avg_loss': 8.51162787986913, 'avg_acc': 49.87235703988467, 'loss': 8.340108871459961}


EP_train:4:   8%|| 2092/27626 [04:56<1:00:51,  6.99it/s]

{'epoch': 4, 'iter': 2090, 'avg_loss': 8.510179658987372, 'avg_acc': 49.88043998087039, 'loss': 8.311463356018066}


EP_train:4:   8%|| 2102/27626 [04:58<1:00:28,  7.03it/s]

{'epoch': 4, 'iter': 2100, 'avg_loss': 8.512404871917463, 'avg_acc': 49.86762256068539, 'loss': 9.241540908813477}


EP_train:4:   8%|| 2112/27626 [04:59<1:00:51,  6.99it/s]

{'epoch': 4, 'iter': 2110, 'avg_loss': 8.512909467047507, 'avg_acc': 49.85640691615348, 'loss': 9.24702262878418}


EP_train:4:   8%|| 2122/27626 [05:01<1:00:45,  7.00it/s]

{'epoch': 4, 'iter': 2120, 'avg_loss': 8.51408842451895, 'avg_acc': 49.83645685997171, 'loss': 8.90727710723877}


EP_train:4:   8%|| 2132/27626 [05:02<1:00:28,  7.03it/s]

{'epoch': 4, 'iter': 2130, 'avg_loss': 8.514056923354751, 'avg_acc': 49.84602299389958, 'loss': 9.463236808776855}


EP_train:4:   8%|| 2142/27626 [05:03<1:00:11,  7.06it/s]

{'epoch': 4, 'iter': 2140, 'avg_loss': 8.513762377178152, 'avg_acc': 49.84090378327884, 'loss': 8.9380521774292}


EP_train:4:   8%|| 2152/27626 [05:05<1:00:24,  7.03it/s]

{'epoch': 4, 'iter': 2150, 'avg_loss': 8.514456519965291, 'avg_acc': 49.837284983728495, 'loss': 8.641961097717285}


EP_train:4:   8%|| 2162/27626 [05:06<1:00:46,  6.98it/s]

{'epoch': 4, 'iter': 2160, 'avg_loss': 8.51262806919315, 'avg_acc': 49.82357704766312, 'loss': 8.189006805419922}


EP_train:4:   8%|| 2172/27626 [05:08<1:01:14,  6.93it/s]

{'epoch': 4, 'iter': 2170, 'avg_loss': 8.511322344573946, 'avg_acc': 49.83014739751267, 'loss': 8.687813758850098}


EP_train:4:   8%|| 2182/27626 [05:09<1:00:46,  6.98it/s]

{'epoch': 4, 'iter': 2180, 'avg_loss': 8.510885473677238, 'avg_acc': 49.83235900962861, 'loss': 8.832061767578125}


EP_train:4:   8%|| 2192/27626 [05:10<1:00:17,  7.03it/s]

{'epoch': 4, 'iter': 2190, 'avg_loss': 8.51081194849332, 'avg_acc': 49.84025559105431, 'loss': 8.012310981750488}


EP_train:4:   8%|| 2202/27626 [05:12<1:00:43,  6.98it/s]

{'epoch': 4, 'iter': 2200, 'avg_loss': 8.509897442418627, 'avg_acc': 49.8565992730577, 'loss': 8.541257858276367}


EP_train:4:   8%|| 2212/27626 [05:13<1:00:08,  7.04it/s]

{'epoch': 4, 'iter': 2210, 'avg_loss': 8.50919159242655, 'avg_acc': 49.85724785165084, 'loss': 7.978185176849365}


EP_train:4:   8%|| 2222/27626 [05:15<1:00:14,  7.03it/s]

{'epoch': 4, 'iter': 2220, 'avg_loss': 8.50964587897344, 'avg_acc': 49.84804142278253, 'loss': 8.0757417678833}


EP_train:4:   8%|| 2232/27626 [05:16<1:00:16,  7.02it/s]

{'epoch': 4, 'iter': 2230, 'avg_loss': 8.509275877758935, 'avg_acc': 49.8515239802779, 'loss': 8.247203826904297}


EP_train:4:   8%|| 2242/27626 [05:18<1:00:12,  7.03it/s]

{'epoch': 4, 'iter': 2240, 'avg_loss': 8.509634228808068, 'avg_acc': 49.854975457385095, 'loss': 9.002633094787598}


EP_train:4:   8%|| 2252/27626 [05:19<59:48,  7.07it/s]  

{'epoch': 4, 'iter': 2250, 'avg_loss': 8.508983799745009, 'avg_acc': 49.852843180808534, 'loss': 9.011685371398926}


EP_train:4:   8%|| 2262/27626 [05:20<1:00:16,  7.01it/s]

{'epoch': 4, 'iter': 2260, 'avg_loss': 8.509527716368819, 'avg_acc': 49.8562582927908, 'loss': 8.827329635620117}


EP_train:4:   8%|| 2272/27626 [05:22<1:00:07,  7.03it/s]

{'epoch': 4, 'iter': 2270, 'avg_loss': 8.509953239739913, 'avg_acc': 49.85689123734038, 'loss': 8.743624687194824}


EP_train:4:   8%|| 2282/27626 [05:23<59:59,  7.04it/s]

{'epoch': 4, 'iter': 2280, 'avg_loss': 8.512922218837847, 'avg_acc': 49.853408592722495, 'loss': 9.232967376708984}


EP_train:4:   8%|| 2292/27626 [05:25<1:00:15,  7.01it/s]

{'epoch': 4, 'iter': 2290, 'avg_loss': 8.513456044473902, 'avg_acc': 49.85950458315146, 'loss': 8.350224494934082}


EP_train:4:   8%|| 2302/27626 [05:26<59:41,  7.07it/s]

{'epoch': 4, 'iter': 2300, 'avg_loss': 8.51192837909531, 'avg_acc': 49.862831377661884, 'loss': 7.990960597991943}


EP_train:4:   8%|| 2312/27626 [05:27<59:51,  7.05it/s]

{'epoch': 4, 'iter': 2310, 'avg_loss': 8.51146074127501, 'avg_acc': 49.839084811769794, 'loss': 9.700237274169922}


EP_train:4:   8%|| 2322/27626 [05:29<59:56,  7.04it/s]  

{'epoch': 4, 'iter': 2320, 'avg_loss': 8.512588632461172, 'avg_acc': 49.83843171046963, 'loss': 8.304191589355469}


EP_train:4:   8%|| 2332/27626 [05:30<1:00:08,  7.01it/s]

{'epoch': 4, 'iter': 2330, 'avg_loss': 8.51316533831249, 'avg_acc': 49.8471685971686, 'loss': 8.8895845413208}


EP_train:4:   8%|| 2342/27626 [05:32<1:00:11,  7.00it/s]

{'epoch': 4, 'iter': 2340, 'avg_loss': 8.513109564628422, 'avg_acc': 49.831802648440835, 'loss': 8.257791519165039}


EP_train:4:   9%|| 2352/27626 [05:33<59:59,  7.02it/s]  

{'epoch': 4, 'iter': 2350, 'avg_loss': 8.513754646507337, 'avg_acc': 49.81125053168864, 'loss': 8.921172142028809}


EP_train:4:   9%|| 2362/27626 [05:35<59:40,  7.06it/s]

{'epoch': 4, 'iter': 2360, 'avg_loss': 8.512794216322424, 'avg_acc': 49.79351969504447, 'loss': 8.316577911376953}


EP_train:4:   9%|| 2372/27626 [05:36<59:32,  7.07it/s]

{'epoch': 4, 'iter': 2370, 'avg_loss': 8.512471972714492, 'avg_acc': 49.78516448755799, 'loss': 8.705279350280762}


EP_train:4:   9%|| 2382/27626 [05:37<1:00:43,  6.93it/s]

{'epoch': 4, 'iter': 2380, 'avg_loss': 8.513498250399753, 'avg_acc': 49.780816883662325, 'loss': 8.364818572998047}


EP_train:4:   9%|| 2392/27626 [05:39<59:41,  7.05it/s]

{'epoch': 4, 'iter': 2390, 'avg_loss': 8.514886434683486, 'avg_acc': 49.760821831869514, 'loss': 8.818531036376953}


EP_train:4:   9%|| 2402/27626 [05:40<59:39,  7.05it/s]

{'epoch': 4, 'iter': 2400, 'avg_loss': 8.514001035630727, 'avg_acc': 49.777436484798, 'loss': 8.614599227905273}


EP_train:4:   9%|| 2412/27626 [05:42<59:25,  7.07it/s]

{'epoch': 4, 'iter': 2410, 'avg_loss': 8.515646828897065, 'avg_acc': 49.79132102861883, 'loss': 8.550873756408691}


EP_train:4:   9%|| 2422/27626 [05:43<1:00:01,  7.00it/s]

{'epoch': 4, 'iter': 2420, 'avg_loss': 8.515847341623349, 'avg_acc': 49.806381660470876, 'loss': 8.212937355041504}


EP_train:4:   9%|| 2432/27626 [05:44<59:49,  7.02it/s]

{'epoch': 4, 'iter': 2430, 'avg_loss': 8.516292606347482, 'avg_acc': 49.79046688605512, 'loss': 8.484060287475586}


EP_train:4:   9%|| 2442/27626 [05:46<59:16,  7.08it/s]

{'epoch': 4, 'iter': 2440, 'avg_loss': 8.51512626857359, 'avg_acc': 49.8015669807456, 'loss': 8.251704216003418}


EP_train:4:   9%|| 2452/27626 [05:47<59:33,  7.04it/s]

{'epoch': 4, 'iter': 2450, 'avg_loss': 8.514666190199927, 'avg_acc': 49.780701754385966, 'loss': 8.634033203125}


EP_train:4:   9%|| 2462/27626 [05:49<59:26,  7.06it/s]

{'epoch': 4, 'iter': 2460, 'avg_loss': 8.514519659884815, 'avg_acc': 49.781592848435594, 'loss': 7.501045227050781}


EP_train:4:   9%|| 2472/27626 [05:50<59:54,  7.00it/s]  

{'epoch': 4, 'iter': 2470, 'avg_loss': 8.514169477441927, 'avg_acc': 49.763506677458516, 'loss': 8.089451789855957}


EP_train:4:   9%|| 2482/27626 [05:52<59:46,  7.01it/s]  

{'epoch': 4, 'iter': 2480, 'avg_loss': 8.51487116369688, 'avg_acc': 49.75942160419186, 'loss': 8.56821060180664}


EP_train:4:   9%|| 2492/27626 [05:53<59:40,  7.02it/s]

{'epoch': 4, 'iter': 2490, 'avg_loss': 8.514647358060122, 'avg_acc': 49.766659975913285, 'loss': 7.952756881713867}


EP_train:4:   9%|| 2502/27626 [05:54<59:39,  7.02it/s]

{'epoch': 4, 'iter': 2500, 'avg_loss': 8.514385400891829, 'avg_acc': 49.767592962814874, 'loss': 9.869696617126465}


EP_train:4:   9%|| 2512/27626 [05:56<59:30,  7.03it/s]

{'epoch': 4, 'iter': 2510, 'avg_loss': 8.514122364389047, 'avg_acc': 49.78594185583433, 'loss': 8.321728706359863}


EP_train:4:   9%|| 2522/27626 [05:57<59:33,  7.02it/s]

{'epoch': 4, 'iter': 2520, 'avg_loss': 8.514162638980876, 'avg_acc': 49.79298889329631, 'loss': 8.383249282836914}


EP_train:4:   9%|| 2532/27626 [05:59<59:39,  7.01it/s]

{'epoch': 4, 'iter': 2530, 'avg_loss': 8.514103049207044, 'avg_acc': 49.81850059265113, 'loss': 8.690768241882324}


EP_train:4:   9%|| 2542/27626 [06:00<1:00:15,  6.94it/s]

{'epoch': 4, 'iter': 2540, 'avg_loss': 8.513218293083037, 'avg_acc': 49.81306572215663, 'loss': 8.267518043518066}


EP_train:4:   9%|| 2552/27626 [06:01<59:42,  7.00it/s]

{'epoch': 4, 'iter': 2550, 'avg_loss': 8.513218805491807, 'avg_acc': 49.81379851038808, 'loss': 8.111821174621582}


EP_train:4:   9%|| 2562/27626 [06:03<59:37,  7.01it/s]

{'epoch': 4, 'iter': 2560, 'avg_loss': 8.513874560769114, 'avg_acc': 49.82428738773916, 'loss': 8.493392944335938}


EP_train:4:   9%|| 2572/27626 [06:04<59:07,  7.06it/s]

{'epoch': 4, 'iter': 2570, 'avg_loss': 8.514265114210405, 'avg_acc': 49.8310482302606, 'loss': 9.69639778137207}


EP_train:4:   9%|| 2582/27626 [06:06<59:33,  7.01it/s]

{'epoch': 4, 'iter': 2580, 'avg_loss': 8.514379669279611, 'avg_acc': 49.83412437039907, 'loss': 8.978325843811035}


EP_train:4:   9%|| 2592/27626 [06:07<59:21,  7.03it/s]

{'epoch': 4, 'iter': 2590, 'avg_loss': 8.515596263889915, 'avg_acc': 49.825115785411036, 'loss': 9.403368949890137}


EP_train:4:   9%|| 2602/27626 [06:09<59:18,  7.03it/s]

{'epoch': 4, 'iter': 2600, 'avg_loss': 8.517396190266387, 'avg_acc': 49.83419838523645, 'loss': 8.33737564086914}


EP_train:4:   9%|| 2612/27626 [06:10<1:05:41,  6.35it/s]

{'epoch': 4, 'iter': 2610, 'avg_loss': 8.516742856600239, 'avg_acc': 49.84560513213328, 'loss': 7.613482475280762}


EP_train:4:   9%|| 2622/27626 [06:11<59:16,  7.03it/s]

{'epoch': 4, 'iter': 2620, 'avg_loss': 8.51580834907232, 'avg_acc': 49.84500190766883, 'loss': 8.099383354187012}


EP_train:4:  10%|| 2632/27626 [06:13<59:04,  7.05it/s]

{'epoch': 4, 'iter': 2630, 'avg_loss': 8.515635103474366, 'avg_acc': 49.861031927023944, 'loss': 9.191572189331055}


EP_train:4:  10%|| 2642/27626 [06:14<59:21,  7.01it/s]

{'epoch': 4, 'iter': 2640, 'avg_loss': 8.515702459587377, 'avg_acc': 49.86392464975388, 'loss': 9.599143028259277}


EP_train:4:  10%|| 2652/27626 [06:16<59:01,  7.05it/s]

{'epoch': 4, 'iter': 2650, 'avg_loss': 8.517122774473094, 'avg_acc': 49.86325914749151, 'loss': 8.81381893157959}


EP_train:4:  10%|| 2662/27626 [06:17<58:54,  7.06it/s]

{'epoch': 4, 'iter': 2660, 'avg_loss': 8.518136727814387, 'avg_acc': 49.87316798196167, 'loss': 8.54074764251709}


EP_train:4:  10%|| 2672/27626 [06:19<59:45,  6.96it/s]

{'epoch': 4, 'iter': 2670, 'avg_loss': 8.51805816398315, 'avg_acc': 49.88183264694871, 'loss': 8.76475715637207}


EP_train:4:  10%|| 2682/27626 [06:20<59:07,  7.03it/s]

{'epoch': 4, 'iter': 2680, 'avg_loss': 8.518669836648318, 'avg_acc': 49.88577023498694, 'loss': 8.85287857055664}


EP_train:4:  10%|| 2692/27626 [06:21<59:47,  6.95it/s]

{'epoch': 4, 'iter': 2690, 'avg_loss': 8.518324426894115, 'avg_acc': 49.88735600148644, 'loss': 9.108416557312012}


EP_train:4:  10%|| 2702/27626 [06:23<59:44,  6.95it/s]  

{'epoch': 4, 'iter': 2700, 'avg_loss': 8.518654891801118, 'avg_acc': 49.88777304701962, 'loss': 8.290810585021973}


EP_train:4:  10%|| 2712/27626 [06:24<59:17,  7.00it/s]

{'epoch': 4, 'iter': 2710, 'avg_loss': 8.517794287895109, 'avg_acc': 49.87204905938768, 'loss': 8.966451644897461}


EP_train:4:  10%|| 2722/27626 [06:26<59:05,  7.02it/s]

{'epoch': 4, 'iter': 2720, 'avg_loss': 8.51799709960758, 'avg_acc': 49.8736677692025, 'loss': 8.037694931030273}


EP_train:4:  10%|| 2732/27626 [06:27<59:15,  7.00it/s]

{'epoch': 4, 'iter': 2730, 'avg_loss': 8.5179326927998, 'avg_acc': 49.90044855364336, 'loss': 8.583686828613281}


EP_train:4:  10%|| 2742/27626 [06:29<59:20,  6.99it/s]

{'epoch': 4, 'iter': 2740, 'avg_loss': 8.518898217619792, 'avg_acc': 49.890550893834366, 'loss': 9.286311149597168}


EP_train:4:  10%|| 2752/27626 [06:30<58:53,  7.04it/s]

{'epoch': 4, 'iter': 2750, 'avg_loss': 8.519696434988884, 'avg_acc': 49.88867684478372, 'loss': 8.227889060974121}


EP_train:4:  10%|| 2762/27626 [06:31<59:06,  7.01it/s]

{'epoch': 4, 'iter': 2760, 'avg_loss': 8.520250191597004, 'avg_acc': 49.8947392249185, 'loss': 9.249265670776367}


EP_train:4:  10%|| 2772/27626 [06:33<59:04,  7.01it/s]

{'epoch': 4, 'iter': 2770, 'avg_loss': 8.520587028385027, 'avg_acc': 49.88496932515337, 'loss': 8.675239562988281}


EP_train:4:  10%|| 2782/27626 [06:34<59:17,  6.98it/s]

{'epoch': 4, 'iter': 2780, 'avg_loss': 8.519846042597392, 'avg_acc': 49.879764473211075, 'loss': 8.5941801071167}


EP_train:4:  10%|| 2792/27626 [06:36<59:28,  6.96it/s]

{'epoch': 4, 'iter': 2790, 'avg_loss': 8.520774272384289, 'avg_acc': 49.889152633464704, 'loss': 7.7050862312316895}


EP_train:4:  10%|| 2802/27626 [06:37<58:54,  7.02it/s]

{'epoch': 4, 'iter': 2800, 'avg_loss': 8.522073985593142, 'avg_acc': 49.87950731881471, 'loss': 7.3306660652160645}


EP_train:4:  10%|| 2812/27626 [06:38<58:32,  7.06it/s]

{'epoch': 4, 'iter': 2810, 'avg_loss': 8.52204987944254, 'avg_acc': 49.87993596584845, 'loss': 8.54202651977539}


EP_train:4:  10%|| 2822/27626 [06:40<59:03,  7.00it/s]

{'epoch': 4, 'iter': 2820, 'avg_loss': 8.521113096674636, 'avg_acc': 49.87925381070543, 'loss': 8.473004341125488}


EP_train:4:  10%|| 2832/27626 [06:41<58:38,  7.05it/s]

{'epoch': 4, 'iter': 2830, 'avg_loss': 8.520953208892347, 'avg_acc': 49.854291769692686, 'loss': 8.575772285461426}


EP_train:4:  10%|| 2842/27626 [06:43<58:27,  7.07it/s]

{'epoch': 4, 'iter': 2840, 'avg_loss': 8.520062353610154, 'avg_acc': 49.86030447025695, 'loss': 7.917901515960693}


EP_train:4:  10%|| 2852/27626 [06:44<58:37,  7.04it/s]

{'epoch': 4, 'iter': 2850, 'avg_loss': 8.519245367140487, 'avg_acc': 49.8673710978604, 'loss': 7.896544456481934}


EP_train:4:  10%|| 2862/27626 [06:46<58:59,  7.00it/s]

{'epoch': 4, 'iter': 2860, 'avg_loss': 8.51799583318391, 'avg_acc': 49.86783467319119, 'loss': 8.266921043395996}


EP_train:4:  10%|| 2872/27626 [06:47<58:35,  7.04it/s]

{'epoch': 4, 'iter': 2870, 'avg_loss': 8.518355614380818, 'avg_acc': 49.8780912574016, 'loss': 8.768723487854004}


EP_train:4:  10%|| 2882/27626 [06:48<59:10,  6.97it/s]

{'epoch': 4, 'iter': 2880, 'avg_loss': 8.51969219454044, 'avg_acc': 49.87309094064561, 'loss': 9.268665313720703}


EP_train:4:  10%|| 2892/27626 [06:50<58:51,  7.00it/s]

{'epoch': 4, 'iter': 2890, 'avg_loss': 8.519865154766451, 'avg_acc': 49.883258388101005, 'loss': 8.282201766967773}


EP_train:4:  11%|| 2902/27626 [06:51<58:27,  7.05it/s]

{'epoch': 4, 'iter': 2900, 'avg_loss': 8.51966802123003, 'avg_acc': 49.87504308859015, 'loss': 9.02982234954834}


EP_train:4:  11%|| 2912/27626 [06:53<58:33,  7.03it/s]

{'epoch': 4, 'iter': 2910, 'avg_loss': 8.519722665801813, 'avg_acc': 49.861516660941255, 'loss': 8.19983959197998}


EP_train:4:  11%|| 2922/27626 [06:54<59:11,  6.96it/s]

{'epoch': 4, 'iter': 2920, 'avg_loss': 8.51945937149168, 'avg_acc': 49.85450188291681, 'loss': 8.025940895080566}


EP_train:4:  11%|| 2932/27626 [06:55<58:18,  7.06it/s]

{'epoch': 4, 'iter': 2930, 'avg_loss': 8.519735040863152, 'avg_acc': 49.8528659160696, 'loss': 8.510401725769043}


EP_train:4:  11%|| 2942/27626 [06:57<58:33,  7.02it/s]

{'epoch': 4, 'iter': 2940, 'avg_loss': 8.519339714679374, 'avg_acc': 49.85974158449507, 'loss': 8.893167495727539}


EP_train:4:  11%|| 2952/27626 [06:58<58:44,  7.00it/s]

{'epoch': 4, 'iter': 2950, 'avg_loss': 8.519807638293079, 'avg_acc': 49.86445272788885, 'loss': 8.685877799987793}


EP_train:4:  11%|| 2962/27626 [07:00<58:39,  7.01it/s]

{'epoch': 4, 'iter': 2960, 'avg_loss': 8.52020063451801, 'avg_acc': 49.85857818304627, 'loss': 8.889437675476074}


EP_train:4:  11%|| 2972/27626 [07:01<58:51,  6.98it/s]

{'epoch': 4, 'iter': 2970, 'avg_loss': 8.520418810868495, 'avg_acc': 49.863261528105014, 'loss': 8.614262580871582}


EP_train:4:  11%|| 2982/27626 [07:03<58:37,  7.01it/s]

{'epoch': 4, 'iter': 2980, 'avg_loss': 8.520521389322367, 'avg_acc': 49.86267192217377, 'loss': 7.943949222564697}


EP_train:4:  11%|| 2992/27626 [07:04<58:23,  7.03it/s]

{'epoch': 4, 'iter': 2990, 'avg_loss': 8.521691630812084, 'avg_acc': 49.87566867268472, 'loss': 8.599371910095215}


EP_train:4:  11%|| 3002/27626 [07:05<58:56,  6.96it/s]

{'epoch': 4, 'iter': 3000, 'avg_loss': 8.52344126655276, 'avg_acc': 49.88128957014328, 'loss': 8.759489059448242}


EP_train:4:  11%|| 3012/27626 [07:07<58:09,  7.05it/s]

{'epoch': 4, 'iter': 3010, 'avg_loss': 8.52423315450465, 'avg_acc': 49.88583527067419, 'loss': 9.684593200683594}


EP_train:4:  11%|| 3022/27626 [07:08<58:07,  7.05it/s]

{'epoch': 4, 'iter': 3020, 'avg_loss': 8.524645635048627, 'avg_acc': 49.902763985435286, 'loss': 8.048571586608887}


EP_train:4:  11%|| 3032/27626 [07:10<58:42,  6.98it/s]

{'epoch': 4, 'iter': 3030, 'avg_loss': 8.524972575051445, 'avg_acc': 49.9010227647641, 'loss': 8.190155982971191}


EP_train:4:  11%|| 3042/27626 [07:11<58:09,  7.04it/s]

{'epoch': 4, 'iter': 3040, 'avg_loss': 8.524583247647948, 'avg_acc': 49.9105968431437, 'loss': 8.194904327392578}


EP_train:4:  11%|| 3052/27626 [07:12<58:21,  7.02it/s]

{'epoch': 4, 'iter': 3050, 'avg_loss': 8.524529498665109, 'avg_acc': 49.90576860045886, 'loss': 8.884668350219727}


EP_train:4:  11%|| 3062/27626 [07:14<58:29,  7.00it/s]

{'epoch': 4, 'iter': 3060, 'avg_loss': 8.524123511490421, 'avg_acc': 49.90913917020581, 'loss': 9.022167205810547}


EP_train:4:  11%|| 3072/27626 [07:15<58:08,  7.04it/s]

{'epoch': 4, 'iter': 3070, 'avg_loss': 8.523790892542321, 'avg_acc': 49.91961087593618, 'loss': 8.142274856567383}


EP_train:4:  11%|| 3082/27626 [07:17<58:14,  7.02it/s]

{'epoch': 4, 'iter': 3080, 'avg_loss': 8.524253700507689, 'avg_acc': 49.91784323271665, 'loss': 8.675854682922363}


EP_train:4:  11%|| 3092/27626 [07:18<58:06,  7.04it/s]

{'epoch': 4, 'iter': 3090, 'avg_loss': 8.524440160194134, 'avg_acc': 49.929230022646394, 'loss': 8.45496940612793}


EP_train:4:  11%|| 3102/27626 [07:20<57:53,  7.06it/s]

{'epoch': 4, 'iter': 3100, 'avg_loss': 8.525674723225691, 'avg_acc': 49.93752015478878, 'loss': 8.794100761413574}


EP_train:4:  11%|| 3112/27626 [07:21<57:52,  7.06it/s]

{'epoch': 4, 'iter': 3110, 'avg_loss': 8.52606054135589, 'avg_acc': 49.94173899067824, 'loss': 7.978766918182373}


EP_train:4:  11%|| 3122/27626 [07:22<58:10,  7.02it/s]

{'epoch': 4, 'iter': 3120, 'avg_loss': 8.526703673652715, 'avg_acc': 49.93491669336751, 'loss': 8.46835994720459}


EP_train:4:  11%|| 3132/27626 [07:24<58:01,  7.04it/s]

{'epoch': 4, 'iter': 3130, 'avg_loss': 8.526789608720543, 'avg_acc': 49.94610348131587, 'loss': 9.24148178100586}


EP_train:4:  11%|| 3142/27626 [07:25<58:20,  6.99it/s]

{'epoch': 4, 'iter': 3140, 'avg_loss': 8.52666032135923, 'avg_acc': 49.94627507163324, 'loss': 8.39791488647461}


EP_train:4:  11%|| 3152/27626 [07:27<58:37,  6.96it/s]

{'epoch': 4, 'iter': 3150, 'avg_loss': 8.52741403407197, 'avg_acc': 49.942478578229135, 'loss': 8.368416786193848}


EP_train:4:  11%|| 3162/27626 [07:28<58:02,  7.02it/s]

{'epoch': 4, 'iter': 3160, 'avg_loss': 8.527262239809167, 'avg_acc': 49.95056944005062, 'loss': 7.738027095794678}


EP_train:4:  11%|| 3172/27626 [07:30<58:10,  7.01it/s]

{'epoch': 4, 'iter': 3170, 'avg_loss': 8.526418342909126, 'avg_acc': 49.949739829706715, 'loss': 9.097031593322754}


EP_train:4:  12%|| 3182/27626 [07:31<57:49,  7.05it/s]

{'epoch': 4, 'iter': 3180, 'avg_loss': 8.526571850175566, 'avg_acc': 49.93810908519333, 'loss': 9.291601181030273}


EP_train:4:  12%|| 3192/27626 [07:32<57:31,  7.08it/s]

{'epoch': 4, 'iter': 3190, 'avg_loss': 8.52739189321888, 'avg_acc': 49.93732372297085, 'loss': 8.897403717041016}


EP_train:4:  12%|| 3202/27626 [07:34<58:03,  7.01it/s]

{'epoch': 4, 'iter': 3200, 'avg_loss': 8.527765328569064, 'avg_acc': 49.94532958450485, 'loss': 9.268406867980957}


EP_train:4:  12%|| 3212/27626 [07:35<57:47,  7.04it/s]

{'epoch': 4, 'iter': 3210, 'avg_loss': 8.528201220526348, 'avg_acc': 49.9435534101526, 'loss': 8.222951889038086}


EP_train:4:  12%|| 3222/27626 [07:37<57:37,  7.06it/s]

{'epoch': 4, 'iter': 3220, 'avg_loss': 8.52856266983397, 'avg_acc': 49.95246041601987, 'loss': 9.10264778137207}


EP_train:4:  12%|| 3232/27626 [07:38<57:44,  7.04it/s]

{'epoch': 4, 'iter': 3230, 'avg_loss': 8.528201001821676, 'avg_acc': 49.94293562364593, 'loss': 8.043208122253418}


EP_train:4:  12%|| 3242/27626 [07:39<58:32,  6.94it/s]

{'epoch': 4, 'iter': 3240, 'avg_loss': 8.527621146816783, 'avg_acc': 49.94311169392163, 'loss': 8.21760368347168}


EP_train:4:  12%|| 3252/27626 [07:41<57:49,  7.02it/s]

{'epoch': 4, 'iter': 3250, 'avg_loss': 8.527647946585658, 'avg_acc': 49.948092894494, 'loss': 8.20034122467041}


EP_train:4:  12%|| 3262/27626 [07:42<57:41,  7.04it/s]

{'epoch': 4, 'iter': 3260, 'avg_loss': 8.52653018026694, 'avg_acc': 49.9482520699172, 'loss': 9.160858154296875}


EP_train:4:  12%|| 3272/27626 [07:44<57:46,  7.03it/s]

{'epoch': 4, 'iter': 3270, 'avg_loss': 8.52665660068625, 'avg_acc': 49.96369611739529, 'loss': 8.028822898864746}


EP_train:4:  12%|| 3282/27626 [07:45<57:07,  7.10it/s]

{'epoch': 4, 'iter': 3280, 'avg_loss': 8.527374053212137, 'avg_acc': 49.972378847912225, 'loss': 9.598177909851074}


EP_train:4:  12%|| 3292/27626 [07:47<58:08,  6.98it/s]

{'epoch': 4, 'iter': 3290, 'avg_loss': 8.527273656096874, 'avg_acc': 49.97531145548466, 'loss': 8.168083190917969}


EP_train:4:  12%|| 3302/27626 [07:48<58:22,  6.94it/s]

{'epoch': 4, 'iter': 3300, 'avg_loss': 8.526079552276897, 'avg_acc': 49.98011966070888, 'loss': 8.965521812438965}


EP_train:4:  12%|| 3312/27626 [07:49<57:48,  7.01it/s]

{'epoch': 4, 'iter': 3310, 'avg_loss': 8.525864398936713, 'avg_acc': 49.98017970401691, 'loss': 8.776063919067383}


EP_train:4:  12%|| 3322/27626 [07:51<57:30,  7.04it/s]

{'epoch': 4, 'iter': 3320, 'avg_loss': 8.525508001316602, 'avg_acc': 49.991531165311656, 'loss': 8.059733390808105}


EP_train:4:  12%|| 3332/27626 [07:52<57:36,  7.03it/s]

{'epoch': 4, 'iter': 3330, 'avg_loss': 8.52497630089262, 'avg_acc': 49.99249474632242, 'loss': 8.834590911865234}


EP_train:4:  12%|| 3342/27626 [07:54<57:07,  7.08it/s]

{'epoch': 4, 'iter': 3340, 'avg_loss': 8.524826252328461, 'avg_acc': 50.00187069739599, 'loss': 8.27620792388916}


EP_train:4:  12%|| 3352/27626 [07:55<57:25,  7.05it/s]

{'epoch': 4, 'iter': 3350, 'avg_loss': 8.525750728124507, 'avg_acc': 49.998134885108925, 'loss': 9.235718727111816}


EP_train:4:  12%|| 3362/27626 [07:56<58:04,  6.96it/s]

{'epoch': 4, 'iter': 3360, 'avg_loss': 8.525856507672188, 'avg_acc': 49.98326390955073, 'loss': 8.017594337463379}


EP_train:4:  12%|| 3372/27626 [07:58<57:34,  7.02it/s]

{'epoch': 4, 'iter': 3370, 'avg_loss': 8.525665382078584, 'avg_acc': 49.99721892613468, 'loss': 8.370052337646484}


EP_train:4:  12%|| 3382/27626 [07:59<57:32,  7.02it/s]

{'epoch': 4, 'iter': 3380, 'avg_loss': 8.525584778959178, 'avg_acc': 49.99907571724342, 'loss': 8.766755104064941}


EP_train:4:  12%|| 3392/27626 [08:01<57:19,  7.05it/s]

{'epoch': 4, 'iter': 3390, 'avg_loss': 8.525215999386157, 'avg_acc': 50.005529342376875, 'loss': 8.60793399810791}


EP_train:4:  12%|| 3402/27626 [08:02<57:03,  7.08it/s]

{'epoch': 4, 'iter': 3400, 'avg_loss': 8.526850024870795, 'avg_acc': 50.01194501617171, 'loss': 9.041451454162598}


EP_train:4:  12%|| 3412/27626 [08:04<57:32,  7.01it/s]

{'epoch': 4, 'iter': 3410, 'avg_loss': 8.525899749527715, 'avg_acc': 50.00183230724128, 'loss': 8.394843101501465}


EP_train:4:  12%|| 3422/27626 [08:05<57:49,  6.98it/s]

{'epoch': 4, 'iter': 3420, 'avg_loss': 8.525669680112847, 'avg_acc': 50.005480853551596, 'loss': 8.480911254882812}


EP_train:4:  12%|| 3432/27626 [08:06<57:40,  6.99it/s]

{'epoch': 4, 'iter': 3430, 'avg_loss': 8.524678394141958, 'avg_acc': 50.00910813174002, 'loss': 7.779542446136475}


EP_train:4:  12%|| 3442/27626 [08:08<58:09,  6.93it/s]

{'epoch': 4, 'iter': 3440, 'avg_loss': 8.523191734581138, 'avg_acc': 50.013622493461206, 'loss': 7.763975143432617}


EP_train:4:  12%|| 3452/27626 [08:09<57:26,  7.01it/s]

{'epoch': 4, 'iter': 3450, 'avg_loss': 8.523580008401487, 'avg_acc': 50.01811069255289, 'loss': 8.45434856414795}


EP_train:4:  13%|| 3462/27626 [08:11<57:36,  6.99it/s]

{'epoch': 4, 'iter': 3460, 'avg_loss': 8.523431075442495, 'avg_acc': 50.02257295579312, 'loss': 8.918213844299316}


EP_train:4:  13%|| 3472/27626 [08:12<57:13,  7.03it/s]

{'epoch': 4, 'iter': 3470, 'avg_loss': 8.522279232044928, 'avg_acc': 50.0171060213195, 'loss': 9.036992073059082}


EP_train:4:  13%|| 3482/27626 [08:14<57:31,  6.99it/s]

{'epoch': 4, 'iter': 3480, 'avg_loss': 8.521358850939937, 'avg_acc': 50.03501149095088, 'loss': 8.080443382263184}


EP_train:4:  13%|| 3492/27626 [08:15<57:03,  7.05it/s]

{'epoch': 4, 'iter': 3490, 'avg_loss': 8.522300129188094, 'avg_acc': 50.03401604124893, 'loss': 9.801725387573242}


EP_train:4:  13%|| 3502/27626 [08:16<57:10,  7.03it/s]

{'epoch': 4, 'iter': 3500, 'avg_loss': 8.524633306940771, 'avg_acc': 50.03570408454727, 'loss': 9.959237098693848}


EP_train:4:  13%|| 3512/27626 [08:18<56:59,  7.05it/s]

{'epoch': 4, 'iter': 3510, 'avg_loss': 8.524864477773095, 'avg_acc': 50.04717317003703, 'loss': 8.333928108215332}


EP_train:4:  13%|| 3522/27626 [08:19<56:51,  7.07it/s]

{'epoch': 4, 'iter': 3520, 'avg_loss': 8.525098434194723, 'avg_acc': 50.05591451292246, 'loss': 8.29233169555664}


EP_train:4:  13%|| 3532/27626 [08:21<56:50,  7.06it/s]

{'epoch': 4, 'iter': 3530, 'avg_loss': 8.524823550427138, 'avg_acc': 50.061951288586805, 'loss': 7.900958061218262}


EP_train:4:  13%|| 3542/27626 [08:22<57:21,  7.00it/s]

{'epoch': 4, 'iter': 3540, 'avg_loss': 8.52463720209078, 'avg_acc': 50.06089381530641, 'loss': 7.728555202484131}


EP_train:4:  13%|| 3552/27626 [08:23<57:05,  7.03it/s]

{'epoch': 4, 'iter': 3550, 'avg_loss': 8.524301520873713, 'avg_acc': 50.06512250070403, 'loss': 8.10939884185791}


EP_train:4:  13%|| 3562/27626 [08:25<56:52,  7.05it/s]

{'epoch': 4, 'iter': 3560, 'avg_loss': 8.524036513523786, 'avg_acc': 50.070204998595905, 'loss': 8.1964750289917}


EP_train:4:  13%|| 3572/27626 [08:26<56:45,  7.06it/s]

{'epoch': 4, 'iter': 3570, 'avg_loss': 8.522704736821225, 'avg_acc': 50.070008401008124, 'loss': 8.152873992919922}


EP_train:4:  13%|| 3582/27626 [08:28<57:04,  7.02it/s]

{'epoch': 4, 'iter': 3580, 'avg_loss': 8.522359840033008, 'avg_acc': 50.078539514102204, 'loss': 8.306147575378418}


EP_train:4:  13%|| 3592/27626 [08:29<57:23,  6.98it/s]

{'epoch': 4, 'iter': 3590, 'avg_loss': 8.522685687145593, 'avg_acc': 50.08789334447229, 'loss': 8.429641723632812}


EP_train:4:  13%|| 3602/27626 [08:30<56:41,  7.06it/s]

{'epoch': 4, 'iter': 3600, 'avg_loss': 8.523499321056981, 'avg_acc': 50.08244237711747, 'loss': 9.072417259216309}


EP_train:4:  13%|| 3612/27626 [08:32<57:01,  7.02it/s]

{'epoch': 4, 'iter': 3610, 'avg_loss': 8.523275107038017, 'avg_acc': 50.08481030185544, 'loss': 7.819800853729248}


EP_train:4:  13%|| 3622/27626 [08:33<57:12,  6.99it/s]

{'epoch': 4, 'iter': 3620, 'avg_loss': 8.523408297049146, 'avg_acc': 50.08198702016018, 'loss': 9.212806701660156}


EP_train:4:  13%|| 3632/27626 [08:35<57:01,  7.01it/s]

{'epoch': 4, 'iter': 3630, 'avg_loss': 8.524073088211779, 'avg_acc': 50.080900578353074, 'loss': 8.86286735534668}


EP_train:4:  13%|| 3642/27626 [08:36<56:28,  7.08it/s]

{'epoch': 4, 'iter': 3640, 'avg_loss': 8.524018917198989, 'avg_acc': 50.07552870090635, 'loss': 8.871830940246582}


EP_train:4:  13%|| 3652/27626 [08:38<56:32,  7.07it/s]

{'epoch': 4, 'iter': 3650, 'avg_loss': 8.523490598710456, 'avg_acc': 50.06847439057792, 'loss': 7.814986705780029}


EP_train:4:  13%|| 3662/27626 [08:39<56:48,  7.03it/s]

{'epoch': 4, 'iter': 3660, 'avg_loss': 8.523327455335679, 'avg_acc': 50.06828735318219, 'loss': 8.502567291259766}


EP_train:4:  13%|| 3672/27626 [08:40<56:30,  7.06it/s]

{'epoch': 4, 'iter': 3670, 'avg_loss': 8.523484148858387, 'avg_acc': 50.06214246799237, 'loss': 8.987117767333984}


EP_train:4:  13%|| 3682/27626 [08:42<56:36,  7.05it/s]

{'epoch': 4, 'iter': 3680, 'avg_loss': 8.522952350641328, 'avg_acc': 50.061973648465084, 'loss': 9.077047348022461}


EP_train:4:  13%|| 3692/27626 [08:43<56:55,  7.01it/s]

{'epoch': 4, 'iter': 3690, 'avg_loss': 8.522812118700804, 'avg_acc': 50.05333920346789, 'loss': 8.075518608093262}


EP_train:4:  13%|| 3702/27626 [08:45<56:41,  7.03it/s]

{'epoch': 4, 'iter': 3700, 'avg_loss': 8.522699343839294, 'avg_acc': 50.05319508241016, 'loss': 8.478704452514648}


EP_train:4:  13%|| 3712/27626 [08:46<57:12,  6.97it/s]

{'epoch': 4, 'iter': 3710, 'avg_loss': 8.523492096248454, 'avg_acc': 50.04210455402857, 'loss': 8.74747371673584}


EP_train:4:  13%|| 3722/27626 [08:48<56:45,  7.02it/s]

{'epoch': 4, 'iter': 3720, 'avg_loss': 8.523703514776766, 'avg_acc': 50.0369524321419, 'loss': 8.403639793395996}


EP_train:4:  14%|| 3732/27626 [08:49<56:34,  7.04it/s]

{'epoch': 4, 'iter': 3730, 'avg_loss': 8.522943842797648, 'avg_acc': 50.039366121683194, 'loss': 7.6886677742004395}


EP_train:4:  14%|| 3742/27626 [08:50<56:29,  7.05it/s]

{'epoch': 4, 'iter': 3740, 'avg_loss': 8.522394353901756, 'avg_acc': 50.04594359796846, 'loss': 8.429899215698242}


EP_train:4:  14%|| 3752/27626 [08:52<56:32,  7.04it/s]

{'epoch': 4, 'iter': 3750, 'avg_loss': 8.52347452651339, 'avg_acc': 50.05915089309517, 'loss': 8.880988121032715}


EP_train:4:  14%|| 3762/27626 [08:53<56:22,  7.06it/s]

{'epoch': 4, 'iter': 3760, 'avg_loss': 8.522758390699984, 'avg_acc': 50.06065541079499, 'loss': 7.812322616577148}


EP_train:4:  14%|| 3772/27626 [08:55<56:37,  7.02it/s]

{'epoch': 4, 'iter': 3770, 'avg_loss': 8.521749698903951, 'avg_acc': 50.06629541235746, 'loss': 8.10738754272461}


EP_train:4:  14%|| 3782/27626 [08:56<56:38,  7.02it/s]

{'epoch': 4, 'iter': 3780, 'avg_loss': 8.522696690911244, 'avg_acc': 50.07438508331129, 'loss': 8.950037956237793}


EP_train:4:  14%|| 3792/27626 [08:57<56:20,  7.05it/s]

{'epoch': 4, 'iter': 3790, 'avg_loss': 8.523157218740911, 'avg_acc': 50.06347269849643, 'loss': 8.592032432556152}


EP_train:4:  14%|| 3802/27626 [08:59<56:36,  7.01it/s]

{'epoch': 4, 'iter': 3800, 'avg_loss': 8.523014833098554, 'avg_acc': 50.06741646935017, 'loss': 8.937873840332031}


EP_train:4:  14%|| 3812/27626 [09:00<56:28,  7.03it/s]

{'epoch': 4, 'iter': 3810, 'avg_loss': 8.522647987041408, 'avg_acc': 50.066419574914725, 'loss': 8.062938690185547}


EP_train:4:  14%|| 3822/27626 [09:02<56:29,  7.02it/s]

{'epoch': 4, 'iter': 3820, 'avg_loss': 8.522173835481844, 'avg_acc': 50.06052080607171, 'loss': 8.693958282470703}


EP_train:4:  14%|| 3832/27626 [09:03<56:17,  7.05it/s]

{'epoch': 4, 'iter': 3830, 'avg_loss': 8.521874550894326, 'avg_acc': 50.05057426259463, 'loss': 8.689903259277344}


EP_train:4:  14%|| 3842/27626 [09:05<56:09,  7.06it/s]

{'epoch': 4, 'iter': 3840, 'avg_loss': 8.522151041142614, 'avg_acc': 50.05369695391825, 'loss': 9.119399070739746}


EP_train:4:  14%|| 3852/27626 [09:06<56:01,  7.07it/s]

{'epoch': 4, 'iter': 3850, 'avg_loss': 8.52223228349589, 'avg_acc': 50.047877174759805, 'loss': 8.743562698364258}


EP_train:4:  14%|| 3862/27626 [09:07<56:47,  6.97it/s]

{'epoch': 4, 'iter': 3860, 'avg_loss': 8.522385750246677, 'avg_acc': 50.04370629370629, 'loss': 9.076946258544922}


EP_train:4:  14%|| 3872/27626 [09:09<56:38,  6.99it/s]

{'epoch': 4, 'iter': 3870, 'avg_loss': 8.522264602910454, 'avg_acc': 50.03955696202531, 'loss': 7.7088704109191895}


EP_train:4:  14%|| 3882/27626 [09:10<56:08,  7.05it/s]

{'epoch': 4, 'iter': 3880, 'avg_loss': 8.521438928155424, 'avg_acc': 50.04831229064675, 'loss': 8.291377067565918}


EP_train:4:  14%|| 3892/27626 [09:12<55:56,  7.07it/s]

{'epoch': 4, 'iter': 3890, 'avg_loss': 8.52125823139317, 'avg_acc': 50.04979439732716, 'loss': 8.08157730102539}


EP_train:4:  14%|| 3902/27626 [09:13<56:11,  7.04it/s]

{'epoch': 4, 'iter': 3900, 'avg_loss': 8.521323016532657, 'avg_acc': 50.037650602409634, 'loss': 8.4483060836792}


EP_train:4:  14%|| 3912/27626 [09:14<56:20,  7.01it/s]

{'epoch': 4, 'iter': 3910, 'avg_loss': 8.521497814711998, 'avg_acc': 50.04234850421887, 'loss': 8.497297286987305}


EP_train:4:  14%|| 3922/27626 [09:16<56:35,  6.98it/s]

{'epoch': 4, 'iter': 3920, 'avg_loss': 8.52204690907446, 'avg_acc': 50.03108263198164, 'loss': 8.217180252075195}


EP_train:4:  14%|| 3932/27626 [09:17<55:59,  7.05it/s]

{'epoch': 4, 'iter': 3930, 'avg_loss': 8.522433964722758, 'avg_acc': 50.041338081913, 'loss': 8.131043434143066}


EP_train:4:  14%|| 3942/27626 [09:19<56:02,  7.04it/s]

{'epoch': 4, 'iter': 3940, 'avg_loss': 8.52255215955791, 'avg_acc': 50.04916264907384, 'loss': 8.639365196228027}


EP_train:4:  14%|| 3952/27626 [09:20<56:05,  7.03it/s]

{'epoch': 4, 'iter': 3950, 'avg_loss': 8.523348875270255, 'avg_acc': 50.057738547203236, 'loss': 8.726285934448242}


EP_train:4:  14%|| 3962/27626 [09:21<56:12,  7.02it/s]

{'epoch': 4, 'iter': 3960, 'avg_loss': 8.523683715787568, 'avg_acc': 50.05917066397374, 'loss': 8.360223770141602}


EP_train:4:  14%|| 3972/27626 [09:23<55:56,  7.05it/s]

{'epoch': 4, 'iter': 3970, 'avg_loss': 8.522783273294872, 'avg_acc': 50.05508687987912, 'loss': 8.299459457397461}


EP_train:4:  14%|| 3982/27626 [09:24<55:58,  7.04it/s]

{'epoch': 4, 'iter': 3980, 'avg_loss': 8.523220795418442, 'avg_acc': 50.05965837729214, 'loss': 9.215912818908691}


EP_train:4:  14%|| 3992/27626 [09:26<55:52,  7.05it/s]

{'epoch': 4, 'iter': 3990, 'avg_loss': 8.524090892119803, 'avg_acc': 50.07282009521423, 'loss': 8.679598808288574}


EP_train:4:  14%|| 4002/27626 [09:27<55:52,  7.05it/s]

{'epoch': 4, 'iter': 4000, 'avg_loss': 8.523560093391302, 'avg_acc': 50.08044863784053, 'loss': 8.92602825164795}


EP_train:4:  15%|| 4012/27626 [09:29<56:25,  6.98it/s]

{'epoch': 4, 'iter': 4010, 'avg_loss': 8.523688202337029, 'avg_acc': 50.08336449763151, 'loss': 8.365986824035645}


EP_train:4:  15%|| 4022/27626 [09:30<55:49,  7.05it/s]

{'epoch': 4, 'iter': 4020, 'avg_loss': 8.524189031055927, 'avg_acc': 50.06605943795076, 'loss': 8.287182807922363}


EP_train:4:  15%|| 4032/27626 [09:31<55:38,  7.07it/s]

{'epoch': 4, 'iter': 4030, 'avg_loss': 8.52438665849936, 'avg_acc': 50.06744604316546, 'loss': 8.431899070739746}


EP_train:4:  15%|| 4042/27626 [09:33<56:14,  6.99it/s]

{'epoch': 4, 'iter': 4040, 'avg_loss': 8.523912652829177, 'avg_acc': 50.07269240287058, 'loss': 8.370471954345703}


EP_train:4:  15%|| 4052/27626 [09:34<56:09,  7.00it/s]

{'epoch': 4, 'iter': 4050, 'avg_loss': 8.524164390870007, 'avg_acc': 50.06711305850408, 'loss': 8.381675720214844}


EP_train:4:  15%|| 4062/27626 [09:36<55:58,  7.02it/s]

{'epoch': 4, 'iter': 4060, 'avg_loss': 8.525117588935595, 'avg_acc': 50.06848682590495, 'loss': 8.777920722961426}


EP_train:4:  15%|| 4072/27626 [09:37<55:49,  7.03it/s]

{'epoch': 4, 'iter': 4070, 'avg_loss': 8.525673853270517, 'avg_acc': 50.06755097027757, 'loss': 8.555166244506836}


EP_train:4:  15%|| 4082/27626 [09:38<55:48,  7.03it/s]

{'epoch': 4, 'iter': 4080, 'avg_loss': 8.525927473884737, 'avg_acc': 50.06815118843421, 'loss': 8.671698570251465}


EP_train:4:  15%|| 4092/27626 [09:40<55:27,  7.07it/s]

{'epoch': 4, 'iter': 4090, 'avg_loss': 8.525569910992623, 'avg_acc': 50.06645685651429, 'loss': 8.326107025146484}


EP_train:4:  15%|| 4102/27626 [09:41<55:50,  7.02it/s]

{'epoch': 4, 'iter': 4100, 'avg_loss': 8.525225001118644, 'avg_acc': 50.06477078761278, 'loss': 9.397439956665039}


EP_train:4:  15%|| 4112/27626 [09:43<56:13,  6.97it/s]

{'epoch': 4, 'iter': 4110, 'avg_loss': 8.525382078700158, 'avg_acc': 50.06993432254926, 'loss': 8.774581909179688}


EP_train:4:  15%|| 4122/27626 [09:44<55:33,  7.05it/s]

{'epoch': 4, 'iter': 4120, 'avg_loss': 8.525588235760221, 'avg_acc': 50.07355617568552, 'loss': 7.993484973907471}


EP_train:4:  15%|| 4132/27626 [09:46<55:48,  7.02it/s]

{'epoch': 4, 'iter': 4130, 'avg_loss': 8.525013340151474, 'avg_acc': 50.063543936092955, 'loss': 8.4263334274292}


EP_train:4:  15%|| 4142/27626 [09:47<55:35,  7.04it/s]

{'epoch': 4, 'iter': 4140, 'avg_loss': 8.52435861270631, 'avg_acc': 50.068673025839175, 'loss': 7.623898506164551}


EP_train:4:  15%|| 4152/27626 [09:48<55:27,  7.05it/s]

{'epoch': 4, 'iter': 4150, 'avg_loss': 8.52376442672373, 'avg_acc': 50.06248494338713, 'loss': 7.856878280639648}


EP_train:4:  15%|| 4162/27626 [09:50<55:35,  7.03it/s]

{'epoch': 4, 'iter': 4160, 'avg_loss': 8.523580949027444, 'avg_acc': 50.06759192501803, 'loss': 9.050277709960938}


EP_train:4:  15%|| 4172/27626 [09:51<55:24,  7.06it/s]

{'epoch': 4, 'iter': 4170, 'avg_loss': 8.523293318665019, 'avg_acc': 50.067429872932145, 'loss': 8.546408653259277}


EP_train:4:  15%|| 4182/27626 [09:53<55:33,  7.03it/s]

{'epoch': 4, 'iter': 4180, 'avg_loss': 8.523628103251207, 'avg_acc': 50.0687634537192, 'loss': 9.003698348999023}


EP_train:4:  15%|| 4192/27626 [09:54<55:17,  7.06it/s]

{'epoch': 4, 'iter': 4190, 'avg_loss': 8.523708372063751, 'avg_acc': 50.05443211644, 'loss': 8.575839042663574}


EP_train:4:  15%|| 4202/27626 [09:55<55:18,  7.06it/s]

{'epoch': 4, 'iter': 4200, 'avg_loss': 8.52365341620115, 'avg_acc': 50.04760771244941, 'loss': 8.684977531433105}


EP_train:4:  15%|| 4212/27626 [09:57<55:25,  7.04it/s]

{'epoch': 4, 'iter': 4210, 'avg_loss': 8.523054518571689, 'avg_acc': 50.0467525528378, 'loss': 8.472805976867676}


EP_train:4:  15%|| 4222/27626 [09:58<55:55,  6.98it/s]

{'epoch': 4, 'iter': 4220, 'avg_loss': 8.522857307342804, 'avg_acc': 50.04516109926558, 'loss': 8.4922456741333}


EP_train:4:  15%|| 4232/27626 [10:00<55:26,  7.03it/s]

{'epoch': 4, 'iter': 4230, 'avg_loss': 8.522262842146338, 'avg_acc': 50.04579295674782, 'loss': 8.219260215759277}


EP_train:4:  15%|| 4242/27626 [10:01<55:21,  7.04it/s]

{'epoch': 4, 'iter': 4240, 'avg_loss': 8.522660267799987, 'avg_acc': 50.05379037962745, 'loss': 7.885796546936035}


EP_train:4:  15%|| 4252/27626 [10:03<55:29,  7.02it/s]

{'epoch': 4, 'iter': 4250, 'avg_loss': 8.52246339403582, 'avg_acc': 50.06027993413314, 'loss': 8.635887145996094}


EP_train:4:  15%|| 4262/27626 [10:04<55:29,  7.02it/s]

{'epoch': 4, 'iter': 4260, 'avg_loss': 8.52288604381358, 'avg_acc': 50.06600563248064, 'loss': 7.7413859367370605}


EP_train:4:  15%|| 4272/27626 [10:05<54:53,  7.09it/s]

{'epoch': 4, 'iter': 4270, 'avg_loss': 8.523026404406476, 'avg_acc': 50.07024116132054, 'loss': 7.877716064453125}


EP_train:4:  15%|| 4282/27626 [10:07<54:58,  7.08it/s]

{'epoch': 4, 'iter': 4280, 'avg_loss': 8.523064757139709, 'avg_acc': 50.06642723662695, 'loss': 9.3992280960083}


EP_train:4:  16%|| 4292/27626 [10:08<55:14,  7.04it/s]

{'epoch': 4, 'iter': 4290, 'avg_loss': 8.523440190112776, 'avg_acc': 50.07282684688884, 'loss': 8.586054801940918}


EP_train:4:  16%|| 4302/27626 [10:10<55:16,  7.03it/s]

{'epoch': 4, 'iter': 4300, 'avg_loss': 8.522994252865438, 'avg_acc': 50.073384096721696, 'loss': 7.544062614440918}


EP_train:4:  16%|| 4312/27626 [10:11<55:21,  7.02it/s]

{'epoch': 4, 'iter': 4310, 'avg_loss': 8.522894885970832, 'avg_acc': 50.08046276965901, 'loss': 8.893698692321777}


EP_train:4:  16%|| 4322/27626 [10:12<55:08,  7.04it/s]

{'epoch': 4, 'iter': 4320, 'avg_loss': 8.523039781724489, 'avg_acc': 50.07883013191391, 'loss': 9.295811653137207}


EP_train:4:  16%|| 4332/27626 [10:14<55:00,  7.06it/s]

{'epoch': 4, 'iter': 4330, 'avg_loss': 8.52342418224975, 'avg_acc': 50.06638189794504, 'loss': 8.75217056274414}


EP_train:4:  16%|| 4342/27626 [10:15<55:11,  7.03it/s]

{'epoch': 4, 'iter': 4340, 'avg_loss': 8.523849027273815, 'avg_acc': 50.06406933886202, 'loss': 8.85476016998291}


EP_train:4:  16%|| 4352/27626 [10:17<55:06,  7.04it/s]

{'epoch': 4, 'iter': 4350, 'avg_loss': 8.524030855950803, 'avg_acc': 50.0761319236957, 'loss': 8.065953254699707}


EP_train:4:  16%|| 4362/27626 [10:18<55:05,  7.04it/s]

{'epoch': 4, 'iter': 4360, 'avg_loss': 8.523280452517461, 'avg_acc': 50.068791561568446, 'loss': 7.888030052185059}


EP_train:4:  16%|| 4372/27626 [10:20<54:52,  7.06it/s]

{'epoch': 4, 'iter': 4370, 'avg_loss': 8.52310432729937, 'avg_acc': 50.07435369480668, 'loss': 8.058098793029785}


EP_train:4:  16%|| 4382/27626 [10:21<55:20,  7.00it/s]

{'epoch': 4, 'iter': 4380, 'avg_loss': 8.522444594426123, 'avg_acc': 50.066337594156586, 'loss': 8.209820747375488}


EP_train:4:  16%|| 4392/27626 [10:22<55:12,  7.01it/s]

{'epoch': 4, 'iter': 4390, 'avg_loss': 8.522617274062613, 'avg_acc': 50.05978137098611, 'loss': 7.740772247314453}


EP_train:4:  16%|| 4402/27626 [10:24<55:19,  7.00it/s]

{'epoch': 4, 'iter': 4400, 'avg_loss': 8.52274590864747, 'avg_acc': 50.067456259940926, 'loss': 8.89045524597168}


EP_train:4:  16%|| 4412/27626 [10:25<55:04,  7.03it/s]

{'epoch': 4, 'iter': 4410, 'avg_loss': 8.523094456797418, 'avg_acc': 50.08005554296078, 'loss': 8.17745590209961}


EP_train:4:  16%|| 4422/27626 [10:27<54:56,  7.04it/s]

{'epoch': 4, 'iter': 4420, 'avg_loss': 8.523950187309488, 'avg_acc': 50.07492648722008, 'loss': 8.679296493530273}


EP_train:4:  16%|| 4432/27626 [10:28<54:53,  7.04it/s]

{'epoch': 4, 'iter': 4430, 'avg_loss': 8.523844452696336, 'avg_acc': 50.07898894154819, 'loss': 8.596814155578613}


EP_train:4:  16%|| 4442/27626 [10:29<55:12,  7.00it/s]

{'epoch': 4, 'iter': 4440, 'avg_loss': 8.524405323312884, 'avg_acc': 50.08725512272011, 'loss': 8.39505672454834}


EP_train:4:  16%|| 4452/27626 [10:31<55:12,  7.00it/s]

{'epoch': 4, 'iter': 4450, 'avg_loss': 8.524243770570227, 'avg_acc': 50.091973713772184, 'loss': 8.579586029052734}


EP_train:4:  16%|| 4462/27626 [10:32<55:06,  7.01it/s]

{'epoch': 4, 'iter': 4460, 'avg_loss': 8.524149434872738, 'avg_acc': 50.086163416274374, 'loss': 8.76827335357666}


EP_train:4:  16%|| 4472/27626 [10:34<54:50,  7.04it/s]

{'epoch': 4, 'iter': 4470, 'avg_loss': 8.524194073021159, 'avg_acc': 50.083873853724, 'loss': 8.958797454833984}


EP_train:4:  16%|| 4482/27626 [10:35<54:30,  7.08it/s]

{'epoch': 4, 'iter': 4480, 'avg_loss': 8.524167216428022, 'avg_acc': 50.08926578888641, 'loss': 8.413973808288574}


EP_train:4:  16%|| 4492/27626 [10:37<55:02,  7.00it/s]

{'epoch': 4, 'iter': 4490, 'avg_loss': 8.52386897795717, 'avg_acc': 50.080021153417945, 'loss': 8.920839309692383}


EP_train:4:  16%|| 4502/27626 [10:38<55:13,  6.98it/s]

{'epoch': 4, 'iter': 4500, 'avg_loss': 8.523140028407113, 'avg_acc': 50.07845478782493, 'loss': 8.036038398742676}


EP_train:4:  16%|| 4512/27626 [10:39<54:53,  7.02it/s]

{'epoch': 4, 'iter': 4510, 'avg_loss': 8.52270334159633, 'avg_acc': 50.06442584792728, 'loss': 7.250033378601074}


EP_train:4:  16%|| 4522/27626 [10:41<54:44,  7.03it/s]

{'epoch': 4, 'iter': 4520, 'avg_loss': 8.521957977765748, 'avg_acc': 50.06082725060828, 'loss': 8.050313949584961}


EP_train:4:  16%|| 4532/27626 [10:42<54:28,  7.06it/s]

{'epoch': 4, 'iter': 4530, 'avg_loss': 8.52179067557391, 'avg_acc': 50.0641414698742, 'loss': 8.442218780517578}


EP_train:4:  16%|| 4542/27626 [10:44<54:49,  7.02it/s]

{'epoch': 4, 'iter': 4540, 'avg_loss': 8.521581164647555, 'avg_acc': 50.05643030169566, 'loss': 8.240486145019531}


EP_train:4:  16%|| 4552/27626 [10:45<54:17,  7.08it/s]

{'epoch': 4, 'iter': 4550, 'avg_loss': 8.522212916168636, 'avg_acc': 50.059739617666445, 'loss': 8.685404777526855}


EP_train:4:  17%|| 4562/27626 [10:46<54:36,  7.04it/s]

{'epoch': 4, 'iter': 4560, 'avg_loss': 8.52203671844924, 'avg_acc': 50.062349265511955, 'loss': 8.775493621826172}


EP_train:4:  17%|| 4572/27626 [10:48<54:35,  7.04it/s]

{'epoch': 4, 'iter': 4570, 'avg_loss': 8.522422630607991, 'avg_acc': 50.067682126449355, 'loss': 8.460273742675781}


EP_train:4:  17%|| 4582/27626 [10:49<54:21,  7.06it/s]

{'epoch': 4, 'iter': 4580, 'avg_loss': 8.522199867438395, 'avg_acc': 50.06685221567343, 'loss': 8.13416576385498}


EP_train:4:  17%|| 4592/27626 [10:51<54:46,  7.01it/s]

{'epoch': 4, 'iter': 4590, 'avg_loss': 8.522105295500758, 'avg_acc': 50.070790677412326, 'loss': 8.692923545837402}


EP_train:4:  17%|| 4602/27626 [10:52<54:47,  7.00it/s]

{'epoch': 4, 'iter': 4600, 'avg_loss': 8.521935328222622, 'avg_acc': 50.07878722016953, 'loss': 7.844514846801758}


EP_train:4:  17%|| 4612/27626 [10:54<54:45,  7.00it/s]

{'epoch': 4, 'iter': 4610, 'avg_loss': 8.522198350691687, 'avg_acc': 50.0833604424203, 'loss': 8.92838191986084}


EP_train:4:  17%|| 4622/27626 [10:55<54:19,  7.06it/s]

{'epoch': 4, 'iter': 4620, 'avg_loss': 8.521664636472014, 'avg_acc': 50.077093702661756, 'loss': 8.257401466369629}


EP_train:4:  17%|| 4632/27626 [10:56<54:12,  7.07it/s]

{'epoch': 4, 'iter': 4630, 'avg_loss': 8.521401729163637, 'avg_acc': 50.07017922694882, 'loss': 8.9781494140625}


EP_train:4:  17%|| 4642/27626 [10:58<54:23,  7.04it/s]

{'epoch': 4, 'iter': 4640, 'avg_loss': 8.521991334478292, 'avg_acc': 50.07137470372764, 'loss': 8.71474838256836}


EP_train:4:  17%|| 4652/27626 [10:59<54:06,  7.08it/s]

{'epoch': 4, 'iter': 4650, 'avg_loss': 8.521591160174879, 'avg_acc': 50.06517415609546, 'loss': 8.744184494018555}


EP_train:4:  17%|| 4662/27626 [11:01<54:15,  7.05it/s]

{'epoch': 4, 'iter': 4660, 'avg_loss': 8.52142122072663, 'avg_acc': 50.07240935421583, 'loss': 8.668968200683594}


EP_train:4:  17%|| 4672/27626 [11:02<54:22,  7.04it/s]

{'epoch': 4, 'iter': 4670, 'avg_loss': 8.52136854205645, 'avg_acc': 50.07626846499679, 'loss': 7.865607738494873}


EP_train:4:  17%|| 4682/27626 [11:03<54:38,  7.00it/s]

{'epoch': 4, 'iter': 4680, 'avg_loss': 8.521227681975516, 'avg_acc': 50.07009720145268, 'loss': 8.134641647338867}


EP_train:4:  17%|| 4692/27626 [11:05<54:41,  6.99it/s]

{'epoch': 4, 'iter': 4690, 'avg_loss': 8.521291441054416, 'avg_acc': 50.065284587507996, 'loss': 8.55152702331543}


EP_train:4:  17%|| 4702/27626 [11:06<53:59,  7.08it/s]

{'epoch': 4, 'iter': 4700, 'avg_loss': 8.522211744084913, 'avg_acc': 50.058498191874065, 'loss': 8.883062362670898}


EP_train:4:  17%|| 4712/27626 [11:08<54:19,  7.03it/s]

{'epoch': 4, 'iter': 4710, 'avg_loss': 8.522152129112346, 'avg_acc': 50.055720653789, 'loss': 7.53423547744751}


EP_train:4:  17%|| 4722/27626 [11:09<54:18,  7.03it/s]

{'epoch': 4, 'iter': 4720, 'avg_loss': 8.52246805632223, 'avg_acc': 50.04898326625715, 'loss': 8.856607437133789}


EP_train:4:  17%|| 4732/27626 [11:11<54:22,  7.02it/s]

{'epoch': 4, 'iter': 4730, 'avg_loss': 8.522669945275197, 'avg_acc': 50.04821919255972, 'loss': 8.884210586547852}


EP_train:4:  17%|| 4742/27626 [11:12<54:17,  7.03it/s]

{'epoch': 4, 'iter': 4740, 'avg_loss': 8.523408512102604, 'avg_acc': 50.04416262391901, 'loss': 9.10089111328125}


EP_train:4:  17%|| 4752/27626 [11:13<54:33,  6.99it/s]

{'epoch': 4, 'iter': 4750, 'avg_loss': 8.523353192861947, 'avg_acc': 50.04078088823406, 'loss': 8.44302749633789}


EP_train:4:  17%|| 4762/27626 [11:15<54:32,  6.99it/s]

{'epoch': 4, 'iter': 4760, 'avg_loss': 8.52341688913699, 'avg_acc': 50.04332073093888, 'loss': 8.835054397583008}


EP_train:4:  17%|| 4772/27626 [11:16<54:54,  6.94it/s]

{'epoch': 4, 'iter': 4770, 'avg_loss': 8.52376660619035, 'avg_acc': 50.03995493607211, 'loss': 8.238926887512207}


EP_train:4:  17%|| 4782/27626 [11:18<56:24,  6.75it/s]

{'epoch': 4, 'iter': 4780, 'avg_loss': 8.52336022904218, 'avg_acc': 50.05032942898975, 'loss': 8.001260757446289}


EP_train:4:  17%|| 4792/27626 [11:19<54:36,  6.97it/s]

{'epoch': 4, 'iter': 4790, 'avg_loss': 8.523126586148894, 'avg_acc': 50.05087664370696, 'loss': 8.7280912399292}


EP_train:4:  17%|| 4802/27626 [11:21<54:03,  7.04it/s]

{'epoch': 4, 'iter': 4800, 'avg_loss': 8.523177617292955, 'avg_acc': 50.046865236409076, 'loss': 8.591296195983887}


EP_train:4:  17%|| 4812/27626 [11:22<54:23,  6.99it/s]

{'epoch': 4, 'iter': 4810, 'avg_loss': 8.522724700801662, 'avg_acc': 50.04806692995219, 'loss': 8.223617553710938}


EP_train:4:  17%|| 4822/27626 [11:23<53:39,  7.08it/s]

{'epoch': 4, 'iter': 4820, 'avg_loss': 8.52262521309369, 'avg_acc': 50.04796722671645, 'loss': 8.498764991760254}


EP_train:4:  17%|| 4832/27626 [11:25<53:32,  7.10it/s]

{'epoch': 4, 'iter': 4830, 'avg_loss': 8.52280273824418, 'avg_acc': 50.053689712274895, 'loss': 9.52354621887207}


EP_train:4:  18%|| 4842/27626 [11:26<53:41,  7.07it/s]

{'epoch': 4, 'iter': 4840, 'avg_loss': 8.5222888660687, 'avg_acc': 50.0445414170626, 'loss': 6.798943042755127}


EP_train:4:  18%|| 4852/27626 [11:28<54:00,  7.03it/s]

{'epoch': 4, 'iter': 4850, 'avg_loss': 8.521553532213703, 'avg_acc': 50.04638218923934, 'loss': 7.252399444580078}


EP_train:4:  18%|| 4862/27626 [11:29<54:05,  7.01it/s]

{'epoch': 4, 'iter': 4860, 'avg_loss': 8.521291124013882, 'avg_acc': 50.041143797572516, 'loss': 7.647130966186523}


EP_train:4:  18%|| 4872/27626 [11:30<53:48,  7.05it/s]

{'epoch': 4, 'iter': 4870, 'avg_loss': 8.522001509645886, 'avg_acc': 50.03464381030589, 'loss': 8.693521499633789}


EP_train:4:  18%|| 4882/27626 [11:32<53:53,  7.03it/s]

{'epoch': 4, 'iter': 4880, 'avg_loss': 8.521271144188567, 'avg_acc': 50.030731407498465, 'loss': 8.63230037689209}


EP_train:4:  18%|| 4892/27626 [11:33<53:41,  7.06it/s]

{'epoch': 4, 'iter': 4890, 'avg_loss': 8.521164665483449, 'avg_acc': 50.03066857493356, 'loss': 8.158795356750488}


EP_train:4:  18%|| 4902/27626 [11:35<53:55,  7.02it/s]

{'epoch': 4, 'iter': 4900, 'avg_loss': 8.5207791647554, 'avg_acc': 50.03443174862273, 'loss': 8.568726539611816}


EP_train:4:  18%|| 4912/27626 [11:36<53:48,  7.04it/s]

{'epoch': 4, 'iter': 4910, 'avg_loss': 8.520753086329332, 'avg_acc': 50.03436163714111, 'loss': 8.230973243713379}


EP_train:4:  18%|| 4922/27626 [11:37<53:26,  7.08it/s]

{'epoch': 4, 'iter': 4920, 'avg_loss': 8.520855517679054, 'avg_acc': 50.04000711237553, 'loss': 8.654525756835938}


EP_train:4:  18%|| 4932/27626 [11:39<53:43,  7.04it/s]

{'epoch': 4, 'iter': 4930, 'avg_loss': 8.521333254174175, 'avg_acc': 50.03802474143176, 'loss': 9.332464218139648}


EP_train:4:  18%|| 4942/27626 [11:40<53:26,  7.07it/s]

{'epoch': 4, 'iter': 4940, 'avg_loss': 8.522006647761982, 'avg_acc': 50.038580246913575, 'loss': 8.66755199432373}


EP_train:4:  18%|| 4952/27626 [11:42<53:46,  7.03it/s]

{'epoch': 4, 'iter': 4950, 'avg_loss': 8.522005536632811, 'avg_acc': 50.037239951524946, 'loss': 8.645598411560059}


EP_train:4:  18%|| 4962/27626 [11:43<53:38,  7.04it/s]

{'epoch': 4, 'iter': 4960, 'avg_loss': 8.522198990308764, 'avg_acc': 50.0403144527313, 'loss': 8.472118377685547}


EP_train:4:  18%|| 4972/27626 [11:45<53:29,  7.06it/s]

{'epoch': 4, 'iter': 4970, 'avg_loss': 8.52180628746814, 'avg_acc': 50.03646147656408, 'loss': 8.660183906555176}


EP_train:4:  18%|| 4982/27626 [11:46<53:47,  7.02it/s]

{'epoch': 4, 'iter': 4980, 'avg_loss': 8.521601107148754, 'avg_acc': 50.04579903633809, 'loss': 8.332801818847656}


EP_train:4:  18%|| 4992/27626 [11:47<53:40,  7.03it/s]

{'epoch': 4, 'iter': 4990, 'avg_loss': 8.521542018507459, 'avg_acc': 50.04633340012021, 'loss': 8.128068923950195}


EP_train:4:  18%|| 5002/27626 [11:49<53:45,  7.01it/s]

{'epoch': 4, 'iter': 5000, 'avg_loss': 8.521714095710825, 'avg_acc': 50.05373925214956, 'loss': 8.962735176086426}


EP_train:4:  18%|| 5012/27626 [11:50<53:31,  7.04it/s]

{'epoch': 4, 'iter': 5010, 'avg_loss': 8.5218935220262, 'avg_acc': 50.05425563759729, 'loss': 8.936187744140625}


EP_train:4:  18%|| 5022/27626 [11:52<53:33,  7.03it/s]

{'epoch': 4, 'iter': 5020, 'avg_loss': 8.52220466995923, 'avg_acc': 50.05165803624776, 'loss': 9.297082901000977}


EP_train:4:  18%|| 5032/27626 [11:53<53:41,  7.01it/s]

{'epoch': 4, 'iter': 5030, 'avg_loss': 8.521946202166108, 'avg_acc': 50.05900914331147, 'loss': 7.80617094039917}


EP_train:4:  18%|| 5042/27626 [11:54<53:50,  6.99it/s]

{'epoch': 4, 'iter': 5040, 'avg_loss': 8.52157604285257, 'avg_acc': 50.053312834755005, 'loss': 7.917421340942383}


EP_train:4:  18%|| 5052/27626 [11:56<53:30,  7.03it/s]

{'epoch': 4, 'iter': 5050, 'avg_loss': 8.521710160084938, 'avg_acc': 50.051969906949125, 'loss': 8.923049926757812}


EP_train:4:  18%|| 5062/27626 [11:57<53:29,  7.03it/s]

{'epoch': 4, 'iter': 5060, 'avg_loss': 8.521684645758484, 'avg_acc': 50.05618948824343, 'loss': 8.765195846557617}


EP_train:4:  18%|| 5072/27626 [11:59<53:35,  7.01it/s]

{'epoch': 4, 'iter': 5070, 'avg_loss': 8.52248308311151, 'avg_acc': 50.05977617826859, 'loss': 8.405646324157715}


EP_train:4:  18%|| 5082/27626 [12:00<53:15,  7.06it/s]

{'epoch': 4, 'iter': 5080, 'avg_loss': 8.521983598916696, 'avg_acc': 50.05904349537492, 'loss': 8.787016868591309}


EP_train:4:  18%|| 5092/27626 [12:02<53:57,  6.96it/s]

{'epoch': 4, 'iter': 5090, 'avg_loss': 8.522436676407535, 'avg_acc': 50.061996660773914, 'loss': 8.658174514770508}


EP_train:4:  18%|| 5102/27626 [12:03<53:08,  7.06it/s]

{'epoch': 4, 'iter': 5100, 'avg_loss': 8.522893179867506, 'avg_acc': 50.064938247402466, 'loss': 8.664931297302246}


EP_train:4:  19%|| 5112/27626 [12:04<53:27,  7.02it/s]

{'epoch': 4, 'iter': 5110, 'avg_loss': 8.522436739063805, 'avg_acc': 50.06909117589513, 'loss': 8.464827537536621}


EP_train:4:  19%|| 5122/27626 [12:06<53:29,  7.01it/s]

{'epoch': 4, 'iter': 5120, 'avg_loss': 8.521856361430876, 'avg_acc': 50.077499511814096, 'loss': 7.754526138305664}


EP_train:4:  19%|| 5132/27626 [12:07<53:10,  7.05it/s]

{'epoch': 4, 'iter': 5130, 'avg_loss': 8.521258932227768, 'avg_acc': 50.07125803936855, 'loss': 8.743293762207031}


EP_train:4:  19%|| 5142/27626 [12:09<53:20,  7.02it/s]

{'epoch': 4, 'iter': 5140, 'avg_loss': 8.521092678642162, 'avg_acc': 50.079629449523445, 'loss': 8.891412734985352}


EP_train:4:  19%|| 5152/27626 [12:10<53:38,  6.98it/s]

{'epoch': 4, 'iter': 5150, 'avg_loss': 8.520764799956742, 'avg_acc': 50.082508250825086, 'loss': 8.195687294006348}


EP_train:4:  19%|| 5162/27626 [12:12<53:19,  7.02it/s]

{'epoch': 4, 'iter': 5160, 'avg_loss': 8.521014894493973, 'avg_acc': 50.076293354001166, 'loss': 8.287286758422852}


EP_train:4:  19%|| 5172/27626 [12:13<53:11,  7.04it/s]

{'epoch': 4, 'iter': 5170, 'avg_loss': 8.52126363452918, 'avg_acc': 50.08521079094953, 'loss': 8.922456741333008}


EP_train:4:  19%|| 5182/27626 [12:14<53:09,  7.04it/s]

{'epoch': 4, 'iter': 5180, 'avg_loss': 8.521765752456243, 'avg_acc': 50.075395676510325, 'loss': 7.782176494598389}


EP_train:4:  19%|| 5192/27626 [12:16<53:29,  6.99it/s]

{'epoch': 4, 'iter': 5190, 'avg_loss': 8.521775541266962, 'avg_acc': 50.0812704681179, 'loss': 9.118664741516113}


EP_train:4:  19%|| 5202/27626 [12:17<52:54,  7.06it/s]

{'epoch': 4, 'iter': 5200, 'avg_loss': 8.521977868361603, 'avg_acc': 50.08291674677946, 'loss': 8.703784942626953}


EP_train:4:  19%|| 5212/27626 [12:19<53:06,  7.03it/s]

{'epoch': 4, 'iter': 5210, 'avg_loss': 8.52207396241347, 'avg_acc': 50.08395701400883, 'loss': 7.621120929718018}


EP_train:4:  19%|| 5222/27626 [12:20<53:37,  6.96it/s]

{'epoch': 4, 'iter': 5220, 'avg_loss': 8.52279459190332, 'avg_acc': 50.06763551043861, 'loss': 8.54238510131836}


EP_train:4:  19%|| 5232/27626 [12:21<53:10,  7.02it/s]

{'epoch': 4, 'iter': 5230, 'avg_loss': 8.523031857792631, 'avg_acc': 50.0716880137641, 'loss': 8.213499069213867}


EP_train:4:  19%|| 5242/27626 [12:23<53:04,  7.03it/s]

{'epoch': 4, 'iter': 5240, 'avg_loss': 8.523199882882171, 'avg_acc': 50.0709549704255, 'loss': 9.495443344116211}


EP_train:4:  19%|| 5252/27626 [12:24<52:57,  7.04it/s]

{'epoch': 4, 'iter': 5250, 'avg_loss': 8.522743093152428, 'avg_acc': 50.07617596648257, 'loss': 7.675971031188965}


EP_train:4:  19%|| 5262/27626 [12:26<52:36,  7.08it/s]

{'epoch': 4, 'iter': 5260, 'avg_loss': 8.521894228546774, 'avg_acc': 50.077219159855545, 'loss': 7.806528568267822}


EP_train:4:  19%|| 5272/27626 [12:27<52:35,  7.09it/s]

{'epoch': 4, 'iter': 5270, 'avg_loss': 8.521472199595058, 'avg_acc': 50.075886928476564, 'loss': 8.480330467224121}


EP_train:4:  19%|| 5282/27626 [12:28<52:30,  7.09it/s]

{'epoch': 4, 'iter': 5280, 'avg_loss': 8.521117430161265, 'avg_acc': 50.071601022533606, 'loss': 8.049578666687012}


EP_train:4:  19%|| 5292/27626 [12:30<52:45,  7.05it/s]

{'epoch': 4, 'iter': 5290, 'avg_loss': 8.520224943703546, 'avg_acc': 50.06851256851257, 'loss': 7.9123687744140625}


EP_train:4:  19%|| 5302/27626 [12:31<52:45,  7.05it/s]

{'epoch': 4, 'iter': 5300, 'avg_loss': 8.520295842146789, 'avg_acc': 50.064846255423504, 'loss': 8.284225463867188}


EP_train:4:  19%|| 5312/27626 [12:33<53:20,  6.97it/s]

{'epoch': 4, 'iter': 5310, 'avg_loss': 8.520456147180441, 'avg_acc': 50.06178215025419, 'loss': 9.164102554321289}


EP_train:4:  19%|| 5322/27626 [12:34<52:50,  7.03it/s]

{'epoch': 4, 'iter': 5320, 'avg_loss': 8.521219807355795, 'avg_acc': 50.05461849276451, 'loss': 9.226484298706055}


EP_train:4:  19%|| 5332/27626 [12:36<52:24,  7.09it/s]

{'epoch': 4, 'iter': 5330, 'avg_loss': 8.522192612975203, 'avg_acc': 50.0592055899456, 'loss': 8.910304069519043}


EP_train:4:  19%|| 5342/27626 [12:37<53:05,  6.99it/s]

{'epoch': 4, 'iter': 5340, 'avg_loss': 8.52172669666221, 'avg_acc': 50.05441396742183, 'loss': 8.449821472167969}


EP_train:4:  19%|| 5352/27626 [12:38<53:16,  6.97it/s]

{'epoch': 4, 'iter': 5350, 'avg_loss': 8.521985005717527, 'avg_acc': 50.044968230237345, 'loss': 9.726597785949707}


EP_train:4:  19%|| 5362/27626 [12:40<52:32,  7.06it/s]

{'epoch': 4, 'iter': 5360, 'avg_loss': 8.522835304211867, 'avg_acc': 50.04022104085058, 'loss': 8.523208618164062}


EP_train:4:  19%|| 5372/27626 [12:41<52:37,  7.05it/s]

{'epoch': 4, 'iter': 5370, 'avg_loss': 8.522769676997573, 'avg_acc': 50.043055296965186, 'loss': 8.369836807250977}


EP_train:4:  19%|| 5382/27626 [12:43<52:54,  7.01it/s]

{'epoch': 4, 'iter': 5380, 'avg_loss': 8.522956217383168, 'avg_acc': 50.04123304218546, 'loss': 8.886025428771973}


EP_train:4:  20%|| 5392/27626 [12:44<53:18,  6.95it/s]

{'epoch': 4, 'iter': 5390, 'avg_loss': 8.523437705647634, 'avg_acc': 50.04057688740493, 'loss': 8.972091674804688}


EP_train:4:  20%|| 5402/27626 [12:45<53:14,  6.96it/s]

{'epoch': 4, 'iter': 5400, 'avg_loss': 8.523330971965567, 'avg_acc': 50.038187372708755, 'loss': 9.291135787963867}


EP_train:4:  20%|| 5412/27626 [12:47<52:56,  6.99it/s]

{'epoch': 4, 'iter': 5410, 'avg_loss': 8.523468068513605, 'avg_acc': 50.03407410829791, 'loss': 8.257905006408691}


EP_train:4:  20%|| 5422/27626 [12:48<52:44,  7.02it/s]

{'epoch': 4, 'iter': 5420, 'avg_loss': 8.523051748502697, 'avg_acc': 50.02882309536986, 'loss': 9.103787422180176}


EP_train:4:  20%|| 5432/27626 [12:50<52:33,  7.04it/s]

{'epoch': 4, 'iter': 5430, 'avg_loss': 8.522900917718388, 'avg_acc': 50.0241668201068, 'loss': 7.616025924682617}


EP_train:4:  20%|| 5442/27626 [12:51<52:31,  7.04it/s]

{'epoch': 4, 'iter': 5440, 'avg_loss': 8.523138327143732, 'avg_acc': 50.02527108987318, 'loss': 8.55827808380127}


EP_train:4:  20%|| 5452/27626 [12:53<52:40,  7.02it/s]

{'epoch': 4, 'iter': 5450, 'avg_loss': 8.523179183901176, 'avg_acc': 50.01490552192258, 'loss': 8.62637710571289}


EP_train:4:  20%|| 5462/27626 [12:54<52:10,  7.08it/s]

{'epoch': 4, 'iter': 5460, 'avg_loss': 8.5234435817912, 'avg_acc': 50.013733748397726, 'loss': 8.580177307128906}


EP_train:4:  20%|| 5472/27626 [12:55<52:15,  7.07it/s]

{'epoch': 4, 'iter': 5470, 'avg_loss': 8.523513459174426, 'avg_acc': 50.0188493876805, 'loss': 8.998648643493652}


EP_train:4:  20%|| 5482/27626 [12:57<52:37,  7.01it/s]

{'epoch': 4, 'iter': 5480, 'avg_loss': 8.523435767256764, 'avg_acc': 50.01539408866995, 'loss': 9.315145492553711}


EP_train:4:  20%|| 5492/27626 [12:58<52:45,  6.99it/s]

{'epoch': 4, 'iter': 5490, 'avg_loss': 8.522958665874524, 'avg_acc': 50.01707339282462, 'loss': 7.9874587059021}


EP_train:4:  20%|| 5502/27626 [13:00<52:20,  7.04it/s]

{'epoch': 4, 'iter': 5500, 'avg_loss': 8.522823249485423, 'avg_acc': 50.011929649154695, 'loss': 7.746694564819336}


EP_train:4:  20%|| 5512/27626 [13:01<52:40,  7.00it/s]

{'epoch': 4, 'iter': 5510, 'avg_loss': 8.523137076150656, 'avg_acc': 50.01020685900925, 'loss': 9.973027229309082}


EP_train:4:  20%|| 5522/27626 [13:02<52:16,  7.05it/s]

{'epoch': 4, 'iter': 5520, 'avg_loss': 8.5233169696271, 'avg_acc': 50.012452454265535, 'loss': 8.647526741027832}


EP_train:4:  20%|| 5532/27626 [13:04<52:20,  7.03it/s]

{'epoch': 4, 'iter': 5530, 'avg_loss': 8.523245876466058, 'avg_acc': 50.01468992948834, 'loss': 8.07971477508545}


EP_train:4:  20%|| 5542/27626 [13:05<52:00,  7.08it/s]

{'epoch': 4, 'iter': 5540, 'avg_loss': 8.523138288943008, 'avg_acc': 50.00902364194189, 'loss': 8.074790000915527}


EP_train:4:  20%|| 5552/27626 [13:07<52:21,  7.03it/s]

{'epoch': 4, 'iter': 5550, 'avg_loss': 8.523306984146357, 'avg_acc': 50.01407404071338, 'loss': 8.215015411376953}


EP_train:4:  20%|| 5562/27626 [13:08<52:05,  7.06it/s]

{'epoch': 4, 'iter': 5560, 'avg_loss': 8.523641925946219, 'avg_acc': 50.02079212371876, 'loss': 8.355293273925781}


EP_train:4:  20%|| 5572/27626 [13:10<52:13,  7.04it/s]

{'epoch': 4, 'iter': 5570, 'avg_loss': 8.523568375066546, 'avg_acc': 50.01963292048106, 'loss': 8.754344940185547}


EP_train:4:  20%|| 5582/27626 [13:11<52:16,  7.03it/s]

{'epoch': 4, 'iter': 5580, 'avg_loss': 8.523784195046082, 'avg_acc': 50.01511825837663, 'loss': 9.174226760864258}


EP_train:4:  20%|| 5592/27626 [13:12<51:53,  7.08it/s]

{'epoch': 4, 'iter': 5590, 'avg_loss': 8.523676200621756, 'avg_acc': 50.007825076015024, 'loss': 8.01970100402832}


EP_train:4:  20%|| 5602/27626 [13:14<52:08,  7.04it/s]

{'epoch': 4, 'iter': 5600, 'avg_loss': 8.52357716816618, 'avg_acc': 50.010042849491164, 'loss': 8.12985897064209}


EP_train:4:  20%|| 5612/27626 [13:15<52:10,  7.03it/s]

{'epoch': 4, 'iter': 5610, 'avg_loss': 8.523576007469774, 'avg_acc': 50.00668330065942, 'loss': 9.11392879486084}


EP_train:4:  20%|| 5622/27626 [13:17<52:15,  7.02it/s]

{'epoch': 4, 'iter': 5620, 'avg_loss': 8.523453192786377, 'avg_acc': 50.013342821562, 'loss': 8.598136901855469}


EP_train:4:  20%|| 5632/27626 [13:18<52:17,  7.01it/s]

{'epoch': 4, 'iter': 5630, 'avg_loss': 8.523945476292845, 'avg_acc': 50.0138740898597, 'loss': 8.82161808013916}


EP_train:4:  20%|| 5642/27626 [13:19<52:17,  7.01it/s]

{'epoch': 4, 'iter': 5640, 'avg_loss': 8.523680074780023, 'avg_acc': 50.02326715121433, 'loss': 7.630066871643066}


EP_train:4:  20%|| 5652/27626 [13:21<52:00,  7.04it/s]

{'epoch': 4, 'iter': 5650, 'avg_loss': 8.523646567484231, 'avg_acc': 50.02764997345602, 'loss': 7.958637714385986}


EP_train:4:  20%|| 5662/27626 [13:22<52:11,  7.01it/s]

{'epoch': 4, 'iter': 5660, 'avg_loss': 8.523151850114965, 'avg_acc': 50.02704910793147, 'loss': 8.357494354248047}


EP_train:4:  21%|| 5672/27626 [13:24<52:36,  6.95it/s]

{'epoch': 4, 'iter': 5670, 'avg_loss': 8.522350995129871, 'avg_acc': 50.031960853464994, 'loss': 8.232003211975098}


EP_train:4:  21%|| 5682/27626 [13:25<52:13,  7.00it/s]

{'epoch': 4, 'iter': 5680, 'avg_loss': 8.522733571348507, 'avg_acc': 50.026953881358914, 'loss': 9.101651191711426}


EP_train:4:  21%|| 5692/27626 [13:27<52:17,  6.99it/s]

{'epoch': 4, 'iter': 5690, 'avg_loss': 8.522545215095247, 'avg_acc': 50.02141539272535, 'loss': 7.754975318908691}


EP_train:4:  21%|| 5702/27626 [13:28<51:47,  7.05it/s]

{'epoch': 4, 'iter': 5700, 'avg_loss': 8.523088342284888, 'avg_acc': 50.01918523066129, 'loss': 9.01514720916748}


EP_train:4:  21%|| 5712/27626 [13:29<51:52,  7.04it/s]

{'epoch': 4, 'iter': 5710, 'avg_loss': 8.523282371705353, 'avg_acc': 50.02188758536158, 'loss': 8.968480110168457}


EP_train:4:  21%|| 5722/27626 [13:31<52:13,  6.99it/s]

{'epoch': 4, 'iter': 5720, 'avg_loss': 8.522599733536362, 'avg_acc': 50.0180256948086, 'loss': 7.613610744476318}


EP_train:4:  21%|| 5732/27626 [13:32<51:33,  7.08it/s]

{'epoch': 4, 'iter': 5730, 'avg_loss': 8.522341307036188, 'avg_acc': 50.01744896178677, 'loss': 8.239691734313965}


EP_train:4:  21%|| 5742/27626 [13:34<51:59,  7.02it/s]

{'epoch': 4, 'iter': 5740, 'avg_loss': 8.52239844251273, 'avg_acc': 50.01306392614527, 'loss': 8.362472534179688}


EP_train:4:  21%|| 5752/27626 [13:35<51:36,  7.06it/s]

{'epoch': 4, 'iter': 5750, 'avg_loss': 8.52249991311299, 'avg_acc': 50.01086767518692, 'loss': 9.282304763793945}


EP_train:4:  21%|| 5762/27626 [13:37<52:15,  6.97it/s]

{'epoch': 4, 'iter': 5760, 'avg_loss': 8.52227692114895, 'avg_acc': 49.99837267835445, 'loss': 8.553083419799805}


EP_train:4:  21%|| 5772/27626 [13:38<52:20,  6.96it/s]

{'epoch': 4, 'iter': 5770, 'avg_loss': 8.522291504888512, 'avg_acc': 50.002166002425916, 'loss': 8.29875373840332}


EP_train:4:  21%|| 5782/27626 [13:39<51:45,  7.03it/s]

{'epoch': 4, 'iter': 5780, 'avg_loss': 8.522144098963174, 'avg_acc': 50.01297353399066, 'loss': 9.395417213439941}


EP_train:4:  21%|| 5792/27626 [13:41<51:24,  7.08it/s]

{'epoch': 4, 'iter': 5790, 'avg_loss': 8.522134801880762, 'avg_acc': 50.00917371783803, 'loss': 8.428791046142578}


EP_train:4:  21%|| 5802/27626 [13:42<51:28,  7.07it/s]

{'epoch': 4, 'iter': 5800, 'avg_loss': 8.521951473369576, 'avg_acc': 50.00754180313739, 'loss': 7.961287975311279}


EP_train:4:  21%|| 5812/27626 [13:44<51:41,  7.03it/s]

{'epoch': 4, 'iter': 5810, 'avg_loss': 8.522210183190388, 'avg_acc': 50.009679917398046, 'loss': 9.114625930786133}


EP_train:4:  21%|| 5822/27626 [13:45<51:20,  7.08it/s]

{'epoch': 4, 'iter': 5820, 'avg_loss': 8.522254932379317, 'avg_acc': 50.0021473973544, 'loss': 8.425320625305176}


EP_train:4:  21%|| 5832/27626 [13:46<51:27,  7.06it/s]

{'epoch': 4, 'iter': 5830, 'avg_loss': 8.522626521676917, 'avg_acc': 50.00964671582919, 'loss': 9.765220642089844}


EP_train:4:  21%|| 5842/27626 [13:48<51:59,  6.98it/s]

{'epoch': 4, 'iter': 5840, 'avg_loss': 8.52296542966229, 'avg_acc': 50.011235233692865, 'loss': 8.438673973083496}


EP_train:4:  21%|| 5852/27626 [13:49<52:08,  6.96it/s]

{'epoch': 4, 'iter': 5850, 'avg_loss': 8.523117413339076, 'avg_acc': 50.00961374124081, 'loss': 8.731409072875977}


EP_train:4:  21%|| 5862/27626 [13:51<51:45,  7.01it/s]

{'epoch': 4, 'iter': 5860, 'avg_loss': 8.52246246490843, 'avg_acc': 50.0085309674117, 'loss': 8.694023132324219}


EP_train:4:  21%|| 5872/27626 [13:52<51:56,  6.98it/s]

{'epoch': 4, 'iter': 5870, 'avg_loss': 8.52271912285141, 'avg_acc': 50.00319366377107, 'loss': 8.616448402404785}


EP_train:4:  21%|| 5882/27626 [13:54<51:51,  6.99it/s]

{'epoch': 4, 'iter': 5880, 'avg_loss': 8.522837289882505, 'avg_acc': 50.00637646658731, 'loss': 8.01370620727539}


EP_train:4:  21%|| 5892/27626 [13:55<51:09,  7.08it/s]

{'epoch': 4, 'iter': 5890, 'avg_loss': 8.522177090448144, 'avg_acc': 50.01007893396707, 'loss': 8.610616683959961}


EP_train:4:  21%|| 5902/27626 [13:56<51:05,  7.09it/s]

{'epoch': 4, 'iter': 5900, 'avg_loss': 8.522390370691374, 'avg_acc': 50.00052957125911, 'loss': 8.600626945495605}


EP_train:4:  21%|| 5912/27626 [13:58<51:05,  7.08it/s]

{'epoch': 4, 'iter': 5910, 'avg_loss': 8.522313878451925, 'avg_acc': 50.007930130265606, 'loss': 8.636350631713867}


EP_train:4:  21%|| 5922/27626 [13:59<51:18,  7.05it/s]

{'epoch': 4, 'iter': 5920, 'avg_loss': 8.521689403542311, 'avg_acc': 50.01055564938355, 'loss': 7.639776229858398}


EP_train:4:  21%|| 5932/27626 [14:01<51:19,  7.04it/s]

{'epoch': 4, 'iter': 5930, 'avg_loss': 8.521806103004169, 'avg_acc': 50.0126454223571, 'loss': 8.920817375183105}


EP_train:4:  22%|| 5942/27626 [14:02<51:23,  7.03it/s]

{'epoch': 4, 'iter': 5940, 'avg_loss': 8.522105287289584, 'avg_acc': 49.99684396566234, 'loss': 8.436297416687012}


EP_train:4:  22%|| 5952/27626 [14:03<51:16,  7.04it/s]

{'epoch': 4, 'iter': 5950, 'avg_loss': 8.521863889782354, 'avg_acc': 49.992648294404304, 'loss': 8.575400352478027}


EP_train:4:  22%|| 5962/27626 [14:05<51:09,  7.06it/s]

{'epoch': 4, 'iter': 5960, 'avg_loss': 8.522237448779514, 'avg_acc': 49.99213638651233, 'loss': 8.910445213317871}


EP_train:4:  22%|| 5972/27626 [14:06<51:11,  7.05it/s]

{'epoch': 4, 'iter': 5970, 'avg_loss': 8.522437907966252, 'avg_acc': 49.993719644950595, 'loss': 8.34286117553711}


EP_train:4:  22%|| 5982/27626 [14:08<51:29,  7.01it/s]

{'epoch': 4, 'iter': 5980, 'avg_loss': 8.522401460176566, 'avg_acc': 50.003134927269684, 'loss': 8.111347198486328}


EP_train:4:  22%|| 5992/27626 [14:09<51:15,  7.03it/s]

{'epoch': 4, 'iter': 5990, 'avg_loss': 8.521831832571193, 'avg_acc': 49.99895676848606, 'loss': 7.801148891448975}


EP_train:4:  22%|| 6002/27626 [14:10<51:14,  7.03it/s]

{'epoch': 4, 'iter': 6000, 'avg_loss': 8.521500606056929, 'avg_acc': 50.00052074654224, 'loss': 8.772665977478027}


EP_train:4:  22%|| 6012/27626 [14:12<51:07,  7.05it/s]

{'epoch': 4, 'iter': 6010, 'avg_loss': 8.521724621600308, 'avg_acc': 50.0, 'loss': 8.589838027954102}


EP_train:4:  22%|| 6022/27626 [14:13<50:58,  7.06it/s]

{'epoch': 4, 'iter': 6020, 'avg_loss': 8.521802026317516, 'avg_acc': 49.99844294967614, 'loss': 8.966917037963867}


EP_train:4:  22%|| 6032/27626 [14:15<51:48,  6.95it/s]

{'epoch': 4, 'iter': 6030, 'avg_loss': 8.521892490364825, 'avg_acc': 49.992745813297965, 'loss': 9.470799446105957}


EP_train:4:  22%|| 6042/27626 [14:16<51:01,  7.05it/s]

{'epoch': 4, 'iter': 6040, 'avg_loss': 8.52212197708385, 'avg_acc': 49.9922405230922, 'loss': 8.675359725952148}


EP_train:4:  22%|| 6052/27626 [14:18<51:13,  7.02it/s]

{'epoch': 4, 'iter': 6050, 'avg_loss': 8.522151074514293, 'avg_acc': 49.99070401586515, 'loss': 8.058433532714844}


EP_train:4:  22%|| 6062/27626 [14:19<50:54,  7.06it/s]

{'epoch': 4, 'iter': 6060, 'avg_loss': 8.522272531987417, 'avg_acc': 49.99020376175549, 'loss': 8.278434753417969}


EP_train:4:  22%|| 6072/27626 [14:20<50:56,  7.05it/s]

{'epoch': 4, 'iter': 6070, 'avg_loss': 8.521738027155724, 'avg_acc': 49.98764618678966, 'loss': 7.859659194946289}


EP_train:4:  22%|| 6082/27626 [14:22<52:48,  6.80it/s]

{'epoch': 4, 'iter': 6080, 'avg_loss': 8.521299088830482, 'avg_acc': 49.99074987666503, 'loss': 8.255147933959961}


EP_train:4:  22%|| 6092/27626 [14:23<50:58,  7.04it/s]

{'epoch': 4, 'iter': 6090, 'avg_loss': 8.521310524211652, 'avg_acc': 49.98871285503201, 'loss': 8.702215194702148}


EP_train:4:  22%|| 6102/27626 [14:25<50:53,  7.05it/s]

{'epoch': 4, 'iter': 6100, 'avg_loss': 8.521345202013462, 'avg_acc': 49.99129241108015, 'loss': 7.6498637199401855}


EP_train:4:  22%|| 6112/27626 [14:26<51:03,  7.02it/s]

{'epoch': 4, 'iter': 6110, 'avg_loss': 8.521472117374541, 'avg_acc': 49.99693176239568, 'loss': 9.349437713623047}


EP_train:4:  22%|| 6122/27626 [14:28<51:36,  6.94it/s]

{'epoch': 4, 'iter': 6120, 'avg_loss': 8.52126373237419, 'avg_acc': 49.99336301257964, 'loss': 8.037832260131836}


EP_train:4:  22%|| 6132/27626 [14:29<51:13,  6.99it/s]

{'epoch': 4, 'iter': 6130, 'avg_loss': 8.521313144728635, 'avg_acc': 49.98980590442016, 'loss': 8.489045143127441}


EP_train:4:  22%|| 6142/27626 [14:30<50:57,  7.03it/s]

{'epoch': 4, 'iter': 6140, 'avg_loss': 8.52106125466192, 'avg_acc': 49.99287575313467, 'loss': 8.538987159729004}


EP_train:4:  22%|| 6152/27626 [14:32<50:49,  7.04it/s]

{'epoch': 4, 'iter': 6150, 'avg_loss': 8.520916133794412, 'avg_acc': 49.98983905056089, 'loss': 8.666829109191895}


EP_train:4:  22%|| 6162/27626 [14:33<51:16,  6.98it/s]

{'epoch': 4, 'iter': 6160, 'avg_loss': 8.52157468134814, 'avg_acc': 49.99086998863821, 'loss': 8.720839500427246}


EP_train:4:  22%|| 6172/27626 [14:35<51:12,  6.98it/s]

{'epoch': 4, 'iter': 6170, 'avg_loss': 8.52234275065564, 'avg_acc': 49.98683357640577, 'loss': 8.811771392822266}


EP_train:4:  22%|| 6182/27626 [14:36<51:04,  7.00it/s]

{'epoch': 4, 'iter': 6180, 'avg_loss': 8.522180813837352, 'avg_acc': 49.98584371460929, 'loss': 8.240195274353027}


EP_train:4:  22%|| 6192/27626 [14:37<50:31,  7.07it/s]

{'epoch': 4, 'iter': 6190, 'avg_loss': 8.522672122446368, 'avg_acc': 49.98687611048296, 'loss': 8.63457202911377}


EP_train:4:  22%|| 6202/27626 [14:39<50:46,  7.03it/s]

{'epoch': 4, 'iter': 6200, 'avg_loss': 8.522889693616532, 'avg_acc': 49.98689727463312, 'loss': 9.111272811889648}


EP_train:4:  22%|| 6212/27626 [14:40<50:47,  7.03it/s]

{'epoch': 4, 'iter': 6210, 'avg_loss': 8.522435025764997, 'avg_acc': 49.97685557881179, 'loss': 7.584270477294922}


EP_train:4:  23%|| 6222/27626 [14:42<50:45,  7.03it/s]

{'epoch': 4, 'iter': 6220, 'avg_loss': 8.521918038351515, 'avg_acc': 49.96986015110111, 'loss': 8.116549491882324}


EP_train:4:  23%|| 6232/27626 [14:43<50:38,  7.04it/s]

{'epoch': 4, 'iter': 6230, 'avg_loss': 8.522480111542261, 'avg_acc': 49.975425292890385, 'loss': 8.614885330200195}


EP_train:4:  23%|| 6242/27626 [14:45<50:31,  7.05it/s]

{'epoch': 4, 'iter': 6240, 'avg_loss': 8.522574024845289, 'avg_acc': 49.97195962185547, 'loss': 8.515735626220703}


EP_train:4:  23%|| 6252/27626 [14:46<50:22,  7.07it/s]

{'epoch': 4, 'iter': 6250, 'avg_loss': 8.522390742050973, 'avg_acc': 49.972004479283314, 'loss': 8.85008716583252}


EP_train:4:  23%|| 6262/27626 [14:47<51:12,  6.95it/s]

{'epoch': 4, 'iter': 6260, 'avg_loss': 8.521976757080024, 'avg_acc': 49.98652371825587, 'loss': 8.477458000183105}


EP_train:4:  23%|| 6272/27626 [14:49<51:04,  6.97it/s]

{'epoch': 4, 'iter': 6270, 'avg_loss': 8.521654475155373, 'avg_acc': 49.98455190559719, 'loss': 8.400259971618652}


EP_train:4:  23%|| 6282/27626 [14:50<50:32,  7.04it/s]

{'epoch': 4, 'iter': 6280, 'avg_loss': 8.522249658630448, 'avg_acc': 49.98855675847795, 'loss': 9.1510591506958}


EP_train:4:  23%|| 6292/27626 [14:52<50:21,  7.06it/s]

{'epoch': 4, 'iter': 6290, 'avg_loss': 8.522291007168512, 'avg_acc': 49.98708472420918, 'loss': 8.936278343200684}


EP_train:4:  23%|| 6302/27626 [14:53<50:55,  6.98it/s]

{'epoch': 4, 'iter': 6300, 'avg_loss': 8.52183130055718, 'avg_acc': 49.985617362323445, 'loss': 8.029075622558594}


EP_train:4:  23%|| 6312/27626 [14:54<50:57,  6.97it/s]

{'epoch': 4, 'iter': 6310, 'avg_loss': 8.521083168854258, 'avg_acc': 49.99059182379971, 'loss': 9.240865707397461}


EP_train:4:  23%|| 6322/27626 [14:56<50:13,  7.07it/s]

{'epoch': 4, 'iter': 6320, 'avg_loss': 8.521008689613627, 'avg_acc': 49.98615725359911, 'loss': 8.312220573425293}


EP_train:4:  23%|| 6332/27626 [14:57<50:12,  7.07it/s]

{'epoch': 4, 'iter': 6330, 'avg_loss': 8.52108564807649, 'avg_acc': 49.97778786921497, 'loss': 8.620424270629883}


EP_train:4:  23%|| 6342/27626 [14:59<50:08,  7.07it/s]

{'epoch': 4, 'iter': 6340, 'avg_loss': 8.520906098312693, 'avg_acc': 49.97979419649897, 'loss': 8.463421821594238}


EP_train:4:  23%|| 6352/27626 [15:00<50:53,  6.97it/s]

{'epoch': 4, 'iter': 6350, 'avg_loss': 8.521039802396302, 'avg_acc': 49.98228625413321, 'loss': 8.488319396972656}


EP_train:4:  23%|| 6362/27626 [15:02<50:35,  7.00it/s]

{'epoch': 4, 'iter': 6360, 'avg_loss': 8.52084163404302, 'avg_acc': 49.97985772677252, 'loss': 9.107733726501465}


EP_train:4:  23%|| 6372/27626 [15:03<50:10,  7.06it/s]

{'epoch': 4, 'iter': 6370, 'avg_loss': 8.521624401064388, 'avg_acc': 49.98037984617799, 'loss': 9.321548461914062}


EP_train:4:  23%|| 6382/27626 [15:04<50:27,  7.02it/s]

{'epoch': 4, 'iter': 6380, 'avg_loss': 8.521402795194104, 'avg_acc': 49.98041059395079, 'loss': 8.387914657592773}


EP_train:4:  23%|| 6392/27626 [15:06<50:51,  6.96it/s]

{'epoch': 4, 'iter': 6390, 'avg_loss': 8.520694141376001, 'avg_acc': 49.97701846346425, 'loss': 7.484935283660889}


EP_train:4:  23%|| 6402/27626 [15:07<50:19,  7.03it/s]

{'epoch': 4, 'iter': 6400, 'avg_loss': 8.520592071957521, 'avg_acc': 49.979007186377125, 'loss': 9.40632152557373}


EP_train:4:  23%|| 6412/27626 [15:09<50:05,  7.06it/s]

{'epoch': 4, 'iter': 6410, 'avg_loss': 8.52121689486589, 'avg_acc': 49.9785524879114, 'loss': 9.051025390625}


EP_train:4:  23%|| 6422/27626 [15:10<50:22,  7.01it/s]

{'epoch': 4, 'iter': 6420, 'avg_loss': 8.521876450398874, 'avg_acc': 49.98150599595078, 'loss': 8.70692253112793}


EP_train:4:  23%|| 6432/27626 [15:11<50:05,  7.05it/s]

{'epoch': 4, 'iter': 6430, 'avg_loss': 8.521779181667702, 'avg_acc': 49.98153475353755, 'loss': 8.213172912597656}


EP_train:4:  23%|| 6442/27626 [15:13<50:40,  6.97it/s]

{'epoch': 4, 'iter': 6440, 'avg_loss': 8.521794599658342, 'avg_acc': 49.98495963359727, 'loss': 9.063457489013672}


EP_train:4:  23%|| 6452/27626 [15:14<50:19,  7.01it/s]

{'epoch': 4, 'iter': 6450, 'avg_loss': 8.521459290083573, 'avg_acc': 49.9844985273601, 'loss': 9.432826042175293}


EP_train:4:  23%|| 6462/27626 [15:16<50:30,  6.98it/s]

{'epoch': 4, 'iter': 6460, 'avg_loss': 8.521407326203976, 'avg_acc': 49.98694087602538, 'loss': 8.642090797424316}


EP_train:4:  23%|| 6472/27626 [15:17<50:08,  7.03it/s]

{'epoch': 4, 'iter': 6470, 'avg_loss': 8.521410271292805, 'avg_acc': 49.98647813320971, 'loss': 9.527956008911133}


EP_train:4:  23%|| 6482/27626 [15:19<50:11,  7.02it/s]

{'epoch': 4, 'iter': 6480, 'avg_loss': 8.521454419384877, 'avg_acc': 49.996624749267085, 'loss': 8.170319557189941}


EP_train:4:  23%|| 6492/27626 [15:20<49:50,  7.07it/s]

{'epoch': 4, 'iter': 6490, 'avg_loss': 8.522124855236617, 'avg_acc': 49.99374133415498, 'loss': 8.545534133911133}


EP_train:4:  24%|| 6502/27626 [15:21<50:05,  7.03it/s]

{'epoch': 4, 'iter': 6500, 'avg_loss': 8.52204822514758, 'avg_acc': 49.99375096139055, 'loss': 7.847245693206787}


EP_train:4:  24%|| 6512/27626 [15:23<50:16,  7.00it/s]

{'epoch': 4, 'iter': 6510, 'avg_loss': 8.521881593169018, 'avg_acc': 49.995680387037325, 'loss': 8.68991756439209}


EP_train:4:  24%|| 6522/27626 [15:24<49:38,  7.08it/s]

{'epoch': 4, 'iter': 6520, 'avg_loss': 8.521843188051394, 'avg_acc': 49.991853243367586, 'loss': 9.579636573791504}


EP_train:4:  24%|| 6532/27626 [15:26<50:08,  7.01it/s]

{'epoch': 4, 'iter': 6530, 'avg_loss': 8.52165632725419, 'avg_acc': 49.98612387077017, 'loss': 7.962695598602295}


EP_train:4:  24%|| 6542/27626 [15:27<49:39,  7.08it/s]

{'epoch': 4, 'iter': 6540, 'avg_loss': 8.52110908629606, 'avg_acc': 49.9890116190185, 'loss': 8.308796882629395}


EP_train:4:  24%|| 6552/27626 [15:28<49:48,  7.05it/s]

{'epoch': 4, 'iter': 6550, 'avg_loss': 8.520894366791186, 'avg_acc': 49.989982445428176, 'loss': 8.26531982421875}


EP_train:4:  24%|| 6562/27626 [15:30<50:10,  7.00it/s]

{'epoch': 4, 'iter': 6560, 'avg_loss': 8.52090399077418, 'avg_acc': 49.9961896052431, 'loss': 8.792035102844238}


EP_train:4:  24%|| 6572/27626 [15:31<50:08,  7.00it/s]

{'epoch': 4, 'iter': 6570, 'avg_loss': 8.520959513569473, 'avg_acc': 49.99667097854208, 'loss': 7.918099403381348}


EP_train:4:  24%|| 6582/27626 [15:33<49:34,  7.07it/s]

{'epoch': 4, 'iter': 6580, 'avg_loss': 8.52073856197833, 'avg_acc': 49.994301777845315, 'loss': 9.270881652832031}


EP_train:4:  24%|| 6592/27626 [15:34<49:40,  7.06it/s]

{'epoch': 4, 'iter': 6590, 'avg_loss': 8.520751208677366, 'avg_acc': 49.990517372174175, 'loss': 8.425721168518066}


EP_train:4:  24%|| 6602/27626 [15:36<49:34,  7.07it/s]

{'epoch': 4, 'iter': 6600, 'avg_loss': 8.520682857101965, 'avg_acc': 49.98721784578094, 'loss': 7.9942240715026855}


EP_train:4:  24%|| 6612/27626 [15:37<49:38,  7.05it/s]

{'epoch': 4, 'iter': 6610, 'avg_loss': 8.521077695222507, 'avg_acc': 49.99149145363788, 'loss': 8.488591194152832}


EP_train:4:  24%|| 6622/27626 [15:38<49:55,  7.01it/s]

{'epoch': 4, 'iter': 6620, 'avg_loss': 8.52114065650222, 'avg_acc': 49.99008835523335, 'loss': 9.536116600036621}


EP_train:4:  24%|| 6632/27626 [15:40<49:30,  7.07it/s]

{'epoch': 4, 'iter': 6630, 'avg_loss': 8.521132848432135, 'avg_acc': 49.987275674860506, 'loss': 8.884819984436035}


EP_train:4:  24%|| 6642/27626 [15:41<49:31,  7.06it/s]

{'epoch': 4, 'iter': 6640, 'avg_loss': 8.521174098906469, 'avg_acc': 49.98729483511519, 'loss': 8.090136528015137}


EP_train:4:  24%|| 6652/27626 [15:43<49:55,  7.00it/s]

{'epoch': 4, 'iter': 6650, 'avg_loss': 8.521347846075782, 'avg_acc': 49.97791685460833, 'loss': 9.092864036560059}


EP_train:4:  24%|| 6662/27626 [15:44<49:49,  7.01it/s]

{'epoch': 4, 'iter': 6660, 'avg_loss': 8.521169175831087, 'avg_acc': 49.974196817294704, 'loss': 7.658261775970459}


EP_train:4:  24%|| 6672/27626 [15:45<49:19,  7.08it/s]

{'epoch': 4, 'iter': 6670, 'avg_loss': 8.520983027797145, 'avg_acc': 49.98032528856243, 'loss': 8.611339569091797}


EP_train:4:  24%|| 6682/27626 [15:47<49:31,  7.05it/s]

{'epoch': 4, 'iter': 6680, 'avg_loss': 8.521033182987784, 'avg_acc': 49.982693459063015, 'loss': 8.503297805786133}


EP_train:4:  24%|| 6692/27626 [15:48<49:27,  7.05it/s]

{'epoch': 4, 'iter': 6690, 'avg_loss': 8.520962108964968, 'avg_acc': 49.9869227320281, 'loss': 8.120707511901855}


EP_train:4:  24%|| 6702/27626 [15:50<49:14,  7.08it/s]

{'epoch': 4, 'iter': 6700, 'avg_loss': 8.521012468598455, 'avg_acc': 49.98647589911954, 'loss': 8.734274864196777}


EP_train:4:  24%|| 6712/27626 [15:51<49:53,  6.99it/s]

{'epoch': 4, 'iter': 6710, 'avg_loss': 8.521563251156012, 'avg_acc': 49.982770824020264, 'loss': 9.37360954284668}


EP_train:4:  24%|| 6722/27626 [15:53<49:30,  7.04it/s]

{'epoch': 4, 'iter': 6720, 'avg_loss': 8.521242767550232, 'avg_acc': 49.97954173486088, 'loss': 8.824654579162598}


EP_train:4:  24%|| 6732/27626 [15:54<49:42,  7.01it/s]

{'epoch': 4, 'iter': 6730, 'avg_loss': 8.521132294410158, 'avg_acc': 49.97817931956619, 'loss': 8.038010597229004}


EP_train:4:  24%|| 6742/27626 [15:55<49:32,  7.03it/s]

{'epoch': 4, 'iter': 6740, 'avg_loss': 8.521304655563158, 'avg_acc': 49.97728452751817, 'loss': 8.93144702911377}


EP_train:4:  24%|| 6752/27626 [15:57<49:18,  7.05it/s]

{'epoch': 4, 'iter': 6750, 'avg_loss': 8.521148219565571, 'avg_acc': 49.9782439638572, 'loss': 8.017237663269043}


EP_train:4:  24%|| 6762/27626 [15:58<49:43,  6.99it/s]

{'epoch': 4, 'iter': 6760, 'avg_loss': 8.521016171408412, 'avg_acc': 49.97966277177932, 'loss': 8.981578826904297}


EP_train:4:  25%|| 6772/27626 [16:00<49:55,  6.96it/s]

{'epoch': 4, 'iter': 6770, 'avg_loss': 8.521294922688531, 'avg_acc': 49.97600059075469, 'loss': 8.27621078491211}


EP_train:4:  25%|| 6782/27626 [16:01<49:31,  7.02it/s]

{'epoch': 4, 'iter': 6780, 'avg_loss': 8.521036285124175, 'avg_acc': 49.97695767585902, 'loss': 8.15484619140625}


EP_train:4:  25%|| 6792/27626 [16:02<49:33,  7.01it/s]

{'epoch': 4, 'iter': 6790, 'avg_loss': 8.520994316090402, 'avg_acc': 49.9797526137535, 'loss': 8.325271606445312}


EP_train:4:  25%|| 6802/27626 [16:04<49:50,  6.96it/s]

{'epoch': 4, 'iter': 6800, 'avg_loss': 8.521155168126391, 'avg_acc': 49.981620349948535, 'loss': 8.200846672058105}


EP_train:4:  25%|| 6812/27626 [16:05<49:37,  6.99it/s]

{'epoch': 4, 'iter': 6810, 'avg_loss': 8.521197127903843, 'avg_acc': 49.986694318014976, 'loss': 8.591714859008789}


EP_train:4:  25%|| 6822/27626 [16:07<49:22,  7.02it/s]

{'epoch': 4, 'iter': 6820, 'avg_loss': 8.521117629691698, 'avg_acc': 49.98488124908371, 'loss': 7.94923734664917}


EP_train:4:  25%|| 6832/27626 [16:08<49:46,  6.96it/s]

{'epoch': 4, 'iter': 6830, 'avg_loss': 8.52069793468582, 'avg_acc': 49.98124359537403, 'loss': 7.992868900299072}


EP_train:4:  25%|| 6842/27626 [16:10<49:27,  7.00it/s]

{'epoch': 4, 'iter': 6840, 'avg_loss': 8.520140966708993, 'avg_acc': 49.983555035813474, 'loss': 9.583416938781738}


EP_train:4:  25%|| 6852/27626 [16:11<49:18,  7.02it/s]

{'epoch': 4, 'iter': 6850, 'avg_loss': 8.520202069748164, 'avg_acc': 49.97628083491461, 'loss': 8.791328430175781}


EP_train:4:  25%|| 6862/27626 [16:12<49:06,  7.05it/s]

{'epoch': 4, 'iter': 6860, 'avg_loss': 8.520367443691866, 'avg_acc': 49.97358256813876, 'loss': 7.825531482696533}


EP_train:4:  25%|| 6872/27626 [16:14<49:09,  7.04it/s]

{'epoch': 4, 'iter': 6870, 'avg_loss': 8.52079069074837, 'avg_acc': 49.97316620579247, 'loss': 9.324796676635742}


EP_train:4:  25%|| 6882/27626 [16:15<49:01,  7.05it/s]

{'epoch': 4, 'iter': 6880, 'avg_loss': 8.520672983250773, 'avg_acc': 49.9763842464758, 'loss': 8.351408004760742}


EP_train:4:  25%|| 6892/27626 [16:17<49:06,  7.04it/s]

{'epoch': 4, 'iter': 6890, 'avg_loss': 8.520791527702853, 'avg_acc': 49.979592947322594, 'loss': 8.367514610290527}


EP_train:4:  25%|| 6902/27626 [16:18<48:58,  7.05it/s]

{'epoch': 4, 'iter': 6900, 'avg_loss': 8.520314854101036, 'avg_acc': 49.97735835386176, 'loss': 8.06782054901123}


EP_train:4:  25%|| 6912/27626 [16:19<49:19,  7.00it/s]

{'epoch': 4, 'iter': 6910, 'avg_loss': 8.519917582209834, 'avg_acc': 49.97874764867603, 'loss': 7.746835231781006}


EP_train:4:  25%|| 6922/27626 [16:21<49:05,  7.03it/s]

{'epoch': 4, 'iter': 6920, 'avg_loss': 8.520456806012547, 'avg_acc': 49.977875307036555, 'loss': 8.49752140045166}


EP_train:4:  25%|| 6932/27626 [16:22<49:07,  7.02it/s]

{'epoch': 4, 'iter': 6930, 'avg_loss': 8.520828490063941, 'avg_acc': 49.98917905064204, 'loss': 7.978915214538574}


EP_train:4:  25%|| 6942/27626 [16:24<48:56,  7.04it/s]

{'epoch': 4, 'iter': 6940, 'avg_loss': 8.520700847874147, 'avg_acc': 49.99504754358162, 'loss': 8.92084789276123}


EP_train:4:  25%|| 6952/27626 [16:25<49:05,  7.02it/s]

{'epoch': 4, 'iter': 6950, 'avg_loss': 8.520424811347167, 'avg_acc': 49.9905589123867, 'loss': 8.016310691833496}


EP_train:4:  25%|| 6962/27626 [16:27<49:04,  7.02it/s]

{'epoch': 4, 'iter': 6960, 'avg_loss': 8.520338251919192, 'avg_acc': 49.98249173969258, 'loss': 8.101327896118164}


EP_train:4:  25%|| 6972/27626 [16:28<48:58,  7.03it/s]

{'epoch': 4, 'iter': 6970, 'avg_loss': 8.520269018114938, 'avg_acc': 49.982965141299665, 'loss': 8.060179710388184}


EP_train:4:  25%|| 6982/27626 [16:29<48:40,  7.07it/s]

{'epoch': 4, 'iter': 6980, 'avg_loss': 8.520489745692663, 'avg_acc': 49.980303681421, 'loss': 9.4920015335083}


EP_train:4:  25%|| 6992/27626 [16:31<48:42,  7.06it/s]

{'epoch': 4, 'iter': 6990, 'avg_loss': 8.520961739784692, 'avg_acc': 49.97764983550279, 'loss': 8.993444442749023}


EP_train:4:  25%|| 7002/27626 [16:32<48:48,  7.04it/s]

{'epoch': 4, 'iter': 7000, 'avg_loss': 8.520691729021012, 'avg_acc': 49.98437723182402, 'loss': 7.291583061218262}


EP_train:4:  25%|| 7012/27626 [16:34<48:50,  7.03it/s]

{'epoch': 4, 'iter': 7010, 'avg_loss': 8.519962004597764, 'avg_acc': 49.979050777349876, 'loss': 7.6743364334106445}


EP_train:4:  25%|| 7022/27626 [16:35<49:08,  6.99it/s]

{'epoch': 4, 'iter': 7020, 'avg_loss': 8.520103806122265, 'avg_acc': 49.97284930921521, 'loss': 8.294364929199219}


EP_train:4:  25%|| 7032/27626 [16:36<48:59,  7.01it/s]

{'epoch': 4, 'iter': 7030, 'avg_loss': 8.520110716132754, 'avg_acc': 49.9782214478737, 'loss': 8.181001663208008}


EP_train:4:  25%|| 7042/27626 [16:38<49:10,  6.98it/s]

{'epoch': 4, 'iter': 7040, 'avg_loss': 8.520052026202269, 'avg_acc': 49.976920891918766, 'loss': 8.2901029586792}


EP_train:4:  26%|| 7052/27626 [16:39<49:02,  6.99it/s]

{'epoch': 4, 'iter': 7050, 'avg_loss': 8.519808567021691, 'avg_acc': 49.97695362359949, 'loss': 8.245019912719727}


EP_train:4:  26%|| 7062/27626 [16:41<48:40,  7.04it/s]

{'epoch': 4, 'iter': 7060, 'avg_loss': 8.519959420324502, 'avg_acc': 49.972117971958646, 'loss': 8.318785667419434}


EP_train:4:  26%|| 7072/27626 [16:42<48:48,  7.02it/s]

{'epoch': 4, 'iter': 7070, 'avg_loss': 8.52065168029263, 'avg_acc': 49.97127351152595, 'loss': 8.848270416259766}


EP_train:4:  26%|| 7082/27626 [16:44<48:39,  7.04it/s]

{'epoch': 4, 'iter': 7080, 'avg_loss': 8.521208777264036, 'avg_acc': 49.9735206891682, 'loss': 8.957460403442383}


EP_train:4:  26%|| 7092/27626 [16:45<48:28,  7.06it/s]

{'epoch': 4, 'iter': 7090, 'avg_loss': 8.521412195195737, 'avg_acc': 49.96826963756875, 'loss': 9.412631034851074}


EP_train:4:  26%|| 7102/27626 [16:46<48:53,  7.00it/s]

{'epoch': 4, 'iter': 7100, 'avg_loss': 8.521861723352027, 'avg_acc': 49.970514716237155, 'loss': 8.407797813415527}


EP_train:4:  26%|| 7112/27626 [16:48<48:44,  7.01it/s]

{'epoch': 4, 'iter': 7110, 'avg_loss': 8.521609081294597, 'avg_acc': 49.97319294051469, 'loss': 8.700992584228516}


EP_train:4:  26%|| 7122/27626 [16:49<48:33,  7.04it/s]

{'epoch': 4, 'iter': 7120, 'avg_loss': 8.521851873350954, 'avg_acc': 49.971036371296165, 'loss': 8.33868408203125}


EP_train:4:  26%|| 7132/27626 [16:51<48:45,  7.00it/s]

{'epoch': 4, 'iter': 7130, 'avg_loss': 8.52174249863561, 'avg_acc': 49.97458280746038, 'loss': 8.146319389343262}


EP_train:4:  26%|| 7142/27626 [16:52<48:52,  6.98it/s]

{'epoch': 4, 'iter': 7140, 'avg_loss': 8.522012670706875, 'avg_acc': 49.97811931102086, 'loss': 8.333909034729004}


EP_train:4:  26%|| 7152/27626 [16:54<48:48,  6.99it/s]

{'epoch': 4, 'iter': 7150, 'avg_loss': 8.521769569163855, 'avg_acc': 49.98208292546497, 'loss': 8.33423137664795}


EP_train:4:  26%|| 7162/27626 [16:55<48:30,  7.03it/s]

{'epoch': 4, 'iter': 7160, 'avg_loss': 8.521945523077042, 'avg_acc': 49.98647186147186, 'loss': 8.85229778289795}


EP_train:4:  26%|| 7172/27626 [16:56<48:40,  7.00it/s]

{'epoch': 4, 'iter': 7170, 'avg_loss': 8.522004136811772, 'avg_acc': 49.979518198298706, 'loss': 8.948749542236328}


EP_train:4:  26%|| 7182/27626 [16:58<48:25,  7.04it/s]

{'epoch': 4, 'iter': 7180, 'avg_loss': 8.521800916185951, 'avg_acc': 49.97475978276006, 'loss': 8.727174758911133}


EP_train:4:  26%|| 7192/27626 [16:59<48:35,  7.01it/s]

{'epoch': 4, 'iter': 7190, 'avg_loss': 8.52161370012031, 'avg_acc': 49.97696773744959, 'loss': 8.147128105163574}


EP_train:4:  26%|| 7202/27626 [17:01<48:43,  6.99it/s]

{'epoch': 4, 'iter': 7200, 'avg_loss': 8.521015998407, 'avg_acc': 49.977867657269826, 'loss': 7.562497138977051}


EP_train:4:  26%|| 7212/27626 [17:02<48:26,  7.02it/s]

{'epoch': 4, 'iter': 7210, 'avg_loss': 8.520972614793536, 'avg_acc': 49.98049854389128, 'loss': 9.482087135314941}


EP_train:4:  26%|| 7222/27626 [17:03<48:53,  6.96it/s]

{'epoch': 4, 'iter': 7220, 'avg_loss': 8.52097923731543, 'avg_acc': 49.98139108156765, 'loss': 8.56653118133545}


EP_train:4:  26%|| 7232/27626 [17:05<48:25,  7.02it/s]

{'epoch': 4, 'iter': 7230, 'avg_loss': 8.520815348661452, 'avg_acc': 49.98617065412806, 'loss': 8.642498970031738}


EP_train:4:  26%|| 7242/27626 [17:06<48:12,  7.05it/s]

{'epoch': 4, 'iter': 7240, 'avg_loss': 8.521026295961027, 'avg_acc': 49.983168761220824, 'loss': 7.772752285003662}


EP_train:4:  26%|| 7252/27626 [17:08<47:55,  7.08it/s]

{'epoch': 4, 'iter': 7250, 'avg_loss': 8.520719874301395, 'avg_acc': 49.98534684871052, 'loss': 8.555277824401855}


EP_train:4:  26%|| 7262/27626 [17:09<48:14,  7.04it/s]

{'epoch': 4, 'iter': 7260, 'avg_loss': 8.520974976779048, 'avg_acc': 49.981493595923425, 'loss': 8.093923568725586}


EP_train:4:  26%|| 7272/27626 [17:11<48:16,  7.03it/s]

{'epoch': 4, 'iter': 7270, 'avg_loss': 8.521084870519573, 'avg_acc': 49.982808416999035, 'loss': 8.126554489135742}


EP_train:4:  26%|| 7282/27626 [17:12<48:14,  7.03it/s]

{'epoch': 4, 'iter': 7280, 'avg_loss': 8.520708481241527, 'avg_acc': 49.986265622854006, 'loss': 7.678002834320068}


EP_train:4:  26%|| 7292/27626 [17:13<48:16,  7.02it/s]

{'epoch': 4, 'iter': 7290, 'avg_loss': 8.520354858495754, 'avg_acc': 49.987141681525166, 'loss': 8.079739570617676}


EP_train:4:  26%|| 7302/27626 [17:15<48:31,  6.98it/s]

{'epoch': 4, 'iter': 7300, 'avg_loss': 8.520384520671904, 'avg_acc': 49.987587316805914, 'loss': 9.429302215576172}


EP_train:4:  26%|| 7312/27626 [17:16<47:53,  7.07it/s]

{'epoch': 4, 'iter': 7310, 'avg_loss': 8.5203490036161, 'avg_acc': 49.98332991382848, 'loss': 9.182239532470703}


EP_train:4:  27%|| 7322/27626 [17:18<48:11,  7.02it/s]

{'epoch': 4, 'iter': 7320, 'avg_loss': 8.520131390890354, 'avg_acc': 49.97823043300096, 'loss': 8.019885063171387}


EP_train:4:  27%|| 7332/27626 [17:19<48:12,  7.02it/s]

{'epoch': 4, 'iter': 7330, 'avg_loss': 8.520171956595807, 'avg_acc': 49.9901957441004, 'loss': 8.286507606506348}


EP_train:4:  27%|| 7342/27626 [17:20<48:20,  6.99it/s]

{'epoch': 4, 'iter': 7340, 'avg_loss': 8.519646158986156, 'avg_acc': 49.9893577169323, 'loss': 8.09815788269043}


EP_train:4:  27%|| 7352/27626 [17:22<48:00,  7.04it/s]

{'epoch': 4, 'iter': 7350, 'avg_loss': 8.519823573167884, 'avg_acc': 49.986396408651885, 'loss': 9.730496406555176}


EP_train:4:  27%|| 7362/27626 [17:23<48:30,  6.96it/s]

{'epoch': 4, 'iter': 7360, 'avg_loss': 8.51987363663466, 'avg_acc': 49.98259407689173, 'loss': 8.33442211151123}


EP_train:4:  27%|| 7372/27626 [17:25<48:35,  6.95it/s]

{'epoch': 4, 'iter': 7370, 'avg_loss': 8.519691569432203, 'avg_acc': 49.9783781033781, 'loss': 8.404454231262207}


EP_train:4:  27%|| 7382/27626 [17:26<50:31,  6.68it/s]

{'epoch': 4, 'iter': 7380, 'avg_loss': 8.519745764637394, 'avg_acc': 49.97798401300636, 'loss': 9.115951538085938}


EP_train:4:  27%|| 7392/27626 [17:28<48:29,  6.95it/s]

{'epoch': 4, 'iter': 7390, 'avg_loss': 8.51930598931357, 'avg_acc': 49.981396292788524, 'loss': 7.890536308288574}


EP_train:4:  27%|| 7402/27626 [17:29<47:55,  7.03it/s]

{'epoch': 4, 'iter': 7400, 'avg_loss': 8.519583784040769, 'avg_acc': 49.98226591001216, 'loss': 8.430904388427734}


EP_train:4:  27%|| 7412/27626 [17:30<48:16,  6.98it/s]

{'epoch': 4, 'iter': 7410, 'avg_loss': 8.51948946147389, 'avg_acc': 49.980181486978815, 'loss': 8.423357009887695}


EP_train:4:  27%|| 7422/27626 [17:32<47:50,  7.04it/s]

{'epoch': 4, 'iter': 7420, 'avg_loss': 8.519560115129506, 'avg_acc': 49.98020819296591, 'loss': 9.111742973327637}


EP_train:4:  27%|| 7432/27626 [17:33<48:16,  6.97it/s]

{'epoch': 4, 'iter': 7430, 'avg_loss': 8.51963951385225, 'avg_acc': 49.97771161351097, 'loss': 8.55506706237793}


EP_train:4:  27%|| 7442/27626 [17:35<47:41,  7.05it/s]

{'epoch': 4, 'iter': 7440, 'avg_loss': 8.519384428051719, 'avg_acc': 49.972701921784704, 'loss': 8.071046829223633}


EP_train:4:  27%|| 7452/27626 [17:36<47:59,  7.01it/s]

{'epoch': 4, 'iter': 7450, 'avg_loss': 8.518913663772748, 'avg_acc': 49.97399677895584, 'loss': 8.185688018798828}


EP_train:4:  27%|| 7462/27626 [17:38<48:02,  7.00it/s]

{'epoch': 4, 'iter': 7460, 'avg_loss': 8.518625277805674, 'avg_acc': 49.96900549524192, 'loss': 8.622600555419922}


EP_train:4:  27%|| 7472/27626 [17:39<47:47,  7.03it/s]

{'epoch': 4, 'iter': 7470, 'avg_loss': 8.518410025216093, 'avg_acc': 49.97197496988355, 'loss': 8.198299407958984}


EP_train:4:  27%|| 7482/27626 [17:40<48:11,  6.97it/s]

{'epoch': 4, 'iter': 7480, 'avg_loss': 8.51838759339568, 'avg_acc': 49.974518780911644, 'loss': 8.522262573242188}


EP_train:4:  27%|| 7492/27626 [17:42<48:12,  6.96it/s]

{'epoch': 4, 'iter': 7490, 'avg_loss': 8.5186969525759, 'avg_acc': 49.97413562942197, 'loss': 8.581794738769531}


EP_train:4:  27%|| 7502/27626 [17:43<47:48,  7.01it/s]

{'epoch': 4, 'iter': 7500, 'avg_loss': 8.518892192789721, 'avg_acc': 49.97833622183709, 'loss': 8.763920783996582}


EP_train:4:  27%|| 7512/27626 [17:45<47:40,  7.03it/s]

{'epoch': 4, 'iter': 7510, 'avg_loss': 8.519269611146347, 'avg_acc': 49.97836506457196, 'loss': 8.662150382995605}


EP_train:4:  27%|| 7522/27626 [17:46<47:37,  7.04it/s]

{'epoch': 4, 'iter': 7520, 'avg_loss': 8.519178678532608, 'avg_acc': 49.974654301289725, 'loss': 8.98664379119873}


EP_train:4:  27%|| 7532/27626 [17:47<47:22,  7.07it/s]

{'epoch': 4, 'iter': 7530, 'avg_loss': 8.518962360159712, 'avg_acc': 49.97551785951401, 'loss': 8.502557754516602}


EP_train:4:  27%|| 7542/27626 [17:49<48:12,  6.94it/s]

{'epoch': 4, 'iter': 7540, 'avg_loss': 8.519110265326365, 'avg_acc': 49.97679352870972, 'loss': 8.567896842956543}


EP_train:4:  27%|| 7552/27626 [17:50<47:50,  6.99it/s]

{'epoch': 4, 'iter': 7550, 'avg_loss': 8.519143463870323, 'avg_acc': 49.972685736988474, 'loss': 8.656197547912598}


EP_train:4:  27%|| 7562/27626 [17:52<47:40,  7.01it/s]

{'epoch': 4, 'iter': 7560, 'avg_loss': 8.518903288887286, 'avg_acc': 49.969415421240576, 'loss': 8.86625862121582}


EP_train:4:  27%|| 7572/27626 [17:53<47:29,  7.04it/s]

{'epoch': 4, 'iter': 7570, 'avg_loss': 8.519243227697052, 'avg_acc': 49.965328226126005, 'loss': 9.049811363220215}


EP_train:4:  27%|| 7582/27626 [17:55<47:30,  7.03it/s]

{'epoch': 4, 'iter': 7580, 'avg_loss': 8.519619664012435, 'avg_acc': 49.96331288748186, 'loss': 8.648263931274414}


EP_train:4:  27%|| 7592/27626 [17:56<47:33,  7.02it/s]

{'epoch': 4, 'iter': 7590, 'avg_loss': 8.519374497337878, 'avg_acc': 49.9613028586484, 'loss': 8.256301879882812}


EP_train:4:  28%|| 7602/27626 [17:57<47:21,  7.05it/s]

{'epoch': 4, 'iter': 7600, 'avg_loss': 8.519651921669631, 'avg_acc': 49.96094263912643, 'loss': 9.01905632019043}


EP_train:4:  28%|| 7612/27626 [17:59<47:19,  7.05it/s]

{'epoch': 4, 'iter': 7610, 'avg_loss': 8.519708235400717, 'avg_acc': 49.96386808566549, 'loss': 8.04644775390625}


EP_train:4:  28%|| 7622/27626 [18:00<47:14,  7.06it/s]

{'epoch': 4, 'iter': 7620, 'avg_loss': 8.519462815452288, 'avg_acc': 49.9602250360845, 'loss': 8.593680381774902}


EP_train:4:  28%|| 7632/27626 [18:02<47:24,  7.03it/s]

{'epoch': 4, 'iter': 7630, 'avg_loss': 8.519442019277, 'avg_acc': 49.960277158956885, 'loss': 8.456134796142578}


EP_train:4:  28%|| 7642/27626 [18:03<47:32,  7.01it/s]

{'epoch': 4, 'iter': 7640, 'avg_loss': 8.51939738444236, 'avg_acc': 49.96032914539982, 'loss': 7.8752121925354}


EP_train:4:  28%|| 7652/27626 [18:05<47:30,  7.01it/s]

{'epoch': 4, 'iter': 7650, 'avg_loss': 8.519490724388344, 'avg_acc': 49.9660992027186, 'loss': 8.474287986755371}


EP_train:4:  28%|| 7662/27626 [18:06<47:09,  7.06it/s]

{'epoch': 4, 'iter': 7660, 'avg_loss': 8.519441403198641, 'avg_acc': 49.968183004829655, 'loss': 8.531744956970215}


EP_train:4:  28%|| 7672/27626 [18:07<47:19,  7.03it/s]

{'epoch': 4, 'iter': 7670, 'avg_loss': 8.518970715643732, 'avg_acc': 49.96415069743188, 'loss': 9.469215393066406}


EP_train:4:  28%|| 7682/27626 [18:09<47:14,  7.04it/s]

{'epoch': 4, 'iter': 7680, 'avg_loss': 8.518319929969946, 'avg_acc': 49.96134943366749, 'loss': 7.966026306152344}


EP_train:4:  28%|| 7692/27626 [18:10<47:17,  7.02it/s]

{'epoch': 4, 'iter': 7690, 'avg_loss': 8.517887484312771, 'avg_acc': 49.961806007021195, 'loss': 9.211631774902344}


EP_train:4:  28%|| 7702/27626 [18:12<47:31,  6.99it/s]

{'epoch': 4, 'iter': 7700, 'avg_loss': 8.51736927174884, 'avg_acc': 49.9638845604467, 'loss': 7.507855415344238}


EP_train:4:  28%|| 7712/27626 [18:13<46:59,  7.06it/s]

{'epoch': 4, 'iter': 7710, 'avg_loss': 8.517438520919548, 'avg_acc': 49.96433666191156, 'loss': 8.370388984680176}


EP_train:4:  28%|| 7722/27626 [18:14<47:19,  7.01it/s]

{'epoch': 4, 'iter': 7720, 'avg_loss': 8.51713071019519, 'avg_acc': 49.96438285196218, 'loss': 9.376606941223145}


EP_train:4:  28%|| 7732/27626 [18:16<47:14,  7.02it/s]

{'epoch': 4, 'iter': 7730, 'avg_loss': 8.516937281332632, 'avg_acc': 49.96321627215108, 'loss': 7.711910724639893}


EP_train:4:  28%|| 7742/27626 [18:17<47:27,  6.98it/s]

{'epoch': 4, 'iter': 7740, 'avg_loss': 8.517214249585152, 'avg_acc': 49.966089652499676, 'loss': 9.265203475952148}


EP_train:4:  28%|| 7752/27626 [18:19<47:00,  7.05it/s]

{'epoch': 4, 'iter': 7750, 'avg_loss': 8.517109711513783, 'avg_acc': 49.96613340214166, 'loss': 8.210585594177246}


EP_train:4:  28%|| 7762/27626 [18:20<47:08,  7.02it/s]

{'epoch': 4, 'iter': 7760, 'avg_loss': 8.516728372368643, 'avg_acc': 49.96617703904136, 'loss': 8.161109924316406}


EP_train:4:  28%|| 7772/27626 [18:22<47:07,  7.02it/s]

{'epoch': 4, 'iter': 7770, 'avg_loss': 8.516942487691393, 'avg_acc': 49.968231244370095, 'loss': 9.749906539916992}


EP_train:4:  28%|| 7782/27626 [18:23<47:03,  7.03it/s]

{'epoch': 4, 'iter': 7780, 'avg_loss': 8.516876792895458, 'avg_acc': 49.96264940239043, 'loss': 8.547454833984375}


EP_train:4:  28%|| 7792/27626 [18:24<47:22,  6.98it/s]

{'epoch': 4, 'iter': 7790, 'avg_loss': 8.516618582177507, 'avg_acc': 49.95988961622385, 'loss': 8.095027923583984}


EP_train:4:  28%|| 7802/27626 [18:26<46:57,  7.04it/s]

{'epoch': 4, 'iter': 7800, 'avg_loss': 8.516198326969526, 'avg_acc': 49.95313100884502, 'loss': 8.784208297729492}


EP_train:4:  28%|| 7812/27626 [18:27<47:08,  7.01it/s]

{'epoch': 4, 'iter': 7810, 'avg_loss': 8.51581289264277, 'avg_acc': 49.956791704007166, 'loss': 9.22824764251709}


EP_train:4:  28%|| 7822/27626 [18:29<47:11,  6.99it/s]

{'epoch': 4, 'iter': 7820, 'avg_loss': 8.515960923211834, 'avg_acc': 49.95325086306099, 'loss': 8.013092994689941}


EP_train:4:  28%|| 7832/27626 [18:30<47:17,  6.98it/s]

{'epoch': 4, 'iter': 7830, 'avg_loss': 8.515921320651564, 'avg_acc': 49.94852190014047, 'loss': 8.513396263122559}


EP_train:4:  28%|| 7842/27626 [18:31<46:49,  7.04it/s]

{'epoch': 4, 'iter': 7840, 'avg_loss': 8.51619288161494, 'avg_acc': 49.945000637673765, 'loss': 8.432625770568848}


EP_train:4:  28%|| 7852/27626 [18:33<46:51,  7.03it/s]

{'epoch': 4, 'iter': 7850, 'avg_loss': 8.51591229010594, 'avg_acc': 49.94188638390014, 'loss': 8.25727653503418}


EP_train:4:  28%|| 7862/27626 [18:34<46:59,  7.01it/s]

{'epoch': 4, 'iter': 7860, 'avg_loss': 8.515797504773957, 'avg_acc': 49.93838252130772, 'loss': 8.535506248474121}


EP_train:4:  28%|| 7872/27626 [18:36<46:50,  7.03it/s]

{'epoch': 4, 'iter': 7870, 'avg_loss': 8.51575989511776, 'avg_acc': 49.94163702197942, 'loss': 9.345173835754395}


EP_train:4:  29%|| 7882/27626 [18:37<47:15,  6.96it/s]

{'epoch': 4, 'iter': 7880, 'avg_loss': 8.51565059334656, 'avg_acc': 49.9353667047329, 'loss': 8.43684196472168}


EP_train:4:  29%|| 7892/27626 [18:39<46:37,  7.06it/s]

{'epoch': 4, 'iter': 7890, 'avg_loss': 8.51574946676517, 'avg_acc': 49.93544861234317, 'loss': 8.95176887512207}


EP_train:4:  29%|| 7902/27626 [18:40<46:29,  7.07it/s]

{'epoch': 4, 'iter': 7900, 'avg_loss': 8.515502838496392, 'avg_acc': 49.93276167573725, 'loss': 8.2665376663208}


EP_train:4:  29%|| 7912/27626 [18:41<46:44,  7.03it/s]

{'epoch': 4, 'iter': 7910, 'avg_loss': 8.515822860899563, 'avg_acc': 49.93403172797371, 'loss': 8.09750747680664}


EP_train:4:  29%|| 7922/27626 [18:43<46:46,  7.02it/s]

{'epoch': 4, 'iter': 7920, 'avg_loss': 8.515439622436203, 'avg_acc': 49.9360876152001, 'loss': 8.142488479614258}


EP_train:4:  29%|| 7932/27626 [18:44<47:04,  6.97it/s]

{'epoch': 4, 'iter': 7930, 'avg_loss': 8.515081577351529, 'avg_acc': 49.93774429454041, 'loss': 8.607934951782227}


EP_train:4:  29%|| 7942/27626 [18:46<46:53,  7.00it/s]

{'epoch': 4, 'iter': 7940, 'avg_loss': 8.515273100782471, 'avg_acc': 49.93703563782899, 'loss': 8.401570320129395}


EP_train:4:  29%|| 7952/27626 [18:47<46:53,  6.99it/s]

{'epoch': 4, 'iter': 7950, 'avg_loss': 8.5155069497288, 'avg_acc': 49.93711482832348, 'loss': 8.69514274597168}


EP_train:4:  29%|| 7962/27626 [18:49<46:36,  7.03it/s]

{'epoch': 4, 'iter': 7960, 'avg_loss': 8.515611513432749, 'avg_acc': 49.93444604949127, 'loss': 9.170103073120117}


EP_train:4:  29%|| 7972/27626 [18:50<46:52,  6.99it/s]

{'epoch': 4, 'iter': 7970, 'avg_loss': 8.515539239422319, 'avg_acc': 49.93805670555765, 'loss': 8.049982070922852}


EP_train:4:  29%|| 7982/27626 [18:51<46:53,  6.98it/s]

{'epoch': 4, 'iter': 7980, 'avg_loss': 8.515248804877476, 'avg_acc': 49.94165831349455, 'loss': 7.734853267669678}


EP_train:4:  29%|| 7992/27626 [18:53<46:46,  7.00it/s]

{'epoch': 4, 'iter': 7990, 'avg_loss': 8.515280859802441, 'avg_acc': 49.942904517582285, 'loss': 8.549031257629395}


EP_train:4:  29%|| 8002/27626 [18:54<46:48,  6.99it/s]

{'epoch': 4, 'iter': 8000, 'avg_loss': 8.515274897707565, 'avg_acc': 49.94063242094738, 'loss': 7.619835376739502}


EP_train:4:  29%|| 8012/27626 [18:56<46:59,  6.96it/s]

{'epoch': 4, 'iter': 8010, 'avg_loss': 8.515076835181379, 'avg_acc': 49.937585819498196, 'loss': 8.364066123962402}


EP_train:4:  29%|| 8022/27626 [18:57<46:27,  7.03it/s]

{'epoch': 4, 'iter': 8020, 'avg_loss': 8.515585063803064, 'avg_acc': 49.93493641690562, 'loss': 8.319635391235352}


EP_train:4:  29%|| 8032/27626 [18:58<46:45,  6.98it/s]

{'epoch': 4, 'iter': 8030, 'avg_loss': 8.515423949755466, 'avg_acc': 49.93112626073963, 'loss': 8.93404769897461}


EP_train:4:  29%|| 8042/27626 [19:00<46:11,  7.07it/s]

{'epoch': 4, 'iter': 8040, 'avg_loss': 8.51512530448647, 'avg_acc': 49.932766446959334, 'loss': 7.409848690032959}


EP_train:4:  29%|| 8052/27626 [19:01<46:27,  7.02it/s]

{'epoch': 4, 'iter': 8050, 'avg_loss': 8.515362895804005, 'avg_acc': 49.930132902745, 'loss': 8.820548057556152}


EP_train:4:  29%|| 8062/27626 [19:03<46:23,  7.03it/s]

{'epoch': 4, 'iter': 8060, 'avg_loss': 8.51491760796463, 'avg_acc': 49.92556754745069, 'loss': 7.831779956817627}


EP_train:4:  29%|| 8072/27626 [19:04<46:26,  7.02it/s]

{'epoch': 4, 'iter': 8070, 'avg_loss': 8.514729248075707, 'avg_acc': 49.929531656548136, 'loss': 8.145374298095703}


EP_train:4:  29%|| 8082/27626 [19:06<46:20,  7.03it/s]

{'epoch': 4, 'iter': 8080, 'avg_loss': 8.51467555337223, 'avg_acc': 49.93077898774904, 'loss': 9.062864303588867}


EP_train:4:  29%|| 8092/27626 [19:07<46:34,  6.99it/s]

{'epoch': 4, 'iter': 8090, 'avg_loss': 8.51460535501818, 'avg_acc': 49.930092077617104, 'loss': 8.402377128601074}


EP_train:4:  29%|| 8102/27626 [19:08<46:19,  7.02it/s]

{'epoch': 4, 'iter': 8100, 'avg_loss': 8.514293539805141, 'avg_acc': 49.92940686335021, 'loss': 7.783075332641602}


EP_train:4:  29%|| 8112/27626 [19:10<46:15,  7.03it/s]

{'epoch': 4, 'iter': 8110, 'avg_loss': 8.513712128557033, 'avg_acc': 49.926026383923066, 'loss': 8.112356185913086}


EP_train:4:  29%|| 8122/27626 [19:11<46:27,  7.00it/s]

{'epoch': 4, 'iter': 8120, 'avg_loss': 8.513909602015495, 'avg_acc': 49.92496305873661, 'loss': 9.972612380981445}


EP_train:4:  29%|| 8132/27626 [19:13<46:34,  6.98it/s]

{'epoch': 4, 'iter': 8130, 'avg_loss': 8.513918379167983, 'avg_acc': 49.92736133316935, 'loss': 8.196900367736816}


EP_train:4:  29%|| 8142/27626 [19:14<46:18,  7.01it/s]

{'epoch': 4, 'iter': 8140, 'avg_loss': 8.513868054920735, 'avg_acc': 49.92437968308562, 'loss': 7.75126838684082}


EP_train:4:  30%|| 8152/27626 [19:16<46:02,  7.05it/s]

{'epoch': 4, 'iter': 8150, 'avg_loss': 8.513646320775452, 'avg_acc': 49.92370568028463, 'loss': 8.66150188446045}


EP_train:4:  30%|| 8162/27626 [19:17<46:08,  7.03it/s]

{'epoch': 4, 'iter': 8160, 'avg_loss': 8.513549788678018, 'avg_acc': 49.91767246660949, 'loss': 8.430092811584473}


EP_train:4:  30%|| 8172/27626 [19:18<46:09,  7.02it/s]

{'epoch': 4, 'iter': 8170, 'avg_loss': 8.513548620735442, 'avg_acc': 49.9177732223718, 'loss': 8.763436317443848}


EP_train:4:  30%|| 8182/27626 [19:20<46:17,  7.00it/s]

{'epoch': 4, 'iter': 8180, 'avg_loss': 8.51375801693532, 'avg_acc': 49.91367192274783, 'loss': 8.545811653137207}


EP_train:4:  30%|| 8192/27626 [19:21<46:06,  7.03it/s]

{'epoch': 4, 'iter': 8190, 'avg_loss': 8.513101907894734, 'avg_acc': 49.90919912098645, 'loss': 7.691298007965088}


EP_train:4:  30%|| 8202/27626 [19:23<46:04,  7.03it/s]

{'epoch': 4, 'iter': 8200, 'avg_loss': 8.512930696555106, 'avg_acc': 49.90930984026338, 'loss': 9.486588478088379}


EP_train:4:  30%|| 8212/27626 [19:24<46:11,  7.00it/s]

{'epoch': 4, 'iter': 8210, 'avg_loss': 8.513522527813404, 'avg_acc': 49.90409207161125, 'loss': 8.665230751037598}


EP_train:4:  30%|| 8222/27626 [19:25<45:53,  7.05it/s]

{'epoch': 4, 'iter': 8220, 'avg_loss': 8.513205572341048, 'avg_acc': 49.90458885780319, 'loss': 8.521528244018555}


EP_train:4:  30%|| 8232/27626 [19:27<45:55,  7.04it/s]

{'epoch': 4, 'iter': 8230, 'avg_loss': 8.513144914499394, 'avg_acc': 49.90166747661281, 'loss': 8.210588455200195}


EP_train:4:  30%|| 8242/27626 [19:28<46:10,  7.00it/s]

{'epoch': 4, 'iter': 8240, 'avg_loss': 8.513421232123758, 'avg_acc': 49.90406200703798, 'loss': 7.909396171569824}


EP_train:4:  30%|| 8252/27626 [19:30<46:02,  7.01it/s]

{'epoch': 4, 'iter': 8250, 'avg_loss': 8.513495884235232, 'avg_acc': 49.90039086171373, 'loss': 9.504419326782227}


EP_train:4:  30%|| 8262/27626 [19:31<46:00,  7.01it/s]

{'epoch': 4, 'iter': 8260, 'avg_loss': 8.513263691598661, 'avg_acc': 49.902402856796996, 'loss': 7.853230953216553}


EP_train:4:  30%|| 8272/27626 [19:33<46:07,  6.99it/s]

{'epoch': 4, 'iter': 8270, 'avg_loss': 8.512498731754983, 'avg_acc': 49.90440998670052, 'loss': 7.449774265289307}


EP_train:4:  30%|| 8282/27626 [19:34<45:56,  7.02it/s]

{'epoch': 4, 'iter': 8280, 'avg_loss': 8.512271121595955, 'avg_acc': 49.90377067986958, 'loss': 8.224982261657715}


EP_train:4:  30%|| 8292/27626 [19:35<45:54,  7.02it/s]

{'epoch': 4, 'iter': 8290, 'avg_loss': 8.512362827769868, 'avg_acc': 49.899740682667954, 'loss': 8.172722816467285}


EP_train:4:  30%|| 8302/27626 [19:37<45:52,  7.02it/s]

{'epoch': 4, 'iter': 8300, 'avg_loss': 8.512617069789407, 'avg_acc': 49.90061438380918, 'loss': 9.784611701965332}


EP_train:4:  30%|| 8312/27626 [19:38<45:57,  7.00it/s]

{'epoch': 4, 'iter': 8310, 'avg_loss': 8.513088059302, 'avg_acc': 49.90035795933101, 'loss': 8.024377822875977}


EP_train:4:  30%|| 8322/27626 [19:40<45:48,  7.02it/s]

{'epoch': 4, 'iter': 8320, 'avg_loss': 8.513005201353245, 'avg_acc': 49.8925910347314, 'loss': 8.636191368103027}


EP_train:4:  30%|| 8332/27626 [19:41<45:44,  7.03it/s]

{'epoch': 4, 'iter': 8330, 'avg_loss': 8.512914467739352, 'avg_acc': 49.8949705917657, 'loss': 8.882325172424316}


EP_train:4:  30%|| 8342/27626 [19:42<45:53,  7.00it/s]

{'epoch': 4, 'iter': 8340, 'avg_loss': 8.513103718752484, 'avg_acc': 49.89846840906366, 'loss': 8.110503196716309}


EP_train:4:  30%|| 8352/27626 [19:44<46:02,  6.98it/s]

{'epoch': 4, 'iter': 8350, 'avg_loss': 8.513112100323236, 'avg_acc': 49.90008681595018, 'loss': 8.47826099395752}


EP_train:4:  30%|| 8362/27626 [19:45<45:51,  7.00it/s]

{'epoch': 4, 'iter': 8360, 'avg_loss': 8.512477989502464, 'avg_acc': 49.906560220069366, 'loss': 8.972217559814453}


EP_train:4:  30%|| 8372/27626 [19:47<45:50,  7.00it/s]

{'epoch': 4, 'iter': 8370, 'avg_loss': 8.512715816782618, 'avg_acc': 49.90667184326843, 'loss': 8.41758918762207}


EP_train:4:  30%|| 8382/27626 [19:48<45:27,  7.06it/s]

{'epoch': 4, 'iter': 8380, 'avg_loss': 8.51277054865311, 'avg_acc': 49.91013900489202, 'loss': 7.778193473815918}


EP_train:4:  30%|| 8392/27626 [19:50<45:37,  7.03it/s]

{'epoch': 4, 'iter': 8390, 'avg_loss': 8.512497065022114, 'avg_acc': 49.90912882850673, 'loss': 8.027304649353027}


EP_train:4:  30%|| 8402/27626 [19:51<45:38,  7.02it/s]

{'epoch': 4, 'iter': 8400, 'avg_loss': 8.512152477582712, 'avg_acc': 49.90737709796453, 'loss': 9.215415000915527}


EP_train:4:  30%|| 8412/27626 [19:52<45:34,  7.03it/s]

{'epoch': 4, 'iter': 8410, 'avg_loss': 8.512427983733565, 'avg_acc': 49.907487219117826, 'loss': 8.907054901123047}


EP_train:4:  30%|| 8422/27626 [19:54<45:46,  6.99it/s]

{'epoch': 4, 'iter': 8420, 'avg_loss': 8.51264976121626, 'avg_acc': 49.90203063769149, 'loss': 8.026898384094238}


EP_train:4:  31%|| 8432/27626 [19:55<45:20,  7.06it/s]

{'epoch': 4, 'iter': 8430, 'avg_loss': 8.512461985575744, 'avg_acc': 49.906594709998814, 'loss': 9.209144592285156}


EP_train:4:  31%|| 8442/27626 [19:57<45:41,  7.00it/s]

{'epoch': 4, 'iter': 8440, 'avg_loss': 8.512496836390799, 'avg_acc': 49.911147968250205, 'loss': 7.964105129241943}


EP_train:4:  31%|| 8452/27626 [19:58<45:33,  7.01it/s]

{'epoch': 4, 'iter': 8450, 'avg_loss': 8.512120667469881, 'avg_acc': 49.914950893385395, 'loss': 8.249542236328125}


EP_train:4:  31%|| 8462/27626 [20:00<45:36,  7.00it/s]

{'epoch': 4, 'iter': 8460, 'avg_loss': 8.512232404780324, 'avg_acc': 49.91874482921641, 'loss': 9.28862190246582}


EP_train:4:  31%|| 8472/27626 [20:01<45:30,  7.01it/s]

{'epoch': 4, 'iter': 8470, 'avg_loss': 8.512539208251967, 'avg_acc': 49.91699622240586, 'loss': 8.77702522277832}


EP_train:4:  31%|| 8482/27626 [20:02<45:16,  7.05it/s]

{'epoch': 4, 'iter': 8480, 'avg_loss': 8.512665822868316, 'avg_acc': 49.91930491687301, 'loss': 8.708420753479004}


EP_train:4:  31%|| 8492/27626 [20:04<45:19,  7.04it/s]

{'epoch': 4, 'iter': 8490, 'avg_loss': 8.512622590930397, 'avg_acc': 49.921976210104816, 'loss': 9.543819427490234}


EP_train:4:  31%|| 8502/27626 [20:05<45:13,  7.05it/s]

{'epoch': 4, 'iter': 8500, 'avg_loss': 8.512358542288517, 'avg_acc': 49.92427361486884, 'loss': 8.079665184020996}


EP_train:4:  31%|| 8512/27626 [20:07<45:09,  7.06it/s]

{'epoch': 4, 'iter': 8510, 'avg_loss': 8.512490962930185, 'avg_acc': 49.9272999647515, 'loss': 8.53652286529541}


EP_train:4:  31%|| 8522/27626 [20:08<45:15,  7.03it/s]

{'epoch': 4, 'iter': 8520, 'avg_loss': 8.51258946231743, 'avg_acc': 49.928485506395965, 'loss': 8.349312782287598}


EP_train:4:  31%|| 8532/27626 [20:09<45:20,  7.02it/s]

{'epoch': 4, 'iter': 8530, 'avg_loss': 8.512442439064417, 'avg_acc': 49.92856933536514, 'loss': 8.821849822998047}


EP_train:4:  31%|| 8542/27626 [20:11<45:27,  7.00it/s]

{'epoch': 4, 'iter': 8540, 'avg_loss': 8.512452043759545, 'avg_acc': 49.93158002575811, 'loss': 8.058250427246094}


EP_train:4:  31%|| 8552/27626 [20:12<45:03,  7.06it/s]

{'epoch': 4, 'iter': 8550, 'avg_loss': 8.512347867733943, 'avg_acc': 49.93019822243013, 'loss': 8.474723815917969}


EP_train:4:  31%|| 8562/27626 [20:14<45:27,  6.99it/s]

{'epoch': 4, 'iter': 8560, 'avg_loss': 8.512621850112987, 'avg_acc': 49.92772456488728, 'loss': 8.964537620544434}


EP_train:4:  31%|| 8572/27626 [20:15<45:25,  6.99it/s]

{'epoch': 4, 'iter': 8570, 'avg_loss': 8.512635899791778, 'avg_acc': 49.92744428888111, 'loss': 8.16067123413086}


EP_train:4:  31%|| 8582/27626 [20:17<44:55,  7.07it/s]

{'epoch': 4, 'iter': 8580, 'avg_loss': 8.512342402935307, 'avg_acc': 49.933719846171776, 'loss': 8.815166473388672}


EP_train:4:  31%|| 8592/27626 [20:18<45:30,  6.97it/s]

{'epoch': 4, 'iter': 8590, 'avg_loss': 8.512123221471887, 'avg_acc': 49.93088697474101, 'loss': 7.801769733428955}


EP_train:4:  31%|| 8602/27626 [20:19<45:06,  7.03it/s]

{'epoch': 4, 'iter': 8600, 'avg_loss': 8.512044107729634, 'avg_acc': 49.9287873503081, 'loss': 9.049981117248535}


EP_train:4:  31%|| 8612/27626 [20:21<45:34,  6.95it/s]

{'epoch': 4, 'iter': 8610, 'avg_loss': 8.51194703787991, 'avg_acc': 49.92052316804087, 'loss': 9.120162010192871}


EP_train:4:  31%|| 8622/27626 [20:22<45:04,  7.03it/s]

{'epoch': 4, 'iter': 8620, 'avg_loss': 8.5118553515391, 'avg_acc': 49.917715462243365, 'loss': 8.529407501220703}


EP_train:4:  31%|| 8632/27626 [20:24<45:23,  6.97it/s]

{'epoch': 4, 'iter': 8630, 'avg_loss': 8.511836696618568, 'avg_acc': 49.92179353493222, 'loss': 8.778556823730469}


EP_train:4:  31%|| 8642/27626 [20:25<44:59,  7.03it/s]

{'epoch': 4, 'iter': 8640, 'avg_loss': 8.51214973734231, 'avg_acc': 49.920075801411876, 'loss': 7.873075485229492}


EP_train:4:  31%|| 8652/27626 [20:26<45:01,  7.02it/s]

{'epoch': 4, 'iter': 8650, 'avg_loss': 8.512070193757571, 'avg_acc': 49.91655588949254, 'loss': 8.206225395202637}


EP_train:4:  31%|| 8662/27626 [20:28<45:09,  7.00it/s]

{'epoch': 4, 'iter': 8660, 'avg_loss': 8.512003092715707, 'avg_acc': 49.91412654427895, 'loss': 8.140746116638184}


EP_train:4:  31%|| 8672/27626 [20:29<45:00,  7.02it/s]

{'epoch': 4, 'iter': 8670, 'avg_loss': 8.511958865747637, 'avg_acc': 49.91026121554608, 'loss': 9.001373291015625}


EP_train:4:  31%|| 8682/27626 [20:31<45:35,  6.93it/s]

{'epoch': 4, 'iter': 8680, 'avg_loss': 8.512133122314411, 'avg_acc': 49.9092846446262, 'loss': 8.299225807189941}


EP_train:4:  31%|| 8692/27626 [20:32<44:51,  7.03it/s]

{'epoch': 4, 'iter': 8690, 'avg_loss': 8.51242548433742, 'avg_acc': 49.902916810493615, 'loss': 9.575977325439453}


EP_train:4:  31%|| 8702/27626 [20:34<44:41,  7.06it/s]

{'epoch': 4, 'iter': 8700, 'avg_loss': 8.512516299193926, 'avg_acc': 49.90410584990231, 'loss': 8.00123119354248}


EP_train:4:  32%|| 8712/27626 [20:35<44:42,  7.05it/s]

{'epoch': 4, 'iter': 8710, 'avg_loss': 8.512102435806865, 'avg_acc': 49.90493341751808, 'loss': 8.17088508605957}


EP_train:4:  32%|| 8722/27626 [20:36<44:51,  7.02it/s]

{'epoch': 4, 'iter': 8720, 'avg_loss': 8.51214327028675, 'avg_acc': 49.90934239192753, 'loss': 7.76050329208374}


EP_train:4:  32%|| 8732/27626 [20:38<44:54,  7.01it/s]

{'epoch': 4, 'iter': 8730, 'avg_loss': 8.512173681703715, 'avg_acc': 49.906940785706105, 'loss': 8.234899520874023}


EP_train:4:  32%|| 8742/27626 [20:39<44:55,  7.01it/s]

{'epoch': 4, 'iter': 8740, 'avg_loss': 8.512129902116849, 'avg_acc': 49.908834801510125, 'loss': 8.219131469726562}


EP_train:4:  32%|| 8752/27626 [20:41<44:46,  7.03it/s]

{'epoch': 4, 'iter': 8750, 'avg_loss': 8.512277469595777, 'avg_acc': 49.91072448862987, 'loss': 8.992408752441406}


EP_train:4:  32%|| 8762/27626 [20:42<44:53,  7.00it/s]

{'epoch': 4, 'iter': 8760, 'avg_loss': 8.512343436557808, 'avg_acc': 49.914036639653006, 'loss': 8.988319396972656}


EP_train:4:  32%|| 8772/27626 [20:44<44:38,  7.04it/s]

{'epoch': 4, 'iter': 8770, 'avg_loss': 8.512430013428935, 'avg_acc': 49.91520351157222, 'loss': 8.504054069519043}


EP_train:4:  32%|| 8782/27626 [20:45<44:45,  7.02it/s]

{'epoch': 4, 'iter': 8780, 'avg_loss': 8.512207518730602, 'avg_acc': 49.914588315681584, 'loss': 7.574112415313721}


EP_train:4:  32%|| 8792/27626 [20:46<44:23,  7.07it/s]

{'epoch': 4, 'iter': 8790, 'avg_loss': 8.510954201526575, 'avg_acc': 49.919306677283586, 'loss': 6.975858688354492}


EP_train:4:  32%|| 8802/27626 [20:48<44:31,  7.05it/s]

{'epoch': 4, 'iter': 8800, 'avg_loss': 8.510118228497118, 'avg_acc': 49.911941824792635, 'loss': 8.440166473388672}


EP_train:4:  32%|| 8812/27626 [20:49<44:43,  7.01it/s]

{'epoch': 4, 'iter': 8810, 'avg_loss': 8.510096809647841, 'avg_acc': 49.91452445806379, 'loss': 8.793525695800781}


EP_train:4:  32%|| 8822/27626 [20:51<45:07,  6.94it/s]

{'epoch': 4, 'iter': 8820, 'avg_loss': 8.51015625690849, 'avg_acc': 49.916038431016894, 'loss': 7.751807689666748}


EP_train:4:  32%|| 8832/27626 [20:52<44:34,  7.03it/s]

{'epoch': 4, 'iter': 8830, 'avg_loss': 8.509928340402144, 'avg_acc': 49.915779639904876, 'loss': 9.306772232055664}


EP_train:4:  32%|| 8842/27626 [20:53<44:40,  7.01it/s]

{'epoch': 4, 'iter': 8840, 'avg_loss': 8.510204171773037, 'avg_acc': 49.92400463748445, 'loss': 8.485566139221191}


EP_train:4:  32%|| 8852/27626 [20:55<44:28,  7.03it/s]

{'epoch': 4, 'iter': 8850, 'avg_loss': 8.510228457282242, 'avg_acc': 49.923384363348774, 'loss': 8.19724178314209}


EP_train:4:  32%|| 8862/27626 [20:56<44:38,  7.00it/s]

{'epoch': 4, 'iter': 8860, 'avg_loss': 8.509941677465461, 'avg_acc': 49.92311815822142, 'loss': 8.901091575622559}


EP_train:4:  32%|| 8872/27626 [20:58<44:43,  6.99it/s]

{'epoch': 4, 'iter': 8870, 'avg_loss': 8.509718837604732, 'avg_acc': 49.92461391049487, 'loss': 8.10571575164795}


EP_train:4:  32%|| 8882/27626 [20:59<44:23,  7.04it/s]

{'epoch': 4, 'iter': 8880, 'avg_loss': 8.5096450047428, 'avg_acc': 49.9306806665916, 'loss': 8.695385932922363}


EP_train:4:  32%|| 8892/27626 [21:01<44:12,  7.06it/s]

{'epoch': 4, 'iter': 8890, 'avg_loss': 8.509634482875313, 'avg_acc': 49.92618940501631, 'loss': 8.220915794372559}


EP_train:4:  32%|| 8902/27626 [21:02<44:55,  6.95it/s]

{'epoch': 4, 'iter': 8900, 'avg_loss': 8.50971774494577, 'avg_acc': 49.92170823502977, 'loss': 8.355690956115723}


EP_train:4:  32%|| 8912/27626 [21:03<44:53,  6.95it/s]

{'epoch': 4, 'iter': 8910, 'avg_loss': 8.509454741353286, 'avg_acc': 49.91969195376501, 'loss': 8.584919929504395}


EP_train:4:  32%|| 8922/27626 [21:05<44:23,  7.02it/s]

{'epoch': 4, 'iter': 8920, 'avg_loss': 8.509303743776993, 'avg_acc': 49.921183163322496, 'loss': 7.996070384979248}


EP_train:4:  32%|| 8932/27626 [21:06<44:13,  7.04it/s]

{'epoch': 4, 'iter': 8930, 'avg_loss': 8.50924300720331, 'avg_acc': 49.915672936961144, 'loss': 8.595285415649414}


EP_train:4:  32%|| 8942/27626 [21:08<44:19,  7.03it/s]

{'epoch': 4, 'iter': 8940, 'avg_loss': 8.509445084086629, 'avg_acc': 49.91856335980315, 'loss': 9.192730903625488}


EP_train:4:  32%|| 8952/27626 [21:09<44:17,  7.03it/s]

{'epoch': 4, 'iter': 8950, 'avg_loss': 8.509659824684578, 'avg_acc': 49.91621047927606, 'loss': 9.088536262512207}


EP_train:4:  32%|| 8962/27626 [21:11<44:38,  6.97it/s]

{'epoch': 4, 'iter': 8960, 'avg_loss': 8.509821641632733, 'avg_acc': 49.910375516125434, 'loss': 7.925033092498779}


EP_train:4:  32%|| 8972/27626 [21:12<44:07,  7.05it/s]

{'epoch': 4, 'iter': 8970, 'avg_loss': 8.509970362164374, 'avg_acc': 49.91291383346338, 'loss': 8.997078895568848}


EP_train:4:  33%|| 8982/27626 [21:13<44:17,  7.02it/s]

{'epoch': 4, 'iter': 8980, 'avg_loss': 8.509705883797473, 'avg_acc': 49.911618973388265, 'loss': 8.166704177856445}


EP_train:4:  33%|| 8992/27626 [21:15<44:24,  6.99it/s]

{'epoch': 4, 'iter': 8990, 'avg_loss': 8.509779210801836, 'avg_acc': 49.91797352908464, 'loss': 7.7673187255859375}


EP_train:4:  33%|| 9002/27626 [21:16<44:20,  7.00it/s]

{'epoch': 4, 'iter': 9000, 'avg_loss': 8.509968661266013, 'avg_acc': 49.917023108543496, 'loss': 8.965386390686035}


EP_train:4:  33%|| 9012/27626 [21:18<44:12,  7.02it/s]

{'epoch': 4, 'iter': 9010, 'avg_loss': 8.510186228658473, 'avg_acc': 49.91746199090001, 'loss': 8.972747802734375}


EP_train:4:  33%|| 9022/27626 [21:19<44:20,  6.99it/s]

{'epoch': 4, 'iter': 9020, 'avg_loss': 8.510895535549718, 'avg_acc': 49.91859272807893, 'loss': 9.36605167388916}


EP_train:4:  33%|| 9032/27626 [21:20<44:02,  7.04it/s]

{'epoch': 4, 'iter': 9030, 'avg_loss': 8.51095819863987, 'avg_acc': 49.91591462739453, 'loss': 8.32141399383545}


EP_train:4:  33%|| 9042/27626 [21:22<44:04,  7.03it/s]

{'epoch': 4, 'iter': 9040, 'avg_loss': 8.510665886170859, 'avg_acc': 49.914279393872356, 'loss': 8.394371032714844}


EP_train:4:  33%|| 9052/27626 [21:23<44:23,  6.97it/s]

{'epoch': 4, 'iter': 9050, 'avg_loss': 8.510401019601186, 'avg_acc': 49.91575516517512, 'loss': 8.954326629638672}


EP_train:4:  33%|| 9062/27626 [21:25<43:58,  7.04it/s]

{'epoch': 4, 'iter': 9060, 'avg_loss': 8.510393982019899, 'avg_acc': 49.91377883235846, 'loss': 8.590933799743652}


EP_train:4:  33%|| 9072/27626 [21:26<44:01,  7.03it/s]

{'epoch': 4, 'iter': 9070, 'avg_loss': 8.510347851265516, 'avg_acc': 49.91490739719987, 'loss': 8.598730087280273}


EP_train:4:  33%|| 9082/27626 [21:28<44:11,  6.99it/s]

{'epoch': 4, 'iter': 9080, 'avg_loss': 8.510571300898102, 'avg_acc': 49.915345226296665, 'loss': 9.387469291687012}


EP_train:4:  33%|| 9092/27626 [21:29<44:19,  6.97it/s]

{'epoch': 4, 'iter': 9090, 'avg_loss': 8.510687491692092, 'avg_acc': 49.91750082499175, 'loss': 8.672306060791016}


EP_train:4:  33%|| 9102/27626 [21:30<43:57,  7.02it/s]

{'epoch': 4, 'iter': 9100, 'avg_loss': 8.510830376701241, 'avg_acc': 49.91724810460389, 'loss': 9.196881294250488}


EP_train:4:  33%|| 9112/27626 [21:32<45:05,  6.84it/s]

{'epoch': 4, 'iter': 9110, 'avg_loss': 8.510831600773702, 'avg_acc': 49.92522774667984, 'loss': 8.025189399719238}


EP_train:4:  33%|| 9122/27626 [21:33<44:04,  7.00it/s]

{'epoch': 4, 'iter': 9120, 'avg_loss': 8.510361861691194, 'avg_acc': 49.92530972481087, 'loss': 7.980643272399902}


EP_train:4:  33%|| 9132/27626 [21:35<44:03,  6.99it/s]

{'epoch': 4, 'iter': 9130, 'avg_loss': 8.510387585727372, 'avg_acc': 49.92504928266345, 'loss': 8.117901802062988}


EP_train:4:  33%|| 9142/27626 [21:36<43:59,  7.00it/s]

{'epoch': 4, 'iter': 9140, 'avg_loss': 8.51013589726928, 'avg_acc': 49.923080078766, 'loss': 9.527291297912598}


EP_train:4:  33%|| 9152/27626 [21:37<43:44,  7.04it/s]

{'epoch': 4, 'iter': 9150, 'avg_loss': 8.510243696432246, 'avg_acc': 49.92589607693148, 'loss': 7.209627151489258}


EP_train:4:  33%|| 9162/27626 [21:39<43:43,  7.04it/s]

{'epoch': 4, 'iter': 9160, 'avg_loss': 8.510482190438928, 'avg_acc': 49.92700032747517, 'loss': 8.24517822265625}


EP_train:4:  33%|| 9172/27626 [21:40<44:01,  6.99it/s]

{'epoch': 4, 'iter': 9170, 'avg_loss': 8.510425837267942, 'avg_acc': 49.92844291789336, 'loss': 8.008901596069336}


EP_train:4:  33%|| 9182/27626 [21:42<43:54,  7.00it/s]

{'epoch': 4, 'iter': 9180, 'avg_loss': 8.510161450272513, 'avg_acc': 49.926478597102715, 'loss': 7.191323757171631}


EP_train:4:  33%|| 9192/27626 [21:43<43:19,  7.09it/s]

{'epoch': 4, 'iter': 9190, 'avg_loss': 8.510060766811742, 'avg_acc': 49.924518550756176, 'loss': 8.675259590148926}


EP_train:4:  33%|| 9202/27626 [21:45<43:34,  7.05it/s]

{'epoch': 4, 'iter': 9200, 'avg_loss': 8.510164465415532, 'avg_acc': 49.92799695685252, 'loss': 8.478160858154297}


EP_train:4:  33%|| 9212/27626 [21:46<43:33,  7.04it/s]

{'epoch': 4, 'iter': 9210, 'avg_loss': 8.510466670927869, 'avg_acc': 49.92841439583107, 'loss': 9.143871307373047}


EP_train:4:  33%|| 9222/27626 [21:47<43:30,  7.05it/s]

{'epoch': 4, 'iter': 9220, 'avg_loss': 8.51033152624837, 'avg_acc': 49.93255883309836, 'loss': 8.463750839233398}


EP_train:4:  33%|| 9232/27626 [21:49<43:23,  7.07it/s]

{'epoch': 4, 'iter': 9230, 'avg_loss': 8.510420043113857, 'avg_acc': 49.93364749214603, 'loss': 8.312384605407715}


EP_train:4:  33%|| 9242/27626 [21:50<43:24,  7.06it/s]

{'epoch': 4, 'iter': 9240, 'avg_loss': 8.510111602788982, 'avg_acc': 49.93507196190888, 'loss': 8.910889625549316}


EP_train:4:  33%|| 9252/27626 [21:52<43:17,  7.07it/s]

{'epoch': 4, 'iter': 9250, 'avg_loss': 8.509717256620037, 'avg_acc': 49.92939952437575, 'loss': 9.263816833496094}


EP_train:4:  34%|| 9262/27626 [21:53<44:06,  6.94it/s]

{'epoch': 4, 'iter': 9260, 'avg_loss': 8.50996042072818, 'avg_acc': 49.929813195119316, 'loss': 8.233329772949219}


EP_train:4:  34%|| 9272/27626 [21:54<43:26,  7.04it/s]

{'epoch': 4, 'iter': 9270, 'avg_loss': 8.510044991860767, 'avg_acc': 49.93359669938518, 'loss': 8.419130325317383}


EP_train:4:  34%|| 9282/27626 [21:56<43:39,  7.00it/s]

{'epoch': 4, 'iter': 9280, 'avg_loss': 8.510346443331224, 'avg_acc': 49.93669863161298, 'loss': 8.136048316955566}


EP_train:4:  34%|| 9292/27626 [21:57<43:30,  7.02it/s]

{'epoch': 4, 'iter': 9290, 'avg_loss': 8.510239222529322, 'avg_acc': 49.93172155849747, 'loss': 8.616926193237305}


EP_train:4:  34%|| 9302/27626 [21:59<43:31,  7.02it/s]

{'epoch': 4, 'iter': 9300, 'avg_loss': 8.510325845191971, 'avg_acc': 49.93112299752715, 'loss': 8.710137367248535}


EP_train:4:  34%|| 9312/27626 [22:00<43:29,  7.02it/s]

{'epoch': 4, 'iter': 9310, 'avg_loss': 8.51079855245853, 'avg_acc': 49.9345532166255, 'loss': 10.707598686218262}


EP_train:4:  34%|| 9322/27626 [22:02<43:17,  7.05it/s]

{'epoch': 4, 'iter': 9320, 'avg_loss': 8.511246223099791, 'avg_acc': 49.94233451346422, 'loss': 8.780736923217773}


EP_train:4:  34%|| 9332/27626 [22:03<43:29,  7.01it/s]

{'epoch': 4, 'iter': 9330, 'avg_loss': 8.511376124437108, 'avg_acc': 49.93804254635087, 'loss': 9.373607635498047}


EP_train:4:  34%|| 9342/27626 [22:04<43:16,  7.04it/s]

{'epoch': 4, 'iter': 9340, 'avg_loss': 8.511381881878446, 'avg_acc': 49.94145434107698, 'loss': 8.10755729675293}


EP_train:4:  34%|| 9352/27626 [22:06<43:19,  7.03it/s]

{'epoch': 4, 'iter': 9350, 'avg_loss': 8.511644970884987, 'avg_acc': 49.946863971767726, 'loss': 8.512862205505371}


EP_train:4:  34%|| 9362/27626 [22:07<43:10,  7.05it/s]

{'epoch': 4, 'iter': 9360, 'avg_loss': 8.511509540383855, 'avg_acc': 49.95126054908663, 'loss': 7.732462406158447}


EP_train:4:  34%|| 9372/27626 [22:09<42:56,  7.08it/s]

{'epoch': 4, 'iter': 9370, 'avg_loss': 8.511502281195478, 'avg_acc': 49.95064560879309, 'loss': 9.131216049194336}


EP_train:4:  34%|| 9382/27626 [22:10<43:12,  7.04it/s]

{'epoch': 4, 'iter': 9380, 'avg_loss': 8.511449916977044, 'avg_acc': 49.957360622534914, 'loss': 8.374968528747559}


EP_train:4:  34%|| 9392/27626 [22:11<43:19,  7.01it/s]

{'epoch': 4, 'iter': 9390, 'avg_loss': 8.511363300014363, 'avg_acc': 49.95174901501437, 'loss': 8.527636528015137}


EP_train:4:  34%|| 9402/27626 [22:13<43:32,  6.97it/s]

{'epoch': 4, 'iter': 9400, 'avg_loss': 8.511394422569879, 'avg_acc': 49.949473460270184, 'loss': 8.899798393249512}


EP_train:4:  34%|| 9412/27626 [22:14<43:07,  7.04it/s]

{'epoch': 4, 'iter': 9410, 'avg_loss': 8.511328016377803, 'avg_acc': 49.94521039209436, 'loss': 8.486454010009766}


EP_train:4:  34%|| 9422/27626 [22:16<43:15,  7.01it/s]

{'epoch': 4, 'iter': 9420, 'avg_loss': 8.51140067446018, 'avg_acc': 49.941951491349116, 'loss': 8.195040702819824}


EP_train:4:  34%|| 9432/27626 [22:17<43:24,  6.98it/s]

{'epoch': 4, 'iter': 9430, 'avg_loss': 8.51127169830222, 'avg_acc': 49.94101897995971, 'loss': 8.355469703674316}


EP_train:4:  34%|| 9442/27626 [22:19<43:08,  7.02it/s]

{'epoch': 4, 'iter': 9440, 'avg_loss': 8.511203171906109, 'avg_acc': 49.93810242559051, 'loss': 8.698044776916504}


EP_train:4:  34%|| 9452/27626 [22:20<43:05,  7.03it/s]

{'epoch': 4, 'iter': 9450, 'avg_loss': 8.511062200126693, 'avg_acc': 49.936514654533916, 'loss': 8.664727210998535}


EP_train:4:  34%|| 9462/27626 [22:21<43:03,  7.03it/s]

{'epoch': 4, 'iter': 9460, 'avg_loss': 8.511011910254577, 'avg_acc': 49.93228781312757, 'loss': 8.601713180541992}


EP_train:4:  34%|| 9472/27626 [22:23<43:06,  7.02it/s]

{'epoch': 4, 'iter': 9470, 'avg_loss': 8.510909397289174, 'avg_acc': 49.92971967057333, 'loss': 8.560111045837402}


EP_train:4:  34%|| 9482/27626 [22:24<42:46,  7.07it/s]

{'epoch': 4, 'iter': 9480, 'avg_loss': 8.510743015590267, 'avg_acc': 49.92781615863306, 'loss': 9.074767112731934}


EP_train:4:  34%|| 9492/27626 [22:26<42:40,  7.08it/s]

{'epoch': 4, 'iter': 9490, 'avg_loss': 8.511006804038093, 'avg_acc': 49.9262459171847, 'loss': 8.891992568969727}


EP_train:4:  34%|| 9502/27626 [22:27<43:12,  6.99it/s]

{'epoch': 4, 'iter': 9500, 'avg_loss': 8.510677791095986, 'avg_acc': 49.927310283128094, 'loss': 7.567451000213623}


EP_train:4:  34%|| 9512/27626 [22:28<42:47,  7.06it/s]

{'epoch': 4, 'iter': 9510, 'avg_loss': 8.510700910929412, 'avg_acc': 49.92771527704763, 'loss': 8.15917682647705}


EP_train:4:  34%|| 9522/27626 [22:30<42:49,  7.05it/s]

{'epoch': 4, 'iter': 9520, 'avg_loss': 8.510939155102227, 'avg_acc': 49.93172986030879, 'loss': 8.502267837524414}


EP_train:4:  35%|| 9532/27626 [22:31<42:47,  7.05it/s]

{'epoch': 4, 'iter': 9530, 'avg_loss': 8.510985353438029, 'avg_acc': 49.93475238694786, 'loss': 8.78274154663086}


EP_train:4:  35%|| 9542/27626 [22:33<42:57,  7.02it/s]

{'epoch': 4, 'iter': 9540, 'avg_loss': 8.51112312656343, 'avg_acc': 49.93776857771722, 'loss': 9.166104316711426}


EP_train:4:  35%|| 9552/27626 [22:34<42:56,  7.01it/s]

{'epoch': 4, 'iter': 9550, 'avg_loss': 8.511159742980611, 'avg_acc': 49.9384881164276, 'loss': 7.856780529022217}


EP_train:4:  35%|| 9562/27626 [22:36<43:11,  6.97it/s]

{'epoch': 4, 'iter': 9560, 'avg_loss': 8.51146881740695, 'avg_acc': 49.93364972283234, 'loss': 8.32203483581543}


EP_train:4:  35%|| 9572/27626 [22:37<43:01,  6.99it/s]

{'epoch': 4, 'iter': 9570, 'avg_loss': 8.511375238424359, 'avg_acc': 49.9353515829067, 'loss': 7.69190788269043}


EP_train:4:  35%|| 9582/27626 [22:38<42:34,  7.06it/s]

{'epoch': 4, 'iter': 9580, 'avg_loss': 8.511423312128368, 'avg_acc': 49.935745224924325, 'loss': 8.223623275756836}


EP_train:4:  35%|| 9592/27626 [22:40<43:03,  6.98it/s]

{'epoch': 4, 'iter': 9590, 'avg_loss': 8.511567193093855, 'avg_acc': 49.938744656448755, 'loss': 8.99411392211914}


EP_train:4:  35%|| 9602/27626 [22:41<42:32,  7.06it/s]

{'epoch': 4, 'iter': 9600, 'avg_loss': 8.511536058591389, 'avg_acc': 49.94011040516612, 'loss': 8.254842758178711}


EP_train:4:  35%|| 9612/27626 [22:43<43:02,  6.97it/s]

{'epoch': 4, 'iter': 9610, 'avg_loss': 8.51128741325765, 'avg_acc': 49.945049942773906, 'loss': 8.555832862854004}


EP_train:4:  35%|| 9622/27626 [22:44<42:42,  7.03it/s]

{'epoch': 4, 'iter': 9620, 'avg_loss': 8.511333497147243, 'avg_acc': 49.940234902816755, 'loss': 8.210774421691895}


EP_train:4:  35%|| 9632/27626 [22:46<42:29,  7.06it/s]

{'epoch': 4, 'iter': 9630, 'avg_loss': 8.511399856795528, 'avg_acc': 49.93964801162911, 'loss': 8.993653297424316}


EP_train:4:  35%|| 9642/27626 [22:47<42:27,  7.06it/s]

{'epoch': 4, 'iter': 9640, 'avg_loss': 8.511501418593365, 'avg_acc': 49.938738201431384, 'loss': 8.362344741821289}


EP_train:4:  35%|| 9652/27626 [22:48<42:40,  7.02it/s]

{'epoch': 4, 'iter': 9650, 'avg_loss': 8.511402964925484, 'avg_acc': 49.93944927986737, 'loss': 9.9234619140625}


EP_train:4:  35%|| 9662/27626 [22:50<42:46,  7.00it/s]

{'epoch': 4, 'iter': 9660, 'avg_loss': 8.511499466702084, 'avg_acc': 49.937894627885306, 'loss': 8.126583099365234}


EP_train:4:  35%|| 9672/27626 [22:51<42:48,  6.99it/s]

{'epoch': 4, 'iter': 9670, 'avg_loss': 8.511135843421147, 'avg_acc': 49.93666632199359, 'loss': 8.115405082702637}


EP_train:4:  35%|| 9682/27626 [22:53<42:32,  7.03it/s]

{'epoch': 4, 'iter': 9680, 'avg_loss': 8.51112912019143, 'avg_acc': 49.94028251213717, 'loss': 8.08768367767334}


EP_train:4:  35%|| 9692/27626 [22:54<42:38,  7.01it/s]

{'epoch': 4, 'iter': 9690, 'avg_loss': 8.511080769928675, 'avg_acc': 49.94098906201631, 'loss': 8.285569190979004}


EP_train:4:  35%|| 9702/27626 [22:55<42:21,  7.05it/s]

{'epoch': 4, 'iter': 9700, 'avg_loss': 8.511192337948813, 'avg_acc': 49.9355736522008, 'loss': 8.556994438171387}


EP_train:4:  35%|| 9712/27626 [22:57<42:15,  7.06it/s]

{'epoch': 4, 'iter': 9710, 'avg_loss': 8.511162340008823, 'avg_acc': 49.937248995983936, 'loss': 9.357625007629395}


EP_train:4:  35%|| 9722/27626 [22:58<42:08,  7.08it/s]

{'epoch': 4, 'iter': 9720, 'avg_loss': 8.511059707089862, 'avg_acc': 49.93345592017282, 'loss': 8.828415870666504}


EP_train:4:  35%|| 9732/27626 [23:00<42:31,  7.01it/s]

{'epoch': 4, 'iter': 9730, 'avg_loss': 8.51131652930623, 'avg_acc': 49.935772274175314, 'loss': 9.615443229675293}


EP_train:4:  35%|| 9742/27626 [23:01<42:48,  6.96it/s]

{'epoch': 4, 'iter': 9740, 'avg_loss': 8.511850481833195, 'avg_acc': 49.936479827533105, 'loss': 9.557437896728516}


EP_train:4:  35%|| 9752/27626 [23:03<42:33,  7.00it/s]

{'epoch': 4, 'iter': 9750, 'avg_loss': 8.5120756710484, 'avg_acc': 49.93494256999283, 'loss': 9.188541412353516}


EP_train:4:  35%|| 9762/27626 [23:04<42:28,  7.01it/s]

{'epoch': 4, 'iter': 9760, 'avg_loss': 8.51260598376813, 'avg_acc': 49.937250281733434, 'loss': 8.680923461914062}


EP_train:4:  35%|| 9772/27626 [23:05<42:04,  7.07it/s]

{'epoch': 4, 'iter': 9770, 'avg_loss': 8.512601521983628, 'avg_acc': 49.93795415003582, 'loss': 8.534538269042969}


EP_train:4:  35%|| 9782/27626 [23:07<41:58,  7.09it/s]

{'epoch': 4, 'iter': 9780, 'avg_loss': 8.512465665901308, 'avg_acc': 49.94344903384112, 'loss': 8.515798568725586}


EP_train:4:  35%|| 9792/27626 [23:08<42:08,  7.05it/s]

{'epoch': 4, 'iter': 9790, 'avg_loss': 8.512055343121043, 'avg_acc': 49.94159176795016, 'loss': 7.861152172088623}


EP_train:4:  35%|| 9802/27626 [23:10<42:32,  6.98it/s]

{'epoch': 4, 'iter': 9800, 'avg_loss': 8.512010274255973, 'avg_acc': 49.937825221916135, 'loss': 7.9960527420043945}


EP_train:4:  36%|| 9812/27626 [23:11<42:04,  7.06it/s]

{'epoch': 4, 'iter': 9810, 'avg_loss': 8.512067936183893, 'avg_acc': 49.940755274691675, 'loss': 8.128531455993652}


EP_train:4:  36%|| 9822/27626 [23:12<42:09,  7.04it/s]

{'epoch': 4, 'iter': 9820, 'avg_loss': 8.511868240852461, 'avg_acc': 49.94240657774157, 'loss': 8.45366096496582}


EP_train:4:  36%|| 9832/27626 [23:14<42:13,  7.02it/s]

{'epoch': 4, 'iter': 9830, 'avg_loss': 8.511728403199884, 'avg_acc': 49.94532600956159, 'loss': 9.112943649291992}


EP_train:4:  36%|| 9842/27626 [23:15<42:05,  7.04it/s]

{'epoch': 4, 'iter': 9840, 'avg_loss': 8.51177761780272, 'avg_acc': 49.948557057209634, 'loss': 8.584712028503418}


EP_train:4:  36%|| 9852/27626 [23:17<42:01,  7.05it/s]

{'epoch': 4, 'iter': 9850, 'avg_loss': 8.511706295116046, 'avg_acc': 49.947023144858385, 'loss': 8.903066635131836}


EP_train:4:  36%|| 9862/27626 [23:18<42:02,  7.04it/s]

{'epoch': 4, 'iter': 9860, 'avg_loss': 8.511828340728268, 'avg_acc': 49.94897829834702, 'loss': 8.876643180847168}


EP_train:4:  36%|| 9872/27626 [23:19<41:49,  7.07it/s]

{'epoch': 4, 'iter': 9870, 'avg_loss': 8.51191902648544, 'avg_acc': 49.944914395704586, 'loss': 8.667939186096191}


EP_train:4:  36%|| 9882/27626 [23:21<41:49,  7.07it/s]

{'epoch': 4, 'iter': 9880, 'avg_loss': 8.51201033401605, 'avg_acc': 49.943388827041794, 'loss': 7.8387556076049805}


EP_train:4:  36%|| 9892/27626 [23:22<42:05,  7.02it/s]

{'epoch': 4, 'iter': 9890, 'avg_loss': 8.511734785246373, 'avg_acc': 49.94313011828935, 'loss': 7.958441734313965}


EP_train:4:  36%|| 9902/27626 [23:24<42:16,  6.99it/s]

{'epoch': 4, 'iter': 9900, 'avg_loss': 8.51123405309557, 'avg_acc': 49.94192505807494, 'loss': 7.75555419921875}


EP_train:4:  36%|| 9912/27626 [23:25<41:48,  7.06it/s]

{'epoch': 4, 'iter': 9910, 'avg_loss': 8.51096298480272, 'avg_acc': 49.940407123398245, 'loss': 8.901140213012695}


EP_train:4:  36%|| 9922/27626 [23:27<41:56,  7.03it/s]

{'epoch': 4, 'iter': 9920, 'avg_loss': 8.510878432338943, 'avg_acc': 49.946136982159054, 'loss': 8.37439250946045}


EP_train:4:  36%|| 9932/27626 [23:28<41:57,  7.03it/s]

{'epoch': 4, 'iter': 9930, 'avg_loss': 8.51098198180544, 'avg_acc': 49.946820561876955, 'loss': 8.631569862365723}


EP_train:4:  36%|| 9942/27626 [23:29<42:08,  6.99it/s]

{'epoch': 4, 'iter': 9940, 'avg_loss': 8.51097056002531, 'avg_acc': 49.94687405693592, 'loss': 9.076957702636719}


EP_train:4:  36%|| 9952/27626 [23:31<42:22,  6.95it/s]

{'epoch': 4, 'iter': 9950, 'avg_loss': 8.51110560267957, 'avg_acc': 49.94033262988644, 'loss': 9.093185424804688}


EP_train:4:  36%|| 9962/27626 [23:32<42:03,  7.00it/s]

{'epoch': 4, 'iter': 9960, 'avg_loss': 8.51128589138785, 'avg_acc': 49.94415721313121, 'loss': 8.632230758666992}


EP_train:4:  36%|| 9972/27626 [23:34<41:59,  7.01it/s]

{'epoch': 4, 'iter': 9970, 'avg_loss': 8.51133344462996, 'avg_acc': 49.94264617390432, 'loss': 8.172874450683594}


EP_train:4:  36%|| 9982/27626 [23:35<42:24,  6.93it/s]

{'epoch': 4, 'iter': 9980, 'avg_loss': 8.51144698673971, 'avg_acc': 49.94019887786795, 'loss': 8.194278717041016}


EP_train:4:  36%|| 9992/27626 [23:37<41:51,  7.02it/s]

{'epoch': 4, 'iter': 9990, 'avg_loss': 8.51160046336243, 'avg_acc': 49.93775648083275, 'loss': 8.450911521911621}


EP_train:4:  36%|| 10002/27626 [23:38<41:31,  7.07it/s]

{'epoch': 4, 'iter': 10000, 'avg_loss': 8.511488868527241, 'avg_acc': 49.93750624937506, 'loss': 9.151118278503418}


EP_train:4:  36%|| 10012/27626 [23:39<42:03,  6.98it/s]

{'epoch': 4, 'iter': 10010, 'avg_loss': 8.511543992158476, 'avg_acc': 49.93600789131955, 'loss': 8.744244575500488}


EP_train:4:  36%|| 10022/27626 [23:41<41:46,  7.02it/s]

{'epoch': 4, 'iter': 10020, 'avg_loss': 8.511623439430988, 'avg_acc': 49.93326514319928, 'loss': 8.600872039794922}


EP_train:4:  36%|| 10032/27626 [23:42<41:42,  7.03it/s]

{'epoch': 4, 'iter': 10030, 'avg_loss': 8.511507012435036, 'avg_acc': 49.933954740305055, 'loss': 7.754744529724121}


EP_train:4:  36%|| 10042/27626 [23:44<41:28,  7.07it/s]

{'epoch': 4, 'iter': 10040, 'avg_loss': 8.511550056591249, 'avg_acc': 49.937132755701626, 'loss': 8.704925537109375}


EP_train:4:  36%|| 10052/27626 [23:45<41:26,  7.07it/s]

{'epoch': 4, 'iter': 10050, 'avg_loss': 8.511631707933551, 'avg_acc': 49.93937170430803, 'loss': 9.166993141174316}


EP_train:4:  36%|| 10062/27626 [23:46<41:30,  7.05it/s]

{'epoch': 4, 'iter': 10060, 'avg_loss': 8.511800417403864, 'avg_acc': 49.94253801808966, 'loss': 8.46571159362793}


EP_train:4:  36%|| 10072/27626 [23:48<41:37,  7.03it/s]

{'epoch': 4, 'iter': 10070, 'avg_loss': 8.511559955129888, 'avg_acc': 49.946318637672526, 'loss': 8.1021089553833}


EP_train:4:  36%|| 10082/27626 [23:49<41:45,  7.00it/s]

{'epoch': 4, 'iter': 10080, 'avg_loss': 8.511419908050364, 'avg_acc': 49.94637188770955, 'loss': 7.80750846862793}


EP_train:4:  37%|| 10092/27626 [23:51<42:01,  6.95it/s]

{'epoch': 4, 'iter': 10090, 'avg_loss': 8.511318827511806, 'avg_acc': 49.95261867010207, 'loss': 8.615399360656738}


EP_train:4:  37%|| 10102/27626 [23:52<41:31,  7.03it/s]

{'epoch': 4, 'iter': 10100, 'avg_loss': 8.511343863704113, 'avg_acc': 49.95421245421245, 'loss': 9.172676086425781}


EP_train:4:  37%|| 10112/27626 [23:54<41:33,  7.03it/s]

{'epoch': 4, 'iter': 10110, 'avg_loss': 8.511752258338888, 'avg_acc': 49.957039363069924, 'loss': 8.989433288574219}


EP_train:4:  37%|| 10122/27626 [23:55<41:33,  7.02it/s]

{'epoch': 4, 'iter': 10120, 'avg_loss': 8.511350721371125, 'avg_acc': 49.96480090900109, 'loss': 8.002097129821777}


EP_train:4:  37%|| 10132/27626 [23:56<41:27,  7.03it/s]

{'epoch': 4, 'iter': 10130, 'avg_loss': 8.510944390553451, 'avg_acc': 49.96298489783832, 'loss': 7.784283638000488}


EP_train:4:  37%|| 10142/27626 [23:58<41:24,  7.04it/s]

{'epoch': 4, 'iter': 10140, 'avg_loss': 8.510905285979996, 'avg_acc': 49.96548663839858, 'loss': 9.218456268310547}


EP_train:4:  37%|| 10152/27626 [23:59<41:24,  7.03it/s]

{'epoch': 4, 'iter': 10150, 'avg_loss': 8.511228120170806, 'avg_acc': 49.96552063836076, 'loss': 8.959936141967773}


EP_train:4:  37%|| 10162/27626 [24:01<41:48,  6.96it/s]

{'epoch': 4, 'iter': 10160, 'avg_loss': 8.511398404640143, 'avg_acc': 49.966477216809366, 'loss': 8.750229835510254}


EP_train:4:  37%|| 10172/27626 [24:02<41:11,  7.06it/s]

{'epoch': 4, 'iter': 10170, 'avg_loss': 8.51145000563717, 'avg_acc': 49.96927539081703, 'loss': 8.627931594848633}


EP_train:4:  37%|| 10182/27626 [24:03<41:20,  7.03it/s]

{'epoch': 4, 'iter': 10180, 'avg_loss': 8.511649891407735, 'avg_acc': 49.969919457813575, 'loss': 8.263391494750977}


EP_train:4:  37%|| 10192/27626 [24:05<41:08,  7.06it/s]

{'epoch': 4, 'iter': 10190, 'avg_loss': 8.51144049177225, 'avg_acc': 49.971175547051324, 'loss': 7.820699214935303}


EP_train:4:  37%|| 10202/27626 [24:06<40:57,  7.09it/s]

{'epoch': 4, 'iter': 10200, 'avg_loss': 8.511631061511698, 'avg_acc': 49.96844672090972, 'loss': 8.633407592773438}


EP_train:4:  37%|| 10212/27626 [24:08<41:27,  7.00it/s]

{'epoch': 4, 'iter': 10210, 'avg_loss': 8.511189816409257, 'avg_acc': 49.96694740965626, 'loss': 8.662184715270996}


EP_train:4:  37%|| 10222/27626 [24:09<41:02,  7.07it/s]

{'epoch': 4, 'iter': 10220, 'avg_loss': 8.511011513720378, 'avg_acc': 49.964533802954705, 'loss': 8.275699615478516}


EP_train:4:  37%|| 10232/27626 [24:11<41:18,  7.02it/s]

{'epoch': 4, 'iter': 10230, 'avg_loss': 8.510821517617714, 'avg_acc': 49.965179356856616, 'loss': 8.592487335205078}


EP_train:4:  37%|| 10242/27626 [24:12<41:18,  7.01it/s]

{'epoch': 4, 'iter': 10240, 'avg_loss': 8.510989944376805, 'avg_acc': 49.96795967190704, 'loss': 9.481409072875977}


EP_train:4:  37%|| 10252/27626 [24:13<41:35,  6.96it/s]

{'epoch': 4, 'iter': 10250, 'avg_loss': 8.511134743376507, 'avg_acc': 49.96311335479465, 'loss': 8.714983940124512}


EP_train:4:  37%|| 10262/27626 [24:15<41:13,  7.02it/s]

{'epoch': 4, 'iter': 10260, 'avg_loss': 8.511481261069797, 'avg_acc': 49.96071289348017, 'loss': 8.506817817687988}


EP_train:4:  37%|| 10272/27626 [24:16<41:05,  7.04it/s]

{'epoch': 4, 'iter': 10270, 'avg_loss': 8.511409820849678, 'avg_acc': 49.96014263460228, 'loss': 8.991927146911621}


EP_train:4:  37%|| 10282/27626 [24:18<40:54,  7.07it/s]

{'epoch': 4, 'iter': 10280, 'avg_loss': 8.51138860406422, 'avg_acc': 49.9626130726583, 'loss': 8.646088600158691}


EP_train:4:  37%|| 10292/27626 [24:19<40:57,  7.05it/s]

{'epoch': 4, 'iter': 10290, 'avg_loss': 8.511357086553435, 'avg_acc': 49.961131085414436, 'loss': 8.460702896118164}


EP_train:4:  37%|| 10302/27626 [24:20<40:39,  7.10it/s]

{'epoch': 4, 'iter': 10300, 'avg_loss': 8.511201681459822, 'avg_acc': 49.96025871274634, 'loss': 8.197366714477539}


EP_train:4:  37%|| 10312/27626 [24:22<41:01,  7.04it/s]

{'epoch': 4, 'iter': 10310, 'avg_loss': 8.511362508987087, 'avg_acc': 49.96060032974493, 'loss': 8.782286643981934}


EP_train:4:  37%|| 10322/27626 [24:23<40:48,  7.07it/s]

{'epoch': 4, 'iter': 10320, 'avg_loss': 8.511502150289889, 'avg_acc': 49.96184962697413, 'loss': 8.724913597106934}


EP_train:4:  37%|| 10332/27626 [24:25<41:11,  7.00it/s]

{'epoch': 4, 'iter': 10330, 'avg_loss': 8.511631297298997, 'avg_acc': 49.95946665376053, 'loss': 9.202665328979492}


EP_train:4:  37%|| 10342/27626 [24:26<41:19,  6.97it/s]

{'epoch': 4, 'iter': 10340, 'avg_loss': 8.511347360438506, 'avg_acc': 49.95769267962479, 'loss': 8.521217346191406}


EP_train:4:  37%|| 10352/27626 [24:27<40:55,  7.03it/s]

{'epoch': 4, 'iter': 10350, 'avg_loss': 8.511340041285424, 'avg_acc': 49.95924306830258, 'loss': 8.0267333984375}


EP_train:4:  38%|| 10362/27626 [24:29<40:44,  7.06it/s]

{'epoch': 4, 'iter': 10360, 'avg_loss': 8.511365644329592, 'avg_acc': 49.96018724061384, 'loss': 8.278153419494629}


EP_train:4:  38%|| 10372/27626 [24:30<40:53,  7.03it/s]

{'epoch': 4, 'iter': 10370, 'avg_loss': 8.511123148756585, 'avg_acc': 49.95781506122843, 'loss': 8.34661865234375}


EP_train:4:  38%|| 10382/27626 [24:32<40:33,  7.09it/s]

{'epoch': 4, 'iter': 10380, 'avg_loss': 8.510806468327534, 'avg_acc': 49.953340236971385, 'loss': 7.878325939178467}


EP_train:4:  38%|| 10392/27626 [24:33<40:34,  7.08it/s]

{'epoch': 4, 'iter': 10390, 'avg_loss': 8.510797943168098, 'avg_acc': 49.95097921278029, 'loss': 7.652029037475586}


EP_train:4:  38%|| 10402/27626 [24:35<40:44,  7.05it/s]

{'epoch': 4, 'iter': 10400, 'avg_loss': 8.511353516484691, 'avg_acc': 49.95162724738006, 'loss': 9.62972354888916}


EP_train:4:  38%|| 10412/27626 [24:36<42:08,  6.81it/s]

{'epoch': 4, 'iter': 10410, 'avg_loss': 8.5113908482633, 'avg_acc': 49.95587599654212, 'loss': 8.331520080566406}


EP_train:4:  38%|| 10422/27626 [24:37<40:38,  7.06it/s]

{'epoch': 4, 'iter': 10420, 'avg_loss': 8.511435885402385, 'avg_acc': 49.95981671624604, 'loss': 8.767189979553223}


EP_train:4:  38%|| 10432/27626 [24:39<41:07,  6.97it/s]

{'epoch': 4, 'iter': 10430, 'avg_loss': 8.511423971837896, 'avg_acc': 49.95326430831176, 'loss': 8.507185935974121}


EP_train:4:  38%|| 10442/27626 [24:40<40:49,  7.02it/s]

{'epoch': 4, 'iter': 10440, 'avg_loss': 8.511301310648578, 'avg_acc': 49.95211186667944, 'loss': 8.7847261428833}


EP_train:4:  38%|| 10452/27626 [24:42<40:42,  7.03it/s]

{'epoch': 4, 'iter': 10450, 'avg_loss': 8.511350159895567, 'avg_acc': 49.9521576882595, 'loss': 9.24892520904541}


EP_train:4:  38%|| 10462/27626 [24:43<40:33,  7.05it/s]

{'epoch': 4, 'iter': 10460, 'avg_loss': 8.511689750223585, 'avg_acc': 49.95280087945703, 'loss': 7.806098937988281}


EP_train:4:  38%|| 10472/27626 [24:44<40:31,  7.06it/s]

{'epoch': 4, 'iter': 10470, 'avg_loss': 8.511788038068532, 'avg_acc': 49.954935058733646, 'loss': 8.892287254333496}


EP_train:4:  38%|| 10482/27626 [24:46<40:16,  7.10it/s]

{'epoch': 4, 'iter': 10480, 'avg_loss': 8.511720756574073, 'avg_acc': 49.95169831122984, 'loss': 7.847323894500732}


EP_train:4:  38%|| 10492/27626 [24:47<40:28,  7.05it/s]

{'epoch': 4, 'iter': 10490, 'avg_loss': 8.511848968950925, 'avg_acc': 49.944297493089316, 'loss': 8.860037803649902}


EP_train:4:  38%|| 10502/27626 [24:49<40:22,  7.07it/s]

{'epoch': 4, 'iter': 10500, 'avg_loss': 8.511941499431273, 'avg_acc': 49.95119512427388, 'loss': 9.196136474609375}


EP_train:4:  38%|| 10512/27626 [24:50<40:35,  7.03it/s]

{'epoch': 4, 'iter': 10510, 'avg_loss': 8.512154582202088, 'avg_acc': 49.95778232328038, 'loss': 8.52309799194336}


EP_train:4:  38%|| 10522/27626 [24:52<40:36,  7.02it/s]

{'epoch': 4, 'iter': 10520, 'avg_loss': 8.512110490375639, 'avg_acc': 49.96168377530653, 'loss': 8.950786590576172}


EP_train:4:  38%|| 10532/27626 [24:53<40:25,  7.05it/s]

{'epoch': 4, 'iter': 10530, 'avg_loss': 8.512188503878283, 'avg_acc': 49.956378786440034, 'loss': 8.986714363098145}


EP_train:4:  38%|| 10542/27626 [24:54<40:42,  6.99it/s]

{'epoch': 4, 'iter': 10540, 'avg_loss': 8.512369533336521, 'avg_acc': 49.95345555450147, 'loss': 8.528841972351074}


EP_train:4:  38%|| 10552/27626 [24:56<40:41,  6.99it/s]

{'epoch': 4, 'iter': 10550, 'avg_loss': 8.511754085120291, 'avg_acc': 49.95024168325277, 'loss': 7.82845401763916}


EP_train:4:  38%|| 10562/27626 [24:57<40:08,  7.09it/s]

{'epoch': 4, 'iter': 10560, 'avg_loss': 8.511899865133023, 'avg_acc': 49.95709449862702, 'loss': 8.66423225402832}


EP_train:4:  38%|| 10572/27626 [24:59<40:36,  7.00it/s]

{'epoch': 4, 'iter': 10570, 'avg_loss': 8.51203012272466, 'avg_acc': 49.95979566739192, 'loss': 8.824353218078613}


EP_train:4:  38%|| 10582/27626 [25:00<40:15,  7.06it/s]

{'epoch': 4, 'iter': 10580, 'avg_loss': 8.511922764875028, 'avg_acc': 49.96131036764011, 'loss': 8.363253593444824}


EP_train:4:  38%|| 10592/27626 [25:01<40:17,  7.05it/s]

{'epoch': 4, 'iter': 10590, 'avg_loss': 8.511729920262734, 'avg_acc': 49.96400245491455, 'loss': 7.892126560211182}


EP_train:4:  38%|| 10602/27626 [25:03<40:07,  7.07it/s]

{'epoch': 4, 'iter': 10600, 'avg_loss': 8.511833071854866, 'avg_acc': 49.970816432412036, 'loss': 7.883184909820557}


EP_train:4:  38%|| 10612/27626 [25:04<40:18,  7.04it/s]

{'epoch': 4, 'iter': 10610, 'avg_loss': 8.511901744516818, 'avg_acc': 49.972905475450005, 'loss': 8.516980171203613}


EP_train:4:  38%|| 10622/27626 [25:06<40:15,  7.04it/s]

{'epoch': 4, 'iter': 10620, 'avg_loss': 8.511710621275569, 'avg_acc': 49.97587326993692, 'loss': 8.230598449707031}


EP_train:4:  38%|| 10632/27626 [25:07<40:18,  7.03it/s]

{'epoch': 4, 'iter': 10630, 'avg_loss': 8.511542298901217, 'avg_acc': 49.97971733609256, 'loss': 8.355294227600098}


EP_train:4:  39%|| 10642/27626 [25:09<40:04,  7.06it/s]

{'epoch': 4, 'iter': 10640, 'avg_loss': 8.511441996106303, 'avg_acc': 49.975918616671365, 'loss': 8.446561813354492}


EP_train:4:  39%|| 10652/27626 [25:10<40:02,  7.06it/s]

{'epoch': 4, 'iter': 10650, 'avg_loss': 8.511530312201572, 'avg_acc': 49.97564782649516, 'loss': 8.934574127197266}


EP_train:4:  39%|| 10662/27626 [25:11<39:58,  7.07it/s]

{'epoch': 4, 'iter': 10660, 'avg_loss': 8.511641528948354, 'avg_acc': 49.979481286933684, 'loss': 8.516573905944824}


EP_train:4:  39%|| 10672/27626 [25:13<40:13,  7.03it/s]

{'epoch': 4, 'iter': 10670, 'avg_loss': 8.511538633175503, 'avg_acc': 49.9821361634336, 'loss': 9.434840202331543}


EP_train:4:  39%|| 10682/27626 [25:14<40:06,  7.04it/s]

{'epoch': 4, 'iter': 10680, 'avg_loss': 8.511678427282027, 'avg_acc': 49.98156773710327, 'loss': 8.541557312011719}


EP_train:4:  39%|| 10692/27626 [25:16<39:50,  7.08it/s]

{'epoch': 4, 'iter': 10690, 'avg_loss': 8.511639687892552, 'avg_acc': 49.98304648769994, 'loss': 8.4895601272583}


EP_train:4:  39%|| 10702/27626 [25:17<40:03,  7.04it/s]

{'epoch': 4, 'iter': 10700, 'avg_loss': 8.511976161768208, 'avg_acc': 49.977513783758525, 'loss': 8.868550300598145}


EP_train:4:  39%|| 10712/27626 [25:18<40:01,  7.04it/s]

{'epoch': 4, 'iter': 10710, 'avg_loss': 8.511772322572366, 'avg_acc': 49.978118289608815, 'loss': 8.150355339050293}


EP_train:4:  39%|| 10722/27626 [25:20<40:01,  7.04it/s]

{'epoch': 4, 'iter': 10720, 'avg_loss': 8.511740259576696, 'avg_acc': 49.977847215744795, 'loss': 8.30309009552002}


EP_train:4:  39%|| 10732/27626 [25:21<39:58,  7.04it/s]

{'epoch': 4, 'iter': 10730, 'avg_loss': 8.511463532498027, 'avg_acc': 49.97641179759575, 'loss': 8.370423316955566}


EP_train:4:  39%|| 10742/27626 [25:23<39:52,  7.06it/s]

{'epoch': 4, 'iter': 10740, 'avg_loss': 8.51128734271294, 'avg_acc': 49.97497905222977, 'loss': 8.645062446594238}


EP_train:4:  39%|| 10752/27626 [25:24<39:56,  7.04it/s]

{'epoch': 4, 'iter': 10750, 'avg_loss': 8.51132893870569, 'avg_acc': 49.97267696028276, 'loss': 8.333855628967285}


EP_train:4:  39%|| 10762/27626 [25:25<39:49,  7.06it/s]

{'epoch': 4, 'iter': 10760, 'avg_loss': 8.511138029739257, 'avg_acc': 49.97328315212341, 'loss': 7.6451544761657715}


EP_train:4:  39%|| 10772/27626 [25:27<39:53,  7.04it/s]

{'epoch': 4, 'iter': 10770, 'avg_loss': 8.511430760676587, 'avg_acc': 49.97446848017826, 'loss': 8.410050392150879}


EP_train:4:  39%|| 10782/27626 [25:28<39:47,  7.05it/s]

{'epoch': 4, 'iter': 10780, 'avg_loss': 8.511320914068666, 'avg_acc': 49.97159354419813, 'loss': 7.784597396850586}


EP_train:4:  39%|| 10792/27626 [25:30<40:07,  6.99it/s]

{'epoch': 4, 'iter': 10790, 'avg_loss': 8.511193771864285, 'avg_acc': 49.96988230933185, 'loss': 7.6768975257873535}


EP_train:4:  39%|| 10802/27626 [25:31<40:24,  6.94it/s]

{'epoch': 4, 'iter': 10800, 'avg_loss': 8.511108680495884, 'avg_acc': 49.96933154337561, 'loss': 7.676493167877197}


EP_train:4:  39%|| 10812/27626 [25:33<39:40,  7.06it/s]

{'epoch': 4, 'iter': 10810, 'avg_loss': 8.511099019993157, 'avg_acc': 49.96907085376006, 'loss': 9.026632308959961}


EP_train:4:  39%|| 10822/27626 [25:34<39:33,  7.08it/s]

{'epoch': 4, 'iter': 10820, 'avg_loss': 8.51095175414812, 'avg_acc': 49.96823306533592, 'loss': 8.179397583007812}


EP_train:4:  39%|| 10832/27626 [25:35<39:37,  7.06it/s]

{'epoch': 4, 'iter': 10830, 'avg_loss': 8.51102968306106, 'avg_acc': 49.96797387129536, 'loss': 7.6592512130737305}


EP_train:4:  39%|| 10842/27626 [25:37<39:53,  7.01it/s]

{'epoch': 4, 'iter': 10840, 'avg_loss': 8.5111323978043, 'avg_acc': 49.96569735264275, 'loss': 8.28980541229248}


EP_train:4:  39%|| 10852/27626 [25:38<39:58,  6.99it/s]

{'epoch': 4, 'iter': 10850, 'avg_loss': 8.51132303241782, 'avg_acc': 49.96716892452309, 'loss': 8.48978328704834}


EP_train:4:  39%|| 10862/27626 [25:40<39:44,  7.03it/s]

{'epoch': 4, 'iter': 10860, 'avg_loss': 8.511334875628561, 'avg_acc': 49.967199152932515, 'loss': 8.460049629211426}


EP_train:4:  39%|| 10872/27626 [25:41<39:49,  7.01it/s]

{'epoch': 4, 'iter': 10870, 'avg_loss': 8.511250519114293, 'avg_acc': 49.966366939563976, 'loss': 8.391648292541504}


EP_train:4:  39%|| 10882/27626 [25:43<40:08,  6.95it/s]

{'epoch': 4, 'iter': 10880, 'avg_loss': 8.511081000437533, 'avg_acc': 49.964387464387464, 'loss': 8.779218673706055}


EP_train:4:  39%|| 10892/27626 [25:44<39:34,  7.05it/s]

{'epoch': 4, 'iter': 10890, 'avg_loss': 8.511015640365969, 'avg_acc': 49.96528096593518, 'loss': 8.72965145111084}


EP_train:4:  39%|| 10902/27626 [25:45<39:44,  7.01it/s]

{'epoch': 4, 'iter': 10900, 'avg_loss': 8.510896491278269, 'avg_acc': 49.963019447757084, 'loss': 7.749350070953369}


EP_train:4:  39%|| 10912/27626 [25:47<39:24,  7.07it/s]

{'epoch': 4, 'iter': 10910, 'avg_loss': 8.511187119484385, 'avg_acc': 49.96419897351297, 'loss': 8.38703441619873}


EP_train:4:  40%|| 10922/27626 [25:48<39:30,  7.05it/s]

{'epoch': 4, 'iter': 10920, 'avg_loss': 8.511368799947489, 'avg_acc': 49.96137029576046, 'loss': 8.363420486450195}


EP_train:4:  40%|| 10932/27626 [25:50<39:23,  7.06it/s]

{'epoch': 4, 'iter': 10930, 'avg_loss': 8.511173097051932, 'avg_acc': 49.963120940444604, 'loss': 8.561758041381836}


EP_train:4:  40%|| 10942/27626 [25:51<39:25,  7.05it/s]

{'epoch': 4, 'iter': 10940, 'avg_loss': 8.511527913109866, 'avg_acc': 49.959155927246144, 'loss': 8.543988227844238}


EP_train:4:  40%|| 10952/27626 [25:52<39:12,  7.09it/s]

{'epoch': 4, 'iter': 10950, 'avg_loss': 8.511689439123378, 'avg_acc': 49.95805177609351, 'loss': 8.618085861206055}


EP_train:4:  40%|| 10962/27626 [25:54<39:31,  7.03it/s]

{'epoch': 4, 'iter': 10960, 'avg_loss': 8.511972232909438, 'avg_acc': 49.95495392756135, 'loss': 9.335450172424316}


EP_train:4:  40%|| 10972/27626 [25:55<39:33,  7.02it/s]

{'epoch': 4, 'iter': 10970, 'avg_loss': 8.512059168224075, 'avg_acc': 49.95043751709051, 'loss': 8.564187049865723}


EP_train:4:  40%|| 10982/27626 [25:57<39:46,  6.97it/s]

{'epoch': 4, 'iter': 10980, 'avg_loss': 8.512190293575308, 'avg_acc': 49.95105181677443, 'loss': 8.818940162658691}


EP_train:4:  40%|| 10992/27626 [25:58<39:17,  7.05it/s]

{'epoch': 4, 'iter': 10990, 'avg_loss': 8.512689485854485, 'avg_acc': 49.94853743972341, 'loss': 8.901019096374512}


EP_train:4:  40%|| 11002/27626 [26:00<39:28,  7.02it/s]

{'epoch': 4, 'iter': 11000, 'avg_loss': 8.51260289798248, 'avg_acc': 49.94375511317153, 'loss': 8.480877876281738}


EP_train:4:  40%|| 11012/27626 [26:01<39:02,  7.09it/s]

{'epoch': 4, 'iter': 11010, 'avg_loss': 8.512710941378096, 'avg_acc': 49.949198528743985, 'loss': 8.138833045959473}


EP_train:4:  40%|| 11022/27626 [26:02<39:19,  7.04it/s]

{'epoch': 4, 'iter': 11020, 'avg_loss': 8.512819370463262, 'avg_acc': 49.94896107431268, 'loss': 9.430306434631348}


EP_train:4:  40%|| 11032/27626 [26:04<39:25,  7.01it/s]

{'epoch': 4, 'iter': 11030, 'avg_loss': 8.512860606927278, 'avg_acc': 49.95184026833469, 'loss': 7.872452735900879}


EP_train:4:  40%|| 11042/27626 [26:05<39:27,  7.01it/s]

{'epoch': 4, 'iter': 11040, 'avg_loss': 8.512794574526447, 'avg_acc': 49.9482044198895, 'loss': 9.369233131408691}


EP_train:4:  40%|| 11052/27626 [26:07<39:17,  7.03it/s]

{'epoch': 4, 'iter': 11050, 'avg_loss': 8.512698640565334, 'avg_acc': 49.950796308026426, 'loss': 7.947269916534424}


EP_train:4:  40%|| 11062/27626 [26:08<39:24,  7.01it/s]

{'epoch': 4, 'iter': 11060, 'avg_loss': 8.512504776286535, 'avg_acc': 49.95055826778772, 'loss': 8.521010398864746}


EP_train:4:  40%|| 11072/27626 [26:09<39:27,  6.99it/s]

{'epoch': 4, 'iter': 11070, 'avg_loss': 8.512269518280856, 'avg_acc': 49.948627043627496, 'loss': 7.067967414855957}


EP_train:4:  40%|| 11082/27626 [26:11<39:18,  7.01it/s]

{'epoch': 4, 'iter': 11080, 'avg_loss': 8.512242624318812, 'avg_acc': 49.946699305116866, 'loss': 8.0709228515625}


EP_train:4:  40%|| 11092/27626 [26:12<39:12,  7.03it/s]

{'epoch': 4, 'iter': 11090, 'avg_loss': 8.512191524089442, 'avg_acc': 49.94139392300063, 'loss': 8.939519882202148}


EP_train:4:  40%|| 11102/27626 [26:14<39:03,  7.05it/s]

{'epoch': 4, 'iter': 11100, 'avg_loss': 8.512207446232775, 'avg_acc': 49.93863165480587, 'loss': 8.58964729309082}


EP_train:4:  40%|| 11112/27626 [26:15<39:05,  7.04it/s]

{'epoch': 4, 'iter': 11110, 'avg_loss': 8.512389502380703, 'avg_acc': 49.93418684186842, 'loss': 8.19134521484375}


EP_train:4:  40%|| 11122/27626 [26:17<38:59,  7.05it/s]

{'epoch': 4, 'iter': 11120, 'avg_loss': 8.512523793432418, 'avg_acc': 49.93480802086143, 'loss': 8.393377304077148}


EP_train:4:  40%|| 11132/27626 [26:18<39:02,  7.04it/s]

{'epoch': 4, 'iter': 11130, 'avg_loss': 8.512312237008992, 'avg_acc': 49.9326206091097, 'loss': 6.786409854888916}


EP_train:4:  40%|| 11142/27626 [26:19<39:11,  7.01it/s]

{'epoch': 4, 'iter': 11140, 'avg_loss': 8.512604362221404, 'avg_acc': 49.930717619603264, 'loss': 8.823126792907715}


EP_train:4:  40%|| 11152/27626 [26:21<39:00,  7.04it/s]

{'epoch': 4, 'iter': 11150, 'avg_loss': 8.512773989904153, 'avg_acc': 49.93442292171106, 'loss': 8.888301849365234}


EP_train:4:  40%|| 11162/27626 [26:22<39:08,  7.01it/s]

{'epoch': 4, 'iter': 11160, 'avg_loss': 8.51250857638689, 'avg_acc': 49.93532165576561, 'loss': 7.988142490386963}


EP_train:4:  40%|| 11172/27626 [26:24<39:02,  7.02it/s]

{'epoch': 4, 'iter': 11170, 'avg_loss': 8.51294568799174, 'avg_acc': 49.93649852296124, 'loss': 9.173327445983887}


EP_train:4:  40%|| 11182/27626 [26:25<38:46,  7.07it/s]

{'epoch': 4, 'iter': 11180, 'avg_loss': 8.512839227123173, 'avg_acc': 49.93739379304177, 'loss': 8.626622200012207}


EP_train:4:  41%|| 11192/27626 [26:26<38:42,  7.08it/s]

{'epoch': 4, 'iter': 11190, 'avg_loss': 8.512666438307841, 'avg_acc': 49.93661200965061, 'loss': 8.227622032165527}


EP_train:4:  41%|| 11202/27626 [26:28<39:02,  7.01it/s]

{'epoch': 4, 'iter': 11200, 'avg_loss': 8.512403083820342, 'avg_acc': 49.936668601017765, 'loss': 8.466808319091797}


EP_train:4:  41%|| 11212/27626 [26:29<38:46,  7.06it/s]

{'epoch': 4, 'iter': 11210, 'avg_loss': 8.512189336937848, 'avg_acc': 49.93811881188119, 'loss': 7.817078113555908}


EP_train:4:  41%|| 11222/27626 [26:31<39:05,  6.99it/s]

{'epoch': 4, 'iter': 11220, 'avg_loss': 8.511989408482334, 'avg_acc': 49.94123741199537, 'loss': 8.281916618347168}


EP_train:4:  41%|| 11232/27626 [26:32<39:19,  6.95it/s]

{'epoch': 4, 'iter': 11230, 'avg_loss': 8.511911814576445, 'avg_acc': 49.941567981479835, 'loss': 9.18382740020752}


EP_train:4:  41%|| 11242/27626 [26:34<38:50,  7.03it/s]

{'epoch': 4, 'iter': 11240, 'avg_loss': 8.512043299342844, 'avg_acc': 49.93883996085757, 'loss': 8.881887435913086}


EP_train:4:  41%|| 11252/27626 [26:35<38:57,  7.00it/s]

{'epoch': 4, 'iter': 11250, 'avg_loss': 8.512071257135135, 'avg_acc': 49.93667229579593, 'loss': 8.34903335571289}


EP_train:4:  41%|| 11262/27626 [26:36<38:41,  7.05it/s]

{'epoch': 4, 'iter': 11260, 'avg_loss': 8.51211360258633, 'avg_acc': 49.940336115797884, 'loss': 8.141619682312012}


EP_train:4:  41%|| 11272/27626 [26:38<38:33,  7.07it/s]

{'epoch': 4, 'iter': 11270, 'avg_loss': 8.511954948965801, 'avg_acc': 49.94343891402715, 'loss': 8.126766204833984}


EP_train:4:  41%|| 11282/27626 [26:39<38:52,  7.01it/s]

{'epoch': 4, 'iter': 11280, 'avg_loss': 8.51166476781524, 'avg_acc': 49.94404308128712, 'loss': 8.157670974731445}


EP_train:4:  41%|| 11292/27626 [26:41<38:17,  7.11it/s]

{'epoch': 4, 'iter': 11290, 'avg_loss': 8.51186559054799, 'avg_acc': 49.94298556372332, 'loss': 8.967525482177734}


EP_train:4:  41%|| 11302/27626 [26:42<38:42,  7.03it/s]

{'epoch': 4, 'iter': 11300, 'avg_loss': 8.51178818155776, 'avg_acc': 49.945248208123175, 'loss': 8.227787971496582}


EP_train:4:  41%|| 11312/27626 [26:43<38:43,  7.02it/s]

{'epoch': 4, 'iter': 11310, 'avg_loss': 8.511660397753625, 'avg_acc': 49.94142869772787, 'loss': 7.829097270965576}


EP_train:4:  41%|| 11322/27626 [26:45<38:39,  7.03it/s]

{'epoch': 4, 'iter': 11320, 'avg_loss': 8.511525363111737, 'avg_acc': 49.939824220475224, 'loss': 8.058435440063477}


EP_train:4:  41%|| 11332/27626 [26:46<38:45,  7.01it/s]

{'epoch': 4, 'iter': 11330, 'avg_loss': 8.511397405899945, 'avg_acc': 49.93987732768511, 'loss': 7.526551723480225}


EP_train:4:  41%|| 11342/27626 [26:48<38:35,  7.03it/s]

{'epoch': 4, 'iter': 11340, 'avg_loss': 8.511236641859211, 'avg_acc': 49.93607265673221, 'loss': 8.171463966369629}


EP_train:4:  41%|| 11352/27626 [26:49<38:34,  7.03it/s]

{'epoch': 4, 'iter': 11350, 'avg_loss': 8.511168803005909, 'avg_acc': 49.9306228526121, 'loss': 7.803162097930908}


EP_train:4:  41%|| 11362/27626 [26:51<38:35,  7.02it/s]

{'epoch': 4, 'iter': 11360, 'avg_loss': 8.511350573079811, 'avg_acc': 49.92765821670628, 'loss': 8.321271896362305}


EP_train:4:  41%|| 11372/27626 [26:52<38:10,  7.10it/s]

{'epoch': 4, 'iter': 11370, 'avg_loss': 8.511055101118389, 'avg_acc': 49.9296455896579, 'loss': 8.150437355041504}


EP_train:4:  41%|| 11382/27626 [26:53<38:08,  7.10it/s]

{'epoch': 4, 'iter': 11380, 'avg_loss': 8.511234242303773, 'avg_acc': 49.93492443546261, 'loss': 8.379887580871582}


EP_train:4:  41%|| 11392/27626 [26:55<38:27,  7.03it/s]

{'epoch': 4, 'iter': 11390, 'avg_loss': 8.511104193429492, 'avg_acc': 49.93114081292248, 'loss': 8.418904304504395}


EP_train:4:  41%|| 11402/27626 [26:56<38:33,  7.01it/s]

{'epoch': 4, 'iter': 11400, 'avg_loss': 8.5107658991175, 'avg_acc': 49.932845802999736, 'loss': 7.947807788848877}


EP_train:4:  41%|| 11412/27626 [26:58<38:27,  7.03it/s]

{'epoch': 4, 'iter': 11410, 'avg_loss': 8.510946461137072, 'avg_acc': 49.92824905792656, 'loss': 7.915914058685303}


EP_train:4:  41%|| 11422/27626 [26:59<38:30,  7.01it/s]

{'epoch': 4, 'iter': 11420, 'avg_loss': 8.510796070599888, 'avg_acc': 49.92584931266965, 'loss': 8.669916152954102}


EP_train:4:  41%|| 11432/27626 [27:00<38:50,  6.95it/s]

{'epoch': 4, 'iter': 11430, 'avg_loss': 8.510711134601147, 'avg_acc': 49.92673431895722, 'loss': 8.24612045288086}


EP_train:4:  41%|| 11442/27626 [27:02<38:24,  7.02it/s]

{'epoch': 4, 'iter': 11440, 'avg_loss': 8.510570305896882, 'avg_acc': 49.92597893540774, 'loss': 8.769767761230469}


EP_train:4:  41%|| 11452/27626 [27:03<38:30,  7.00it/s]

{'epoch': 4, 'iter': 11450, 'avg_loss': 8.51072345693167, 'avg_acc': 49.92413326347044, 'loss': 8.38020133972168}


EP_train:4:  41%|| 11462/27626 [27:05<38:31,  6.99it/s]

{'epoch': 4, 'iter': 11460, 'avg_loss': 8.510566691912944, 'avg_acc': 49.92392679521856, 'loss': 7.488015174865723}


EP_train:4:  42%|| 11472/27626 [27:06<38:06,  7.06it/s]

{'epoch': 4, 'iter': 11470, 'avg_loss': 8.510616338904613, 'avg_acc': 49.924265539185775, 'loss': 8.608551025390625}


EP_train:4:  42%|| 11482/27626 [27:08<38:01,  7.08it/s]

{'epoch': 4, 'iter': 11480, 'avg_loss': 8.510565117399988, 'avg_acc': 49.926236826060446, 'loss': 7.824680328369141}


EP_train:4:  42%|| 11492/27626 [27:09<38:12,  7.04it/s]

{'epoch': 4, 'iter': 11490, 'avg_loss': 8.510215749899118, 'avg_acc': 49.92439735445131, 'loss': 8.178102493286133}


EP_train:4:  42%|| 11502/27626 [27:10<38:13,  7.03it/s]

{'epoch': 4, 'iter': 11500, 'avg_loss': 8.510437824178618, 'avg_acc': 49.92690852969307, 'loss': 8.908470153808594}


EP_train:4:  42%|| 11512/27626 [27:12<38:36,  6.96it/s]

{'epoch': 4, 'iter': 11510, 'avg_loss': 8.510315969023258, 'avg_acc': 49.92832942402919, 'loss': 8.75715160369873}


EP_train:4:  42%|| 11522/27626 [27:13<38:23,  6.99it/s]

{'epoch': 4, 'iter': 11520, 'avg_loss': 8.510228397841876, 'avg_acc': 49.9292053641177, 'loss': 8.234109878540039}


EP_train:4:  42%|| 11532/27626 [27:15<38:01,  7.05it/s]

{'epoch': 4, 'iter': 11530, 'avg_loss': 8.50988936016932, 'avg_acc': 49.92818272482872, 'loss': 8.40931224822998}


EP_train:4:  42%|| 11542/27626 [27:16<38:05,  7.04it/s]

{'epoch': 4, 'iter': 11540, 'avg_loss': 8.509734270715413, 'avg_acc': 49.92905727406637, 'loss': 8.30459213256836}


EP_train:4:  42%|| 11552/27626 [27:17<38:08,  7.02it/s]

{'epoch': 4, 'iter': 11550, 'avg_loss': 8.509739401639566, 'avg_acc': 49.92803653363345, 'loss': 8.488481521606445}


EP_train:4:  42%|| 11562/27626 [27:19<37:54,  7.06it/s]

{'epoch': 4, 'iter': 11560, 'avg_loss': 8.509571405278287, 'avg_acc': 49.92972061240377, 'loss': 8.367279052734375}


EP_train:4:  42%|| 11572/27626 [27:20<38:02,  7.03it/s]

{'epoch': 4, 'iter': 11570, 'avg_loss': 8.509474106558603, 'avg_acc': 49.9311317085818, 'loss': 8.564535140991211}


EP_train:4:  42%|| 11582/27626 [27:22<37:57,  7.05it/s]

{'epoch': 4, 'iter': 11580, 'avg_loss': 8.509548884868499, 'avg_acc': 49.93254036784388, 'loss': 9.018969535827637}


EP_train:4:  42%|| 11592/27626 [27:23<38:13,  6.99it/s]

{'epoch': 4, 'iter': 11590, 'avg_loss': 8.509807158047167, 'avg_acc': 49.93340738504012, 'loss': 8.67108154296875}


EP_train:4:  42%|| 11602/27626 [27:25<38:20,  6.97it/s]

{'epoch': 4, 'iter': 11600, 'avg_loss': 8.509925760202249, 'avg_acc': 49.93211792086889, 'loss': 8.302095413208008}


EP_train:4:  42%|| 11612/27626 [27:26<38:16,  6.97it/s]

{'epoch': 4, 'iter': 11610, 'avg_loss': 8.509620688105521, 'avg_acc': 49.92948497114805, 'loss': 7.505721092224121}


EP_train:4:  42%|| 11622/27626 [27:27<37:51,  7.04it/s]

{'epoch': 4, 'iter': 11620, 'avg_loss': 8.509588127589227, 'avg_acc': 49.93223474743998, 'loss': 8.640559196472168}


EP_train:4:  42%|| 11632/27626 [27:29<37:44,  7.06it/s]

{'epoch': 4, 'iter': 11630, 'avg_loss': 8.509281158611254, 'avg_acc': 49.93471111684292, 'loss': 8.314390182495117}


EP_train:4:  42%|| 11642/27626 [27:30<37:49,  7.04it/s]

{'epoch': 4, 'iter': 11640, 'avg_loss': 8.508906905793026, 'avg_acc': 49.93396185894682, 'loss': 8.489166259765625}


EP_train:4:  42%|| 11652/27626 [27:32<37:48,  7.04it/s]

{'epoch': 4, 'iter': 11650, 'avg_loss': 8.508630127244196, 'avg_acc': 49.93106814865676, 'loss': 8.4465970993042}


EP_train:4:  42%|| 11662/27626 [27:33<38:14,  6.96it/s]

{'epoch': 4, 'iter': 11660, 'avg_loss': 8.508909259281236, 'avg_acc': 49.93032329988852, 'loss': 8.762487411499023}


EP_train:4:  42%|| 11672/27626 [27:34<37:37,  7.07it/s]

{'epoch': 4, 'iter': 11670, 'avg_loss': 8.509414148702483, 'avg_acc': 49.930918515979776, 'loss': 9.633967399597168}


EP_train:4:  42%|| 11682/27626 [27:36<37:45,  7.04it/s]

{'epoch': 4, 'iter': 11680, 'avg_loss': 8.509563051544939, 'avg_acc': 49.936595753788204, 'loss': 9.403255462646484}


EP_train:4:  42%|| 11692/27626 [27:37<37:50,  7.02it/s]

{'epoch': 4, 'iter': 11690, 'avg_loss': 8.509624526637541, 'avg_acc': 49.935580788640834, 'loss': 8.271001815795898}


EP_train:4:  42%|| 11702/27626 [27:39<37:43,  7.03it/s]

{'epoch': 4, 'iter': 11700, 'avg_loss': 8.509409703491956, 'avg_acc': 49.934033415947354, 'loss': 8.900473594665527}


EP_train:4:  42%|| 11712/27626 [27:40<37:27,  7.08it/s]

{'epoch': 4, 'iter': 11710, 'avg_loss': 8.509198599127984, 'avg_acc': 49.934089744684485, 'loss': 8.01201057434082}


EP_train:4:  42%|| 11722/27626 [27:42<37:43,  7.03it/s]

{'epoch': 4, 'iter': 11720, 'avg_loss': 8.508925728724277, 'avg_acc': 49.939478286835595, 'loss': 8.901284217834473}


EP_train:4:  42%|| 11732/27626 [27:43<37:27,  7.07it/s]

{'epoch': 4, 'iter': 11730, 'avg_loss': 8.509046980349185, 'avg_acc': 49.93979626630296, 'loss': 8.150276184082031}


EP_train:4:  43%|| 11742/27626 [27:44<37:28,  7.06it/s]

{'epoch': 4, 'iter': 11740, 'avg_loss': 8.508932107149382, 'avg_acc': 49.93665360701814, 'loss': 8.912288665771484}


EP_train:4:  43%|| 11752/27626 [27:46<37:41,  7.02it/s]

{'epoch': 4, 'iter': 11750, 'avg_loss': 8.508692545481372, 'avg_acc': 49.941494340907155, 'loss': 8.106229782104492}


EP_train:4:  43%|| 11762/27626 [27:47<37:22,  7.08it/s]

{'epoch': 4, 'iter': 11760, 'avg_loss': 8.50872100708122, 'avg_acc': 49.94021554289601, 'loss': 8.434548377990723}


EP_train:4:  43%|| 11772/27626 [27:49<37:20,  7.08it/s]

{'epoch': 4, 'iter': 11770, 'avg_loss': 8.509019783871691, 'avg_acc': 49.938407951745816, 'loss': 8.31014633178711}


EP_train:4:  43%|| 11782/27626 [27:50<37:27,  7.05it/s]

{'epoch': 4, 'iter': 11780, 'avg_loss': 8.50919228152178, 'avg_acc': 49.937399202105084, 'loss': 8.5088529586792}


EP_train:4:  43%|| 11792/27626 [27:51<37:33,  7.03it/s]

{'epoch': 4, 'iter': 11790, 'avg_loss': 8.509068676800776, 'avg_acc': 49.937982359426684, 'loss': 8.490747451782227}


EP_train:4:  43%|| 11802/27626 [27:53<37:18,  7.07it/s]

{'epoch': 4, 'iter': 11800, 'avg_loss': 8.509244050164211, 'avg_acc': 49.93565163969155, 'loss': 9.075042724609375}


EP_train:4:  43%|| 11812/27626 [27:54<37:27,  7.04it/s]

{'epoch': 4, 'iter': 11810, 'avg_loss': 8.509163203686363, 'avg_acc': 49.93544153754974, 'loss': 8.180596351623535}


EP_train:4:  43%|| 11822/27626 [27:56<37:18,  7.06it/s]

{'epoch': 4, 'iter': 11820, 'avg_loss': 8.509041328557645, 'avg_acc': 49.93708231114119, 'loss': 8.30465316772461}


EP_train:4:  43%|| 11832/27626 [27:57<37:20,  7.05it/s]

{'epoch': 4, 'iter': 11830, 'avg_loss': 8.509296046640634, 'avg_acc': 49.93951272081819, 'loss': 9.33520793914795}


EP_train:4:  43%|| 11842/27626 [27:59<37:54,  6.94it/s]

{'epoch': 4, 'iter': 11840, 'avg_loss': 8.5093203719308, 'avg_acc': 49.93982771725361, 'loss': 8.423134803771973}


EP_train:4:  43%|| 11852/27626 [28:00<37:09,  7.07it/s]

{'epoch': 4, 'iter': 11850, 'avg_loss': 8.509551493450214, 'avg_acc': 49.93486836553877, 'loss': 8.650209426879883}


EP_train:4:  43%|| 11862/27626 [28:01<37:30,  7.01it/s]

{'epoch': 4, 'iter': 11860, 'avg_loss': 8.509674716544547, 'avg_acc': 49.93834836860299, 'loss': 8.808852195739746}


EP_train:4:  43%|| 11872/27626 [28:03<37:11,  7.06it/s]

{'epoch': 4, 'iter': 11870, 'avg_loss': 8.509594448373347, 'avg_acc': 49.93787381012552, 'loss': 8.75687026977539}


EP_train:4:  43%|| 11882/27626 [28:04<37:31,  6.99it/s]

{'epoch': 4, 'iter': 11880, 'avg_loss': 8.509527244709144, 'avg_acc': 49.935558875515525, 'loss': 8.74364185333252}


EP_train:4:  43%|| 11892/27626 [28:06<37:06,  7.07it/s]

{'epoch': 4, 'iter': 11890, 'avg_loss': 8.509678332000211, 'avg_acc': 49.93613867630982, 'loss': 8.44316577911377}


EP_train:4:  43%|| 11902/27626 [28:07<37:18,  7.03it/s]

{'epoch': 4, 'iter': 11900, 'avg_loss': 8.509829709734419, 'avg_acc': 49.936980085707084, 'loss': 9.26206111907959}


EP_train:4:  43%|| 11912/27626 [28:08<37:06,  7.06it/s]

{'epoch': 4, 'iter': 11910, 'avg_loss': 8.509654340430957, 'avg_acc': 49.93886953236505, 'loss': 8.044219017028809}


EP_train:4:  43%|| 11922/27626 [28:10<37:05,  7.06it/s]

{'epoch': 4, 'iter': 11920, 'avg_loss': 8.509749508016052, 'avg_acc': 49.94652294270615, 'loss': 8.592047691345215}


EP_train:4:  43%|| 11932/27626 [28:11<37:02,  7.06it/s]

{'epoch': 4, 'iter': 11930, 'avg_loss': 8.509779159325925, 'avg_acc': 49.94735353281368, 'loss': 8.417651176452637}


EP_train:4:  43%|| 11942/27626 [28:13<37:15,  7.01it/s]

{'epoch': 4, 'iter': 11940, 'avg_loss': 8.509798115176897, 'avg_acc': 49.94582740139017, 'loss': 8.006498336791992}


EP_train:4:  43%|| 11952/27626 [28:14<37:15,  7.01it/s]

{'epoch': 4, 'iter': 11950, 'avg_loss': 8.509707468183018, 'avg_acc': 49.94430382394779, 'loss': 8.311623573303223}


EP_train:4:  43%|| 11962/27626 [28:15<37:17,  7.00it/s]

{'epoch': 4, 'iter': 11960, 'avg_loss': 8.509429410631945, 'avg_acc': 49.948530641250734, 'loss': 7.858999729156494}


EP_train:4:  43%|| 11972/27626 [28:17<37:34,  6.94it/s]

{'epoch': 4, 'iter': 11970, 'avg_loss': 8.509348937716464, 'avg_acc': 49.95275039679225, 'loss': 7.773251533508301}


EP_train:4:  43%|| 11982/27626 [28:18<36:56,  7.06it/s]

{'epoch': 4, 'iter': 11980, 'avg_loss': 8.509456213642347, 'avg_acc': 49.95018153743427, 'loss': 9.696330070495605}


EP_train:4:  43%|| 11992/27626 [28:20<37:04,  7.03it/s]

{'epoch': 4, 'iter': 11990, 'avg_loss': 8.509793104476044, 'avg_acc': 49.947616962722044, 'loss': 8.678915023803711}


EP_train:4:  43%|| 12002/27626 [28:21<37:02,  7.03it/s]

{'epoch': 4, 'iter': 12000, 'avg_loss': 8.509736941621837, 'avg_acc': 49.950004166319474, 'loss': 8.288810729980469}


EP_train:4:  43%|| 12012/27626 [28:23<36:48,  7.07it/s]

{'epoch': 4, 'iter': 12010, 'avg_loss': 8.509535617509181, 'avg_acc': 49.95004579135792, 'loss': 7.484274864196777}


EP_train:4:  44%|| 12022/27626 [28:24<36:54,  7.05it/s]

{'epoch': 4, 'iter': 12020, 'avg_loss': 8.509618490688933, 'avg_acc': 49.94748772980617, 'loss': 7.980652809143066}


EP_train:4:  44%|| 12032/27626 [28:25<36:55,  7.04it/s]

{'epoch': 4, 'iter': 12030, 'avg_loss': 8.509574395384973, 'avg_acc': 49.94597290333306, 'loss': 8.883078575134277}


EP_train:4:  44%|| 12042/27626 [28:27<36:57,  7.03it/s]

{'epoch': 4, 'iter': 12040, 'avg_loss': 8.509472065654647, 'avg_acc': 49.9454987127315, 'loss': 8.899544715881348}


EP_train:4:  44%|| 12052/27626 [28:28<36:52,  7.04it/s]

{'epoch': 4, 'iter': 12050, 'avg_loss': 8.509460231355884, 'avg_acc': 49.94787776947971, 'loss': 8.011958122253418}


EP_train:4:  44%|| 12062/27626 [28:30<37:08,  6.99it/s]

{'epoch': 4, 'iter': 12060, 'avg_loss': 8.50926976658181, 'avg_acc': 49.946366387530055, 'loss': 8.081223487854004}


EP_train:4:  44%|| 12072/27626 [28:31<36:47,  7.05it/s]

{'epoch': 4, 'iter': 12070, 'avg_loss': 8.509318906719114, 'avg_acc': 49.946151934388205, 'loss': 7.6291327476501465}


EP_train:4:  44%|| 12082/27626 [28:33<36:57,  7.01it/s]

{'epoch': 4, 'iter': 12080, 'avg_loss': 8.509064873385219, 'avg_acc': 49.94826587203046, 'loss': 8.244145393371582}


EP_train:4:  44%|| 12092/27626 [28:34<36:57,  7.01it/s]

{'epoch': 4, 'iter': 12090, 'avg_loss': 8.508946582663889, 'avg_acc': 49.95115168307005, 'loss': 7.812851905822754}


EP_train:4:  44%|| 12102/27626 [28:35<36:49,  7.03it/s]

{'epoch': 4, 'iter': 12100, 'avg_loss': 8.50870276632294, 'avg_acc': 49.95196677960499, 'loss': 8.039000511169434}


EP_train:4:  44%|| 12112/27626 [28:37<36:48,  7.03it/s]

{'epoch': 4, 'iter': 12110, 'avg_loss': 8.508385440850669, 'avg_acc': 49.9543287094377, 'loss': 8.676468849182129}


EP_train:4:  44%|| 12122/27626 [28:38<36:35,  7.06it/s]

{'epoch': 4, 'iter': 12120, 'avg_loss': 8.508226799774107, 'avg_acc': 49.95617110799439, 'loss': 8.699764251708984}


EP_train:4:  44%|| 12132/27626 [28:40<36:36,  7.05it/s]

{'epoch': 4, 'iter': 12130, 'avg_loss': 8.508519832048831, 'avg_acc': 49.95801046904624, 'loss': 9.234156608581543}


EP_train:4:  44%|| 12142/27626 [28:41<36:47,  7.01it/s]

{'epoch': 4, 'iter': 12140, 'avg_loss': 8.508606164363707, 'avg_acc': 49.95907462317766, 'loss': 8.562272071838379}


EP_train:4:  44%|| 12152/27626 [28:42<36:48,  7.01it/s]

{'epoch': 4, 'iter': 12150, 'avg_loss': 8.508506889687027, 'avg_acc': 49.96142292815406, 'loss': 9.094520568847656}


EP_train:4:  44%|| 12162/27626 [28:44<36:46,  7.01it/s]

{'epoch': 4, 'iter': 12160, 'avg_loss': 8.508461840616794, 'avg_acc': 49.96633706109695, 'loss': 8.813562393188477}


EP_train:4:  44%|| 12172/27626 [28:45<36:57,  6.97it/s]

{'epoch': 4, 'iter': 12170, 'avg_loss': 8.508520105852606, 'avg_acc': 49.96559444581382, 'loss': 9.212658882141113}


EP_train:4:  44%|| 12182/27626 [28:47<36:26,  7.06it/s]

{'epoch': 4, 'iter': 12180, 'avg_loss': 8.50878584019882, 'avg_acc': 49.96741852064691, 'loss': 8.150535583496094}


EP_train:4:  44%|| 12192/27626 [28:48<36:31,  7.04it/s]

{'epoch': 4, 'iter': 12190, 'avg_loss': 8.508773116284717, 'avg_acc': 49.967701583135096, 'loss': 8.204252243041992}


EP_train:4:  44%|| 12202/27626 [28:50<36:52,  6.97it/s]

{'epoch': 4, 'iter': 12200, 'avg_loss': 8.50866031330166, 'avg_acc': 49.9646545365134, 'loss': 8.09786319732666}


EP_train:4:  44%|| 12212/27626 [28:51<36:41,  7.00it/s]

{'epoch': 4, 'iter': 12210, 'avg_loss': 8.5082534822919, 'avg_acc': 49.96673081647695, 'loss': 8.109997749328613}


EP_train:4:  44%|| 12222/27626 [28:52<36:40,  7.00it/s]

{'epoch': 4, 'iter': 12220, 'avg_loss': 8.508176074774427, 'avg_acc': 49.96752516160707, 'loss': 8.26768970489502}


EP_train:4:  44%|| 12232/27626 [28:54<36:42,  6.99it/s]

{'epoch': 4, 'iter': 12230, 'avg_loss': 8.508194287111184, 'avg_acc': 49.96652971956504, 'loss': 8.45776081085205}


EP_train:4:  44%|| 12242/27626 [28:55<36:59,  6.93it/s]

{'epoch': 4, 'iter': 12240, 'avg_loss': 8.50807060929317, 'avg_acc': 49.96579119352994, 'loss': 9.221043586730957}


EP_train:4:  44%|| 12252/27626 [28:57<36:33,  7.01it/s]

{'epoch': 4, 'iter': 12250, 'avg_loss': 8.507939831012317, 'avg_acc': 49.964288629499634, 'loss': 8.274075508117676}


EP_train:4:  44%|| 12262/27626 [28:58<36:25,  7.03it/s]

{'epoch': 4, 'iter': 12260, 'avg_loss': 8.507922766949708, 'avg_acc': 49.96304338960933, 'loss': 8.455904006958008}


EP_train:4:  44%|| 12272/27626 [28:59<36:19,  7.04it/s]

{'epoch': 4, 'iter': 12270, 'avg_loss': 8.508037117580438, 'avg_acc': 49.958998859098685, 'loss': 7.925723552703857}


EP_train:4:  44%|| 12282/27626 [29:01<36:29,  7.01it/s]

{'epoch': 4, 'iter': 12280, 'avg_loss': 8.508111446457649, 'avg_acc': 49.961831284097386, 'loss': 8.785785675048828}


EP_train:4:  44%|| 12292/27626 [29:02<36:14,  7.05it/s]

{'epoch': 4, 'iter': 12290, 'avg_loss': 8.50792257547786, 'avg_acc': 49.96135383614026, 'loss': 8.254554748535156}


EP_train:4:  45%|| 12302/27626 [29:04<36:03,  7.08it/s]

{'epoch': 4, 'iter': 12300, 'avg_loss': 8.5077901137727, 'avg_acc': 49.95859076497845, 'loss': 8.225491523742676}


EP_train:4:  45%|| 12312/27626 [29:05<36:23,  7.02it/s]

{'epoch': 4, 'iter': 12310, 'avg_loss': 8.507868213411873, 'avg_acc': 49.96497035171797, 'loss': 10.033243179321289}


EP_train:4:  45%|| 12322/27626 [29:07<36:12,  7.05it/s]

{'epoch': 4, 'iter': 12320, 'avg_loss': 8.508202535031572, 'avg_acc': 49.970071422774126, 'loss': 9.025700569152832}


EP_train:4:  45%|| 12332/27626 [29:08<36:16,  7.03it/s]

{'epoch': 4, 'iter': 12330, 'avg_loss': 8.508325621368492, 'avg_acc': 49.97085597275161, 'loss': 8.085756301879883}


EP_train:4:  45%|| 12342/27626 [29:09<36:12,  7.03it/s]

{'epoch': 4, 'iter': 12340, 'avg_loss': 8.508675341996002, 'avg_acc': 49.97189247224698, 'loss': 8.763704299926758}


EP_train:4:  45%|| 12352/27626 [29:11<36:06,  7.05it/s]

{'epoch': 4, 'iter': 12350, 'avg_loss': 8.508566243842916, 'avg_acc': 49.976722532588454, 'loss': 7.503453731536865}


EP_train:4:  45%|| 12362/27626 [29:12<36:04,  7.05it/s]

{'epoch': 4, 'iter': 12360, 'avg_loss': 8.508596928718937, 'avg_acc': 49.97370762883262, 'loss': 9.275853157043457}


EP_train:4:  45%|| 12372/27626 [29:14<36:04,  7.05it/s]

{'epoch': 4, 'iter': 12370, 'avg_loss': 8.508440740298708, 'avg_acc': 49.96968717161103, 'loss': 8.598299980163574}


EP_train:4:  45%|| 12382/27626 [29:15<36:08,  7.03it/s]

{'epoch': 4, 'iter': 12380, 'avg_loss': 8.508275915468927, 'avg_acc': 49.969964057830545, 'loss': 8.274896621704102}


EP_train:4:  45%|| 12392/27626 [29:16<36:09,  7.02it/s]

{'epoch': 4, 'iter': 12390, 'avg_loss': 8.50809937483192, 'avg_acc': 49.967214107013156, 'loss': 8.261239051818848}


EP_train:4:  45%|| 12402/27626 [29:18<36:05,  7.03it/s]

{'epoch': 4, 'iter': 12400, 'avg_loss': 8.507985045800103, 'avg_acc': 49.97152447383276, 'loss': 8.896817207336426}


EP_train:4:  45%|| 12412/27626 [29:19<36:24,  6.96it/s]

{'epoch': 4, 'iter': 12410, 'avg_loss': 8.50811104643461, 'avg_acc': 49.97129562484893, 'loss': 8.542454719543457}


EP_train:4:  45%|| 12422/27626 [29:21<36:20,  6.97it/s]

{'epoch': 4, 'iter': 12420, 'avg_loss': 8.50796841830315, 'avg_acc': 49.972325094597856, 'loss': 7.948034763336182}


EP_train:4:  45%|| 12432/27626 [29:22<35:54,  7.05it/s]

{'epoch': 4, 'iter': 12430, 'avg_loss': 8.50774765801002, 'avg_acc': 49.97460984635186, 'loss': 7.900729179382324}


EP_train:4:  45%|| 12442/27626 [29:24<35:48,  7.07it/s]

{'epoch': 4, 'iter': 12440, 'avg_loss': 8.507654222297225, 'avg_acc': 49.97463025480267, 'loss': 8.711982727050781}


EP_train:4:  45%|| 12452/27626 [29:25<36:12,  6.98it/s]

{'epoch': 4, 'iter': 12450, 'avg_loss': 8.507542813487747, 'avg_acc': 49.971638824190826, 'loss': 9.120367050170898}


EP_train:4:  45%|| 12462/27626 [29:26<35:49,  7.06it/s]

{'epoch': 4, 'iter': 12460, 'avg_loss': 8.507585066469396, 'avg_acc': 49.970658454377656, 'loss': 7.683093070983887}


EP_train:4:  45%|| 12472/27626 [29:28<35:43,  7.07it/s]

{'epoch': 4, 'iter': 12470, 'avg_loss': 8.507416864527743, 'avg_acc': 49.972436051639804, 'loss': 8.09979248046875}


EP_train:4:  45%|| 12482/27626 [29:29<35:39,  7.08it/s]

{'epoch': 4, 'iter': 12480, 'avg_loss': 8.507373502149232, 'avg_acc': 49.974210800416635, 'loss': 8.625555038452148}


EP_train:4:  45%|| 12492/27626 [29:31<36:05,  6.99it/s]

{'epoch': 4, 'iter': 12490, 'avg_loss': 8.507473696529335, 'avg_acc': 49.977483788327596, 'loss': 8.939022064208984}


EP_train:4:  45%|| 12502/27626 [29:32<36:06,  6.98it/s]

{'epoch': 4, 'iter': 12500, 'avg_loss': 8.50738980562036, 'avg_acc': 49.97800175985921, 'loss': 8.11037540435791}


EP_train:4:  45%|| 12512/27626 [29:33<35:58,  7.00it/s]

{'epoch': 4, 'iter': 12510, 'avg_loss': 8.507472444329556, 'avg_acc': 49.974022859883306, 'loss': 8.96072006225586}


EP_train:4:  45%|| 12522/27626 [29:35<35:38,  7.06it/s]

{'epoch': 4, 'iter': 12520, 'avg_loss': 8.507594979996593, 'avg_acc': 49.97454276814951, 'loss': 8.02991771697998}


EP_train:4:  45%|| 12532/27626 [29:36<35:44,  7.04it/s]

{'epoch': 4, 'iter': 12530, 'avg_loss': 8.5068129292985, 'avg_acc': 49.97680751735695, 'loss': 7.086221694946289}


EP_train:4:  45%|| 12542/27626 [29:38<35:32,  7.07it/s]

{'epoch': 4, 'iter': 12540, 'avg_loss': 8.50675331558606, 'avg_acc': 49.98031456821625, 'loss': 8.243993759155273}


EP_train:4:  45%|| 12552/27626 [29:39<35:31,  7.07it/s]

{'epoch': 4, 'iter': 12550, 'avg_loss': 8.50676580968867, 'avg_acc': 49.975350569675726, 'loss': 9.05085563659668}


EP_train:4:  45%|| 12562/27626 [29:41<35:40,  7.04it/s]

{'epoch': 4, 'iter': 12560, 'avg_loss': 8.506872502721691, 'avg_acc': 49.97362869198312, 'loss': 8.807364463806152}


EP_train:4:  46%|| 12572/27626 [29:42<35:24,  7.09it/s]

{'epoch': 4, 'iter': 12570, 'avg_loss': 8.506959245859434, 'avg_acc': 49.972655317794924, 'loss': 8.685439109802246}


EP_train:4:  46%|| 12582/27626 [29:43<35:42,  7.02it/s]

{'epoch': 4, 'iter': 12580, 'avg_loss': 8.506861651458268, 'avg_acc': 49.97317383355854, 'loss': 9.263260841369629}


EP_train:4:  46%|| 12592/27626 [29:45<35:43,  7.01it/s]

{'epoch': 4, 'iter': 12590, 'avg_loss': 8.507026859674582, 'avg_acc': 49.96897585576999, 'loss': 8.275333404541016}


EP_train:4:  46%|| 12602/27626 [29:46<35:41,  7.02it/s]

{'epoch': 4, 'iter': 12600, 'avg_loss': 8.507054421553752, 'avg_acc': 49.967512499008016, 'loss': 8.403226852416992}


EP_train:4:  46%|| 12612/27626 [29:48<35:28,  7.05it/s]

{'epoch': 4, 'iter': 12610, 'avg_loss': 8.507070898433083, 'avg_acc': 49.967538260248986, 'loss': 7.752647876739502}


EP_train:4:  46%|| 12622/27626 [29:49<35:21,  7.07it/s]

{'epoch': 4, 'iter': 12620, 'avg_loss': 8.507160485957298, 'avg_acc': 49.96459274225497, 'loss': 8.7504243850708}


EP_train:4:  46%|| 12632/27626 [29:50<35:23,  7.06it/s]

{'epoch': 4, 'iter': 12630, 'avg_loss': 8.506957756199828, 'avg_acc': 49.965857810149636, 'loss': 8.367156982421875}


EP_train:4:  46%|| 12642/27626 [29:52<35:21,  7.06it/s]

{'epoch': 4, 'iter': 12640, 'avg_loss': 8.507077361084919, 'avg_acc': 49.966626453603354, 'loss': 8.345734596252441}


EP_train:4:  46%|| 12652/27626 [29:53<35:32,  7.02it/s]

{'epoch': 4, 'iter': 12650, 'avg_loss': 8.507164902333148, 'avg_acc': 49.96961702632203, 'loss': 7.930814743041992}


EP_train:4:  46%|| 12662/27626 [29:55<35:12,  7.08it/s]

{'epoch': 4, 'iter': 12660, 'avg_loss': 8.507228625158724, 'avg_acc': 49.9691473817234, 'loss': 8.213608741760254}


EP_train:4:  46%|| 12672/27626 [29:56<35:44,  6.97it/s]

{'epoch': 4, 'iter': 12670, 'avg_loss': 8.507303675125527, 'avg_acc': 49.97015823534054, 'loss': 8.678024291992188}


EP_train:4:  46%|| 12682/27626 [29:58<35:17,  7.06it/s]

{'epoch': 4, 'iter': 12680, 'avg_loss': 8.506958309234049, 'avg_acc': 49.968949609652235, 'loss': 7.5376739501953125}


EP_train:4:  46%|| 12692/27626 [29:59<35:26,  7.02it/s]

{'epoch': 4, 'iter': 12690, 'avg_loss': 8.506774731199375, 'avg_acc': 49.96700417618785, 'loss': 8.482970237731934}


EP_train:4:  46%|| 12702/27626 [30:00<35:34,  6.99it/s]

{'epoch': 4, 'iter': 12700, 'avg_loss': 8.506518525801702, 'avg_acc': 49.96752224234312, 'loss': 8.462456703186035}


EP_train:4:  46%|| 12712/27626 [30:02<35:20,  7.03it/s]

{'epoch': 4, 'iter': 12710, 'avg_loss': 8.506424495457534, 'avg_acc': 49.96705609314767, 'loss': 8.803264617919922}


EP_train:4:  46%|| 12722/27626 [30:03<35:19,  7.03it/s]

{'epoch': 4, 'iter': 12720, 'avg_loss': 8.506338652795277, 'avg_acc': 49.96364279537772, 'loss': 8.307197570800781}


EP_train:4:  46%|| 12732/27626 [30:05<35:15,  7.04it/s]

{'epoch': 4, 'iter': 12730, 'avg_loss': 8.506369829271494, 'avg_acc': 49.965389600188516, 'loss': 8.660722732543945}


EP_train:4:  46%|| 12742/27626 [30:06<35:23,  7.01it/s]

{'epoch': 4, 'iter': 12740, 'avg_loss': 8.506527128385025, 'avg_acc': 49.96713366297779, 'loss': 8.697049140930176}


EP_train:4:  46%|| 12752/27626 [30:07<35:40,  6.95it/s]

{'epoch': 4, 'iter': 12750, 'avg_loss': 8.50658048142565, 'avg_acc': 49.97010038428358, 'loss': 8.914783477783203}


EP_train:4:  46%|| 12762/27626 [30:09<35:24,  7.00it/s]

{'epoch': 4, 'iter': 12760, 'avg_loss': 8.506662466750045, 'avg_acc': 49.969144267690616, 'loss': 9.020421981811523}


EP_train:4:  46%|| 12772/27626 [30:10<35:37,  6.95it/s]

{'epoch': 4, 'iter': 12770, 'avg_loss': 8.506876095022127, 'avg_acc': 49.9674555633858, 'loss': 9.02199649810791}


EP_train:4:  46%|| 12782/27626 [30:12<35:12,  7.03it/s]

{'epoch': 4, 'iter': 12780, 'avg_loss': 8.507263638680179, 'avg_acc': 49.970904076363354, 'loss': 9.01844596862793}


EP_train:4:  46%|| 12792/27626 [30:13<35:05,  7.05it/s]

{'epoch': 4, 'iter': 12790, 'avg_loss': 8.50714905399367, 'avg_acc': 49.972881322805094, 'loss': 8.61145305633545}


EP_train:4:  46%|| 12802/27626 [30:15<35:05,  7.04it/s]

{'epoch': 4, 'iter': 12800, 'avg_loss': 8.507199930572927, 'avg_acc': 49.97241426451058, 'loss': 8.952016830444336}


EP_train:4:  46%|| 12812/27626 [30:16<35:13,  7.01it/s]

{'epoch': 4, 'iter': 12810, 'avg_loss': 8.507168887342859, 'avg_acc': 49.970728280384044, 'loss': 8.780719757080078}


EP_train:4:  46%|| 12822/27626 [30:17<35:01,  7.04it/s]

{'epoch': 4, 'iter': 12820, 'avg_loss': 8.507186875032026, 'avg_acc': 49.97001988924421, 'loss': 8.55927562713623}


EP_train:4:  46%|| 12832/27626 [30:19<35:11,  7.01it/s]

{'epoch': 4, 'iter': 12830, 'avg_loss': 8.507046097819426, 'avg_acc': 49.972965863923314, 'loss': 8.155233383178711}


EP_train:4:  46%|| 12842/27626 [30:20<35:06,  7.02it/s]

{'epoch': 4, 'iter': 12840, 'avg_loss': 8.50703591897883, 'avg_acc': 49.97201347247099, 'loss': 8.271405220031738}


EP_train:4:  47%|| 12852/27626 [30:22<35:06,  7.01it/s]

{'epoch': 4, 'iter': 12850, 'avg_loss': 8.50707752229119, 'avg_acc': 49.97349428060073, 'loss': 9.83973217010498}


EP_train:4:  47%|| 12862/27626 [30:23<35:33,  6.92it/s]

{'epoch': 4, 'iter': 12860, 'avg_loss': 8.507466136922584, 'avg_acc': 49.974729803281235, 'loss': 9.290634155273438}


EP_train:4:  47%|| 12872/27626 [30:24<35:01,  7.02it/s]

{'epoch': 4, 'iter': 12870, 'avg_loss': 8.50745464191877, 'avg_acc': 49.97693458161759, 'loss': 8.623913764953613}


EP_train:4:  47%|| 12882/27626 [30:26<34:50,  7.05it/s]

{'epoch': 4, 'iter': 12880, 'avg_loss': 8.507683964973559, 'avg_acc': 49.975011645058615, 'loss': 8.590682029724121}


EP_train:4:  47%|| 12892/27626 [30:27<35:07,  6.99it/s]

{'epoch': 4, 'iter': 12890, 'avg_loss': 8.507566420954827, 'avg_acc': 49.9721220231169, 'loss': 8.319205284118652}


EP_train:4:  47%|| 12902/27626 [30:29<34:46,  7.06it/s]

{'epoch': 4, 'iter': 12900, 'avg_loss': 8.507696714283709, 'avg_acc': 49.97117471513836, 'loss': 8.727082252502441}


EP_train:4:  47%|| 12912/27626 [30:30<34:58,  7.01it/s]

{'epoch': 4, 'iter': 12910, 'avg_loss': 8.507813703634342, 'avg_acc': 49.974585624661145, 'loss': 8.606825828552246}


EP_train:4:  47%|| 12922/27626 [30:32<34:51,  7.03it/s]

{'epoch': 4, 'iter': 12920, 'avg_loss': 8.507759511290887, 'avg_acc': 49.973879730671, 'loss': 8.516690254211426}


EP_train:4:  47%|| 12932/27626 [30:33<34:39,  7.07it/s]

{'epoch': 4, 'iter': 12930, 'avg_loss': 8.507466560154612, 'avg_acc': 49.9736582630887, 'loss': 8.065869331359863}


EP_train:4:  47%|| 12942/27626 [30:34<34:47,  7.03it/s]

{'epoch': 4, 'iter': 12940, 'avg_loss': 8.50765176700409, 'avg_acc': 49.97416157947608, 'loss': 8.673137664794922}


EP_train:4:  47%|| 12952/27626 [30:36<35:09,  6.96it/s]

{'epoch': 4, 'iter': 12950, 'avg_loss': 8.507461758674632, 'avg_acc': 49.97225117751525, 'loss': 8.789105415344238}


EP_train:4:  47%|| 12962/27626 [30:37<35:00,  6.98it/s]

{'epoch': 4, 'iter': 12960, 'avg_loss': 8.507170332019854, 'avg_acc': 49.97130815523494, 'loss': 8.746002197265625}


EP_train:4:  47%|| 12972/27626 [30:39<34:42,  7.04it/s]

{'epoch': 4, 'iter': 12970, 'avg_loss': 8.506829854710672, 'avg_acc': 49.973980417855216, 'loss': 8.304216384887695}


EP_train:4:  47%|| 12982/27626 [30:40<34:31,  7.07it/s]

{'epoch': 4, 'iter': 12980, 'avg_loss': 8.507169541790272, 'avg_acc': 49.97183383406517, 'loss': 8.816080093383789}


EP_train:4:  47%|| 12992/27626 [30:41<34:46,  7.01it/s]

{'epoch': 4, 'iter': 12990, 'avg_loss': 8.507394307645784, 'avg_acc': 49.9708933107536, 'loss': 8.728654861450195}


EP_train:4:  47%|| 13002/27626 [30:43<34:33,  7.05it/s]

{'epoch': 4, 'iter': 13000, 'avg_loss': 8.507289951649568, 'avg_acc': 49.970194600415354, 'loss': 7.709869861602783}


EP_train:4:  47%|| 13012/27626 [30:44<34:31,  7.06it/s]

{'epoch': 4, 'iter': 13010, 'avg_loss': 8.50736690657867, 'avg_acc': 49.973580047651986, 'loss': 9.085932731628418}


EP_train:4:  47%|| 13022/27626 [30:46<34:41,  7.01it/s]

{'epoch': 4, 'iter': 13020, 'avg_loss': 8.507574114745667, 'avg_acc': 49.97408033177175, 'loss': 8.27015209197998}


EP_train:4:  47%|| 13032/27626 [30:47<34:30,  7.05it/s]

{'epoch': 4, 'iter': 13030, 'avg_loss': 8.507662443250736, 'avg_acc': 49.970742843987416, 'loss': 8.278227806091309}


EP_train:4:  47%|| 13042/27626 [30:49<34:37,  7.02it/s]

{'epoch': 4, 'iter': 13040, 'avg_loss': 8.507378010815268, 'avg_acc': 49.96908787669657, 'loss': 9.009876251220703}


EP_train:4:  47%|| 13052/27626 [30:50<34:40,  7.01it/s]

{'epoch': 4, 'iter': 13050, 'avg_loss': 8.507194081847855, 'avg_acc': 49.9679143360662, 'loss': 8.781085014343262}


EP_train:4:  47%|| 13062/27626 [30:51<34:34,  7.02it/s]

{'epoch': 4, 'iter': 13060, 'avg_loss': 8.507171445834437, 'avg_acc': 49.969852997473396, 'loss': 8.394526481628418}


EP_train:4:  47%|| 13072/27626 [30:53<34:41,  6.99it/s]

{'epoch': 4, 'iter': 13070, 'avg_loss': 8.50708329888257, 'avg_acc': 49.971549613648534, 'loss': 8.364481925964355}


EP_train:4:  47%|| 13082/27626 [30:54<34:23,  7.05it/s]

{'epoch': 4, 'iter': 13080, 'avg_loss': 8.50697453241995, 'avg_acc': 49.976349285222845, 'loss': 7.987750053405762}


EP_train:4:  47%|| 13092/27626 [30:56<34:13,  7.08it/s]

{'epoch': 4, 'iter': 13090, 'avg_loss': 8.506905614806136, 'avg_acc': 49.977560919715835, 'loss': 7.8939528465271}


EP_train:4:  47%|| 13102/27626 [30:57<34:16,  7.06it/s]

{'epoch': 4, 'iter': 13100, 'avg_loss': 8.506837671271317, 'avg_acc': 49.97614685901839, 'loss': 8.06566333770752}


EP_train:4:  47%|| 13112/27626 [30:58<34:22,  7.04it/s]

{'epoch': 4, 'iter': 13110, 'avg_loss': 8.506976305215531, 'avg_acc': 49.97211311112806, 'loss': 8.474428176879883}


EP_train:4:  47%|| 13122/27626 [31:00<34:25,  7.02it/s]

{'epoch': 4, 'iter': 13120, 'avg_loss': 8.50754762271875, 'avg_acc': 49.970705357823334, 'loss': 9.47431468963623}


EP_train:4:  48%|| 13132/27626 [31:01<34:26,  7.01it/s]

{'epoch': 4, 'iter': 13130, 'avg_loss': 8.507656283084822, 'avg_acc': 49.96739585713198, 'loss': 8.271160125732422}


EP_train:4:  48%|| 13142/27626 [31:03<34:25,  7.01it/s]

{'epoch': 4, 'iter': 13140, 'avg_loss': 8.50769257532771, 'avg_acc': 49.96813408416407, 'loss': 8.420315742492676}


EP_train:4:  48%|| 13152/27626 [31:04<34:14,  7.04it/s]

{'epoch': 4, 'iter': 13150, 'avg_loss': 8.507543794375103, 'avg_acc': 49.96720781689605, 'loss': 8.028945922851562}


EP_train:4:  48%|| 13162/27626 [31:06<34:20,  7.02it/s]

{'epoch': 4, 'iter': 13160, 'avg_loss': 8.507496840467788, 'avg_acc': 49.96699528911177, 'loss': 8.51656436920166}


EP_train:4:  48%|| 13172/27626 [31:07<34:06,  7.06it/s]

{'epoch': 4, 'iter': 13170, 'avg_loss': 8.507283631657044, 'avg_acc': 49.96773213878976, 'loss': 8.9439697265625}


EP_train:4:  48%|| 13182/27626 [31:08<34:25,  6.99it/s]

{'epoch': 4, 'iter': 13180, 'avg_loss': 8.507290459648718, 'avg_acc': 49.969416205143766, 'loss': 8.556228637695312}


EP_train:4:  48%|| 13192/27626 [31:10<34:11,  7.04it/s]

{'epoch': 4, 'iter': 13190, 'avg_loss': 8.507394064856339, 'avg_acc': 49.97299294973846, 'loss': 8.680724143981934}


EP_train:4:  48%|| 13202/27626 [31:11<33:53,  7.09it/s]

{'epoch': 4, 'iter': 13200, 'avg_loss': 8.507426727360807, 'avg_acc': 49.97538065298083, 'loss': 8.96578598022461}


EP_train:4:  48%|| 13212/27626 [31:13<34:22,  6.99it/s]

{'epoch': 4, 'iter': 13210, 'avg_loss': 8.507199299325555, 'avg_acc': 49.97303383544016, 'loss': 8.003828048706055}


EP_train:4:  48%|| 13222/27626 [31:14<34:18,  7.00it/s]

{'epoch': 4, 'iter': 13220, 'avg_loss': 8.50698636669352, 'avg_acc': 49.9706905680357, 'loss': 7.5033955574035645}


EP_train:4:  48%|| 13232/27626 [31:16<34:23,  6.97it/s]

{'epoch': 4, 'iter': 13230, 'avg_loss': 8.507021599062428, 'avg_acc': 49.96976796916333, 'loss': 9.202591896057129}


EP_train:4:  48%|| 13242/27626 [31:17<34:11,  7.01it/s]

{'epoch': 4, 'iter': 13240, 'avg_loss': 8.506974040179012, 'avg_acc': 49.969554791934144, 'loss': 8.372220993041992}


EP_train:4:  48%|| 13252/27626 [31:18<34:11,  7.01it/s]

{'epoch': 4, 'iter': 13250, 'avg_loss': 8.507039958634886, 'avg_acc': 49.97052109274772, 'loss': 8.550647735595703}


EP_train:4:  48%|| 13262/27626 [31:20<33:59,  7.04it/s]

{'epoch': 4, 'iter': 13260, 'avg_loss': 8.507380001074443, 'avg_acc': 49.9707789759445, 'loss': 9.625940322875977}


EP_train:4:  48%|| 13272/27626 [31:21<33:45,  7.09it/s]

{'epoch': 4, 'iter': 13270, 'avg_loss': 8.507395776122456, 'avg_acc': 49.971742898048376, 'loss': 9.039294242858887}


EP_train:4:  48%|| 13282/27626 [31:23<34:03,  7.02it/s]

{'epoch': 4, 'iter': 13280, 'avg_loss': 8.507259689229375, 'avg_acc': 49.969646487463294, 'loss': 8.590660095214844}


EP_train:4:  48%|| 13292/27626 [31:24<34:02,  7.02it/s]

{'epoch': 4, 'iter': 13290, 'avg_loss': 8.507237858019225, 'avg_acc': 49.972961026258375, 'loss': 8.664709091186523}


EP_train:4:  48%|| 13302/27626 [31:25<33:57,  7.03it/s]

{'epoch': 4, 'iter': 13300, 'avg_loss': 8.507164370430187, 'avg_acc': 49.97251146530336, 'loss': 9.240001678466797}


EP_train:4:  48%|| 13312/27626 [31:27<34:15,  6.96it/s]

{'epoch': 4, 'iter': 13310, 'avg_loss': 8.507348196662866, 'avg_acc': 49.974879798662755, 'loss': 8.762207984924316}


EP_train:4:  48%|| 13322/27626 [31:28<34:03,  7.00it/s]

{'epoch': 4, 'iter': 13320, 'avg_loss': 8.507398222187387, 'avg_acc': 49.974429472261846, 'loss': 8.281411170959473}


EP_train:4:  48%|| 13332/27626 [31:30<33:44,  7.06it/s]

{'epoch': 4, 'iter': 13330, 'avg_loss': 8.507514728196751, 'avg_acc': 49.97726164578801, 'loss': 8.759610176086426}


EP_train:4:  48%|| 13342/27626 [31:31<33:53,  7.02it/s]

{'epoch': 4, 'iter': 13340, 'avg_loss': 8.507458693262215, 'avg_acc': 49.9739993253879, 'loss': 8.13635540008545}


EP_train:4:  48%|| 13352/27626 [31:33<33:53,  7.02it/s]

{'epoch': 4, 'iter': 13350, 'avg_loss': 8.507548980378594, 'avg_acc': 49.97589131900232, 'loss': 8.32679271697998}


EP_train:4:  48%|| 13362/27626 [31:34<33:51,  7.02it/s]

{'epoch': 4, 'iter': 13360, 'avg_loss': 8.507577664890865, 'avg_acc': 49.97661103210837, 'loss': 7.847815036773682}


EP_train:4:  48%|| 13372/27626 [31:35<34:09,  6.95it/s]

{'epoch': 4, 'iter': 13370, 'avg_loss': 8.50753316122524, 'avg_acc': 49.97335651783711, 'loss': 8.645090103149414}


EP_train:4:  48%|| 13382/27626 [31:37<33:47,  7.03it/s]

{'epoch': 4, 'iter': 13380, 'avg_loss': 8.507619105133006, 'avg_acc': 49.97501120992452, 'loss': 8.251596450805664}


EP_train:4:  48%|| 13392/27626 [31:38<33:39,  7.05it/s]

{'epoch': 4, 'iter': 13390, 'avg_loss': 8.507729589943569, 'avg_acc': 49.97736352774251, 'loss': 9.234560012817383}


EP_train:4:  49%|| 13402/27626 [31:40<33:51,  7.00it/s]

{'epoch': 4, 'iter': 13400, 'avg_loss': 8.507729327053323, 'avg_acc': 49.976447653160214, 'loss': 8.330073356628418}


EP_train:4:  49%|| 13412/27626 [31:41<33:56,  6.98it/s]

{'epoch': 4, 'iter': 13410, 'avg_loss': 8.507848418591834, 'avg_acc': 49.97646521512191, 'loss': 8.738561630249023}


EP_train:4:  49%|| 13422/27626 [31:42<33:22,  7.09it/s]

{'epoch': 4, 'iter': 13420, 'avg_loss': 8.507910285654388, 'avg_acc': 49.97974256761791, 'loss': 8.312743186950684}


EP_train:4:  49%|| 13432/27626 [31:44<33:37,  7.04it/s]

{'epoch': 4, 'iter': 13430, 'avg_loss': 8.50777274184248, 'avg_acc': 49.9797576502122, 'loss': 7.827982425689697}


EP_train:4:  49%|| 13442/27626 [31:45<33:44,  7.01it/s]

{'epoch': 4, 'iter': 13440, 'avg_loss': 8.507634456185533, 'avg_acc': 49.981400193437985, 'loss': 8.436467170715332}


EP_train:4:  49%|| 13452/27626 [31:47<34:07,  6.92it/s]

{'epoch': 4, 'iter': 13450, 'avg_loss': 8.507744718365453, 'avg_acc': 49.984666567541446, 'loss': 8.568807601928711}


EP_train:4:  49%|| 13462/27626 [31:48<33:50,  6.98it/s]

{'epoch': 4, 'iter': 13460, 'avg_loss': 8.50755874997802, 'avg_acc': 49.98421365426046, 'loss': 8.190759658813477}


EP_train:4:  49%|| 13472/27626 [31:50<33:43,  7.00it/s]

{'epoch': 4, 'iter': 13470, 'avg_loss': 8.507666938970901, 'avg_acc': 49.98538527206592, 'loss': 9.299004554748535}


EP_train:4:  49%|| 13482/27626 [31:51<33:25,  7.05it/s]

{'epoch': 4, 'iter': 13480, 'avg_loss': 8.507860726944443, 'avg_acc': 49.98910503671834, 'loss': 8.318733215332031}


EP_train:4:  49%|| 13492/27626 [31:52<33:38,  7.00it/s]

{'epoch': 4, 'iter': 13490, 'avg_loss': 8.508021715758119, 'avg_acc': 49.988881476539916, 'loss': 9.432927131652832}


EP_train:4:  49%|| 13502/27626 [31:54<33:42,  6.98it/s]

{'epoch': 4, 'iter': 13500, 'avg_loss': 8.508153022973504, 'avg_acc': 49.988195318865266, 'loss': 8.732115745544434}


EP_train:4:  49%|| 13512/27626 [31:55<33:33,  7.01it/s]

{'epoch': 4, 'iter': 13510, 'avg_loss': 8.508235073964855, 'avg_acc': 49.98866664199541, 'loss': 8.429916381835938}


EP_train:4:  49%|| 13522/27626 [31:57<33:33,  7.00it/s]

{'epoch': 4, 'iter': 13520, 'avg_loss': 8.508279694862667, 'avg_acc': 49.990061755787295, 'loss': 8.455058097839355}


EP_train:4:  49%|| 13532/27626 [31:58<33:19,  7.05it/s]

{'epoch': 4, 'iter': 13530, 'avg_loss': 8.508529339156652, 'avg_acc': 49.990761954031484, 'loss': 8.722615242004395}


EP_train:4:  49%|| 13542/27626 [32:00<33:08,  7.08it/s]

{'epoch': 4, 'iter': 13540, 'avg_loss': 8.50836467221748, 'avg_acc': 49.98984565393989, 'loss': 8.364598274230957}


EP_train:4:  49%|| 13552/27626 [32:01<33:16,  7.05it/s]

{'epoch': 4, 'iter': 13550, 'avg_loss': 8.508126449219002, 'avg_acc': 49.988008265072686, 'loss': 7.853991985321045}


EP_train:4:  49%|| 13562/27626 [32:02<33:09,  7.07it/s]

{'epoch': 4, 'iter': 13560, 'avg_loss': 8.508075710735739, 'avg_acc': 49.98617358601873, 'loss': 8.374673843383789}


EP_train:4:  49%|| 13572/27626 [32:04<33:21,  7.02it/s]

{'epoch': 4, 'iter': 13570, 'avg_loss': 8.507761051867215, 'avg_acc': 49.98572323336526, 'loss': 7.528346061706543}


EP_train:4:  49%|| 13582/27626 [32:05<33:33,  6.97it/s]

{'epoch': 4, 'iter': 13580, 'avg_loss': 8.507586370800567, 'avg_acc': 49.982512333406966, 'loss': 8.725817680358887}


EP_train:4:  49%|| 13592/27626 [32:07<33:22,  7.01it/s]

{'epoch': 4, 'iter': 13590, 'avg_loss': 8.507535303413503, 'avg_acc': 49.98183540578324, 'loss': 9.18140983581543}


EP_train:4:  49%|| 13602/27626 [32:08<33:16,  7.02it/s]

{'epoch': 4, 'iter': 13600, 'avg_loss': 8.5076227749054, 'avg_acc': 49.98001066098081, 'loss': 7.897175312042236}


EP_train:4:  49%|| 13612/27626 [32:09<33:07,  7.05it/s]

{'epoch': 4, 'iter': 13610, 'avg_loss': 8.507698145193686, 'avg_acc': 49.98002534714569, 'loss': 9.092084884643555}


EP_train:4:  49%|| 13622/27626 [32:11<33:08,  7.04it/s]

{'epoch': 4, 'iter': 13620, 'avg_loss': 8.50757716057558, 'avg_acc': 49.97981058659423, 'loss': 8.21047592163086}


EP_train:4:  49%|| 13632/27626 [32:12<33:09,  7.03it/s]

{'epoch': 4, 'iter': 13630, 'avg_loss': 8.507598656672618, 'avg_acc': 49.977991343261685, 'loss': 7.805915832519531}


EP_train:4:  49%|| 13642/27626 [32:14<33:01,  7.06it/s]

{'epoch': 4, 'iter': 13640, 'avg_loss': 8.507702318736426, 'avg_acc': 49.978236566234145, 'loss': 9.051131248474121}


EP_train:4:  49%|| 13652/27626 [32:15<33:10,  7.02it/s]

{'epoch': 4, 'iter': 13650, 'avg_loss': 8.507576185253965, 'avg_acc': 49.9782525089737, 'loss': 8.47981071472168}


EP_train:4:  49%|| 13662/27626 [32:17<33:27,  6.96it/s]

{'epoch': 4, 'iter': 13660, 'avg_loss': 8.507892263641592, 'avg_acc': 49.97735341483054, 'loss': 8.768733024597168}


EP_train:4:  49%|| 13672/27626 [32:18<33:08,  7.02it/s]

{'epoch': 4, 'iter': 13670, 'avg_loss': 8.508042076634869, 'avg_acc': 49.97828432448248, 'loss': 8.732495307922363}


EP_train:4:  50%|| 13682/27626 [32:19<33:14,  6.99it/s]

{'epoch': 4, 'iter': 13680, 'avg_loss': 8.508103393391291, 'avg_acc': 49.97967071120532, 'loss': 8.905410766601562}


EP_train:4:  50%|| 13692/27626 [32:21<33:19,  6.97it/s]

{'epoch': 4, 'iter': 13690, 'avg_loss': 8.507936473203083, 'avg_acc': 49.97968555985684, 'loss': 8.032211303710938}


EP_train:4:  50%|| 13702/27626 [32:22<32:49,  7.07it/s]

{'epoch': 4, 'iter': 13700, 'avg_loss': 8.507763954629237, 'avg_acc': 49.97992847237428, 'loss': 7.654054641723633}


EP_train:4:  50%|| 13712/27626 [32:24<32:55,  7.04it/s]

{'epoch': 4, 'iter': 13710, 'avg_loss': 8.507799077891267, 'avg_acc': 49.98153854569324, 'loss': 9.024181365966797}


EP_train:4:  50%|| 13722/27626 [32:25<32:52,  7.05it/s]

{'epoch': 4, 'iter': 13720, 'avg_loss': 8.507798369585377, 'avg_acc': 49.982463012899935, 'loss': 9.131916999816895}


EP_train:4:  50%|| 13732/27626 [32:26<33:12,  6.97it/s]

{'epoch': 4, 'iter': 13730, 'avg_loss': 8.50814530129044, 'avg_acc': 49.982930959143545, 'loss': 7.9800896644592285}


EP_train:4:  50%|| 13742/27626 [32:28<33:04,  7.00it/s]

{'epoch': 4, 'iter': 13740, 'avg_loss': 8.508069230153149, 'avg_acc': 49.978167527836405, 'loss': 8.114778518676758}


EP_train:4:  50%|| 13752/27626 [32:29<33:03,  6.99it/s]

{'epoch': 4, 'iter': 13750, 'avg_loss': 8.508416683160126, 'avg_acc': 49.9740927932514, 'loss': 9.7102632522583}


EP_train:4:  50%|| 13762/27626 [32:31<33:02,  6.99it/s]

{'epoch': 4, 'iter': 13760, 'avg_loss': 8.508362007473766, 'avg_acc': 49.97388452874064, 'loss': 9.00187873840332}


EP_train:4:  50%|| 13772/27626 [32:32<32:44,  7.05it/s]

{'epoch': 4, 'iter': 13770, 'avg_loss': 8.508526645795026, 'avg_acc': 49.97344964054898, 'loss': 8.899048805236816}


EP_train:4:  50%|| 13782/27626 [32:34<32:42,  7.05it/s]

{'epoch': 4, 'iter': 13780, 'avg_loss': 8.508947768099603, 'avg_acc': 49.974829475364636, 'loss': 9.537482261657715}


EP_train:4:  50%|| 13792/27626 [32:35<33:01,  6.98it/s]

{'epoch': 4, 'iter': 13790, 'avg_loss': 8.508939417958752, 'avg_acc': 49.97144877093756, 'loss': 8.192492485046387}


EP_train:4:  50%|| 13802/27626 [32:36<32:53,  7.01it/s]

{'epoch': 4, 'iter': 13800, 'avg_loss': 8.508993885636494, 'avg_acc': 49.968072965727124, 'loss': 8.241958618164062}


EP_train:4:  50%|| 13812/27626 [32:38<32:40,  7.05it/s]

{'epoch': 4, 'iter': 13810, 'avg_loss': 8.50879713237592, 'avg_acc': 49.972621461154155, 'loss': 8.65774154663086}


EP_train:4:  50%|| 13822/27626 [32:39<32:44,  7.03it/s]

{'epoch': 4, 'iter': 13820, 'avg_loss': 8.508706079793127, 'avg_acc': 49.97648505896824, 'loss': 8.918661117553711}


EP_train:4:  50%|| 13832/27626 [32:41<32:37,  7.05it/s]

{'epoch': 4, 'iter': 13830, 'avg_loss': 8.508910657595713, 'avg_acc': 49.97717988576386, 'loss': 8.794878005981445}


EP_train:4:  50%|| 13842/27626 [32:42<32:33,  7.06it/s]

{'epoch': 4, 'iter': 13840, 'avg_loss': 8.508754119545072, 'avg_acc': 49.97584170218915, 'loss': 8.121159553527832}


EP_train:4:  50%|| 13852/27626 [32:43<32:46,  7.00it/s]

{'epoch': 4, 'iter': 13850, 'avg_loss': 8.508680177865825, 'avg_acc': 49.97585914374413, 'loss': 9.128707885742188}


EP_train:4:  50%|| 13862/27626 [32:45<32:24,  7.08it/s]

{'epoch': 4, 'iter': 13860, 'avg_loss': 8.508821089540705, 'avg_acc': 49.97655291825986, 'loss': 9.030699729919434}


EP_train:4:  50%|| 13872/27626 [32:46<32:34,  7.04it/s]

{'epoch': 4, 'iter': 13870, 'avg_loss': 8.508921370356061, 'avg_acc': 49.97769627279936, 'loss': 9.263915061950684}


EP_train:4:  50%|| 13882/27626 [32:48<33:15,  6.89it/s]

{'epoch': 4, 'iter': 13880, 'avg_loss': 8.508976834212469, 'avg_acc': 49.979963619335784, 'loss': 8.190835952758789}


EP_train:4:  50%|| 13892/27626 [32:49<32:20,  7.08it/s]

{'epoch': 4, 'iter': 13890, 'avg_loss': 8.508829653456182, 'avg_acc': 49.98155280397379, 'loss': 9.476250648498535}


EP_train:4:  50%|| 13902/27626 [32:51<32:20,  7.07it/s]

{'epoch': 4, 'iter': 13900, 'avg_loss': 8.50860376253753, 'avg_acc': 49.981790878354076, 'loss': 7.672914028167725}


EP_train:4:  50%|| 13912/27626 [32:52<32:38,  7.00it/s]

{'epoch': 4, 'iter': 13910, 'avg_loss': 8.508614135965953, 'avg_acc': 49.98562288836173, 'loss': 8.566756248474121}


EP_train:4:  50%|| 13922/27626 [32:53<32:34,  7.01it/s]

{'epoch': 4, 'iter': 13920, 'avg_loss': 8.508941261104345, 'avg_acc': 49.986306659004384, 'loss': 8.682077407836914}


EP_train:4:  50%|| 13932/27626 [32:55<32:32,  7.01it/s]

{'epoch': 4, 'iter': 13930, 'avg_loss': 8.508836635760469, 'avg_acc': 49.98923264661546, 'loss': 7.840588569641113}


EP_train:4:  50%|| 13942/27626 [32:56<32:36,  6.99it/s]

{'epoch': 4, 'iter': 13940, 'avg_loss': 8.508728762523283, 'avg_acc': 49.98834373430887, 'loss': 8.12299633026123}


EP_train:4:  51%|| 13952/27626 [32:58<32:34,  6.99it/s]

{'epoch': 4, 'iter': 13950, 'avg_loss': 8.508458349390462, 'avg_acc': 49.98700809977779, 'loss': 7.734809398651123}


EP_train:4:  51%|| 13962/27626 [32:59<32:24,  7.03it/s]

{'epoch': 4, 'iter': 13960, 'avg_loss': 8.508434667738245, 'avg_acc': 49.9881365947998, 'loss': 7.8053460121154785}


EP_train:4:  51%|| 13972/27626 [33:00<32:19,  7.04it/s]

{'epoch': 4, 'iter': 13970, 'avg_loss': 8.5085007703242, 'avg_acc': 49.98680302054255, 'loss': 8.177641868591309}


EP_train:4:  51%|| 13982/27626 [33:02<32:25,  7.01it/s]

{'epoch': 4, 'iter': 13980, 'avg_loss': 8.50877614754923, 'avg_acc': 49.98502431871826, 'loss': 8.947937965393066}


EP_train:4:  51%|| 13992/27626 [33:03<32:17,  7.04it/s]

{'epoch': 4, 'iter': 13990, 'avg_loss': 8.508873125643005, 'avg_acc': 49.98548173826031, 'loss': 8.994375228881836}


EP_train:4:  51%|| 14002/27626 [33:05<32:17,  7.03it/s]

{'epoch': 4, 'iter': 14000, 'avg_loss': 8.509028205479241, 'avg_acc': 49.98571530604956, 'loss': 8.401731491088867}


EP_train:4:  51%|| 14012/27626 [33:06<32:14,  7.04it/s]

{'epoch': 4, 'iter': 14010, 'avg_loss': 8.509067683716452, 'avg_acc': 49.98773285275855, 'loss': 8.571054458618164}


EP_train:4:  51%|| 14022/27626 [33:08<32:04,  7.07it/s]

{'epoch': 4, 'iter': 14020, 'avg_loss': 8.509025777579119, 'avg_acc': 49.9870729619856, 'loss': 8.344679832458496}


EP_train:4:  51%|| 14032/27626 [33:09<32:05,  7.06it/s]

{'epoch': 4, 'iter': 14030, 'avg_loss': 8.50913033493229, 'avg_acc': 49.99175931865156, 'loss': 8.28402328491211}


EP_train:4:  51%|| 14042/27626 [33:10<32:13,  7.02it/s]

{'epoch': 4, 'iter': 14040, 'avg_loss': 8.509013207717276, 'avg_acc': 49.992432875151344, 'loss': 8.437434196472168}


EP_train:4:  51%|| 14052/27626 [33:12<31:55,  7.09it/s]

{'epoch': 4, 'iter': 14050, 'avg_loss': 8.509149039464441, 'avg_acc': 49.99088143192655, 'loss': 8.697784423828125}


EP_train:4:  51%|| 14062/27626 [33:13<32:03,  7.05it/s]

{'epoch': 4, 'iter': 14060, 'avg_loss': 8.509188226883362, 'avg_acc': 49.987554228006545, 'loss': 8.702004432678223}


EP_train:4:  51%|| 14072/27626 [33:15<32:13,  7.01it/s]

{'epoch': 4, 'iter': 14070, 'avg_loss': 8.50926028510484, 'avg_acc': 49.988007248951746, 'loss': 8.376937866210938}


EP_train:4:  51%|| 14082/27626 [33:16<31:54,  7.07it/s]

{'epoch': 4, 'iter': 14080, 'avg_loss': 8.509407653142153, 'avg_acc': 49.989569277750164, 'loss': 8.437261581420898}


EP_train:4:  51%|| 14092/27626 [33:18<32:20,  6.97it/s]

{'epoch': 4, 'iter': 14090, 'avg_loss': 8.509633391365893, 'avg_acc': 49.99312504435455, 'loss': 8.466028213500977}


EP_train:4:  51%|| 14102/27626 [33:19<32:06,  7.02it/s]

{'epoch': 4, 'iter': 14100, 'avg_loss': 8.509729792417241, 'avg_acc': 49.99268668888731, 'loss': 8.879230499267578}


EP_train:4:  51%|| 14112/27626 [33:20<31:51,  7.07it/s]

{'epoch': 4, 'iter': 14110, 'avg_loss': 8.509592705416347, 'avg_acc': 49.99490645595635, 'loss': 8.359705924987793}


EP_train:4:  51%|| 14122/27626 [33:22<32:07,  7.01it/s]

{'epoch': 4, 'iter': 14120, 'avg_loss': 8.509779775322484, 'avg_acc': 49.996680475886976, 'loss': 9.091529846191406}


EP_train:4:  51%|| 14132/27626 [33:23<31:56,  7.04it/s]

{'epoch': 4, 'iter': 14130, 'avg_loss': 8.509740981012298, 'avg_acc': 49.99911541999859, 'loss': 8.683037757873535}


EP_train:4:  51%|| 14142/27626 [33:25<31:53,  7.05it/s]

{'epoch': 4, 'iter': 14140, 'avg_loss': 8.509794538070487, 'avg_acc': 49.99756912523867, 'loss': 8.213967323303223}


EP_train:4:  51%|| 14152/27626 [33:26<31:58,  7.02it/s]

{'epoch': 4, 'iter': 14150, 'avg_loss': 8.509750611992171, 'avg_acc': 49.99668751324995, 'loss': 8.342202186584473}


EP_train:4:  51%|| 14162/27626 [33:27<31:54,  7.03it/s]

{'epoch': 4, 'iter': 14160, 'avg_loss': 8.509472080294287, 'avg_acc': 49.99646917590566, 'loss': 8.064836502075195}


EP_train:4:  51%|| 14172/27626 [33:29<31:41,  7.08it/s]

{'epoch': 4, 'iter': 14170, 'avg_loss': 8.509345981617281, 'avg_acc': 49.99735375061746, 'loss': 7.592530250549316}


EP_train:4:  51%|| 14182/27626 [33:30<31:51,  7.03it/s]

{'epoch': 4, 'iter': 14180, 'avg_loss': 8.509135688446564, 'avg_acc': 49.995592694450316, 'loss': 8.316661834716797}


EP_train:4:  51%|| 14192/27626 [33:32<31:45,  7.05it/s]

{'epoch': 4, 'iter': 14190, 'avg_loss': 8.509042431642722, 'avg_acc': 49.99383412021704, 'loss': 8.987492561340332}


EP_train:4:  51%|| 14202/27626 [33:33<31:39,  7.07it/s]

{'epoch': 4, 'iter': 14200, 'avg_loss': 8.508917266286163, 'avg_acc': 49.993838462080134, 'loss': 7.826533794403076}


EP_train:4:  51%|| 14212/27626 [33:34<31:42,  7.05it/s]

{'epoch': 4, 'iter': 14210, 'avg_loss': 8.508833358091687, 'avg_acc': 49.991203996903806, 'loss': 8.408106803894043}


EP_train:4:  51%|| 14222/27626 [33:36<31:47,  7.03it/s]

{'epoch': 4, 'iter': 14220, 'avg_loss': 8.508558589373868, 'avg_acc': 49.99274840025315, 'loss': 7.873532772064209}


EP_train:4:  52%|| 14232/27626 [33:37<31:29,  7.09it/s]

{'epoch': 4, 'iter': 14230, 'avg_loss': 8.508402353059834, 'avg_acc': 49.99275349588926, 'loss': 8.468833923339844}


EP_train:4:  52%|| 14242/27626 [33:39<31:39,  7.04it/s]

{'epoch': 4, 'iter': 14240, 'avg_loss': 8.508649515759098, 'avg_acc': 49.990564215996066, 'loss': 8.180013656616211}


EP_train:4:  52%|| 14252/27626 [33:40<31:41,  7.03it/s]

{'epoch': 4, 'iter': 14250, 'avg_loss': 8.508755279000571, 'avg_acc': 49.98947442284752, 'loss': 9.501693725585938}


EP_train:4:  52%|| 14262/27626 [33:42<31:43,  7.02it/s]

{'epoch': 4, 'iter': 14260, 'avg_loss': 8.508777230411683, 'avg_acc': 49.9846609634668, 'loss': 7.8532023429870605}


EP_train:4:  52%|| 14272/27626 [33:43<31:37,  7.04it/s]

{'epoch': 4, 'iter': 14270, 'avg_loss': 8.50873708320678, 'avg_acc': 49.980292200967, 'loss': 8.558996200561523}


EP_train:4:  52%|| 14282/27626 [33:44<31:40,  7.02it/s]

{'epoch': 4, 'iter': 14280, 'avg_loss': 8.508861865059734, 'avg_acc': 49.98074364540298, 'loss': 7.753533840179443}


EP_train:4:  52%|| 14292/27626 [33:46<31:51,  6.98it/s]

{'epoch': 4, 'iter': 14290, 'avg_loss': 8.508894577711043, 'avg_acc': 49.98075711986565, 'loss': 8.569674491882324}


EP_train:4:  52%|| 14302/27626 [33:47<31:30,  7.05it/s]

{'epoch': 4, 'iter': 14300, 'avg_loss': 8.508969812257817, 'avg_acc': 49.976400251730645, 'loss': 7.784595966339111}


EP_train:4:  52%|| 14312/27626 [33:49<31:32,  7.04it/s]

{'epoch': 4, 'iter': 14310, 'avg_loss': 8.509226765158491, 'avg_acc': 49.973141289916846, 'loss': 7.986301898956299}


EP_train:4:  52%|| 14322/27626 [33:50<31:48,  6.97it/s]

{'epoch': 4, 'iter': 14320, 'avg_loss': 8.5092805637077, 'avg_acc': 49.97054151246421, 'loss': 8.084440231323242}


EP_train:4:  52%|| 14332/27626 [33:51<31:26,  7.05it/s]

{'epoch': 4, 'iter': 14330, 'avg_loss': 8.509616062701202, 'avg_acc': 49.97383294954993, 'loss': 9.145312309265137}


EP_train:4:  52%|| 14342/27626 [33:53<31:29,  7.03it/s]

{'epoch': 4, 'iter': 14340, 'avg_loss': 8.509603793264791, 'avg_acc': 49.975158636078376, 'loss': 8.573057174682617}


EP_train:4:  52%|| 14352/27626 [33:54<31:24,  7.04it/s]

{'epoch': 4, 'iter': 14350, 'avg_loss': 8.509635944664167, 'avg_acc': 49.979748797993174, 'loss': 9.364696502685547}


EP_train:4:  52%|| 14362/27626 [33:56<31:38,  6.99it/s]

{'epoch': 4, 'iter': 14360, 'avg_loss': 8.5095041713445, 'avg_acc': 49.982156535060234, 'loss': 7.652779579162598}


EP_train:4:  52%|| 14372/27626 [33:57<31:19,  7.05it/s]

{'epoch': 4, 'iter': 14370, 'avg_loss': 8.50956009690344, 'avg_acc': 49.9819514995477, 'loss': 7.1637420654296875}


EP_train:4:  52%|| 14382/27626 [33:59<31:29,  7.01it/s]

{'epoch': 4, 'iter': 14380, 'avg_loss': 8.509600019730163, 'avg_acc': 49.98152944857798, 'loss': 7.826323509216309}


EP_train:4:  52%|| 14392/27626 [34:00<31:23,  7.02it/s]

{'epoch': 4, 'iter': 14390, 'avg_loss': 8.509603139830798, 'avg_acc': 49.984148078660276, 'loss': 9.036059379577637}


EP_train:4:  52%|| 14402/27626 [34:01<31:10,  7.07it/s]

{'epoch': 4, 'iter': 14400, 'avg_loss': 8.50962682201104, 'avg_acc': 49.986546073189366, 'loss': 8.747579574584961}


EP_train:4:  52%|| 14412/27626 [34:03<31:31,  6.98it/s]

{'epoch': 4, 'iter': 14410, 'avg_loss': 8.509752395358028, 'avg_acc': 49.98243529248491, 'loss': 8.530113220214844}


EP_train:4:  52%|| 14422/27626 [34:04<31:06,  7.08it/s]

{'epoch': 4, 'iter': 14420, 'avg_loss': 8.50974659175435, 'avg_acc': 49.98309756604951, 'loss': 8.048566818237305}


EP_train:4:  52%|| 14432/27626 [34:06<31:10,  7.05it/s]

{'epoch': 4, 'iter': 14430, 'avg_loss': 8.509929286641428, 'avg_acc': 49.98116034924814, 'loss': 8.632231712341309}


EP_train:4:  52%|| 14442/27626 [34:07<31:12,  7.04it/s]

{'epoch': 4, 'iter': 14440, 'avg_loss': 8.50998711115468, 'avg_acc': 49.98268817948895, 'loss': 8.128915786743164}


EP_train:4:  52%|| 14452/27626 [34:08<31:18,  7.01it/s]

{'epoch': 4, 'iter': 14450, 'avg_loss': 8.509897388295643, 'avg_acc': 49.98205141512698, 'loss': 8.831069946289062}


EP_train:4:  52%|| 14462/27626 [34:10<31:12,  7.03it/s]

{'epoch': 4, 'iter': 14460, 'avg_loss': 8.509873722839567, 'avg_acc': 49.981631629901116, 'loss': 8.293041229248047}


EP_train:4:  52%|| 14472/27626 [34:11<31:16,  7.01it/s]

{'epoch': 4, 'iter': 14470, 'avg_loss': 8.509813201855001, 'avg_acc': 49.977325340335845, 'loss': 9.249297142028809}


EP_train:4:  52%|| 14482/27626 [34:13<31:11,  7.02it/s]

{'epoch': 4, 'iter': 14480, 'avg_loss': 8.5097975408197, 'avg_acc': 49.9769093985222, 'loss': 8.06893539428711}


EP_train:4:  52%|| 14492/27626 [34:14<30:57,  7.07it/s]

{'epoch': 4, 'iter': 14490, 'avg_loss': 8.509612489360336, 'avg_acc': 49.9816696570285, 'loss': 7.870538711547852}


EP_train:4:  52%|| 14502/27626 [34:16<31:13,  7.00it/s]

{'epoch': 4, 'iter': 14500, 'avg_loss': 8.509473587511227, 'avg_acc': 49.981251293014274, 'loss': 8.158150672912598}


EP_train:4:  53%|| 14512/27626 [34:17<30:52,  7.08it/s]

{'epoch': 4, 'iter': 14510, 'avg_loss': 8.509384162732747, 'avg_acc': 49.97932602853008, 'loss': 8.71265983581543}


EP_train:4:  53%|| 14522/27626 [34:18<30:57,  7.05it/s]

{'epoch': 4, 'iter': 14520, 'avg_loss': 8.509164203240289, 'avg_acc': 49.977403415742714, 'loss': 7.703159809112549}


EP_train:4:  53%|| 14532/27626 [34:20<30:50,  7.08it/s]

{'epoch': 4, 'iter': 14530, 'avg_loss': 8.509293394631507, 'avg_acc': 49.97999965590806, 'loss': 8.771868705749512}


EP_train:4:  53%|| 14542/27626 [34:21<30:59,  7.04it/s]

{'epoch': 4, 'iter': 14540, 'avg_loss': 8.509416123333729, 'avg_acc': 49.980013410356925, 'loss': 8.345751762390137}


EP_train:4:  53%|| 14552/27626 [34:23<31:00,  7.03it/s]

{'epoch': 4, 'iter': 14550, 'avg_loss': 8.509460357429809, 'avg_acc': 49.979597622156554, 'loss': 8.051101684570312}


EP_train:4:  53%|| 14562/27626 [34:24<30:52,  7.05it/s]

{'epoch': 4, 'iter': 14560, 'avg_loss': 8.509350633827525, 'avg_acc': 49.979182405054594, 'loss': 8.097708702087402}


EP_train:4:  53%|| 14572/27626 [34:25<31:08,  6.99it/s]

{'epoch': 4, 'iter': 14570, 'avg_loss': 8.509238559358213, 'avg_acc': 49.97984009333608, 'loss': 8.521703720092773}


EP_train:4:  53%|| 14582/27626 [34:27<30:48,  7.06it/s]

{'epoch': 4, 'iter': 14580, 'avg_loss': 8.509319596048261, 'avg_acc': 49.98156847952816, 'loss': 7.445788860321045}


EP_train:4:  53%|| 14592/27626 [34:28<30:49,  7.05it/s]

{'epoch': 4, 'iter': 14590, 'avg_loss': 8.509142443704176, 'avg_acc': 49.98008189980125, 'loss': 7.955317497253418}


EP_train:4:  53%|| 14602/27626 [34:30<30:42,  7.07it/s]

{'epoch': 4, 'iter': 14600, 'avg_loss': 8.509072291786545, 'avg_acc': 49.97988151496473, 'loss': 8.821686744689941}


EP_train:4:  53%|| 14612/27626 [34:31<30:46,  7.05it/s]

{'epoch': 4, 'iter': 14610, 'avg_loss': 8.508929526109926, 'avg_acc': 49.9792536445144, 'loss': 8.257344245910645}


EP_train:4:  53%|| 14622/27626 [34:33<30:39,  7.07it/s]

{'epoch': 4, 'iter': 14620, 'avg_loss': 8.508876801221986, 'avg_acc': 49.97948156760824, 'loss': 8.911847114562988}


EP_train:4:  53%|| 14632/27626 [34:34<30:53,  7.01it/s]

{'epoch': 4, 'iter': 14630, 'avg_loss': 8.508867487204425, 'avg_acc': 49.982058642608166, 'loss': 7.808871269226074}


EP_train:4:  53%|| 14642/27626 [34:35<30:43,  7.04it/s]

{'epoch': 4, 'iter': 14640, 'avg_loss': 8.508944442107552, 'avg_acc': 49.98271122191107, 'loss': 8.78825855255127}


EP_train:4:  53%|| 14652/27626 [34:37<30:52,  7.00it/s]

{'epoch': 4, 'iter': 14650, 'avg_loss': 8.509015024208745, 'avg_acc': 49.9827230223193, 'loss': 8.470980644226074}


EP_train:4:  53%|| 14662/27626 [34:38<30:40,  7.05it/s]

{'epoch': 4, 'iter': 14660, 'avg_loss': 8.509088741953235, 'avg_acc': 49.981669053952665, 'loss': 7.882630348205566}


EP_train:4:  53%|| 14672/27626 [34:40<30:35,  7.06it/s]

{'epoch': 4, 'iter': 14670, 'avg_loss': 8.509111320303713, 'avg_acc': 49.97976450139732, 'loss': 8.382402420043945}


EP_train:4:  53%|| 14682/27626 [34:41<30:30,  7.07it/s]

{'epoch': 4, 'iter': 14680, 'avg_loss': 8.508951695833712, 'avg_acc': 49.98339690756761, 'loss': 8.524023056030273}


EP_train:4:  53%|| 14692/27626 [34:42<30:28,  7.07it/s]

{'epoch': 4, 'iter': 14690, 'avg_loss': 8.508885441770287, 'avg_acc': 49.98425907017902, 'loss': 9.214309692382812}


EP_train:4:  53%|| 14702/27626 [34:44<30:36,  7.04it/s]

{'epoch': 4, 'iter': 14700, 'avg_loss': 8.509111238802285, 'avg_acc': 49.98129378953813, 'loss': 8.97110366821289}


EP_train:4:  53%|| 14712/27626 [34:45<30:35,  7.04it/s]

{'epoch': 4, 'iter': 14710, 'avg_loss': 8.509065601926359, 'avg_acc': 49.980881653184696, 'loss': 9.526871681213379}


EP_train:4:  53%|| 14722/27626 [34:47<30:27,  7.06it/s]

{'epoch': 4, 'iter': 14720, 'avg_loss': 8.509088853691333, 'avg_acc': 49.981106922084095, 'loss': 8.72144889831543}


EP_train:4:  53%|| 14732/27626 [34:48<30:35,  7.02it/s]

{'epoch': 4, 'iter': 14730, 'avg_loss': 8.50916018025403, 'avg_acc': 49.98133188514018, 'loss': 8.142555236816406}


EP_train:4:  53%|| 14742/27626 [34:50<30:52,  6.95it/s]

{'epoch': 4, 'iter': 14740, 'avg_loss': 8.50927663501232, 'avg_acc': 49.98113255545757, 'loss': 8.47655200958252}


EP_train:4:  53%|| 14752/27626 [34:51<30:38,  7.00it/s]

{'epoch': 4, 'iter': 14750, 'avg_loss': 8.509132955868068, 'avg_acc': 49.97945054572571, 'loss': 8.3260498046875}


EP_train:4:  53%|| 14762/27626 [34:52<30:39,  6.99it/s]

{'epoch': 4, 'iter': 14760, 'avg_loss': 8.509311844845321, 'avg_acc': 49.98369859765599, 'loss': 8.443095207214355}


EP_train:4:  53%|| 14772/27626 [34:54<30:34,  7.01it/s]

{'epoch': 4, 'iter': 14770, 'avg_loss': 8.509366608595737, 'avg_acc': 49.98392119693995, 'loss': 8.749907493591309}


EP_train:4:  54%|| 14782/27626 [34:55<30:19,  7.06it/s]

{'epoch': 4, 'iter': 14780, 'avg_loss': 8.509351956974177, 'avg_acc': 49.984354915093704, 'loss': 7.590525150299072}


EP_train:4:  54%|| 14792/27626 [34:57<30:26,  7.02it/s]

{'epoch': 4, 'iter': 14790, 'avg_loss': 8.509546231001679, 'avg_acc': 49.986055709553106, 'loss': 8.811656951904297}


EP_train:4:  54%|| 14802/27626 [34:58<30:20,  7.04it/s]

{'epoch': 4, 'iter': 14800, 'avg_loss': 8.50961142702285, 'avg_acc': 49.986276265117226, 'loss': 8.660906791687012}


EP_train:4:  54%|| 14812/27626 [34:59<30:22,  7.03it/s]

{'epoch': 4, 'iter': 14810, 'avg_loss': 8.509567879558407, 'avg_acc': 49.9886064411586, 'loss': 8.425825119018555}


EP_train:4:  54%|| 14822/27626 [35:01<30:36,  6.97it/s]

{'epoch': 4, 'iter': 14820, 'avg_loss': 8.509586285679344, 'avg_acc': 49.989457526482695, 'loss': 8.378880500793457}


EP_train:4:  54%|| 14832/27626 [35:02<30:32,  6.98it/s]

{'epoch': 4, 'iter': 14830, 'avg_loss': 8.509552047704542, 'avg_acc': 49.98777897646821, 'loss': 8.349164009094238}


EP_train:4:  54%|| 14842/27626 [35:04<30:12,  7.05it/s]

{'epoch': 4, 'iter': 14840, 'avg_loss': 8.509548290231608, 'avg_acc': 49.988840037733304, 'loss': 8.944514274597168}


EP_train:4:  54%|| 14852/27626 [35:05<30:15,  7.04it/s]

{'epoch': 4, 'iter': 14850, 'avg_loss': 8.509561910379471, 'avg_acc': 49.98758501111036, 'loss': 8.05646800994873}


EP_train:4:  54%|| 14862/27626 [35:07<30:23,  7.00it/s]

{'epoch': 4, 'iter': 14860, 'avg_loss': 8.509558152674376, 'avg_acc': 49.98948590269834, 'loss': 8.757227897644043}


EP_train:4:  54%|| 14872/27626 [35:08<30:16,  7.02it/s]

{'epoch': 4, 'iter': 14870, 'avg_loss': 8.509777592904037, 'avg_acc': 49.98991325398426, 'loss': 8.554661750793457}


EP_train:4:  54%|| 14882/27626 [35:09<30:21,  7.00it/s]

{'epoch': 4, 'iter': 14880, 'avg_loss': 8.509637475005722, 'avg_acc': 49.99223002486392, 'loss': 8.135007858276367}


EP_train:4:  54%|| 14892/27626 [35:11<30:13,  7.02it/s]

{'epoch': 4, 'iter': 14890, 'avg_loss': 8.509797436932754, 'avg_acc': 49.99223524276409, 'loss': 8.534318923950195}


EP_train:4:  54%|| 14902/27626 [35:12<30:21,  6.99it/s]

{'epoch': 4, 'iter': 14900, 'avg_loss': 8.509653048742202, 'avg_acc': 49.992450171129455, 'loss': 8.553340911865234}


EP_train:4:  54%|| 14912/27626 [35:14<30:22,  6.97it/s]

{'epoch': 4, 'iter': 14910, 'avg_loss': 8.509456255443778, 'avg_acc': 49.99161692710079, 'loss': 7.753347873687744}


EP_train:4:  54%|| 14922/27626 [35:15<30:22,  6.97it/s]

{'epoch': 4, 'iter': 14920, 'avg_loss': 8.50920187692749, 'avg_acc': 49.99539239997319, 'loss': 8.22398853302002}


EP_train:4:  54%|| 14932/27626 [35:16<30:08,  7.02it/s]

{'epoch': 4, 'iter': 14930, 'avg_loss': 8.509407200454651, 'avg_acc': 49.99497689371107, 'loss': 8.794722557067871}


EP_train:4:  54%|| 14942/27626 [35:18<30:08,  7.01it/s]

{'epoch': 4, 'iter': 14940, 'avg_loss': 8.509453380628887, 'avg_acc': 49.99686265979519, 'loss': 8.12771224975586}


EP_train:4:  54%|| 14952/27626 [35:19<29:53,  7.07it/s]

{'epoch': 4, 'iter': 14950, 'avg_loss': 8.50958778601284, 'avg_acc': 49.99393853253963, 'loss': 9.333304405212402}


EP_train:4:  54%|| 14962/27626 [35:21<29:59,  7.04it/s]

{'epoch': 4, 'iter': 14960, 'avg_loss': 8.50979351664313, 'avg_acc': 49.99122719069581, 'loss': 8.420496940612793}


EP_train:4:  54%|| 14972/27626 [35:22<30:06,  7.01it/s]

{'epoch': 4, 'iter': 14970, 'avg_loss': 8.50984001331662, 'avg_acc': 49.989563155433835, 'loss': 9.016047477722168}


EP_train:4:  54%|| 14982/27626 [35:24<30:04,  7.01it/s]

{'epoch': 4, 'iter': 14980, 'avg_loss': 8.509722837291902, 'avg_acc': 49.98977871971164, 'loss': 8.710572242736816}


EP_train:4:  54%|| 14992/27626 [35:25<30:04,  7.00it/s]

{'epoch': 4, 'iter': 14990, 'avg_loss': 8.509737851772242, 'avg_acc': 49.98707557868054, 'loss': 8.6466703414917}


EP_train:4:  54%|| 15002/27626 [35:26<30:01,  7.01it/s]

{'epoch': 4, 'iter': 15000, 'avg_loss': 8.509628191700571, 'avg_acc': 49.99041730551296, 'loss': 8.417122840881348}


EP_train:4:  54%|| 15012/27626 [35:28<29:53,  7.03it/s]

{'epoch': 4, 'iter': 15010, 'avg_loss': 8.509551978859353, 'avg_acc': 49.99188095396709, 'loss': 7.77488374710083}


EP_train:4:  54%|| 15022/27626 [35:29<29:47,  7.05it/s]

{'epoch': 4, 'iter': 15020, 'avg_loss': 8.509517218331572, 'avg_acc': 49.990638106650685, 'loss': 8.578316688537598}


EP_train:4:  54%|| 15032/27626 [35:31<29:53,  7.02it/s]

{'epoch': 4, 'iter': 15030, 'avg_loss': 8.509546986930694, 'avg_acc': 49.990228527709405, 'loss': 8.761214256286621}


EP_train:4:  54%|| 15042/27626 [35:32<29:49,  7.03it/s]

{'epoch': 4, 'iter': 15040, 'avg_loss': 8.509641279177846, 'avg_acc': 49.992935975001664, 'loss': 8.235021591186523}


EP_train:4:  54%|| 15052/27626 [35:34<29:51,  7.02it/s]

{'epoch': 4, 'iter': 15050, 'avg_loss': 8.509639355187621, 'avg_acc': 49.995639824596374, 'loss': 8.415143013000488}


EP_train:4:  55%|| 15062/27626 [35:35<29:49,  7.02it/s]

{'epoch': 4, 'iter': 15060, 'avg_loss': 8.50939780280185, 'avg_acc': 49.997510125489676, 'loss': 7.88219690322876}


EP_train:4:  55%|| 15072/27626 [35:36<30:02,  6.96it/s]

{'epoch': 4, 'iter': 15070, 'avg_loss': 8.50935648081037, 'avg_acc': 50.00041470373565, 'loss': 8.138620376586914}


EP_train:4:  55%|| 15082/27626 [35:38<29:48,  7.01it/s]

{'epoch': 4, 'iter': 15080, 'avg_loss': 8.509466792439824, 'avg_acc': 49.998342284994365, 'loss': 8.463860511779785}


EP_train:4:  55%|| 15092/27626 [35:39<29:47,  7.01it/s]

{'epoch': 4, 'iter': 15090, 'avg_loss': 8.509249862402699, 'avg_acc': 50.00144953946061, 'loss': 8.647953033447266}


EP_train:4:  55%|| 15102/27626 [35:41<29:54,  6.98it/s]

{'epoch': 4, 'iter': 15100, 'avg_loss': 8.50949856167826, 'avg_acc': 50.0004138798755, 'loss': 8.877779960632324}


EP_train:4:  55%|| 15112/27626 [35:42<29:50,  6.99it/s]

{'epoch': 4, 'iter': 15110, 'avg_loss': 8.509465996351778, 'avg_acc': 50.0, 'loss': 8.113619804382324}


EP_train:4:  55%|| 15122/27626 [35:43<29:46,  7.00it/s]

{'epoch': 4, 'iter': 15120, 'avg_loss': 8.509680501068642, 'avg_acc': 49.99876000264533, 'loss': 8.555513381958008}


EP_train:4:  55%|| 15132/27626 [35:45<29:43,  7.00it/s]

{'epoch': 4, 'iter': 15130, 'avg_loss': 8.509890458516235, 'avg_acc': 49.998967351794334, 'loss': 9.270837783813477}


EP_train:4:  55%|| 15142/27626 [35:46<29:45,  6.99it/s]

{'epoch': 4, 'iter': 15140, 'avg_loss': 8.509921505936038, 'avg_acc': 49.99938082028928, 'loss': 8.740397453308105}


EP_train:4:  55%|| 15152/27626 [35:48<29:47,  6.98it/s]

{'epoch': 4, 'iter': 15150, 'avg_loss': 8.50982100313217, 'avg_acc': 50.000206257012735, 'loss': 8.416951179504395}


EP_train:4:  55%|| 15162/27626 [35:49<29:29,  7.04it/s]

{'epoch': 4, 'iter': 15160, 'avg_loss': 8.50980416263253, 'avg_acc': 50.000412241936544, 'loss': 8.5265474319458}


EP_train:4:  55%|| 15172/27626 [35:51<29:37,  7.01it/s]

{'epoch': 4, 'iter': 15170, 'avg_loss': 8.509621009571442, 'avg_acc': 50.000411970206315, 'loss': 8.00561809539795}


EP_train:4:  55%|| 15182/27626 [35:52<29:51,  6.95it/s]

{'epoch': 4, 'iter': 15180, 'avg_loss': 8.509524933992344, 'avg_acc': 50.00329359067255, 'loss': 8.05247688293457}


EP_train:4:  55%|| 15192/27626 [35:53<29:47,  6.96it/s]

{'epoch': 4, 'iter': 15190, 'avg_loss': 8.509589881958496, 'avg_acc': 50.00164571127641, 'loss': 8.825852394104004}


EP_train:4:  55%|| 15202/27626 [35:55<29:30,  7.02it/s]

{'epoch': 4, 'iter': 15200, 'avg_loss': 8.509706354244752, 'avg_acc': 50.00308367870535, 'loss': 9.51045036315918}


EP_train:4:  55%|| 15212/27626 [35:56<29:37,  6.99it/s]

{'epoch': 4, 'iter': 15210, 'avg_loss': 8.50977559058626, 'avg_acc': 50.00082177371639, 'loss': 8.709633827209473}


EP_train:4:  55%|| 15222/27626 [35:58<29:46,  6.94it/s]

{'epoch': 4, 'iter': 15220, 'avg_loss': 8.509759416624686, 'avg_acc': 50.00390086065305, 'loss': 9.072783470153809}


EP_train:4:  55%|| 15232/27626 [35:59<29:17,  7.05it/s]

{'epoch': 4, 'iter': 15230, 'avg_loss': 8.509802007571858, 'avg_acc': 50.00369312586172, 'loss': 7.923282146453857}


EP_train:4:  55%|| 15242/27626 [36:01<29:20,  7.03it/s]

{'epoch': 4, 'iter': 15240, 'avg_loss': 8.509639276825265, 'avg_acc': 50.004510858867526, 'loss': 8.177351951599121}


EP_train:4:  55%|| 15252/27626 [36:02<29:12,  7.06it/s]

{'epoch': 4, 'iter': 15250, 'avg_loss': 8.509564730200436, 'avg_acc': 50.00102452298209, 'loss': 9.103778839111328}


EP_train:4:  55%|| 15260/27626 [36:03<29:04,  7.09it/s]