必要ModuleをImport

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import DataLoader
import math

import pickle
import tqdm
from collections import Counter

from torch.utils.data import Dataset
import random
import numpy as np

from utils import GELU, PositionwiseFeedForward, LayerNorm, SublayerConnection, LayerNorm

import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
from ipywidgets import FloatProgress
from IPython.display import display, clear_output

In [2]:
# Raw corpora read by load_data(), and the tab-joined sentence-pair files
# that the preprocessing cells below write out.
input_train_txt = './data/chatdata/splitted_1.txt'
input_valid_txt = './data/chatdata/splitted_2.txt'
processed_train_txt = './data/chatdata/train_X.txt'
processed_valid_txt = './data/chatdata/valid_X.txt'

Next Sentence Predictionのために, 意味的に連続する文章をtab区切りで並べる前処理をデータセットに対して行います.

In [3]:
def load_data(path):
    """Read a text file and pair up neighbouring lines as tab-joined strings.

    Every line is stripped of surrounding whitespace.  Using 0-based line
    indices, line 2 is joined with line 1, line 4 with line 3, and so on,
    producing ``"<line 2k>\\t<line 2k-1>"``.  Line 0 is never used and a
    trailing line without a partner is dropped.

    NOTE(review): each pair puts the later line *before* the earlier one and
    the first line of the file is skipped -- confirm that this ordering is
    what next-sentence prediction is supposed to see.

    :param path: path of a UTF-8 encoded text file, one sentence per line
    :return: list of ``"sentence\\tsentence"`` strings
    """
    with open(path, encoding='utf-8') as f:
        stripped = [line.strip() for line in f]

    leading = stripped[2::2]
    trailing = stripped[1::2]

    # zip() stops at the shorter slice, which discards an unpaired last line.
    return ['\t'.join(pair) for pair in zip(leading, trailing)]

In [4]:
# Build the tab-joined sentence pairs for the training and validation corpora.
train_data = load_data(input_train_txt)
valid_data = load_data(input_valid_txt)

# Shuffle the pairs in place (uses the global `random` state; no seed is set here).
random.shuffle(train_data)
random.shuffle(valid_data)

In [5]:
# Write one sentence pair per line (no trailing newline after the last pair).
# The encoding is given explicitly so the file matches the UTF-8 readers used
# elsewhere in this notebook instead of depending on the platform default.
with open(processed_train_txt, 'w', encoding='utf-8') as f:
    f.write('\n'.join(train_data))

In [6]:
# Write one sentence pair per line (no trailing newline after the last pair).
# The encoding is given explicitly so the file matches the UTF-8 readers used
# elsewhere in this notebook instead of depending on the platform default.
with open(processed_valid_txt, 'w', encoding='utf-8') as f:
    f.write('\n'.join(valid_data))

Attentionセルを定義する

In [7]:
class Attention(nn.Module):
    """Scaled dot-product attention.

    Reference (Japanese): http://deeplearning.hatenablog.com/entry/transformer
    """

    def forward(self, query, key, value, mask=None, dropout=None):
        """Compute attention-weighted values.

        :param query: query tensor; its last dimension is used as the scale
        :param key: key tensor, multiplied with ``query`` over the last dimension
        :param value: value tensor combined using the attention weights
        :param mask: optional tensor; positions where ``mask == 0`` are
            filled with -1e9 before the softmax
        :param dropout: optional callable applied to the attention weights
        :return: tuple ``(weighted values, attention weights)``
        """
        scale = math.sqrt(query.size(-1))
        scores = torch.matmul(query, key.transpose(-2, -1)) / scale

        if mask is not None:
            # A large negative score becomes (numerically) zero weight.
            scores = scores.masked_fill(mask == 0, -1e9)

        weights = F.softmax(scores, dim=-1)
        if dropout is not None:
            weights = dropout(weights)

        # Weighted sum of the values under the attention distribution.
        context = torch.matmul(weights, value)
        return context, weights


Multi Head Attentionを定義する

In [8]:
class MultiHeadedAttention(nn.Module):
    """Multi-head attention built on top of the ``Attention`` module.

    The ``d_model`` features are divided evenly between ``h`` heads, so each
    head works on ``d_model // h`` features; the same per-head size is used
    for queries, keys and values.
    """

    def __init__(self, h, d_model, dropout=0.1):
        """
        :param h: number of attention heads; must divide ``d_model``
        :param d_model: feature size of the inputs and of the output
        :param dropout: dropout probability applied to the attention weights
        """
        super().__init__()
        assert d_model % h == 0

        self.h = h
        # We assume d_v always equals d_k
        self.d_k = d_model // h

        # Three d_model -> d_model projections, used for query, key and value
        # in that order, followed by the projection applied to the merged heads.
        # See https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
        projections = [nn.Linear(d_model, d_model) for _ in range(3)]
        self.linear_layers = nn.ModuleList(projections)
        self.output_linear = nn.Linear(d_model, d_model)

        self.attention = Attention()
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, query, key, value, mask=None):
        """Project the inputs, attend per head, and merge the heads again.

        :param query: query tensor whose first dimension is the batch size
        :param key: key tensor
        :param value: value tensor
        :param mask: optional mask forwarded to ``Attention``
        :return: tensor reshaped to ``(batch, -1, h * d_k)`` and passed
            through the output projection
        """
        batch_size = query.size(0)

        def split_heads(projection, tensor):
            # Project, then view as (batch, -1, h, d_k) and move the head
            # axis in front of the sequence axis.
            projected = projection(tensor)
            return projected.view(batch_size, -1, self.h, self.d_k).transpose(1, 2)

        q_proj, k_proj, v_proj = self.linear_layers
        query = split_heads(q_proj, query)
        key = split_heads(k_proj, key)
        value = split_heads(v_proj, value)

        # The attention weights are returned as well but are not needed here.
        context, _ = self.attention(query, key, value, mask=mask, dropout=self.dropout)

        # transpose() leaves the tensor non-contiguous; view() needs
        # contiguous memory, hence the explicit contiguous() call.
        merged = context.transpose(1, 2).contiguous().view(batch_size, -1, self.h * self.d_k)

        return self.output_linear(merged)


Transformerを定義する

In [9]:
class TransformerBlock(nn.Module):
    """
    One bidirectional encoder layer:
    multi-head self-attention followed by a position-wise feed-forward
    network, each wrapped in a ``SublayerConnection``.
    """

    def __init__(self, hidden, attn_heads, feed_forward_hidden, dropout):
        """
        :param hidden: hidden size of transformer
        :param attn_heads: number of heads of the multi-head attention
        :param feed_forward_hidden: feed-forward hidden size, usually 4*hidden
        :param dropout: dropout rate
        """
        super().__init__()

        # Self-attention sublayer.
        self.attention = MultiHeadedAttention(h=attn_heads, d_model=hidden, dropout=dropout)
        # Feed-forward sublayer.
        self.feed_forward = PositionwiseFeedForward(d_model=hidden, d_ff=feed_forward_hidden, dropout=dropout)
        # One wrapper per sublayer (input = attention, output = feed-forward).
        self.input_sublayer = SublayerConnection(size=hidden, dropout=dropout)
        self.output_sublayer = SublayerConnection(size=hidden, dropout=dropout)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, mask):
        """Apply self-attention, then the feed-forward network, then dropout.

        :param x: input tensor of the block
        :param mask: attention mask handed to the multi-head attention
        :return: tensor produced by the block
        """

        def self_attention(_x):
            # Query, key and value are all the same tensor (self-attention).
            return self.attention.forward(_x, _x, _x, mask=mask)

        attended = self.input_sublayer(x, self_attention)
        transformed = self.output_sublayer(attended, self.feed_forward)
        return self.dropout(transformed)


BERTクラスを定義する

In [10]:
"""
googleのスクリプトの、BertPretrainModel(nn.module)にあたる
"""

class BERT(nn.Module):
    """BERT encoder: an embedding layer followed by a stack of Transformer blocks."""

    def __init__(self, vocab_size, hidden=768, n_layers=12, attn_heads=12, dropout=0.1):
        """
        :param vocab_size: vocab_size of total words
        :param hidden: BERT model hidden size
        :param n_layers: numbers of Transformer blocks(layers)
        :param attn_heads: number of attention heads
        :param dropout: dropout rate
        """

        super().__init__()
        self.hidden = hidden
        self.n_layers = n_layers
        self.attn_heads = attn_heads

        # The feed-forward hidden layer is conventionally 4x the model size.
        self.feed_forward_hidden = hidden * 4

        # embedding for BERT
        self.embedding = BERTEmbedding(vocab_size=vocab_size, embed_size=hidden, dropout=dropout)

        # Layers kept in a plain Python list are not registered, so their
        # parameters would be invisible to the optimizer; nn.ModuleList
        # registers every block.
        # https://qiita.com/perrying/items/857df46bb6cdc3047bd8
        self.transformer_blocks = nn.ModuleList(
            [TransformerBlock(hidden, attn_heads, self.feed_forward_hidden, dropout)
             for _ in range(n_layers)])

    def forward(self, x, segment_info):
        """Encode a batch of token ids.

        :param x: 2-D tensor of token ids, ``(batch, seq_len)``; ids <= 0 are
            treated as padding
        :param segment_info: segment ids passed to the embedding layer
        :return: output of the last Transformer block
        """
        # (batch, seq) -> (batch, 1, seq, seq): entry [b, 0, i, j] is True when
        # token j of sequence b is not padding.  The singleton axis is meant
        # to broadcast over the attention heads.
        mask = (x > 0).unsqueeze(1).repeat(1, x.size(1), 1).unsqueeze(1)

        x = self.embedding(x, segment_info)

        for transformer in self.transformer_blocks:
            # Call the module itself (not .forward) so registered hooks run.
            x = transformer(x, mask)
        return x

    # TODO (kept from the original notes): optionally install apex and use
    #   from apex.normalization.fused_layer_norm import FusedLayerNorm as BertLayerNorm
    # in place of LayerNorm in init_bert_weights below.

    def init_bert_weights(self, module):
        """Initialise the weights of one submodule; intended for ``self.apply``.

        Linear and embedding weights are drawn from N(0, 0.02); linear biases
        are zeroed; LayerNorm is reset to weight 1 / bias 0.

        :param module: the submodule to initialise (modified in place)
        """
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=0.02)
            # normal_() also overwrites the padding row; restore it to zeros
            # so padding tokens keep contributing a zero vector.
            if isinstance(module, nn.Embedding) and module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, LayerNorm):
            # NOTE(review): assumes the project's LayerNorm exposes `weight`
            # and `bias` attributes -- confirm against utils.LayerNorm.
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        if isinstance(module, nn.Linear) and module.bias is not None:
            module.bias.data.zero_()
    
    
    
    


In [11]:
"""
ここに、
Class BertForQuestionAnswering(BERT):
を入れるのがいいかもしれない。
"""

class BertForQuestionAnswering(BERT):
    """BERT encoder with a linear head that predicts answer-span start/end positions.

    The class *is* the encoder (it inherits ``BERT``), so no second ``BERT``
    instance is created; the head ``qa_outputs`` maps every hidden state to
    two logits (start, end).
    """

    def __init__(self, vocab_size, hidden=768, n_layers=12, attn_heads=12, dropout=0.1):
        """
        :param vocab_size: vocab_size of total words
        :param hidden: BERT model hidden size
        :param n_layers: numbers of Transformer blocks(layers)
        :param attn_heads: number of attention heads
        :param dropout: dropout rate
        """
        super().__init__(vocab_size, hidden=hidden, n_layers=n_layers,
                         attn_heads=attn_heads, dropout=dropout)
        self.qa_outputs = nn.Linear(hidden, 2)
        # Re-initialise every submodule (encoder and head) with the BERT scheme.
        self.apply(self.init_bert_weights)

    def forward(self, input_ids, token_type_ids=None, attention_mask=None,
                start_positions=None, end_positions=None):
        """Compute span logits, or the span loss when gold positions are given.

        :param input_ids: token ids, ``(batch, seq_len)``
        :param token_type_ids: segment ids; all zeros are used when omitted
        :param attention_mask: accepted for signature compatibility only; the
            encoder derives its padding mask from ``input_ids > 0``
        :param start_positions: optional gold start indices
        :param end_positions: optional gold end indices
        :return: ``(start_logits, end_logits)`` when no positions are given,
            otherwise the mean of the start and end cross-entropy losses
        """
        if token_type_ids is None:
            token_type_ids = torch.zeros_like(input_ids)

        # The parent forward returns a single tensor of hidden states.
        sequence_output = super().forward(input_ids, token_type_ids)
        logits = self.qa_outputs(sequence_output)
        start_logits, end_logits = logits.split(1, dim=-1)
        start_logits = start_logits.squeeze(-1)
        end_logits = end_logits.squeeze(-1)

        if start_positions is None or end_positions is None:
            return start_logits, end_logits

        # if we are on multi-GPU, split add a dimension
        if len(start_positions.size()) > 1:
            start_positions = start_positions.squeeze(-1)
        if len(end_positions.size()) > 1:
            end_positions = end_positions.squeeze(-1)

        # Positions outside the model input are clamped onto `ignored_index`
        # and then skipped by the loss.  clamp() (not clamp_()) is used so the
        # caller's tensors are left untouched.
        ignored_index = start_logits.size(1)
        start_positions = start_positions.clamp(0, ignored_index)
        end_positions = end_positions.clamp(0, ignored_index)

        loss_fct = nn.CrossEntropyLoss(ignore_index=ignored_index)
        start_loss = loss_fct(start_logits, start_positions)
        end_loss = loss_fct(end_logits, end_positions)
        return (start_loss + end_loss) / 2
     


BERTのEmbedding層を定義する

In [12]:
class TokenEmbedding(nn.Embedding):
    """Token-id embedding table whose index 0 is the padding entry."""

    def __init__(self, vocab_size, embed_size=512):
        """
        :param vocab_size: number of rows in the table
        :param embed_size: size of each embedding vector
        """
        super().__init__(num_embeddings=vocab_size, embedding_dim=embed_size, padding_idx=0)

class PositionalEmbedding(nn.Module):
    """Fixed sinusoidal position table (not trained).

    Even feature columns hold ``sin(pos * w_k)`` and odd columns
    ``cos(pos * w_k)`` with ``w_k = exp(-2k * ln(10000) / d_model)``.
    """

    def __init__(self, d_model, max_len=512):
        """
        :param d_model: number of features per position (even or odd)
        :param max_len: number of positions stored in the table
        """
        super().__init__()

        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
        angles = position * div_term

        pe = torch.zeros(max_len, d_model).float()
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one cosine column fewer than sine
        # columns, so the surplus angle column is dropped.
        pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])

        # A buffer moves with the module (.to / state_dict) but is never a
        # parameter, so it receives no gradient updates.
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Return the table rows for the first ``x.size(1)`` positions.

        :param x: tensor whose second dimension is the sequence length
        :return: tensor of shape ``(1, min(x.size(1), max_len), d_model)``
        """
        return self.pe[:, :x.size(1)]

class SegmentEmbedding(nn.Embedding):
    """Embedding table with three rows for segment ids; index 0 is the padding entry."""

    def __init__(self, embed_size=512):
        """
        :param embed_size: size of each embedding vector
        """
        super().__init__(num_embeddings=3, embedding_dim=embed_size, padding_idx=0)

class BERTEmbedding(nn.Module):
    """
    BERT embedding, the sum of three components followed by dropout:
        1. TokenEmbedding : ordinary token embedding
        2. PositionalEmbedding : position information built from sin / cos
        3. SegmentEmbedding : sentence segment information (sent_A:1, sent_B:2)
    """

    def __init__(self, vocab_size, embed_size, dropout=0.1):
        """
        :param vocab_size: total vocab size
        :param embed_size: embedding size shared by all three components
        :param dropout: dropout rate applied to the summed embedding
        """
        super().__init__()
        self.token = TokenEmbedding(vocab_size=vocab_size, embed_size=embed_size)
        # Position and segment tables take their width from the token table.
        width = self.token.embedding_dim
        self.position = PositionalEmbedding(d_model=width)
        self.segment = SegmentEmbedding(embed_size=width)
        self.dropout = nn.Dropout(p=dropout)
        self.embed_size = embed_size

    def forward(self, sequence, segment_label):
        """
        :param sequence: token ids
        :param segment_label: segment ids aligned with ``sequence``
        :return: summed embedding after dropout
        """
        token_part = self.token(sequence)
        position_part = self.position(sequence)
        segment_part = self.segment(segment_label)
        return self.dropout(token_part + position_part + segment_part)


学習用にマスク予測・隣接文予測の層を追加する

In [13]:
class BERTLM(nn.Module):
    """
    BERT Language Model:
    a BERT encoder with a next-sentence-prediction head and a
    masked-language-model head on top.
    """

    def __init__(self, bert: BERT, vocab_size):
        """
        :param bert: BERT model which should be trained
        :param vocab_size: total vocab size for masked_lm
        """
        super().__init__()
        self.bert = bert
        # Both heads consume the encoder output, so they share its width.
        width = self.bert.hidden
        self.next_sentence = NextSentencePrediction(width)
        self.mask_lm = MaskedLanguageModel(width, vocab_size)

    def forward(self, x, segment_label):
        """
        :param x: token ids
        :param segment_label: segment ids aligned with ``x``
        :return: tuple ``(next-sentence output, masked-LM output)``
        """
        encoded = self.bert(x, segment_label)
        next_sentence_scores = self.next_sentence(encoded)
        masked_lm_scores = self.mask_lm(encoded)
        return next_sentence_scores, masked_lm_scores


class NextSentencePrediction(nn.Module):
    """
    Two-class classifier : is_next, is_not_next
    """

    def __init__(self, hidden):
        """
        :param hidden: BERT model output size
        """
        super().__init__()
        self.linear = nn.Linear(hidden, 2)
        self.softmax = nn.LogSoftmax(dim=-1)

    def forward(self, x):
        """
        :param x: encoder output; only index 0 along dimension 1 is used
        :return: log-probabilities over the two classes
        """
        first_position = x[:, 0]
        logits = self.linear(first_position)
        return self.softmax(logits)


class MaskedLanguageModel(nn.Module):
    """
    Predicts the original token at every position of the input sequence.
    n-class classification problem, n = vocab_size
    """

    def __init__(self, hidden, vocab_size):
        """
        :param hidden: output size of BERT model
        :param vocab_size: total vocab size
        """
        super().__init__()
        self.linear = nn.Linear(hidden, vocab_size)
        self.softmax = nn.LogSoftmax(dim=-1)

    def forward(self, x):
        """
        :param x: encoder output
        :return: log-probabilities over the vocabulary along the last dimension
        """
        logits = self.linear(x)
        return self.softmax(logits)

BERT用のVocabを生成するクラスを定義する

In [14]:
import pickle
import tqdm
from collections import Counter


class TorchVocab(object):
    """Vocabulary mapping between tokens and integer ids.

    :property freqs: the counter object passed to the constructor (token frequencies)
    :property stoi: dict, token string -> id
    :property itos: list, id -> token string
    :property vectors: None unless vectors were requested
    """

    def __init__(self, counter, max_size=None, min_freq=1, specials=('<pad>', '<oov>'),
                 vectors=None, unk_init=None, vectors_cache=None):
        """
        :param counter: collections.Counter (or dict) with the frequency of each token
        :param max_size: int, maximum number of non-special tokens. None means no limit.
        :param min_freq: int, minimum frequency for a token to enter the
            vocabulary; values below 1 are treated as 1
        :param specials: sequence of str, tokens registered first (ids 0..len-1)
        :param vectors: pre-trained vectors; see the note in the body
        :param unk_init: must be None when ``vectors`` is None
        :param vectors_cache: must be None when ``vectors`` is None
        """
        self.freqs = counter
        # Work on a copy so that the caller's counter keeps its special tokens.
        counter = counter.copy()
        min_freq = max(min_freq, 1)

        self.itos = list(specials)
        # Special tokens are not counted when building the vocabulary.
        # pop() also works for plain dicts that lack the token.
        for tok in specials:
            counter.pop(tok, None)

        max_size = None if max_size is None else max_size + len(self.itos)

        # Highest frequency first; ties are broken alphabetically.
        words_and_frequencies = sorted(counter.items(), key=lambda tup: (-tup[1], tup[0]))

        # Stop at the first token below min_freq, or once max_size is reached.
        for word, freq in words_and_frequencies:
            if freq < min_freq or len(self.itos) == max_size:
                break
            self.itos.append(word)

        # Invert itos to obtain the token -> id mapping.
        self.stoi = {tok: i for i, tok in enumerate(self.itos)}

        self.vectors = None
        if vectors is not None:
            # NOTE(review): load_vectors is not defined on this class in this
            # file; presumably a subclass is expected to provide it -- confirm.
            self.load_vectors(vectors, unk_init=unk_init, cache=vectors_cache)
        else:
            assert unk_init is None and vectors_cache is None

    def __eq__(self, other):
        if not isinstance(other, TorchVocab):
            return NotImplemented
        return (self.freqs == other.freqs
                and self.stoi == other.stoi
                and self.itos == other.itos
                and self.vectors == other.vectors)

    # Defining __eq__ already makes instances unhashable; state it explicitly.
    __hash__ = None

    def __len__(self):
        return len(self.itos)

    def vocab_rerank(self):
        """Rebuild ``stoi`` from the current order of ``itos``."""
        self.stoi = {word: i for i, word in enumerate(self.itos)}

    def extend(self, v, sort=False):
        """Append the tokens of vocabulary ``v`` that are not known yet.

        :param v: another vocabulary object exposing ``itos``
        :param sort: when True, the tokens of ``v`` are added in sorted order
        """
        words = sorted(v.itos) if sort else v.itos
        for w in words:
            if w not in self.stoi:
                self.itos.append(w)
                self.stoi[w] = len(self.itos) - 1


class Vocab(TorchVocab):
    """Vocabulary with the five fixed special tokens used by BERT.

    The ids are: ``<pad>`` 0, ``<unk>`` 1, ``<eos>`` 2, ``<sos>`` 3, ``<mask>`` 4.
    """

    def __init__(self, counter, max_size=None, min_freq=1):
        specials = ["<pad>", "<unk>", "<eos>", "<sos>", "<mask>"]
        # The index attributes follow the position of each special token.
        (self.pad_index, self.unk_index, self.eos_index,
         self.sos_index, self.mask_index) = range(len(specials))
        super().__init__(counter, specials=specials, max_size=max_size, min_freq=min_freq)

    # Meant to be overridden by subclasses.
    def to_seq(self, sentece, seq_len, with_eos=False, with_sos=False) -> list:
        return None

    # Meant to be overridden by subclasses.
    def from_seq(self, seq, join=False, with_pad=False):
        return None

    @staticmethod
    def load_vocab(vocab_path: str) -> 'Vocab':
        """Unpickle a vocabulary from ``vocab_path``.

        NOTE: pickle can execute arbitrary code; only load trusted files.
        """
        with open(vocab_path, "rb") as source:
            loaded = pickle.load(source)
        return loaded

    def save_vocab(self, vocab_path):
        """Pickle this vocabulary object to ``vocab_path``."""
        with open(vocab_path, "wb") as target:
            pickle.dump(self, target)


# Builds a vocabulary from a text corpus
class WordVocab(Vocab):
    """Whitespace-tokenised word vocabulary."""

    def __init__(self, texts, max_size=None, min_freq=1):
        """
        :param texts: iterable of lines; each item is either a str, which is
            split on whitespace, or an already tokenised list of words
        :param max_size: maximum number of non-special tokens (None = no limit)
        :param min_freq: minimum frequency for a word to enter the vocabulary
        """
        print("Building Vocab")
        counter = Counter()
        for line in texts:
            # str.split() treats tabs and newlines as separators.  Deleting
            # the tab first would glue the last word of the first sentence
            # to the first word of the second one.
            words = line if isinstance(line, list) else line.split()
            counter.update(words)
        super().__init__(counter, max_size=max_size, min_freq=min_freq)

    def to_seq(self, sentence, seq_len=None, with_eos=False, with_sos=False, with_len=False):
        """Convert a sentence into a list of token ids.

        :param sentence: str (split on whitespace) or list of words
        :param seq_len: when given, the result is padded with ``pad_index``
            or truncated to exactly this length
        :param with_eos: append ``eos_index``
        :param with_sos: prepend ``sos_index``
        :param with_len: also return the length before padding/truncation
        :return: list of ids, or ``(ids, original_length)`` when ``with_len``
        """
        if isinstance(sentence, str):
            sentence = sentence.split()

        seq = [self.stoi.get(word, self.unk_index) for word in sentence]

        if with_eos:
            seq += [self.eos_index]
        if with_sos:
            seq = [self.sos_index] + seq

        origin_seq_len = len(seq)

        if seq_len is None:
            pass
        elif len(seq) <= seq_len:
            seq += [self.pad_index for _ in range(seq_len - len(seq))]
        else:
            seq = seq[:seq_len]

        return (seq, origin_seq_len) if with_len else seq

    def from_seq(self, seq, join=False, with_pad=False):
        """Convert token ids back into words.

        :param seq: iterable of ids; ids outside the vocabulary become ``"<id>"``
        :param join: return one space-joined string instead of a list
        :param with_pad: when True, entries equal to ``pad_index`` are skipped
            (note that this is the opposite of what the name suggests)
        :return: list of words, or a single string when ``join`` is True
        """
        words = [self.itos[idx]
                 if idx < len(self.itos)
                 else "<%d>" % idx
                 for idx in seq
                 if not with_pad or idx != self.pad_index]

        return " ".join(words) if join else words

    @staticmethod
    def load_vocab(vocab_path: str) -> 'WordVocab':
        """Unpickle a vocabulary from ``vocab_path``.

        NOTE: pickle can execute arbitrary code; only load trusted files.
        """
        with open(vocab_path, "rb") as f:
            return pickle.load(f)


def build(corpus_path, output_path, vocab_size=None, encoding='utf-8', min_freq=1):
    """Build a ``WordVocab`` from a corpus file and save it.

    :param corpus_path: text file whose lines are fed to ``WordVocab``
    :param output_path: destination handed to ``save_vocab``
    :param vocab_size: passed to ``WordVocab`` as ``max_size``
    :param encoding: encoding used to read the corpus
    :param min_freq: minimum word frequency passed to ``WordVocab``
    """
    with open(corpus_path, "r", encoding=encoding) as corpus:
        # The vocabulary consumes the file line by line while it is open.
        vocab = WordVocab(corpus, max_size=vocab_size, min_freq=min_freq)

    size = len(vocab)
    print("VOCAB SIZE:", size)
    vocab.save_vocab(output_path)

Dataloaderを定義する.
ここで文章中の単語をMASKする処理と,隣り合う文章を一定確率でシャッフルする処理を同時に行う

In [15]:
class BERTDataset(Dataset):
    """Dataset producing masked-LM / next-sentence-prediction samples.

    Each corpus line holds two sentences separated by a tab.  ``__len__`` and
    ``__getitem__`` are the two methods required by torch's ``Dataset``.
    """

    def __init__(self, corpus_path, vocab, seq_len, label_path=None, encoding="utf-8", corpus_lines=None, is_train=True):
        """
        :param corpus_path: text file with one ``sentence\\tsentence`` pair per line
        :param vocab: vocabulary exposing ``stoi`` and the ``*_index`` attributes
        :param seq_len: fixed length of every returned sequence
        :param label_path: optional file readable by ``np.loadtxt`` with one
            label per sample; None, '' or the legacy string 'None' mean
            "no labels" (a dummy label 0 is used)
        :param encoding: encoding of the corpus file
        :param corpus_lines: unused
        :param is_train: when True tokens are masked at random; when False
            nothing is masked
        """
        self.vocab = vocab
        self.seq_len = seq_len
        self.is_train = is_train

        with open(corpus_path, "r", encoding=encoding) as f:
            # Strip only the line terminator: the last line of the file may
            # lack one, and slicing off the final character would then
            # truncate its last token.
            self.datas = [line.rstrip("\n").split("\t") for line in f]
        if label_path and label_path != 'None':
            self.labels_data = torch.LongTensor(np.loadtxt(label_path))
        else:
            # Fill in dummy labels when none are needed.
            self.labels_data = [0 for _ in range(len(self.datas))]

    def __len__(self):
        return len(self.datas)

    def __getitem__(self, item):
        """Build one sample.

        :return: dict of tensors with keys ``bert_input``, ``bert_label``,
            ``segment_label`` (each of length ``seq_len``), ``is_next`` and
            ``labels``
        """
        t1, (t2, is_next_label) = self.datas[item][0], self.random_sent(item)
        t1_random, t1_label = self.random_word(t1)
        t2_random, t2_label = self.random_word(t2)
        labels = self.labels_data[item]

        # [CLS] tag = SOS tag, [SEP] tag = EOS tag
        t1 = [self.vocab.sos_index] + t1_random + [self.vocab.eos_index]
        t2 = t2_random + [self.vocab.eos_index]

        # The added special tokens are never prediction targets.
        t1_label = [self.vocab.pad_index] + t1_label + [self.vocab.pad_index]
        t2_label = t2_label + [self.vocab.pad_index]

        # Segment 1 covers the first sentence, segment 2 the second.
        segment_label = ([1 for _ in range(len(t1))] + [2 for _ in range(len(t2))])[:self.seq_len]
        bert_input = (t1 + t2)[:self.seq_len]
        bert_label = (t1_label + t2_label)[:self.seq_len]

        # Right-pad all three sequences up to seq_len.
        padding = [self.vocab.pad_index for _ in range(self.seq_len - len(bert_input))]
        bert_input.extend(padding)
        bert_label.extend(padding)
        segment_label.extend(padding)

        output = {"bert_input": bert_input,
                  "bert_label": bert_label,
                  "segment_label": segment_label,
                  "is_next": is_next_label,
                  "labels": labels}

        return {key: torch.tensor(value) for key, value in output.items()}

    def random_word(self, sentence):
        """Tokenise ``sentence`` on whitespace and apply BERT-style masking.

        :return: tuple ``(token ids, labels)``; the label is the original
            token id at selected positions and 0 everywhere else
        """
        tokens = sentence.split()
        output_label = []

        for i, token in enumerate(tokens):
            if self.is_train:  # mask at random during training
                prob = random.random()
            else:  # never mask at prediction time
                prob = 1.0
            if prob < 0.15:
                # Rescale to [0, 1) to choose among the three replacements.
                prob /= 0.15

                # 80% randomly change token to mask token
                if prob < 0.8:
                    tokens[i] = self.vocab.mask_index

                # 10% randomly change token to random token
                elif prob < 0.9:
                    tokens[i] = random.randrange(len(self.vocab))

                # 10% randomly change token to current token
                else:
                    tokens[i] = self.vocab.stoi.get(token, self.vocab.unk_index)

                output_label.append(self.vocab.stoi.get(token, self.vocab.unk_index))

            else:
                tokens[i] = self.vocab.stoi.get(token, self.vocab.unk_index)
                output_label.append(0)

        return tokens, output_label

    def random_sent(self, index):
        """Return the second sentence for sample ``index`` and its label.

        With probability ~0.5 the true partner is returned (label 1);
        otherwise the second sentence of a randomly drawn line (label 0).
        NOTE(review): the random draw may pick ``index`` itself, yielding a
        true pair labelled 0.
        """
        # output_text, label(isNotNext:0, isNext:1)
        if random.random() > 0.5:
            return self.datas[index][1], 1
        else:
            return self.datas[random.randrange(len(self.datas))][1], 0


Trainerクラスを定義する.
BERTの事前学習ではふたつの言語モデル学習を行う.
1. Masked Language Model : 文章中の一部の単語をマスクして,予測を行うタスク.
2. Next Sentence prediction : ある文章の次に来る文章を予測するタスク.

In [16]:
class BERTTrainer:
    """Runs BERT pre-training (masked LM + next sentence prediction).

    The per-epoch average loss and next-sentence accuracy of every call to
    ``iteration`` (train *and* test passes) are appended to
    ``train_lossses`` / ``train_accs``.
    """

    #def __init__(self, bert: BERT, vocab_size: int,
    def __init__(self, bert: BertForQuestionAnswering, vocab_size: int,
                 train_dataloader: DataLoader, test_dataloader: DataLoader = None,
                 lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01,
                 with_cuda: bool = True, log_freq: int = 10):
        """
        :param bert: BERT encoder; it is wrapped in ``BERTLM``, which calls
            ``bert(x, segment_label)`` and reads ``bert.hidden``
        :param vocab_size: total number of words in the vocabulary
        :param train_dataloader: train dataset data loader
        :param test_dataloader: test dataset data loader [can be None]
        :param lr: learning rate
        :param betas: Adam optimizer betas
        :param weight_decay: Adam optimizer weight decay param
        :param with_cuda: train with cuda when it is available
        :param log_freq: log every ``log_freq`` iterations
        """

        # Use the GPU only when it is both available and requested.
        cuda_condition = torch.cuda.is_available() and with_cuda
        self.device = torch.device("cuda:0" if cuda_condition else "cpu")

        self.bert = bert
        self.model = BERTLM(bert, vocab_size).to(self.device)

        # Data parallelism only makes sense when CUDA is actually in use.
        if cuda_condition and torch.cuda.device_count() > 1:
            print("Using %d GPUS for BERT" % torch.cuda.device_count())
            self.model = nn.DataParallel(self.model)

        self.train_data = train_dataloader
        self.test_data = test_dataloader

        self.optim = Adam(self.model.parameters(), lr=lr, betas=betas, weight_decay=weight_decay)

        # Loss shared by the next-sentence and masked-token predictions.
        # NOTE(review): BERTDataset labels non-masked positions with 0 and no
        # ignore_index is set, so those positions count towards mask_loss as
        # class 0 -- confirm this is intended.
        self.criterion = nn.NLLLoss()
        self.log_freq = log_freq
        print("Total Parameters:", sum([p.nelement() for p in self.model.parameters()]))

        self.train_lossses = []
        self.train_accs = []

    def train(self, epoch):
        """Run one training epoch over ``train_data``."""
        self.iteration(epoch, self.train_data)

    def test(self, epoch):
        """Run one evaluation epoch over ``test_data`` (no parameter updates)."""
        self.iteration(epoch, self.test_data, train=False)

    def iteration(self, epoch, data_loader, train=True):
        """Run one pass over ``data_loader``.

        :param epoch: current epoch number (used for logging only)
        :param data_loader: torch.utils.data.DataLoader
        :param train: True to update the parameters, False to only evaluate
        """
        str_code = "train" if train else "test"

        # Put dropout etc. into the matching mode for this pass.
        self.model.train(train)

        data_iter = tqdm.tqdm(enumerate(data_loader), desc="EP_%s:%d" % (str_code, epoch), total=len(data_loader), bar_format="{l_bar}{r_bar}")

        avg_loss = 0.0
        total_correct = 0
        total_element = 0

        # Gradients are tracked only when training, which saves memory and
        # time during evaluation.
        with torch.set_grad_enabled(train):
            for i, data in data_iter:
                # 0. move the batch to the GPU or CPU
                data = {key: value.to(self.device) for key, value in data.items()}

                # 1. forward the next_sentence_prediction and masked_lm model
                next_sent_output, mask_lm_output = self.model(data["bert_input"], data["segment_label"])

                # 2-1. NLLLoss(negative log likelihood) : next sentence prediction
                next_loss = self.criterion(next_sent_output, data["is_next"])

                # 2-2. NLLLoss(negative log likelihood) : predicting masked token word
                # (NLLLoss expects the class dimension at index 1)
                mask_loss = self.criterion(mask_lm_output.transpose(1, 2), data["bert_label"])

                # 2-3. the total loss is the sum of both losses
                loss = next_loss + mask_loss

                # 3. backward and optimizer step only when training
                if train:
                    self.optim.zero_grad()
                    loss.backward()
                    self.optim.step()

                # next sentence prediction accuracy
                correct = next_sent_output.argmax(dim=-1).eq(data["is_next"]).sum().item()
                avg_loss += loss.item()
                total_correct += correct
                total_element += data["is_next"].nelement()

                if i % self.log_freq == 0:
                    post_fix = {
                        "epoch": epoch,
                        "iter": i,
                        "avg_loss": avg_loss / (i + 1),
                        "avg_acc": total_correct / total_element * 100,
                        "loss": loss.item()
                    }
                    data_iter.write(str(post_fix))

        # Guard the averages against an empty data loader.
        n_batches = len(data_iter)
        epoch_loss = avg_loss / n_batches if n_batches else 0.0
        epoch_acc = total_correct * 100.0 / total_element if total_element else 0.0

        print("EP%d_%s, avg_loss=" % (epoch, str_code), epoch_loss, "total_acc=", epoch_acc)
        self.train_lossses.append(epoch_loss)
        self.train_accs.append(epoch_acc)

    def save(self, epoch, file_path="output/bert_trained.model"):
        """
        Saving the current BERT model on file_path

        :param epoch: current epoch number
        :param file_path: model output path which gonna be file_path+".ep%d" % epoch
        :return: final_output_path
        """
        output_path = file_path + ".ep%d" % epoch
        # The encoder is moved to the CPU for saving and back afterwards.
        torch.save(self.bert.cpu(), output_path)
        self.bert.to(self.device)
        print("EP:%d Model Saved on:" % epoch, output_path)
        return output_path

    # TODO: add a matching load(file_path) method.  An earlier draft kept here
    # called torch.load(self.bert.cpu(), pretrained_path), which is not how
    # torch.load is used: it takes the path and returns the saved object.

    
    
        

In [17]:
import datetime
# Timestamp suffix used in the vocab and model file names below.
# An explicit format keeps date and time visibly separated and avoids ':'
# and ' ', which are not valid in file names on every platform.
dt_now = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S_%f')

In [18]:
# Define the parameters used for training.

# Input corpora and timestamped output locations.
train_dataset = processed_train_txt
test_dataset = processed_valid_txt
vocab_path = './data/vocab%s.txt' % dt_now
output_model_path = './output/bertmodel%s' % dt_now

# Model size.
hidden = 256      # larger alternative noted by the author: 768
layers = 8        # larger alternative noted by the author: 12
attn_heads = 8    # larger alternative noted by the author: 12
seq_len = 60

# Data loading and run control.
batch_size = 4
epochs = 10
num_workers = 5
with_cuda = True
log_freq = 20
corpus_lines = None

# Adam optimizer settings.
lr = 1e-3
adam_weight_decay = 0.00
adam_beta1 = 0.9
adam_beta2 = 0.999

dropout = 0.0

# Minimum frequency passed to the vocabulary builder.
min_freq = 7

# Corpus used to build the vocabulary; no separate label file is used.
corpus_path = processed_train_txt
label_path = None

In [19]:
# Build the vocabulary file from the corpus, then read it back.
build(corpus_path, vocab_path, min_freq=min_freq)

print("Loading Vocab", vocab_path)
vocab = WordVocab.load_vocab(vocab_path)

# From here on train_dataset / test_dataset are rebound from file paths
# to dataset objects; the prints still show the paths.
print("Loading Train Dataset", train_dataset)
train_dataset = BERTDataset(
    train_dataset,
    vocab,
    seq_len=seq_len,
    label_path=label_path,
    corpus_lines=corpus_lines,
)

print("Loading Test Dataset", test_dataset)
if test_dataset is not None:
    test_dataset = BERTDataset(
        test_dataset,
        vocab,
        seq_len=seq_len,
        label_path=label_path,
    )

print("Creating Dataloader")
train_data_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
)
if test_dataset is not None:
    test_data_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        num_workers=num_workers,
    )
else:
    test_data_loader = None

print("Building BERT model")
bert = BERT(
    len(vocab),
    hidden=hidden,
    n_layers=layers,
    attn_heads=attn_heads,
    dropout=dropout,
)

Building Vocab
VOCAB SIZE: 5
Loading Vocab ./data/vocab2019-05-1218:59:16.898679.txt
Loading Train Dataset ./data/chatdata/train_X.txt
Loading Test Dataset ./data/chatdata/valid_X.txt
Creating Dataloader
Building BERT model


In [20]:
print("Creating BERT Trainer")
# Wire the model, both data loaders and the Adam settings into the trainer.
trainer = BERTTrainer(
    bert,
    len(vocab),
    train_dataloader=train_data_loader,
    test_dataloader=test_data_loader,
    lr=lr,
    betas=(adam_beta1, adam_beta2),
    weight_decay=adam_weight_decay,
    with_cuda=with_cuda,
    log_freq=log_freq,
)

Creating BERT Trainer
Total Parameters: 6321927


In [25]:
print("Training Start")
for epoch in range(epochs):
    # Run the trainer's training pass for this epoch.
    trainer.train(epoch)
    # Checkpoint the model; save() writes to output_model_path + ".ep<epoch>".
    trainer.save(epoch, output_model_path)
    # Run the trainer's test pass after the checkpoint has been written.
    trainer.test(epoch)

Training Start


EP_train:0:   0%|| 1/1000 [00:00<08:22,  1.99it/s]

{'epoch': 0, 'iter': 0, 'avg_loss': 3.053316116333008, 'avg_acc': 50.0, 'loss': 3.053316116333008}


EP_train:0:   2%|| 21/1000 [00:05<04:21,  3.75it/s]

{'epoch': 0, 'iter': 20, 'avg_loss': 2.3360815378172055, 'avg_acc': 50.0, 'loss': 0.8547192811965942}


EP_train:0:   4%|| 41/1000 [00:10<04:23,  3.63it/s]

{'epoch': 0, 'iter': 40, 'avg_loss': 1.6956713913780888, 'avg_acc': 50.609756097560975, 'loss': 1.4307366609573364}


EP_train:0:   6%|| 61/1000 [00:16<05:00,  3.12it/s]

{'epoch': 0, 'iter': 60, 'avg_loss': 1.4845702276244515, 'avg_acc': 52.459016393442624, 'loss': 0.6866318583488464}


EP_train:0:   8%|| 81/1000 [00:24<05:40,  2.70it/s]

{'epoch': 0, 'iter': 80, 'avg_loss': 1.3140193775388194, 'avg_acc': 51.85185185185185, 'loss': 0.5308113098144531}


EP_train:0:  10%|| 101/1000 [00:31<04:20,  3.45it/s]

{'epoch': 0, 'iter': 100, 'avg_loss': 1.2068413011729717, 'avg_acc': 50.99009900990099, 'loss': 0.7212405800819397}


EP_train:0:  12%|| 121/1000 [00:36<04:42,  3.11it/s]

{'epoch': 0, 'iter': 120, 'avg_loss': 1.1333942541098299, 'avg_acc': 50.6198347107438, 'loss': 0.9399799704551697}


EP_train:0:  14%|| 141/1000 [00:42<04:25,  3.23it/s]

{'epoch': 0, 'iter': 140, 'avg_loss': 1.0966468205847215, 'avg_acc': 51.24113475177305, 'loss': 1.6609286069869995}


EP_train:0:  16%|| 161/1000 [00:48<04:05,  3.42it/s]

{'epoch': 0, 'iter': 160, 'avg_loss': 1.062112325258692, 'avg_acc': 50.77639751552795, 'loss': 0.7961816787719727}


EP_train:0:  18%|| 181/1000 [00:55<05:14,  2.60it/s]

{'epoch': 0, 'iter': 180, 'avg_loss': 1.0356643300153603, 'avg_acc': 50.82872928176796, 'loss': 1.3681340217590332}


EP_train:0:  20%|| 201/1000 [01:03<04:24,  3.02it/s]

{'epoch': 0, 'iter': 200, 'avg_loss': 1.0148696840661973, 'avg_acc': 50.87064676616916, 'loss': 0.7883260250091553}


EP_train:0:  22%|| 221/1000 [01:09<03:38,  3.56it/s]

{'epoch': 0, 'iter': 220, 'avg_loss': 0.99233791803545, 'avg_acc': 51.35746606334841, 'loss': 0.3617301881313324}


EP_train:0:  24%|| 241/1000 [01:15<03:43,  3.39it/s]

{'epoch': 0, 'iter': 240, 'avg_loss': 0.9797579581033887, 'avg_acc': 50.829875518672196, 'loss': 0.6784611940383911}


EP_train:0:  26%|| 261/1000 [01:21<03:51,  3.19it/s]

{'epoch': 0, 'iter': 260, 'avg_loss': 0.9618550606814152, 'avg_acc': 50.86206896551724, 'loss': 0.6414287686347961}


EP_train:0:  28%|| 281/1000 [01:28<04:20,  2.76it/s]

{'epoch': 0, 'iter': 280, 'avg_loss': 0.9451135095089568, 'avg_acc': 51.06761565836299, 'loss': 0.5498720407485962}


EP_train:0:  30%|| 301/1000 [01:34<03:12,  3.62it/s]

{'epoch': 0, 'iter': 300, 'avg_loss': 0.9296940563500323, 'avg_acc': 51.49501661129568, 'loss': 0.5168757438659668}


EP_train:0:  32%|| 321/1000 [01:40<03:37,  3.12it/s]

{'epoch': 0, 'iter': 320, 'avg_loss': 0.9197093049104897, 'avg_acc': 51.71339563862928, 'loss': 0.9142669439315796}


EP_train:0:  34%|| 341/1000 [01:47<03:30,  3.14it/s]

{'epoch': 0, 'iter': 340, 'avg_loss': 0.9086554095345684, 'avg_acc': 51.31964809384164, 'loss': 0.7348402142524719}


EP_train:0:  36%|| 361/1000 [01:52<02:53,  3.69it/s]

{'epoch': 0, 'iter': 360, 'avg_loss': 0.8969430744627341, 'avg_acc': 51.8005540166205, 'loss': 0.7574119567871094}


EP_train:0:  38%|| 381/1000 [01:57<02:28,  4.18it/s]

{'epoch': 0, 'iter': 380, 'avg_loss': 0.8855589734111714, 'avg_acc': 52.493438320209975, 'loss': 0.8435767292976379}


EP_train:0:  40%|| 401/1000 [02:03<03:21,  2.98it/s]

{'epoch': 0, 'iter': 400, 'avg_loss': 0.8810329758411185, 'avg_acc': 52.68079800498753, 'loss': 0.8568190932273865}


EP_train:0:  42%|| 421/1000 [02:10<03:19,  2.91it/s]

{'epoch': 0, 'iter': 420, 'avg_loss': 0.8750235247304066, 'avg_acc': 53.028503562945374, 'loss': 0.7871896624565125}


EP_train:0:  44%|| 441/1000 [02:16<02:37,  3.54it/s]

{'epoch': 0, 'iter': 440, 'avg_loss': 0.8672573920635004, 'avg_acc': 53.28798185941043, 'loss': 0.6162328124046326}


EP_train:0:  46%|| 461/1000 [02:22<03:01,  2.98it/s]

{'epoch': 0, 'iter': 460, 'avg_loss': 0.8621957674541727, 'avg_acc': 53.253796095444685, 'loss': 0.6243141889572144}


EP_train:0:  48%|| 481/1000 [02:29<02:43,  3.18it/s]

{'epoch': 0, 'iter': 480, 'avg_loss': 0.8583299362811254, 'avg_acc': 53.118503118503114, 'loss': 0.8395732641220093}


EP_train:0:  50%|| 501/1000 [02:35<02:01,  4.09it/s]

{'epoch': 0, 'iter': 500, 'avg_loss': 0.8544280416073794, 'avg_acc': 52.9441117764471, 'loss': 0.715458869934082}


EP_train:0:  52%|| 521/1000 [02:40<02:42,  2.95it/s]

{'epoch': 0, 'iter': 520, 'avg_loss': 0.85067269841706, 'avg_acc': 52.783109404990405, 'loss': 0.5881235599517822}


EP_train:0:  54%|| 541/1000 [02:48<02:44,  2.79it/s]

{'epoch': 0, 'iter': 540, 'avg_loss': 0.8464116748911286, 'avg_acc': 52.865064695009245, 'loss': 0.756837785243988}


EP_train:0:  56%|| 561/1000 [02:53<01:59,  3.69it/s]

{'epoch': 0, 'iter': 560, 'avg_loss': 0.8427112652403892, 'avg_acc': 52.62923351158645, 'loss': 0.7179768085479736}


EP_train:0:  58%|| 581/1000 [02:59<02:08,  3.26it/s]

{'epoch': 0, 'iter': 580, 'avg_loss': 0.8405131331473435, 'avg_acc': 52.58175559380379, 'loss': 0.7487000226974487}


EP_train:0:  60%|| 601/1000 [03:05<01:46,  3.75it/s]

{'epoch': 0, 'iter': 600, 'avg_loss': 0.8362933351238337, 'avg_acc': 53.036605657237935, 'loss': 0.3766773045063019}


EP_train:0:  62%|| 621/1000 [03:10<01:40,  3.75it/s]

{'epoch': 0, 'iter': 620, 'avg_loss': 0.8328620807332213, 'avg_acc': 52.93880837359099, 'loss': 0.759245753288269}


EP_train:0:  64%|| 641/1000 [03:16<02:09,  2.78it/s]

{'epoch': 0, 'iter': 640, 'avg_loss': 0.8290227646539903, 'avg_acc': 53.042121684867396, 'loss': 0.7017991542816162}


EP_train:0:  66%|| 661/1000 [03:23<01:48,  3.12it/s]

{'epoch': 0, 'iter': 660, 'avg_loss': 0.8257402313785817, 'avg_acc': 52.83661119515885, 'loss': 0.7373155355453491}


EP_train:0:  68%|| 681/1000 [03:28<01:27,  3.64it/s]

{'epoch': 0, 'iter': 680, 'avg_loss': 0.8229299666040006, 'avg_acc': 52.67988252569751, 'loss': 0.8274601101875305}


EP_train:0:  70%|| 701/1000 [03:35<01:56,  2.56it/s]

{'epoch': 0, 'iter': 700, 'avg_loss': 0.8214231872832809, 'avg_acc': 52.49643366619116, 'loss': 0.890002965927124}


EP_train:0:  72%|| 721/1000 [03:42<01:30,  3.07it/s]

{'epoch': 0, 'iter': 720, 'avg_loss': 0.8190724106141251, 'avg_acc': 52.42718446601942, 'loss': 0.6101883053779602}


EP_train:0:  74%|| 741/1000 [03:48<01:14,  3.47it/s]

{'epoch': 0, 'iter': 740, 'avg_loss': 0.8160662859447009, 'avg_acc': 52.29419703103913, 'loss': 0.6921607851982117}


EP_train:0:  76%|| 761/1000 [03:54<01:35,  2.49it/s]

{'epoch': 0, 'iter': 760, 'avg_loss': 0.8131711593487652, 'avg_acc': 52.266754270696445, 'loss': 0.5974047183990479}


EP_train:0:  78%|| 781/1000 [04:01<01:02,  3.51it/s]

{'epoch': 0, 'iter': 780, 'avg_loss': 0.8107571624047701, 'avg_acc': 52.33674775928298, 'loss': 0.6960182189941406}


EP_train:0:  80%|| 801/1000 [04:07<01:00,  3.28it/s]

{'epoch': 0, 'iter': 800, 'avg_loss': 0.808306109969424, 'avg_acc': 52.278401997503124, 'loss': 0.7011044025421143}


EP_train:0:  82%|| 821/1000 [04:13<00:50,  3.56it/s]

{'epoch': 0, 'iter': 820, 'avg_loss': 0.8060331826351229, 'avg_acc': 52.3142509135201, 'loss': 0.7107651829719543}


EP_train:0:  84%|| 841/1000 [04:19<00:46,  3.40it/s]

{'epoch': 0, 'iter': 840, 'avg_loss': 0.802717465570686, 'avg_acc': 52.675386444708685, 'loss': 0.6744073033332825}


EP_train:0:  86%|| 861/1000 [04:26<00:43,  3.16it/s]

{'epoch': 0, 'iter': 860, 'avg_loss': 0.7996618946436598, 'avg_acc': 52.961672473867594, 'loss': 0.7661246657371521}


EP_train:0:  88%|| 881/1000 [04:32<00:35,  3.33it/s]

{'epoch': 0, 'iter': 880, 'avg_loss': 0.7978097310714702, 'avg_acc': 52.95119182746879, 'loss': 0.8218620419502258}


EP_train:0:  90%|| 901/1000 [04:38<00:29,  3.34it/s]

{'epoch': 0, 'iter': 900, 'avg_loss': 0.79658744437465, 'avg_acc': 52.83018867924528, 'loss': 0.7242227792739868}


EP_train:0:  92%|| 921/1000 [04:44<00:28,  2.78it/s]

{'epoch': 0, 'iter': 920, 'avg_loss': 0.7951848162671836, 'avg_acc': 52.82301845819761, 'loss': 0.7950015664100647}


EP_train:0:  94%|| 941/1000 [04:50<00:16,  3.62it/s]

{'epoch': 0, 'iter': 940, 'avg_loss': 0.7933158300674493, 'avg_acc': 52.789585547290116, 'loss': 0.6961479783058167}


EP_train:0:  96%|| 961/1000 [04:55<00:09,  4.16it/s]

{'epoch': 0, 'iter': 960, 'avg_loss': 0.7921012915422777, 'avg_acc': 52.49739854318418, 'loss': 0.6998275518417358}


EP_train:0:  98%|| 981/1000 [05:00<00:05,  3.74it/s]

{'epoch': 0, 'iter': 980, 'avg_loss': 0.7902776937396336, 'avg_acc': 52.49745158002038, 'loss': 0.7908626198768616}


EP_train:0: 100%|| 1000/1000 [05:07<00:00,  3.46it/s]

EP0_train, avg_loss= 0.788886081341654 total_acc= 52.43810952738185
EP:0 Model Saved on: ./output/bertmodel2019-05-1114:59:01.616888.ep0



  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
EP_test:0:   1%|| 2/250 [00:00<00:40,  6.09it/s]

{'epoch': 0, 'iter': 0, 'avg_loss': 0.755120575428009, 'avg_acc': 25.0, 'loss': 0.755120575428009}


EP_test:0:   9%|| 23/250 [00:02<00:18, 12.17it/s]

{'epoch': 0, 'iter': 20, 'avg_loss': 0.7027498341742016, 'avg_acc': 50.0, 'loss': 0.6481580138206482}


EP_test:0:  17%|| 43/250 [00:03<00:15, 13.41it/s]

{'epoch': 0, 'iter': 40, 'avg_loss': 0.6987757043140691, 'avg_acc': 51.829268292682926, 'loss': 0.7000062465667725}


EP_test:0:  25%|| 63/250 [00:05<00:14, 12.49it/s]

{'epoch': 0, 'iter': 60, 'avg_loss': 0.7046625487139968, 'avg_acc': 50.0, 'loss': 0.6995897889137268}


EP_test:0:  33%|| 83/250 [00:06<00:11, 14.30it/s]

{'epoch': 0, 'iter': 80, 'avg_loss': 0.7037568541220677, 'avg_acc': 50.0, 'loss': 0.6448919177055359}


EP_test:0:  41%|| 103/250 [00:07<00:09, 14.90it/s]

{'epoch': 0, 'iter': 100, 'avg_loss': 0.7052150138533941, 'avg_acc': 49.504950495049506, 'loss': 0.6444754600524902}


EP_test:0:  49%|| 123/250 [00:09<00:09, 14.07it/s]

{'epoch': 0, 'iter': 120, 'avg_loss': 0.7045326178724115, 'avg_acc': 49.586776859504134, 'loss': 0.7000062465667725}


EP_test:0:  57%|| 143/250 [00:10<00:07, 13.45it/s]

{'epoch': 0, 'iter': 140, 'avg_loss': 0.704260176800667, 'avg_acc': 49.645390070921984, 'loss': 0.7555694580078125}


EP_test:0:  65%|| 163/250 [00:12<00:06, 12.95it/s]

{'epoch': 0, 'iter': 160, 'avg_loss': 0.7039214783573743, 'avg_acc': 49.68944099378882, 'loss': 0.7000062465667725}


EP_test:0:  73%|| 183/250 [00:13<00:04, 13.82it/s]

{'epoch': 0, 'iter': 180, 'avg_loss': 0.7033668496332116, 'avg_acc': 50.0, 'loss': 0.7043018341064453}


EP_test:0:  81%|| 203/250 [00:15<00:03, 14.96it/s]

{'epoch': 0, 'iter': 200, 'avg_loss': 0.7057793072207057, 'avg_acc': 48.88059701492538, 'loss': 0.8067606687545776}


EP_test:0:  89%|| 223/250 [00:16<00:01, 13.79it/s]

{'epoch': 0, 'iter': 220, 'avg_loss': 0.7062452621589419, 'avg_acc': 48.64253393665158, 'loss': 0.6166446805000305}


EP_test:0:  97%|| 243/250 [00:18<00:00, 13.14it/s]

{'epoch': 0, 'iter': 240, 'avg_loss': 0.7060712007071467, 'avg_acc': 48.85892116182573, 'loss': 0.7024070024490356}


EP_test:0: 100%|| 250/250 [00:18<00:00, 13.41it/s]

EP0_test, avg_loss= 0.7057849714756012 total_acc= 49.049049049049046



EP_train:1:   0%|| 1/1000 [00:00<06:52,  2.42it/s]

{'epoch': 1, 'iter': 0, 'avg_loss': 0.7061060070991516, 'avg_acc': 50.0, 'loss': 0.7061060070991516}


EP_train:1:   2%|| 21/1000 [00:07<06:04,  2.69it/s]

{'epoch': 1, 'iter': 20, 'avg_loss': 0.6988442142804464, 'avg_acc': 46.42857142857143, 'loss': 0.7154275178909302}


EP_train:1:   4%|| 41/1000 [00:13<04:18,  3.71it/s]

{'epoch': 1, 'iter': 40, 'avg_loss': 0.7005033463966556, 'avg_acc': 45.1219512195122, 'loss': 0.7062532305717468}


EP_train:1:   6%|| 61/1000 [00:18<04:10,  3.74it/s]

{'epoch': 1, 'iter': 60, 'avg_loss': 0.6998868574861621, 'avg_acc': 44.26229508196721, 'loss': 0.7100659608840942}


EP_train:1:   8%|| 81/1000 [00:25<05:34,  2.75it/s]

{'epoch': 1, 'iter': 80, 'avg_loss': 0.6997677831002224, 'avg_acc': 45.67901234567901, 'loss': 0.6886424422264099}


EP_train:1:  10%|| 101/1000 [00:31<04:54,  3.06it/s]

{'epoch': 1, 'iter': 100, 'avg_loss': 0.7029759689132766, 'avg_acc': 45.79207920792079, 'loss': 0.6759294271469116}


EP_train:1:  12%|| 121/1000 [00:37<04:06,  3.57it/s]

{'epoch': 1, 'iter': 120, 'avg_loss': 0.7020167092646449, 'avg_acc': 48.34710743801653, 'loss': 0.5969694256782532}


EP_train:1:  14%|| 141/1000 [00:44<05:24,  2.64it/s]

{'epoch': 1, 'iter': 140, 'avg_loss': 0.7028294683348203, 'avg_acc': 50.0, 'loss': 0.8330541849136353}


EP_train:1:  16%|| 161/1000 [00:51<04:42,  2.97it/s]

{'epoch': 1, 'iter': 160, 'avg_loss': 0.7061052218727444, 'avg_acc': 50.31055900621118, 'loss': 0.7003761529922485}


EP_train:1:  18%|| 181/1000 [00:57<03:50,  3.55it/s]

{'epoch': 1, 'iter': 180, 'avg_loss': 0.7047146953930512, 'avg_acc': 50.82872928176796, 'loss': 0.6065565347671509}


EP_train:1:  20%|| 201/1000 [01:03<04:59,  2.67it/s]

{'epoch': 1, 'iter': 200, 'avg_loss': 0.7047190230284164, 'avg_acc': 51.11940298507462, 'loss': 0.7520492076873779}


EP_train:1:  22%|| 221/1000 [01:11<04:31,  2.87it/s]

{'epoch': 1, 'iter': 220, 'avg_loss': 0.7053472175857061, 'avg_acc': 51.01809954751131, 'loss': 0.8043636679649353}


EP_train:1:  24%|| 241/1000 [01:17<03:36,  3.50it/s]

{'epoch': 1, 'iter': 240, 'avg_loss': 0.7056800347146157, 'avg_acc': 50.31120331950207, 'loss': 0.7071352005004883}


EP_train:1:  26%|| 261/1000 [01:23<04:20,  2.84it/s]

{'epoch': 1, 'iter': 260, 'avg_loss': 0.7060902002214016, 'avg_acc': 50.67049808429118, 'loss': 0.6658211946487427}


EP_train:1:  28%|| 281/1000 [01:31<04:16,  2.80it/s]

{'epoch': 1, 'iter': 280, 'avg_loss': 0.7059742710768541, 'avg_acc': 50.26690391459074, 'loss': 0.7135890126228333}


EP_train:1:  30%|| 301/1000 [01:36<03:03,  3.80it/s]

{'epoch': 1, 'iter': 300, 'avg_loss': 0.7064353384052796, 'avg_acc': 50.08305647840532, 'loss': 0.6648221611976624}


EP_train:1:  32%|| 321/1000 [01:42<03:20,  3.39it/s]

{'epoch': 1, 'iter': 320, 'avg_loss': 0.7065087224464179, 'avg_acc': 50.155763239875384, 'loss': 0.8980622291564941}


EP_train:1:  34%|| 341/1000 [01:49<03:47,  2.90it/s]

{'epoch': 1, 'iter': 340, 'avg_loss': 0.7067710563234569, 'avg_acc': 50.0733137829912, 'loss': 0.7024841904640198}


EP_train:1:  36%|| 361/1000 [01:55<02:54,  3.65it/s]

{'epoch': 1, 'iter': 360, 'avg_loss': 0.7067257994760107, 'avg_acc': 50.13850415512465, 'loss': 0.7755399942398071}


EP_train:1:  38%|| 381/1000 [02:01<03:00,  3.42it/s]

{'epoch': 1, 'iter': 380, 'avg_loss': 0.7063111109057749, 'avg_acc': 50.39370078740157, 'loss': 0.736779510974884}


EP_train:1:  40%|| 401/1000 [02:08<03:45,  2.66it/s]

{'epoch': 1, 'iter': 400, 'avg_loss': 0.7067499835592256, 'avg_acc': 49.87531172069826, 'loss': 0.7012020945549011}


EP_train:1:  42%|| 421/1000 [02:15<03:03,  3.16it/s]

{'epoch': 1, 'iter': 420, 'avg_loss': 0.7071381090938903, 'avg_acc': 49.58432304038005, 'loss': 0.6912679672241211}


EP_train:1:  44%|| 441/1000 [02:19<02:24,  3.86it/s]

{'epoch': 1, 'iter': 440, 'avg_loss': 0.7071466358070201, 'avg_acc': 49.43310657596372, 'loss': 0.7158809900283813}


EP_train:1:  46%|| 461/1000 [02:26<03:16,  2.74it/s]

{'epoch': 1, 'iter': 460, 'avg_loss': 0.7072834412601662, 'avg_acc': 49.511930585683295, 'loss': 0.7342395782470703}


EP_train:1:  48%|| 481/1000 [02:32<02:22,  3.64it/s]

{'epoch': 1, 'iter': 480, 'avg_loss': 0.7075385642150831, 'avg_acc': 49.74012474012474, 'loss': 0.9364258050918579}


EP_train:1:  50%|| 501/1000 [02:37<01:59,  4.18it/s]

{'epoch': 1, 'iter': 500, 'avg_loss': 0.7069443389327227, 'avg_acc': 50.0, 'loss': 0.5947431325912476}


EP_train:1:  52%|| 521/1000 [02:42<02:09,  3.71it/s]

{'epoch': 1, 'iter': 520, 'avg_loss': 0.7071617976717666, 'avg_acc': 50.0, 'loss': 0.7111731171607971}


EP_train:1:  54%|| 541/1000 [02:48<02:23,  3.19it/s]

{'epoch': 1, 'iter': 540, 'avg_loss': 0.7071005327423928, 'avg_acc': 49.81515711645101, 'loss': 0.6490626335144043}


EP_train:1:  56%|| 561/1000 [02:54<01:58,  3.69it/s]

{'epoch': 1, 'iter': 560, 'avg_loss': 0.7073201600880547, 'avg_acc': 49.82174688057041, 'loss': 0.7420138120651245}


EP_train:1:  58%|| 581/1000 [02:59<01:47,  3.90it/s]

{'epoch': 1, 'iter': 580, 'avg_loss': 0.7071939526244408, 'avg_acc': 49.78485370051635, 'loss': 0.7615401744842529}


EP_train:1:  60%|| 601/1000 [03:05<02:34,  2.58it/s]

{'epoch': 1, 'iter': 600, 'avg_loss': 0.7057223378521036, 'avg_acc': 50.374376039933445, 'loss': 0.6311137080192566}


EP_train:1:  62%|| 621/1000 [03:12<01:53,  3.34it/s]

{'epoch': 1, 'iter': 620, 'avg_loss': 0.7062646036370749, 'avg_acc': 50.1610305958132, 'loss': 0.687217116355896}


EP_train:1:  64%|| 641/1000 [03:18<01:40,  3.56it/s]

{'epoch': 1, 'iter': 640, 'avg_loss': 0.706456447354345, 'avg_acc': 50.42901716068643, 'loss': 0.6869775056838989}


EP_train:1:  66%|| 661/1000 [03:24<02:14,  2.52it/s]

{'epoch': 1, 'iter': 660, 'avg_loss': 0.706796570101953, 'avg_acc': 50.26475037821483, 'loss': 0.7192885875701904}


EP_train:1:  68%|| 681/1000 [03:31<01:34,  3.36it/s]

{'epoch': 1, 'iter': 680, 'avg_loss': 0.7066273031955949, 'avg_acc': 50.22026431718062, 'loss': 0.726588785648346}


EP_train:1:  70%|| 701/1000 [03:36<01:15,  3.98it/s]

{'epoch': 1, 'iter': 700, 'avg_loss': 0.7065542936325073, 'avg_acc': 50.320970042796006, 'loss': 0.6039629578590393}


EP_train:1:  72%|| 721/1000 [03:41<01:15,  3.68it/s]

{'epoch': 1, 'iter': 720, 'avg_loss': 0.7070112773184968, 'avg_acc': 50.173370319001386, 'loss': 0.6922541260719299}


EP_train:1:  74%|| 741/1000 [03:48<01:22,  3.14it/s]

{'epoch': 1, 'iter': 740, 'avg_loss': 0.7071035708653943, 'avg_acc': 50.2361673414305, 'loss': 0.98446124792099}


EP_train:1:  76%|| 761/1000 [03:53<01:04,  3.70it/s]

{'epoch': 1, 'iter': 760, 'avg_loss': 0.7059732958498202, 'avg_acc': 50.49277266754271, 'loss': 0.5824610590934753}


EP_train:1:  78%|| 781/1000 [03:59<01:02,  3.50it/s]

{'epoch': 1, 'iter': 780, 'avg_loss': 0.706850389989329, 'avg_acc': 50.256081946222785, 'loss': 0.7056648135185242}


EP_train:1:  80%|| 801/1000 [04:05<01:01,  3.24it/s]

{'epoch': 1, 'iter': 800, 'avg_loss': 0.7068634268533573, 'avg_acc': 50.43695380774032, 'loss': 0.8409472703933716}


EP_train:1:  82%|| 821/1000 [04:10<00:48,  3.70it/s]

{'epoch': 1, 'iter': 820, 'avg_loss': 0.706988493450231, 'avg_acc': 50.27405602923264, 'loss': 0.6887639760971069}


EP_train:1:  84%|| 841/1000 [04:16<00:42,  3.70it/s]

{'epoch': 1, 'iter': 840, 'avg_loss': 0.7072239405653848, 'avg_acc': 50.1486325802616, 'loss': 0.7134210467338562}


EP_train:1:  86%|| 861/1000 [04:22<00:49,  2.81it/s]

{'epoch': 1, 'iter': 860, 'avg_loss': 0.7072995762134124, 'avg_acc': 50.05807200929152, 'loss': 0.7521803379058838}


EP_train:1:  88%|| 881/1000 [04:28<00:32,  3.63it/s]

{'epoch': 1, 'iter': 880, 'avg_loss': 0.7081697931117827, 'avg_acc': 50.08513053348468, 'loss': 0.6622469425201416}


EP_train:1:  90%|| 901/1000 [04:33<00:23,  4.14it/s]

{'epoch': 1, 'iter': 900, 'avg_loss': 0.7086759922937071, 'avg_acc': 50.02774694783574, 'loss': 0.6505227088928223}


EP_train:1:  92%|| 921/1000 [04:38<00:21,  3.73it/s]

{'epoch': 1, 'iter': 920, 'avg_loss': 0.7085631846184322, 'avg_acc': 50.0814332247557, 'loss': 0.49868538975715637}


EP_train:1:  94%|| 941/1000 [04:45<00:18,  3.21it/s]

{'epoch': 1, 'iter': 940, 'avg_loss': 0.7089009042215652, 'avg_acc': 49.94686503719448, 'loss': 0.6649987697601318}


EP_train:1:  96%|| 961/1000 [04:50<00:10,  3.69it/s]

{'epoch': 1, 'iter': 960, 'avg_loss': 0.7085658381155204, 'avg_acc': 50.052029136316335, 'loss': 0.8483170866966248}


EP_train:1:  98%|| 981/1000 [04:55<00:04,  3.86it/s]

{'epoch': 1, 'iter': 980, 'avg_loss': 0.7093767791561152, 'avg_acc': 49.84709480122324, 'loss': 0.6977942585945129}


EP_train:1: 100%|| 1000/1000 [05:01<00:00,  3.04it/s]

EP1_train, avg_loss= 0.7095372128337621 total_acc= 49.91247811952988
EP:1 Model Saved on: ./output/bertmodel2019-05-1114:59:01.616888.ep1



EP_test:1:   1%|| 2/250 [00:00<00:49,  4.99it/s]

{'epoch': 1, 'iter': 0, 'avg_loss': 0.7202409505844116, 'avg_acc': 25.0, 'loss': 0.7202409505844116}


EP_test:1:   9%|| 22/250 [00:02<00:22, 10.36it/s]

{'epoch': 1, 'iter': 20, 'avg_loss': 0.710180748076666, 'avg_acc': 39.285714285714285, 'loss': 0.7219765186309814}


EP_test:1:  17%|| 42/250 [00:04<00:18, 11.07it/s]

{'epoch': 1, 'iter': 40, 'avg_loss': 0.709573581451323, 'avg_acc': 39.63414634146341, 'loss': 0.7210997939109802}


EP_test:1:  25%|| 62/250 [00:05<00:15, 11.90it/s]

{'epoch': 1, 'iter': 60, 'avg_loss': 0.7092378305607154, 'avg_acc': 39.34426229508197, 'loss': 0.6962629556655884}


EP_test:1:  33%|| 82/250 [00:07<00:13, 12.51it/s]

{'epoch': 1, 'iter': 80, 'avg_loss': 0.7052184056352686, 'avg_acc': 43.82716049382716, 'loss': 0.6714261174201965}


EP_test:1:  41%|| 102/250 [00:08<00:11, 12.35it/s]

{'epoch': 1, 'iter': 100, 'avg_loss': 0.7051881327487455, 'avg_acc': 43.81188118811881, 'loss': 0.6714261174201965}


EP_test:1:  49%|| 122/250 [00:10<00:09, 13.18it/s]

{'epoch': 1, 'iter': 120, 'avg_loss': 0.7039106975902211, 'avg_acc': 44.83471074380165, 'loss': 0.6954041123390198}


EP_test:1:  57%|| 142/250 [00:11<00:07, 13.51it/s]

{'epoch': 1, 'iter': 140, 'avg_loss': 0.7032082435932565, 'avg_acc': 45.744680851063826, 'loss': 0.7210997939109802}


EP_test:1:  65%|| 162/250 [00:13<00:06, 14.06it/s]

{'epoch': 1, 'iter': 160, 'avg_loss': 0.7032338921327769, 'avg_acc': 45.807453416149066, 'loss': 0.7210997939109802}


EP_test:1:  73%|| 182/250 [00:14<00:05, 12.26it/s]

{'epoch': 1, 'iter': 180, 'avg_loss': 0.7028991408769597, 'avg_acc': 46.13259668508287, 'loss': 0.6668798923492432}


EP_test:1:  81%|| 202/250 [00:16<00:03, 12.92it/s]

{'epoch': 1, 'iter': 200, 'avg_loss': 0.7024628178990302, 'avg_acc': 46.39303482587065, 'loss': 0.7193821668624878}


EP_test:1:  89%|| 222/250 [00:17<00:02, 12.94it/s]

{'epoch': 1, 'iter': 220, 'avg_loss': 0.7019063215449924, 'avg_acc': 47.05882352941176, 'loss': 0.6962629556655884}


EP_test:1:  97%|| 242/250 [00:19<00:00, 13.20it/s]

{'epoch': 1, 'iter': 240, 'avg_loss': 0.7012976680059156, 'avg_acc': 47.614107883817425, 'loss': 0.6971396207809448}


EP_test:1: 100%|| 250/250 [00:19<00:00, 14.19it/s]

EP1_test, avg_loss= 0.701813051700592 total_acc= 47.147147147147145



EP_train:2:   0%|| 1/1000 [00:00<06:39,  2.50it/s]

{'epoch': 2, 'iter': 0, 'avg_loss': 0.7459366321563721, 'avg_acc': 0.0, 'loss': 0.7459366321563721}


EP_train:2:   2%|| 21/1000 [00:06<06:24,  2.55it/s]

{'epoch': 2, 'iter': 20, 'avg_loss': 0.7165909835270473, 'avg_acc': 59.523809523809526, 'loss': 1.1612720489501953}


EP_train:2:   4%|| 41/1000 [00:13<04:55,  3.25it/s]

{'epoch': 2, 'iter': 40, 'avg_loss': 0.8123760252464108, 'avg_acc': 57.92682926829268, 'loss': 0.684904158115387}


EP_train:2:   6%|| 61/1000 [00:18<03:50,  4.07it/s]

{'epoch': 2, 'iter': 60, 'avg_loss': 0.7979713048114151, 'avg_acc': 55.32786885245902, 'loss': 0.8259761333465576}


EP_train:2:   8%|| 81/1000 [00:24<04:33,  3.36it/s]

{'epoch': 2, 'iter': 80, 'avg_loss': 0.7973247049031434, 'avg_acc': 53.39506172839506, 'loss': 0.983479917049408}


EP_train:2:  10%|| 101/1000 [00:31<05:21,  2.80it/s]

{'epoch': 2, 'iter': 100, 'avg_loss': 0.7861749113786338, 'avg_acc': 51.73267326732673, 'loss': 0.6855630874633789}


EP_train:2:  12%|| 121/1000 [00:38<04:30,  3.25it/s]

{'epoch': 2, 'iter': 120, 'avg_loss': 0.7847115699017098, 'avg_acc': 53.099173553719005, 'loss': 0.7395856380462646}


EP_train:2:  14%|| 141/1000 [00:43<04:31,  3.16it/s]

{'epoch': 2, 'iter': 140, 'avg_loss': 0.7879964946934306, 'avg_acc': 51.24113475177305, 'loss': 1.501700520515442}


EP_train:2:  16%|| 161/1000 [00:50<04:42,  2.97it/s]

{'epoch': 2, 'iter': 160, 'avg_loss': 0.7834464393856918, 'avg_acc': 50.621118012422365, 'loss': 0.7436840534210205}


EP_train:2:  18%|| 181/1000 [00:56<04:18,  3.16it/s]

{'epoch': 2, 'iter': 180, 'avg_loss': 0.7745389512621731, 'avg_acc': 51.10497237569061, 'loss': 0.36629393696784973}


EP_train:2:  20%|| 201/1000 [01:02<03:46,  3.52it/s]

{'epoch': 2, 'iter': 200, 'avg_loss': 0.7717868299385653, 'avg_acc': 50.87064676616916, 'loss': 0.8350690603256226}


EP_train:2:  22%|| 221/1000 [01:09<04:47,  2.71it/s]

{'epoch': 2, 'iter': 220, 'avg_loss': 0.7717737027473445, 'avg_acc': 50.678733031674206, 'loss': 0.6960669159889221}


EP_train:2:  24%|| 241/1000 [01:15<03:31,  3.59it/s]

{'epoch': 2, 'iter': 240, 'avg_loss': 0.7659020473894613, 'avg_acc': 51.037344398340245, 'loss': 0.6184186935424805}


EP_train:2:  26%|| 261/1000 [01:21<03:45,  3.28it/s]

{'epoch': 2, 'iter': 260, 'avg_loss': 0.7812291989566836, 'avg_acc': 50.76628352490421, 'loss': 0.5638412237167358}


EP_train:2:  28%|| 281/1000 [01:26<03:14,  3.70it/s]

{'epoch': 2, 'iter': 280, 'avg_loss': 0.7924885533524684, 'avg_acc': 50.80071174377224, 'loss': 0.9361342191696167}


EP_train:2:  30%|| 301/1000 [01:32<03:23,  3.44it/s]

{'epoch': 2, 'iter': 300, 'avg_loss': 0.799565643226833, 'avg_acc': 50.99667774086378, 'loss': 1.5251554250717163}


EP_train:2:  32%|| 321/1000 [01:39<04:03,  2.78it/s]

{'epoch': 2, 'iter': 320, 'avg_loss': 0.805052945970607, 'avg_acc': 50.23364485981309, 'loss': 0.7596429586410522}


EP_train:2:  34%|| 341/1000 [01:45<03:12,  3.42it/s]

{'epoch': 2, 'iter': 340, 'avg_loss': 0.8063465288274886, 'avg_acc': 50.65982404692082, 'loss': 1.2293970584869385}


EP_train:2:  36%|| 361/1000 [01:51<03:20,  3.18it/s]

{'epoch': 2, 'iter': 360, 'avg_loss': 0.8093110959914995, 'avg_acc': 50.69252077562327, 'loss': 0.8641534447669983}


EP_train:2:  38%|| 381/1000 [01:58<03:30,  2.95it/s]

{'epoch': 2, 'iter': 380, 'avg_loss': 0.8083852640257811, 'avg_acc': 50.39370078740157, 'loss': 0.7813374996185303}


EP_train:2:  40%|| 401/1000 [02:04<02:44,  3.64it/s]

{'epoch': 2, 'iter': 400, 'avg_loss': 0.8088399073401665, 'avg_acc': 50.18703241895261, 'loss': 1.0072498321533203}


EP_train:2:  42%|| 421/1000 [02:09<02:25,  3.97it/s]

{'epoch': 2, 'iter': 420, 'avg_loss': 0.8049318582005356, 'avg_acc': 49.8812351543943, 'loss': 0.8161171674728394}


EP_train:2:  44%|| 441/1000 [02:14<03:04,  3.03it/s]

{'epoch': 2, 'iter': 440, 'avg_loss': 0.8035746988230079, 'avg_acc': 49.65986394557823, 'loss': 0.6500614285469055}


EP_train:2:  46%|| 461/1000 [02:21<02:47,  3.23it/s]

{'epoch': 2, 'iter': 460, 'avg_loss': 0.801435978679136, 'avg_acc': 49.40347071583514, 'loss': 0.7304350733757019}


EP_train:2:  48%|| 481/1000 [02:32<03:37,  2.38it/s]

{'epoch': 2, 'iter': 480, 'avg_loss': 0.7979461508309333, 'avg_acc': 49.53222453222453, 'loss': 0.810120165348053}


EP_train:2:  50%|| 501/1000 [02:48<04:48,  1.73it/s]

{'epoch': 2, 'iter': 500, 'avg_loss': 0.7982824005506294, 'avg_acc': 49.45109780439122, 'loss': 1.0014902353286743}


EP_train:2:  52%|| 521/1000 [03:03<05:42,  1.40it/s]

{'epoch': 2, 'iter': 520, 'avg_loss': 0.7954612378631638, 'avg_acc': 49.520153550863725, 'loss': 0.7122545838356018}


EP_train:2:  54%|| 541/1000 [03:14<04:46,  1.60it/s]

{'epoch': 2, 'iter': 540, 'avg_loss': 0.7931574744769749, 'avg_acc': 49.53789279112754, 'loss': 0.8307071924209595}


EP_train:2:  56%|| 561/1000 [03:25<04:05,  1.79it/s]

{'epoch': 2, 'iter': 560, 'avg_loss': 0.7898837829211232, 'avg_acc': 49.643493761140824, 'loss': 0.7748746871948242}


EP_train:2:  58%|| 581/1000 [03:32<02:34,  2.71it/s]

{'epoch': 2, 'iter': 580, 'avg_loss': 0.7881777268864868, 'avg_acc': 49.35456110154905, 'loss': 0.7741033434867859}


EP_train:2:  60%|| 601/1000 [03:39<02:05,  3.19it/s]

{'epoch': 2, 'iter': 600, 'avg_loss': 0.7861774620598495, 'avg_acc': 49.70881863560732, 'loss': 0.7825435996055603}


EP_train:2:  62%|| 621/1000 [03:45<01:36,  3.95it/s]

{'epoch': 2, 'iter': 620, 'avg_loss': 0.7837983101348418, 'avg_acc': 49.71819645732689, 'loss': 0.5383440852165222}


EP_train:2:  64%|| 641/1000 [03:50<01:37,  3.67it/s]

{'epoch': 2, 'iter': 640, 'avg_loss': 0.7817610465774717, 'avg_acc': 49.9609984399376, 'loss': 0.6921805739402771}


EP_train:2:  66%|| 661/1000 [03:57<01:50,  3.06it/s]

{'epoch': 2, 'iter': 660, 'avg_loss': 0.7800772427052891, 'avg_acc': 49.92435703479576, 'loss': 0.7868083119392395}


EP_train:2:  68%|| 681/1000 [04:03<01:52,  2.83it/s]

{'epoch': 2, 'iter': 680, 'avg_loss': 0.7781682416889679, 'avg_acc': 50.073421439060205, 'loss': 0.7226191759109497}


EP_train:2:  70%|| 701/1000 [04:13<03:20,  1.49it/s]

{'epoch': 2, 'iter': 700, 'avg_loss': 0.7763316969542675, 'avg_acc': 50.0, 'loss': 0.7385532259941101}


EP_train:2:  72%|| 721/1000 [04:24<01:41,  2.76it/s]

{'epoch': 2, 'iter': 720, 'avg_loss': 0.7743920820911161, 'avg_acc': 49.86130374479889, 'loss': 0.7649077773094177}


EP_train:2:  74%|| 741/1000 [04:33<01:27,  2.96it/s]

{'epoch': 2, 'iter': 740, 'avg_loss': 0.772502716396924, 'avg_acc': 49.96626180836707, 'loss': 0.60186767578125}


EP_train:2:  76%|| 761/1000 [04:40<01:42,  2.33it/s]

{'epoch': 2, 'iter': 760, 'avg_loss': 0.7709934015766903, 'avg_acc': 49.83574244415243, 'loss': 0.72685307264328}


EP_train:2:  78%|| 781/1000 [04:49<01:47,  2.04it/s]

{'epoch': 2, 'iter': 780, 'avg_loss': 0.7693142820550332, 'avg_acc': 49.871959026888604, 'loss': 0.6940354704856873}


EP_train:2:  80%|| 801/1000 [05:00<01:41,  1.96it/s]

{'epoch': 2, 'iter': 800, 'avg_loss': 0.7673345769077689, 'avg_acc': 49.812734082397, 'loss': 0.8434231281280518}


EP_train:2:  82%|| 821/1000 [05:10<01:12,  2.46it/s]

{'epoch': 2, 'iter': 820, 'avg_loss': 0.7653995582291655, 'avg_acc': 50.03045066991474, 'loss': 0.747596263885498}


EP_train:2:  84%|| 841/1000 [05:17<00:48,  3.28it/s]

{'epoch': 2, 'iter': 840, 'avg_loss': 0.7647158059354663, 'avg_acc': 50.11890606420928, 'loss': 0.8338350653648376}


EP_train:2:  86%|| 861/1000 [05:23<00:38,  3.61it/s]

{'epoch': 2, 'iter': 860, 'avg_loss': 0.763900039384399, 'avg_acc': 50.26132404181185, 'loss': 0.8803338408470154}


EP_train:2:  88%|| 881/1000 [05:29<00:45,  2.62it/s]

{'epoch': 2, 'iter': 880, 'avg_loss': 0.7636753839464957, 'avg_acc': 50.227014755959146, 'loss': 0.8012706637382507}


EP_train:2:  90%|| 901/1000 [05:36<00:28,  3.42it/s]

{'epoch': 2, 'iter': 900, 'avg_loss': 0.7633519779154814, 'avg_acc': 50.22197558268591, 'loss': 0.6788744926452637}


EP_train:2:  92%|| 921/1000 [05:42<00:22,  3.47it/s]

{'epoch': 2, 'iter': 920, 'avg_loss': 0.7628808643984387, 'avg_acc': 50.3257328990228, 'loss': 0.7960695028305054}


EP_train:2:  94%|| 941/1000 [05:47<00:15,  3.72it/s]

{'epoch': 2, 'iter': 940, 'avg_loss': 0.7623255138684314, 'avg_acc': 50.15940488841658, 'loss': 0.6897934079170227}


EP_train:2:  96%|| 961/1000 [05:53<00:12,  3.12it/s]

{'epoch': 2, 'iter': 960, 'avg_loss': 0.7617821038315901, 'avg_acc': 50.07804370447451, 'loss': 0.7015095353126526}


EP_train:2:  98%|| 981/1000 [05:59<00:05,  3.67it/s]

{'epoch': 2, 'iter': 980, 'avg_loss': 0.7607897065454575, 'avg_acc': 50.076452599388375, 'loss': 0.7124603986740112}


EP_train:2: 100%|| 1000/1000 [06:04<00:00,  3.37it/s]

EP2_train, avg_loss= 0.7597294328082352 total_acc= 50.187546886721684
EP:2 Model Saved on: ./output/bertmodel2019-05-1114:59:01.616888.ep2



EP_test:2:   1%|| 2/250 [00:00<00:37,  6.58it/s]

{'epoch': 2, 'iter': 0, 'avg_loss': 0.7256729006767273, 'avg_acc': 50.0, 'loss': 0.7256729006767273}


EP_test:2:   9%|| 23/250 [00:01<00:18, 12.09it/s]

{'epoch': 2, 'iter': 20, 'avg_loss': 0.7290726729801723, 'avg_acc': 48.80952380952381, 'loss': 0.6634030938148499}


EP_test:2:  16%|| 41/250 [00:03<00:20, 10.39it/s]

{'epoch': 2, 'iter': 40, 'avg_loss': 0.7279790668952756, 'avg_acc': 49.390243902439025, 'loss': 0.7871976494789124}


EP_test:2:  25%|| 62/250 [00:05<00:20,  9.27it/s]

{'epoch': 2, 'iter': 60, 'avg_loss': 0.7374204094292688, 'avg_acc': 49.18032786885246, 'loss': 0.7253701686859131}


EP_test:2:  33%|| 82/250 [00:07<00:15, 10.87it/s]

{'epoch': 2, 'iter': 80, 'avg_loss': 0.7327562073866526, 'avg_acc': 49.382716049382715, 'loss': 0.47808387875556946}


EP_test:2:  41%|| 102/250 [00:09<00:13, 10.92it/s]

{'epoch': 2, 'iter': 100, 'avg_loss': 0.7229072770269791, 'avg_acc': 51.98019801980198, 'loss': 0.5245515704154968}


EP_test:2:  49%|| 122/250 [00:11<00:10, 11.80it/s]

{'epoch': 2, 'iter': 120, 'avg_loss': 0.7217276229346095, 'avg_acc': 52.066115702479344, 'loss': 0.980840265750885}


EP_test:2:  57%|| 142/250 [00:12<00:08, 13.03it/s]

{'epoch': 2, 'iter': 140, 'avg_loss': 0.7204047874778721, 'avg_acc': 52.12765957446809, 'loss': 0.6634030938148499}


EP_test:2:  65%|| 162/250 [00:14<00:06, 13.00it/s]

{'epoch': 2, 'iter': 160, 'avg_loss': 0.721572411541613, 'avg_acc': 52.17391304347826, 'loss': 1.0081859827041626}


EP_test:2:  73%|| 182/250 [00:15<00:04, 14.22it/s]

{'epoch': 2, 'iter': 180, 'avg_loss': 0.7205467774064501, 'avg_acc': 52.48618784530387, 'loss': 0.8459241986274719}


EP_test:2:  81%|| 202/250 [00:17<00:03, 14.54it/s]

{'epoch': 2, 'iter': 200, 'avg_loss': 0.7182362011416041, 'avg_acc': 52.86069651741293, 'loss': 0.7256729006767273}


EP_test:2:  89%|| 222/250 [00:18<00:02, 13.57it/s]

{'epoch': 2, 'iter': 220, 'avg_loss': 0.7244688736367549, 'avg_acc': 51.696832579185525, 'loss': 0.6171716451644897}


EP_test:2:  97%|| 242/250 [00:20<00:00, 12.38it/s]

{'epoch': 2, 'iter': 240, 'avg_loss': 0.7284515539640213, 'avg_acc': 50.829875518672196, 'loss': 0.7256729006767273}


EP_test:2: 100%|| 250/250 [00:20<00:00, 13.21it/s]

EP2_test, avg_loss= 0.728859419465065 total_acc= 50.750750750750754



EP_train:3:   0%|| 1/1000 [00:00<06:39,  2.50it/s]

{'epoch': 3, 'iter': 0, 'avg_loss': 0.8494674563407898, 'avg_acc': 25.0, 'loss': 0.8494674563407898}


EP_train:3:   2%|| 21/1000 [00:08<05:30,  2.96it/s]

{'epoch': 3, 'iter': 20, 'avg_loss': 0.734409749507904, 'avg_acc': 46.42857142857143, 'loss': 0.8705322742462158}


EP_train:3:   4%|| 41/1000 [00:15<06:02,  2.65it/s]

{'epoch': 3, 'iter': 40, 'avg_loss': 0.7346831414757705, 'avg_acc': 46.95121951219512, 'loss': 0.6831934452056885}


EP_train:3:   6%|| 61/1000 [00:22<04:40,  3.34it/s]

{'epoch': 3, 'iter': 60, 'avg_loss': 0.7213455756179622, 'avg_acc': 52.04918032786885, 'loss': 0.6637132167816162}


EP_train:3:   8%|| 81/1000 [00:28<04:43,  3.24it/s]

{'epoch': 3, 'iter': 80, 'avg_loss': 0.720604216243014, 'avg_acc': 52.160493827160494, 'loss': 0.7453241944313049}


EP_train:3:  10%|| 101/1000 [00:36<06:55,  2.17it/s]

{'epoch': 3, 'iter': 100, 'avg_loss': 0.7246197512244233, 'avg_acc': 50.742574257425744, 'loss': 0.6473793983459473}


EP_train:3:  12%|| 121/1000 [00:50<10:14,  1.43it/s]

{'epoch': 3, 'iter': 120, 'avg_loss': 0.7228462870948571, 'avg_acc': 49.79338842975206, 'loss': 0.8210897445678711}


EP_train:3:  14%|| 141/1000 [01:05<07:05,  2.02it/s]

{'epoch': 3, 'iter': 140, 'avg_loss': 0.7248053426015462, 'avg_acc': 48.58156028368794, 'loss': 0.7873366475105286}


EP_train:3:  16%|| 161/1000 [01:11<04:22,  3.20it/s]

{'epoch': 3, 'iter': 160, 'avg_loss': 0.7266307301032617, 'avg_acc': 48.75776397515528, 'loss': 0.6394412517547607}


EP_train:3:  18%|| 181/1000 [01:17<03:44,  3.65it/s]

{'epoch': 3, 'iter': 180, 'avg_loss': 0.727393363885458, 'avg_acc': 48.75690607734807, 'loss': 0.6161964535713196}


EP_train:3:  20%|| 201/1000 [01:23<04:01,  3.31it/s]

{'epoch': 3, 'iter': 200, 'avg_loss': 0.7269492622335159, 'avg_acc': 48.258706467661696, 'loss': 0.7109763026237488}


EP_train:3:  22%|| 221/1000 [01:30<05:13,  2.48it/s]

{'epoch': 3, 'iter': 220, 'avg_loss': 0.7258856254735144, 'avg_acc': 48.64253393665158, 'loss': 0.6060464978218079}


EP_train:3:  24%|| 241/1000 [01:36<03:45,  3.37it/s]

{'epoch': 3, 'iter': 240, 'avg_loss': 0.7226115870772556, 'avg_acc': 49.79253112033195, 'loss': 0.5647932291030884}


EP_train:3:  26%|| 261/1000 [01:45<10:37,  1.16it/s]

{'epoch': 3, 'iter': 260, 'avg_loss': 0.724620607164171, 'avg_acc': 49.712643678160916, 'loss': 0.8149665594100952}


EP_train:3:  28%|| 281/1000 [02:04<17:33,  1.47s/it]

{'epoch': 3, 'iter': 280, 'avg_loss': 0.7257582466797472, 'avg_acc': 49.11032028469751, 'loss': 0.6940388083457947}


EP_train:3:  30%|| 301/1000 [02:25<09:46,  1.19it/s]

{'epoch': 3, 'iter': 300, 'avg_loss': 0.7238259917477833, 'avg_acc': 49.75083056478405, 'loss': 0.630979061126709}


EP_train:3:  32%|| 321/1000 [02:34<04:04,  2.77it/s]

{'epoch': 3, 'iter': 320, 'avg_loss': 0.7242927414791607, 'avg_acc': 50.07788161993769, 'loss': 0.8379907011985779}


EP_train:3:  34%|| 341/1000 [02:42<04:06,  2.67it/s]

{'epoch': 3, 'iter': 340, 'avg_loss': 0.7241680399238889, 'avg_acc': 49.780058651026394, 'loss': 0.7294657230377197}


EP_train:3:  36%|| 361/1000 [02:48<02:54,  3.66it/s]

{'epoch': 3, 'iter': 360, 'avg_loss': 0.7243025688418391, 'avg_acc': 49.445983379501385, 'loss': 0.8312135338783264}


EP_train:3:  38%|| 381/1000 [02:54<03:15,  3.17it/s]

{'epoch': 3, 'iter': 380, 'avg_loss': 0.7250352034887929, 'avg_acc': 49.54068241469816, 'loss': 0.5995084643363953}


EP_train:3:  40%|| 401/1000 [03:02<03:43,  2.68it/s]

{'epoch': 3, 'iter': 400, 'avg_loss': 0.7256780196900974, 'avg_acc': 49.81296758104738, 'loss': 0.6856747269630432}


EP_train:3:  42%|| 421/1000 [03:08<02:50,  3.40it/s]

{'epoch': 3, 'iter': 420, 'avg_loss': 0.7254560475111574, 'avg_acc': 50.0, 'loss': 0.7526412606239319}


EP_train:3:  44%|| 441/1000 [03:14<02:52,  3.24it/s]

{'epoch': 3, 'iter': 440, 'avg_loss': 0.7224814530943526, 'avg_acc': 50.51020408163265, 'loss': 0.5813900232315063}


EP_train:3:  46%|| 461/1000 [03:25<05:00,  1.79it/s]

{'epoch': 3, 'iter': 460, 'avg_loss': 0.7247551984745613, 'avg_acc': 50.21691973969631, 'loss': 0.67690509557724}


EP_train:3:  48%|| 481/1000 [03:37<04:57,  1.74it/s]

{'epoch': 3, 'iter': 480, 'avg_loss': 0.7250407654133755, 'avg_acc': 50.15592515592515, 'loss': 0.6945565938949585}


EP_train:3:  50%|| 501/1000 [03:46<02:44,  3.04it/s]

{'epoch': 3, 'iter': 500, 'avg_loss': 0.7245032279910204, 'avg_acc': 50.399201596806385, 'loss': 0.9541381001472473}


EP_train:3:  52%|| 521/1000 [03:52<02:26,  3.27it/s]

{'epoch': 3, 'iter': 520, 'avg_loss': 0.7241273641929517, 'avg_acc': 50.38387715930902, 'loss': 0.9088416695594788}


EP_train:3:  54%|| 541/1000 [03:59<03:16,  2.33it/s]

{'epoch': 3, 'iter': 540, 'avg_loss': 0.7238998865354083, 'avg_acc': 50.32347504621072, 'loss': 0.6459690928459167}


EP_train:3:  56%|| 561/1000 [04:06<02:18,  3.16it/s]

{'epoch': 3, 'iter': 560, 'avg_loss': 0.724278084043938, 'avg_acc': 50.31194295900179, 'loss': 0.713530421257019}


EP_train:3:  58%|| 581/1000 [04:12<01:57,  3.56it/s]

{'epoch': 3, 'iter': 580, 'avg_loss': 0.7274580659944301, 'avg_acc': 50.387263339070564, 'loss': 0.6860936284065247}


EP_train:3:  60%|| 601/1000 [04:18<02:37,  2.53it/s]

{'epoch': 3, 'iter': 600, 'avg_loss': 0.7283538634983354, 'avg_acc': 50.08319467554077, 'loss': 0.711600124835968}


EP_train:3:  62%|| 621/1000 [04:25<01:59,  3.16it/s]

{'epoch': 3, 'iter': 620, 'avg_loss': 0.728172774716084, 'avg_acc': 49.8389694041868, 'loss': 0.7306304574012756}


EP_train:3:  64%|| 641/1000 [04:31<01:39,  3.62it/s]

{'epoch': 3, 'iter': 640, 'avg_loss': 0.7277745180029579, 'avg_acc': 49.84399375975039, 'loss': 0.6964512467384338}


EP_train:3:  66%|| 661/1000 [04:38<02:12,  2.55it/s]

{'epoch': 3, 'iter': 660, 'avg_loss': 0.7271742512797443, 'avg_acc': 49.62178517397882, 'loss': 0.748207151889801}


EP_train:3:  68%|| 681/1000 [04:45<01:43,  3.09it/s]

{'epoch': 3, 'iter': 680, 'avg_loss': 0.7264974669125287, 'avg_acc': 49.74302496328928, 'loss': 0.6888989210128784}


EP_train:3:  70%|| 701/1000 [04:51<01:23,  3.57it/s]

{'epoch': 3, 'iter': 700, 'avg_loss': 0.7259243269704718, 'avg_acc': 49.78601997146933, 'loss': 0.6828657388687134}


EP_train:3:  72%|| 721/1000 [04:57<01:49,  2.54it/s]

{'epoch': 3, 'iter': 720, 'avg_loss': 0.7255208540640657, 'avg_acc': 49.79195561719833, 'loss': 0.69500732421875}


EP_train:3:  74%|| 741/1000 [05:04<01:18,  3.30it/s]

{'epoch': 3, 'iter': 740, 'avg_loss': 0.7248828350371517, 'avg_acc': 49.59514170040486, 'loss': 0.6825965046882629}


EP_train:3:  76%|| 761/1000 [05:10<01:07,  3.55it/s]

{'epoch': 3, 'iter': 760, 'avg_loss': 0.7243707658191861, 'avg_acc': 49.73718791064389, 'loss': 0.6934136152267456}


EP_train:3:  78%|| 781/1000 [05:15<01:07,  3.24it/s]

{'epoch': 3, 'iter': 780, 'avg_loss': 0.723235937262314, 'avg_acc': 50.0, 'loss': 0.7350465655326843}


EP_train:3:  80%|| 801/1000 [05:23<01:13,  2.72it/s]

{'epoch': 3, 'iter': 800, 'avg_loss': 0.7228955036022243, 'avg_acc': 50.0, 'loss': 0.7133358716964722}


EP_train:3:  82%|| 821/1000 [05:29<00:48,  3.66it/s]

{'epoch': 3, 'iter': 820, 'avg_loss': 0.7224692990152612, 'avg_acc': 50.0, 'loss': 0.5391508340835571}


EP_train:3:  84%|| 841/1000 [05:35<00:46,  3.45it/s]

{'epoch': 3, 'iter': 840, 'avg_loss': 0.7226972396039225, 'avg_acc': 49.88109393579073, 'loss': 0.714320957660675}


EP_train:3:  86%|| 861/1000 [05:42<00:51,  2.68it/s]

{'epoch': 3, 'iter': 860, 'avg_loss': 0.7225559829765634, 'avg_acc': 49.91289198606272, 'loss': 0.5904687643051147}


EP_train:3:  88%|| 881/1000 [05:48<00:32,  3.66it/s]

{'epoch': 3, 'iter': 880, 'avg_loss': 0.7223010541158691, 'avg_acc': 49.91486946651532, 'loss': 0.7051641345024109}


EP_train:3:  90%|| 901/1000 [05:54<00:27,  3.66it/s]

{'epoch': 3, 'iter': 900, 'avg_loss': 0.7215762084451288, 'avg_acc': 50.138734739178695, 'loss': 0.6945600509643555}


EP_train:3:  92%|| 921/1000 [06:00<00:26,  3.01it/s]

{'epoch': 3, 'iter': 920, 'avg_loss': 0.7210017002949108, 'avg_acc': 50.4071661237785, 'loss': 0.6281965374946594}


EP_train:3:  94%|| 941/1000 [06:06<00:16,  3.58it/s]

{'epoch': 3, 'iter': 940, 'avg_loss': 0.7207266680429136, 'avg_acc': 50.53134962805525, 'loss': 0.7127733826637268}


EP_train:3:  96%|| 961/1000 [06:11<00:09,  3.93it/s]

{'epoch': 3, 'iter': 960, 'avg_loss': 0.7205107972165424, 'avg_acc': 50.57232049947971, 'loss': 0.8507910370826721}


EP_train:3:  98%|| 981/1000 [06:17<00:06,  2.75it/s]

{'epoch': 3, 'iter': 980, 'avg_loss': 0.7198629934914129, 'avg_acc': 50.68807339449541, 'loss': 0.6000630855560303}


EP_train:3: 100%|| 1000/1000 [06:24<00:00,  3.11it/s]

EP3_train, avg_loss= 0.7203483867943287 total_acc= 50.53763440860215
EP:3 Model Saved on: ./output/bertmodel2019-05-1114:59:01.616888.ep3



EP_test:3:   1%|| 2/250 [00:00<00:35,  6.93it/s]

{'epoch': 3, 'iter': 0, 'avg_loss': 0.7712844610214233, 'avg_acc': 25.0, 'loss': 0.7712844610214233}


EP_test:3:   9%|| 22/250 [00:01<00:20, 11.17it/s]

{'epoch': 3, 'iter': 20, 'avg_loss': 0.7292736371358236, 'avg_acc': 41.66666666666667, 'loss': 0.6811007857322693}


EP_test:3:  17%|| 42/250 [00:03<00:15, 13.51it/s]

{'epoch': 3, 'iter': 40, 'avg_loss': 0.7148662761944097, 'avg_acc': 46.34146341463415, 'loss': 0.7852569818496704}


EP_test:3:  25%|| 62/250 [00:04<00:14, 13.36it/s]

{'epoch': 3, 'iter': 60, 'avg_loss': 0.7062757767614771, 'avg_acc': 49.59016393442623, 'loss': 0.7033790946006775}


EP_test:3:  33%|| 82/250 [00:06<00:11, 14.05it/s]

{'epoch': 3, 'iter': 80, 'avg_loss': 0.7028465425526654, 'avg_acc': 50.617283950617285, 'loss': 0.6251077651977539}


EP_test:3:  41%|| 102/250 [00:07<00:11, 12.36it/s]

{'epoch': 3, 'iter': 100, 'avg_loss': 0.7022388134852494, 'avg_acc': 51.23762376237624, 'loss': 0.6826469302177429}


EP_test:3:  49%|| 122/250 [00:09<00:12, 10.57it/s]

{'epoch': 3, 'iter': 120, 'avg_loss': 0.7038295032564273, 'avg_acc': 51.03305785123967, 'loss': 0.7712844610214233}


EP_test:3:  57%|| 142/250 [00:11<00:08, 12.24it/s]

{'epoch': 3, 'iter': 140, 'avg_loss': 0.7051294044399938, 'avg_acc': 50.53191489361703, 'loss': 0.7852569818496704}


EP_test:3:  65%|| 162/250 [00:12<00:06, 12.80it/s]

{'epoch': 3, 'iter': 160, 'avg_loss': 0.7039766037686271, 'avg_acc': 50.931677018633536, 'loss': 0.6131954789161682}


EP_test:3:  73%|| 182/250 [00:14<00:05, 13.09it/s]

{'epoch': 3, 'iter': 180, 'avg_loss': 0.7023123268923048, 'avg_acc': 51.24309392265194, 'loss': 0.7186236381530762}


EP_test:3:  81%|| 202/250 [44:52<13:01, 16.27s/it]   

{'epoch': 3, 'iter': 200, 'avg_loss': 0.7031506649890349, 'avg_acc': 50.87064676616916, 'loss': 0.7033791542053223}


EP_test:3:  89%|| 222/250 [44:53<00:14,  1.87it/s]

{'epoch': 3, 'iter': 220, 'avg_loss': 0.7028992545550765, 'avg_acc': 51.01809954751131, 'loss': 0.7712844610214233}


EP_test:3:  97%|| 242/250 [44:55<00:00, 10.56it/s]

{'epoch': 3, 'iter': 240, 'avg_loss': 0.7022848848979998, 'avg_acc': 51.244813278008294, 'loss': 0.7033790946006775}


EP_test:3: 100%|| 250/250 [44:56<00:00, 12.63it/s]

EP3_test, avg_loss= 0.7027101213932038 total_acc= 51.251251251251254



EP_train:4:   0%|| 1/1000 [00:00<06:47,  2.45it/s]

{'epoch': 4, 'iter': 0, 'avg_loss': 0.7213821411132812, 'avg_acc': 50.0, 'loss': 0.7213821411132812}


EP_train:4:   2%|| 21/1000 [00:06<05:25,  3.00it/s]

{'epoch': 4, 'iter': 20, 'avg_loss': 0.7008558710416158, 'avg_acc': 47.61904761904761, 'loss': 0.6689196825027466}


EP_train:4:   4%|| 41/1000 [00:12<04:23,  3.63it/s]

{'epoch': 4, 'iter': 40, 'avg_loss': 0.7065064034810881, 'avg_acc': 46.95121951219512, 'loss': 0.697757363319397}


EP_train:4:   6%|| 61/1000 [00:17<03:59,  3.92it/s]

{'epoch': 4, 'iter': 60, 'avg_loss': 0.7028590186697538, 'avg_acc': 47.540983606557376, 'loss': 0.6623276472091675}


EP_train:4:   8%|| 81/1000 [00:23<05:27,  2.81it/s]

{'epoch': 4, 'iter': 80, 'avg_loss': 0.7034528696978534, 'avg_acc': 46.60493827160494, 'loss': 0.6960508227348328}


EP_train:4:  10%|| 101/1000 [00:29<03:59,  3.75it/s]

{'epoch': 4, 'iter': 100, 'avg_loss': 0.702640425450731, 'avg_acc': 47.77227722772277, 'loss': 0.6876497268676758}


EP_train:4:  12%|| 121/1000 [00:34<04:27,  3.28it/s]

{'epoch': 4, 'iter': 120, 'avg_loss': 0.7018884839105212, 'avg_acc': 47.72727272727273, 'loss': 0.6700875759124756}


EP_train:4:  14%|| 141/1000 [00:40<03:56,  3.63it/s]

{'epoch': 4, 'iter': 140, 'avg_loss': 0.7021746229618153, 'avg_acc': 48.04964539007092, 'loss': 0.6941172480583191}


EP_train:4:  16%|| 161/1000 [00:45<03:50,  3.63it/s]

{'epoch': 4, 'iter': 160, 'avg_loss': 0.7014943357580197, 'avg_acc': 48.75776397515528, 'loss': 0.6361830830574036}


EP_train:4:  18%|| 181/1000 [00:51<03:39,  3.73it/s]

{'epoch': 4, 'iter': 180, 'avg_loss': 0.702624155671557, 'avg_acc': 48.61878453038674, 'loss': 0.778225839138031}


EP_train:4:  20%|| 201/1000 [00:56<03:49,  3.49it/s]

{'epoch': 4, 'iter': 200, 'avg_loss': 0.7028114448732404, 'avg_acc': 48.00995024875622, 'loss': 0.6986795663833618}


EP_train:4:  22%|| 221/1000 [01:02<03:40,  3.53it/s]

{'epoch': 4, 'iter': 220, 'avg_loss': 0.7023505827420438, 'avg_acc': 48.30316742081448, 'loss': 0.7044405341148376}


EP_train:4:  24%|| 241/1000 [01:07<03:10,  3.99it/s]

{'epoch': 4, 'iter': 240, 'avg_loss': 0.7028547765308396, 'avg_acc': 47.82157676348548, 'loss': 0.7009091973304749}


EP_train:4:  26%|| 261/1000 [01:12<03:19,  3.71it/s]

{'epoch': 4, 'iter': 260, 'avg_loss': 0.7041107802098738, 'avg_acc': 47.605363984674334, 'loss': 0.7065826058387756}


EP_train:4:  28%|| 281/1000 [01:19<03:37,  3.31it/s]

{'epoch': 4, 'iter': 280, 'avg_loss': 0.705160705644465, 'avg_acc': 47.330960854092524, 'loss': 0.9091789722442627}


EP_train:4:  30%|| 301/1000 [01:24<03:04,  3.78it/s]

{'epoch': 4, 'iter': 300, 'avg_loss': 0.7052788633444776, 'avg_acc': 47.92358803986711, 'loss': 0.7732499837875366}


EP_train:4:  32%|| 321/1000 [01:29<02:54,  3.89it/s]

{'epoch': 4, 'iter': 320, 'avg_loss': 0.7049970314881512, 'avg_acc': 47.97507788161994, 'loss': 0.7514991164207458}


EP_train:4:  34%|| 341/1000 [01:35<03:39,  3.01it/s]

{'epoch': 4, 'iter': 340, 'avg_loss': 0.7048282874993914, 'avg_acc': 48.02052785923754, 'loss': 0.6978023052215576}


EP_train:4:  36%|| 361/1000 [01:41<03:18,  3.23it/s]

{'epoch': 4, 'iter': 360, 'avg_loss': 0.7045137304348298, 'avg_acc': 48.13019390581717, 'loss': 0.6964129209518433}


EP_train:4:  38%|| 381/1000 [01:47<02:49,  3.64it/s]

{'epoch': 4, 'iter': 380, 'avg_loss': 0.7052413218916245, 'avg_acc': 48.29396325459317, 'loss': 0.6961765885353088}


EP_train:4:  40%|| 401/1000 [01:52<02:52,  3.48it/s]

{'epoch': 4, 'iter': 400, 'avg_loss': 0.7056698876426107, 'avg_acc': 48.6284289276808, 'loss': 0.6965118646621704}


EP_train:4:  42%|| 421/1000 [01:59<03:06,  3.10it/s]

{'epoch': 4, 'iter': 420, 'avg_loss': 0.706998935787808, 'avg_acc': 48.87173396674584, 'loss': 0.8000500202178955}


EP_train:4:  44%|| 441/1000 [02:05<02:37,  3.55it/s]

{'epoch': 4, 'iter': 440, 'avg_loss': 0.708660322792676, 'avg_acc': 48.97959183673469, 'loss': 0.9219028353691101}


EP_train:4:  46%|| 461/1000 [02:11<02:55,  3.08it/s]

{'epoch': 4, 'iter': 460, 'avg_loss': 0.710427188110455, 'avg_acc': 49.34924078091106, 'loss': 0.5194963216781616}


EP_train:4:  48%|| 481/1000 [02:18<02:48,  3.08it/s]

{'epoch': 4, 'iter': 480, 'avg_loss': 0.722160667924524, 'avg_acc': 49.064449064449065, 'loss': 1.0653067827224731}


EP_train:4:  50%|| 501/1000 [02:24<02:15,  3.67it/s]

{'epoch': 4, 'iter': 500, 'avg_loss': 0.7349233470872015, 'avg_acc': 49.20159680638723, 'loss': 0.7064341306686401}


EP_train:4:  52%|| 521/1000 [02:29<02:07,  3.76it/s]

{'epoch': 4, 'iter': 520, 'avg_loss': 0.7451940532380469, 'avg_acc': 49.32821497120921, 'loss': 2.717867374420166}


EP_train:4:  54%|| 541/1000 [02:35<02:31,  3.04it/s]

{'epoch': 4, 'iter': 540, 'avg_loss': 0.7463262321327618, 'avg_acc': 49.53789279112754, 'loss': 0.614564061164856}


EP_train:4:  56%|| 561/1000 [02:40<01:58,  3.69it/s]

{'epoch': 4, 'iter': 560, 'avg_loss': 0.7489078890618581, 'avg_acc': 49.59893048128342, 'loss': 0.9011319279670715}


EP_train:4:  58%|| 581/1000 [02:45<01:41,  4.12it/s]

{'epoch': 4, 'iter': 580, 'avg_loss': 0.7521048221038253, 'avg_acc': 49.39759036144578, 'loss': 0.7744609713554382}


EP_train:4:  60%|| 601/1000 [02:50<01:39,  4.01it/s]

{'epoch': 4, 'iter': 600, 'avg_loss': 0.7528837177400383, 'avg_acc': 49.25124792013311, 'loss': 0.8015420436859131}


EP_train:4:  62%|| 621/1000 [02:55<01:42,  3.70it/s]

{'epoch': 4, 'iter': 620, 'avg_loss': 0.7542266536162096, 'avg_acc': 49.19484702093398, 'loss': 0.6762591600418091}


EP_train:4:  64%|| 641/1000 [03:00<01:27,  4.12it/s]

{'epoch': 4, 'iter': 640, 'avg_loss': 0.7544042983218772, 'avg_acc': 49.25897035881435, 'loss': 0.7036992907524109}


EP_train:4:  66%|| 661/1000 [03:06<01:25,  3.95it/s]

{'epoch': 4, 'iter': 660, 'avg_loss': 0.7532483251841454, 'avg_acc': 49.281391830559755, 'loss': 0.7035216689109802}


EP_train:4:  68%|| 681/1000 [03:11<01:25,  3.75it/s]

{'epoch': 4, 'iter': 680, 'avg_loss': 0.7526779179618572, 'avg_acc': 49.33920704845815, 'loss': 0.8574291467666626}


EP_train:4:  70%|| 701/1000 [03:16<01:13,  4.09it/s]

{'epoch': 4, 'iter': 700, 'avg_loss': 0.751766366939912, 'avg_acc': 49.2867332382311, 'loss': 0.7033390402793884}


EP_train:4:  72%|| 721/1000 [03:21<01:15,  3.70it/s]

{'epoch': 4, 'iter': 720, 'avg_loss': 0.75181322128366, 'avg_acc': 49.133148404993065, 'loss': 0.7297581434249878}


EP_train:4:  74%|| 741/1000 [03:26<01:07,  3.85it/s]

{'epoch': 4, 'iter': 740, 'avg_loss': 0.7520463505656774, 'avg_acc': 49.392712550607285, 'loss': 0.5943943858146667}


EP_train:4:  76%|| 761/1000 [03:31<00:58,  4.11it/s]

{'epoch': 4, 'iter': 760, 'avg_loss': 0.7526540045434322, 'avg_acc': 49.2772667542707, 'loss': 0.7947160005569458}


EP_train:4:  78%|| 781/1000 [03:36<01:00,  3.65it/s]

{'epoch': 4, 'iter': 780, 'avg_loss': 0.7520380220477315, 'avg_acc': 49.167733674775924, 'loss': 0.7175090312957764}


EP_train:4:  80%|| 801/1000 [03:41<00:54,  3.64it/s]

{'epoch': 4, 'iter': 800, 'avg_loss': 0.7518093733677406, 'avg_acc': 49.03245942571785, 'loss': 0.8011648654937744}


EP_train:4:  82%|| 821/1000 [03:46<00:44,  4.05it/s]

{'epoch': 4, 'iter': 820, 'avg_loss': 0.7511820996069007, 'avg_acc': 48.873325213154686, 'loss': 0.6510437726974487}


EP_train:4:  84%|| 841/1000 [03:51<00:42,  3.75it/s]

{'epoch': 4, 'iter': 840, 'avg_loss': 0.7500653578395934, 'avg_acc': 48.90011890606421, 'loss': 0.6950672268867493}


EP_train:4:  86%|| 861/1000 [03:56<00:33,  4.10it/s]

{'epoch': 4, 'iter': 860, 'avg_loss': 0.7493429387834153, 'avg_acc': 48.89663182346109, 'loss': 0.6825351119041443}


EP_train:4:  88%|| 881/1000 [04:02<00:30,  3.92it/s]

{'epoch': 4, 'iter': 880, 'avg_loss': 0.7481814749733409, 'avg_acc': 48.86492622020431, 'loss': 0.6718334555625916}


EP_train:4:  90%|| 901/1000 [04:06<00:26,  3.75it/s]

{'epoch': 4, 'iter': 900, 'avg_loss': 0.7473658684818911, 'avg_acc': 48.834628190899, 'loss': 0.6819285154342651}


EP_train:4:  92%|| 921/1000 [04:12<00:19,  4.10it/s]

{'epoch': 4, 'iter': 920, 'avg_loss': 0.7463198120055576, 'avg_acc': 48.85993485342019, 'loss': 0.6726290583610535}


EP_train:4:  94%|| 941/1000 [04:17<00:16,  3.64it/s]

{'epoch': 4, 'iter': 940, 'avg_loss': 0.7458464873521158, 'avg_acc': 48.80446333687566, 'loss': 0.6943491101264954}


EP_train:4:  96%|| 961/1000 [04:22<00:10,  3.76it/s]

{'epoch': 4, 'iter': 960, 'avg_loss': 0.7450987785651458, 'avg_acc': 48.62122788761706, 'loss': 0.7594621777534485}


EP_train:4:  98%|| 981/1000 [04:27<00:04,  4.12it/s]

{'epoch': 4, 'iter': 980, 'avg_loss': 0.7440853020770591, 'avg_acc': 48.75127420998981, 'loss': 0.5452268123626709}


EP_train:4: 100%|| 1000/1000 [04:32<00:00,  3.94it/s]

EP4_train, avg_loss= 0.7439063585102558 total_acc= 48.66216554138535
EP:4 Model Saved on: ./output/bertmodel2019-05-1114:59:01.616888.ep4



EP_test:4:   1%|| 2/250 [00:00<00:40,  6.19it/s]

{'epoch': 4, 'iter': 0, 'avg_loss': 0.6936553120613098, 'avg_acc': 50.0, 'loss': 0.6936553120613098}


EP_test:4:   9%|| 23/250 [00:02<00:17, 12.64it/s]

{'epoch': 4, 'iter': 20, 'avg_loss': 0.6955420232954479, 'avg_acc': 46.42857142857143, 'loss': 0.6904228329658508}


EP_test:4:  17%|| 43/250 [00:03<00:15, 13.30it/s]

{'epoch': 4, 'iter': 40, 'avg_loss': 0.6948149393244487, 'avg_acc': 52.4390243902439, 'loss': 0.6936553120613098}


EP_test:4:  25%|| 63/250 [00:04<00:13, 13.76it/s]

{'epoch': 4, 'iter': 60, 'avg_loss': 0.695023524956625, 'avg_acc': 50.409836065573764, 'loss': 0.7078202962875366}


EP_test:4:  33%|| 83/250 [00:06<00:11, 14.76it/s]

{'epoch': 4, 'iter': 80, 'avg_loss': 0.6930511086075394, 'avg_acc': 54.01234567901234, 'loss': 0.6892116069793701}


EP_test:4:  41%|| 103/250 [00:07<00:10, 13.94it/s]

{'epoch': 4, 'iter': 100, 'avg_loss': 0.69448189216085, 'avg_acc': 52.22772277227723, 'loss': 0.6876652240753174}


EP_test:4:  49%|| 123/250 [00:09<00:09, 13.07it/s]

{'epoch': 4, 'iter': 120, 'avg_loss': 0.695716993375258, 'avg_acc': 51.446280991735534, 'loss': 0.7159952521324158}


EP_test:4:  57%|| 143/250 [00:10<00:08, 12.57it/s]

{'epoch': 4, 'iter': 140, 'avg_loss': 0.6965777062355204, 'avg_acc': 50.35460992907801, 'loss': 0.7150089144706726}


EP_test:4:  65%|| 163/250 [00:12<00:06, 14.29it/s]

{'epoch': 4, 'iter': 160, 'avg_loss': 0.6970789699080568, 'avg_acc': 49.06832298136646, 'loss': 0.7033766508102417}


EP_test:4:  73%|| 183/250 [00:13<00:04, 13.54it/s]

{'epoch': 4, 'iter': 180, 'avg_loss': 0.6969587167323623, 'avg_acc': 49.72375690607735, 'loss': 0.7083386182785034}


EP_test:4:  81%|| 203/250 [00:15<00:03, 14.33it/s]

{'epoch': 4, 'iter': 200, 'avg_loss': 0.6969849319007266, 'avg_acc': 49.25373134328358, 'loss': 0.6973865628242493}


EP_test:4:  89%|| 223/250 [00:16<00:02, 12.93it/s]

{'epoch': 4, 'iter': 220, 'avg_loss': 0.6971803045920117, 'avg_acc': 49.321266968325794, 'loss': 0.7136110663414001}


EP_test:4:  97%|| 243/250 [00:18<00:00, 12.22it/s]

{'epoch': 4, 'iter': 240, 'avg_loss': 0.6970216409299383, 'avg_acc': 49.481327800829874, 'loss': 0.6994460821151733}


EP_test:4: 100%|| 250/250 [00:18<00:00, 13.27it/s]

EP4_test, avg_loss= 0.6971935393810272 total_acc= 49.549549549549546



EP_train:5:   0%|| 1/1000 [00:00<06:29,  2.56it/s]

{'epoch': 5, 'iter': 0, 'avg_loss': 0.6994460821151733, 'avg_acc': 50.0, 'loss': 0.6994460821151733}


EP_train:5:   2%|| 21/1000 [00:05<04:27,  3.66it/s]

{'epoch': 5, 'iter': 20, 'avg_loss': 0.7019017509051731, 'avg_acc': 50.0, 'loss': 0.6402953267097473}


EP_train:5:   4%|| 41/1000 [00:11<05:47,  2.76it/s]

{'epoch': 5, 'iter': 40, 'avg_loss': 0.6983449829787742, 'avg_acc': 54.87804878048781, 'loss': 0.6939634680747986}


EP_train:5:   6%|| 61/1000 [00:17<04:44,  3.30it/s]

{'epoch': 5, 'iter': 60, 'avg_loss': 0.699179143690672, 'avg_acc': 55.73770491803278, 'loss': 0.6308316588401794}


EP_train:5:   8%|| 81/1000 [00:23<03:45,  4.08it/s]

{'epoch': 5, 'iter': 80, 'avg_loss': 0.7001845472388797, 'avg_acc': 54.32098765432099, 'loss': 0.7247812151908875}


EP_train:5:  10%|| 101/1000 [00:28<04:04,  3.67it/s]

{'epoch': 5, 'iter': 100, 'avg_loss': 0.7013365493552519, 'avg_acc': 54.70297029702971, 'loss': 0.6948955059051514}


EP_train:5:  12%|| 121/1000 [00:33<03:53,  3.76it/s]

{'epoch': 5, 'iter': 120, 'avg_loss': 0.7000546265732158, 'avg_acc': 53.925619834710744, 'loss': 0.5387744903564453}


EP_train:5:  14%|| 141/1000 [00:40<05:38,  2.54it/s]

{'epoch': 5, 'iter': 140, 'avg_loss': 0.7010823071848417, 'avg_acc': 53.54609929078015, 'loss': 0.7118341326713562}


EP_train:5:  16%|| 161/1000 [00:47<04:27,  3.14it/s]

{'epoch': 5, 'iter': 160, 'avg_loss': 0.6996918037441207, 'avg_acc': 53.57142857142857, 'loss': 0.656503438949585}


EP_train:5:  18%|| 181/1000 [00:52<03:23,  4.02it/s]

{'epoch': 5, 'iter': 180, 'avg_loss': 0.7012272332254694, 'avg_acc': 53.03867403314917, 'loss': 0.7237453460693359}


EP_train:5:  20%|| 201/1000 [00:57<03:23,  3.93it/s]

{'epoch': 5, 'iter': 200, 'avg_loss': 0.7012815994409779, 'avg_acc': 52.363184079601986, 'loss': 0.6905146837234497}


EP_train:5:  22%|| 221/1000 [01:02<03:31,  3.68it/s]

{'epoch': 5, 'iter': 220, 'avg_loss': 0.701948671319366, 'avg_acc': 52.37556561085973, 'loss': 0.7087090611457825}


EP_train:5:  24%|| 241/1000 [01:09<04:42,  2.69it/s]

{'epoch': 5, 'iter': 240, 'avg_loss': 0.7035643084909906, 'avg_acc': 52.80082987551867, 'loss': 0.586658239364624}


EP_train:5:  26%|| 261/1000 [01:16<03:45,  3.28it/s]

{'epoch': 5, 'iter': 260, 'avg_loss': 0.7053636449506913, 'avg_acc': 52.394636015325666, 'loss': 0.6944879293441772}


EP_train:5:  28%|| 281/1000 [01:21<03:43,  3.22it/s]

{'epoch': 5, 'iter': 280, 'avg_loss': 0.7055268675831289, 'avg_acc': 52.22419928825622, 'loss': 0.7090456485748291}


EP_train:5:  30%|| 301/1000 [01:27<03:15,  3.58it/s]

{'epoch': 5, 'iter': 300, 'avg_loss': 0.7056753514613028, 'avg_acc': 52.32558139534884, 'loss': 0.7276787161827087}


EP_train:5:  32%|| 321/1000 [01:33<03:14,  3.49it/s]

{'epoch': 5, 'iter': 320, 'avg_loss': 0.7066027353114428, 'avg_acc': 51.557632398753896, 'loss': 0.6978524923324585}


EP_train:5:  34%|| 341/1000 [01:39<03:20,  3.28it/s]

{'epoch': 5, 'iter': 340, 'avg_loss': 0.7072805996514485, 'avg_acc': 51.686217008797655, 'loss': 0.6984809041023254}


EP_train:5:  36%|| 361/1000 [01:45<03:00,  3.53it/s]

{'epoch': 5, 'iter': 360, 'avg_loss': 0.7066067775861048, 'avg_acc': 51.8005540166205, 'loss': 0.6809477210044861}


EP_train:5:  38%|| 381/1000 [01:51<03:51,  2.68it/s]

{'epoch': 5, 'iter': 380, 'avg_loss': 0.7056276546375645, 'avg_acc': 51.96850393700787, 'loss': 0.8663169145584106}


EP_train:5:  40%|| 401/1000 [01:58<02:47,  3.57it/s]

{'epoch': 5, 'iter': 400, 'avg_loss': 0.7053033419678039, 'avg_acc': 51.99501246882793, 'loss': 0.7790384292602539}


EP_train:5:  42%|| 421/1000 [02:03<02:21,  4.10it/s]

{'epoch': 5, 'iter': 420, 'avg_loss': 0.7054589407744147, 'avg_acc': 51.959619952494066, 'loss': 0.7002147436141968}


EP_train:5:  44%|| 441/1000 [02:08<02:30,  3.71it/s]

{'epoch': 5, 'iter': 440, 'avg_loss': 0.7054359295200598, 'avg_acc': 52.09750566893424, 'loss': 0.6955379843711853}


EP_train:5:  46%|| 461/1000 [02:14<02:49,  3.18it/s]

{'epoch': 5, 'iter': 460, 'avg_loss': 0.7049457265342911, 'avg_acc': 52.33188720173536, 'loss': 0.7442639470100403}


EP_train:5:  48%|| 481/1000 [02:20<02:23,  3.62it/s]

{'epoch': 5, 'iter': 480, 'avg_loss': 0.7053384029939616, 'avg_acc': 51.975051975051976, 'loss': 0.6926770806312561}


EP_train:5:  50%|| 501/1000 [02:26<02:29,  3.33it/s]

{'epoch': 5, 'iter': 500, 'avg_loss': 0.7050988474291956, 'avg_acc': 52.04590818363274, 'loss': 0.6997556686401367}


EP_train:5:  52%|| 521/1000 [02:31<02:05,  3.82it/s]

{'epoch': 5, 'iter': 520, 'avg_loss': 0.7053679971456985, 'avg_acc': 51.96737044145874, 'loss': 0.7215021252632141}


EP_train:5:  54%|| 541/1000 [02:37<02:04,  3.69it/s]

{'epoch': 5, 'iter': 540, 'avg_loss': 0.705303674925277, 'avg_acc': 51.80221811460258, 'loss': 0.7137423753738403}


EP_train:5:  56%|| 561/1000 [02:43<02:15,  3.24it/s]

{'epoch': 5, 'iter': 560, 'avg_loss': 0.7051903194284694, 'avg_acc': 51.69340463458111, 'loss': 0.6842779517173767}


EP_train:5:  58%|| 581/1000 [02:49<01:59,  3.52it/s]

{'epoch': 5, 'iter': 580, 'avg_loss': 0.705221203231155, 'avg_acc': 51.50602409638554, 'loss': 0.7133264541625977}


EP_train:5:  60%|| 601/1000 [02:55<02:07,  3.12it/s]

{'epoch': 5, 'iter': 600, 'avg_loss': 0.7048904862062705, 'avg_acc': 51.663893510815306, 'loss': 0.6700941324234009}


EP_train:5:  62%|| 621/1000 [03:01<01:52,  3.37it/s]

{'epoch': 5, 'iter': 620, 'avg_loss': 0.7048462807844227, 'avg_acc': 51.81159420289855, 'loss': 0.7125193476676941}


EP_train:5:  64%|| 641/1000 [03:08<02:18,  2.59it/s]

{'epoch': 5, 'iter': 640, 'avg_loss': 0.7047632035338749, 'avg_acc': 51.6770670826833, 'loss': 0.6468567848205566}


EP_train:5:  66%|| 661/1000 [03:14<01:33,  3.61it/s]

{'epoch': 5, 'iter': 660, 'avg_loss': 0.704930651927319, 'avg_acc': 51.70196671709532, 'loss': 1.077373743057251}


EP_train:5:  68%|| 681/1000 [03:19<01:18,  4.06it/s]

{'epoch': 5, 'iter': 680, 'avg_loss': 0.7046519506957912, 'avg_acc': 51.76211453744494, 'loss': 0.8091441988945007}


EP_train:5:  70%|| 701/1000 [03:24<01:22,  3.65it/s]

{'epoch': 5, 'iter': 700, 'avg_loss': 0.7044694067833257, 'avg_acc': 51.890156918687595, 'loss': 0.6643015742301941}


EP_train:5:  72%|| 721/1000 [03:31<01:28,  3.14it/s]

{'epoch': 5, 'iter': 720, 'avg_loss': 0.7044566683316198, 'avg_acc': 51.69902912621359, 'loss': 0.6975255012512207}


EP_train:5:  74%|| 741/1000 [03:37<01:09,  3.71it/s]

{'epoch': 5, 'iter': 740, 'avg_loss': 0.7044941942621017, 'avg_acc': 51.61943319838057, 'loss': 0.7011316418647766}


EP_train:5:  76%|| 761/1000 [03:42<01:03,  3.77it/s]

{'epoch': 5, 'iter': 760, 'avg_loss': 0.7046558517429111, 'avg_acc': 51.77398160315374, 'loss': 0.9962867498397827}


EP_train:5:  78%|| 781/1000 [03:47<00:53,  4.09it/s]

{'epoch': 5, 'iter': 780, 'avg_loss': 0.7047574879952185, 'avg_acc': 51.696542893726, 'loss': 0.7755950689315796}


EP_train:5:  80%|| 801/1000 [03:52<00:53,  3.70it/s]

{'epoch': 5, 'iter': 800, 'avg_loss': 0.7049894607841001, 'avg_acc': 51.68539325842697, 'loss': 0.7922235727310181}


EP_train:5:  82%|| 821/1000 [03:59<00:55,  3.21it/s]

{'epoch': 5, 'iter': 820, 'avg_loss': 0.7044419100673712, 'avg_acc': 51.9183922046285, 'loss': 0.8193628787994385}


EP_train:5:  84%|| 841/1000 [04:05<00:47,  3.34it/s]

{'epoch': 5, 'iter': 840, 'avg_loss': 0.7043948544191548, 'avg_acc': 52.02140309155767, 'loss': 0.6718418002128601}


EP_train:5:  86%|| 861/1000 [04:10<00:42,  3.27it/s]

{'epoch': 5, 'iter': 860, 'avg_loss': 0.7047737856913665, 'avg_acc': 52.06155632984901, 'loss': 0.5484604239463806}


EP_train:5:  88%|| 881/1000 [04:18<00:40,  2.96it/s]

{'epoch': 5, 'iter': 880, 'avg_loss': 0.7052026498601872, 'avg_acc': 51.73098751418842, 'loss': 0.7198769450187683}


EP_train:5:  90%|| 901/1000 [04:23<00:27,  3.62it/s]

{'epoch': 5, 'iter': 900, 'avg_loss': 0.7051737274763719, 'avg_acc': 51.63706992230854, 'loss': 0.6948524117469788}


EP_train:5:  92%|| 921/1000 [04:28<00:19,  4.13it/s]

{'epoch': 5, 'iter': 920, 'avg_loss': 0.7052699594104199, 'avg_acc': 51.54723127035831, 'loss': 0.693553626537323}


EP_train:5:  94%|| 941/1000 [04:34<00:19,  3.04it/s]

{'epoch': 5, 'iter': 940, 'avg_loss': 0.7051623773372135, 'avg_acc': 51.59404888416578, 'loss': 0.7072586417198181}


EP_train:5:  96%|| 961/1000 [04:40<00:12,  3.21it/s]

{'epoch': 5, 'iter': 960, 'avg_loss': 0.7051758755010075, 'avg_acc': 51.63891779396462, 'loss': 0.735969603061676}


EP_train:5:  98%|| 981/1000 [04:46<00:04,  3.93it/s]

{'epoch': 5, 'iter': 980, 'avg_loss': 0.7049431288035759, 'avg_acc': 51.52905198776758, 'loss': 0.688430666923523}


EP_train:5: 100%|| 1000/1000 [04:51<00:00,  3.61it/s]

EP5_train, avg_loss= 0.7049662756323815 total_acc= 51.437859464866214
EP:5 Model Saved on: ./output/bertmodel2019-05-1114:59:01.616888.ep5



EP_test:5:   1%|| 2/250 [00:00<00:34,  7.15it/s]

{'epoch': 5, 'iter': 0, 'avg_loss': 0.659518837928772, 'avg_acc': 75.0, 'loss': 0.659518837928772}


EP_test:5:   9%|| 22/250 [00:01<00:19, 11.92it/s]

{'epoch': 5, 'iter': 20, 'avg_loss': 0.6896552188055856, 'avg_acc': 55.952380952380956, 'loss': 0.7388315200805664}


EP_test:5:  17%|| 42/250 [00:03<00:15, 13.56it/s]

{'epoch': 5, 'iter': 40, 'avg_loss': 0.6952763563249169, 'avg_acc': 51.829268292682926, 'loss': 0.6928585171699524}


EP_test:5:  25%|| 62/250 [00:04<00:13, 14.46it/s]

{'epoch': 5, 'iter': 60, 'avg_loss': 0.6974069857206501, 'avg_acc': 50.81967213114754, 'loss': 0.7388315200805664}


EP_test:5:  33%|| 82/250 [00:06<00:12, 13.74it/s]

{'epoch': 5, 'iter': 80, 'avg_loss': 0.6964369158685944, 'avg_acc': 51.54320987654321, 'loss': 0.6789147257804871}


EP_test:5:  41%|| 102/250 [00:07<00:11, 12.54it/s]

{'epoch': 5, 'iter': 100, 'avg_loss': 0.697166386807319, 'avg_acc': 51.23762376237624, 'loss': 0.621333658695221}


EP_test:5:  49%|| 122/250 [00:09<00:09, 13.20it/s]

{'epoch': 5, 'iter': 120, 'avg_loss': 0.697562776321222, 'avg_acc': 50.82644628099173, 'loss': 0.701249897480011}


EP_test:5:  57%|| 142/250 [00:10<00:07, 14.16it/s]

{'epoch': 5, 'iter': 140, 'avg_loss': 0.6997086448026887, 'avg_acc': 49.645390070921984, 'loss': 0.7206457257270813}


EP_test:5:  65%|| 162/250 [00:12<00:06, 14.29it/s]

{'epoch': 5, 'iter': 160, 'avg_loss': 0.7004827586760433, 'avg_acc': 49.22360248447205, 'loss': 0.7006464004516602}


EP_test:5:  73%|| 182/250 [00:13<00:04, 13.85it/s]

{'epoch': 5, 'iter': 180, 'avg_loss': 0.7035202482787285, 'avg_acc': 47.51381215469613, 'loss': 0.7399626970291138}


EP_test:5:  81%|| 202/250 [00:14<00:03, 12.05it/s]

{'epoch': 5, 'iter': 200, 'avg_loss': 0.7015103499094645, 'avg_acc': 48.88059701492538, 'loss': 0.7000126242637634}


EP_test:5:  89%|| 222/250 [00:16<00:02, 12.49it/s]

{'epoch': 5, 'iter': 220, 'avg_loss': 0.7001728091844067, 'avg_acc': 49.6606334841629, 'loss': 0.6971005201339722}


EP_test:5:  97%|| 242/250 [00:18<00:00, 13.73it/s]

{'epoch': 5, 'iter': 240, 'avg_loss': 0.7013551743693371, 'avg_acc': 49.1701244813278, 'loss': 0.6971005201339722}


EP_test:5: 100%|| 250/250 [00:18<00:00, 15.05it/s]

EP5_test, avg_loss= 0.7007824041843415 total_acc= 49.449449449449446



EP_train:6:   0%|| 1/1000 [00:00<06:29,  2.56it/s]

{'epoch': 6, 'iter': 0, 'avg_loss': 0.6911234259605408, 'avg_acc': 50.0, 'loss': 0.6911234259605408}


EP_train:6:   2%|| 21/1000 [00:06<05:12,  3.14it/s]

{'epoch': 6, 'iter': 20, 'avg_loss': 0.7035643174534753, 'avg_acc': 51.19047619047619, 'loss': 0.7171903848648071}


EP_train:6:   4%|| 41/1000 [00:14<05:59,  2.67it/s]

{'epoch': 6, 'iter': 40, 'avg_loss': 0.6992040959800162, 'avg_acc': 52.4390243902439, 'loss': 0.674718976020813}


EP_train:6:   6%|| 61/1000 [00:20<04:36,  3.40it/s]

{'epoch': 6, 'iter': 60, 'avg_loss': 0.6992860524380793, 'avg_acc': 53.278688524590166, 'loss': 0.708371102809906}


EP_train:6:   8%|| 81/1000 [00:26<04:38,  3.29it/s]

{'epoch': 6, 'iter': 80, 'avg_loss': 0.6993226798964135, 'avg_acc': 52.46913580246913, 'loss': 0.7342219948768616}


EP_train:6:  10%|| 101/1000 [00:32<04:18,  3.48it/s]

{'epoch': 6, 'iter': 100, 'avg_loss': 0.6990267563574385, 'avg_acc': 52.722772277227726, 'loss': 0.6973429322242737}


EP_train:6:  12%|| 121/1000 [00:39<06:11,  2.37it/s]

{'epoch': 6, 'iter': 120, 'avg_loss': 0.6949787957609193, 'avg_acc': 54.75206611570248, 'loss': 0.62371826171875}


EP_train:6:  14%|| 141/1000 [00:46<04:41,  3.05it/s]

{'epoch': 6, 'iter': 140, 'avg_loss': 0.7004028245912376, 'avg_acc': 53.01418439716312, 'loss': 0.7764878273010254}


EP_train:6:  16%|| 161/1000 [00:52<04:23,  3.19it/s]

{'epoch': 6, 'iter': 160, 'avg_loss': 0.6999054602954699, 'avg_acc': 53.41614906832298, 'loss': 0.6773296594619751}


EP_train:6:  18%|| 181/1000 [00:59<05:11,  2.63it/s]

{'epoch': 6, 'iter': 180, 'avg_loss': 0.7005222400249038, 'avg_acc': 52.762430939226526, 'loss': 0.6948460340499878}


EP_train:6:  20%|| 201/1000 [01:05<03:41,  3.61it/s]

{'epoch': 6, 'iter': 200, 'avg_loss': 0.7008439323202295, 'avg_acc': 52.23880597014925, 'loss': 0.6566556096076965}


EP_train:6:  22%|| 221/1000 [01:10<03:10,  4.08it/s]

{'epoch': 6, 'iter': 220, 'avg_loss': 0.7015088459485257, 'avg_acc': 52.03619909502263, 'loss': 0.7736281752586365}


EP_train:6:  24%|| 241/1000 [01:16<04:07,  3.07it/s]

{'epoch': 6, 'iter': 240, 'avg_loss': 0.7010477054168575, 'avg_acc': 51.97095435684648, 'loss': 0.7061728239059448}


EP_train:6:  26%|| 261/1000 [01:23<04:20,  2.83it/s]

{'epoch': 6, 'iter': 260, 'avg_loss': 0.7015016617902851, 'avg_acc': 50.76628352490421, 'loss': 0.7107987999916077}


EP_train:6:  28%|| 281/1000 [01:30<04:42,  2.55it/s]

{'epoch': 6, 'iter': 280, 'avg_loss': 0.7014277770425925, 'avg_acc': 50.88967971530249, 'loss': 0.8641729354858398}


EP_train:6:  30%|| 301/1000 [01:36<03:29,  3.34it/s]

{'epoch': 6, 'iter': 300, 'avg_loss': 0.7019773917736801, 'avg_acc': 50.99667774086378, 'loss': 0.6757336854934692}


EP_train:6:  32%|| 321/1000 [01:43<03:40,  3.08it/s]

{'epoch': 6, 'iter': 320, 'avg_loss': 0.7048022901157723, 'avg_acc': 51.479750778816204, 'loss': 0.6895413994789124}


EP_train:6:  34%|| 341/1000 [01:51<04:05,  2.69it/s]

{'epoch': 6, 'iter': 340, 'avg_loss': 0.7061596044347433, 'avg_acc': 50.95307917888563, 'loss': 0.6820855736732483}


EP_train:6:  36%|| 361/1000 [01:57<03:03,  3.48it/s]

{'epoch': 6, 'iter': 360, 'avg_loss': 0.7066230625326944, 'avg_acc': 50.96952908587258, 'loss': 0.6698846817016602}


EP_train:6:  38%|| 381/1000 [02:03<03:21,  3.07it/s]

{'epoch': 6, 'iter': 380, 'avg_loss': 0.7083641084160391, 'avg_acc': 50.45931758530183, 'loss': 0.6609797477722168}


EP_train:6:  40%|| 401/1000 [02:09<03:09,  3.17it/s]

{'epoch': 6, 'iter': 400, 'avg_loss': 0.707929751224946, 'avg_acc': 50.374064837905244, 'loss': 0.7389494180679321}


EP_train:6:  42%|| 421/1000 [02:16<03:13,  2.99it/s]

{'epoch': 6, 'iter': 420, 'avg_loss': 0.7076325764848614, 'avg_acc': 50.5938242280285, 'loss': 0.7846130132675171}


EP_train:6:  44%|| 441/1000 [02:21<02:32,  3.67it/s]

{'epoch': 6, 'iter': 440, 'avg_loss': 0.7073739425125035, 'avg_acc': 50.62358276643991, 'loss': 0.7094593048095703}


EP_train:6:  46%|| 461/1000 [02:27<02:12,  4.06it/s]

{'epoch': 6, 'iter': 460, 'avg_loss': 0.7069770636889526, 'avg_acc': 50.488069414316705, 'loss': 0.700950026512146}


EP_train:6:  48%|| 481/1000 [02:32<02:53,  3.00it/s]

{'epoch': 6, 'iter': 480, 'avg_loss': 0.7071161623308416, 'avg_acc': 50.51975051975052, 'loss': 0.792443037033081}


EP_train:6:  50%|| 501/1000 [02:39<02:50,  2.92it/s]

{'epoch': 6, 'iter': 500, 'avg_loss': 0.7070043302343753, 'avg_acc': 50.34930139720559, 'loss': 0.6954681277275085}


EP_train:6:  52%|| 521/1000 [02:45<02:12,  3.62it/s]

{'epoch': 6, 'iter': 520, 'avg_loss': 0.7070358114141878, 'avg_acc': 50.3358925143954, 'loss': 0.6851365566253662}


EP_train:6:  54%|| 541/1000 [02:51<02:22,  3.22it/s]

{'epoch': 6, 'iter': 540, 'avg_loss': 0.7068446262045841, 'avg_acc': 50.415896487985215, 'loss': 0.7667571306228638}


EP_train:6:  56%|| 561/1000 [02:57<02:09,  3.38it/s]

{'epoch': 6, 'iter': 560, 'avg_loss': 0.7060361082753608, 'avg_acc': 50.84670231729055, 'loss': 0.5796040892601013}


EP_train:6:  58%|| 581/1000 [03:04<02:21,  2.97it/s]

{'epoch': 6, 'iter': 580, 'avg_loss': 0.7068921032132462, 'avg_acc': 50.4302925989673, 'loss': 0.6407487392425537}


EP_train:6:  60%|| 601/1000 [03:09<01:50,  3.62it/s]

{'epoch': 6, 'iter': 600, 'avg_loss': 0.7068281217839277, 'avg_acc': 50.49916805324459, 'loss': 0.42638298869132996}


EP_train:6:  62%|| 621/1000 [03:17<04:58,  1.27it/s]

{'epoch': 6, 'iter': 620, 'avg_loss': 0.759752519470722, 'avg_acc': 50.32206119162641, 'loss': 19.12369155883789}


EP_train:6:  64%|| 641/1000 [03:46<13:56,  2.33s/it]

{'epoch': 6, 'iter': 640, 'avg_loss': 0.8372925265217069, 'avg_acc': 50.0, 'loss': 0.5930954813957214}


EP_train:6:  66%|| 661/1000 [04:08<07:17,  1.29s/it]

{'epoch': 6, 'iter': 660, 'avg_loss': 0.8671272550392529, 'avg_acc': 50.0, 'loss': 1.4197560548782349}


EP_train:6:  68%|| 681/1000 [04:20<01:53,  2.82it/s]

{'epoch': 6, 'iter': 680, 'avg_loss': 0.8762461121266546, 'avg_acc': 49.74302496328928, 'loss': 1.4068835973739624}


EP_train:6:  70%|| 701/1000 [04:26<01:27,  3.40it/s]

{'epoch': 6, 'iter': 700, 'avg_loss': 0.874873949531014, 'avg_acc': 49.50071326676177, 'loss': 0.7218911051750183}


EP_train:6:  72%|| 721/1000 [04:46<07:07,  1.53s/it]

{'epoch': 6, 'iter': 720, 'avg_loss': 0.874695039081714, 'avg_acc': 49.58391123439667, 'loss': 0.8934783935546875}


EP_train:6:  74%|| 741/1000 [04:59<01:35,  2.70it/s]

{'epoch': 6, 'iter': 740, 'avg_loss': 0.8737754102710851, 'avg_acc': 49.358974358974365, 'loss': 0.6999792456626892}


EP_train:6:  76%|| 761/1000 [05:05<01:21,  2.93it/s]

{'epoch': 6, 'iter': 760, 'avg_loss': 0.8706882867849531, 'avg_acc': 49.17871222076216, 'loss': 0.6604697108268738}


EP_train:6:  78%|| 781/1000 [05:14<01:55,  1.89it/s]

{'epoch': 6, 'iter': 780, 'avg_loss': 0.8673875778770401, 'avg_acc': 49.32778489116517, 'loss': 0.6748985648155212}


EP_train:6:  80%|| 801/1000 [05:22<00:56,  3.51it/s]

{'epoch': 6, 'iter': 800, 'avg_loss': 0.8643722225041053, 'avg_acc': 49.59425717852684, 'loss': 0.5918950438499451}


EP_train:6:  82%|| 821/1000 [05:28<00:54,  3.27it/s]

{'epoch': 6, 'iter': 820, 'avg_loss': 0.8606871655131446, 'avg_acc': 49.66504263093788, 'loss': 0.4162122309207916}


EP_train:6:  84%|| 841/1000 [05:34<00:53,  2.99it/s]

{'epoch': 6, 'iter': 840, 'avg_loss': 0.8565961182445252, 'avg_acc': 49.910820451843044, 'loss': 0.6278334856033325}


EP_train:6:  86%|| 861/1000 [05:42<00:52,  2.66it/s]

{'epoch': 6, 'iter': 860, 'avg_loss': 0.8537947919423087, 'avg_acc': 49.767711962833914, 'loss': 0.7435362339019775}


EP_train:6:  88%|| 881/1000 [05:48<00:36,  3.30it/s]

{'epoch': 6, 'iter': 880, 'avg_loss': 0.8505321699538174, 'avg_acc': 49.74460839954597, 'loss': 0.45729488134384155}


EP_train:6:  90%|| 901/1000 [05:54<00:31,  3.17it/s]

{'epoch': 6, 'iter': 900, 'avg_loss': 0.8488685721934636, 'avg_acc': 49.69478357380688, 'loss': 0.7387285232543945}


EP_train:6:  92%|| 921/1000 [06:02<00:29,  2.64it/s]

{'epoch': 6, 'iter': 920, 'avg_loss': 0.846060190090948, 'avg_acc': 49.8099891422367, 'loss': 0.5390661358833313}


EP_train:6:  94%|| 941/1000 [06:08<00:15,  3.76it/s]

{'epoch': 6, 'iter': 940, 'avg_loss': 0.8431104747232101, 'avg_acc': 49.840595111583426, 'loss': 0.6841375827789307}


EP_train:6:  96%|| 961/1000 [06:14<00:11,  3.40it/s]

{'epoch': 6, 'iter': 960, 'avg_loss': 0.8407944398704417, 'avg_acc': 49.81789802289282, 'loss': 0.715068519115448}


EP_train:6:  98%|| 981/1000 [06:21<00:06,  2.81it/s]

{'epoch': 6, 'iter': 980, 'avg_loss': 0.8388302116621585, 'avg_acc': 49.84709480122324, 'loss': 1.078095555305481}


EP_train:6: 100%|| 1000/1000 [06:27<00:00,  3.68it/s]

EP6_train, avg_loss= 0.836577676717192 total_acc= 49.83745936484121
EP:6 Model Saved on: ./output/bertmodel2019-05-1114:59:01.616888.ep6



EP_test:6:   1%|| 2/250 [00:00<00:33,  7.30it/s]

{'epoch': 6, 'iter': 0, 'avg_loss': 0.5351279973983765, 'avg_acc': 75.0, 'loss': 0.5351279973983765}


EP_test:6:   9%|| 22/250 [00:01<00:16, 13.51it/s]

{'epoch': 6, 'iter': 20, 'avg_loss': 0.7313939787092663, 'avg_acc': 51.19047619047619, 'loss': 0.6958503127098083}


EP_test:6:  17%|| 42/250 [00:03<00:16, 12.85it/s]

{'epoch': 6, 'iter': 40, 'avg_loss': 0.7127766514696726, 'avg_acc': 51.829268292682926, 'loss': 1.244178056716919}


EP_test:6:  25%|| 62/250 [00:04<00:14, 13.38it/s]

{'epoch': 6, 'iter': 60, 'avg_loss': 0.7094453851707646, 'avg_acc': 52.459016393442624, 'loss': 0.7130774259567261}


EP_test:6:  33%|| 82/250 [00:06<00:13, 12.17it/s]

{'epoch': 6, 'iter': 80, 'avg_loss': 0.7163699957323663, 'avg_acc': 52.77777777777778, 'loss': 1.0443886518478394}


EP_test:6:  41%|| 102/250 [00:07<00:12, 12.13it/s]

{'epoch': 6, 'iter': 100, 'avg_loss': 0.7139027280972736, 'avg_acc': 50.99009900990099, 'loss': 0.6383423209190369}


EP_test:6:  49%|| 122/250 [00:09<00:10, 12.53it/s]

{'epoch': 6, 'iter': 120, 'avg_loss': 0.7094358686080648, 'avg_acc': 52.066115702479344, 'loss': 0.7579478025436401}


EP_test:6:  57%|| 142/250 [00:10<00:07, 14.14it/s]

{'epoch': 6, 'iter': 140, 'avg_loss': 0.7100795129089491, 'avg_acc': 51.95035460992907, 'loss': 0.9572611451148987}


EP_test:6:  65%|| 162/250 [00:12<00:06, 13.60it/s]

{'epoch': 6, 'iter': 160, 'avg_loss': 0.7116476536167334, 'avg_acc': 51.70807453416148, 'loss': 0.6958503127098083}


EP_test:6:  73%|| 182/250 [00:13<00:05, 13.12it/s]

{'epoch': 6, 'iter': 180, 'avg_loss': 0.7147459247823578, 'avg_acc': 50.966850828729285, 'loss': 0.7596464157104492}


EP_test:6:  81%|| 202/250 [00:15<00:03, 12.87it/s]

{'epoch': 6, 'iter': 200, 'avg_loss': 0.7167953457405318, 'avg_acc': 50.74626865671642, 'loss': 0.6985514760017395}


EP_test:6:  89%|| 222/250 [00:16<00:02, 13.65it/s]

{'epoch': 6, 'iter': 220, 'avg_loss': 0.7178655289686643, 'avg_acc': 50.56561085972851, 'loss': 0.43446728587150574}


EP_test:6:  97%|| 242/250 [00:18<00:00, 14.33it/s]

{'epoch': 6, 'iter': 240, 'avg_loss': 0.7210204916623618, 'avg_acc': 50.31120331950207, 'loss': 0.9206694960594177}


EP_test:6: 100%|| 250/250 [00:18<00:00, 14.85it/s]

EP6_test, avg_loss= 0.7216880363225937 total_acc= 50.450450450450454



EP_train:7:   0%|| 1/1000 [00:00<06:26,  2.59it/s]

{'epoch': 7, 'iter': 0, 'avg_loss': 0.7324420809745789, 'avg_acc': 25.0, 'loss': 0.7324420809745789}


EP_train:7:   2%|| 21/1000 [00:07<06:38,  2.46it/s]

{'epoch': 7, 'iter': 20, 'avg_loss': 0.7341648581482115, 'avg_acc': 47.61904761904761, 'loss': 0.6877478957176208}


EP_train:7:   4%|| 41/1000 [00:14<04:53,  3.27it/s]

{'epoch': 7, 'iter': 40, 'avg_loss': 0.7192231685650058, 'avg_acc': 51.829268292682926, 'loss': 1.1152799129486084}


EP_train:7:   6%|| 61/1000 [00:19<03:59,  3.93it/s]

{'epoch': 7, 'iter': 60, 'avg_loss': 0.7249067396414085, 'avg_acc': 52.04918032786885, 'loss': 0.6758270263671875}


EP_train:7:   8%|| 81/1000 [00:25<04:37,  3.31it/s]

{'epoch': 7, 'iter': 80, 'avg_loss': 0.7247198930493107, 'avg_acc': 49.382716049382715, 'loss': 0.8266031742095947}


EP_train:7:  10%|| 101/1000 [00:32<04:52,  3.07it/s]

{'epoch': 7, 'iter': 100, 'avg_loss': 0.7203937397144808, 'avg_acc': 49.257425742574256, 'loss': 0.6944807171821594}


EP_train:7:  12%|| 121/1000 [00:37<04:08,  3.54it/s]

{'epoch': 7, 'iter': 120, 'avg_loss': 0.7200110268986915, 'avg_acc': 48.34710743801653, 'loss': 0.7992045879364014}


EP_train:7:  14%|| 141/1000 [00:43<03:58,  3.60it/s]

{'epoch': 7, 'iter': 140, 'avg_loss': 0.7202550147442107, 'avg_acc': 47.695035460992905, 'loss': 0.7173459529876709}


EP_train:7:  16%|| 161/1000 [00:49<05:17,  2.64it/s]

{'epoch': 7, 'iter': 160, 'avg_loss': 0.7186130851692294, 'avg_acc': 48.29192546583851, 'loss': 0.7327880263328552}


EP_train:7:  18%|| 181/1000 [00:56<04:24,  3.10it/s]

{'epoch': 7, 'iter': 180, 'avg_loss': 0.7141376821046376, 'avg_acc': 49.58563535911602, 'loss': 0.49701443314552307}


EP_train:7:  20%|| 201/1000 [01:01<03:48,  3.50it/s]

{'epoch': 7, 'iter': 200, 'avg_loss': 0.7142303219778621, 'avg_acc': 49.75124378109453, 'loss': 0.7044610381126404}


EP_train:7:  22%|| 221/1000 [01:08<03:42,  3.50it/s]

{'epoch': 7, 'iter': 220, 'avg_loss': 0.713708910332546, 'avg_acc': 49.547511312217196, 'loss': 0.9354389905929565}


EP_train:7:  24%|| 241/1000 [01:14<04:33,  2.77it/s]

{'epoch': 7, 'iter': 240, 'avg_loss': 0.7122689588188631, 'avg_acc': 50.20746887966805, 'loss': 0.7345439791679382}


EP_train:7:  26%|| 261/1000 [01:21<04:24,  2.79it/s]

{'epoch': 7, 'iter': 260, 'avg_loss': 0.7118499462860297, 'avg_acc': 50.09578544061303, 'loss': 0.6349539756774902}


EP_train:7:  28%|| 281/1000 [01:28<03:24,  3.52it/s]

{'epoch': 7, 'iter': 280, 'avg_loss': 0.7111117499366774, 'avg_acc': 50.177935943060504, 'loss': 0.6357917189598083}


EP_train:7:  30%|| 301/1000 [01:34<03:32,  3.28it/s]

{'epoch': 7, 'iter': 300, 'avg_loss': 0.7121120569515862, 'avg_acc': 50.16611295681063, 'loss': 0.7710021734237671}


EP_train:7:  32%|| 321/1000 [01:39<03:32,  3.20it/s]

{'epoch': 7, 'iter': 320, 'avg_loss': 0.7120525388517113, 'avg_acc': 50.0, 'loss': 0.6941359043121338}


EP_train:7:  34%|| 341/1000 [01:46<03:48,  2.88it/s]

{'epoch': 7, 'iter': 340, 'avg_loss': 0.7116625353562867, 'avg_acc': 49.56011730205279, 'loss': 0.7516734004020691}


EP_train:7:  36%|| 361/1000 [01:53<03:02,  3.50it/s]

{'epoch': 7, 'iter': 360, 'avg_loss': 0.7115883915543226, 'avg_acc': 49.584487534626035, 'loss': 0.6947638988494873}


EP_train:7:  38%|| 381/1000 [01:58<02:50,  3.63it/s]

{'epoch': 7, 'iter': 380, 'avg_loss': 0.7112325540208441, 'avg_acc': 49.4750656167979, 'loss': 0.7616514563560486}


EP_train:7:  40%|| 401/1000 [02:04<04:00,  2.49it/s]

{'epoch': 7, 'iter': 400, 'avg_loss': 0.7114216078100656, 'avg_acc': 49.3142144638404, 'loss': 0.6737237572669983}


EP_train:7:  42%|| 421/1000 [02:12<03:08,  3.08it/s]

{'epoch': 7, 'iter': 420, 'avg_loss': 0.7121562654077299, 'avg_acc': 49.16864608076009, 'loss': 0.7224656939506531}


EP_train:7:  44%|| 441/1000 [02:17<02:38,  3.53it/s]

{'epoch': 7, 'iter': 440, 'avg_loss': 0.7127220001485612, 'avg_acc': 49.26303854875284, 'loss': 0.6990136504173279}


EP_train:7:  46%|| 461/1000 [02:25<03:54,  2.30it/s]

{'epoch': 7, 'iter': 460, 'avg_loss': 0.7125971823478211, 'avg_acc': 49.13232104121475, 'loss': 0.6372912526130676}


EP_train:7:  48%|| 481/1000 [02:31<02:28,  3.48it/s]

{'epoch': 7, 'iter': 480, 'avg_loss': 0.712081165608646, 'avg_acc': 49.37629937629938, 'loss': 0.5798208117485046}


EP_train:7:  50%|| 501/1000 [02:36<02:06,  3.95it/s]

{'epoch': 7, 'iter': 500, 'avg_loss': 0.7124887710202954, 'avg_acc': 49.15169660678643, 'loss': 0.6635708212852478}


EP_train:7:  52%|| 521/1000 [02:42<02:49,  2.82it/s]

{'epoch': 7, 'iter': 520, 'avg_loss': 0.7129962043318319, 'avg_acc': 49.18426103646833, 'loss': 0.6968490481376648}


EP_train:7:  54%|| 541/1000 [02:49<02:27,  3.12it/s]

{'epoch': 7, 'iter': 540, 'avg_loss': 0.7132322901167844, 'avg_acc': 49.21441774491682, 'loss': 0.706306517124176}


EP_train:7:  56%|| 561/1000 [02:55<02:05,  3.50it/s]

{'epoch': 7, 'iter': 560, 'avg_loss': 0.7134453474200345, 'avg_acc': 48.84135472370767, 'loss': 0.7305458784103394}


EP_train:7:  58%|| 581/1000 [03:01<02:16,  3.06it/s]

{'epoch': 7, 'iter': 580, 'avg_loss': 0.7142431824219617, 'avg_acc': 49.010327022375215, 'loss': 0.7297647595405579}


EP_train:7:  60%|| 601/1000 [03:09<02:26,  2.73it/s]

{'epoch': 7, 'iter': 600, 'avg_loss': 0.7162373358517042, 'avg_acc': 49.292845257903494, 'loss': 0.724540114402771}


EP_train:7:  62%|| 621/1000 [03:15<01:51,  3.41it/s]

{'epoch': 7, 'iter': 620, 'avg_loss': 0.7181108942834268, 'avg_acc': 49.15458937198068, 'loss': 0.6415547132492065}


EP_train:7:  64%|| 641/1000 [03:21<01:59,  3.01it/s]

{'epoch': 7, 'iter': 640, 'avg_loss': 0.7191748900830095, 'avg_acc': 49.25897035881435, 'loss': 0.7976245880126953}


EP_train:7:  66%|| 661/1000 [03:29<01:50,  3.07it/s]

{'epoch': 7, 'iter': 660, 'avg_loss': 0.7218074430676343, 'avg_acc': 49.5839636913767, 'loss': 1.0677894353866577}


EP_train:7:  68%|| 681/1000 [03:35<01:43,  3.08it/s]

{'epoch': 7, 'iter': 680, 'avg_loss': 0.7269863977306215, 'avg_acc': 49.48604992657856, 'loss': 1.1859402656555176}


EP_train:7:  70%|| 701/1000 [03:41<01:28,  3.38it/s]

{'epoch': 7, 'iter': 700, 'avg_loss': 0.7271794280975249, 'avg_acc': 49.50071326676177, 'loss': 0.5541365742683411}


EP_train:7:  72%|| 721/1000 [03:48<01:56,  2.40it/s]

{'epoch': 7, 'iter': 720, 'avg_loss': 0.7300473690115629, 'avg_acc': 49.34119278779473, 'loss': 0.7783647179603577}


EP_train:7:  74%|| 741/1000 [03:54<01:12,  3.55it/s]

{'epoch': 7, 'iter': 740, 'avg_loss': 0.7306538050071752, 'avg_acc': 49.122807017543856, 'loss': 0.7075079679489136}


EP_train:7:  76%|| 761/1000 [03:59<00:59,  4.03it/s]

{'epoch': 7, 'iter': 760, 'avg_loss': 0.7297665296261947, 'avg_acc': 49.21156373193167, 'loss': 0.7665949463844299}


EP_train:7:  78%|| 781/1000 [04:05<01:00,  3.63it/s]

{'epoch': 7, 'iter': 780, 'avg_loss': 0.7298922808542753, 'avg_acc': 49.07170294494238, 'loss': 0.6879546642303467}


EP_train:7:  80%|| 801/1000 [04:13<01:15,  2.63it/s]

{'epoch': 7, 'iter': 800, 'avg_loss': 0.7297968840628826, 'avg_acc': 49.03245942571785, 'loss': 0.893449068069458}


EP_train:7:  82%|| 821/1000 [04:19<00:51,  3.51it/s]

{'epoch': 7, 'iter': 820, 'avg_loss': 0.7300777465794758, 'avg_acc': 48.90377588306943, 'loss': 1.447723388671875}


EP_train:7:  84%|| 841/1000 [04:25<00:51,  3.10it/s]

{'epoch': 7, 'iter': 840, 'avg_loss': 0.7297011371458328, 'avg_acc': 48.90011890606421, 'loss': 0.6719704866409302}


EP_train:7:  86%|| 861/1000 [04:33<00:51,  2.69it/s]

{'epoch': 7, 'iter': 860, 'avg_loss': 0.7294864587390603, 'avg_acc': 48.983739837398375, 'loss': 0.696617603302002}


EP_train:7:  88%|| 881/1000 [04:39<00:33,  3.54it/s]

{'epoch': 7, 'iter': 880, 'avg_loss': 0.7290730905316338, 'avg_acc': 48.97843359818388, 'loss': 0.6942924857139587}


EP_train:7:  90%|| 901/1000 [04:45<00:31,  3.18it/s]

{'epoch': 7, 'iter': 900, 'avg_loss': 0.7285113458363515, 'avg_acc': 49.056603773584904, 'loss': 0.7967941164970398}


EP_train:7:  92%|| 921/1000 [04:52<00:30,  2.60it/s]

{'epoch': 7, 'iter': 920, 'avg_loss': 0.7283522055669406, 'avg_acc': 49.0499457111835, 'loss': 0.748487114906311}


EP_train:7:  94%|| 941/1000 [04:59<00:16,  3.49it/s]

{'epoch': 7, 'iter': 940, 'avg_loss': 0.7277752153424983, 'avg_acc': 49.07013815090329, 'loss': 0.7386857271194458}


EP_train:7:  96%|| 961/1000 [05:05<00:13,  2.80it/s]

{'epoch': 7, 'iter': 960, 'avg_loss': 0.7272857415812569, 'avg_acc': 49.0894901144641, 'loss': 0.7044357657432556}


EP_train:7:  98%|| 981/1000 [05:13<00:07,  2.69it/s]

{'epoch': 7, 'iter': 980, 'avg_loss': 0.7267244876828519, 'avg_acc': 49.184505606523956, 'loss': 0.7438211441040039}


EP_train:7: 100%|| 1000/1000 [05:18<00:00,  3.74it/s]

EP7_train, avg_loss= 0.726533742070198 total_acc= 49.03725931482871
EP:7 Model Saved on: ./output/bertmodel2019-05-1114:59:01.616888.ep7



EP_test:7:   1%|| 2/250 [00:00<00:33,  7.42it/s]

{'epoch': 7, 'iter': 0, 'avg_loss': 0.6947989463806152, 'avg_acc': 50.0, 'loss': 0.6947989463806152}


EP_test:7:   9%|| 22/250 [00:01<00:17, 13.25it/s]

{'epoch': 7, 'iter': 20, 'avg_loss': 0.6961365313757033, 'avg_acc': 50.0, 'loss': 0.6870856881141663}


EP_test:7:  17%|| 42/250 [00:03<00:16, 12.46it/s]

{'epoch': 7, 'iter': 40, 'avg_loss': 0.7015237968142439, 'avg_acc': 47.5609756097561, 'loss': 0.6700724959373474}


EP_test:7:  25%|| 62/250 [00:04<00:15, 12.34it/s]

{'epoch': 7, 'iter': 60, 'avg_loss': 0.7023098126786654, 'avg_acc': 47.13114754098361, 'loss': 0.7113104462623596}


EP_test:7:  33%|| 82/250 [00:06<00:13, 12.36it/s]

{'epoch': 7, 'iter': 80, 'avg_loss': 0.7026980011551468, 'avg_acc': 46.2962962962963, 'loss': 0.7486244440078735}


EP_test:7:  41%|| 102/250 [00:07<00:10, 13.86it/s]

{'epoch': 7, 'iter': 100, 'avg_loss': 0.70058141427465, 'avg_acc': 47.77227722772277, 'loss': 0.6825690865516663}


EP_test:7:  49%|| 122/250 [00:09<00:09, 13.33it/s]

{'epoch': 7, 'iter': 120, 'avg_loss': 0.7006382193447145, 'avg_acc': 47.93388429752066, 'loss': 0.7171763777732849}


EP_test:7:  57%|| 142/250 [00:10<00:08, 13.02it/s]

{'epoch': 7, 'iter': 140, 'avg_loss': 0.6994733983743275, 'avg_acc': 48.93617021276596, 'loss': 0.6696370840072632}


EP_test:7:  65%|| 162/250 [00:12<00:07, 12.48it/s]

{'epoch': 7, 'iter': 160, 'avg_loss': 0.698704106837326, 'avg_acc': 49.378881987577635, 'loss': 0.6670227646827698}


EP_test:7:  73%|| 182/250 [00:13<00:05, 12.60it/s]

{'epoch': 7, 'iter': 180, 'avg_loss': 0.6985883064032918, 'avg_acc': 49.171270718232044, 'loss': 0.6613238453865051}


EP_test:7:  81%|| 202/250 [00:15<00:03, 13.70it/s]

{'epoch': 7, 'iter': 200, 'avg_loss': 0.6978829076041037, 'avg_acc': 49.62686567164179, 'loss': 0.6660575866699219}


EP_test:7:  89%|| 222/250 [00:16<00:02, 13.41it/s]

{'epoch': 7, 'iter': 220, 'avg_loss': 0.6974871975803807, 'avg_acc': 50.113122171945705, 'loss': 0.7131614685058594}


EP_test:7:  97%|| 242/250 [00:18<00:00, 13.60it/s]

{'epoch': 7, 'iter': 240, 'avg_loss': 0.6976763022391134, 'avg_acc': 50.0, 'loss': 0.6744450926780701}


EP_test:7: 100%|| 250/250 [00:18<00:00, 13.35it/s]

EP7_test, avg_loss= 0.6977613074779511 total_acc= 49.949949949949946



EP_train:8:   0%|| 1/1000 [00:00<07:03,  2.36it/s]

{'epoch': 8, 'iter': 0, 'avg_loss': 0.6927812099456787, 'avg_acc': 50.0, 'loss': 0.6927812099456787}


EP_train:8:   2%|| 21/1000 [00:07<06:46,  2.41it/s]

{'epoch': 8, 'iter': 20, 'avg_loss': 0.705146849155426, 'avg_acc': 48.80952380952381, 'loss': 0.5931065082550049}


EP_train:8:   4%|| 41/1000 [00:14<05:03,  3.16it/s]

{'epoch': 8, 'iter': 40, 'avg_loss': 0.7033808027825704, 'avg_acc': 50.609756097560975, 'loss': 0.650261402130127}


EP_train:8:   6%|| 61/1000 [00:20<04:46,  3.27it/s]

{'epoch': 8, 'iter': 60, 'avg_loss': 0.7056571587187345, 'avg_acc': 50.0, 'loss': 0.7602463960647583}


EP_train:8:   8%|| 81/1000 [00:27<05:45,  2.66it/s]

{'epoch': 8, 'iter': 80, 'avg_loss': 0.706026370142713, 'avg_acc': 50.0, 'loss': 0.7076482772827148}


EP_train:8:  10%|| 101/1000 [00:33<04:14,  3.54it/s]

{'epoch': 8, 'iter': 100, 'avg_loss': 0.7046493592828807, 'avg_acc': 50.495049504950494, 'loss': 0.6657036542892456}


EP_train:8:  12%|| 121/1000 [00:38<03:40,  3.99it/s]

{'epoch': 8, 'iter': 120, 'avg_loss': 0.7066217197859583, 'avg_acc': 50.0, 'loss': 0.7141242623329163}


EP_train:8:  14%|| 141/1000 [00:44<04:30,  3.17it/s]

{'epoch': 8, 'iter': 140, 'avg_loss': 0.7055596354159903, 'avg_acc': 49.46808510638298, 'loss': 0.6970957517623901}


EP_train:8:  16%|| 161/1000 [00:51<05:06,  2.73it/s]

{'epoch': 8, 'iter': 160, 'avg_loss': 0.7063956105190775, 'avg_acc': 49.22360248447205, 'loss': 0.7448360919952393}


EP_train:8:  18%|| 181/1000 [00:57<03:56,  3.47it/s]

{'epoch': 8, 'iter': 180, 'avg_loss': 0.7059763834621366, 'avg_acc': 49.30939226519337, 'loss': 0.7173354029655457}


EP_train:8:  20%|| 201/1000 [01:04<04:09,  3.21it/s]

{'epoch': 8, 'iter': 200, 'avg_loss': 0.7045805246675786, 'avg_acc': 49.75124378109453, 'loss': 0.578070878982544}


EP_train:8:  22%|| 221/1000 [01:10<04:15,  3.05it/s]

{'epoch': 8, 'iter': 220, 'avg_loss': 0.7055062524873207, 'avg_acc': 49.20814479638009, 'loss': 0.7212550640106201}


EP_train:8:  24%|| 241/1000 [01:17<04:35,  2.75it/s]

{'epoch': 8, 'iter': 240, 'avg_loss': 0.70346966571333, 'avg_acc': 49.79253112033195, 'loss': 0.5806015729904175}


EP_train:8:  26%|| 261/1000 [01:24<03:31,  3.49it/s]

{'epoch': 8, 'iter': 260, 'avg_loss': 0.7029090937070006, 'avg_acc': 50.47892720306514, 'loss': 0.7072546482086182}


EP_train:8:  28%|| 281/1000 [01:30<03:44,  3.20it/s]

{'epoch': 8, 'iter': 280, 'avg_loss': 0.7028397005647952, 'avg_acc': 50.71174377224199, 'loss': 0.7104979157447815}


EP_train:8:  30%|| 301/1000 [01:35<03:21,  3.46it/s]

{'epoch': 8, 'iter': 300, 'avg_loss': 0.7022312323120345, 'avg_acc': 50.83056478405316, 'loss': 0.7119090557098389}


EP_train:8:  32%|| 321/1000 [01:42<04:42,  2.41it/s]

{'epoch': 8, 'iter': 320, 'avg_loss': 0.7033828840448849, 'avg_acc': 50.389408099688474, 'loss': 0.7244313359260559}


EP_train:8:  34%|| 341/1000 [01:49<03:08,  3.50it/s]

{'epoch': 8, 'iter': 340, 'avg_loss': 0.7032881688512316, 'avg_acc': 50.43988269794721, 'loss': 0.8324451446533203}


EP_train:8:  36%|| 361/1000 [01:54<02:39,  4.01it/s]

{'epoch': 8, 'iter': 360, 'avg_loss': 0.7027507604324257, 'avg_acc': 50.96952908587258, 'loss': 0.7167201638221741}


EP_train:8:  38%|| 381/1000 [01:59<02:56,  3.50it/s]

{'epoch': 8, 'iter': 380, 'avg_loss': 0.7013248498514881, 'avg_acc': 51.44356955380578, 'loss': 0.46972259879112244}


EP_train:8:  40%|| 401/1000 [02:06<03:09,  3.15it/s]

{'epoch': 8, 'iter': 400, 'avg_loss': 0.7029587343297992, 'avg_acc': 51.12219451371571, 'loss': 0.6467206478118896}


EP_train:8:  42%|| 421/1000 [02:12<02:47,  3.46it/s]

{'epoch': 8, 'iter': 420, 'avg_loss': 0.7027399318376799, 'avg_acc': 51.12826603325416, 'loss': 0.6394945979118347}


EP_train:8:  44%|| 441/1000 [02:18<02:50,  3.28it/s]

{'epoch': 8, 'iter': 440, 'avg_loss': 0.7027832687982356, 'avg_acc': 51.13378684807256, 'loss': 0.6991735100746155}


EP_train:8:  46%|| 461/1000 [02:25<02:53,  3.10it/s]

{'epoch': 8, 'iter': 460, 'avg_loss': 0.7028388190295329, 'avg_acc': 50.97613882863341, 'loss': 0.6537669897079468}


EP_train:8:  48%|| 481/1000 [02:31<02:22,  3.65it/s]

{'epoch': 8, 'iter': 480, 'avg_loss': 0.7028173628938916, 'avg_acc': 50.83160083160083, 'loss': 0.7113686800003052}


EP_train:8:  50%|| 501/1000 [02:36<02:12,  3.76it/s]

{'epoch': 8, 'iter': 500, 'avg_loss': 0.7031574161823638, 'avg_acc': 50.64870259481038, 'loss': 0.7621786594390869}


EP_train:8:  52%|| 521/1000 [02:42<02:53,  2.76it/s]

{'epoch': 8, 'iter': 520, 'avg_loss': 0.7033614118703267, 'avg_acc': 50.479846449136275, 'loss': 0.7114284634590149}


EP_train:8:  54%|| 541/1000 [02:48<02:13,  3.44it/s]

{'epoch': 8, 'iter': 540, 'avg_loss': 0.7036410992665564, 'avg_acc': 50.554528650646944, 'loss': 0.6390543580055237}


EP_train:8:  56%|| 561/1000 [02:54<01:47,  4.10it/s]

{'epoch': 8, 'iter': 560, 'avg_loss': 0.706939282211178, 'avg_acc': 50.57932263814616, 'loss': 1.0042072534561157}


EP_train:8:  58%|| 581/1000 [02:59<01:56,  3.61it/s]

{'epoch': 8, 'iter': 580, 'avg_loss': 0.7166750446962202, 'avg_acc': 50.21514629948365, 'loss': 0.7285059094429016}


EP_train:8:  60%|| 601/1000 [03:06<02:05,  3.19it/s]

{'epoch': 8, 'iter': 600, 'avg_loss': 0.7177200749293342, 'avg_acc': 50.45757071547421, 'loss': 0.6070630550384521}


EP_train:8:  62%|| 621/1000 [03:11<01:42,  3.71it/s]

{'epoch': 8, 'iter': 620, 'avg_loss': 0.7186578396533806, 'avg_acc': 50.28180354267311, 'loss': 0.6522635817527771}


EP_train:8:  64%|| 641/1000 [03:17<01:45,  3.39it/s]

{'epoch': 8, 'iter': 640, 'avg_loss': 0.7192610632889729, 'avg_acc': 50.50702028081123, 'loss': 0.5343160629272461}


EP_train:8:  66%|| 661/1000 [03:23<02:07,  2.66it/s]

{'epoch': 8, 'iter': 660, 'avg_loss': 0.7196709661097822, 'avg_acc': 50.416036308623305, 'loss': 0.7292141318321228}


EP_train:8:  68%|| 681/1000 [03:29<01:30,  3.52it/s]

{'epoch': 8, 'iter': 680, 'avg_loss': 0.7185552080822412, 'avg_acc': 50.73421439060205, 'loss': 0.6598149538040161}


EP_train:8:  70%|| 701/1000 [03:34<01:13,  4.05it/s]

{'epoch': 8, 'iter': 700, 'avg_loss': 0.7189170836382007, 'avg_acc': 50.606276747503564, 'loss': 0.7231926321983337}


EP_train:8:  72%|| 721/1000 [03:40<01:28,  3.17it/s]

{'epoch': 8, 'iter': 720, 'avg_loss': 0.7187832670800398, 'avg_acc': 50.55478502080444, 'loss': 0.7846668362617493}


EP_train:8:  74%|| 741/1000 [03:47<01:24,  3.06it/s]

{'epoch': 8, 'iter': 740, 'avg_loss': 0.7182471114292479, 'avg_acc': 50.67476383265856, 'loss': 0.8347131013870239}


EP_train:8:  76%|| 761/1000 [03:52<00:59,  4.02it/s]

{'epoch': 8, 'iter': 760, 'avg_loss': 0.7184332933438435, 'avg_acc': 50.65703022339028, 'loss': 0.8250650763511658}


EP_train:8:  78%|| 781/1000 [03:57<00:59,  3.68it/s]

{'epoch': 8, 'iter': 780, 'avg_loss': 0.7182584451499577, 'avg_acc': 50.70422535211267, 'loss': 0.7220290899276733}


EP_train:8:  80%|| 801/1000 [04:04<01:06,  2.98it/s]

{'epoch': 8, 'iter': 800, 'avg_loss': 0.7178425887104277, 'avg_acc': 50.84269662921348, 'loss': 0.7230108380317688}


EP_train:8:  82%|| 821/1000 [04:10<00:48,  3.68it/s]

{'epoch': 8, 'iter': 820, 'avg_loss': 0.717735424038844, 'avg_acc': 50.76126674786845, 'loss': 0.7551653385162354}


EP_train:8:  84%|| 841/1000 [04:15<00:38,  4.09it/s]

{'epoch': 8, 'iter': 840, 'avg_loss': 0.7180977708235933, 'avg_acc': 50.62425683709869, 'loss': 0.648155927658081}


EP_train:8:  86%|| 861/1000 [04:20<00:46,  2.98it/s]

{'epoch': 8, 'iter': 860, 'avg_loss': 0.7184504166712744, 'avg_acc': 50.40650406504065, 'loss': 0.6782472133636475}


EP_train:8:  88%|| 881/1000 [04:28<00:40,  2.91it/s]

{'epoch': 8, 'iter': 880, 'avg_loss': 0.718253500886456, 'avg_acc': 50.42565266742338, 'loss': 0.7432844638824463}


EP_train:8:  90%|| 901/1000 [04:34<00:28,  3.48it/s]

{'epoch': 8, 'iter': 900, 'avg_loss': 0.7173485317320194, 'avg_acc': 50.49944506104328, 'loss': 0.5625847578048706}


EP_train:8:  92%|| 921/1000 [04:41<00:31,  2.51it/s]

{'epoch': 8, 'iter': 920, 'avg_loss': 0.717544411409173, 'avg_acc': 50.4885993485342, 'loss': 0.6876316070556641}


EP_train:8:  94%|| 941/1000 [04:47<00:18,  3.19it/s]

{'epoch': 8, 'iter': 940, 'avg_loss': 0.7175555938615302, 'avg_acc': 50.371944739638685, 'loss': 0.67904132604599}


EP_train:8:  96%|| 961/1000 [04:53<00:09,  4.02it/s]

{'epoch': 8, 'iter': 960, 'avg_loss': 0.7170755711852201, 'avg_acc': 50.4942767950052, 'loss': 0.6992769837379456}


EP_train:8:  98%|| 981/1000 [04:58<00:05,  3.72it/s]

{'epoch': 8, 'iter': 980, 'avg_loss': 0.7170659811610962, 'avg_acc': 50.50968399592253, 'loss': 0.7192912101745605}


EP_train:8: 100%|| 1000/1000 [05:04<00:00,  2.83it/s]

EP8_train, avg_loss= 0.7168902532458306 total_acc= 50.412603150787696
EP:8 Model Saved on: ./output/bertmodel2019-05-1114:59:01.616888.ep8



EP_test:8:   1%|| 2/250 [00:00<00:40,  6.10it/s]

{'epoch': 8, 'iter': 0, 'avg_loss': 0.6983381509780884, 'avg_acc': 50.0, 'loss': 0.6983381509780884}


EP_test:8:   8%|| 21/250 [00:01<00:20, 11.36it/s]

{'epoch': 8, 'iter': 20, 'avg_loss': 0.6933656476792835, 'avg_acc': 53.57142857142857, 'loss': 0.694616973400116}


EP_test:8:  17%|| 43/250 [00:03<00:16, 12.34it/s]

{'epoch': 8, 'iter': 40, 'avg_loss': 0.6940822107035939, 'avg_acc': 53.65853658536586, 'loss': 0.7013108134269714}


EP_test:8:  25%|| 63/250 [00:05<00:14, 12.91it/s]

{'epoch': 8, 'iter': 60, 'avg_loss': 0.6936705259026074, 'avg_acc': 54.50819672131148, 'loss': 0.6725661158561707}


EP_test:8:  33%|| 83/250 [00:06<00:13, 12.69it/s]

{'epoch': 8, 'iter': 80, 'avg_loss': 0.6933535344806718, 'avg_acc': 54.629629629629626, 'loss': 0.6725661158561707}


EP_test:8:  41%|| 103/250 [00:08<00:10, 13.67it/s]

{'epoch': 8, 'iter': 100, 'avg_loss': 0.6915679326151857, 'avg_acc': 56.43564356435643, 'loss': 0.6855290532112122}


EP_test:8:  49%|| 123/250 [00:09<00:09, 13.08it/s]

{'epoch': 8, 'iter': 120, 'avg_loss': 0.6930566410387843, 'avg_acc': 54.75206611570248, 'loss': 0.6880117058753967}


EP_test:8:  56%|| 141/250 [00:11<00:10, 10.85it/s]

{'epoch': 8, 'iter': 140, 'avg_loss': 0.6942833429532693, 'avg_acc': 53.54609929078015, 'loss': 0.6946169137954712}


EP_test:8:  65%|| 163/250 [00:13<00:06, 12.86it/s]

{'epoch': 8, 'iter': 160, 'avg_loss': 0.6939335195174129, 'avg_acc': 53.881987577639755, 'loss': 0.7263134717941284}


EP_test:8:  73%|| 183/250 [00:14<00:05, 13.12it/s]

{'epoch': 8, 'iter': 180, 'avg_loss': 0.6942535128382689, 'avg_acc': 53.45303867403315, 'loss': 0.6977270245552063}


EP_test:8:  81%|| 203/250 [00:16<00:03, 13.24it/s]

{'epoch': 8, 'iter': 200, 'avg_loss': 0.6938223548196442, 'avg_acc': 53.60696517412935, 'loss': 0.6751427054405212}


EP_test:8:  89%|| 223/250 [00:17<00:02, 12.44it/s]

{'epoch': 8, 'iter': 220, 'avg_loss': 0.6944333112617423, 'avg_acc': 52.94117647058824, 'loss': 0.665407121181488}


EP_test:8:  97%|| 243/250 [00:19<00:00, 14.31it/s]

{'epoch': 8, 'iter': 240, 'avg_loss': 0.6946419084220506, 'avg_acc': 52.9045643153527, 'loss': 0.6572090983390808}


EP_test:8: 100%|| 250/250 [00:19<00:00, 12.66it/s]

EP8_test, avg_loss= 0.6949264004230499 total_acc= 52.552552552552555



EP_train:9:   0%|| 1/1000 [00:00<07:08,  2.33it/s]

{'epoch': 9, 'iter': 0, 'avg_loss': 0.7176583409309387, 'avg_acc': 50.0, 'loss': 0.7176583409309387}


EP_train:9:   2%|| 21/1000 [00:07<06:26,  2.53it/s]

{'epoch': 9, 'iter': 20, 'avg_loss': 0.7016254578317914, 'avg_acc': 50.0, 'loss': 0.6827939748764038}


EP_train:9:   4%|| 41/1000 [00:14<05:01,  3.18it/s]

{'epoch': 9, 'iter': 40, 'avg_loss': 0.7036346538764674, 'avg_acc': 52.4390243902439, 'loss': 0.7430979013442993}


EP_train:9:   6%|| 61/1000 [00:20<04:32,  3.45it/s]

{'epoch': 9, 'iter': 60, 'avg_loss': 0.7055409643493715, 'avg_acc': 51.229508196721305, 'loss': 0.7131204605102539}


EP_train:9:   8%|| 81/1000 [00:27<06:11,  2.47it/s]

{'epoch': 9, 'iter': 80, 'avg_loss': 0.7032051027556996, 'avg_acc': 51.54320987654321, 'loss': 0.6923386454582214}


EP_train:9:  10%|| 101/1000 [00:33<04:49,  3.10it/s]

{'epoch': 9, 'iter': 100, 'avg_loss': 0.7020271963412219, 'avg_acc': 51.48514851485149, 'loss': 0.64937824010849}


EP_train:9:  12%|| 121/1000 [00:39<03:34,  4.09it/s]

{'epoch': 9, 'iter': 120, 'avg_loss': 0.7067893748441019, 'avg_acc': 51.2396694214876, 'loss': 0.6952921748161316}


EP_train:9:  14%|| 141/1000 [00:44<05:01,  2.84it/s]

{'epoch': 9, 'iter': 140, 'avg_loss': 0.7074132324955987, 'avg_acc': 50.70921985815603, 'loss': 0.7610461711883545}


EP_train:9:  16%|| 161/1000 [00:51<04:18,  3.25it/s]

{'epoch': 9, 'iter': 160, 'avg_loss': 0.7093119393594517, 'avg_acc': 50.77639751552795, 'loss': 0.6744508147239685}


EP_train:9:  18%|| 181/1000 [00:56<03:25,  3.98it/s]

{'epoch': 9, 'iter': 180, 'avg_loss': 0.7098135979465359, 'avg_acc': 50.41436464088398, 'loss': 0.695801317691803}


EP_train:9:  20%|| 201/1000 [01:02<03:44,  3.57it/s]

{'epoch': 9, 'iter': 200, 'avg_loss': 0.7261167219770488, 'avg_acc': 50.99502487562189, 'loss': 0.5872260928153992}


EP_train:9:  22%|| 221/1000 [01:09<04:53,  2.65it/s]

{'epoch': 9, 'iter': 220, 'avg_loss': 0.7365209602527489, 'avg_acc': 50.90497737556561, 'loss': 0.9215488433837891}


EP_train:9:  24%|| 241/1000 [01:16<03:51,  3.28it/s]

{'epoch': 9, 'iter': 240, 'avg_loss': 0.741352360463736, 'avg_acc': 50.62240663900415, 'loss': 0.7072957158088684}


EP_train:9:  26%|| 261/1000 [01:22<04:02,  3.04it/s]

{'epoch': 9, 'iter': 260, 'avg_loss': 0.740390712902007, 'avg_acc': 50.47892720306514, 'loss': 0.9591094255447388}


EP_train:9:  28%|| 281/1000 [01:28<03:30,  3.42it/s]

{'epoch': 9, 'iter': 280, 'avg_loss': 0.7394124548948532, 'avg_acc': 50.355871886120994, 'loss': 0.7089301943778992}


EP_train:9:  30%|| 301/1000 [01:35<04:59,  2.33it/s]

{'epoch': 9, 'iter': 300, 'avg_loss': 0.7390600493877038, 'avg_acc': 50.0, 'loss': 0.6418399810791016}


EP_train:9:  32%|| 321/1000 [01:42<03:40,  3.08it/s]

{'epoch': 9, 'iter': 320, 'avg_loss': 0.7375201396760168, 'avg_acc': 49.84423676012461, 'loss': 0.7184992432594299}


EP_train:9:  34%|| 341/1000 [01:48<03:18,  3.32it/s]

{'epoch': 9, 'iter': 340, 'avg_loss': 0.7382994207206709, 'avg_acc': 50.146627565982406, 'loss': 0.8526623249053955}


EP_train:9:  36%|| 361/1000 [01:55<04:09,  2.56it/s]

{'epoch': 9, 'iter': 360, 'avg_loss': 0.737778643945908, 'avg_acc': 50.20775623268698, 'loss': 0.6979628205299377}


EP_train:9:  38%|| 381/1000 [02:01<02:56,  3.50it/s]

{'epoch': 9, 'iter': 380, 'avg_loss': 0.7344859636283609, 'avg_acc': 50.72178477690289, 'loss': 0.31860023736953735}


EP_train:9:  40%|| 401/1000 [02:06<02:29,  4.01it/s]

{'epoch': 9, 'iter': 400, 'avg_loss': 0.7351747443179537, 'avg_acc': 50.935162094763086, 'loss': 0.7450419068336487}


EP_train:9:  42%|| 421/1000 [02:13<02:56,  3.28it/s]

{'epoch': 9, 'iter': 420, 'avg_loss': 0.7333881302499997, 'avg_acc': 51.306413301662715, 'loss': 0.5990946888923645}


EP_train:9:  44%|| 441/1000 [02:19<03:15,  2.86it/s]

{'epoch': 9, 'iter': 440, 'avg_loss': 0.7354473371371255, 'avg_acc': 51.92743764172335, 'loss': 1.1826260089874268}


EP_train:9:  46%|| 461/1000 [02:27<03:16,  2.74it/s]

{'epoch': 9, 'iter': 460, 'avg_loss': 0.7358460346572182, 'avg_acc': 51.95227765726681, 'loss': 0.6860650181770325}


EP_train:9:  48%|| 481/1000 [02:34<02:51,  3.03it/s]

{'epoch': 9, 'iter': 480, 'avg_loss': 0.7349428661291539, 'avg_acc': 52.02702702702703, 'loss': 0.6304455399513245}


EP_train:9:  50%|| 501/1000 [02:40<02:32,  3.27it/s]

{'epoch': 9, 'iter': 500, 'avg_loss': 0.7351629810552754, 'avg_acc': 52.145708582834324, 'loss': 0.8478464484214783}


EP_train:9:  52%|| 521/1000 [02:47<03:17,  2.43it/s]

{'epoch': 9, 'iter': 520, 'avg_loss': 0.7340806432983582, 'avg_acc': 52.159309021113245, 'loss': 0.7202892899513245}


EP_train:9:  54%|| 541/1000 [02:54<02:29,  3.07it/s]

{'epoch': 9, 'iter': 540, 'avg_loss': 0.7337015674220546, 'avg_acc': 51.75600739371534, 'loss': 0.7190499901771545}


EP_train:9:  56%|| 561/1000 [03:00<02:17,  3.20it/s]

{'epoch': 9, 'iter': 560, 'avg_loss': 0.734639969559755, 'avg_acc': 51.7825311942959, 'loss': 0.7336651682853699}


EP_train:9:  58%|| 581/1000 [03:07<02:52,  2.43it/s]

{'epoch': 9, 'iter': 580, 'avg_loss': 0.733680770884744, 'avg_acc': 51.63511187607573, 'loss': 0.7549792528152466}


EP_train:9:  60%|| 601/1000 [03:14<02:06,  3.14it/s]

{'epoch': 9, 'iter': 600, 'avg_loss': 0.7334278827213904, 'avg_acc': 51.49750415973378, 'loss': 0.8436692357063293}


EP_train:9:  62%|| 621/1000 [03:20<01:56,  3.26it/s]

{'epoch': 9, 'iter': 620, 'avg_loss': 0.7327582247123339, 'avg_acc': 51.52979066022544, 'loss': 0.7341057658195496}


EP_train:9:  64%|| 641/1000 [03:27<02:20,  2.55it/s]

{'epoch': 9, 'iter': 640, 'avg_loss': 0.7320070309788221, 'avg_acc': 51.326053042121686, 'loss': 0.7148861885070801}


EP_train:9:  66%|| 661/1000 [03:33<01:36,  3.50it/s]

{'epoch': 9, 'iter': 660, 'avg_loss': 0.7316308743214733, 'avg_acc': 50.945537065052946, 'loss': 0.7122833728790283}


EP_train:9:  68%|| 681/1000 [03:38<01:19,  4.01it/s]

{'epoch': 9, 'iter': 680, 'avg_loss': 0.7305808236518883, 'avg_acc': 51.027900146842875, 'loss': 0.6763398051261902}


EP_train:9:  70%|| 701/1000 [03:44<01:45,  2.84it/s]

{'epoch': 9, 'iter': 700, 'avg_loss': 0.7303378214945382, 'avg_acc': 50.92724679029958, 'loss': 0.7611858248710632}


EP_train:9:  72%|| 721/1000 [03:52<01:30,  3.08it/s]

{'epoch': 9, 'iter': 720, 'avg_loss': 0.7295639132422068, 'avg_acc': 50.90152565880721, 'loss': 0.6992362141609192}


EP_train:9:  74%|| 741/1000 [03:58<01:11,  3.60it/s]

{'epoch': 9, 'iter': 740, 'avg_loss': 0.7282667371435606, 'avg_acc': 51.14709851551957, 'loss': 0.9756765365600586}


EP_train:9:  76%|| 761/1000 [04:04<01:32,  2.58it/s]

{'epoch': 9, 'iter': 760, 'avg_loss': 0.7283935957929699, 'avg_acc': 51.01839684625493, 'loss': 0.722295343875885}


EP_train:9:  78%|| 781/1000 [04:12<01:14,  2.92it/s]

{'epoch': 9, 'iter': 780, 'avg_loss': 0.7275976126395862, 'avg_acc': 51.18437900128041, 'loss': 0.6977225542068481}


EP_train:9:  80%|| 801/1000 [04:18<00:59,  3.36it/s]

{'epoch': 9, 'iter': 800, 'avg_loss': 0.7264915316897013, 'avg_acc': 51.4669163545568, 'loss': 0.7246012091636658}


EP_train:9:  82%|| 821/1000 [04:25<01:12,  2.46it/s]

{'epoch': 9, 'iter': 820, 'avg_loss': 0.7256777989887945, 'avg_acc': 51.55298416565165, 'loss': 0.6627234220504761}


EP_train:9:  84%|| 841/1000 [04:31<00:48,  3.29it/s]

{'epoch': 9, 'iter': 840, 'avg_loss': 0.724844167250409, 'avg_acc': 51.724137931034484, 'loss': 0.6962646245956421}


EP_train:9:  86%|| 861/1000 [04:37<00:36,  3.83it/s]

{'epoch': 9, 'iter': 860, 'avg_loss': 0.7242106595806751, 'avg_acc': 51.771196283391404, 'loss': 0.7224710583686829}


EP_train:9:  88%|| 881/1000 [04:42<00:36,  3.28it/s]

{'epoch': 9, 'iter': 880, 'avg_loss': 0.7235502345481133, 'avg_acc': 51.70261066969353, 'loss': 0.6659991145133972}


EP_train:9:  90%|| 901/1000 [04:50<00:37,  2.61it/s]

{'epoch': 9, 'iter': 900, 'avg_loss': 0.7229732274223578, 'avg_acc': 51.692563817980016, 'loss': 0.7218537926673889}


EP_train:9:  92%|| 921/1000 [04:57<00:23,  3.34it/s]

{'epoch': 9, 'iter': 920, 'avg_loss': 0.7219765618500117, 'avg_acc': 51.76438653637351, 'loss': 0.7106046080589294}


EP_train:9:  94%|| 941/1000 [05:03<00:18,  3.13it/s]

{'epoch': 9, 'iter': 940, 'avg_loss': 0.7219017455740322, 'avg_acc': 51.726886291179596, 'loss': 0.7210842370986938}


EP_train:9:  96%|| 961/1000 [05:09<00:11,  3.39it/s]

{'epoch': 9, 'iter': 960, 'avg_loss': 0.7217259847294466, 'avg_acc': 51.664932362122784, 'loss': 0.5910884141921997}


EP_train:9:  98%|| 981/1000 [05:17<00:07,  2.56it/s]

{'epoch': 9, 'iter': 980, 'avg_loss': 0.7215225032353194, 'avg_acc': 51.60550458715596, 'loss': 0.6573938131332397}


EP_train:9: 100%|| 1000/1000 [05:23<00:00,  3.34it/s]

EP9_train, avg_loss= 0.7211227002628148 total_acc= 51.58789697424356
EP:9 Model Saved on: ./output/bertmodel2019-05-1114:59:01.616888.ep9



EP_test:9:   1%|| 2/250 [00:00<00:30,  8.06it/s]

{'epoch': 9, 'iter': 0, 'avg_loss': 0.6904822587966919, 'avg_acc': 50.0, 'loss': 0.6904822587966919}


EP_test:9:   9%|| 22/250 [00:01<00:17, 13.00it/s]

{'epoch': 9, 'iter': 20, 'avg_loss': 0.6976396044095358, 'avg_acc': 55.952380952380956, 'loss': 0.6849022507667542}


EP_test:9:  17%|| 42/250 [00:03<00:16, 12.54it/s]

{'epoch': 9, 'iter': 40, 'avg_loss': 0.6979305497029933, 'avg_acc': 49.390243902439025, 'loss': 0.7054044008255005}


EP_test:9:  25%|| 62/250 [00:04<00:14, 12.55it/s]

{'epoch': 9, 'iter': 60, 'avg_loss': 0.6979566050357506, 'avg_acc': 47.13114754098361, 'loss': 0.6935654282569885}


EP_test:9:  33%|| 82/250 [00:06<00:13, 12.26it/s]

{'epoch': 9, 'iter': 80, 'avg_loss': 0.6980067800592493, 'avg_acc': 45.98765432098765, 'loss': 0.6935654282569885}


EP_test:9:  41%|| 102/250 [00:07<00:11, 13.10it/s]

{'epoch': 9, 'iter': 100, 'avg_loss': 0.6979931457207935, 'avg_acc': 45.79207920792079, 'loss': 0.6877457499504089}


EP_test:9:  49%|| 122/250 [00:09<00:09, 13.24it/s]

{'epoch': 9, 'iter': 120, 'avg_loss': 0.697890945702545, 'avg_acc': 46.074380165289256, 'loss': 0.6849023103713989}


EP_test:9:  57%|| 142/250 [00:10<00:07, 14.13it/s]

{'epoch': 9, 'iter': 140, 'avg_loss': 0.6977113265517756, 'avg_acc': 46.63120567375886, 'loss': 0.6818191409111023}


EP_test:9:  65%|| 162/250 [00:12<00:06, 12.67it/s]

{'epoch': 9, 'iter': 160, 'avg_loss': 0.6977932493879188, 'avg_acc': 46.8944099378882, 'loss': 0.6849022507667542}


EP_test:9:  73%|| 182/250 [00:13<00:05, 12.12it/s]

{'epoch': 9, 'iter': 180, 'avg_loss': 0.6977500342532416, 'avg_acc': 46.408839779005525, 'loss': 0.6964454650878906}


EP_test:9:  81%|| 202/250 [00:15<00:03, 12.03it/s]

{'epoch': 9, 'iter': 200, 'avg_loss': 0.6978874814451037, 'avg_acc': 46.01990049751244, 'loss': 0.6967412233352661}


EP_test:9:  89%|| 222/250 [00:17<00:02,  9.56it/s]

{'epoch': 9, 'iter': 220, 'avg_loss': 0.6976574519101311, 'avg_acc': 46.60633484162896, 'loss': 0.7054044008255005}


EP_test:9:  97%|| 242/250 [00:19<00:00,  9.25it/s]

{'epoch': 9, 'iter': 240, 'avg_loss': 0.6977619325966261, 'avg_acc': 47.19917012448133, 'loss': 0.7022286057472229}


EP_test:9: 100%|| 250/250 [00:20<00:00, 11.29it/s]

EP9_test, avg_loss= 0.6977665433883667 total_acc= 47.047047047047045





In [73]:
"""
     (参考) QAの初期化
     def __init__(self):
      super(BertForQuestionAnswering, self).__init__()
      self.bert = BERT()
      self.qa_outputs = nn.Linear(hidden, 2)
      self.apply(self.init_bert_weights)  
      #def prediction(self, "dummyQ")
"""


class QAprediction(BERTTrainer):
    """Run inference on a single test item with a trained question-answering BERT.

    Only the prediction path is implemented here. As before, the parent
    initializer is not invoked, so no optimizer or training state is created;
    just the attributes read by :meth:`prediction` are stored.
    """

    def __init__(self, bert: BertForQuestionAnswering, vocab_size: int,
                 test_dataloader: DataLoader, train_dataloader: DataLoader = None,
                 lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01,
                 with_cuda: bool = True, log_freq: int = 10):
        """
        Store the model and the data source used by :meth:`prediction`.

        :param bert: question-answering BERT model used for inference
        :param vocab_size: total number of words contained in the vocabulary
        :param test_dataloader: data source the item to predict is taken from
        :param train_dataloader: not used for prediction; may be None
        :param lr: learning rate (unused, kept for signature compatibility)
        :param betas: Adam optimizer betas (unused, kept for signature compatibility)
        :param weight_decay: Adam weight decay (unused, kept for signature compatibility)
        :param with_cuda: unused; prediction runs on the CPU
        :param log_freq: logging frequency in iterations (unused)
        """
        # Without these assignments prediction() has no model or data to work with.
        self.bert = bert
        self.vocab_size = vocab_size
        self.test_data = test_dataloader
        self.train_data = train_dataloader

    def prediction(self, file_path="output/bert_trained.model", index: int = 10):
        """
        Load a trained model and print its output for one item of the test data.

        :param file_path: path to a file written by ``torch.save``; it may hold
            either a whole pickled module or a ``state_dict``
        :param index: position of the item to predict within the test data
        :return: the raw model output for the selected item
        :raises IndexError: if the test data yields fewer than ``index + 1`` items
        """
        from itertools import islice

        # torch.load expects the file first; map_location keeps everything on the CPU.
        loaded = torch.load(file_path, map_location="cpu")
        if isinstance(loaded, nn.Module):
            # The checkpoint is a complete pickled model.
            self.bert = loaded
        else:
            # The checkpoint is a state_dict: load it into the model given at init.
            self.bert.load_state_dict(loaded)

        self.bert = self.bert.cpu()
        self.bert.eval()

        if hasattr(self.test_data, "__getitem__"):
            predict_data = self.test_data[index]
        else:
            # A DataLoader is iterable but not subscriptable, so take the
            # index-th item it yields instead of indexing it.
            try:
                predict_data = next(islice(iter(self.test_data), index, None))
            except StopIteration:
                raise IndexError(
                    "test data has no item at index {}".format(index)
                ) from None

        # NOTE(review): the item is handed to the model unchanged, as the
        # original code did; confirm this matches the model's forward() signature.
        with torch.no_grad():
            qa_output = self.bert(predict_data)

        print("予測結果は{}".format(qa_output))

        # No epoch iteration is needed for a single prediction.
        return qa_output