In [1]:
# import stuff
import os
import numpy as np
import torch
import torch.utils.data as data
from itertools import product as product
import time

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torch.autograd import Function
import pandas as pd

# import dataset
from utils.dataset import VOCDataset, DatasetTransform, make_datapath_list, Anno_xml2list, od_collate_fn

# Set up the VOC2007 dataset (all 20 VOC classes)

In [2]:
# Build the train/val file lists from the VOC2007 directory.
# make_datapath_list is unpacked into four lists: image paths and annotation
# paths for the training split, then the same pair for the validation split.
vocpath = os.path.join("..", "VOCdevkit", "VOC2007")
train_img_list, train_anno_list, val_img_list, val_anno_list = make_datapath_list(vocpath)

# Dataset configuration
voc_classes = ['aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat', 'chair',
               'cow', 'diningtable', 'dog', 'horse',
               'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor']
color_mean = (104, 117, 123)  # mean colour values, in (BGR) order
input_size = 128  # input image size; the recorded batches are 3x128x128 (the old comment said 300x300)

# Pre-processing applied to images and to the XML annotations
transform = DatasetTransform(input_size, color_mean)
transform_anno = Anno_xml2list(voc_classes)

# The validation dataset gets its own, identically configured transform objects.
train_dataset = VOCDataset(train_img_list, train_anno_list, phase = "train", transform=transform, transform_anno = transform_anno)
val_dataset = VOCDataset(val_img_list, val_anno_list, phase="val", transform=DatasetTransform(
    input_size, color_mean), transform_anno=Anno_xml2list(voc_classes))

batch_size = 64

# Only the training loader shuffles. od_collate_fn is the project's collate
# function; the recorded output shows targets arriving as a list of per-image
# tensors, which is presumably why a custom collate is needed - TODO confirm.
train_dataloader = data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, collate_fn=od_collate_fn, num_workers=8)

val_dataloader = data.DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False, collate_fn=od_collate_fn, num_workers=8)

# train_model looks the loaders up by phase name ("train" / "val").
dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}

000005
000007
000009
000012
000016
000017
000019
000020
000021
000023
000024
000026
000030
000032
000033
000034
000035
000036
000039
000041
000042
000044
000046
000047
000048
000050
000051
000052
000060
000061
000063
000064
000065
000066
000072
000073
000077
000078
000081
000083
000089
000091
000093
000095
000099
000101
000102
000104
000107
000109
000110
000112
000113
000117
000118
000120
000121
000122
000123
000125
000129
000130
000131
000132
000133
000134
000138
000140
000141
000142
000143
000146
000147
000150
000153
000154
000156
000158
000159
000161
000162
000163
000164
000165
000169
000170
000171
000173
000174
000177
000180
000184
000187
000189
000190
000192
000193
000194
000198
000200
000203
000207
000208
000209
000210
000211
000214
000215
000218
000219
000220
000221
000222
000224
000225
000228
000229
000232
000233
000235
000236
000241
000242
000244
000245
000246
000249
000250
000251
000256
000257
000259
000262
000263
000266
000268
000269
000270
000275
000276
000278
000282
000285

004960
004961
004962
004963
004966
004967
004968
004972
004973
004974
004976
004977
004982
004983
004984
004985
004986
004987
004990
004991
004992
004994
004995
004997
004998
004999
005001
005003
005004
005006
005007
005014
005016
005018
005020
005023
005024
005026
005027
005028
005029
005032
005033
005036
005037
005039
005042
005045
005047
005052
005054
005055
005056
005057
005058
005061
005062
005063
005064
005065
005067
005068
005071
005072
005073
005077
005078
005079
005081
005084
005085
005086
005090
005093
005094
005097
005101
005102
005104
005107
005108
005110
005111
005114
005116
005121
005122
005124
005128
005129
005130
005131
005134
005135
005136
005138
005143
005144
005145
005146
005150
005153
005156
005159
005160
005161
005168
005169
005171
005173
005175
005176
005177
005179
005181
005183
005185
005186
005189
005190
005191
005195
005199
005202
005203
005208
005209
005210
005212
005214
005215
005217
005219
005220
005222
005223
005224
005229
005230
005231
005236
005239
005242

008933
008936
008939
008940
008942
008943
008944
008948
008951
008953
008955
008958
008960
008961
008962
008965
008966
008967
008968
008969
008970
008971
008973
008975
008976
008978
008979
008980
008982
008983
008985
008987
008988
008989
008995
008997
008999
009000
009002
009004
009005
009006
009007
009015
009016
009018
009019
009020
009022
009024
009027
009029
009032
009034
009035
009036
009037
009039
009042
009045
009048
009049
009051
009053
009058
009059
009060
009063
009064
009066
009068
009072
009073
009078
009079
009080
009085
009086
009087
009089
009091
009094
009098
009099
009100
009105
009106
009108
009112
009113
009114
009116
009117
009121
009123
009126
009128
009129
009131
009133
009136
009138
009141
009144
009147
009148
009150
009151
009153
009155
009157
009159
009160
009161
009162
009163
009166
009168
009173
009174
009175
009177
009178
009179
009180
009181
009184
009185
009186
009187
009189
009191
009192
009193
009194
009195
009196
009197
009200
009202
009205
009208
009209

In [3]:
# Sanity check: pull a single batch from the training loader and inspect it.
batch_iterator = iter(dataloaders_dict["train"])  # turn the loader into an iterator
images, targets = next(batch_iterator)  # fetch the first batch
print(images.size())  # recorded run: torch.Size([64, 3, 128, 128])
print(len(targets))  # recorded run: 64, one entry per image in the batch
print(targets[1].shape)  # recorded run: torch.Size([1, 5]) for the second image

torch.Size([64, 3, 128, 128])
64
torch.Size([1, 5])


In [4]:
# Show the annotation tensor of the second image in the batch.
# NOTE(review): each row has 5 values - presumably 4 normalised box
# coordinates followed by a class index; confirm against Anno_xml2list.
targets[1]

tensor([[0.6791, 0.1970, 0.8268, 0.2903, 9.0000]])

# test with ssd model.

In [5]:
from utils.blazeface import SSD

BlazeFace(
  (features): Sequential(
    (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): BlazeBlock(
      (conv1): Sequential(
        (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
        (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (act): ReLU(inplace)
    )
    (4): BlazeBlock(
      (conv1): Sequential(
        (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
        (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, a

In [6]:
# SSD configuration for the 128x128 BlazeFace-based detector
# (the original comment still said "SSD300").
# NOTE(review): input_size / feature_maps gives 8 and 16, while 'steps' is
# [4, 8]. If the default-box generator derives its grid from
# input_size / steps, as common SSD implementations do, box centres would be
# misplaced - verify against utils.blazeface.
# NOTE(review): 'aspect_ratios' has 6 entries but there are only 2 feature
# maps; presumably only the first two entries are used - confirm.
ssd_cfg = {
    'num_classes': 21,  # total number of classes, including the background class
    'input_size': 128,  # input image size
    'bbox_aspect_num': [4, 6],  # number of default-box (DBox) aspect variants per source
    'feature_maps': [16, 8],  # feature-map size of each source
    'steps': [4, 8],  # used to determine the DBox size
    'min_sizes': [30, 60],  # used to determine the DBox size
    'max_sizes': [60, 128],  # used to determine the DBox size
    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
}

# Build the network in training mode from the configuration above.
net = SSD(phase="train", cfg=ssd_cfg)

# Weight initialisation for the SSD heads

def weights_init(m):
    """Initialise a single module in place; intended for ``Module.apply``.

    ``nn.Conv2d`` modules get Kaiming-normal weights and, when a bias exists,
    a bias filled with zeros. Every other module type is left untouched.

    Args:
        m: the module currently visited by ``apply``.
    """
    # Guard clause: only convolution layers are re-initialised.
    if not isinstance(m, nn.Conv2d):
        return

    init.kaiming_normal_(m.weight.data)

    # Convolutions built with bias=False have nothing to reset.
    has_bias = m.bias is not None
    if has_bias:
        init.constant_(m.bias, 0.0)

# Re-initialise only the localisation (loc) and confidence (conf) heads;
# Module.apply visits every sub-module and weights_init touches the Conv2d ones.
net.loc.apply(weights_init)
net.conf.apply(weights_init)

# Check whether a GPU is available; this global `device` is later handed to
# the loss constructor.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using:", device)

print("set weights!")

using: cuda:0
set weights!


In [7]:
# Print the module tree of the assembled SSD network (the output is long).
print(net)

SSD(
  (blaze): BlazeFace(
    (features): Sequential(
      (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace)
      (3): BlazeBlock(
        (conv1): Sequential(
          (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
          (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
          (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (act): ReLU(inplace)
      )
      (4): BlazeBlock(
        (conv1): Sequential(
          (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
          (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
   

In [8]:
from utils.ssd_model import MultiBoxLoss

# Loss function. The criterion is called as criterion(outputs, targets) and
# returns a (loss_l, loss_c) pair that train_model adds together.
# NOTE(review): jaccard_thresh / neg_pos are presumably the IoU matching
# threshold and the negative:positive mining ratio - confirm in utils.ssd_model.
criterion = MultiBoxLoss(jaccard_thresh=0.5,neg_pos=2, device=device)

# Optimiser: SGD with momentum and weight decay. The lr given here is
# overwritten at the start of every epoch by adjust_learning_rate().
import torch.optim as optim
optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.9, weight_decay=5e-4)

In [9]:
def get_current_lr(epoch, base_lr=1e-3, decay_epochs=(120, 180), gamma=0.1):
    """Return the step-decayed learning rate for the given epoch index.

    The rate starts at ``base_lr`` and is multiplied by ``gamma`` once for
    every milestone in ``decay_epochs`` that ``epoch`` has reached
    (``epoch >= milestone``). With the defaults this reproduces the original
    hard-coded schedule: 1e-3, then x0.1 from epoch 120 and again from 180.

    Args:
        epoch: epoch index compared against the milestones.
        base_lr: learning rate before any decay is applied.
        decay_epochs: iterable of epoch indices at which the rate decays.
        gamma: multiplicative decay factor applied per reached milestone.

    Returns:
        The learning rate (float) to use for ``epoch``.
    """
    lr = base_lr
    # Multiply step by step rather than using gamma ** n, so the floating
    # point result matches the original implementation exactly.
    for decay_epoch in decay_epochs:
        if epoch >= decay_epoch:
            lr *= gamma
    return lr

def adjust_learning_rate(optimizer, epoch):
    """Set every parameter group of ``optimizer`` to the rate for ``epoch``.

    The rate comes from ``get_current_lr(epoch)`` and is printed before it is
    written into each entry of ``optimizer.param_groups``.

    Args:
        optimizer: object exposing ``param_groups``, a sequence of dicts.
        epoch: epoch index forwarded to ``get_current_lr``.
    """
    scheduled_lr = get_current_lr(epoch)
    print("lr is:", scheduled_lr)
    for group in optimizer.param_groups:
        group['lr'] = scheduled_lr

In [10]:
# Function that trains (and periodically validates) the model
def train_model(net, dataloaders_dict, criterion, optimizer, num_epochs):
    """Train ``net`` for ``num_epochs`` epochs, validating on every 5th epoch.

    For each epoch the function
      * sets the learning rate through ``adjust_learning_rate``,
      * runs the 'train' phase and, when ``(epoch + 1) % 5 == 0``, the 'val' phase,
      * rewrites ``log_output.csv`` with the per-epoch summed batch losses,
      * saves ``net.state_dict()`` under ``weights/`` on every 10th epoch.

    The logged losses are sums over batches, not means, and ``val_loss`` stays
    at 0.0 for epochs in which no validation pass was run.

    Args:
        net: model, called as ``net(images)``; moved to the selected device.
        dataloaders_dict: mapping with 'train' and 'val' entries, each an
            iterable of ``(images, targets)`` batches where ``targets`` is a
            list of tensors.
        criterion: callable ``criterion(outputs, targets)`` returning the
            pair ``(loss_l, loss_c)``.
        optimizer: optimiser updating the parameters of ``net``.
        num_epochs: number of epochs to run.

    Returns:
        None. Progress is printed; results go to the CSV log and checkpoints.
    """
    # Use the GPU when one is available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("used device：", device)

    # Move the network to the selected device
    net.to(device)

    # Speeds things up when the network is more or less fixed
    torch.backends.cudnn.benchmark = True

    iteration = 1  # global training-iteration counter, across epochs
    logs = []

    # FIX: the loop used range(num_epochs + 1), which ran one epoch more than
    # requested and printed e.g. "Epoch 201/200".
    for epoch in range(num_epochs):

        adjust_learning_rate(optimizer, epoch)

        # Remember the start times and reset the per-epoch loss sums
        t_epoch_start = time.time()
        t_iter_start = time.time()
        epoch_train_loss = 0.0
        epoch_val_loss = 0.0

        print('-------------')
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-------------')

        # Training phase, then (sometimes) validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()  # put the model in training mode
                print('train')
            else:
                if((epoch+1) % 5 == 0):
                    net.eval()   # put the model in evaluation mode
                    print('-------------')
                    print('val')
                else:
                    # validation only runs once every 5 epochs
                    continue

            # Iterate over the mini-batches of the current phase
            for images, targets in dataloaders_dict[phase]:

                # Send the batch to the device; targets is a list, so each
                # tensor is moved individually
                images = images.to(device)
                targets = [ann.to(device)
                           for ann in targets]

                # Clear gradients left over from the previous step
                optimizer.zero_grad()

                # Forward pass; gradients are tracked only while training
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = net(images)

                    # Total loss = localisation loss + confidence loss
                    loss_l, loss_c = criterion(outputs, targets)
                    loss = loss_l + loss_c

                    if phase == 'train':
                        loss.backward()  # compute gradients

                        # Overly large gradients destabilise training, so
                        # clip every gradient value to at most 2.0
                        nn.utils.clip_grad_value_(
                            net.parameters(), clip_value=2.0)

                        optimizer.step()  # update parameters

                        if (iteration % 10 == 0):  # report the loss every 10 iterations
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print('Iteration {} || Loss: {:.4f} || 10iter: {:.4f} sec.'.format(
                                iteration, loss.item(), duration))
                            t_iter_start = time.time()

                        epoch_train_loss += loss.item()
                        iteration += 1

                    else:
                        epoch_val_loss += loss.item()

        # Per-epoch summary of the summed losses and elapsed time
        t_epoch_finish = time.time()
        print('-------------')
        print('epoch {} || Epoch_TRAIN_Loss:{:.4f} ||Epoch_VAL_Loss:{:.4f}'.format(
            epoch+1, epoch_train_loss, epoch_val_loss))
        print('timer:  {:.4f} sec.'.format(t_epoch_finish - t_epoch_start))

        # Save the log; the whole CSV is rewritten after every epoch
        log_epoch = {'epoch': epoch+1,
                     'train_loss': epoch_train_loss, 'val_loss': epoch_val_loss}
        logs.append(log_epoch)
        df = pd.DataFrame(logs)
        df.to_csv("log_output.csv")

        # Checkpoint the network every 10 epochs
        if ((epoch+1) % 10 == 0):
            # FIX: torch.save does not create missing directories
            os.makedirs('weights', exist_ok=True)
            torch.save(net.state_dict(), 'weights/blazeface128VOC_' +
                       str(epoch+1) + '.pth')

# start training here

In [11]:
# Launch training. train_model prints its progress, rewrites log_output.csv
# after each epoch and saves a checkpoint under weights/ every 10th epoch.
num_epochs = 200
train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=num_epochs)

used device： cuda:0
lr is: 0.001
-------------
Epoch 1/200
-------------
train
Iteration 10 || Loss: 29.6023 || 10iter: 10.9506 sec.
Iteration 20 || Loss: 23.3059 || 10iter: 4.6900 sec.
Iteration 30 || Loss: 18.4569 || 10iter: 4.3560 sec.
Iteration 40 || Loss: 18.7418 || 10iter: 4.4052 sec.
Iteration 50 || Loss: 15.1519 || 10iter: 4.7336 sec.
Iteration 60 || Loss: 15.2802 || 10iter: 4.8466 sec.
Iteration 70 || Loss: 13.5241 || 10iter: 3.3809 sec.
-------------
epoch 1 || Epoch_TRAIN_Loss:1651.0105 ||Epoch_VAL_Loss:0.0000
timer:  40.3107 sec.
lr is: 0.001
-------------
Epoch 2/200
-------------
train
Iteration 80 || Loss: 15.9082 || 10iter: 4.1762 sec.
Iteration 90 || Loss: 16.3893 || 10iter: 6.9769 sec.
Iteration 100 || Loss: 14.1174 || 10iter: 5.0676 sec.
Iteration 110 || Loss: 13.2159 || 10iter: 4.4170 sec.
Iteration 120 || Loss: 12.5102 || 10iter: 4.8756 sec.
Iteration 130 || Loss: 13.7482 || 10iter: 4.5590 sec.
Iteration 140 || Loss: 12.0504 || 10iter: 4.7415 sec.
Iteration 150 || 

Iteration 1120 || Loss: 8.4006 || 10iter: 6.0264 sec.
Iteration 1130 || Loss: 9.0219 || 10iter: 4.5365 sec.
Iteration 1140 || Loss: 8.2331 || 10iter: 4.6713 sec.
Iteration 1150 || Loss: 8.6595 || 10iter: 4.6912 sec.
Iteration 1160 || Loss: 8.4130 || 10iter: 4.4023 sec.
Iteration 1170 || Loss: 8.1138 || 10iter: 5.0531 sec.
Iteration 1180 || Loss: 8.1990 || 10iter: 2.7085 sec.
-------------
val
-------------
epoch 15 || Epoch_TRAIN_Loss:676.4399 ||Epoch_VAL_Loss:335.2492
timer:  50.3796 sec.
lr is: 0.001
-------------
Epoch 16/200
-------------
train
Iteration 1190 || Loss: 8.7035 || 10iter: 7.3222 sec.
Iteration 1200 || Loss: 8.7335 || 10iter: 5.4954 sec.
Iteration 1210 || Loss: 8.1417 || 10iter: 5.1213 sec.
Iteration 1220 || Loss: 8.8950 || 10iter: 4.3846 sec.
Iteration 1230 || Loss: 7.9730 || 10iter: 4.6609 sec.
Iteration 1240 || Loss: 8.8896 || 10iter: 4.5830 sec.
Iteration 1250 || Loss: 8.3721 || 10iter: 4.2469 sec.
Iteration 1260 || Loss: 8.0381 || 10iter: 2.6585 sec.
-------------

Iteration 2220 || Loss: 8.2445 || 10iter: 8.7083 sec.
Iteration 2230 || Loss: 8.0514 || 10iter: 5.8645 sec.
Iteration 2240 || Loss: 7.8717 || 10iter: 4.2027 sec.
Iteration 2250 || Loss: 6.9328 || 10iter: 4.7247 sec.
Iteration 2260 || Loss: 8.0714 || 10iter: 4.4041 sec.
Iteration 2270 || Loss: 8.1670 || 10iter: 4.8034 sec.
Iteration 2280 || Loss: 7.1901 || 10iter: 3.9027 sec.
Iteration 2290 || Loss: 7.4076 || 10iter: 2.6778 sec.
-------------
epoch 29 || Epoch_TRAIN_Loss:632.6935 ||Epoch_VAL_Loss:0.0000
timer:  39.8111 sec.
lr is: 0.001
-------------
Epoch 30/200
-------------
train
Iteration 2300 || Loss: 7.8879 || 10iter: 11.2488 sec.
Iteration 2310 || Loss: 8.2625 || 10iter: 5.1958 sec.
Iteration 2320 || Loss: 7.9251 || 10iter: 4.2402 sec.
Iteration 2330 || Loss: 7.4470 || 10iter: 4.7375 sec.
Iteration 2340 || Loss: 8.2425 || 10iter: 4.5130 sec.
Iteration 2350 || Loss: 7.5599 || 10iter: 4.7223 sec.
Iteration 2360 || Loss: 8.4366 || 10iter: 3.8192 sec.
Iteration 2370 || Loss: 7.7690 |

Iteration 3320 || Loss: 7.5642 || 10iter: 5.2141 sec.
Iteration 3330 || Loss: 7.6851 || 10iter: 6.2053 sec.
Iteration 3340 || Loss: 7.3296 || 10iter: 4.8349 sec.
Iteration 3350 || Loss: 7.7371 || 10iter: 4.7184 sec.
Iteration 3360 || Loss: 7.2479 || 10iter: 4.6146 sec.
Iteration 3370 || Loss: 7.4304 || 10iter: 4.4780 sec.
Iteration 3380 || Loss: 7.4144 || 10iter: 4.7501 sec.
Iteration 3390 || Loss: 7.7933 || 10iter: 3.1247 sec.
-------------
epoch 43 || Epoch_TRAIN_Loss:619.9213 ||Epoch_VAL_Loss:0.0000
timer:  40.1257 sec.
lr is: 0.001
-------------
Epoch 44/200
-------------
train
Iteration 3400 || Loss: 8.3615 || 10iter: 6.1860 sec.
Iteration 3410 || Loss: 7.3907 || 10iter: 5.8462 sec.
Iteration 3420 || Loss: 8.0827 || 10iter: 4.7511 sec.
Iteration 3430 || Loss: 7.6008 || 10iter: 4.8242 sec.
Iteration 3440 || Loss: 7.7430 || 10iter: 4.6976 sec.
Iteration 3450 || Loss: 7.5046 || 10iter: 4.5491 sec.
Iteration 3460 || Loss: 7.7838 || 10iter: 4.8837 sec.
Iteration 3470 || Loss: 7.6497 ||

Iteration 4430 || Loss: 7.3362 || 10iter: 7.9547 sec.
Iteration 4440 || Loss: 8.2832 || 10iter: 5.3994 sec.
Iteration 4450 || Loss: 7.6814 || 10iter: 4.9281 sec.
Iteration 4460 || Loss: 7.5738 || 10iter: 4.3818 sec.
Iteration 4470 || Loss: 8.1004 || 10iter: 4.3034 sec.
Iteration 4480 || Loss: 8.3416 || 10iter: 4.7159 sec.
Iteration 4490 || Loss: 7.3484 || 10iter: 4.2141 sec.
Iteration 4500 || Loss: 7.6546 || 10iter: 2.6682 sec.
-------------
epoch 57 || Epoch_TRAIN_Loss:602.7313 ||Epoch_VAL_Loss:0.0000
timer:  39.6745 sec.
lr is: 0.001
-------------
Epoch 58/200
-------------
train
Iteration 4510 || Loss: 7.9473 || 10iter: 8.6252 sec.
Iteration 4520 || Loss: 7.4184 || 10iter: 5.7824 sec.
Iteration 4530 || Loss: 7.3579 || 10iter: 4.4959 sec.
Iteration 4540 || Loss: 7.3421 || 10iter: 4.5024 sec.
Iteration 4550 || Loss: 8.0270 || 10iter: 4.8960 sec.
Iteration 4560 || Loss: 8.6034 || 10iter: 4.6456 sec.
Iteration 4570 || Loss: 8.4665 || 10iter: 3.8839 sec.
Iteration 4580 || Loss: 7.6092 ||

Iteration 5540 || Loss: 6.9874 || 10iter: 10.7431 sec.
Iteration 5550 || Loss: 8.0100 || 10iter: 4.8436 sec.
Iteration 5560 || Loss: 7.4643 || 10iter: 4.8327 sec.
Iteration 5570 || Loss: 7.5365 || 10iter: 4.5915 sec.
Iteration 5580 || Loss: 7.8349 || 10iter: 4.5827 sec.
Iteration 5590 || Loss: 7.5802 || 10iter: 4.4773 sec.
Iteration 5600 || Loss: 7.6261 || 10iter: 3.6598 sec.
-------------
epoch 71 || Epoch_TRAIN_Loss:598.6575 ||Epoch_VAL_Loss:0.0000
timer:  40.3422 sec.
lr is: 0.001
-------------
Epoch 72/200
-------------
train
Iteration 5610 || Loss: 7.6670 || 10iter: 3.9917 sec.
Iteration 5620 || Loss: 7.6091 || 10iter: 6.9906 sec.
Iteration 5630 || Loss: 7.1984 || 10iter: 5.0881 sec.
Iteration 5640 || Loss: 6.6879 || 10iter: 4.8719 sec.
Iteration 5650 || Loss: 7.4332 || 10iter: 4.8356 sec.
Iteration 5660 || Loss: 8.4924 || 10iter: 4.5809 sec.
Iteration 5670 || Loss: 7.2329 || 10iter: 4.7111 sec.
Iteration 5680 || Loss: 7.4658 || 10iter: 3.2002 sec.
-------------
epoch 72 || Epoch_

Iteration 6650 || Loss: 7.1282 || 10iter: 5.8680 sec.
Iteration 6660 || Loss: 7.4400 || 10iter: 5.7051 sec.
Iteration 6670 || Loss: 7.5945 || 10iter: 4.7411 sec.
Iteration 6680 || Loss: 7.2295 || 10iter: 4.6780 sec.
Iteration 6690 || Loss: 7.4645 || 10iter: 4.6156 sec.
Iteration 6700 || Loss: 7.2158 || 10iter: 4.2716 sec.
Iteration 6710 || Loss: 7.4447 || 10iter: 2.8125 sec.
-------------
val
-------------
epoch 85 || Epoch_TRAIN_Loss:588.3253 ||Epoch_VAL_Loss:302.2940
timer:  50.9072 sec.
lr is: 0.001
-------------
Epoch 86/200
-------------
train
Iteration 6720 || Loss: 7.6274 || 10iter: 7.1147 sec.
Iteration 6730 || Loss: 7.9492 || 10iter: 5.7072 sec.
Iteration 6740 || Loss: 7.9173 || 10iter: 4.7434 sec.
Iteration 6750 || Loss: 7.5764 || 10iter: 4.8336 sec.
Iteration 6760 || Loss: 7.3553 || 10iter: 4.7206 sec.
Iteration 6770 || Loss: 7.5243 || 10iter: 4.8761 sec.
Iteration 6780 || Loss: 8.0348 || 10iter: 4.4266 sec.
Iteration 6790 || Loss: 7.3649 || 10iter: 2.6742 sec.
-------------

Iteration 7750 || Loss: 7.1686 || 10iter: 8.9728 sec.
Iteration 7760 || Loss: 7.1841 || 10iter: 5.9214 sec.
Iteration 7770 || Loss: 7.6217 || 10iter: 4.2811 sec.
Iteration 7780 || Loss: 7.0230 || 10iter: 4.7475 sec.
Iteration 7790 || Loss: 7.3366 || 10iter: 4.6355 sec.
Iteration 7800 || Loss: 7.2007 || 10iter: 4.5196 sec.
Iteration 7810 || Loss: 7.0459 || 10iter: 3.8236 sec.
Iteration 7820 || Loss: 6.9496 || 10iter: 2.6530 sec.
-------------
epoch 99 || Epoch_TRAIN_Loss:582.1453 ||Epoch_VAL_Loss:0.0000
timer:  40.1773 sec.
lr is: 0.001
-------------
Epoch 100/200
-------------
train
Iteration 7830 || Loss: 7.2789 || 10iter: 9.8923 sec.
Iteration 7840 || Loss: 7.0057 || 10iter: 5.2014 sec.
Iteration 7850 || Loss: 7.7371 || 10iter: 4.4825 sec.
Iteration 7860 || Loss: 7.2310 || 10iter: 4.4938 sec.
Iteration 7870 || Loss: 7.8994 || 10iter: 4.8376 sec.
Iteration 7880 || Loss: 6.9370 || 10iter: 4.5235 sec.
Iteration 7890 || Loss: 8.2298 || 10iter: 3.7078 sec.
Iteration 7900 || Loss: 6.5381 |

Iteration 8850 || Loss: 7.0904 || 10iter: 4.6496 sec.
Iteration 8860 || Loss: 7.1688 || 10iter: 6.5432 sec.
Iteration 8870 || Loss: 7.2400 || 10iter: 5.0953 sec.
Iteration 8880 || Loss: 7.0444 || 10iter: 4.3575 sec.
Iteration 8890 || Loss: 7.4830 || 10iter: 5.0300 sec.
Iteration 8900 || Loss: 7.9879 || 10iter: 4.3857 sec.
Iteration 8910 || Loss: 6.8580 || 10iter: 5.1090 sec.
Iteration 8920 || Loss: 7.3038 || 10iter: 3.0980 sec.
-------------
epoch 113 || Epoch_TRAIN_Loss:573.6355 ||Epoch_VAL_Loss:0.0000
timer:  40.4196 sec.
lr is: 0.001
-------------
Epoch 114/200
-------------
train
Iteration 8930 || Loss: 7.4717 || 10iter: 5.7147 sec.
Iteration 8940 || Loss: 7.1344 || 10iter: 6.3528 sec.
Iteration 8950 || Loss: 7.1244 || 10iter: 4.9913 sec.
Iteration 8960 || Loss: 7.3127 || 10iter: 4.7169 sec.
Iteration 8970 || Loss: 7.4502 || 10iter: 4.6688 sec.
Iteration 8980 || Loss: 7.2919 || 10iter: 4.4717 sec.
Iteration 8990 || Loss: 7.5356 || 10iter: 4.9085 sec.
Iteration 9000 || Loss: 7.1884 

Iteration 9960 || Loss: 7.4915 || 10iter: 8.0207 sec.
Iteration 9970 || Loss: 7.2640 || 10iter: 5.1966 sec.
Iteration 9980 || Loss: 7.7932 || 10iter: 4.6986 sec.
Iteration 9990 || Loss: 7.2663 || 10iter: 4.9063 sec.
Iteration 10000 || Loss: 8.0473 || 10iter: 5.3923 sec.
Iteration 10010 || Loss: 7.2607 || 10iter: 5.1468 sec.
Iteration 10020 || Loss: 6.8290 || 10iter: 4.1784 sec.
Iteration 10030 || Loss: 6.8988 || 10iter: 2.6921 sec.
-------------
epoch 127 || Epoch_TRAIN_Loss:581.0813 ||Epoch_VAL_Loss:0.0000
timer:  41.3371 sec.
lr is: 0.0001
-------------
Epoch 128/200
-------------
train
Iteration 10040 || Loss: 7.1013 || 10iter: 8.3769 sec.
Iteration 10050 || Loss: 6.4887 || 10iter: 5.7794 sec.
Iteration 10060 || Loss: 7.3477 || 10iter: 4.7893 sec.
Iteration 10070 || Loss: 6.7089 || 10iter: 4.5065 sec.
Iteration 10080 || Loss: 7.0928 || 10iter: 4.6466 sec.
Iteration 10090 || Loss: 7.0040 || 10iter: 4.8737 sec.
Iteration 10100 || Loss: 7.1648 || 10iter: 3.9944 sec.
Iteration 10110 || 

-------------
epoch 140 || Epoch_TRAIN_Loss:575.1957 ||Epoch_VAL_Loss:293.7180
timer:  50.9668 sec.
lr is: 0.0001
-------------
Epoch 141/200
-------------
train
Iteration 11070 || Loss: 6.9538 || 10iter: 10.8916 sec.
Iteration 11080 || Loss: 7.7461 || 10iter: 5.0452 sec.
Iteration 11090 || Loss: 6.8263 || 10iter: 4.3716 sec.
Iteration 11100 || Loss: 7.1041 || 10iter: 4.7142 sec.
Iteration 11110 || Loss: 7.4043 || 10iter: 4.5860 sec.
Iteration 11120 || Loss: 7.3300 || 10iter: 4.8669 sec.
Iteration 11130 || Loss: 7.9089 || 10iter: 3.4477 sec.
-------------
epoch 141 || Epoch_TRAIN_Loss:571.6904 ||Epoch_VAL_Loss:0.0000
timer:  40.6133 sec.
lr is: 0.0001
-------------
Epoch 142/200
-------------
train
Iteration 11140 || Loss: 7.2441 || 10iter: 4.6571 sec.
Iteration 11150 || Loss: 7.4624 || 10iter: 6.4610 sec.
Iteration 11160 || Loss: 7.6889 || 10iter: 4.9664 sec.
Iteration 11170 || Loss: 7.5451 || 10iter: 4.5932 sec.
Iteration 11180 || Loss: 6.8067 || 10iter: 4.6131 sec.
Iteration 11190 |

Iteration 12150 || Loss: 7.3470 || 10iter: 4.6345 sec.
Iteration 12160 || Loss: 7.1112 || 10iter: 2.8005 sec.
-------------
epoch 154 || Epoch_TRAIN_Loss:574.5252 ||Epoch_VAL_Loss:0.0000
timer:  40.1482 sec.
lr is: 0.0001
-------------
Epoch 155/200
-------------
train
Iteration 12170 || Loss: 7.3502 || 10iter: 7.0269 sec.
Iteration 12180 || Loss: 6.7901 || 10iter: 5.8352 sec.
Iteration 12190 || Loss: 7.3249 || 10iter: 4.6013 sec.
Iteration 12200 || Loss: 7.6315 || 10iter: 4.7466 sec.
Iteration 12210 || Loss: 6.5737 || 10iter: 4.8769 sec.
Iteration 12220 || Loss: 7.8693 || 10iter: 4.7575 sec.
Iteration 12230 || Loss: 7.1512 || 10iter: 4.6599 sec.
Iteration 12240 || Loss: 7.3771 || 10iter: 2.7224 sec.
-------------
val
-------------
epoch 155 || Epoch_TRAIN_Loss:573.9549 ||Epoch_VAL_Loss:293.3489
timer:  50.4585 sec.
lr is: 0.0001
-------------
Epoch 156/200
-------------
train
Iteration 12250 || Loss: 7.5770 || 10iter: 7.4807 sec.
Iteration 12260 || Loss: 6.8033 || 10iter: 5.6085 sec.


Iteration 13230 || Loss: 7.8335 || 10iter: 4.6327 sec.
Iteration 13240 || Loss: 7.8003 || 10iter: 5.1521 sec.
Iteration 13250 || Loss: 7.9116 || 10iter: 4.6609 sec.
Iteration 13260 || Loss: 7.4263 || 10iter: 3.9394 sec.
Iteration 13270 || Loss: 7.2538 || 10iter: 2.6949 sec.
-------------
epoch 168 || Epoch_TRAIN_Loss:574.1076 ||Epoch_VAL_Loss:0.0000
timer:  40.7270 sec.
lr is: 0.0001
-------------
Epoch 169/200
-------------
train
Iteration 13280 || Loss: 7.2439 || 10iter: 9.5531 sec.
Iteration 13290 || Loss: 7.2440 || 10iter: 5.6237 sec.
Iteration 13300 || Loss: 7.0751 || 10iter: 4.6113 sec.
Iteration 13310 || Loss: 7.2790 || 10iter: 4.8045 sec.
Iteration 13320 || Loss: 7.3349 || 10iter: 4.7545 sec.
Iteration 13330 || Loss: 7.2753 || 10iter: 4.5395 sec.
Iteration 13340 || Loss: 7.1332 || 10iter: 4.0341 sec.
Iteration 13350 || Loss: 7.0883 || 10iter: 2.6836 sec.
-------------
epoch 169 || Epoch_TRAIN_Loss:573.5362 ||Epoch_VAL_Loss:0.0000
timer:  41.1576 sec.
lr is: 0.0001
-------------

Iteration 14310 || Loss: 7.6364 || 10iter: 7.3449 sec.
Iteration 14320 || Loss: 6.7037 || 10iter: 4.6634 sec.
Iteration 14330 || Loss: 7.5829 || 10iter: 4.4787 sec.
Iteration 14340 || Loss: 7.7472 || 10iter: 5.2835 sec.
Iteration 14350 || Loss: 7.3572 || 10iter: 4.9510 sec.
Iteration 14360 || Loss: 7.7453 || 10iter: 4.8470 sec.
Iteration 14370 || Loss: 7.0304 || 10iter: 3.0764 sec.
-------------
epoch 182 || Epoch_TRAIN_Loss:575.7130 ||Epoch_VAL_Loss:0.0000
timer:  40.9863 sec.
lr is: 1e-05
-------------
Epoch 183/200
-------------
train
Iteration 14380 || Loss: 7.2030 || 10iter: 5.0208 sec.
Iteration 14390 || Loss: 7.0232 || 10iter: 6.4780 sec.
Iteration 14400 || Loss: 7.0671 || 10iter: 4.5976 sec.
Iteration 14410 || Loss: 6.8240 || 10iter: 4.6595 sec.
Iteration 14420 || Loss: 6.4890 || 10iter: 4.5177 sec.
Iteration 14430 || Loss: 7.0021 || 10iter: 4.4433 sec.
Iteration 14440 || Loss: 7.1840 || 10iter: 4.7339 sec.
Iteration 14450 || Loss: 7.8378 || 10iter: 3.1606 sec.
-------------
ep

Iteration 15410 || Loss: 7.6410 || 10iter: 7.5616 sec.
Iteration 15420 || Loss: 7.3759 || 10iter: 5.5895 sec.
Iteration 15430 || Loss: 7.6558 || 10iter: 4.8027 sec.
Iteration 15440 || Loss: 7.0454 || 10iter: 4.4895 sec.
Iteration 15450 || Loss: 7.3891 || 10iter: 4.6120 sec.
Iteration 15460 || Loss: 7.1927 || 10iter: 4.6221 sec.
Iteration 15470 || Loss: 7.1433 || 10iter: 4.6311 sec.
Iteration 15480 || Loss: 7.2046 || 10iter: 2.6545 sec.
-------------
epoch 196 || Epoch_TRAIN_Loss:576.4735 ||Epoch_VAL_Loss:0.0000
timer:  40.3655 sec.
lr is: 1e-05
-------------
Epoch 197/200
-------------
train
Iteration 15490 || Loss: 6.9888 || 10iter: 7.5767 sec.
Iteration 15500 || Loss: 7.5062 || 10iter: 6.0118 sec.
Iteration 15510 || Loss: 7.4012 || 10iter: 4.5765 sec.
Iteration 15520 || Loss: 7.2515 || 10iter: 4.4953 sec.
Iteration 15530 || Loss: 7.1827 || 10iter: 4.7971 sec.
Iteration 15540 || Loss: 7.3055 || 10iter: 4.4811 sec.
Iteration 15550 || Loss: 7.0113 || 10iter: 4.5126 sec.
Iteration 15560 