In [1]:
# import stuff
import os
import numpy as np
import torch
import torch.utils.data as data
from itertools import product as product
import time

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torch.autograd import Function
import pandas as pd

# import dataset
from utils.dataset import VOCDataset, DatasetTransform, make_datapath_list, Anno_xml2list, od_collate_fn

# Set up the person-only VOC dataset

In [2]:
# Collect image / annotation paths under VOC2007 (make_datapath_list is called with cls="person")
vocpath = os.path.join("..", "VOCdevkit", "VOC2007")
(train_img_list, train_anno_list,
 val_img_list, val_anno_list) = make_datapath_list(vocpath, cls="person")

# Dataset settings
voc_classes = ['person']
color_mean = (104, 117, 123)  # mean colour values, (BGR) order
input_size = 128  # image size handed to DatasetTransform (128 here, not 300)

# Pre-processing and annotation converter used by the training dataset
transform = DatasetTransform(input_size, color_mean)
transform_anno = Anno_xml2list(voc_classes)

train_dataset = VOCDataset(
    train_img_list,
    train_anno_list,
    phase="train",
    transform=transform,
    transform_anno=transform_anno,
)

# The validation dataset receives its own transform / converter instances
val_dataset = VOCDataset(
    val_img_list,
    val_anno_list,
    phase="val",
    transform=DatasetTransform(input_size, color_mean),
    transform_anno=Anno_xml2list(voc_classes),
)

batch_size = 32

# Only the training loader shuffles; both use od_collate_fn to build batches
train_dataloader = data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=od_collate_fn,
    num_workers=8,
)

val_dataloader = data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=od_collate_fn,
    num_workers=8,
)

# Phase name -> loader, the form train_model iterates over
dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}

000009
000017
000021
000023
000030
000032
000035
000041
000048
000050
000051
000066
000073
000081
000083
000089
000101
000104
000110
000113
000125
000129
000131
000133
000138
000146
000150
000159
000162
000163
000164
000165
000169
000170
000171
000173
000174
000177
000190
000192
000193
000194
000200
000210
000218
000220
000222
000229
000232
000245
000251
000257
000259
000269
000275
000276
000278
000282
000285
000288
000298
000302
000305
000308
000320
000321
000322
000323
000328
000331
000337
000338
000352
000359
000367
000372
000374
000382
000394
000406
000407
000411
000419
000428
000433
000435
000438
000443
000446
000448
000463
000468
000470
000476
000477
000480
000482
000483
000498
000499
000500
000515
000516
000518
000520
000523
000524
000525
000526
000530
000531
000535
000541
000545
000554
000555
000579
000583
000589
000591
000597
000612
000613
000625
000626
000628
000633
000648
000654
000677
000684
000688
000690
000694
000695
000702
000709
000717
000726
000731
000733
000739
000742

009481
009490
009494
009496
009497
009499
009500
009504
009512
009517
009518
009520
009524
009526
009531
009532
009537
009541
009542
009546
009551
009557
009558
009565
009567
009568
009573
009577
009579
009585
009591
009596
009600
009603
009609
009611
009613
009614
009617
009618
009638
009641
009647
009649
009654
009655
009656
009659
009666
009668
009671
009676
009684
009687
009691
009693
009698
009703
009711
009712
009713
009717
009718
009721
009726
009732
009734
009738
009747
009754
009755
009756
009762
009767
009773
009776
009780
009781
009789
009792
009796
009800
009809
009813
009822
009828
009841
009845
009848
009851
009859
009867
009868
009869
009874
009877
009878
009879
009880
009881
009882
009900
009902
009917
009918
009926
009935
009942
009944
009946
009947
009949
009950
009954
009958


In [3]:
# Sanity check: pull one mini-batch from the training loader and inspect it
batch_iterator = iter(dataloaders_dict["train"])  # iterator over the training DataLoader
images, targets = next(batch_iterator)  # get first element
print(images.size())  # torch.Size([32, 3, 128, 128]) with batch_size=32, input_size=128
print(len(targets))  # one entry per image in the batch
print(targets[1].shape)  # annotation tensor of the second sample

torch.Size([32, 3, 128, 128])
32
torch.Size([6, 5])


In [4]:
# Display the annotation tensor of the second sample in the batch.
# NOTE(review): rows look like [xmin, ymin, xmax, ymax, label] in normalised
# coordinates — confirm against Anno_xml2list.
targets[1]

tensor([[0.3773, 0.2824, 0.7485, 1.0000, 0.0000],
        [0.8037, 0.3389, 0.9356, 0.5000, 0.0000],
        [0.8896, 0.3368, 0.9540, 0.4979, 0.0000],
        [0.3650, 0.3410, 0.4479, 0.4874, 0.0000],
        [0.3006, 0.3410, 0.3405, 0.3954, 0.0000],
        [0.0767, 0.3410, 0.1166, 0.4079, 0.0000]])

# Test with the SSD model

In [5]:
from utils.blazeface import SSD

BlazeFace(
  (features): Sequential(
    (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): BlazeBlock(
      (conv1): Sequential(
        (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
        (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (act): ReLU(inplace)
    )
    (4): BlazeBlock(
      (conv1): Sequential(
        (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
        (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, a

In [6]:
# SSD configuration (input_size is 128 here, so this is not an "SSD300" setup)
ssd_cfg = {
    'num_classes': 2,  # total number of classes, background included
    'input_size': 128,  # input image size
    'bbox_aspect_num': [4, 6],  # number of default-box (DBox) aspect variants to output
    'feature_maps': [16, 8],  # feature-map size of each source
    'steps': [4, 8],  # controls the DBox size
    'min_sizes': [30, 60],  # controls the DBox size
    'max_sizes': [60, 128],  # controls the DBox size
    # NOTE(review): six entries here while every other per-source list has two —
    # presumably only the first two are consumed; confirm against utils.blazeface.SSD
    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
}

# Build the network in training phase from the config above
net = SSD(phase="train", cfg=ssd_cfg)

# SSDのweightsを設定

def weights_init(m):
    """Initialise a single module in place; intended for ``Module.apply``.

    Only ``nn.Conv2d`` modules are touched: the weight is redrawn with
    Kaiming-normal initialisation and the bias, when the layer has one,
    is filled with zeros. Every other module type is left as it is.

    Args:
        m: the module handed in by ``apply``.
    """
    # Anything that is not a 2-D convolution is skipped.
    if not isinstance(m, nn.Conv2d):
        return

    init.kaiming_normal_(m.weight.data)

    # Conv layers created with bias=False have nothing more to reset.
    if m.bias is None:
        return
    init.constant_(m.bias, 0.0)

# Apply weights_init to every sub-module of the loc and conf heads
net.loc.apply(weights_init)
net.conf.apply(weights_init)

# Check whether a GPU is available; fall back to the CPU otherwise
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using:", device)

print("set weights!")

using: cuda:0
set weights!


In [7]:
# Print the module tree of the assembled network
print(net)

SSD(
  (blaze): BlazeFace(
    (features): Sequential(
      (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace)
      (3): BlazeBlock(
        (conv1): Sequential(
          (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
          (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
          (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (act): ReLU(inplace)
      )
      (4): BlazeBlock(
        (conv1): Sequential(
          (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
          (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
   

In [8]:
from utils.ssd_model import MultiBoxLoss

# Loss function: MultiBoxLoss built on the device selected earlier
criterion = MultiBoxLoss(jaccard_thresh=0.5,neg_pos=2, device=device)

# Optimizer: SGD with momentum and weight decay.
# The lr given here is overwritten at the start of every epoch by
# adjust_learning_rate, which train_model calls.
import torch.optim as optim
optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.9, weight_decay=5e-4)

In [9]:
def get_current_lr(epoch, base_lr=1e-3, decay_epochs=(120, 180), gamma=0.1):
    """Return the step-decayed learning rate for an epoch index.

    The rate starts at ``base_lr`` and is multiplied by ``gamma`` once for
    every milestone in ``decay_epochs`` that ``epoch`` has reached
    (``epoch >= milestone``). With the defaults this gives 1e-3 before
    epoch 120, 1e-4 from epoch 120 and 1e-5 from epoch 180.

    Args:
        epoch: epoch index compared against the milestones.
        base_lr: learning rate before any decay is applied.
        decay_epochs: iterable of milestone epochs; may be empty.
        gamma: multiplicative factor applied per reached milestone.

    Returns:
        The learning rate as a float.
    """
    lr = base_lr
    # Multiply step by step (rather than base_lr * gamma ** k) so the float
    # results stay identical to the original hard-coded schedule.
    for milestone in decay_epochs:
        if epoch >= milestone:
            lr *= gamma
    return lr

def adjust_learning_rate(optimizer, epoch):
    """Write the scheduled learning rate for ``epoch`` into ``optimizer``.

    The rate comes from ``get_current_lr(epoch)``; it is printed and then
    stored under the ``'lr'`` key of every entry in
    ``optimizer.param_groups``.

    Args:
        optimizer: object exposing ``param_groups`` (a sequence of dicts).
        epoch: epoch index forwarded to ``get_current_lr``.
    """
    scheduled_lr = get_current_lr(epoch)
    print("lr is:", scheduled_lr)
    for group in optimizer.param_groups:
        group['lr'] = scheduled_lr

In [10]:
# モデルを学習させる関数を作成
def train_model(net, dataloaders_dict, criterion, optimizer, num_epochs):
    """Train ``net`` for ``num_epochs`` epochs with periodic validation.

    For each epoch the function:
      * sets the learning rate through ``adjust_learning_rate(optimizer, epoch)``
        (``epoch`` is the 0-based index);
      * runs the ``'train'`` phase over ``dataloaders_dict['train']``;
      * runs the ``'val'`` phase only when the 1-based epoch number is a
        multiple of 5;
      * appends the summed batch losses to the log and rewrites
        ``log_output.csv``;
      * saves ``net.state_dict()`` to ``weights/blazeface128_<epoch>.pth``
        when the 1-based epoch number is a multiple of 10.

    Args:
        net: the model; it is moved to the selected device and called as
            ``net(images)``.
        dataloaders_dict: mapping with ``'train'`` and ``'val'`` entries, each
            an iterable yielding ``(images, targets)`` where ``images`` is a
            tensor and ``targets`` is a list of tensors.
        criterion: callable returning ``(loss_l, loss_c)`` for
            ``(outputs, targets)``.
        optimizer: optimizer whose learning rate is reset every epoch.
        num_epochs: number of epochs to run.

    Returns:
        None. Results are written to stdout, ``log_output.csv`` and the
        ``weights`` directory.
    """
    # Use the GPU when one is available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("used device：", device)

    # Move the network to the selected device
    net.to(device)

    # Let cuDNN benchmark kernels; helps when the network shape is mostly fixed
    torch.backends.cudnn.benchmark = True

    # Create the checkpoint directory up front so a missing folder cannot
    # abort the run at the first save, after epochs of training.
    os.makedirs('weights', exist_ok=True)

    iteration = 1  # global training-iteration counter (never reset)
    logs = []

    # Exactly num_epochs passes: epoch runs 0 .. num_epochs - 1, so the
    # banner below ends at "Epoch num_epochs/num_epochs".
    for epoch in range(num_epochs):

        adjust_learning_rate(optimizer, epoch)

        # Per-epoch loss sums (sum of batch losses, not a mean)
        epoch_train_loss = 0.0
        epoch_val_loss = 0.0

        # Timers for the whole epoch and for each group of 10 iterations
        t_epoch_start = time.time()
        t_iter_start = time.time()

        print('-------------')
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-------------')

        # Training phase, followed by validation on every 5th epoch
        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()  # training mode
                print('train')
            else:
                if (epoch + 1) % 5 == 0:
                    net.eval()  # evaluation mode
                    print('-------------')
                    print('val')
                else:
                    # Validation only runs once every 5 epochs
                    continue

            # Iterate over the mini-batches of this phase
            for images, targets in dataloaders_dict[phase]:

                # Send the batch to the selected device
                images = images.to(device)
                targets = [ann.to(device)
                           for ann in targets]  # each tensor in the list is moved

                # Reset accumulated gradients
                optimizer.zero_grad()

                # Gradients are tracked only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    # Forward pass
                    outputs = net(images)

                    # Loss is the sum of the two terms returned by criterion
                    loss_l, loss_c = criterion(outputs, targets)
                    loss = loss_l + loss_c

                    if phase == 'train':
                        loss.backward()  # compute gradients

                        # Clip every gradient element to [-2.0, 2.0] to keep
                        # the update numerically stable
                        nn.utils.clip_grad_value_(
                            net.parameters(), clip_value=2.0)

                        optimizer.step()  # parameter update

                        # Read the scalar once and reuse it below
                        loss_value = loss.item()

                        if iteration % 10 == 0:  # report every 10 iterations
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print('Iteration {} || Loss: {:.4f} || 10iter: {:.4f} sec.'.format(
                                iteration, loss_value, duration))
                            t_iter_start = time.time()

                        epoch_train_loss += loss_value
                        iteration += 1

                    else:
                        # Validation: only accumulate the loss
                        epoch_val_loss += loss.item()

        # Epoch summary; val loss stays 0.0 on epochs where validation was skipped
        t_epoch_finish = time.time()
        print('-------------')
        print('epoch {} || Epoch_TRAIN_Loss:{:.4f} ||Epoch_VAL_Loss:{:.4f}'.format(
            epoch+1, epoch_train_loss, epoch_val_loss))
        print('timer:  {:.4f} sec.'.format(t_epoch_finish - t_epoch_start))

        # Append to the log and rewrite the CSV so it is current after every epoch
        log_epoch = {'epoch': epoch+1,
                     'train_loss': epoch_train_loss, 'val_loss': epoch_val_loss}
        logs.append(log_epoch)
        df = pd.DataFrame(logs)
        df.to_csv("log_output.csv")

        # Save a checkpoint every 10 epochs
        if (epoch + 1) % 10 == 0:
            torch.save(net.state_dict(), 'weights/blazeface128_' +
                       str(epoch+1) + '.pth')

# Start training

In [11]:
# Epoch budget handed to train_model; the run logs to log_output.csv and
# writes checkpoints under weights/ (see train_model).
num_epochs = 200
train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=num_epochs)

used device： cuda:0
lr is: 0.001
-------------
Epoch 1/200
-------------
train
Iteration 10 || Loss: 25.8795 || 10iter: 9.9486 sec.
Iteration 20 || Loss: 18.7910 || 10iter: 4.5899 sec.
Iteration 30 || Loss: 15.2810 || 10iter: 4.9723 sec.
Iteration 40 || Loss: 16.1553 || 10iter: 5.2477 sec.
Iteration 50 || Loss: 16.6958 || 10iter: 3.9402 sec.
Iteration 60 || Loss: 18.2191 || 10iter: 3.1149 sec.
-------------
epoch 1 || Epoch_TRAIN_Loss:1324.4515 ||Epoch_VAL_Loss:0.0000
timer:  34.0132 sec.
lr is: 0.001
-------------
Epoch 2/200
-------------
train
Iteration 70 || Loss: 13.3653 || 10iter: 5.0666 sec.
Iteration 80 || Loss: 13.0765 || 10iter: 6.5523 sec.
Iteration 90 || Loss: 11.3279 || 10iter: 4.7422 sec.
Iteration 100 || Loss: 12.5673 || 10iter: 5.2613 sec.
Iteration 110 || Loss: 11.8278 || 10iter: 4.9162 sec.
Iteration 120 || Loss: 11.2398 || 10iter: 4.4973 sec.
Iteration 130 || Loss: 12.2265 || 10iter: 3.3242 sec.
-------------
epoch 2 || Epoch_TRAIN_Loss:805.6247 ||Epoch_VAL_Loss:0.00

Iteration 1060 || Loss: 6.3088 || 10iter: 6.1092 sec.
Iteration 1070 || Loss: 6.4473 || 10iter: 4.9865 sec.
Iteration 1080 || Loss: 6.3467 || 10iter: 5.4380 sec.
Iteration 1090 || Loss: 6.7570 || 10iter: 5.4601 sec.
Iteration 1100 || Loss: 5.7319 || 10iter: 3.3467 sec.
Iteration 1110 || Loss: 6.3377 || 10iter: 3.3797 sec.
Iteration 1120 || Loss: 5.1928 || 10iter: 3.5788 sec.
-------------
epoch 17 || Epoch_TRAIN_Loss:392.6125 ||Epoch_VAL_Loss:0.0000
timer:  33.3167 sec.
lr is: 0.001
-------------
Epoch 18/200
-------------
train
Iteration 1130 || Loss: 5.5649 || 10iter: 8.1390 sec.
Iteration 1140 || Loss: 6.3331 || 10iter: 4.7450 sec.
Iteration 1150 || Loss: 5.8808 || 10iter: 5.2080 sec.
Iteration 1160 || Loss: 5.2652 || 10iter: 5.1664 sec.
Iteration 1170 || Loss: 5.7686 || 10iter: 4.6720 sec.
Iteration 1180 || Loss: 5.4543 || 10iter: 3.7601 sec.
-------------
epoch 18 || Epoch_TRAIN_Loss:389.4132 ||Epoch_VAL_Loss:0.0000
timer:  34.3516 sec.
lr is: 0.001
-------------
Epoch 19/200
----

Iteration 2120 || Loss: 5.3887 || 10iter: 9.6099 sec.
Iteration 2130 || Loss: 6.1761 || 10iter: 5.3010 sec.
Iteration 2140 || Loss: 4.8753 || 10iter: 5.2826 sec.
Iteration 2150 || Loss: 4.7907 || 10iter: 5.2660 sec.
Iteration 2160 || Loss: 6.0817 || 10iter: 4.7120 sec.
Iteration 2170 || Loss: 5.7536 || 10iter: 3.6680 sec.
-------------
epoch 33 || Epoch_TRAIN_Loss:355.0768 ||Epoch_VAL_Loss:0.0000
timer:  36.4611 sec.
lr is: 0.001
-------------
Epoch 34/200
-------------
train
Iteration 2180 || Loss: 5.1250 || 10iter: 2.9772 sec.
Iteration 2190 || Loss: 5.7580 || 10iter: 4.0049 sec.
Iteration 2200 || Loss: 5.2643 || 10iter: 6.6321 sec.
Iteration 2210 || Loss: 5.4131 || 10iter: 5.8710 sec.
Iteration 2220 || Loss: 6.4108 || 10iter: 4.7266 sec.
Iteration 2230 || Loss: 5.8163 || 10iter: 5.1801 sec.
Iteration 2240 || Loss: 5.4080 || 10iter: 3.3746 sec.
-------------
epoch 34 || Epoch_TRAIN_Loss:358.6712 ||Epoch_VAL_Loss:0.0000
timer:  34.2534 sec.
lr is: 0.001
-------------
Epoch 35/200
----

Iteration 3170 || Loss: 5.1814 || 10iter: 5.1039 sec.
Iteration 3180 || Loss: 5.6592 || 10iter: 6.2243 sec.
Iteration 3190 || Loss: 6.0801 || 10iter: 5.2291 sec.
Iteration 3200 || Loss: 5.3549 || 10iter: 5.5239 sec.
Iteration 3210 || Loss: 5.4534 || 10iter: 4.8087 sec.
Iteration 3220 || Loss: 5.2018 || 10iter: 3.4829 sec.
Iteration 3230 || Loss: 6.2092 || 10iter: 3.0859 sec.
-------------
epoch 49 || Epoch_TRAIN_Loss:356.2334 ||Epoch_VAL_Loss:0.0000
timer:  34.5465 sec.
lr is: 0.001
-------------
Epoch 50/200
-------------
train
Iteration 3240 || Loss: 5.4426 || 10iter: 6.0703 sec.
Iteration 3250 || Loss: 4.9422 || 10iter: 3.5880 sec.
Iteration 3260 || Loss: 5.6458 || 10iter: 2.8721 sec.
Iteration 3270 || Loss: 5.2662 || 10iter: 6.1502 sec.
Iteration 3280 || Loss: 4.8227 || 10iter: 5.5038 sec.
Iteration 3290 || Loss: 5.2385 || 10iter: 4.4660 sec.
Iteration 3300 || Loss: 4.7495 || 10iter: 3.2601 sec.
-------------
val
-------------
epoch 50 || Epoch_TRAIN_Loss:349.8790 ||Epoch_VAL_Loss:

Iteration 4230 || Loss: 5.4939 || 10iter: 6.8056 sec.
Iteration 4240 || Loss: 5.2311 || 10iter: 5.0919 sec.
Iteration 4250 || Loss: 5.5800 || 10iter: 4.9113 sec.
Iteration 4260 || Loss: 5.4981 || 10iter: 5.2577 sec.
Iteration 4270 || Loss: 5.3561 || 10iter: 5.1521 sec.
Iteration 4280 || Loss: 4.9830 || 10iter: 3.4323 sec.
Iteration 4290 || Loss: 5.1192 || 10iter: 2.9277 sec.
-------------
val
-------------
epoch 65 || Epoch_TRAIN_Loss:341.3625 ||Epoch_VAL_Loss:173.2099
timer:  45.6405 sec.
lr is: 0.001
-------------
Epoch 66/200
-------------
train
Iteration 4300 || Loss: 4.7466 || 10iter: 8.5946 sec.
Iteration 4310 || Loss: 4.7963 || 10iter: 5.0224 sec.
Iteration 4320 || Loss: 5.0785 || 10iter: 5.1820 sec.
Iteration 4330 || Loss: 6.1385 || 10iter: 4.2181 sec.
Iteration 4340 || Loss: 4.7994 || 10iter: 3.3485 sec.
Iteration 4350 || Loss: 4.7037 || 10iter: 3.0367 sec.
-------------
epoch 66 || Epoch_TRAIN_Loss:345.6071 ||Epoch_VAL_Loss:0.0000
timer:  32.3211 sec.
lr is: 0.001
-----------

Iteration 5290 || Loss: 5.8236 || 10iter: 9.1433 sec.
Iteration 5300 || Loss: 5.6233 || 10iter: 4.6711 sec.
Iteration 5310 || Loss: 4.9910 || 10iter: 3.8720 sec.
Iteration 5320 || Loss: 4.9847 || 10iter: 3.4416 sec.
Iteration 5330 || Loss: 4.9147 || 10iter: 5.3576 sec.
Iteration 5340 || Loss: 5.2100 || 10iter: 4.1819 sec.
-------------
epoch 81 || Epoch_TRAIN_Loss:341.8613 ||Epoch_VAL_Loss:0.0000
timer:  32.8916 sec.
lr is: 0.001
-------------
Epoch 82/200
-------------
train
Iteration 5350 || Loss: 5.3285 || 10iter: 5.0570 sec.
Iteration 5360 || Loss: 4.8386 || 10iter: 6.1383 sec.
Iteration 5370 || Loss: 4.9861 || 10iter: 5.4103 sec.
Iteration 5380 || Loss: 5.0347 || 10iter: 5.4113 sec.
Iteration 5390 || Loss: 5.3398 || 10iter: 3.6432 sec.
Iteration 5400 || Loss: 5.9817 || 10iter: 3.4042 sec.
Iteration 5410 || Loss: 5.5117 || 10iter: 3.4888 sec.
-------------
epoch 82 || Epoch_TRAIN_Loss:336.0098 ||Epoch_VAL_Loss:0.0000
timer:  33.8359 sec.
lr is: 0.001
-------------
Epoch 83/200
----

Iteration 6340 || Loss: 5.0237 || 10iter: 5.4696 sec.
Iteration 6350 || Loss: 4.9915 || 10iter: 3.5655 sec.
Iteration 6360 || Loss: 5.2389 || 10iter: 3.2122 sec.
Iteration 6370 || Loss: 4.4454 || 10iter: 6.3995 sec.
Iteration 6380 || Loss: 4.9309 || 10iter: 5.4992 sec.
Iteration 6390 || Loss: 4.4740 || 10iter: 5.1999 sec.
Iteration 6400 || Loss: 5.0778 || 10iter: 3.3934 sec.
-------------
epoch 97 || Epoch_TRAIN_Loss:335.5139 ||Epoch_VAL_Loss:0.0000
timer:  33.5124 sec.
lr is: 0.001
-------------
Epoch 98/200
-------------
train
Iteration 6410 || Loss: 4.8398 || 10iter: 8.0204 sec.
Iteration 6420 || Loss: 6.6632 || 10iter: 4.9559 sec.
Iteration 6430 || Loss: 5.1607 || 10iter: 3.8757 sec.
Iteration 6440 || Loss: 5.1572 || 10iter: 3.3153 sec.
Iteration 6450 || Loss: 4.6252 || 10iter: 4.0564 sec.
Iteration 6460 || Loss: 5.3590 || 10iter: 2.9433 sec.
-------------
epoch 98 || Epoch_TRAIN_Loss:328.7714 ||Epoch_VAL_Loss:0.0000
timer:  29.0120 sec.
lr is: 0.001
-------------
Epoch 99/200
----

Iteration 7400 || Loss: 4.9684 || 10iter: 8.4274 sec.
Iteration 7410 || Loss: 4.9709 || 10iter: 5.6534 sec.
Iteration 7420 || Loss: 4.4108 || 10iter: 4.5757 sec.
Iteration 7430 || Loss: 4.7467 || 10iter: 3.1042 sec.
Iteration 7440 || Loss: 5.2273 || 10iter: 3.4762 sec.
Iteration 7450 || Loss: 4.9126 || 10iter: 5.6674 sec.
-------------
epoch 113 || Epoch_TRAIN_Loss:326.4717 ||Epoch_VAL_Loss:0.0000
timer:  33.8519 sec.
lr is: 0.001
-------------
Epoch 114/200
-------------
train
Iteration 7460 || Loss: 5.1688 || 10iter: 3.9603 sec.
Iteration 7470 || Loss: 5.1433 || 10iter: 5.7243 sec.
Iteration 7480 || Loss: 4.6256 || 10iter: 4.3318 sec.
Iteration 7490 || Loss: 4.3604 || 10iter: 5.1535 sec.
Iteration 7500 || Loss: 4.5564 || 10iter: 4.8400 sec.
Iteration 7510 || Loss: 4.6913 || 10iter: 3.5670 sec.
Iteration 7520 || Loss: 4.6262 || 10iter: 3.1823 sec.
-------------
epoch 114 || Epoch_TRAIN_Loss:332.6653 ||Epoch_VAL_Loss:0.0000
timer:  31.9722 sec.
lr is: 0.001
-------------
Epoch 115/200


Iteration 8450 || Loss: 4.6176 || 10iter: 4.4044 sec.
Iteration 8460 || Loss: 5.3746 || 10iter: 3.5587 sec.
Iteration 8470 || Loss: 4.3954 || 10iter: 3.4591 sec.
Iteration 8480 || Loss: 4.5788 || 10iter: 4.6207 sec.
Iteration 8490 || Loss: 5.0249 || 10iter: 3.5967 sec.
Iteration 8500 || Loss: 4.3568 || 10iter: 2.9964 sec.
Iteration 8510 || Loss: 5.0629 || 10iter: 2.7939 sec.
-------------
epoch 129 || Epoch_TRAIN_Loss:320.2180 ||Epoch_VAL_Loss:0.0000
timer:  27.4902 sec.
lr is: 0.0001
-------------
Epoch 130/200
-------------
train
Iteration 8520 || Loss: 5.1128 || 10iter: 7.1871 sec.
Iteration 8530 || Loss: 4.8602 || 10iter: 4.9726 sec.
Iteration 8540 || Loss: 4.8701 || 10iter: 5.2550 sec.
Iteration 8550 || Loss: 4.5935 || 10iter: 5.1303 sec.
Iteration 8560 || Loss: 4.1438 || 10iter: 5.0995 sec.
Iteration 8570 || Loss: 5.0767 || 10iter: 3.7465 sec.
Iteration 8580 || Loss: 6.4428 || 10iter: 2.9809 sec.
-------------
val
-------------
epoch 130 || Epoch_TRAIN_Loss:322.0315 ||Epoch_VAL_L

Iteration 9510 || Loss: 4.6919 || 10iter: 5.6067 sec.
Iteration 9520 || Loss: 5.9755 || 10iter: 3.5960 sec.
Iteration 9530 || Loss: 8.2865 || 10iter: 5.3110 sec.
Iteration 9540 || Loss: 4.8583 || 10iter: 5.9674 sec.
Iteration 9550 || Loss: 4.5766 || 10iter: 4.9652 sec.
Iteration 9560 || Loss: 4.7931 || 10iter: 4.2462 sec.
Iteration 9570 || Loss: 4.0062 || 10iter: 3.1297 sec.
-------------
val
-------------
epoch 145 || Epoch_TRAIN_Loss:327.4488 ||Epoch_VAL_Loss:164.6513
timer:  43.5690 sec.
lr is: 0.0001
-------------
Epoch 146/200
-------------
train
Iteration 9580 || Loss: 4.5971 || 10iter: 6.4001 sec.
Iteration 9590 || Loss: 4.9993 || 10iter: 6.5721 sec.
Iteration 9600 || Loss: 5.2672 || 10iter: 5.3286 sec.
Iteration 9610 || Loss: 5.8114 || 10iter: 4.8602 sec.
Iteration 9620 || Loss: 4.1949 || 10iter: 4.8850 sec.
Iteration 9630 || Loss: 5.1824 || 10iter: 3.4773 sec.
-------------
epoch 146 || Epoch_TRAIN_Loss:327.6493 ||Epoch_VAL_Loss:0.0000
timer:  33.5899 sec.
lr is: 0.0001
------

-------------
epoch 160 || Epoch_TRAIN_Loss:316.5791 ||Epoch_VAL_Loss:164.7865
timer:  42.1644 sec.
lr is: 0.0001
-------------
Epoch 161/200
-------------
train
Iteration 10570 || Loss: 6.0005 || 10iter: 6.1736 sec.
Iteration 10580 || Loss: 4.8639 || 10iter: 6.2610 sec.
Iteration 10590 || Loss: 5.4910 || 10iter: 5.7215 sec.
Iteration 10600 || Loss: 4.8533 || 10iter: 4.7865 sec.
Iteration 10610 || Loss: 5.5204 || 10iter: 4.9801 sec.
Iteration 10620 || Loss: 4.4629 || 10iter: 3.3868 sec.
-------------
epoch 161 || Epoch_TRAIN_Loss:322.1030 ||Epoch_VAL_Loss:0.0000
timer:  33.4837 sec.
lr is: 0.0001
-------------
Epoch 162/200
-------------
train
Iteration 10630 || Loss: 4.8515 || 10iter: 5.4857 sec.
Iteration 10640 || Loss: 5.5003 || 10iter: 4.2352 sec.
Iteration 10650 || Loss: 4.8769 || 10iter: 3.6301 sec.
Iteration 10660 || Loss: 4.8427 || 10iter: 4.8025 sec.
Iteration 10670 || Loss: 4.7288 || 10iter: 6.7819 sec.
Iteration 10680 || Loss: 4.7539 || 10iter: 4.4086 sec.
Iteration 10690 ||

Iteration 11590 || Loss: 4.8813 || 10iter: 3.8428 sec.
Iteration 11600 || Loss: 4.3704 || 10iter: 3.4456 sec.
Iteration 11610 || Loss: 5.2291 || 10iter: 4.4330 sec.
-------------
epoch 176 || Epoch_TRAIN_Loss:320.1908 ||Epoch_VAL_Loss:0.0000
timer:  33.7727 sec.
lr is: 0.0001
-------------
Epoch 177/200
-------------
train
Iteration 11620 || Loss: 4.6144 || 10iter: 5.5711 sec.
Iteration 11630 || Loss: 4.2258 || 10iter: 5.0864 sec.
Iteration 11640 || Loss: 4.1578 || 10iter: 5.2454 sec.
Iteration 11650 || Loss: 4.5457 || 10iter: 5.6194 sec.
Iteration 11660 || Loss: 5.1554 || 10iter: 4.7318 sec.
Iteration 11670 || Loss: 4.6454 || 10iter: 3.3378 sec.
Iteration 11680 || Loss: 4.5446 || 10iter: 3.0090 sec.
-------------
epoch 177 || Epoch_TRAIN_Loss:319.8769 ||Epoch_VAL_Loss:0.0000
timer:  33.5152 sec.
lr is: 0.0001
-------------
Epoch 178/200
-------------
train
Iteration 11690 || Loss: 4.8656 || 10iter: 6.5267 sec.
Iteration 11700 || Loss: 4.4989 || 10iter: 3.0973 sec.
Iteration 11710 || L

Iteration 12610 || Loss: 4.3798 || 10iter: 5.5793 sec.
Iteration 12620 || Loss: 4.4647 || 10iter: 5.1616 sec.
Iteration 12630 || Loss: 5.5162 || 10iter: 4.8770 sec.
Iteration 12640 || Loss: 4.3813 || 10iter: 5.3605 sec.
Iteration 12650 || Loss: 4.8488 || 10iter: 4.3335 sec.
Iteration 12660 || Loss: 5.0741 || 10iter: 3.3244 sec.
Iteration 12670 || Loss: 4.6730 || 10iter: 2.9862 sec.
-------------
epoch 192 || Epoch_TRAIN_Loss:322.8654 ||Epoch_VAL_Loss:0.0000
timer:  32.5527 sec.
lr is: 1e-05
-------------
Epoch 193/200
-------------
train
Iteration 12680 || Loss: 4.7536 || 10iter: 9.5488 sec.
Iteration 12690 || Loss: 4.2966 || 10iter: 4.7999 sec.
Iteration 12700 || Loss: 4.9862 || 10iter: 5.3979 sec.
Iteration 12710 || Loss: 4.8596 || 10iter: 5.0418 sec.
Iteration 12720 || Loss: 4.4760 || 10iter: 4.6922 sec.
Iteration 12730 || Loss: 4.8177 || 10iter: 3.9262 sec.
-------------
epoch 193 || Epoch_TRAIN_Loss:321.2833 ||Epoch_VAL_Loss:0.0000
timer:  35.9907 sec.
lr is: 1e-05
-------------
E