In [1]:
# import stuff
import os
import numpy as np
import torch
import torch.utils.data as data
from itertools import product as product
import time

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torch.autograd import Function
import pandas as pd

# import dataset
from utils.dataset import VOCDataset, DatasetTransform, make_datapath_list, Anno_xml2list, od_collate_fn

# Set up the person-only VOC dataset

In [2]:
# Build the image / annotation path lists for the VOC2007 data, restricted to
# the "person" class via the cls argument of the project helper.
vocpath = os.path.join("..", "VOCdevkit", "VOC2007")
train_img_list, train_anno_list, val_img_list, val_anno_list = make_datapath_list(vocpath, cls="person")

# Dataset configuration
voc_classes = ['person']
color_mean = (104, 117, 123)  # mean colour values, in (BGR) order
input_size = 256  # network input size: images come out of the loader as 256x256

# Image transform and XML-annotation converter used by the training dataset
transform = DatasetTransform(input_size, color_mean)
transform_anno = Anno_xml2list(voc_classes)

# The validation dataset builds its own transform / converter instances with
# the same arguments instead of sharing the training ones.
train_dataset = VOCDataset(train_img_list, train_anno_list, phase = "train", transform=transform, transform_anno = transform_anno)
val_dataset = VOCDataset(val_img_list, val_anno_list, phase="val", transform=DatasetTransform(
    input_size, color_mean), transform_anno=Anno_xml2list(voc_classes))

batch_size = 64

# Only the training loader shuffles; both use the project's od_collate_fn to
# assemble batches and 8 worker processes.
train_dataloader = data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, collate_fn=od_collate_fn, num_workers=8)

val_dataloader = data.DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False, collate_fn=od_collate_fn, num_workers=8)

# Keyed by phase name; train_model looks the loaders up as "train" / "val".
dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}

000009
000017
000021
000023
000030
000032
000035
000041
000048
000050
000051
000066
000073
000081
000083
000089
000101
000104
000110
000113
000125
000129
000131
000133
000138
000146
000150
000159
000162
000163
000164
000165
000169
000170
000171
000173
000174
000177
000190
000192
000193
000194
000200
000210
000218
000220
000222
000229
000232
000245
000251
000257
000259
000269
000275
000276
000278
000282
000285
000288
000298
000302
000305
000308
000320
000321
000322
000323
000328
000331
000337
000338
000352
000359
000367
000372
000374
000382
000394
000406
000407
000411
000419
000428
000433
000435
000438
000443
000446
000448
000463
000468
000470
000476
000477
000480
000482
000483
000498
000499
000500
000515
000516
000518
000520
000523
000524
000525
000526
000530
000531
000535
000541
000545
000554
000555
000579
000583
000589
000591
000597
000612
000613
000625
000626
000628
000633
000648
000654
000677
000684
000688
000690
000694
000695
000702
000709
000717
000726
000731
000733
000739
000742

009479
009481
009490
009494
009496
009497
009499
009500
009504
009512
009517
009518
009520
009524
009526
009531
009532
009537
009541
009542
009546
009551
009557
009558
009565
009567
009568
009573
009577
009579
009585
009591
009596
009600
009603
009609
009611
009613
009614
009617
009618
009638
009641
009647
009649
009654
009655
009656
009659
009666
009668
009671
009676
009684
009687
009691
009693
009698
009703
009711
009712
009713
009717
009718
009721
009726
009732
009734
009738
009747
009754
009755
009756
009762
009767
009773
009776
009780
009781
009789
009792
009796
009800
009809
009813
009822
009828
009841
009845
009848
009851
009859
009867
009868
009869
009874
009877
009878
009879
009880
009881
009882
009900
009902
009917
009918
009926
009935
009942
009944
009946
009947
009949
009950
009954
009958


In [3]:
# Sanity check: pull a single batch from the training loader and inspect it.
batch_iterator = iter(dataloaders_dict["train"])  # create an iterator over the loader
images, targets = next(batch_iterator)  # fetch the first batch
print(images.size())  # torch.Size([64, 3, 256, 256]) with batch_size=64, input_size=256
print(len(targets))  # one annotation tensor per image in the batch
print(targets[1].shape)  # shape of the second sample's annotation tensor

torch.Size([64, 3, 256, 256])
64
torch.Size([1, 5])


In [4]:
# Display the annotation tensor of the second sample in the batch.
# NOTE(review): each row presumably holds [xmin, ymin, xmax, ymax, label] —
# confirm against Anno_xml2list.
targets[1]

tensor([[0.5719, 0.8359, 0.7700, 1.0000, 0.0000]])

# Test with the SSD model

In [5]:
from utils.blazeface import SSD256

BlazeFace(
  (features): Sequential(
    (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): BlazeBlock(
      (conv1): Sequential(
        (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
        (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (act): ReLU(inplace)
    )
    (4): BlazeBlock(
      (conv1): Sequential(
        (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
        (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, a

In [6]:
# Configuration for the 256x256-input SSD256 detector
ssd_cfg = {
    'num_classes': 2,  # total number of classes, including the background class
    'input_size': 256,  # input image size
    'bbox_aspect_num': [4, 6],  # number of default-box aspect-ratio variants per source
    'feature_maps': [16, 8],  # feature-map size of each source
    'steps': [8, 16],  # determines the default-box size
    'min_sizes': [16, 32],  # determines the default-box size
    'max_sizes': [32, 100],  # determines the default-box size
    # NOTE(review): six entries here versus two feature maps above — confirm
    # how SSD256 consumes this list.
    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
}

# Build the network in training phase from the configuration above.
net = SSD256(phase="train", cfg=ssd_cfg)

# Set the initial weights of the SSD heads

def weights_init(m):
    """Initialise a single module in place; meant to be passed to ``Module.apply``.

    Only ``nn.Conv2d`` modules are touched: the weight tensor is filled with
    Kaiming-normal values and the bias, when the layer has one, is set to 0.
    Any other module type is left unchanged.

    Args:
        m: the module visited by ``apply``.
    """
    # Guard clause: nothing to do for non-convolution modules.
    if not isinstance(m, nn.Conv2d):
        return

    init.kaiming_normal_(m.weight.data)

    # Layers created with bias=False have no bias tensor to reset.
    if m.bias is None:
        return
    nn.init.constant_(m.bias, 0.0)

# Re-initialise the loc and conf submodules of the network; no other
# submodule is re-initialised in this cell.
net.loc.apply(weights_init)
net.conf.apply(weights_init)

# Check whether a GPU is available and pick the device accordingly
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using:", device)

print("set weights!")

using: cuda:0
set weights!


In [7]:
# Print the module tree of the assembled network as a visual sanity check.
print(net)

SSD256(
  (blaze): BlazeFace(
    (features): Sequential(
      (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace)
      (3): BlazeBlock(
        (conv1): Sequential(
          (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
          (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
          (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (act): ReLU(inplace)
      )
      (4): BlazeBlock(
        (conv1): Sequential(
          (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
          (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))


In [8]:
from utils.ssd_model import MultiBoxLoss

# Loss function from the project's SSD utilities, created on the same device
# that was selected for training.
criterion = MultiBoxLoss(jaccard_thresh=0.5,neg_pos=3, device=device)

# Optimizer: SGD with momentum and weight decay over all network parameters.
# NOTE(review): the lr=1e-3 given here never takes effect — train_model calls
# adjust_learning_rate at the start of every epoch, which overwrites it with
# the schedule from get_current_lr (starting at 1e-2, as the log confirms).
import torch.optim as optim
optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.9, weight_decay=5e-4)

In [9]:
def get_current_lr(epoch, base_lr=1e-2, decay_epochs=(120, 180), gamma=0.1):
    """Return the step-decayed learning rate for ``epoch``.

    The rate starts at ``base_lr`` and is multiplied by ``gamma`` once for
    every milestone in ``decay_epochs`` that ``epoch`` has reached. With the
    defaults this gives 1e-2 for epochs 0-119, 1e-3 for 120-179 and 1e-4
    from epoch 180 onwards.

    Args:
        epoch: zero-based epoch index.
        base_lr: learning rate before any decay is applied.
        decay_epochs: iterable of epoch indices at which the rate is decayed.
        gamma: multiplicative decay factor applied per reached milestone.

    Returns:
        The learning rate to use for ``epoch`` as a float.
    """
    lr = base_lr
    # Multiply step by step (rather than by gamma ** k) so the floating-point
    # result matches the original schedule exactly.
    for milestone in decay_epochs:
        if epoch >= milestone:
            lr *= gamma
    return lr

def adjust_learning_rate(optimizer, epoch):
    """Overwrite the learning rate of every parameter group for ``epoch``.

    The new rate comes from ``get_current_lr(epoch)``; it is printed and then
    written into the ``'lr'`` entry of each group in ``optimizer.param_groups``.

    Args:
        optimizer: optimizer whose parameter groups are updated in place.
        epoch: zero-based epoch index passed on to ``get_current_lr``.
    """
    new_lr = get_current_lr(epoch)
    print("lr is:", new_lr)
    for group in optimizer.param_groups:
        group['lr'] = new_lr

In [10]:
# Define the function that trains the model
def train_model(net, dataloaders_dict, criterion, optimizer, num_epochs):
    """Train ``net`` and periodically validate it, logging the loss per epoch.

    Args:
        net: model to optimise; it is moved to the selected device in place.
        dataloaders_dict: mapping with "train" and "val" keys whose values
            yield ``(images, targets)`` batches. ``targets`` is iterated and
            each element is moved to the device with ``.to(device)``.
        criterion: callable returning two loss terms for ``(outputs, targets)``;
            their sum is the loss that is back-propagated.
        optimizer: optimizer over the parameters of ``net``. Its learning rate
            is overwritten at the start of every epoch by
            ``adjust_learning_rate``.
        num_epochs: number of epochs to run (exactly this many).

    Side effects:
        * Prints progress every 10 training iterations and a summary per epoch.
        * Rewrites ``log_output.csv`` after every epoch with the accumulated
          per-epoch losses. Losses are sums of the per-batch loss values, and
          ``val_loss`` is 0.0 for epochs in which validation was skipped.
        * Saves ``net.state_dict()`` under ``weights/`` every 10 epochs,
          creating the directory if it does not exist.
        * Enables ``torch.backends.cudnn.benchmark`` globally.
    """
    # Use the GPU when one is available.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("used device：", device)

    # Move the network to the selected device.
    net.to(device)

    # Let cuDNN auto-tune its kernels; pays off when input shapes stay fixed.
    torch.backends.cudnn.benchmark = True

    # Global iteration counter and per-epoch loss accumulators.
    iteration = 1
    epoch_train_loss = 0.0  # sum of training batch losses in this epoch
    epoch_val_loss = 0.0  # sum of validation batch losses in this epoch
    logs = []

    # Epoch loop. range(num_epochs) runs exactly num_epochs epochs; the
    # previous range(num_epochs + 1) ran one extra epoch ("Epoch 201/200")
    # whose weights were never checkpointed.
    for epoch in range(num_epochs):

        adjust_learning_rate(optimizer, epoch)

        # Record start times for the epoch timer and the 10-iteration timer.
        t_epoch_start = time.time()
        t_iter_start = time.time()

        print('-------------')
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-------------')

        # One training pass per epoch, plus a validation pass on some epochs.
        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()  # switch the model to training mode
                print('train')
            else:
                if((epoch+1) % 5 == 0):
                    net.eval()   # switch the model to evaluation mode
                    print('-------------')
                    print('val')
                else:
                    # Validation only runs on every 5th epoch.
                    continue

            # Iterate over the mini-batches of the current phase.
            for images, targets in dataloaders_dict[phase]:

                # Move the batch to the selected device.
                images = images.to(device)
                targets = [ann.to(device)
                           for ann in targets]  # each list element is moved separately

                # Reset accumulated gradients.
                optimizer.zero_grad()

                # Gradients are only tracked during the training phase.
                with torch.set_grad_enabled(phase == 'train'):
                    # Forward pass
                    outputs = net(images)

                    # The criterion returns two loss terms; optimise their sum.
                    loss_l, loss_c = criterion(outputs, targets)
                    loss = loss_l + loss_c

                    # Back-propagate only while training.
                    if phase == 'train':
                        loss.backward()  # compute gradients

                        # Overly large gradients destabilise training, so clip
                        # every gradient value to at most 2.0 in magnitude.
                        nn.utils.clip_grad_value_(
                            net.parameters(), clip_value=2.0)

                        optimizer.step()  # update the parameters

                        if (iteration % 10 == 0):  # report the loss every 10 iterations
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print('Iteration {} || Loss: {:.4f} || 10iter: {:.4f} sec.'.format(
                                iteration, loss.item(), duration))
                            t_iter_start = time.time()

                        epoch_train_loss += loss.item()
                        iteration += 1

                    # Validation phase: only accumulate the loss.
                    else:
                        epoch_val_loss += loss.item()

        # Per-epoch summary of the accumulated losses and the elapsed time.
        t_epoch_finish = time.time()
        print('-------------')
        print('epoch {} || Epoch_TRAIN_Loss:{:.4f} ||Epoch_VAL_Loss:{:.4f}'.format(
            epoch+1, epoch_train_loss, epoch_val_loss))
        print('timer:  {:.4f} sec.'.format(t_epoch_finish - t_epoch_start))

        # Append this epoch to the log and rewrite the CSV so that progress
        # survives an interrupted run.
        log_epoch = {'epoch': epoch+1,
                     'train_loss': epoch_train_loss, 'val_loss': epoch_val_loss}
        logs.append(log_epoch)
        df = pd.DataFrame(logs)
        df.to_csv("log_output.csv")

        # Reset the accumulators for the next epoch.
        epoch_train_loss = 0.0
        epoch_val_loss = 0.0

        # Checkpoint the network every 10 epochs.
        if ((epoch+1) % 10 == 0):
            # torch.save does not create missing directories; without this a
            # fresh checkout would crash here after ten epochs of training.
            os.makedirs('weights', exist_ok=True)
            torch.save(net.state_dict(), 'weights/blazeface256_' +
                       str(epoch+1) + '.pth')

# start training here

In [11]:
# Launch training with the network, data loaders, loss and optimizer
# configured in the cells above.
num_epochs = 200
train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=num_epochs)

used device： cuda:0
lr is: 0.01
-------------
Epoch 1/200
-------------
train
Iteration 10 || Loss: 81.5671 || 10iter: 24.0458 sec.
Iteration 20 || Loss: 28.4457 || 10iter: 10.1055 sec.
Iteration 30 || Loss: 30.6882 || 10iter: 8.7572 sec.
-------------
epoch 1 || Epoch_TRAIN_Loss:8738.2591 ||Epoch_VAL_Loss:0.0000
timer:  48.1610 sec.
lr is: 0.01
-------------
Epoch 2/200
-------------
train
Iteration 40 || Loss: 20.8605 || 10iter: 17.0132 sec.
Iteration 50 || Loss: 27.1527 || 10iter: 9.3169 sec.
Iteration 60 || Loss: 25.2783 || 10iter: 11.0028 sec.
-------------
epoch 2 || Epoch_TRAIN_Loss:790.0189 ||Epoch_VAL_Loss:0.0000
timer:  42.7755 sec.
lr is: 0.01
-------------
Epoch 3/200
-------------
train
Iteration 70 || Loss: 18.5457 || 10iter: 10.1124 sec.
Iteration 80 || Loss: 18.0264 || 10iter: 9.8165 sec.
Iteration 90 || Loss: 24.9605 || 10iter: 8.0947 sec.
-------------
epoch 3 || Epoch_TRAIN_Loss:746.5560 ||Epoch_VAL_Loss:0.0000
timer:  38.1531 sec.
lr is: 0.01
-------------
Epoch 4/2

Iteration 810 || Loss: 22.4748 || 10iter: 11.5128 sec.
Iteration 820 || Loss: 23.0246 || 10iter: 10.6205 sec.
-------------
val
-------------
epoch 25 || Epoch_TRAIN_Loss:711.7038 ||Epoch_VAL_Loss:269.0385
timer:  52.7412 sec.
lr is: 0.01
-------------
Epoch 26/200
-------------
train
Iteration 830 || Loss: 22.3911 || 10iter: 16.0342 sec.
Iteration 840 || Loss: 16.9133 || 10iter: 10.9981 sec.
Iteration 850 || Loss: 17.6606 || 10iter: 9.9709 sec.
-------------
epoch 26 || Epoch_TRAIN_Loss:661.2042 ||Epoch_VAL_Loss:0.0000
timer:  45.7276 sec.
lr is: 0.01
-------------
Epoch 27/200
-------------
train
Iteration 860 || Loss: 19.7520 || 10iter: 8.1320 sec.
Iteration 870 || Loss: 21.2943 || 10iter: 10.1756 sec.
Iteration 880 || Loss: 20.0147 || 10iter: 8.1477 sec.
Iteration 890 || Loss: 22.7548 || 10iter: 10.7322 sec.
-------------
epoch 27 || Epoch_TRAIN_Loss:689.7718 ||Epoch_VAL_Loss:0.0000
timer:  38.1446 sec.
lr is: 0.01
-------------
Epoch 28/200
-------------
train
Iteration 900 || Los

Iteration 1600 || Loss: 20.9633 || 10iter: 13.3503 sec.
Iteration 1610 || Loss: 19.3398 || 10iter: 10.7308 sec.
-------------
epoch 49 || Epoch_TRAIN_Loss:637.2569 ||Epoch_VAL_Loss:0.0000
timer:  41.7186 sec.
lr is: 0.01
-------------
Epoch 50/200
-------------
train
Iteration 1620 || Loss: 14.0267 || 10iter: 10.2344 sec.
Iteration 1630 || Loss: 19.4465 || 10iter: 15.6448 sec.
Iteration 1640 || Loss: 18.8537 || 10iter: 10.6346 sec.
Iteration 1650 || Loss: 17.4327 || 10iter: 8.7033 sec.
-------------
val
-------------
epoch 50 || Epoch_TRAIN_Loss:615.1622 ||Epoch_VAL_Loss:270.1684
timer:  61.0433 sec.
lr is: 0.01
-------------
Epoch 51/200
-------------
train
Iteration 1660 || Loss: 17.5467 || 10iter: 15.8594 sec.
Iteration 1670 || Loss: 23.0072 || 10iter: 8.3667 sec.
Iteration 1680 || Loss: 15.3119 || 10iter: 11.4726 sec.
-------------
epoch 51 || Epoch_TRAIN_Loss:632.0865 ||Epoch_VAL_Loss:0.0000
timer:  38.4340 sec.
lr is: 0.01
-------------
Epoch 52/200
-------------
train
Iteration 

Iteration 2390 || Loss: 17.3252 || 10iter: 14.8271 sec.
Iteration 2400 || Loss: 17.5437 || 10iter: 9.9154 sec.
-------------
epoch 73 || Epoch_TRAIN_Loss:596.1089 ||Epoch_VAL_Loss:0.0000
timer:  42.6380 sec.
lr is: 0.01
-------------
Epoch 74/200
-------------
train
Iteration 2410 || Loss: 20.4745 || 10iter: 10.6523 sec.
Iteration 2420 || Loss: 17.5412 || 10iter: 16.2489 sec.
Iteration 2430 || Loss: 17.4865 || 10iter: 8.9898 sec.
Iteration 2440 || Loss: 18.0117 || 10iter: 9.6704 sec.
-------------
epoch 74 || Epoch_TRAIN_Loss:570.8829 ||Epoch_VAL_Loss:0.0000
timer:  47.8353 sec.
lr is: 0.01
-------------
Epoch 75/200
-------------
train
Iteration 2450 || Loss: 22.5227 || 10iter: 19.4072 sec.
Iteration 2460 || Loss: 17.9949 || 10iter: 9.2984 sec.
Iteration 2470 || Loss: 15.7306 || 10iter: 7.8510 sec.
-------------
val
-------------
epoch 75 || Epoch_TRAIN_Loss:581.2257 ||Epoch_VAL_Loss:3740.8610
timer:  56.9266 sec.
lr is: 0.01
-------------
Epoch 76/200
-------------
train
Iteration 24

Iteration 3180 || Loss: 13.6904 || 10iter: 15.3308 sec.
Iteration 3190 || Loss: 15.7092 || 10iter: 9.4292 sec.
Iteration 3200 || Loss: 16.7830 || 10iter: 10.7257 sec.
-------------
epoch 97 || Epoch_TRAIN_Loss:542.5671 ||Epoch_VAL_Loss:0.0000
timer:  48.0180 sec.
lr is: 0.01
-------------
Epoch 98/200
-------------
train
Iteration 3210 || Loss: 17.7641 || 10iter: 18.8460 sec.
Iteration 3220 || Loss: 21.1642 || 10iter: 9.8546 sec.
Iteration 3230 || Loss: 14.3739 || 10iter: 10.9097 sec.
-------------
epoch 98 || Epoch_TRAIN_Loss:556.2729 ||Epoch_VAL_Loss:0.0000
timer:  43.2607 sec.
lr is: 0.01
-------------
Epoch 99/200
-------------
train
Iteration 3240 || Loss: 16.5628 || 10iter: 11.2453 sec.
Iteration 3250 || Loss: 14.6437 || 10iter: 9.6203 sec.
Iteration 3260 || Loss: 14.3833 || 10iter: 9.9722 sec.
-------------
epoch 99 || Epoch_TRAIN_Loss:554.9253 ||Epoch_VAL_Loss:0.0000
timer:  37.5946 sec.
lr is: 0.01
-------------
Epoch 100/200
-------------
train
Iteration 3270 || Loss: 21.2495

Iteration 3970 || Loss: 13.9775 || 10iter: 20.7118 sec.
Iteration 3980 || Loss: 16.9664 || 10iter: 12.9422 sec.
Iteration 3990 || Loss: 13.0430 || 10iter: 8.4477 sec.
-------------
epoch 121 || Epoch_TRAIN_Loss:526.5011 ||Epoch_VAL_Loss:0.0000
timer:  44.6458 sec.
lr is: 0.001
-------------
Epoch 122/200
-------------
train
Iteration 4000 || Loss: 16.9235 || 10iter: 20.1007 sec.
Iteration 4010 || Loss: 14.6651 || 10iter: 11.3972 sec.
Iteration 4020 || Loss: 18.0612 || 10iter: 8.9391 sec.
-------------
epoch 122 || Epoch_TRAIN_Loss:510.4020 ||Epoch_VAL_Loss:0.0000
timer:  47.4316 sec.
lr is: 0.001
-------------
Epoch 123/200
-------------
train
Iteration 4030 || Loss: 14.3651 || 10iter: 13.1026 sec.
Iteration 4040 || Loss: 16.9409 || 10iter: 9.7502 sec.
Iteration 4050 || Loss: 17.5814 || 10iter: 8.4886 sec.
-------------
epoch 123 || Epoch_TRAIN_Loss:510.1609 ||Epoch_VAL_Loss:0.0000
timer:  39.6128 sec.
lr is: 0.001
-------------
Epoch 124/200
-------------
train
Iteration 4060 || Loss:

Iteration 4760 || Loss: 13.6692 || 10iter: 21.4209 sec.
Iteration 4770 || Loss: 14.6492 || 10iter: 11.5358 sec.
Iteration 4780 || Loss: 14.7407 || 10iter: 9.3157 sec.
-------------
val
-------------
epoch 145 || Epoch_TRAIN_Loss:510.8362 ||Epoch_VAL_Loss:247.6289
timer:  60.6802 sec.
lr is: 0.001
-------------
Epoch 146/200
-------------
train
Iteration 4790 || Loss: 14.0819 || 10iter: 12.1420 sec.
Iteration 4800 || Loss: 17.4267 || 10iter: 14.8064 sec.
Iteration 4810 || Loss: 16.3825 || 10iter: 10.5899 sec.
-------------
epoch 146 || Epoch_TRAIN_Loss:521.3581 ||Epoch_VAL_Loss:0.0000
timer:  44.4529 sec.
lr is: 0.001
-------------
Epoch 147/200
-------------
train
Iteration 4820 || Loss: 15.9556 || 10iter: 12.4755 sec.
Iteration 4830 || Loss: 14.2589 || 10iter: 14.3981 sec.
Iteration 4840 || Loss: 15.0441 || 10iter: 8.6102 sec.
Iteration 4850 || Loss: 16.3986 || 10iter: 10.3595 sec.
-------------
epoch 147 || Epoch_TRAIN_Loss:527.8767 ||Epoch_VAL_Loss:0.0000
timer:  46.8753 sec.
lr is:

Iteration 5550 || Loss: 13.3402 || 10iter: 12.8609 sec.
Iteration 5560 || Loss: 15.0820 || 10iter: 14.8367 sec.
Iteration 5570 || Loss: 13.6989 || 10iter: 9.6025 sec.
-------------
epoch 169 || Epoch_TRAIN_Loss:502.5866 ||Epoch_VAL_Loss:0.0000
timer:  43.2681 sec.
lr is: 0.001
-------------
Epoch 170/200
-------------
train
Iteration 5580 || Loss: 19.9757 || 10iter: 13.0584 sec.
Iteration 5590 || Loss: 14.3168 || 10iter: 14.9969 sec.
Iteration 5600 || Loss: 19.7331 || 10iter: 10.1278 sec.
Iteration 5610 || Loss: 15.9746 || 10iter: 9.7071 sec.
-------------
val
-------------
epoch 170 || Epoch_TRAIN_Loss:513.2119 ||Epoch_VAL_Loss:249.7699
timer:  62.7161 sec.
lr is: 0.001
-------------
Epoch 171/200
-------------
train
Iteration 5620 || Loss: 13.5127 || 10iter: 17.8377 sec.
Iteration 5630 || Loss: 14.3498 || 10iter: 13.4415 sec.
Iteration 5640 || Loss: 12.2965 || 10iter: 9.1253 sec.
-------------
epoch 171 || Epoch_TRAIN_Loss:494.6920 ||Epoch_VAL_Loss:0.0000
timer:  42.8588 sec.
lr is: 

Iteration 6340 || Loss: 14.9446 || 10iter: 10.3124 sec.
Iteration 6350 || Loss: 19.1058 || 10iter: 9.0337 sec.
Iteration 6360 || Loss: 15.3531 || 10iter: 12.9847 sec.
-------------
epoch 193 || Epoch_TRAIN_Loss:498.3725 ||Epoch_VAL_Loss:0.0000
timer:  40.6998 sec.
lr is: 0.0001
-------------
Epoch 194/200
-------------
train
Iteration 6370 || Loss: 16.9619 || 10iter: 5.0523 sec.
Iteration 6380 || Loss: 17.9271 || 10iter: 13.0958 sec.
Iteration 6390 || Loss: 16.5877 || 10iter: 12.8005 sec.
Iteration 6400 || Loss: 13.1628 || 10iter: 9.1485 sec.
-------------
epoch 194 || Epoch_TRAIN_Loss:496.2443 ||Epoch_VAL_Loss:0.0000
timer:  41.6786 sec.
lr is: 0.0001
-------------
Epoch 195/200
-------------
train
Iteration 6410 || Loss: 13.2891 || 10iter: 18.1550 sec.
Iteration 6420 || Loss: 12.0449 || 10iter: 13.9978 sec.
Iteration 6430 || Loss: 13.6067 || 10iter: 8.6420 sec.
-------------
val
-------------
epoch 195 || Epoch_TRAIN_Loss:483.4290 ||Epoch_VAL_Loss:242.8790
timer:  61.6523 sec.
lr is:

That's all :)