In [1]:
# import stuff
import os
import numpy as np
import torch
import torch.utils.data as data
from itertools import product as product
import time

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torch.autograd import Function
import pandas as pd

# import dataset
from utils.dataset import VOCDataset, DatasetTransform, make_datapath_list, Anno_xml2list, od_collate_fn

In [14]:
# Create the output directories written to during training: "weights" receives
# the model checkpoints and "log" receives log_output.csv.
# exist_ok=True makes the cell safe to re-run.
os.makedirs("weights", exist_ok=True)
os.makedirs("log", exist_ok=True)
# Image size handed to DatasetTransform below; matches ssd_cfg['input_size'].
input_size = 256

# Set up the person-only VOC dataset

In [3]:
# Collect image / annotation paths for the "person" class from VOC2007.
vocpath = os.path.join("..", "VOCdevkit", "VOC2007")
(train_img_list, train_anno_list,
 val_img_list, val_anno_list) = make_datapath_list(vocpath, cls="person")

# Extend the training lists with VOC2012; its validation lists are discarded.
vocpath = "../VOCdevkit/VOC2012"
train_img_list2, train_anno_list2, _, _ = make_datapath_list(
    vocpath, cls="person", VOC2012=True)

train_img_list.extend(train_img_list2)
train_anno_list.extend(train_anno_list2)

# Dataset settings
voc_classes = ['person']
color_mean = (104, 117, 123)  # colour mean values, in (BGR) order

print("trainlist: ", len(train_img_list))
print("vallist: ", len(val_img_list))

# Preprocessing objects passed to the datasets.
# NOTE(review): going by its name, Anno_xml2list converts XML annotations to
# lists -- confirm in utils.dataset.
transform = DatasetTransform(input_size, color_mean)
transform_anno = Anno_xml2list(voc_classes)

train_dataset = VOCDataset(
    train_img_list,
    train_anno_list,
    phase="train",
    transform=transform,
    transform_anno=transform_anno,
)
# The validation dataset gets its own transform / annotation-converter instances.
val_dataset = VOCDataset(
    val_img_list,
    val_anno_list,
    phase="val",
    transform=DatasetTransform(input_size, color_mean),
    transform_anno=Anno_xml2list(voc_classes),
)

batch_size = 32

# Only the training loader shuffles; both use od_collate_fn to build batches.
train_dataloader = data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=od_collate_fn,
    num_workers=8,
)

val_dataloader = data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=od_collate_fn,
    num_workers=8,
)

# Phase name -> loader, as consumed by train_model().
dataloaders_dict = dict(train=train_dataloader, val=val_dataloader)

val not found
trainlist:  6469
vallist:  2097


In [4]:
# Display the first training sample; the (truncated) output below is a tuple
# whose first element is the image tensor.
train_dataset[0]

(tensor([[[  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
          [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
          [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
          ...,
          [  0.0000,   0.0000,   0.0000,  ..., -66.0397, -60.8152, -67.4163],
          [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
          [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000]],
 
         [[  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
          [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
          [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
          ...,
          [  0.0000,   0.0000,   0.0000,  ..., -24.9051, -10.3171, -15.8597],
          [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
          [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000]],
 
         [[  0.0000,   0.000

In [5]:
# Sanity check: pull a single mini-batch from the training loader.
batch_iterator = iter(dataloaders_dict["train"])  # iterator over training batches
images, targets = next(batch_iterator)  # first mini-batch
print(images.size())  # torch.Size([32, 3, 256, 256]) for batch_size=32, input_size=256
print(len(targets))  # 32 here, i.e. one entry per image in the batch
print(targets[1].shape)  # shape of the second entry, e.g. torch.Size([1, 5])

torch.Size([32, 3, 256, 256])
32
torch.Size([1, 5])


In [6]:
# Display the target tensor of the second sample in the batch.
# NOTE(review): the 5 columns are presumably [xmin, ymin, xmax, ymax, label]
# in normalised coordinates -- confirm against utils.dataset.
targets[1]

tensor([[0.0573, 0.0000, 1.0000, 1.0000, 0.0000]])

# Test with the SSD model

In [7]:
from utils.blazeface import SSD256

In [8]:
# Configuration for the SSD256 model built below.
# NOTE(review): the original comment said "SSD300", but the class instantiated
# here is SSD256 and input_size is 256.
ssd_cfg = {
    'num_classes': 2,  # total number of classes, including the background class
    'input_size': 256,  # input image size
    'bbox_aspect_num': [4, 6],  # number of default-box (DBox) aspect-ratio variants to output
    'feature_maps': [16, 8],  # feature-map size of each source
    'steps': [8, 16],  # determines the DBox size
    'min_sizes': [16, 32],  # determines the DBox size
    'max_sizes': [32, 100],  # determines the DBox size
    # NOTE(review): six entries here versus two entries in every other list --
    # presumably only the first two are read; confirm in utils.blazeface.
    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
}

# Build the network in training phase from the configuration above.
net = SSD256(phase="train", cfg=ssd_cfg)

# Set the SSD weights

def weights_init(m):
    """Initialise one module in place; intended for use with ``Module.apply``.

    ``nn.Conv2d`` modules get Kaiming-normal weights and, when a bias exists,
    a bias filled with 0.0. Every other module type is left untouched.

    Args:
        m: the module to initialise.

    Returns:
        None.
    """
    # Only convolution layers are re-initialised.
    if not isinstance(m, nn.Conv2d):
        return

    init.kaiming_normal_(m.weight)

    # Convolutions created with bias=False have nothing more to set.
    if m.bias is None:
        return
    init.constant_(m.bias, 0.0)

# Run weights_init over every module under net.loc and net.conf;
# the rest of the network is not passed through weights_init here.
net.loc.apply(weights_init)
net.conf.apply(weights_init)

# Check whether a GPU can be used; fall back to the CPU otherwise.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using:", device)

print("set weights!")

using: cuda:0
set weights!


In [9]:
# Print the module hierarchy of the network for inspection.
print(net)

SSD256(
  (blaze): BlazeFace(
    (features): Sequential(
      (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): BlazeBlock(
        (conv1): Sequential(
          (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
          (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
          (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (act): ReLU(inplace=True)
      )
      (4): BlazeBlock(
        (conv1): Sequential(
          (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
          (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): Conv2d(24, 24, kernel_size=(1, 1), strid

In [10]:
from utils.ssd_model import MultiBoxLoss

# Loss function; in train_model it is called as criterion(outputs, targets)
# and returns the pair (loss_l, loss_c).
# NOTE(review): jaccard_thresh / neg_pos are presumably the box-matching IoU
# threshold and the negative:positive mining ratio -- confirm in utils.ssd_model.
criterion = MultiBoxLoss(jaccard_thresh=0.5,neg_pos=3, device=device)

# Optimizer: Adam over all network parameters.
# The lr given here is overwritten at the start of every epoch by
# adjust_learning_rate() when training through train_model().
import torch.optim as optim
optimizer = optim.Adam(net.parameters(), lr=1e-4, weight_decay=5e-4)

In [11]:
def get_current_lr(epoch, base_lr=1e-4, decay_epochs=(120, 180), gamma=0.1):
    """Return the step-decayed learning rate for a given epoch.

    The rate starts at ``base_lr`` and is multiplied by ``gamma`` once for
    every milestone in ``decay_epochs`` that ``epoch`` has reached
    (``epoch >= milestone``). With the defaults this gives 1e-4 before epoch
    120, one decay step from epoch 120 and two decay steps from epoch 180.

    Args:
        epoch: epoch index to look up; train_model passes a 0-based index.
        base_lr: learning rate before any decay is applied.
        decay_epochs: iterable of epoch indices at which the rate decays.
        gamma: multiplicative decay factor applied per milestone reached.

    Returns:
        The learning rate as a float.
    """
    lr = base_lr
    for decay_epoch in decay_epochs:
        if epoch >= decay_epoch:
            # Multiply step by step (rather than gamma ** k) so the result is
            # bit-identical to the original hard-coded schedule.
            lr *= gamma
    return lr

def adjust_learning_rate(optimizer, epoch):
    """Apply the scheduled learning rate for ``epoch`` to ``optimizer``.

    The rate is obtained from ``get_current_lr(epoch)``, printed, and then
    written to the ``'lr'`` entry of every group in ``optimizer.param_groups``.

    Args:
        optimizer: object exposing ``param_groups`` (a sequence of dict-like
            groups), e.g. the Adam optimizer created in this notebook.
        epoch: epoch index forwarded to ``get_current_lr``.

    Returns:
        None.
    """
    scheduled_lr = get_current_lr(epoch)
    print("lr is:", scheduled_lr)
    for group in optimizer.param_groups:
        group['lr'] = scheduled_lr

In [15]:
# Define the function that trains the model
def train_model(net, dataloaders_dict, criterion, optimizer, num_epochs):
    """Train ``net`` for ``num_epochs`` epochs, validating on every 5th epoch.

    For each epoch the learning rate is first set through
    ``adjust_learning_rate(optimizer, epoch)`` (``epoch`` is 0-based). The
    "train" loader is then iterated with back-propagation, gradient clipping
    by value (2.0) and an optimizer step. Only when ``(epoch + 1) % 5 == 0``
    is the "val" loader iterated as well, with gradients disabled.

    Side effects:
        * moves ``net`` to ``cuda:0`` when available (CPU otherwise) and sets
          ``torch.backends.cudnn.benchmark = True``;
        * creates the ``log`` and ``weights`` directories if they are missing;
        * prints the loss every 10 training iterations and a summary per epoch;
        * rewrites ``log/log_output.csv`` after every epoch;
        * saves ``net.state_dict()`` to ``weights/blazeface256_<epoch>.pth``
          every 10 epochs (1-based epoch number in the file name).

    The logged/printed epoch losses are the *sum* of the per-batch loss
    values, not an average. ``val_loss`` is reported as 0.0 for epochs in
    which validation is skipped.

    Args:
        net: the model; called as ``net(images)``.
        dataloaders_dict: mapping with "train" and "val" entries, each an
            iterable yielding ``(images, targets)`` where ``images`` is a
            tensor and ``targets`` is an iterable of tensors.
        criterion: called as ``criterion(outputs, targets)``; must return the
            pair ``(loss_l, loss_c)``, which are summed into the total loss.
        optimizer: optimizer updating the parameters of ``net``.
        num_epochs: number of epochs to run.

    Returns:
        None.
    """
    # Check whether a GPU can be used
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("used device：", device)

    # Make sure the output directories exist up front, so that writing the
    # log or a checkpoint cannot fail after an epoch of work has been done.
    os.makedirs("log", exist_ok=True)
    os.makedirs("weights", exist_ok=True)

    # Move the network to the selected device
    net.to(device)

    # Speeds things up when the network is more or less fixed
    torch.backends.cudnn.benchmark = True

    # Training-iteration counter; runs across epochs and is never reset
    iteration = 1
    logs = []

    # Epoch loop. range(num_epochs), not num_epochs + 1: the extra pass would
    # run one more epoch than requested (printed as "Epoch N+1/N").
    for epoch in range(num_epochs):

        adjust_learning_rate(optimizer, epoch)

        # Record start times
        t_epoch_start = time.time()
        t_iter_start = time.time()

        # Per-epoch sums of the batch losses
        epoch_train_loss = 0.0
        epoch_val_loss = 0.0

        print('-------------')
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-------------')

        # Training phase, then (sometimes) validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()  # put the model in training mode
                print('train')
            else:
                if((epoch+1) % 5 == 0):
                    net.eval()   # put the model in evaluation mode
                    print('-------------')
                    print('val')
                else:
                    # Validation runs only once every 5 epochs
                    continue

            # Loop over the mini-batches of the current phase
            for images, targets in dataloaders_dict[phase]:

                # Send the data to the selected device
                images = images.to(device)
                targets = [ann.to(device)
                           for ann in targets]  # each tensor in the list

                # Reset the gradients accumulated by the optimizer
                optimizer.zero_grad()

                # Gradients are tracked only while training
                with torch.set_grad_enabled(phase == 'train'):
                    # Forward pass
                    outputs = net(images)

                    # Loss: sum of the two terms returned by the criterion
                    loss_l, loss_c = criterion(outputs, targets)
                    loss = loss_l + loss_c

                    # Back-propagate when training
                    if phase == 'train':
                        loss.backward()  # compute gradients

                        # Overly large gradients make training unstable, so
                        # clip every gradient value to at most 2.0 in magnitude
                        nn.utils.clip_grad_value_(
                            net.parameters(), clip_value=2.0)

                        optimizer.step()  # update the parameters

                        if (iteration % 10 == 0):  # report the loss every 10 iterations
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print('Iteration {} || Loss: {:.4f} || 10iter: {:.4f} sec.'.format(
                                iteration, loss.item(), duration))
                            t_iter_start = time.time()

                        epoch_train_loss += loss.item()
                        iteration += 1

                    # Validation
                    else:
                        epoch_val_loss += loss.item()

        # Per-epoch summary of the summed losses and the elapsed time
        t_epoch_finish = time.time()
        print('-------------')
        print('epoch {} || Epoch_TRAIN_Loss:{:.4f} ||Epoch_VAL_Loss:{:.4f}'.format(
            epoch+1, epoch_train_loss, epoch_val_loss))
        print('timer:  {:.4f} sec.'.format(t_epoch_finish - t_epoch_start))

        # Save the log (the whole CSV is rewritten every epoch)
        log_epoch = {'epoch': epoch+1,
                     'train_loss': epoch_train_loss, 'val_loss': epoch_val_loss}
        logs.append(log_epoch)
        df = pd.DataFrame(logs)
        df.to_csv("log/log_output.csv")

        # Save the network weights every 10 epochs
        if ((epoch+1) % 10 == 0):
            torch.save(net.state_dict(), 'weights/blazeface256_' +
                       str(epoch+1) + '.pth')

# Start training

In [None]:
# Launch training. train_model writes the per-epoch log to log/log_output.csv
# and periodic checkpoints to weights/blazeface256_<epoch>.pth.
num_epochs = 200
train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=num_epochs)

used device： cuda:0
lr is: 0.0001
-------------
Epoch 1/200
-------------
train
Iteration 10 || Loss: 65.4531 || 10iter: 7.2785 sec.
Iteration 20 || Loss: 49.4294 || 10iter: 3.5289 sec.
Iteration 30 || Loss: 64.4441 || 10iter: 3.9851 sec.
Iteration 40 || Loss: 37.6062 || 10iter: 4.2480 sec.
Iteration 50 || Loss: 56.9393 || 10iter: 4.0935 sec.
Iteration 60 || Loss: 50.8827 || 10iter: 4.1093 sec.
Iteration 70 || Loss: 60.1804 || 10iter: 4.0369 sec.
Iteration 80 || Loss: 43.3787 || 10iter: 3.9698 sec.
Iteration 90 || Loss: 48.7653 || 10iter: 3.9149 sec.
Iteration 100 || Loss: 49.1596 || 10iter: 4.0868 sec.
Iteration 110 || Loss: 39.0327 || 10iter: 4.1437 sec.
Iteration 120 || Loss: 46.1667 || 10iter: 4.0960 sec.
Iteration 130 || Loss: 58.0489 || 10iter: 4.1082 sec.
Iteration 140 || Loss: 49.2085 || 10iter: 3.9622 sec.
Iteration 150 || Loss: 48.5656 || 10iter: 4.1017 sec.
Iteration 160 || Loss: 39.4221 || 10iter: 3.7092 sec.
Iteration 170 || Loss: 47.2192 || 10iter: 3.7537 sec.
Iteration 1

Iteration 1330 || Loss: 28.5187 || 10iter: 3.6563 sec.
Iteration 1340 || Loss: 23.2645 || 10iter: 3.6676 sec.
Iteration 1350 || Loss: 25.6298 || 10iter: 3.7454 sec.
Iteration 1360 || Loss: 27.1195 || 10iter: 3.6629 sec.
Iteration 1370 || Loss: 32.8886 || 10iter: 3.7117 sec.
Iteration 1380 || Loss: 25.4674 || 10iter: 3.6986 sec.
Iteration 1390 || Loss: 27.7631 || 10iter: 3.7003 sec.
Iteration 1400 || Loss: 25.8313 || 10iter: 3.7335 sec.
Iteration 1410 || Loss: 23.4439 || 10iter: 3.5650 sec.
Iteration 1420 || Loss: 25.9289 || 10iter: 3.3516 sec.
-------------
epoch 7 || Epoch_TRAIN_Loss:5163.7777 ||Epoch_VAL_Loss:0.0000
timer:  78.0187 sec.
lr is: 0.0001
-------------
Epoch 8/200
-------------
train
Iteration 1430 || Loss: 21.4215 || 10iter: 6.7832 sec.
Iteration 1440 || Loss: 22.4837 || 10iter: 3.6024 sec.
Iteration 1450 || Loss: 30.6683 || 10iter: 3.6801 sec.
Iteration 1460 || Loss: 22.0837 || 10iter: 3.7614 sec.
Iteration 1470 || Loss: 26.2949 || 10iter: 3.6932 sec.
Iteration 1480 || 

Iteration 2640 || Loss: 23.5733 || 10iter: 2.7122 sec.
Iteration 2650 || Loss: 21.6336 || 10iter: 5.1560 sec.
Iteration 2660 || Loss: 22.8657 || 10iter: 3.7301 sec.
Iteration 2670 || Loss: 30.8782 || 10iter: 3.6904 sec.
Iteration 2680 || Loss: 23.3613 || 10iter: 3.6443 sec.
Iteration 2690 || Loss: 22.7834 || 10iter: 3.7788 sec.
Iteration 2700 || Loss: 29.5346 || 10iter: 3.6708 sec.
Iteration 2710 || Loss: 19.5737 || 10iter: 3.6891 sec.
Iteration 2720 || Loss: 21.3496 || 10iter: 3.7199 sec.
Iteration 2730 || Loss: 26.8876 || 10iter: 3.7551 sec.
Iteration 2740 || Loss: 18.1088 || 10iter: 3.7981 sec.
Iteration 2750 || Loss: 28.0163 || 10iter: 3.6853 sec.
Iteration 2760 || Loss: 23.5762 || 10iter: 3.7476 sec.
Iteration 2770 || Loss: 21.2818 || 10iter: 3.6769 sec.
Iteration 2780 || Loss: 19.8663 || 10iter: 3.7176 sec.
Iteration 2790 || Loss: 21.9565 || 10iter: 3.7084 sec.
Iteration 2800 || Loss: 21.4363 || 10iter: 3.6855 sec.
Iteration 2810 || Loss: 27.1589 || 10iter: 3.7573 sec.
Iteration 

Iteration 3960 || Loss: 16.7632 || 10iter: 3.7216 sec.
Iteration 3970 || Loss: 19.1432 || 10iter: 3.7196 sec.
Iteration 3980 || Loss: 20.0888 || 10iter: 3.7248 sec.
Iteration 3990 || Loss: 16.3594 || 10iter: 3.7372 sec.
Iteration 4000 || Loss: 15.7826 || 10iter: 3.7239 sec.
Iteration 4010 || Loss: 19.5723 || 10iter: 3.7168 sec.
Iteration 4020 || Loss: 34.8089 || 10iter: 3.6296 sec.
Iteration 4030 || Loss: 21.8820 || 10iter: 3.9800 sec.
Iteration 4040 || Loss: 19.1816 || 10iter: 3.8548 sec.
Iteration 4050 || Loss: 28.2533 || 10iter: 3.4571 sec.
Iteration 4060 || Loss: 14.2667 || 10iter: 3.0954 sec.
-------------
val
-------------
epoch 20 || Epoch_TRAIN_Loss:4473.8668 ||Epoch_VAL_Loss:1208.6176
timer:  89.2991 sec.
lr is: 0.0001
-------------
Epoch 21/200
-------------
train
Iteration 4070 || Loss: 23.6523 || 10iter: 7.2052 sec.
Iteration 4080 || Loss: 22.0044 || 10iter: 3.6781 sec.
Iteration 4090 || Loss: 22.4187 || 10iter: 3.7297 sec.
Iteration 4100 || Loss: 20.8925 || 10iter: 3.7202 

Iteration 5270 || Loss: 25.1058 || 10iter: 3.4244 sec.
-------------
epoch 26 || Epoch_TRAIN_Loss:4096.9632 ||Epoch_VAL_Loss:0.0000
timer:  77.6829 sec.
lr is: 0.0001
-------------
Epoch 27/200
-------------
train
Iteration 5280 || Loss: 20.7962 || 10iter: 3.7265 sec.
Iteration 5290 || Loss: 18.6235 || 10iter: 4.3867 sec.
Iteration 5300 || Loss: 17.9558 || 10iter: 3.6300 sec.
Iteration 5310 || Loss: 15.5754 || 10iter: 3.7107 sec.
Iteration 5320 || Loss: 16.9064 || 10iter: 3.6808 sec.
Iteration 5330 || Loss: 15.4374 || 10iter: 3.6695 sec.
Iteration 5340 || Loss: 14.8704 || 10iter: 3.6597 sec.
Iteration 5350 || Loss: 18.9577 || 10iter: 3.6573 sec.
Iteration 5360 || Loss: 13.3798 || 10iter: 3.6443 sec.
Iteration 5370 || Loss: 16.7852 || 10iter: 3.8316 sec.
Iteration 5380 || Loss: 21.1659 || 10iter: 3.8952 sec.
Iteration 5390 || Loss: 18.2064 || 10iter: 3.6599 sec.
Iteration 5400 || Loss: 25.2633 || 10iter: 3.7423 sec.
Iteration 5410 || Loss: 20.3947 || 10iter: 3.7587 sec.
Iteration 5420 |

Iteration 6560 || Loss: 16.8238 || 10iter: 3.7327 sec.
Iteration 6570 || Loss: 16.9673 || 10iter: 3.7372 sec.
Iteration 6580 || Loss: 14.3727 || 10iter: 3.6690 sec.
Iteration 6590 || Loss: 19.3253 || 10iter: 3.7383 sec.
Iteration 6600 || Loss: 20.7637 || 10iter: 3.6662 sec.
Iteration 6610 || Loss: 20.9996 || 10iter: 3.7265 sec.
Iteration 6620 || Loss: 29.5664 || 10iter: 3.6675 sec.
Iteration 6630 || Loss: 16.4456 || 10iter: 3.6129 sec.
Iteration 6640 || Loss: 11.2212 || 10iter: 3.6585 sec.
Iteration 6650 || Loss: 14.7575 || 10iter: 3.6366 sec.
Iteration 6660 || Loss: 15.4217 || 10iter: 3.7411 sec.
Iteration 6670 || Loss: 17.8295 || 10iter: 3.7646 sec.
Iteration 6680 || Loss: 20.5394 || 10iter: 3.7568 sec.
Iteration 6690 || Loss: 18.8835 || 10iter: 3.4688 sec.
-------------
epoch 33 || Epoch_TRAIN_Loss:3821.9962 ||Epoch_VAL_Loss:0.0000
timer:  77.7964 sec.
lr is: 0.0001
-------------
Epoch 34/200
-------------
train
Iteration 6700 || Loss: 20.6415 || 10iter: 2.7417 sec.
Iteration 6710 |

Iteration 7890 || Loss: 10.2626 || 10iter: 3.7720 sec.
Iteration 7900 || Loss: 20.4448 || 10iter: 3.6607 sec.
Iteration 7910 || Loss: 18.5823 || 10iter: 3.4080 sec.
-------------
epoch 39 || Epoch_TRAIN_Loss:3547.8779 ||Epoch_VAL_Loss:0.0000
timer:  77.7022 sec.
lr is: 0.0001
-------------
Epoch 40/200
-------------
train
Iteration 7920 || Loss: 14.6962 || 10iter: 3.7900 sec.
Iteration 7930 || Loss: 16.0847 || 10iter: 4.3577 sec.
Iteration 7940 || Loss: 18.1994 || 10iter: 3.6313 sec.
Iteration 7950 || Loss: 13.2733 || 10iter: 3.6488 sec.
Iteration 7960 || Loss: 20.0387 || 10iter: 3.7032 sec.
Iteration 7970 || Loss: 17.3104 || 10iter: 3.7290 sec.
Iteration 7980 || Loss: 20.9605 || 10iter: 3.6292 sec.
Iteration 7990 || Loss: 26.6363 || 10iter: 3.6916 sec.
Iteration 8000 || Loss: 22.2831 || 10iter: 3.6844 sec.
Iteration 8010 || Loss: 19.4677 || 10iter: 3.6767 sec.
Iteration 8020 || Loss: 17.8550 || 10iter: 3.7372 sec.
Iteration 8030 || Loss: 16.6206 || 10iter: 3.6819 sec.
Iteration 8040 |

Iteration 9170 || Loss: 12.2147 || 10iter: 3.6213 sec.
Iteration 9180 || Loss: 24.2948 || 10iter: 3.7246 sec.
Iteration 9190 || Loss: 14.9573 || 10iter: 3.7672 sec.
Iteration 9200 || Loss: 17.8833 || 10iter: 3.6662 sec.
Iteration 9210 || Loss: 17.9376 || 10iter: 3.6182 sec.
Iteration 9220 || Loss: 18.9377 || 10iter: 3.7235 sec.
Iteration 9230 || Loss: 17.3420 || 10iter: 3.6764 sec.
Iteration 9240 || Loss: 12.1125 || 10iter: 3.7763 sec.
Iteration 9250 || Loss: 15.5098 || 10iter: 3.7415 sec.
Iteration 9260 || Loss: 14.1703 || 10iter: 3.7315 sec.
Iteration 9270 || Loss: 24.3206 || 10iter: 3.6601 sec.
Iteration 9280 || Loss: 13.8033 || 10iter: 3.7178 sec.
Iteration 9290 || Loss: 20.5112 || 10iter: 3.7322 sec.
Iteration 9300 || Loss: 15.5769 || 10iter: 3.7399 sec.
Iteration 9310 || Loss: 12.4265 || 10iter: 3.7030 sec.
Iteration 9320 || Loss: 17.3238 || 10iter: 3.7286 sec.
Iteration 9330 || Loss: 20.0848 || 10iter: 3.4005 sec.
-------------
epoch 46 || Epoch_TRAIN_Loss:3475.7597 ||Epoch_VAL_

Iteration 10480 || Loss: 15.0476 || 10iter: 3.7181 sec.
Iteration 10490 || Loss: 13.3308 || 10iter: 3.7177 sec.
Iteration 10500 || Loss: 15.1352 || 10iter: 3.6998 sec.
Iteration 10510 || Loss: 10.5504 || 10iter: 3.7742 sec.
Iteration 10520 || Loss: 14.2654 || 10iter: 3.7324 sec.
Iteration 10530 || Loss: 15.0251 || 10iter: 3.6632 sec.
Iteration 10540 || Loss: 15.9008 || 10iter: 3.6131 sec.
Iteration 10550 || Loss: 15.7385 || 10iter: 3.3658 sec.
-------------
epoch 52 || Epoch_TRAIN_Loss:3398.9321 ||Epoch_VAL_Loss:0.0000
timer:  77.6237 sec.
lr is: 0.0001
-------------
Epoch 53/200
-------------
train
Iteration 10560 || Loss: 18.3004 || 10iter: 4.5645 sec.
Iteration 10570 || Loss: 13.9067 || 10iter: 4.0636 sec.
Iteration 10580 || Loss: 13.8171 || 10iter: 3.6400 sec.
Iteration 10590 || Loss: 28.1352 || 10iter: 3.6973 sec.
Iteration 10600 || Loss: 13.2282 || 10iter: 3.6958 sec.
Iteration 10610 || Loss: 14.0883 || 10iter: 3.6847 sec.
Iteration 10620 || Loss: 19.3089 || 10iter: 3.7708 sec.
I

Iteration 11770 || Loss: 13.1593 || 10iter: 3.3807 sec.
-------------
epoch 58 || Epoch_TRAIN_Loss:3359.0923 ||Epoch_VAL_Loss:0.0000
timer:  83.6792 sec.
lr is: 0.0001
-------------
Epoch 59/200
-------------
train
Iteration 11780 || Loss: 20.0513 || 10iter: 5.2577 sec.
Iteration 11790 || Loss: 15.3899 || 10iter: 4.1176 sec.
Iteration 11800 || Loss: 17.8105 || 10iter: 3.7039 sec.
Iteration 11810 || Loss: 19.2686 || 10iter: 3.6753 sec.
Iteration 11820 || Loss: 19.4741 || 10iter: 3.8104 sec.
Iteration 11830 || Loss: 16.1798 || 10iter: 3.7478 sec.
Iteration 11840 || Loss: 14.2493 || 10iter: 3.6433 sec.
Iteration 11850 || Loss: 17.7511 || 10iter: 3.6538 sec.
Iteration 11860 || Loss: 12.8664 || 10iter: 3.7308 sec.
Iteration 11870 || Loss: 13.2553 || 10iter: 3.6680 sec.
Iteration 11880 || Loss: 13.0394 || 10iter: 3.7161 sec.
Iteration 11890 || Loss: 13.0261 || 10iter: 3.7745 sec.
Iteration 11900 || Loss: 16.5923 || 10iter: 3.6335 sec.
Iteration 11910 || Loss: 15.0761 || 10iter: 3.6966 sec.
I

Iteration 13040 || Loss: 14.4235 || 10iter: 3.7192 sec.
Iteration 13050 || Loss: 13.7528 || 10iter: 3.7523 sec.
Iteration 13060 || Loss: 16.3154 || 10iter: 3.6990 sec.
Iteration 13070 || Loss: 14.0896 || 10iter: 3.6995 sec.
Iteration 13080 || Loss: 16.7728 || 10iter: 3.6286 sec.
Iteration 13090 || Loss: 10.7634 || 10iter: 3.7171 sec.
Iteration 13100 || Loss: 12.5050 || 10iter: 3.7209 sec.
Iteration 13110 || Loss: 20.6924 || 10iter: 3.7600 sec.
Iteration 13120 || Loss: 16.7491 || 10iter: 4.0064 sec.
Iteration 13130 || Loss: 14.7157 || 10iter: 3.6367 sec.
Iteration 13140 || Loss: 15.7824 || 10iter: 3.7233 sec.
Iteration 13150 || Loss: 15.7547 || 10iter: 3.6895 sec.
Iteration 13160 || Loss: 16.3591 || 10iter: 3.7655 sec.
Iteration 13170 || Loss: 17.4687 || 10iter: 3.6627 sec.
Iteration 13180 || Loss: 12.8630 || 10iter: 3.6264 sec.
Iteration 13190 || Loss: 14.1964 || 10iter: 3.3797 sec.
-------------
val
-------------
epoch 65 || Epoch_TRAIN_Loss:3311.8594 ||Epoch_VAL_Loss:1058.6844
timer:

That's all :)