# setup dataset

In [1]:
# import stuff
import os
import numpy as np
import time
import pandas as pd

import torch
import torch.utils.data as data
from itertools import product as product

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torch.autograd import Function

In [2]:
# import dataset
from utils.dataset import VOCDataset, DatasetTransform, make_datapath_list, Anno_xml2list, od_collate_fn


## make data.Dataset for training

In [3]:
# Collect the image / annotation file paths.
# Point these at your own VOCdevkit checkout!
vocpath = "../VOCdevkit/VOC2007"
(train_img_list, train_anno_list,
 val_img_list, val_anno_list) = make_datapath_list(vocpath)

# Only the training lists of VOC2012 are kept; its other two return values are discarded.
vocpath = "../VOCdevkit/VOC2012"
train_img_list2, train_anno_list2, _, _ = make_datapath_list(vocpath)

# Merge the VOC2012 training samples into the VOC2007 training lists (in place).
train_img_list.extend(train_img_list2)
train_anno_list.extend(train_anno_list2)

print("trainlist: ", len(train_img_list))
print("vallist: ", len(val_img_list))

# Class names used when converting the XML annotations.
voc_classes = [
    'aeroplane', 'bicycle', 'bird', 'boat',
    'bottle', 'bus', 'car', 'cat', 'chair',
    'cow', 'diningtable', 'dog', 'horse',
    'motorbike', 'person', 'pottedplant',
    'sheep', 'sofa', 'train', 'tvmonitor',
]

color_mean = (104, 117, 123)  # mean color values, in (BGR) order
input_size = 300  # input images are resized to 300x300

# Pre-processing for the images and for the annotations.
transform = DatasetTransform(input_size, color_mean)
transform_anno = Anno_xml2list(voc_classes)

# Datasets handed to the DataLoaders.
# Fetching an item returns the pre-processed image together with its ground truth.
train_dataset = VOCDataset(
    train_img_list,
    train_anno_list,
    phase="train",
    transform=transform,
    transform_anno=transform_anno,
)
# The validation dataset gets its own transform instances.
val_dataset = VOCDataset(
    val_img_list,
    val_anno_list,
    phase="val",
    transform=DatasetTransform(input_size, color_mean),
    transform_anno=Anno_xml2list(voc_classes),
)

batch_size = 24

# Options shared by both loaders; only `shuffle` differs between them.
loader_options = dict(
    batch_size=batch_size,
    collate_fn=od_collate_fn,
    num_workers=8,
)

train_dataloader = data.DataLoader(train_dataset, shuffle=True, **loader_options)

val_dataloader = data.DataLoader(val_dataset, shuffle=False, **loader_options)

# Bundle both loaders in a dict keyed by phase name.
dataloaders_dict = dict(train=train_dataloader, val=val_dataloader)

trainlist:  16551
vallist:  4952


In [4]:
# Sanity check: pull one mini-batch from the validation loader
batch_iterator = iter(dataloaders_dict["val"])  # turn the loader into an iterator
images, targets = next(batch_iterator)  # take the first mini-batch
print(images.size())  # torch.Size([24, 3, 300, 300]) with batch_size = 24
print(len(targets))
print(targets[1].shape)  # targets is a list of mini-batch length; each element is [n, 5], n = number of objects

torch.Size([24, 3, 300, 300])
24
torch.Size([1, 5])


# define the detection model (RetinaFPN, imported under the alias SSD)

In [5]:
from utils.retinanet import RetinaFPN as SSD
from utils.retinanet import Bottleneck

In [6]:
# Configuration for SSD300
ssd_cfg = {
    'num_classes': 21,  # total number of classes, including the background class
    'input_size': 300,  # input image size
    'bbox_aspect_num': [4, 6, 6, 6, 4, 4],  # number of DBox aspect-ratio variants to output
    'feature_maps': [38, 19, 10, 5, 3, 1],  # image size of each source
    'steps': [8, 16, 32, 64, 100, 300],  # determines the size of the DBoxes
    'min_sizes': [30, 60, 111, 162, 213, 264],  # determines the size of the DBoxes
    'max_sizes': [60, 111, 162, 213, 264, 315],  # determines the size of the DBoxes
    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
}

# `SSD` is the alias under which RetinaFPN was imported from utils.retinanet.
# NOTE(review): [2,2,2,2] is presumably the number of blocks per stage -- confirm in utils.retinanet.
net = SSD(Bottleneck, [2,2,2,2], phase="train", cfg=ssd_cfg)

# SSDのweightsを設定
#print("using vgg weights")
#vgg_weights = torch.load("./weights/vgg16_reducedfc.pth")
#net.vgg.load_state_dict(vgg_weights)

def weights_init(m):
    """Initialise a single module in place; intended for ``Module.apply``.

    ``nn.Conv2d`` modules get Kaiming-normal weights and, when a bias exists,
    a bias filled with zeros. Every other module type is left untouched.
    """
    # Guard clause: only convolution layers are re-initialised.
    if not isinstance(m, nn.Conv2d):
        return

    init.kaiming_normal_(m.weight.data)

    bias = m.bias
    if bias is None:
        # Layer was built with bias=False, nothing more to do.
        return
    init.constant_(bias, 0.0)
            
# Apply the initial values.
# The layers below are not re-initialised here (calls kept for reference):
#net.toplayer.apply(weights_init)
#net.smooth1.apply(weights_init)
#net.smooth2.apply(weights_init)
#net.latlayer1.apply(weights_init)
#net.latlayer2.apply(weights_init)
#net.conv6.apply(weights_init)
#net.conv7.apply(weights_init)
#net.conv8.apply(weights_init)
# Only the `loc` and `conf` sub-modules receive the custom initialisation.
for head_name in ("loc", "conf"):
    getattr(net, head_name).apply(weights_init)

# Check whether a GPU can be used; fall back to the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("using:", device)

print("set weights!")

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

In [7]:
# Print the network so its layer structure can be inspected.
print(net)

RetinaFPN(
  (layer0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_

In [8]:
from utils.ssd_model import MultiBoxLoss

# define loss
# NOTE(review): jaccard_thresh / neg_pos are presumably the IoU matching threshold and the
# negative:positive ratio for hard-negative mining -- confirm in utils.ssd_model.
criterion = MultiBoxLoss(jaccard_thresh=0.5,neg_pos=3, device=device)

# optim
import torch.optim as optim
# The lr passed here is only the initial value: adjust_learning_rate() overwrites
# param_group['lr'] at the start of every epoch inside train_model().
optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.9, weight_decay=5e-4)

In [9]:
def get_current_lr(epoch, base_lr=1e-3, decay_epochs=(120, 180), gamma=0.1):
    """Return the step-decayed learning rate for a given epoch.

    The rate starts at ``base_lr`` and is multiplied by ``gamma`` once for
    every milestone in ``decay_epochs`` that ``epoch`` has reached.

    Args:
        epoch: zero-based epoch index.
        base_lr: learning rate before any decay (default 1e-3).
        decay_epochs: iterable of epoch indices at which the rate is decayed
            (default ``(120, 180)``).
        gamma: multiplicative decay factor applied per reached milestone
            (default 0.1).

    Returns:
        The learning rate to use during ``epoch``.
    """
    lr = base_lr
    for lr_decay_epoch in decay_epochs:
        # A milestone counts from the epoch whose index equals it (>=, not >).
        if epoch >= lr_decay_epoch:
            lr *= gamma
    return lr

def adjust_learning_rate(optimizer, epoch):
    """Set the learning rate of every parameter group for ``epoch``.

    The rate is obtained from ``get_current_lr(epoch)``, printed, and then
    written into the ``'lr'`` entry of each group in ``optimizer.param_groups``.
    """
    scheduled_lr = get_current_lr(epoch)
    print("lr is:", scheduled_lr)
    for group in optimizer.param_groups:
        group['lr'] = scheduled_lr

In [10]:
# モデルを学習させる関数を作成


def train_model(net, dataloaders_dict, criterion, optimizer, num_epochs):
    """Train ``net`` for ``num_epochs`` epochs, validating every 10th epoch.

    Args:
        net: the model; it is moved to the selected device in place.
        dataloaders_dict: mapping with the keys ``"train"`` and ``"val"``; each
            value is iterated to obtain ``(images, targets)`` pairs where
            ``images`` supports ``.to(device)`` and ``targets`` is an iterable
            of objects supporting ``.to(device)``.
        criterion: callable invoked as ``criterion(outputs, targets)`` that
            returns the pair ``(loss_l, loss_c)``; their sum is optimised.
        optimizer: optimizer stepping the parameters of ``net``; its learning
            rate is reset each epoch through ``adjust_learning_rate``.
        num_epochs: number of epochs to run.

    Side effects:
        * ``log_output.csv`` is rewritten after every epoch with the summed
          training / validation loss per epoch. Epochs without a validation
          pass are logged with a validation loss of 0.0.
        * Every 10th epoch the state dict is saved to
          ``weights/retinanet300_<epoch>.pth``; the directory is created on
          demand.
        * ``torch.backends.cudnn.benchmark`` is switched on.
    """

    # Use the GPU when one is available.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("used device:", device)

    # Move the network to the device.
    net.to(device)

    # Speeds things up when the network configuration is mostly fixed.
    torch.backends.cudnn.benchmark = True

    # Training-step counter; it keeps counting across epochs.
    iteration = 1
    logs = []

    # Epoch loop. range(num_epochs) runs exactly num_epochs epochs; the
    # previous range(num_epochs+1) ran one extra ("Epoch 201/200").
    for epoch in range(num_epochs):

        adjust_learning_rate(optimizer, epoch)

        # Remember the start times for the epoch / 10-iteration timers.
        t_epoch_start = time.time()
        t_iter_start = time.time()

        # Summed losses of this epoch.
        epoch_train_loss = 0.0
        epoch_val_loss = 0.0

        print('-------------')
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-------------')

        # One training pass and (sometimes) one validation pass per epoch.
        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()  # training mode
                print('(train)')
            else:
                if((epoch+1) % 10 == 0):
                    net.eval()   # evaluation mode
                    print('-------------')
                    print('(val)')
                else:
                    # Validation only runs once every 10 epochs.
                    continue

            # Mini-batch loop.
            for images, targets in dataloaders_dict[phase]:

                # Send the batch to the device.
                images = images.to(device)
                targets = [ann.to(device)
                           for ann in targets]  # move every tensor of the list

                # Reset the gradients.
                optimizer.zero_grad()

                # Gradients are only tracked during training.
                with torch.set_grad_enabled(phase == 'train'):
                    # Forward pass.
                    outputs = net(images)

                    # Loss computation.
                    loss_l, loss_c = criterion(outputs, targets)
                    loss = loss_l + loss_c

                    if phase == 'train':
                        loss.backward()  # back-propagation

                        # Very large gradients make training unstable, so each
                        # gradient value is clipped to at most 2.0.
                        nn.utils.clip_grad_value_(
                            net.parameters(), clip_value=2.0)

                        optimizer.step()  # parameter update

                        if (iteration % 10 == 0):  # report the loss every 10 iterations
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print('Iter {} || Loss: {:.4f} || 10iter: {:.4f} sec.'.format(
                                iteration, loss.item(), duration))
                            t_iter_start = time.time()

                        epoch_train_loss += loss.item()
                        iteration += 1

                    # Validation
                    else:
                        epoch_val_loss += loss.item()

        # Per-epoch summary.
        t_epoch_finish = time.time()
        print('-------------')
        print('epoch {} || Epoch_TRAIN_Loss:{:.4f} ||Epoch_VAL_Loss:{:.4f}'.format(
            epoch+1, epoch_train_loss, epoch_val_loss))
        print('timer:  {:.4f} sec.'.format(t_epoch_finish - t_epoch_start))

        # Persist the log; the CSV is rewritten in full every epoch.
        log_epoch = {'epoch': epoch+1,
                     'train_loss': epoch_train_loss, 'val_loss': epoch_val_loss}
        logs.append(log_epoch)
        df = pd.DataFrame(logs)
        df.to_csv("log_output.csv")

        # Save the network every 10 epochs.
        if ((epoch+1) % 10 == 0):
            # torch.save does not create missing directories; without this the
            # first checkpoint would raise after 10 epochs of training.
            os.makedirs('weights', exist_ok=True)
            torch.save(net.state_dict(), 'weights/retinanet300_' +
                       str(epoch+1) + '.pth')


In [None]:
# Launch the training run; train_model prints its progress and writes log_output.csv.
num_epochs = 200
train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=num_epochs)

used device: cuda:0
lr is: 0.001
-------------
Epoch 1/200
-------------
(train)


  "See the documentation of nn.Upsample for details.".format(mode))


Iter 10 || Loss: 15.1949 || 10iter: 3.6307 sec.
Iter 20 || Loss: 10.6273 || 10iter: 1.3889 sec.
Iter 30 || Loss: 9.0887 || 10iter: 1.4254 sec.
Iter 40 || Loss: 8.8716 || 10iter: 1.3998 sec.
Iter 50 || Loss: 8.0458 || 10iter: 1.4715 sec.
Iter 60 || Loss: 8.0764 || 10iter: 1.4587 sec.
Iter 70 || Loss: 7.6231 || 10iter: 1.3860 sec.
Iter 80 || Loss: 7.4728 || 10iter: 1.4671 sec.
Iter 90 || Loss: 7.5047 || 10iter: 1.4452 sec.
Iter 100 || Loss: 7.3107 || 10iter: 1.4086 sec.
Iter 110 || Loss: 6.9095 || 10iter: 1.4143 sec.
Iter 120 || Loss: 7.5343 || 10iter: 1.4765 sec.
Iter 130 || Loss: 6.7206 || 10iter: 1.4207 sec.
Iter 140 || Loss: 7.4364 || 10iter: 1.4636 sec.
Iter 150 || Loss: 7.2030 || 10iter: 1.4497 sec.
Iter 160 || Loss: 7.2976 || 10iter: 1.4449 sec.
Iter 170 || Loss: 7.1923 || 10iter: 1.4712 sec.
Iter 180 || Loss: 7.2418 || 10iter: 1.4644 sec.
Iter 190 || Loss: 6.6303 || 10iter: 1.4115 sec.
Iter 200 || Loss: 6.6464 || 10iter: 1.4184 sec.
Iter 210 || Loss: 6.6838 || 10iter: 1.4364 sec.

  "See the documentation of nn.Upsample for details.".format(mode))


Iter 700 || Loss: 5.8074 || 10iter: 2.8835 sec.
Iter 710 || Loss: 5.6515 || 10iter: 1.3655 sec.
Iter 720 || Loss: 5.4100 || 10iter: 1.4087 sec.
Iter 730 || Loss: 5.8192 || 10iter: 1.4216 sec.
Iter 740 || Loss: 5.4620 || 10iter: 1.4234 sec.
Iter 750 || Loss: 6.0821 || 10iter: 1.3745 sec.
Iter 760 || Loss: 5.4831 || 10iter: 1.3818 sec.
Iter 770 || Loss: 5.8731 || 10iter: 1.4171 sec.
Iter 780 || Loss: 5.4119 || 10iter: 1.4034 sec.
Iter 790 || Loss: 5.2094 || 10iter: 1.4024 sec.
Iter 800 || Loss: 5.9166 || 10iter: 1.4924 sec.
Iter 810 || Loss: 5.4892 || 10iter: 1.4225 sec.
Iter 820 || Loss: 5.0315 || 10iter: 1.3873 sec.
Iter 830 || Loss: 5.3579 || 10iter: 1.4441 sec.
Iter 840 || Loss: 5.3510 || 10iter: 1.4417 sec.
Iter 850 || Loss: 5.3508 || 10iter: 1.4245 sec.
Iter 860 || Loss: 5.4362 || 10iter: 1.4411 sec.
Iter 870 || Loss: 5.4825 || 10iter: 1.4147 sec.
Iter 880 || Loss: 5.2356 || 10iter: 1.3731 sec.
Iter 890 || Loss: 5.6775 || 10iter: 1.4839 sec.
Iter 900 || Loss: 5.4433 || 10iter: 1.43

Iter 2320 || Loss: 4.8994 || 10iter: 1.5180 sec.
Iter 2330 || Loss: 5.4873 || 10iter: 1.4168 sec.
Iter 2340 || Loss: 4.9252 || 10iter: 1.4234 sec.
Iter 2350 || Loss: 4.3950 || 10iter: 1.5062 sec.
Iter 2360 || Loss: 4.6753 || 10iter: 1.4297 sec.
Iter 2370 || Loss: 5.2964 || 10iter: 1.4560 sec.
Iter 2380 || Loss: 4.3958 || 10iter: 1.4509 sec.
Iter 2390 || Loss: 4.9358 || 10iter: 1.4289 sec.
Iter 2400 || Loss: 4.8022 || 10iter: 1.4790 sec.
Iter 2410 || Loss: 4.6298 || 10iter: 1.4215 sec.
Iter 2420 || Loss: 4.5151 || 10iter: 1.4668 sec.
Iter 2430 || Loss: 4.6984 || 10iter: 1.4036 sec.
Iter 2440 || Loss: 4.8150 || 10iter: 1.4643 sec.
Iter 2450 || Loss: 4.8885 || 10iter: 1.4594 sec.
Iter 2460 || Loss: 4.0130 || 10iter: 1.4730 sec.
Iter 2470 || Loss: 5.0089 || 10iter: 1.4257 sec.
Iter 2480 || Loss: 4.6807 || 10iter: 1.4710 sec.
Iter 2490 || Loss: 3.9390 || 10iter: 1.4222 sec.
Iter 2500 || Loss: 4.3685 || 10iter: 1.4103 sec.
Iter 2510 || Loss: 5.8880 || 10iter: 1.4374 sec.
Iter 2520 || Loss: 4

Iter 3930 || Loss: 4.3457 || 10iter: 1.4028 sec.
Iter 3940 || Loss: 4.3299 || 10iter: 1.4169 sec.
Iter 3950 || Loss: 4.5807 || 10iter: 1.4578 sec.
Iter 3960 || Loss: 4.2687 || 10iter: 1.4376 sec.
Iter 3970 || Loss: 4.1582 || 10iter: 1.4147 sec.
Iter 3980 || Loss: 3.9000 || 10iter: 1.4297 sec.
Iter 3990 || Loss: 4.5963 || 10iter: 1.3898 sec.
Iter 4000 || Loss: 4.2883 || 10iter: 1.4596 sec.
Iter 4010 || Loss: 4.4846 || 10iter: 1.4382 sec.
Iter 4020 || Loss: 4.4053 || 10iter: 1.4101 sec.
Iter 4030 || Loss: 4.4456 || 10iter: 1.4339 sec.
Iter 4040 || Loss: 4.6380 || 10iter: 1.3754 sec.
Iter 4050 || Loss: 3.5988 || 10iter: 1.4605 sec.
Iter 4060 || Loss: 4.3380 || 10iter: 1.4072 sec.
Iter 4070 || Loss: 4.6243 || 10iter: 1.4606 sec.
Iter 4080 || Loss: 4.3436 || 10iter: 1.3962 sec.
Iter 4090 || Loss: 4.4858 || 10iter: 1.4553 sec.
Iter 4100 || Loss: 4.3095 || 10iter: 1.3985 sec.
Iter 4110 || Loss: 3.9933 || 10iter: 1.4147 sec.
Iter 4120 || Loss: 4.6023 || 10iter: 1.4588 sec.
Iter 4130 || Loss: 3

Iter 5530 || Loss: 4.1857 || 10iter: 2.6783 sec.
Iter 5540 || Loss: 4.1984 || 10iter: 1.3193 sec.
Iter 5550 || Loss: 3.7496 || 10iter: 1.4471 sec.
Iter 5560 || Loss: 3.6405 || 10iter: 1.4076 sec.
Iter 5570 || Loss: 3.9753 || 10iter: 1.4535 sec.
Iter 5580 || Loss: 3.6695 || 10iter: 1.4506 sec.
Iter 5590 || Loss: 4.0877 || 10iter: 1.3734 sec.
Iter 5600 || Loss: 4.1738 || 10iter: 1.4080 sec.
Iter 5610 || Loss: 4.3432 || 10iter: 1.4254 sec.
Iter 5620 || Loss: 3.5003 || 10iter: 1.3670 sec.
Iter 5630 || Loss: 3.6132 || 10iter: 1.4152 sec.
Iter 5640 || Loss: 3.8148 || 10iter: 1.4501 sec.
Iter 5650 || Loss: 4.0367 || 10iter: 1.4227 sec.
Iter 5660 || Loss: 3.9660 || 10iter: 1.3893 sec.
Iter 5670 || Loss: 4.5214 || 10iter: 1.4831 sec.
Iter 5680 || Loss: 4.5838 || 10iter: 1.4464 sec.
Iter 5690 || Loss: 4.2297 || 10iter: 1.4295 sec.
Iter 5700 || Loss: 3.6111 || 10iter: 1.4325 sec.
Iter 5710 || Loss: 3.7454 || 10iter: 1.4134 sec.
Iter 5720 || Loss: 3.5318 || 10iter: 1.4382 sec.
Iter 5730 || Loss: 3

Iter 7140 || Loss: 4.6483 || 10iter: 1.5041 sec.
Iter 7150 || Loss: 3.7236 || 10iter: 1.4328 sec.
Iter 7160 || Loss: 3.8499 || 10iter: 1.3907 sec.
Iter 7170 || Loss: 3.9485 || 10iter: 1.4429 sec.
Iter 7180 || Loss: 4.0420 || 10iter: 1.4592 sec.
Iter 7190 || Loss: 4.2499 || 10iter: 1.4048 sec.
Iter 7200 || Loss: 4.5529 || 10iter: 1.3983 sec.
Iter 7210 || Loss: 3.8654 || 10iter: 1.4178 sec.
Iter 7220 || Loss: 3.6709 || 10iter: 1.4989 sec.
Iter 7230 || Loss: 3.8606 || 10iter: 1.3819 sec.
Iter 7240 || Loss: 4.5533 || 10iter: 1.4815 sec.
Iter 7250 || Loss: 3.8362 || 10iter: 1.4892 sec.
Iter 7260 || Loss: 4.2666 || 10iter: 1.4364 sec.
Iter 7270 || Loss: 4.0060 || 10iter: 1.3931 sec.
Iter 7280 || Loss: 3.9244 || 10iter: 1.4612 sec.
Iter 7290 || Loss: 4.0875 || 10iter: 1.4243 sec.
Iter 7300 || Loss: 4.1049 || 10iter: 1.4152 sec.
Iter 7310 || Loss: 4.0240 || 10iter: 1.4510 sec.
Iter 7320 || Loss: 3.8090 || 10iter: 1.4252 sec.
Iter 7330 || Loss: 4.0534 || 10iter: 1.4547 sec.
Iter 7340 || Loss: 3

Iter 8750 || Loss: 3.5880 || 10iter: 1.3984 sec.
Iter 8760 || Loss: 4.3141 || 10iter: 1.4397 sec.
Iter 8770 || Loss: 4.0955 || 10iter: 1.4113 sec.
Iter 8780 || Loss: 3.6998 || 10iter: 1.3620 sec.
Iter 8790 || Loss: 4.1088 || 10iter: 1.4730 sec.
Iter 8800 || Loss: 4.1670 || 10iter: 1.4591 sec.
Iter 8810 || Loss: 5.0476 || 10iter: 1.4160 sec.
Iter 8820 || Loss: 3.7472 || 10iter: 1.4053 sec.
Iter 8830 || Loss: 4.0829 || 10iter: 1.4948 sec.
Iter 8840 || Loss: 3.6424 || 10iter: 1.4307 sec.
Iter 8850 || Loss: 3.6773 || 10iter: 1.4146 sec.
Iter 8860 || Loss: 3.9882 || 10iter: 1.4064 sec.
Iter 8870 || Loss: 3.5894 || 10iter: 1.4212 sec.
Iter 8880 || Loss: 4.0267 || 10iter: 1.4641 sec.
Iter 8890 || Loss: 3.6033 || 10iter: 1.4482 sec.
Iter 8900 || Loss: 3.5607 || 10iter: 1.4185 sec.
Iter 8910 || Loss: 4.0905 || 10iter: 1.3945 sec.
Iter 8920 || Loss: 4.0302 || 10iter: 1.4182 sec.
Iter 8930 || Loss: 4.4502 || 10iter: 1.4434 sec.
Iter 8940 || Loss: 3.4112 || 10iter: 1.4416 sec.
Iter 8950 || Loss: 4

Iter 10350 || Loss: 3.5312 || 10iter: 1.2590 sec.
-------------
epoch 15 || Epoch_TRAIN_Loss:2624.0052 ||Epoch_VAL_Loss:0.0000
timer:  101.9795 sec.
lr is: 0.001
-------------
Epoch 16/200
-------------
(train)
Iter 10360 || Loss: 3.4463 || 10iter: 2.7430 sec.
Iter 10370 || Loss: 4.2109 || 10iter: 1.4140 sec.
Iter 10380 || Loss: 3.7024 || 10iter: 1.4895 sec.
Iter 10390 || Loss: 3.5558 || 10iter: 1.3590 sec.
Iter 10400 || Loss: 3.8585 || 10iter: 1.4462 sec.
Iter 10410 || Loss: 4.0474 || 10iter: 1.4776 sec.
Iter 10420 || Loss: 3.8031 || 10iter: 1.3825 sec.
Iter 10430 || Loss: 4.2866 || 10iter: 1.4374 sec.
Iter 10440 || Loss: 3.6566 || 10iter: 1.4137 sec.
Iter 10450 || Loss: 4.0127 || 10iter: 1.4598 sec.
Iter 10460 || Loss: 3.7038 || 10iter: 1.4144 sec.
Iter 10470 || Loss: 3.9162 || 10iter: 1.4569 sec.
Iter 10480 || Loss: 4.0726 || 10iter: 1.4472 sec.
Iter 10490 || Loss: 3.5154 || 10iter: 1.4126 sec.
Iter 10500 || Loss: 3.7611 || 10iter: 1.4085 sec.
Iter 10510 || Loss: 3.3622 || 10iter: 1

Iter 11900 || Loss: 3.4627 || 10iter: 1.4086 sec.
Iter 11910 || Loss: 3.1708 || 10iter: 1.3903 sec.
Iter 11920 || Loss: 3.5011 || 10iter: 1.3954 sec.
Iter 11930 || Loss: 4.2554 || 10iter: 1.4501 sec.
Iter 11940 || Loss: 3.5948 || 10iter: 1.4509 sec.
Iter 11950 || Loss: 3.7489 || 10iter: 1.5099 sec.
Iter 11960 || Loss: 3.9827 || 10iter: 1.4291 sec.
Iter 11970 || Loss: 3.7399 || 10iter: 1.4461 sec.
Iter 11980 || Loss: 3.4009 || 10iter: 1.4794 sec.
Iter 11990 || Loss: 3.9977 || 10iter: 1.4070 sec.
Iter 12000 || Loss: 4.0635 || 10iter: 1.4289 sec.
Iter 12010 || Loss: 3.8358 || 10iter: 1.4296 sec.
Iter 12020 || Loss: 3.5076 || 10iter: 1.4274 sec.
Iter 12030 || Loss: 3.7929 || 10iter: 1.4432 sec.
Iter 12040 || Loss: 3.9350 || 10iter: 1.4935 sec.
Iter 12050 || Loss: 3.2453 || 10iter: 1.4686 sec.
Iter 12060 || Loss: 3.9101 || 10iter: 1.4043 sec.
Iter 12070 || Loss: 3.4580 || 10iter: 1.4366 sec.
Iter 12080 || Loss: 3.6532 || 10iter: 1.4468 sec.
Iter 12090 || Loss: 3.6707 || 10iter: 1.3975 sec.


Iter 13480 || Loss: 3.6380 || 10iter: 1.4267 sec.
Iter 13490 || Loss: 3.8184 || 10iter: 1.4100 sec.
Iter 13500 || Loss: 2.9719 || 10iter: 1.4525 sec.
Iter 13510 || Loss: 3.5191 || 10iter: 1.4252 sec.
Iter 13520 || Loss: 3.6616 || 10iter: 1.4139 sec.
Iter 13530 || Loss: 3.4481 || 10iter: 1.4499 sec.
Iter 13540 || Loss: 3.3606 || 10iter: 1.4577 sec.
Iter 13550 || Loss: 3.4478 || 10iter: 1.4206 sec.
Iter 13560 || Loss: 3.6069 || 10iter: 1.4334 sec.
Iter 13570 || Loss: 3.7036 || 10iter: 1.4186 sec.
Iter 13580 || Loss: 3.1618 || 10iter: 1.4344 sec.
Iter 13590 || Loss: 3.9750 || 10iter: 1.3860 sec.
Iter 13600 || Loss: 3.2660 || 10iter: 1.5068 sec.
Iter 13610 || Loss: 3.4853 || 10iter: 1.4697 sec.
Iter 13620 || Loss: 3.6283 || 10iter: 1.3883 sec.
Iter 13630 || Loss: 3.8234 || 10iter: 1.4530 sec.
Iter 13640 || Loss: 3.9860 || 10iter: 1.4382 sec.
Iter 13650 || Loss: 3.6520 || 10iter: 1.4318 sec.
Iter 13660 || Loss: 3.4380 || 10iter: 1.4021 sec.
Iter 13670 || Loss: 3.1552 || 10iter: 1.4090 sec.


Iter 15050 || Loss: 3.7774 || 10iter: 1.4611 sec.
Iter 15060 || Loss: 3.5208 || 10iter: 1.4274 sec.
Iter 15070 || Loss: 3.3452 || 10iter: 1.4335 sec.
Iter 15080 || Loss: 3.7464 || 10iter: 1.3888 sec.
Iter 15090 || Loss: 3.9257 || 10iter: 1.5274 sec.
Iter 15100 || Loss: 3.9649 || 10iter: 1.4123 sec.
Iter 15110 || Loss: 3.3378 || 10iter: 1.4354 sec.
Iter 15120 || Loss: 3.5724 || 10iter: 1.3964 sec.
Iter 15130 || Loss: 4.2620 || 10iter: 1.4763 sec.
Iter 15140 || Loss: 3.3942 || 10iter: 1.4275 sec.
Iter 15150 || Loss: 3.1242 || 10iter: 1.4152 sec.
Iter 15160 || Loss: 3.6009 || 10iter: 1.4485 sec.
Iter 15170 || Loss: 3.5214 || 10iter: 1.3271 sec.
Iter 15180 || Loss: 3.7927 || 10iter: 1.2568 sec.
-------------
epoch 22 || Epoch_TRAIN_Loss:2500.9984 ||Epoch_VAL_Loss:0.0000
timer:  102.2357 sec.
lr is: 0.001
-------------
Epoch 23/200
-------------
(train)
Iter 15190 || Loss: 3.8257 || 10iter: 2.7592 sec.
Iter 15200 || Loss: 3.6803 || 10iter: 1.3765 sec.
Iter 15210 || Loss: 3.5840 || 10iter: 1

Iter 16600 || Loss: 3.3716 || 10iter: 1.5386 sec.
Iter 16610 || Loss: 3.1631 || 10iter: 1.4641 sec.
Iter 16620 || Loss: 4.2524 || 10iter: 1.4133 sec.
Iter 16630 || Loss: 3.4334 || 10iter: 1.4034 sec.
Iter 16640 || Loss: 3.2362 || 10iter: 1.3889 sec.
Iter 16650 || Loss: 3.9987 || 10iter: 1.3700 sec.
Iter 16660 || Loss: 3.4859 || 10iter: 1.4322 sec.
Iter 16670 || Loss: 3.1562 || 10iter: 1.3962 sec.
Iter 16680 || Loss: 3.8412 || 10iter: 1.4015 sec.
Iter 16690 || Loss: 3.0863 || 10iter: 1.4188 sec.
Iter 16700 || Loss: 3.1556 || 10iter: 1.4699 sec.
Iter 16710 || Loss: 3.5915 || 10iter: 1.4358 sec.
Iter 16720 || Loss: 3.6305 || 10iter: 1.4620 sec.
Iter 16730 || Loss: 3.7823 || 10iter: 1.4055 sec.
Iter 16740 || Loss: 3.4683 || 10iter: 1.4418 sec.
Iter 16750 || Loss: 3.8251 || 10iter: 1.3688 sec.
Iter 16760 || Loss: 3.2802 || 10iter: 1.3985 sec.
Iter 16770 || Loss: 3.9553 || 10iter: 1.4585 sec.
Iter 16780 || Loss: 3.4067 || 10iter: 1.4310 sec.
Iter 16790 || Loss: 3.1205 || 10iter: 1.4749 sec.


Iter 18180 || Loss: 4.0990 || 10iter: 1.4560 sec.
Iter 18190 || Loss: 3.3583 || 10iter: 1.4049 sec.
Iter 18200 || Loss: 3.6154 || 10iter: 1.4473 sec.
Iter 18210 || Loss: 2.7192 || 10iter: 1.4466 sec.
Iter 18220 || Loss: 3.8758 || 10iter: 1.4336 sec.
Iter 18230 || Loss: 3.7849 || 10iter: 1.4259 sec.
Iter 18240 || Loss: 3.4431 || 10iter: 1.4354 sec.
Iter 18250 || Loss: 3.7518 || 10iter: 1.5044 sec.
Iter 18260 || Loss: 3.3834 || 10iter: 1.4580 sec.
Iter 18270 || Loss: 3.6703 || 10iter: 1.4298 sec.
Iter 18280 || Loss: 3.5344 || 10iter: 1.4140 sec.
Iter 18290 || Loss: 3.3847 || 10iter: 1.4407 sec.
Iter 18300 || Loss: 2.8861 || 10iter: 1.4643 sec.
Iter 18310 || Loss: 3.2278 || 10iter: 1.4421 sec.
Iter 18320 || Loss: 3.7913 || 10iter: 1.3921 sec.
Iter 18330 || Loss: 3.3890 || 10iter: 1.4255 sec.
Iter 18340 || Loss: 3.8030 || 10iter: 1.4798 sec.
Iter 18350 || Loss: 3.3507 || 10iter: 1.4451 sec.
Iter 18360 || Loss: 3.8270 || 10iter: 1.4420 sec.
Iter 18370 || Loss: 3.9451 || 10iter: 1.4661 sec.


Iter 19760 || Loss: 2.2794 || 10iter: 1.4139 sec.
Iter 19770 || Loss: 4.2602 || 10iter: 1.4437 sec.
Iter 19780 || Loss: 3.3424 || 10iter: 1.4763 sec.
Iter 19790 || Loss: 3.1074 || 10iter: 1.5038 sec.
Iter 19800 || Loss: 4.3617 || 10iter: 1.4808 sec.
Iter 19810 || Loss: 3.0624 || 10iter: 1.3894 sec.
Iter 19820 || Loss: 2.9589 || 10iter: 1.4177 sec.
Iter 19830 || Loss: 3.1582 || 10iter: 1.4136 sec.
Iter 19840 || Loss: 4.0858 || 10iter: 1.4008 sec.
Iter 19850 || Loss: 3.2042 || 10iter: 1.4367 sec.
Iter 19860 || Loss: 3.2770 || 10iter: 1.4169 sec.
Iter 19870 || Loss: 3.5381 || 10iter: 1.4345 sec.
Iter 19880 || Loss: 3.7213 || 10iter: 1.4483 sec.
Iter 19890 || Loss: 3.5045 || 10iter: 1.4627 sec.
Iter 19900 || Loss: 3.3306 || 10iter: 1.3756 sec.
Iter 19910 || Loss: 3.8794 || 10iter: 1.4793 sec.
Iter 19920 || Loss: 3.5874 || 10iter: 1.4236 sec.
Iter 19930 || Loss: 3.4147 || 10iter: 1.4141 sec.
Iter 19940 || Loss: 3.4987 || 10iter: 1.3961 sec.
Iter 19950 || Loss: 3.2184 || 10iter: 1.5002 sec.


Iter 21330 || Loss: 3.7154 || 10iter: 1.4107 sec.
Iter 21340 || Loss: 3.3825 || 10iter: 1.5209 sec.
Iter 21350 || Loss: 3.2818 || 10iter: 1.4882 sec.
Iter 21360 || Loss: 3.4244 || 10iter: 1.4301 sec.
Iter 21370 || Loss: 3.4515 || 10iter: 1.4441 sec.
Iter 21380 || Loss: 3.0089 || 10iter: 1.3329 sec.
Iter 21390 || Loss: 3.4864 || 10iter: 1.2548 sec.
-------------
epoch 31 || Epoch_TRAIN_Loss:2384.7828 ||Epoch_VAL_Loss:0.0000
timer:  102.2685 sec.
lr is: 0.001
-------------
Epoch 32/200
-------------
(train)
Iter 21400 || Loss: 3.3839 || 10iter: 2.7535 sec.
Iter 21410 || Loss: 3.0419 || 10iter: 1.3967 sec.
Iter 21420 || Loss: 3.0637 || 10iter: 1.4241 sec.
Iter 21430 || Loss: 3.7290 || 10iter: 1.4410 sec.
Iter 21440 || Loss: 3.7062 || 10iter: 1.4310 sec.
Iter 21450 || Loss: 3.5017 || 10iter: 1.3722 sec.
Iter 21460 || Loss: 3.3623 || 10iter: 1.4024 sec.
Iter 21470 || Loss: 3.1142 || 10iter: 1.4100 sec.
Iter 21480 || Loss: 3.6819 || 10iter: 1.4195 sec.
Iter 21490 || Loss: 3.5266 || 10iter: 1

Iter 22880 || Loss: 3.7956 || 10iter: 1.5252 sec.
Iter 22890 || Loss: 3.6096 || 10iter: 1.4504 sec.
Iter 22900 || Loss: 2.9143 || 10iter: 1.4856 sec.
Iter 22910 || Loss: 3.4555 || 10iter: 1.4295 sec.
Iter 22920 || Loss: 2.9262 || 10iter: 1.4382 sec.
Iter 22930 || Loss: 3.6756 || 10iter: 1.3857 sec.
Iter 22940 || Loss: 3.3816 || 10iter: 1.4370 sec.
Iter 22950 || Loss: 3.3522 || 10iter: 1.4360 sec.
Iter 22960 || Loss: 3.6501 || 10iter: 1.3920 sec.
Iter 22970 || Loss: 3.0724 || 10iter: 1.4833 sec.
Iter 22980 || Loss: 3.6974 || 10iter: 1.5151 sec.
Iter 22990 || Loss: 3.6840 || 10iter: 1.4808 sec.
Iter 23000 || Loss: 3.4196 || 10iter: 1.4138 sec.
Iter 23010 || Loss: 3.0384 || 10iter: 1.4526 sec.
Iter 23020 || Loss: 3.6917 || 10iter: 1.4462 sec.
Iter 23030 || Loss: 3.3259 || 10iter: 1.3772 sec.
Iter 23040 || Loss: 3.1784 || 10iter: 1.4259 sec.
Iter 23050 || Loss: 2.6568 || 10iter: 1.4727 sec.
Iter 23060 || Loss: 3.4303 || 10iter: 1.4556 sec.
Iter 23070 || Loss: 3.5965 || 10iter: 1.4907 sec.


Iter 24460 || Loss: 3.3945 || 10iter: 1.4558 sec.
Iter 24470 || Loss: 3.5500 || 10iter: 1.4281 sec.
Iter 24480 || Loss: 2.6730 || 10iter: 1.4279 sec.
Iter 24490 || Loss: 3.2336 || 10iter: 1.3957 sec.
Iter 24500 || Loss: 3.1592 || 10iter: 1.4244 sec.
Iter 24510 || Loss: 3.6971 || 10iter: 1.4125 sec.
Iter 24520 || Loss: 3.1116 || 10iter: 1.3935 sec.
Iter 24530 || Loss: 3.6193 || 10iter: 1.3951 sec.
Iter 24540 || Loss: 3.9835 || 10iter: 1.3621 sec.
Iter 24550 || Loss: 3.5488 || 10iter: 1.4176 sec.
Iter 24560 || Loss: 2.9170 || 10iter: 1.4775 sec.
Iter 24570 || Loss: 3.3394 || 10iter: 1.4267 sec.
Iter 24580 || Loss: 3.3847 || 10iter: 1.4038 sec.
Iter 24590 || Loss: 2.7259 || 10iter: 1.4888 sec.
Iter 24600 || Loss: 3.7070 || 10iter: 1.5194 sec.
Iter 24610 || Loss: 3.6668 || 10iter: 1.4323 sec.
Iter 24620 || Loss: 3.6539 || 10iter: 1.4278 sec.
Iter 24630 || Loss: 3.6356 || 10iter: 1.4065 sec.
Iter 24640 || Loss: 3.7232 || 10iter: 1.4457 sec.
Iter 24650 || Loss: 3.8788 || 10iter: 1.4432 sec.


Iter 26040 || Loss: 3.5247 || 10iter: 1.4479 sec.
Iter 26050 || Loss: 3.4741 || 10iter: 1.4352 sec.
Iter 26060 || Loss: 3.3761 || 10iter: 1.4317 sec.
Iter 26070 || Loss: 3.2763 || 10iter: 1.4052 sec.
Iter 26080 || Loss: 3.7198 || 10iter: 1.4712 sec.
Iter 26090 || Loss: 2.8506 || 10iter: 1.4592 sec.
Iter 26100 || Loss: 3.4990 || 10iter: 1.4301 sec.
Iter 26110 || Loss: 3.5279 || 10iter: 1.4547 sec.
Iter 26120 || Loss: 4.6572 || 10iter: 1.3985 sec.
Iter 26130 || Loss: 3.5475 || 10iter: 1.4435 sec.
Iter 26140 || Loss: 3.3135 || 10iter: 1.4078 sec.
Iter 26150 || Loss: 3.2343 || 10iter: 1.4244 sec.
Iter 26160 || Loss: 3.1445 || 10iter: 1.4054 sec.
Iter 26170 || Loss: 2.5993 || 10iter: 1.4333 sec.
Iter 26180 || Loss: 3.6560 || 10iter: 1.4488 sec.
Iter 26190 || Loss: 3.1434 || 10iter: 1.4806 sec.
Iter 26200 || Loss: 3.2791 || 10iter: 1.5201 sec.
Iter 26210 || Loss: 3.6920 || 10iter: 1.3659 sec.
Iter 26220 || Loss: 3.2069 || 10iter: 1.2664 sec.
-------------
epoch 38 || Epoch_TRAIN_Loss:2319.32