In [1]:
import argparse
import os
import time
import sys

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import numpy as np
from torch.utils.data import DataLoader
import torch.optim as optim
import fastText

# Expose only GPUs 4-7 to this process. This must be set before CUDA is
# initialised; the first `.cuda()` call appears in a later cell.
os.environ['CUDA_VISIBLE_DEVICES']="4,5,6,7"

In [2]:
from models import models
from dataset import openimages, imagenet
from utils.loss import HardNegativeContrastiveLoss

In [3]:
import warnings
# NOTE(review): this silences every Python warning for the rest of the session,
# including deprecation notices from torch/torchvision — consider narrowing
# the filter to the specific categories or modules that are noisy.
warnings.filterwarnings("ignore")

In [4]:
class AverageMeter(object):
    """Running statistics for a scalar metric.

    Attributes:
        val: the most recent value passed to ``update``.
        sum: weighted sum of all values seen since the last ``reset``.
        count: total weight (sum of ``n``) seen since the last ``reset``.
        avg: ``sum / count`` as of the last ``update``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as having been observed ``n`` times and refresh the mean."""
        contribution = val * n
        self.val = val
        self.sum += contribution
        self.count += n
        self.avg = self.sum / self.count

In [5]:
def train(train_loader, model, criterion, optimizer, epoch, print_freq=1000):
    """Run one pass over ``train_loader``, updating ``model`` after every batch.

    Args:
        train_loader: sized iterable yielding ``(imgs, caps)`` batches; both
            items are moved to the GPU with ``.cuda()``.
        model: module called as ``model(imgs)``; switched to training mode.
        criterion: callable invoked as ``criterion(output_imgs, target)`` whose
            result supports ``.backward()`` and ``.item()``.
        optimizer: optimizer stepped once per batch.
        epoch: value shown in the log lines only.
        print_freq: a log line is printed every ``print_freq`` batches and on
            the last batch.

    Returns:
        Tuple ``(mean loss, mean batch time, mean data time)`` where the loss
        mean is weighted by ``imgs.size(0)``.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    model = model.train()
    print("Start training")
    num_batches = len(train_loader)
    end = time.time()
    for i, (imgs, caps) in enumerate(train_loader):
        if i % 2 == 1:
            # In-place progress indicator (carriage return, no newline).
            # Raw string: '\%' is not a valid escape sequence and triggers a
            # DeprecationWarning/SyntaxWarning; r'\%' prints the same text.
            print("%2.2f" % (i / num_batches * 100), r'\%', end='\r')
        input_imgs, target = imgs.cuda(), caps.cuda()

        # Taken after the host-to-device copy, so "Data" time includes it.
        data_time.update(time.time() - end)

        optimizer.zero_grad()

        output_imgs = model(input_imgs)
        loss = criterion(output_imgs, target)

        loss.backward()
        optimizer.step()

        losses.update(loss.item(), imgs.size(0))

        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0 or i == (num_batches - 1):
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                      epoch, i, num_batches, batch_time=batch_time,
                      data_time=data_time, loss=losses))

    return losses.avg, batch_time.avg, data_time.avg

In [6]:
def validate(val_loader, model, criterion, print_freq=1000):
    """Evaluate ``model`` on ``val_loader`` without gradient tracking.

    Args:
        val_loader: sized iterable yielding ``(imgs, caps, lengths)`` batches.
            ``lengths`` is unpacked but not used here.
        model: module called as ``model(imgs)``; switched to eval mode.
        criterion: callable invoked as ``criterion(output_imgs, input_caps)``
            whose result supports ``.item()``.
        print_freq: a log line is printed every ``print_freq`` batches and on
            the last batch.

    Returns:
        Tuple ``(mean loss, mean batch time, mean data time, recall)`` where
        ``recall`` is whatever ``eval_recall`` returns.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    model = model.eval()

    imgs_enc = []
    caps_enc = []
    end = time.time()
    # NOTE(review): this expects 3-item batches; a loader built with a
    # 2-item collate function will fail to unpack here.
    for i, (imgs, caps, lengths) in enumerate(val_loader):

        input_imgs, input_caps = imgs.cuda(), caps.cuda()

        # measure data loading time (includes the host-to-device copy)
        data_time.update(time.time() - end)

        with torch.no_grad():
            output_imgs = model(input_imgs)
            loss = criterion(output_imgs, input_caps)

        imgs_enc.append(output_imgs.detach().cpu().numpy())
        # The model only encodes images, so the caption-side encodings are the
        # target embeddings themselves. The previous code referenced an
        # undefined name `output_caps` and raised NameError on the first batch.
        caps_enc.append(input_caps.detach().cpu().numpy())
        losses.update(loss.item(), imgs.size(0))

        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0 or i == (len(val_loader) - 1):
            print('Data: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                      i, len(val_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses))

    # NOTE(review): `eval_recall` is neither defined nor imported in the
    # visible cells; it must be brought into scope before calling validate().
    recall = eval_recall(imgs_enc, caps_enc)
    print(recall)
    return losses.avg, batch_time.avg, data_time.avg, recall

In [7]:
# Per-channel mean/std normalisation shared by both pipelines below.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Training pipeline: random 224-sized resized crop and random horizontal flip,
# then tensor conversion and normalisation.
prepro = transforms.Compose(
    [
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]
)

# Validation pipeline: deterministic resize to 350x350, no augmentation.
prepro_val = transforms.Compose(
    [
        transforms.Resize((350, 350)),
        transforms.ToTensor(),
        normalize,
    ]
)

In [8]:
# Build the image projection model, put it in training mode on the GPU and wrap
# it in DataParallel; the wrapped model is reachable as `m.module`.
m = nn.DataParallel(models.ImageProjection().train().cuda())

In [9]:
# Freeze every parameter of the model; selected parts are re-enabled afterwards.
for weight in m.parameters():
    weight.requires_grad = False

In [10]:
# Re-enable gradients for the `projection` sub-module only.
for weight in m.module.projection.parameters():
    weight.requires_grad = True

In [11]:
def collate_embeds(data):
    """Collate ``(image, target)`` samples into a pair of batch tensors.

    Args:
        data: sequence of ``(image_tensor, target_array)`` pairs.

    Returns:
        ``(images, targets)``: the image tensors stacked along a new leading
        dimension, and the targets stacked with NumPy along axis 0 and
        converted with ``torch.Tensor``.
    """
    image_seq, target_seq = zip(*data)
    batch_images = torch.stack(image_seq, dim=0)
    stacked_targets = np.stack(target_seq, axis=0)
    batch_targets = torch.Tensor(stacked_targets)
    return batch_images, batch_targets

In [12]:
# ImageNet training set read from a hard-coded image directory, using the
# augmenting `prepro` transform.
# NOTE(review): the absolute path is machine-specific; consider a configurable
# data directory. The validation counterpart below is currently disabled.
imagenet_dataset = imagenet.FullImageNet('/data/datasets/imageNet/images/', transform=prepro)
#imagenet_val_dataset = imagenet.FullImageNet("/data/datasets/imageNet/images/", transform=prepro_val, sset="val")

Reading dataset in  26.35032606124878  sec


In [13]:
# Shuffled ImageNet loader: batches of 3072, incomplete last batch dropped,
# 20 worker processes, batches assembled by `collate_embeds`.
train_loader = DataLoader(imagenet_dataset, batch_size=3072, shuffle=True, drop_last=True,
                            num_workers=20, collate_fn=collate_embeds, pin_memory=True)

In [14]:
# Adam over only the parameters that currently have requires_grad=True, lr=1e-3.
opti = optim.Adam(filter(lambda p: p.requires_grad, m.parameters()), lr=0.001)

In [15]:
# Loss imported from utils.loss, moved to the GPU; train() calls it as
# criterion(model_output, target).
criterion = HardNegativeContrastiveLoss().cuda()

In [16]:
# Two epochs (logged as 0 and 1), printing every 50 batches.
# NOTE(review): the recorded run of this cell aborted with an OSError raised in
# a DataLoader worker because PIL could not identify one of the image files;
# the dataset needs to skip or repair unreadable images.
for i in range(2):
    train(train_loader, m, criterion, opti, i, print_freq=50)

Start training


OSError: Traceback (most recent call last):
  File "/opt/conda/envs/py36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 138, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/opt/conda/envs/py36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 138, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/workspace/query-image/dataset/imagenet.py", line 47, in __getitem__
    return self.ts(Image.open(im).convert("RGB")), txt_emb
  File "/opt/conda/envs/py36/lib/python3.6/site-packages/PIL/Image.py", line 2687, in open
    % (filename if filename else fp))
OSError: cannot identify image file '/data/datasets/imageNet/images/n04516116/n04516116_7553.jpg'


In [23]:
from PIL import Image

In [24]:
# Debugging probe: try to open a single image directly with PIL. The recorded
# output shows this file raising "cannot identify image file".
# NOTE(review): leftover diagnostic cell — remove or wrap in try/except before sharing.
Image.open('/data/datasets/imageNet/images/n03885904/n03885904_17721.jpg')

OSError: cannot identify image file '/data/datasets/imageNet/images/n03885904/n03885904_17721.jpg'

In [17]:
# Fresh Adam instance over the trainable parameters with a lower lr (2.5e-4);
# creating a new optimizer discards the previous optimizer's state.
opti = optim.Adam(filter(lambda p: p.requires_grad, m.parameters()), lr=0.00025)

In [19]:
# Two further epochs (logged as 2 and 3), printing every 50 batches.
for i in range(2,4):
    train(train_loader, m, criterion, opti, i, print_freq=50)

Start training
Epoch: [2][0/1495]	Time 51.399 (51.399)	Data 49.598 (49.598)	Loss 2897.5864 (2897.5864)	
Epoch: [2][50/1495]	Time 2.044 (3.032)	Data 0.170 (1.167)	Loss 3546.5110 (3190.0439)	
Epoch: [2][100/1495]	Time 1.921 (2.544)	Data 0.170 (0.682)	Loss 5747.5391 (3783.3799)	
Epoch: [2][150/1495]	Time 2.500 (2.393)	Data 0.287 (0.518)	Loss 5154.7490 (4246.2801)	
Epoch: [2][200/1495]	Time 2.065 (2.310)	Data 0.171 (0.436)	Loss 3955.3203 (4320.9193)	
Epoch: [2][250/1495]	Time 1.943 (2.260)	Data 0.171 (0.385)	Loss 5116.1045 (4462.7477)	
Epoch: [2][300/1495]	Time 1.925 (2.230)	Data 0.172 (0.352)	Loss 3654.9590 (4431.6050)	
Epoch: [2][350/1495]	Time 2.220 (2.206)	Data 0.173 (0.328)	Loss 3426.4272 (4308.5145)	
Epoch: [2][400/1495]	Time 1.919 (2.185)	Data 0.171 (0.310)	Loss 2850.0166 (4164.0453)	
Epoch: [2][450/1495]	Time 2.144 (2.174)	Data 0.173 (0.296)	Loss 2675.8628 (4002.5395)	
Epoch: [2][500/1495]	Time 2.383 (2.162)	Data 0.171 (0.286)	Loss 2553.7883 (3872.6730)	
Epoch: [2][550/1495]	Time 1

(3474.2665076622598, 2.0876847224092003, 0.22014132152034288)

In [20]:
# Make every parameter trainable again...
for param in m.parameters():
    param.requires_grad = True

# ...then re-freeze the first six entries of `base_layer` (indices 0-5).
# NOTE(review): the loop variable `i` is left at 5 after this cell, and the
# later `for _ in range(5)` training cell reads it as its starting epoch.
for i in range(0, 6):
    for param in m.module.base_layer[i].parameters():
        param.requires_grad = False

In [21]:
# Switch the loader to `train_dataset` with a smaller batch (450) and rebuild
# Adam over the now-trainable parameters with lr=5e-5.
# NOTE(review): `train_dataset` is only defined in a cell that appears further
# down (executed as In [35]); on a fresh kernel this cell raises NameError.
# Move the dataset construction above this cell.
train_loader = DataLoader(train_dataset, batch_size=450, shuffle=True, drop_last=True,
                            num_workers=20, collate_fn=collate_embeds, pin_memory=True)
opti = optim.Adam(filter(lambda p: p.requires_grad, m.module.parameters()), lr=0.00005)

In [22]:
# Five epochs, logged as 5 through 9 (matching the recorded output).
# The epoch index is an explicit loop variable: the previous version read `i`
# left over from the layer-freezing loop, so its result depended on which cell
# had last assigned `i`.
for epoch in range(5, 10):
    train(train_loader, m, criterion, opti, epoch, print_freq=100)

Start training
Epoch: [5][0/10208]	Time 13.304 (13.304)	Data 12.050 (12.050)	Loss 520.9606 (520.9606)	
Epoch: [5][100/10208]	Time 0.718 (0.890)	Data 0.025 (0.145)	Loss 250.8136 (324.0566)	
Epoch: [5][200/10208]	Time 0.937 (0.826)	Data 0.026 (0.086)	Loss 241.2368 (284.0829)	
Epoch: [5][300/10208]	Time 0.718 (0.804)	Data 0.025 (0.066)	Loss 223.3953 (266.4430)	
Epoch: [5][400/10208]	Time 0.717 (0.794)	Data 0.026 (0.056)	Loss 223.3347 (255.7287)	
Epoch: [5][500/10208]	Time 0.717 (0.788)	Data 0.025 (0.050)	Loss 219.5050 (248.9875)	
Epoch: [5][600/10208]	Time 0.716 (0.784)	Data 0.026 (0.046)	Loss 211.9663 (243.6552)	
Epoch: [5][700/10208]	Time 1.029 (0.781)	Data 0.025 (0.043)	Loss 211.2588 (239.4960)	
Epoch: [5][800/10208]	Time 0.717 (0.779)	Data 0.025 (0.041)	Loss 213.6814 (236.2502)	
Epoch: [5][900/10208]	Time 0.723 (0.777)	Data 0.026 (0.040)	Loss 209.8525 (233.5860)	
Epoch: [5][1000/10208]	Time 0.756 (0.776)	Data 0.026 (0.038)	Loss 210.1955 (231.2169)	
Epoch: [5][1100/10208]	Time 0.717 (0

Epoch: [5][9400/10208]	Time 0.720 (0.764)	Data 0.026 (0.027)	Loss 193.9820 (203.8731)	
Epoch: [5][9500/10208]	Time 0.713 (0.764)	Data 0.025 (0.027)	Loss 195.3975 (203.7804)	
Epoch: [5][9600/10208]	Time 1.132 (0.764)	Data 0.026 (0.027)	Loss 197.2472 (203.6910)	
Epoch: [5][9700/10208]	Time 0.719 (0.764)	Data 0.025 (0.027)	Loss 194.8236 (203.6068)	
Epoch: [5][9800/10208]	Time 0.724 (0.764)	Data 0.026 (0.027)	Loss 193.7630 (203.5197)	
Epoch: [5][9900/10208]	Time 0.754 (0.764)	Data 0.025 (0.027)	Loss 194.5895 (203.4344)	
Epoch: [5][10000/10208]	Time 0.715 (0.764)	Data 0.025 (0.027)	Loss 194.6336 (203.3457)	
Epoch: [5][10100/10208]	Time 0.953 (0.764)	Data 0.025 (0.027)	Loss 197.3025 (203.2578)	
Epoch: [5][10200/10208]	Time 0.709 (0.764)	Data 0.025 (0.027)	Loss 194.4759 (203.1727)	
Epoch: [5][10207/10208]	Time 0.710 (0.764)	Data 0.025 (0.027)	Loss 194.9759 (203.1671)	
Start training
Epoch: [6][0/10208]	Time 14.458 (14.458)	Data 13.648 (13.648)	Loss 194.3483 (194.3483)	
Epoch: [6][100/10208]	T

Epoch: [6][8400/10208]	Time 0.720 (0.767)	Data 0.026 (0.028)	Loss 188.2408 (191.4714)	
Epoch: [6][8500/10208]	Time 1.069 (0.767)	Data 0.027 (0.028)	Loss 187.5262 (191.4339)	
Epoch: [6][8600/10208]	Time 0.717 (0.767)	Data 0.026 (0.028)	Loss 188.1460 (191.3976)	
Epoch: [6][8700/10208]	Time 0.722 (0.767)	Data 0.026 (0.028)	Loss 187.9334 (191.3643)	
Epoch: [6][8800/10208]	Time 0.718 (0.767)	Data 0.026 (0.028)	Loss 187.2130 (191.3291)	
Epoch: [6][8900/10208]	Time 0.720 (0.767)	Data 0.026 (0.028)	Loss 188.0001 (191.2926)	
Epoch: [6][9000/10208]	Time 0.981 (0.767)	Data 0.026 (0.028)	Loss 188.5044 (191.2569)	
Epoch: [6][9100/10208]	Time 0.718 (0.767)	Data 0.026 (0.028)	Loss 188.4612 (191.2190)	
Epoch: [6][9200/10208]	Time 0.720 (0.767)	Data 0.026 (0.028)	Loss 188.1525 (191.1826)	
Epoch: [6][9300/10208]	Time 0.723 (0.767)	Data 0.025 (0.028)	Loss 187.7855 (191.1456)	
Epoch: [6][9400/10208]	Time 0.719 (0.767)	Data 0.025 (0.028)	Loss 190.3804 (191.1062)	
Epoch: [6][9500/10208]	Time 1.121 (0.767)	D

Epoch: [7][7400/10208]	Time 1.085 (0.768)	Data 0.025 (0.028)	Loss 182.0837 (184.5095)	
Epoch: [7][7500/10208]	Time 0.723 (0.768)	Data 0.026 (0.028)	Loss 182.6496 (184.4794)	
Epoch: [7][7600/10208]	Time 0.717 (0.768)	Data 0.026 (0.028)	Loss 182.0073 (184.4497)	
Epoch: [7][7700/10208]	Time 0.720 (0.768)	Data 0.026 (0.028)	Loss 183.6725 (184.4220)	
Epoch: [7][7800/10208]	Time 0.722 (0.768)	Data 0.026 (0.028)	Loss 181.9570 (184.3930)	
Epoch: [7][7900/10208]	Time 1.152 (0.768)	Data 0.025 (0.028)	Loss 181.2991 (184.3644)	
Epoch: [7][8000/10208]	Time 0.715 (0.768)	Data 0.025 (0.028)	Loss 182.9015 (184.3362)	
Epoch: [7][8100/10208]	Time 0.723 (0.768)	Data 0.026 (0.028)	Loss 181.6902 (184.3089)	
Epoch: [7][8200/10208]	Time 0.716 (0.768)	Data 0.026 (0.028)	Loss 182.5096 (184.2811)	
Epoch: [7][8300/10208]	Time 0.719 (0.768)	Data 0.025 (0.028)	Loss 181.6448 (184.2531)	
Epoch: [7][8400/10208]	Time 0.949 (0.768)	Data 0.026 (0.028)	Loss 182.5660 (184.2264)	
Epoch: [7][8500/10208]	Time 0.716 (0.768)	D

Epoch: [8][6400/10208]	Time 0.718 (0.769)	Data 0.026 (0.028)	Loss 180.4082 (180.9177)	
Epoch: [8][6500/10208]	Time 0.720 (0.769)	Data 0.026 (0.028)	Loss 180.2464 (180.9119)	
Epoch: [8][6600/10208]	Time 0.719 (0.769)	Data 0.026 (0.028)	Loss 181.5696 (180.9061)	
Epoch: [8][6700/10208]	Time 0.718 (0.769)	Data 0.026 (0.028)	Loss 181.0537 (180.9014)	
Epoch: [8][6800/10208]	Time 0.941 (0.769)	Data 0.026 (0.028)	Loss 181.0194 (180.8995)	
Epoch: [8][6900/10208]	Time 0.718 (0.769)	Data 0.026 (0.028)	Loss 180.2021 (180.8946)	
Epoch: [8][7000/10208]	Time 0.716 (0.769)	Data 0.025 (0.028)	Loss 180.9061 (180.8905)	
Epoch: [8][7100/10208]	Time 0.723 (0.769)	Data 0.026 (0.028)	Loss 182.4232 (180.8921)	
Epoch: [8][7200/10208]	Time 0.719 (0.769)	Data 0.025 (0.028)	Loss 180.0021 (180.8927)	
Epoch: [8][7300/10208]	Time 1.162 (0.769)	Data 0.025 (0.028)	Loss 180.6150 (180.8893)	
Epoch: [8][7400/10208]	Time 0.719 (0.769)	Data 0.025 (0.028)	Loss 180.4927 (180.8836)	
Epoch: [8][7500/10208]	Time 0.717 (0.769)	D

Epoch: [9][5400/10208]	Time 0.719 (0.770)	Data 0.026 (0.028)	Loss 180.2778 (180.5521)	
Epoch: [9][5500/10208]	Time 0.726 (0.770)	Data 0.026 (0.028)	Loss 180.6453 (180.5453)	
Epoch: [9][5600/10208]	Time 0.719 (0.770)	Data 0.026 (0.028)	Loss 180.0227 (180.5379)	
Epoch: [9][5700/10208]	Time 0.959 (0.770)	Data 0.026 (0.028)	Loss 180.2872 (180.5318)	
Epoch: [9][5800/10208]	Time 0.720 (0.770)	Data 0.026 (0.028)	Loss 180.1253 (180.5261)	
Epoch: [9][5900/10208]	Time 0.717 (0.770)	Data 0.025 (0.028)	Loss 180.3584 (180.5197)	
Epoch: [9][6000/10208]	Time 0.725 (0.770)	Data 0.026 (0.028)	Loss 180.0548 (180.5138)	
Epoch: [9][6100/10208]	Time 0.722 (0.770)	Data 0.026 (0.028)	Loss 180.6653 (180.5081)	
Epoch: [9][6200/10208]	Time 1.087 (0.770)	Data 0.026 (0.028)	Loss 179.6538 (180.5022)	
Epoch: [9][6300/10208]	Time 0.722 (0.770)	Data 0.026 (0.028)	Loss 180.0162 (180.4962)	
Epoch: [9][6400/10208]	Time 0.722 (0.770)	Data 0.026 (0.028)	Loss 180.1648 (180.4921)	
Epoch: [9][6500/10208]	Time 0.726 (0.770)	D

In [35]:
# Load the fastText model and build the OpenImages text/image training set,
# which receives the model as `embeddings` and uses the augmenting `prepro`.
# NOTE(review): this cell was executed out of order (In [35]) although earlier
# cells already use `train_dataset`; it should sit before the first loader that
# needs it. Paths are absolute and machine-specific.
# NOTE(review): the meaning of `random=0.5` is defined in dataset/openimages.py
# and is not visible here — document it once confirmed.
embed = fastText.load_model("/data/m.portaz/wiki.en.bin")
train_dataset = openimages.OpenImagesText(image_dir="/data/datasets/openimages/images/train/", 
                          dataset_file="/data/datasets/openimages/train-words.csv",
                          embeddings=embed, 
                          transform=prepro, random=0.5)

Reading dataset file
Done reading  4593616  lines.


In [23]:
# Rebuild the loader over `train_dataset` with batch size 512 and restart Adam
# on the trainable parameters with lr=2.5e-5 (optimizer state starts fresh).
train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True, drop_last=True,
                            num_workers=20, collate_fn=collate_embeds, pin_memory=True)
opti = optim.Adam(filter(lambda p: p.requires_grad, m.module.parameters()), lr=0.000025)

In [25]:
# Twelve epochs logged as 8 through 19, printing every 200 batches.
# NOTE(review): labels 8 and 9 were already used by the previous training
# cell, so epoch numbers in the logs overlap between the two runs.
for i in range(8,20):
    train(train_loader, m, criterion, opti, i, print_freq=200)

Start training
Epoch: [8][0/8971]	Time 14.081 (14.081)	Data 13.128 (13.128)	Loss 203.7532 (203.7532)	
Epoch: [8][200/8971]	Time 0.771 (0.899)	Data 0.029 (0.095)	Loss 204.3013 (204.3451)	
Epoch: [8][400/8971]	Time 0.775 (0.865)	Data 0.029 (0.062)	Loss 204.0117 (204.3509)	
Epoch: [8][600/8971]	Time 0.801 (0.854)	Data 0.057 (0.051)	Loss 205.5808 (204.3916)	
Epoch: [8][800/8971]	Time 0.773 (0.849)	Data 0.029 (0.046)	Loss 202.7911 (204.3755)	
Epoch: [8][1000/8971]	Time 0.770 (0.847)	Data 0.029 (0.043)	Loss 204.2082 (204.3662)	
Epoch: [8][1200/8971]	Time 0.771 (0.844)	Data 0.029 (0.041)	Loss 205.3881 (204.3614)	
Epoch: [8][1400/8971]	Time 1.136 (0.843)	Data 0.029 (0.039)	Loss 203.9379 (204.3733)	
Epoch: [8][1600/8971]	Time 0.777 (0.841)	Data 0.029 (0.038)	Loss 203.9836 (204.3724)	
Epoch: [8][1800/8971]	Time 0.769 (0.840)	Data 0.029 (0.037)	Loss 204.9033 (204.3789)	
Epoch: [8][2000/8971]	Time 0.771 (0.839)	Data 0.029 (0.036)	Loss 205.5240 (204.3724)	
Epoch: [8][2200/8971]	Time 0.776 (0.839)	D

Epoch: [10][600/8971]	Time 0.783 (0.862)	Data 0.029 (0.058)	Loss 203.2500 (204.1352)	
Epoch: [10][800/8971]	Time 0.774 (0.856)	Data 0.029 (0.051)	Loss 202.9549 (204.1200)	
Epoch: [10][1000/8971]	Time 0.769 (0.852)	Data 0.028 (0.047)	Loss 204.3714 (204.1246)	
Epoch: [10][1200/8971]	Time 0.783 (0.849)	Data 0.030 (0.044)	Loss 204.2313 (204.1340)	
Epoch: [10][1400/8971]	Time 0.771 (0.847)	Data 0.029 (0.042)	Loss 205.0178 (204.1320)	
Epoch: [10][1600/8971]	Time 0.775 (0.845)	Data 0.029 (0.040)	Loss 204.3758 (204.1343)	
Epoch: [10][1800/8971]	Time 0.777 (0.844)	Data 0.029 (0.039)	Loss 204.5458 (204.1445)	
Epoch: [10][2000/8971]	Time 1.235 (0.843)	Data 0.029 (0.038)	Loss 205.3938 (204.1541)	
Epoch: [10][2200/8971]	Time 0.778 (0.841)	Data 0.029 (0.037)	Loss 203.8353 (204.1493)	
Epoch: [10][2400/8971]	Time 0.790 (0.841)	Data 0.029 (0.037)	Loss 204.0684 (204.1451)	
Epoch: [10][2600/8971]	Time 0.774 (0.840)	Data 0.030 (0.036)	Loss 204.5103 (204.1424)	
Epoch: [10][2800/8971]	Time 0.776 (0.840)	Dat

Epoch: [12][1000/8971]	Time 0.808 (0.848)	Data 0.029 (0.046)	Loss 204.4128 (204.0132)	
Epoch: [12][1200/8971]	Time 0.772 (0.846)	Data 0.029 (0.043)	Loss 204.1503 (204.0112)	
Epoch: [12][1400/8971]	Time 0.804 (0.844)	Data 0.060 (0.041)	Loss 204.8344 (204.0116)	
Epoch: [12][1600/8971]	Time 1.162 (0.843)	Data 0.029 (0.040)	Loss 203.1547 (204.0114)	
Epoch: [12][1800/8971]	Time 0.807 (0.841)	Data 0.029 (0.038)	Loss 204.5919 (204.0193)	
Epoch: [12][2000/8971]	Time 0.781 (0.841)	Data 0.029 (0.038)	Loss 204.5637 (204.0241)	
Epoch: [12][2200/8971]	Time 0.771 (0.840)	Data 0.029 (0.037)	Loss 203.9501 (204.0193)	
Epoch: [12][2400/8971]	Time 0.780 (0.840)	Data 0.029 (0.036)	Loss 202.7738 (204.0200)	
Epoch: [12][2600/8971]	Time 1.309 (0.839)	Data 0.029 (0.036)	Loss 203.2190 (204.0217)	
Epoch: [12][2800/8971]	Time 0.776 (0.839)	Data 0.029 (0.035)	Loss 204.3593 (204.0297)	
Epoch: [12][3000/8971]	Time 0.779 (0.839)	Data 0.030 (0.035)	Loss 203.8006 (204.0258)	
Epoch: [12][3200/8971]	Time 0.777 (0.838)	D

Epoch: [14][1400/8971]	Time 0.779 (0.848)	Data 0.030 (0.041)	Loss 203.7614 (203.9089)	
Epoch: [14][1600/8971]	Time 0.772 (0.848)	Data 0.029 (0.039)	Loss 204.0774 (203.9075)	
Epoch: [14][1800/8971]	Time 0.781 (0.846)	Data 0.029 (0.038)	Loss 203.8282 (203.9117)	
Epoch: [14][2000/8971]	Time 0.778 (0.845)	Data 0.029 (0.037)	Loss 203.3480 (203.9176)	
Epoch: [14][2200/8971]	Time 1.246 (0.845)	Data 0.029 (0.037)	Loss 204.6489 (203.9158)	
Epoch: [14][2400/8971]	Time 0.774 (0.844)	Data 0.029 (0.036)	Loss 204.0349 (203.9189)	
Epoch: [14][2600/8971]	Time 0.780 (0.843)	Data 0.029 (0.036)	Loss 205.1529 (203.9182)	
Epoch: [14][2800/8971]	Time 0.779 (0.843)	Data 0.030 (0.035)	Loss 203.7554 (203.9236)	
Epoch: [14][3000/8971]	Time 0.777 (0.843)	Data 0.029 (0.035)	Loss 202.7901 (203.9304)	
Epoch: [14][3200/8971]	Time 0.777 (0.842)	Data 0.029 (0.035)	Loss 203.7024 (203.9301)	
Epoch: [14][3400/8971]	Time 0.777 (0.842)	Data 0.029 (0.034)	Loss 204.3577 (203.9284)	
Epoch: [14][3600/8971]	Time 0.774 (0.842)	D

Epoch: [16][1800/8971]	Time 1.032 (0.835)	Data 0.029 (0.038)	Loss 203.1852 (203.8172)	
Epoch: [16][2000/8971]	Time 0.782 (0.834)	Data 0.029 (0.037)	Loss 202.1582 (203.8106)	
Epoch: [16][2200/8971]	Time 0.778 (0.833)	Data 0.029 (0.036)	Loss 203.8252 (203.8102)	
Epoch: [16][2400/8971]	Time 0.774 (0.833)	Data 0.029 (0.036)	Loss 203.4420 (203.8118)	
Epoch: [16][2600/8971]	Time 0.766 (0.832)	Data 0.029 (0.035)	Loss 204.2855 (203.8070)	
Epoch: [16][2800/8971]	Time 0.832 (0.832)	Data 0.029 (0.035)	Loss 203.1121 (203.8080)	
Epoch: [16][3000/8971]	Time 0.775 (0.831)	Data 0.029 (0.035)	Loss 204.2240 (203.8085)	
Epoch: [16][3200/8971]	Time 0.776 (0.831)	Data 0.029 (0.034)	Loss 202.7381 (203.8079)	
Epoch: [16][3400/8971]	Time 0.769 (0.830)	Data 0.029 (0.034)	Loss 202.6956 (203.8082)	
Epoch: [16][3600/8971]	Time 0.777 (0.830)	Data 0.029 (0.034)	Loss 203.8226 (203.8101)	
Epoch: [16][3800/8971]	Time 0.772 (0.829)	Data 0.029 (0.034)	Loss 204.3057 (203.8107)	
Epoch: [16][4000/8971]	Time 0.777 (0.829)	D

Epoch: [18][2200/8971]	Time 0.809 (0.845)	Data 0.029 (0.038)	Loss 204.2485 (203.7120)	
Epoch: [18][2400/8971]	Time 1.117 (0.845)	Data 0.029 (0.037)	Loss 202.2464 (203.7078)	
Epoch: [18][2600/8971]	Time 0.772 (0.844)	Data 0.029 (0.037)	Loss 204.3545 (203.7077)	
Epoch: [18][2800/8971]	Time 0.777 (0.843)	Data 0.029 (0.036)	Loss 203.9313 (203.7031)	
Epoch: [18][3000/8971]	Time 0.775 (0.843)	Data 0.029 (0.036)	Loss 204.3213 (203.7076)	
Epoch: [18][3200/8971]	Time 0.772 (0.842)	Data 0.029 (0.035)	Loss 202.5703 (203.7031)	
Epoch: [18][3400/8971]	Time 0.770 (0.842)	Data 0.029 (0.035)	Loss 203.4874 (203.7017)	
Epoch: [18][3600/8971]	Time 0.794 (0.842)	Data 0.029 (0.035)	Loss 204.3802 (203.7056)	
Epoch: [18][3800/8971]	Time 0.775 (0.842)	Data 0.029 (0.034)	Loss 204.1137 (203.7105)	
Epoch: [18][4000/8971]	Time 0.778 (0.841)	Data 0.029 (0.034)	Loss 203.3958 (203.7099)	
Epoch: [18][4200/8971]	Time 1.150 (0.841)	Data 0.029 (0.034)	Loss 202.9584 (203.7128)	
Epoch: [18][4400/8971]	Time 0.777 (0.841)	D