In [29]:
import argparse
import os
import time
import sys

import torch
import torch.optim as optim
import torchvision.transforms as transforms
import numpy as np
from torch.utils.data import DataLoader
import torch.optim as optim

# Expose only GPUs 4-7 to this process.
# NOTE(review): this runs after `import torch`; it presumably relies on CUDA
# not being initialised at import time -- confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "4,5,6,7"

In [2]:
from models import models
from dataset import openimages
from utils.loss import HardNegativeContrastiveLoss

In [3]:
class AverageMeter(object):
    """Keep the most recent value and the running weighted mean of a metric.

    Attributes (all reset to 0 by ``reset``):
        val:   the value passed to the latest ``update`` call.
        sum:   accumulated ``val * n`` over all updates.
        count: accumulated ``n`` over all updates.
        avg:   ``sum / count`` (0 while ``count`` is 0).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear every statistic back to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running average.

        Args:
            val: Newly observed value.
            n: Weight of the observation (e.g. the batch size). Defaults to 1.
        """
        self.val = val
        self.sum += val * n
        self.count += n
        # Guard the division: a zero total weight (e.g. ``update(x, n=0)`` on a
        # fresh meter) would otherwise raise ZeroDivisionError.
        self.avg = self.sum / self.count if self.count else 0

In [26]:
def train(train_loader, model, criterion, optimizer, epoch, print_freq=1000):
    """Run one pass over ``train_loader``, stepping ``optimizer`` after every batch.

    Args:
        train_loader: Sized iterable yielding ``(imgs, caps)`` batches. Both
            items must provide ``.cuda()`` and ``imgs`` must provide ``.size(0)``.
        model: Object switched to training mode with ``model.train()``; calling
            it on an image batch must return a pair whose first element is fed
            to ``criterion``.
        criterion: Callable ``criterion(output_imgs, target)`` returning a loss
            that supports ``.backward()`` and ``.item()``.
        optimizer: Object providing ``zero_grad()`` and ``step()``.
        epoch: Only used as a label in the periodic log line.
        print_freq: A summary line is printed every ``print_freq`` batches and
            on the final batch.

    Returns:
        Tuple ``(avg_loss, avg_batch_time, avg_data_time)``. The loss average is
        weighted by ``imgs.size(0)``; the two times are ``time.time()`` deltas.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    model = model.train()
    # The loader length is loop-invariant, so query it once.
    num_batches = len(train_loader)
    print("Start training")
    end = time.time()
    for i, (imgs, caps) in enumerate(train_loader):
        if i % 2 == 1:
            # In-place progress indicator (the carriage return rewrites the line).
            # A plain '%' is printed here: the previous '\%' literal was an
            # invalid escape sequence and emitted a stray backslash.
            print("%2.2f" % (i / num_batches * 100), '%', end='\r')
        input_imgs, target = imgs.cuda(), caps.cuda()

        # Time elapsed since the previous iteration finished, measured after
        # the two .cuda() calls above have returned.
        data_time.update(time.time() - end)

        optimizer.zero_grad()

        # The second element returned by the model is not used for the loss.
        output_imgs, _ = model(input_imgs)

        loss = criterion(output_imgs, target)

        loss.backward()
        optimizer.step()

        # Weight the running loss by the number of samples in this batch.
        losses.update(loss.item(), imgs.size(0))

        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0 or i == (num_batches - 1):
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                      epoch, i, num_batches, batch_time=batch_time,
                      data_time=data_time, loss=losses))

    return losses.avg, batch_time.avg, data_time.avg

In [5]:
# Per-channel mean/std normalisation shared by both pipelines below.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Pipeline with augmentation: random resized crop to 256 and random horizontal
# flip, followed by tensor conversion and normalisation.
prepro = transforms.Compose(
    [
        transforms.RandomResizedCrop(size=256),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]
)

# Pipeline without augmentation: fixed resize to 350x350, then tensor
# conversion and normalisation (not referenced by the visible cells).
prepro_val = transforms.Compose(
    [
        transforms.Resize(size=(350, 350)),
        transforms.ToTensor(),
        normalize,
    ]
)

In [33]:
# Instantiate the joint-embedding model, then switch it to training mode and
# move it to CUDA.
m = models.JointEmbedding()
m = m.train().cuda()

In [35]:
# Mark every parameter under `m.projection` as trainable.
for params in m.projection.parameters():
    params.requires_grad_(True)

In [36]:
# Wrap the model in DataParallel. The isinstance guard keeps this cell
# idempotent: re-running it no longer nests one DataParallel inside another.
if not isinstance(m, torch.nn.DataParallel):
    m = torch.nn.DataParallel(m)

In [19]:
def collate_embeds(data):
    """Collate a list of ``(image, target)`` samples into two batched tensors.

    Args:
        data: Iterable of 2-item samples; the first items are tensors accepted
            by ``torch.stack`` and the second items are array-likes accepted
            by ``np.stack``.

    Returns:
        Tuple ``(images, targets)`` where both are stacked along a new leading
        dimension and ``targets`` is converted with ``torch.Tensor``.
    """
    batch_images, batch_targets = zip(*data)
    stacked_images = torch.stack(batch_images, dim=0)
    stacked_targets = np.stack(batch_targets, axis=0)
    return stacked_images, torch.Tensor(stacked_targets)

In [37]:
# Training dataset built from the OpenImages image directory and the two
# annotation CSV paths, with the `prepro` transform attached.
train_dataset = openimages.OpenImages(
    image_dir="/data/datasets/openimages/images/train/",
    bbox_file="/data/datasets/openimages/train-annotations-bbox.csv",
    classes="/data/datasets/openimages/class-descriptions-boxable.csv",
    transform=prepro,
)

In [38]:
# Shuffled loader over the training set: batches of 300, incomplete final batch
# dropped, 10 worker processes, pinned memory, batches built by collate_embeds.
train_loader = DataLoader(
    train_dataset,
    batch_size=300,
    shuffle=True,
    drop_last=True,
    num_workers=10,
    collate_fn=collate_embeds,
    pin_memory=True,
)

In [39]:
# Adam over only those parameters of `m` that currently require gradients.
opti = optim.Adam(
    (p for p in m.parameters() if p.requires_grad),
    lr=1e-3,
)

In [40]:
# Instantiate the loss with its default arguments, then move it to CUDA.
criterion = HardNegativeContrastiveLoss()
criterion = criterion.cuda()

In [None]:
# Run one training pass, labelled as epoch 1, logging every 1000 batches.
train(train_loader, m, criterion, opti, epoch=1, print_freq=1000)

Start training
