In [1]:
import torch

import os

'''
If you are using Colab, make sure to add the relevant CIR folder to the system path. Alternatively, 
 you can adjust the import statements below based on the location of your files.
'''

from data_loader import get_loader
from utils import Ranker
from todo import Model, Criterion, train, val

In [2]:
# You can change hyper-parameters to achieve higher retrieval performance
class Args():
    """Paths and hyper-parameters for the experiment.

    Calling ``Args()`` with no arguments yields the default configuration.
    Individual settings can be changed at construction time instead of editing
    the class body, e.g. ``Args(data_root="/content/CIR", batch_size=32)``.

    Args:
        data_root: Directory that contains the ``data/`` folder. The image,
            caption and split paths are derived from it.
        **overrides: Replacement values for any attribute set below
            (e.g. ``epochs=10``). Overrides are applied after the defaults,
            so overriding a derived path replaces it verbatim.

    Raises:
        AttributeError: If an override names an attribute that does not exist,
            so that a misspelled option is not silently ignored.
    """

    def __init__(self, data_root="./", **overrides):
        # Dataset
        self.data_root = data_root  # Remember to change the path to your data
        self.data_set = "dress"
        self.image_root = os.path.join(self.data_root, 'data/resized_images/')
        # The two path templates below keep their '{}' placeholders; they are
        # filled in later by the cells that call .format(...) on them.
        self.caption_path = os.path.join(self.data_root, 'data/captions/cap.{}.{}.json')
        self.split_path = os.path.join(self.data_root, 'data/image_splits/split.{}.{}.json')

        # Model
        self.embed_dim = 512
        self.vision_feature_dim = 512
        self.text_feature_dim = 512

        # Training
        self.log_step = 15
        self.batch_size = 64
        self.learning_rate = 0.001
        self.num_workers = 4
        self.epochs = 3
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Apply caller-supplied overrides last; only known options are accepted.
        for name, value in overrides.items():
            if name not in vars(self):
                raise AttributeError("Unknown option for Args: {!r}".format(name))
            setattr(self, name, value)
args = Args()

In [3]:
# Build data loader
# Both loaders share the image directory, batch size, worker count and
# return_target=True; only the caption split and the shuffle flag differ.
# The train loader is built first, then the validation loader.
data_loader, data_loader_dev = (
    get_loader(
        args.image_root.format(args.data_set),
        args.caption_path.format(args.data_set, split_name),
        args.batch_size,
        shuffle=do_shuffle,
        return_target=True,
        num_workers=args.num_workers,
    )
    for split_name, do_shuffle in (('train', True), ('val', False))
)

In [4]:
# Build model, criterion, optimizer, evaluator
# Network: constructed from the configured feature/embedding sizes, moved to
# the selected device, then train() is called on it.
model = Model(
    args.vision_feature_dim,
    args.text_feature_dim,
    args.embed_dim,
)
model.to(args.device)
model.train()

# Loss object passed to train() in the training cell.
criterion = Criterion()

# Adam is given only the parameters whose requires_grad flag is set.
current_lr = args.learning_rate
optimizer = torch.optim.Adam(
    [param for param in model.parameters() if param.requires_grad],
    lr=current_lr,
)

# Ranker built from the image directory and the validation image split.
ranker = Ranker(
    root=args.image_root.format(args.data_set),
    image_split_file=args.split_path.format(args.data_set, 'val'),
    transform=None,
    num_workers=args.num_workers,
)

In [5]:
# Start training
# Run args.epochs rounds of one training pass followed by one validation pass.
# val() is handed the current best_score and its return value replaces it
# (presumably the running best dev score -- confirm against val()).
best_score = 0
for epoch in range(args.epochs):
    # Re-enter training mode at the start of every epoch. The setup cell calls
    # model.train() only once; if val() leaves the model in evaluation mode
    # (not visible from this notebook -- TODO confirm), later epochs, or a
    # re-run of this cell, would otherwise train in that mode. The call is a
    # no-op when the model is already in training mode.
    model.train()
    train(data_loader, model, criterion, optimizer, args.log_step)
    best_score = val(data_loader_dev, model, ranker, best_score)
print(best_score)

training loss: 4.212
training loss: 3.114
training loss: 2.795
training loss: 2.803
training loss: 2.570
training loss: 2.475
training loss: 2.522
updating emb
emb updated
-----------------------------------------------------------------------------
| score  0.85232 /  0.85232 
-----------------------------------------------------------------------------
best_dev_score: 0.8523198668170356
training loss: 2.346
training loss: 2.124
training loss: 2.054
training loss: 2.089
training loss: 2.129
training loss: 2.340
training loss: 2.370
updating emb
emb updated
-----------------------------------------------------------------------------
| score  0.87494 /  0.87494 
-----------------------------------------------------------------------------
best_dev_score: 0.8749444255474071
training loss: 1.791
training loss: 2.071
training loss: 1.938
training loss: 2.034
training loss: 1.795
training loss: 1.990
training loss: 1.955
updating emb
emb updated
--------------------------------------------