In [0]:
from google.colab import drive
drive.mount('/content/drive/')
root_path = '/content/drive/My Drive/p409k/'

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [0]:
import torch

device = torch.device('cpu')
if torch.cuda.is_available():
    torch.cuda.is_available()

In [0]:
from PIL import Image
from torchvision import transforms
from torch import nn
import torch

class ItemRepresentation:
    def __init__(self):
        # Pool5 layer is a layer before FC layer
        model = torch.hub.load('pytorch/vision:v0.4.2', 'resnet50',
                               pretrained=True)
        self.resnet50_pool5 = nn.Sequential(*(list(model.children())[:-1]))
        self.resnet50_pool5 = self.resnet50_pool5.to(device)
        self.resnet50_pool5.eval()
        
        self.preprocess = transforms.Compose([
            # No need for resize/crop; already cropped (p409k)
            transforms.ToTensor(),
            # Normalize to [0,1]
            transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5])
        ])
        
    def __call__(self, image_paths):
        input_tensors = list()
        for image_path in image_paths:
            input_image = Image.open(image_path)
            input_tensors.append(self.preprocess(input_image))
        input_batch = torch.stack(input_tensors).to(device)
        
        with torch.no_grad():
            output = self.resnet50_pool5(input_batch)
        return output.squeeze()
get_item_representation = ItemRepresentation()

def get_outfit_representation(upper_image_path, lower_image_path):
    upper = get_item_representation([upper_image_path])
    lower = get_item_representation([lower_image_path])
    return torch.cat((upper, lower))

Downloading: "https://github.com/pytorch/vision/archive/v0.4.2.zip" to /root/.cache/torch/hub/v0.4.2.zip
Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/checkpoints/resnet50-19c8e357.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 345MB/s]


In [0]:
from os import path

sample_path = path.join(root_path, "images_processed/upper/191900716.jpg")
x = get_outfit_representation(sample_path, sample_path)
print(x, x.shape)

tensor([0.0951, 0.8592, 0.2885,  ..., 0.1037, 0.0512, 0.9049]) torch.Size([4096])


In [0]:
from os import listdir
from os import path
import json

def load_p409k_processed(p409k_base_dir):
    p409k_upper_dir = path.join(p409k_base_dir, "images_processed/upper")
    p409k_lower_dir = path.join(p409k_base_dir, "images_processed/lower")

    p409k_train_pos_jl_path = path.join(p409k_base_dir, "train_pos_samples.jl")
    p409k_train_neg_jl_path = path.join(p409k_base_dir, "train_neg_samples.jl")
    
    item_path_set = list()
    def update_item_path_set(train_jl_path, label):
        with open(train_jl_path) as train_jl:
            for line in train_jl:
                items_id = json.loads(line)["items_id"]
                partlist = json.loads(line)["partlist"]
                id_part_dict = dict(zip(partlist, items_id))

                upper_id = id_part_dict.get("upper")
                lower_id = id_part_dict.get("lower")

                if upper_id is not None and lower_id is not None:
                    upper_path = path.join(p409k_upper_dir, 
                                           ''.join([str(upper_id), ".jpg"]))
                    lower_path = path.join(p409k_lower_dir, 
                                           ''.join([str(lower_id), ".jpg"]))

                    if path.exists(upper_path) and path.exists(lower_path):
                        item_path_set.append((upper_path, lower_path, label))

    update_item_path_set(p409k_train_pos_jl_path, 1)
    update_item_path_set(p409k_train_neg_jl_path, 0)
    
    return item_path_set

In [0]:
p409k_train = load_p409k_processed(root_path)

In [0]:
def save_representation_set(p409k_set, output_path, batch_size=1000):    
    outfit_dirs = list()
    outfit_lab_rep_list = list()
    for i, (upper_dir, lower_dir, label) in enumerate(p409k_set):
        outfit_dirs.append(upper_dir)
        outfit_dirs.append(lower_dir)

        if (i+1) % batch_size == 0:
            print("getting",  len(outfit_dirs), "item representations...")

            item_representations = get_item_representation(outfit_dirs)
            for j in range(0, len(item_representations), 2):
                upper_rep = item_representations[j]
                lower_rep = item_representations[j+1]
                label = torch.FloatTensor([label], device=device)
                outfit_lab_rep_list.append(torch.cat((upper_rep, lower_rep, label)))
            outfit_dirs.clear()

            print(i, "items processed.", len(p409k_set) - i, "remaining.")
        
    outfit_lab_rep_mat = torch.stack(outfit_lab_rep_list)
    torch.save(outfit_lab_rep_mat, output_path)

In [0]:
from os import path

tensor_path = path.join(root_path, "train_set.tensor")

In [0]:
save_representation_set(p409k_train, tensor_path, batch_size=1000)

getting 2000 item representations...


KeyboardInterrupt: ignored

In [0]:
data_loader = torch.load(tensor_path)
data_size = data_loader.shape[0]

# Shuffle original dataset to reduce errors
data_loader = data_loader[torch.randperm(data_loader.shape[0])]

train_loader = data_loader[:int(data_size * 0.8)]
test_loader = data_loader[int(data_size * 0.8):]

print(train_loader, train_loader.shape)
print(test_loader, test_loader.shape)

tensor([[0.3501, 0.1749, 0.3288,  ..., 0.6051, 0.1327, 0.0000],
        [0.0139, 0.3527, 0.7099,  ..., 1.2617, 0.0731, 0.0000],
        [0.2860, 0.6210, 1.4664,  ..., 0.2445, 0.6714, 0.0000],
        ...,
        [0.0473, 0.2222, 1.1932,  ..., 0.1383, 0.2736, 0.0000],
        [0.0486, 0.3050, 0.1207,  ..., 0.3767, 0.1071, 0.0000],
        [0.0144, 0.2007, 0.6139,  ..., 0.3865, 0.3197, 1.0000]],
       device='cuda:0') torch.Size([45750, 4097])
tensor([[0.0046, 0.5384, 0.0571,  ..., 0.5425, 0.3653, 1.0000],
        [0.2000, 0.2527, 0.4394,  ..., 0.2264, 0.1734, 0.0000],
        [0.6829, 0.6930, 0.2769,  ..., 0.6239, 0.0976, 0.0000],
        ...,
        [0.2112, 0.3004, 1.1272,  ..., 0.3817, 0.5879, 1.0000],
        [0.0325, 0.5289, 0.2858,  ..., 0.0367, 0.8935, 0.0000],
        [0.0195, 0.4293, 0.0242,  ..., 0.2305, 0.2437, 0.0000]],
       device='cuda:0') torch.Size([11438, 4097])


In [0]:
import os 
import numpy as np
import torch 
from PIL import Image

import torch.nn as nn 
import torch.nn.functional as F
from torch import optim
# outfit representation = 4096 dimensional representation(upper 2048 + lower 2048)

device = torch.device('cpu')
if torch.cuda.is_available():
    device = torch.device('cuda')

class ScoringModel(nn.Module):
    def __init__(self):
        super(ScoringModel, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(4096, 4096),
            nn.BatchNorm1d(4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, 2)
            ).to(device)     

    def forward(self, input):
        out = self.fc(input)
        return F.log_softmax(out, dim=-1)
    
# iteration: 400,000
# stochastic gradient descent with momentum
# initial learning rate = 10 ^ (-4)
# momentum = 0.9

net = ScoringModel()
net.to(device)
print(net)

# create a loss function
criterion = nn.NLLLoss()

# create a stochastic gradient descent optimizer 
optimizer = optim.SGD(net.parameters(), lr=0.0001, momentum=0.9)


# run the main training loop
epochs = 4000
batch_size = 100
log_interval = 10
for epoch in range(epochs):
    data_list = list()
    target_list = list()
    for i, train_data in enumerate(train_loader):
        data, target = train_data[:4096], int(train_data[4096])
        data_list.append(data)
        target_list.append(target)
        
        if (i+1) % batch_size == 0:
            data = torch.stack(data_list).to(device)
            target = torch.tensor(target_list, device=device)

            optimizer.zero_grad()
            net_out = net(data)
            loss = criterion(net_out, target)
            loss.backward()
            optimizer.step()

            batch_n = (i+1) // batch_size
            print('Train Epoch: {}/{} [{}/{} ({:.0f}%)]\tLoss:: {:.6f}'.format(
                (epoch+1), epochs, batch_n * batch_size, len(train_loader),
                100. * batch_n * batch_size // len(train_loader), loss.item()
            ))

Model(
  (fc): Sequential(
    (0): Linear(in_features=4096, out_features=4096, bias=True)
    (1): BatchNorm1d(4096, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=4096, out_features=2, bias=True)
  )
)


In [0]:
# run a test loop 
net.eval()

test_loss = 0
correct = 0

correct_positive_score = 0.0
correct_negative_score = 0.0
incorrect_positive_score = 0.0
incorrect_negative_score = 0.0
correct_positive = 0
correct_negative = 0
incorrect_positive = 0
incorrect_negative = 0

for test_data in test_loader:
    data, target = test_data[:4096], torch.tensor([int(test_data[4096])],
                                                  device=device)
    data = data.view(-1, 4096)

    with torch.no_grad():
        net_out = net(data)
    
    # sum up batch loss 
    test_loss += criterion(net_out, target)
    # get the index of the max log-probability. 
    # shape of max(1) = (batch_size, 2(maximum value, index))
    pred = net_out.data.max(1)[1]
    correct += pred.eq(target.data).sum()

    # apply softmax for score normalization
    softmax = F.softmax(net_out, dim=1)
    outfit_score = softmax[:,1].item()*100

    if pred.eq(target):
        if pred == 1:
            correct_positive_score += outfit_score
            correct_positive += 1
        else:
            correct_negative_score += outfit_score
            correct_negative += 1
    else:
        if pred == 1:
            incorrect_positive_score += outfit_score
            incorrect_positive += 1
        else:
            incorrect_negative_score += outfit_score
            incorrect_negative += 1

print(correct_score, correct, incorrect_score, incorrect)

test_loss /= test_loader.shape[0]
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, test_loader.shape[0],
    100. * correct / test_loader.shape[0]))
print('\nAvg. Total Positive/Negative Outfit Score: {:.4f}, {:.4f}\n'.format(
    (correct_positive_score + incorrect_positive_score) 
        / (correct_positive + incorrect_positive),
    (correct_negative_score + incorrect_negative_score
        / (correct_negative + incorrect_negative))
))
print('\nAvg. Correct Positive/Negative Outfit Score: {:.4f}, {:.4f}\n'.format(
    correct_positive_score / correct_positive, 
    correct_negative_score / correct_negative))
print('\nAvg. Incorrect Positive/Negative Outfit Score: {:.4f}, {:.4f}\n'.format(
    incorrect_positive_score / incorrect_positive, 
    incorrect_negative_score / incorrect_negative))

56744.32205725461 tensor(8847, device='cuda:0') 20101.169992610812 536

Test set: Average loss: 0.4904, Accuracy: 8847/11438 (77%)


Avg. Total Positive/Negative Outfit Score: 66.3604, 152805.9875


Avg. Correct Positive/Negative Outfit Score: 68.4402, 22.2771


Avg. Incorrect Positive/Negative Outfit Score: 59.2926, 31.9447



In [0]:
# Save Entire ScoringModel  
torch.save(net.state_dict(), path.join(root_path, "model-10"))

In [0]:
net = ScoringModel()
net = net.to(device)

state_dict = torch.load(path.join(root_path, "model-10"))
net.load_state_dict(state_dict)
net.eval()

upper_path = sample_path
lower_path = sample_path
outfit_rep = get_outfit_representation(upper_path, lower_path)

with torch.no_grad():
    net_out = net(outfit_rep.unsqueeze(0))
    softmax = F.softmax(net_out, dim=1)

outfit_score = softmax[:,1].item()*100
print(outfit_score)

NameError: ignored