In [2]:
from __future__ import print_function
from __future__ import division

import sys
import torch
import torch.utils.data as dataF
import os
import time
import pickle
import numpy as np
from PIL import Image
import re
import io

import json
import matplotlib.pyplot as plt
from torchvision import transforms, datasets, models
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F



from IPython.display import clear_output
import torch.nn as nn
import torch.optim as optim

import itertools
import collections
import pdb
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")
cuda

True

In [3]:
# Path for file dset_dataloader.json
def open_json(path):
    f = open(path) 
    data = json.load(f) 
    f.close()
    return data 

def flatten(S):
    if S == []:
        return S
    if isinstance(S[0], list):
        return flatten(S[0]) + flatten(S[1:])
    return S[:1] + flatten(S[1:])

### Bar to visualize progress

In [4]:
def update_progress(progress):
    bar_length = 20
    if isinstance(progress, int):
        progress = float(progress)
    if not isinstance(progress, float):
        progress = 0
    if progress < 0:
        progress = 0
    if progress >= 1:
        progress = 1

    block = int(round(bar_length * progress))
    clear_output(wait = True)
    text = "Progress: [{0}] {1:.1f}%".format( "#" * block + "-" * (bar_length - block), progress * 100)
    print(text)

In [5]:
# IMAGES
im_path_fur = '../images/v2/full'

# ONEHOTS
onehots_vocab_p = '../json_files/ADARI_furniture_onehots_vocab.json'
onehots_w2i_p = '../json_files/ADARI_furniture_onehots_w2i.json'
onehots_i2w_p = '../json_files/ADARI_furniture_onehots_i2w.json'
onehots_p = '../json_files/ADARI_furniture_onehots.json'

# FILES FOR DATALOADER
# dset_words_p = "/Users/manuelladron/iCloud_archive/Documents/_CMU/PHD-CD/PHD-CD_Research/ADARI/json_files/cleaned/ADARI_v2/furniture/ADARI_furniture_words.json"
# vocab_p = "/Users/manuelladron/iCloud_archive/Documents/_CMU/PHD-CD/PHD-CD_Research/ADARI/json_files/cleaned/ADARI_v2/furniture/ADARI_furniture_vocab_adjs.json"

In [6]:
# Open json files with embeddings 
onehots = open_json(onehots_p)
onehots_vocab = open_json(onehots_vocab_p)
onehots_w2i = open_json(onehots_w2i_p)
onehots_i2w = open_json(onehots_i2w_p)

### Split dataset into train, validation and test

In [7]:
import random

def shuffle_dict(d):
    l = list(d.items())
    random.shuffle(l)
    d = dict(l)
    return d

def splitDict(d_img_words, percent, val_number):

    val_n = val_number
    train_test_size = len(d_img_words) - val_n
    train_n = int(train_test_size*percent)
    test_n = train_test_size - train_n
    
    d_img_words = shuffle_dict(d_img_words)
    
    im_words = iter(d_img_words.items())      
    
    # Image - words
    dtrain_imw = dict(itertools.islice(im_words, train_n))  
    dtest_imw = dict(itertools.islice(im_words, test_n))   
    dval_imw = dict(itertools.islice(im_words, val_n))
    
    
    print('trainset size: ', len(dtrain_imw), 'dataset size: ',len(dtest_imw), 'val set size: ', len(dval_imw))
    return dtrain_imw, dtest_imw, dval_imw 

In [8]:
dtrain_w, dtest_w, dval_w = splitDict(onehots, .8, 1000)

trainset size:  13225 dataset size:  3307 val set size:  1000


In [9]:
class MyDataset(Dataset):
    def __init__(self, onehots, w2i, i2w, image_path):
        self.onehots_d = onehots
        self.w2i = w2i
        self.i2w = i2w
        self.images_names = list(self.onehots_d.keys()) # names
        self.onehots = list(self.onehots_d.values()) # onehots
        self.image_path = image_path
    
    def __len__(self):
        return len(self.onehots)
    
    def get_image_tensor(self, image_name):
        """
        Gets image name and returns a tensor
        """
        name = self.image_path + "/" + image_name
        img = Image.open(name)
        img = transforms.Compose([
        transforms.Resize(64),
        transforms.CenterCrop(64),
        transforms.ToTensor()])(img)
        
        return img

    def __getitem__(self, index):
        name_image = self.images_names[index]
        img = self.get_image_tensor(name_image)
        onehot = torch.FloatTensor(self.onehots[index])
        
        return img, onehot

In [10]:
dataset_train = MyDataset(dtrain_w, onehots_w2i, onehots_i2w, im_path_fur)
dataset_test = MyDataset(dtest_w, onehots_w2i, onehots_i2w,im_path_fur)
dataset_val = MyDataset(dval_w, onehots_w2i, onehots_i2w,  im_path_fur)

In [11]:
batch_size = 64
num_workers = 8 if cuda else 0

In [14]:
train_dataloader = DataLoader(dataset_train, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=False)
test_dataloader = DataLoader(dataset_test, batch_size=batch_size, shuffle=False, num_workers=num_workers, drop_last=False)
val_dataloader = DataLoader(dataset_val, batch_size=len(dataset_val), shuffle=False, num_workers=num_workers, drop_last=False)

In [15]:
# to test dataloader
it = iter(test_dataloader)
print(next(it))


[tensor([[[[0.8863, 0.8902, 0.8980,  ..., 0.8784, 0.8706, 0.8667],
          [0.8902, 0.8941, 0.8980,  ..., 0.8784, 0.8745, 0.8706],
          [0.8902, 0.8980, 0.9020,  ..., 0.8824, 0.8784, 0.8745],
          ...,
          [0.6314, 0.6471, 0.6667,  ..., 0.6196, 0.6314, 0.6431],
          [0.6157, 0.6431, 0.6588,  ..., 0.6078, 0.6275, 0.6275],
          [0.5922, 0.6196, 0.6431,  ..., 0.6196, 0.6039, 0.6000]],

         [[0.8863, 0.8902, 0.8980,  ..., 0.8784, 0.8706, 0.8667],
          [0.8902, 0.8941, 0.8980,  ..., 0.8784, 0.8745, 0.8706],
          [0.8902, 0.8980, 0.9020,  ..., 0.8824, 0.8784, 0.8745],
          ...,
          [0.6118, 0.6314, 0.6510,  ..., 0.5961, 0.6078, 0.6196],
          [0.5961, 0.6235, 0.6392,  ..., 0.5882, 0.5961, 0.5922],
          [0.5725, 0.5961, 0.6157,  ..., 0.5961, 0.5765, 0.5725]],

         [[0.8863, 0.8902, 0.8980,  ..., 0.8784, 0.8706, 0.8667],
          [0.8902, 0.8941, 0.8980,  ..., 0.8784, 0.8745, 0.8706],
          [0.8902, 0.8980, 0.9020,  ..., 

### Image embedding

In [16]:
feature_extract = False # so we update the whole model 
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

In [17]:
def initialize_model(num_classes, feature_extract, use_pretrained=False):
    # Initialize these variables which will be set in this if statement. Each of these
    #   variables is model specific.
    model_ft = None
    input_size = 0

    model_ft = models.resnet152(pretrained=use_pretrained)
    set_parameter_requires_grad(model_ft, feature_extract)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, num_classes)
    input_size = 64
    
    return model_ft, input_size


In [18]:
num_classes = len(onehots_w2i)
model, input_size = initialize_model(num_classes, feature_extract)
model.fc.weight.shape
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [19]:
from sklearn.metrics import precision_score, f1_score, accuracy_score
from torch.optim import lr_scheduler

device = torch.device("cuda" if cuda else "cpu")
model = model.to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.001)
sgdr_partial = lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=0.005)
plateau = lr_scheduler.ReduceLROnPlateau(optimizer, patience=2)

In [20]:
def train_epoch(model, loader, optimizer, criterion, scheduler):
    model.train()
    running_loss = 0.0
    running_corrects = 0.0
    running_precision = 0.0
    running_accuracy = 0.0
    result = []
    
    start_time = time.time()
    for batch_idx, (image, target) in enumerate(loader):   
        image = image.to(device)
        target = target.to(device)
        output = model(image)
        loss = criterion(output, target) # Averaging losses from all vector components. 

        # We use sigmoid for prediction. Sigmoid is applied to each individual class in the 2700 classes. 
        # Therefore, if it's more than 0.5, we predict it as True. 
        preds = torch.sigmoid(output).data > 0.5
        preds = preds.to(torch.float32)
        
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()   
        
        running_loss += loss.item() * image.shape[0]
        
        # Metrics 
        f1 = f1_score(target.to("cpu").to(torch.int).numpy(), preds.to("cpu").to(torch.int).numpy(), average="samples")*image.size(0)
        precision = precision_score(target.to("cpu").to(torch.int).numpy(), preds.to("cpu").to(torch.int).numpy(), average="samples")*image.size(0)
        accuracy = accuracy_score(target.to("cpu").to(torch.int).numpy(), preds.to("cpu").to(torch.int).numpy())*image.size(0)

        running_corrects += f1
        running_precision += precision
        running_accuracy += accuracy

    end_time = time.time()
    epoch_loss = running_loss / len(loader.dataset)
    epoch_f1 = running_corrects / len(loader.dataset)
    epoch_precision = running_precision / len(loader.dataset)
    epoch_acc = running_accuracy / len(loader.dataset)
    
    result.append('Training Loss: {:.4f} F1: {:.4f} Acc: {:.4f} Prec: {:.4f}'.format(epoch_loss, epoch_f1, epoch_acc, epoch_precision))
    print(result)
    return epoch_loss, epoch_f1

In [21]:
def test_epoch(model, loader, criterion):
    model.eval()
    
    running_loss = 0.0
    running_corrects = 0.0
    running_precision = 0.0
    running_accuracy = 0.0

    result = []
    with torch.no_grad():
        for batch_idx, (image, target) in enumerate(loader):   
            image = image.to(device)
            target = target.to(device)

            output = model(image)
            loss = criterion(output, target)

            preds = torch.sigmoid(output).data > 0.5
            preds = preds.to(torch.float32)

            running_loss += loss.item() * image.shape[0]
            
            # Metrics 
            f1 = f1_score(target.to("cpu").to(torch.int).numpy(), preds.to("cpu").to(torch.int).numpy(), average="samples")*image.size(0)
            precision = precision_score(target.to("cpu").to(torch.int).numpy(), preds.to("cpu").to(torch.int).numpy(), average="samples")*image.size(0)
            accuracy = accuracy_score(target.to("cpu").to(torch.int).numpy(), preds.to("cpu").to(torch.int).numpy())*image.size(0)

            running_corrects += f1
            running_precision += precision
            running_accuracy += accuracy
        
    epoch_loss = running_loss / len(loader.dataset)
    epoch_f1 = running_corrects / len(loader.dataset)
    epoch_precision = running_precision / len(loader.dataset)
    epoch_acc = running_accuracy / len(loader.dataset)
    
    result.append('Testing Loss: {:.4f} F1: {:.4f} Acc: {:.4f} Prec: {:.4f}'.format(epoch_loss, epoch_f1, epoch_acc, epoch_precision))
    print(result)
    return epoch_loss, epoch_f1

In [22]:
len(train_dataloader.dataset), len(test_dataloader.dataset)

(13225, 3307)

In [23]:
def createCheckpoint(filename, batch_size):
    checkpoint = {
              'epoch': 5,
              'model_state_dict': model.state_dict(),
              'optimizer_state_dict': optimizer.state_dict(),
              "batch_size":batch_size,
    } # save all important stuff
    torch.save(checkpoint , filename)

In [None]:
train_losses = []
train_accs = []
test_losses = []
test_accs = []

save_model_path = './saved_models/multilabel_adam0.003_plateau/'
epochs=50
best_loss = 1000.0
for i in range(epochs):
    print('-----Training epoch {}/{} --------'.format(i,epochs-1))
    tr_loss, tr_acc = train_epoch(model, train_dataloader, optimizer, criterion, sgdr_partial)
    print('train epoch: {}, loss: {}'.format(i, tr_loss))
    print()
    print('------Testing epoch {}/{} --------'.format(i,epochs-1))
    tst_loss, tst_acc = test_epoch(model, test_dataloader, criterion)
    print('test epoch: {}, loss: {}'.format(i, tst_loss))
    plateau.step(tst_loss)
    
    train_losses.append(tr_loss)
    train_accs.append(tr_acc)
    test_losses.append(tst_loss)
    test_accs.append(tst_acc)
    
    if tst_loss < best_loss:
        best_loss = tst_loss
        save_path_resnet = save_model_path + 'resnet_{}.pt'.format(i)
        createCheckpoint(save_path_resnet, 64)
    

-----Training epoch 0/49 --------


  _warn_prf(average, modifier, msg_start, len(result))


['Training Loss: 0.0203 F1: 0.0200 Acc: 0.0000 Prec: 0.0986']
train epoch: 0, loss: 0.02031747657995819

------Testing epoch 0/49 --------
['Testing Loss: 0.0147 F1: 0.0777 Acc: 0.0000 Prec: 0.4179']
test epoch: 0, loss: 0.014714983216449415
-----Training epoch 1/49 --------
['Training Loss: 0.0147 F1: 0.0187 Acc: 0.0000 Prec: 0.1002']
train epoch: 1, loss: 0.014700609476205193

------Testing epoch 1/49 --------
['Testing Loss: 0.0147 F1: 0.0014 Acc: 0.0000 Prec: 0.0067']
test epoch: 1, loss: 0.014739728231678061
-----Training epoch 2/49 --------


### INFERENCE

In [1]:
def evaluate(model, eval_set):
    model.eval()
    model.to(device)
    for batch_idx, (image, target) in enumerate(loader):   
        image = image.to(device)
        target = target.to(device)

        output = model(image)
        loss = criterion(output, target)

        preds = torch.sigmoid(output).data > 0.5
        preds = preds.to(torch.float32)

        running_loss += loss.item() * image.shape[0]

        # Metrics 
        f1 = f1_score(target.to("cpu").to(torch.int).numpy(), preds.to("cpu").to(torch.int).numpy(), average="samples")*image.size(0)
        precision = precision_score(target.to("cpu").to(torch.int).numpy(), preds.to("cpu").to(torch.int).numpy(), average="samples")*image.size(0)
        accuracy = accuracy_score(target.to("cpu").to(torch.int).numpy(), preds.to("cpu").to(torch.int).numpy())*image.size(0)

        running_corrects += f1
        running_precision += precision
        running_accuracy += accuracy
        
        
    epoch_loss = running_loss / len(loader.dataset)
    epoch_f1 = running_corrects / len(loader.dataset)
    epoch_precision = running_precision / len(loader.dataset)
    epoch_acc = running_accuracy / len(loader.dataset)
    
    result.append('Testing Loss: {:.4f} F1: {:.4f} Acc: {:.4f} Prec: {:.4f}'.format(epoch_loss, epoch_f1, epoch_acc, epoch_precision))
    print(result)
    return epoch_loss, epoch_f1