In [28]:
import torch, torchvision
import torchvision.transforms as transforms
from PIL import Image
import json, datetime

CLASS_LABEL_PATH = "../../ADARI/furniture/ADARI_furniture_onehots.json"
IMAGE_FOLDER = "../../ADARI/v2/full"


In [20]:
def open_json(path):
    f = open(path) 
    data = json.load(f) 
    f.close()
    return data 

class ADARIMultiHotDataset(torch.utils.data.Dataset):
    def __init__(self, image_folder, class_label_file, image_size):
        super(ADARIMultiHotDataset).__init__()
        
        self.image_size = image_size
        self.image_folder = image_folder
        self.class_label_file = class_label_file
        self.transform = transforms.Compose([
                               transforms.Resize(image_size),
                               transforms.CenterCrop(image_size),
                               transforms.ToTensor(),
                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                           ])
        self.im_to_one_hots = open_json(self.class_label_file)
        self.im_names = list(self.im_to_one_hots.keys())
        self.num_classes = len(self.im_to_one_hots[self.im_names[0]])
        
    def __len__(self):
        return len(self.im_names)
        
    def __getitem__(self, idx):
        imname = self.im_names[idx]
        
        img = Image.open(self.image_folder + '/' + imname)
        return self.transform(img), torch.tensor(self.im_to_one_hots[imname])
        

In [21]:
# Load Data

data = ADARIMultiHotDataset(IMAGE_FOLDER, CLASS_LABEL_PATH, 64)
vocab_size = data.num_classes

train_set, test_set = torch.utils.data.random_split(data, 
                                                    [int(.8 * len(data)), len(data) - int(.8 * len(data))], 
                                                    generator=torch.Generator().manual_seed(42))

In [22]:
# Create model

vgg = torchvision.models.vgg16()
vgg.classifier[6] = torch.nn.Linear(4096, vocab_size)


In [23]:
# Training Parameters
batch_size = 128
num_workers = 1
lr = 0.01
num_epochs = 100
device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")

In [26]:
# Training loop

def train(model):
    model.train()
    model.to(device)
    criterion = torch.nn.CrossEntropyLoss()
    dataloader = torch.utils.data.DataLoader(train_set, 
                                            batch_size=batch_size, 
                                            shuffle=True, 
                                            num_workers=num_workers)
    optimizer = torch.optim.Adam(vgg.parameters(), lr=lr)
    
    
    for epoch in range(num_epochs):
        losses = []
        for im, labels in dataloader:
            im = im.to(device)
            labels = labels.to(device)
            
            l_hat = vgg(im)
            loss = criterion(l_hat, labels)
            loss.backward()
            optimizer.step()
            
            losses.append(loss.item())
            print(losses[-1])
        print(f"Avg Loss at Epoch {epoch}: {sum(losses) / len(losses)}")
        
        

In [29]:
model_name = datetime.datetime.now()
try:
    train(vgg)
except KeyboardInterrupt:
    pass

torch.save(vgg.state_dict(), f"VGG16_ADARI_{model_name}.pth")

AttributeError: 'Subset' object has no attribute 'im_names'

In [32]:
im = Image.open(IMAGE_FOLDER + '/' + "0a2e5ec5079d9424e239d3dc639f7e1d20c6fba9.jpg")
im = transforms.Compose([transforms.Resize(64),
                               transforms.CenterCrop(64),
                               transforms.ToTensor(),
                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                           ])(im)
print(vgg(im.reshape(1, im.shape[0], im.shape[1], im.shape[2])))

torch.Size([1, 12288])


RuntimeError: Expected 4-dimensional input for 4-dimensional weight [64, 3, 3, 3], but got 2-dimensional input of size [1, 12288] instead