# Loading Requirements

In [2]:
from collections import Counter, defaultdict
from IPython import display
from nltk import word_tokenize
from nltk.translate.bleu_score import sentence_bleu
from PIL import Image
from torch import nn, optim
from torch.autograd import Variable
from torchvision import models, transforms

import json
import matplotlib
import math
import time
import numpy as np
import matplotlib.pyplot as plt
import random
import torch
import torch.nn.functional as F
import torchvision
import os

ImportError: No module named torch

# Preprocessing

The following code preprocesses the data: it collects the image file addresses for each split and prepares the labels needed for ANP classification.

In [None]:
vso_images_folder = "data/vso/vso_images_with_cc/"


def _collect_split(images_folder, split_suffix):
    """
    Gather the ANP tags and image addresses of one dataset split.

    Every sub-directory of `images_folder` whose name ends with
    `split_suffix` (e.g. "_train") is treated as one ANP class. The tag is the
    directory name without that suffix and with underscores turned into
    spaces.

    Returns a tuple `(anp_tags, image_addresses, image_to_anp_tag)`:
    the list of tags, the list of ".jpg" file addresses, and a dict mapping
    each address to its tag.
    """
    anp_tags = []
    image_addresses = []
    image_to_anp_tag = {}
    # os.listdir() gives no ordering guarantee; sorting keeps the tag order
    # (and therefore each tag's one-hot index) identical between runs.
    for subdir in sorted(os.listdir(images_folder)):
        if not subdir.endswith(split_suffix):
            continue
        # Drop only the trailing suffix. str.replace() would also remove the
        # same text inside the tag itself (a folder named
        # "steam_train_train" would otherwise become "steam").
        anp_tag = subdir[:-len(split_suffix)].replace("_", " ")
        anp_tags.append(anp_tag)
        for filename in sorted(os.listdir(images_folder + subdir)):
            if filename.endswith(".jpg"):
                address = images_folder + subdir + "/" + filename
                image_addresses.append(address)
                image_to_anp_tag[address] = anp_tag
    return anp_tags, image_addresses, image_to_anp_tag


train_anp_tags, train_image_addresses, train_image_to_anp_tag = _collect_split(
    vso_images_folder, "_train")

validation_anp_tags, validation_image_addresses, validation_image_to_anp_tag = _collect_split(
    vso_images_folder, "_validation")

test_anp_tags, test_image_addresses, test_image_to_anp_tag = _collect_split(
    vso_images_folder, "_test")

# One-hot encode the training ANP tags: the i-th tag is paired with the i-th
# row of an identity matrix.
anp_tag_to_vector = dict(zip(train_anp_tags, np.eye(len(train_anp_tags))))
                                
# Report how many images were found for each split.
for split_label, split_mapping in (
    ("Number of train images: ", train_image_to_anp_tag),
    ("Number of validation images: ", validation_image_to_anp_tag),
    ("Number of test images: ", test_image_to_anp_tag),
):
    print(split_label, len(split_mapping))

least_height = 1000000
least_width = 1000000
count = 0


def _filter_small_images(image_addresses, min_side=256):
    """
    Split `image_addresses` into images that are large enough and the rest.

    An image is kept only when it can be opened and both its width and its
    height are strictly greater than `min_side`. Files that cannot be read
    are reported with a printed message and dropped.

    Returns a tuple `(kept, too_small, min_height, min_width)`: `kept` is a
    new list with the surviving addresses in their original order,
    `too_small` is the number of readable images rejected for their size, and
    `min_height` / `min_width` are the smallest dimensions among the kept
    images (`None` when nothing was kept).
    """
    kept = []
    too_small = 0
    min_height = None
    min_width = None
    for address in image_addresses:
        try:
            # The context manager closes the underlying file straight away.
            with Image.open(address) as image:
                # PIL reports size as (width, height), in that order.
                width, height = image.size
        except Exception:
            # Exception (not a bare except) so Ctrl-C still interrupts.
            print("Error occured at ", address)
            continue
        if height <= min_side or width <= min_side:
            too_small += 1
            continue
        kept.append(address)
        if min_height is None or height < min_height:
            min_height = height
        if min_width is None or width < min_width:
            min_width = width
    return kept, too_small, min_height, min_width


# A new list is built for every split instead of calling list.remove() while
# iterating, which silently skipped the element after each removal.
for split_addresses in (train_image_addresses, validation_image_addresses, test_image_addresses):
    kept_addresses, removed_small, split_min_height, split_min_width = _filter_small_images(split_addresses)
    # Slice assignment updates the existing list object in place.
    split_addresses[:] = kept_addresses
    count += removed_small
    if split_min_height is not None:
        least_height = min(least_height, split_min_height)
        least_width = min(least_width, split_min_width)

print("Removed images: ", count)

print("Minimum height of an image: ", least_height)
print("Minimum width of an image: ", least_width)
    
# Image preprocessing pipeline applied by load_image(): resize, centre-crop
# with `img_size`, then convert the PIL image to a tensor.
img_size = 256
_preprocessing_steps = [
    transforms.Resize(img_size),
    transforms.CenterCrop(img_size),
    transforms.ToTensor(),
]
loader = transforms.Compose(_preprocessing_steps)

def load_image(filename, volatile=False):
    """
    Load one image file and preprocess it into a single-image batch.

    The file is opened with PIL, converted to RGB, passed through the
    module-level `loader` pipeline, cast to float and given a leading batch
    dimension of size 1.

    Args:
        filename: path of the image file to read.
        volatile: forwarded unchanged to `Variable`.
            NOTE(review): recent PyTorch releases appear to ignore this
            flag - confirm against the installed version.

    Returns:
        The image wrapped in a `Variable`, moved to the GPU when CUDA is
        available and left on the CPU otherwise.
    """
    # convert() returns a fully loaded copy, so the file can be closed
    # immediately instead of leaking one handle per image.
    with Image.open(filename) as raw_image:
        image = raw_image.convert('RGB')
    image_tensor = loader(image).float()
    image_var = Variable(image_tensor, volatile=volatile).unsqueeze(0)
    # Only move to the GPU when one exists, so CPU-only machines no longer
    # crash here.
    if torch.cuda.is_available():
        image_var = image_var.cuda()
    return image_var

#print(load_image('data/vso/vso_images_with_cc/amazing_flowers/1066918516_e27cbf795e.jpg'))

# Sample Data Points

Showing an example data point from the training set, together with its associated ANP tag.

In [None]:
# Show the first training image, followed by its address and its ANP tag.
sample_address = train_image_addresses[0]
display.display(display.Image(sample_address))
print("Image: ", sample_address)
print("Associated ANP tag: ", train_image_to_anp_tag[sample_address])

# Model Definition

The following defines the model used for the ANP classifier.

In [None]:
class ANPClassifier(nn.Module):
    """
    ANP classifier built on a torchvision ResNet-101 backbone.

    The backbone is created with `pretrained=False` and its final
    fully-connected layer is replaced by a new linear layer that produces
    `output_size` scores.
    """

    def __init__(self, output_size):
        """
        Args:
            output_size: number of output scores (one per ANP tag).
        """
        super(ANPClassifier, self).__init__()

        self.output_size = output_size
        self.resnet = torchvision.models.resnet101(pretrained=False)
        # Read the head's input width from the layer being replaced rather
        # than hard-coding it, so the new head always matches the backbone.
        head_in_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Linear(in_features=head_in_features, out_features=output_size, bias=True)

    def forward(self, X):
        """Run the batch `X` through the backbone and return its output scores."""
        return self.resnet(X)

# Build the classifier with one output score per training ANP tag.
main_model = ANPClassifier(len(train_anp_tags))
# Bare expression so the notebook shows the model as the cell output.
main_model

# Training

The following describes the training procedure used to fit the ANP classifier.

In [None]:
USE_CUDA = True # when True, train_pass moves its inputs, the model and the criterion to the GPU

def train_pass(image_input, target_output, model, optimizer, criterion):
    """
    Run a single optimisation step on one batch.

    When `USE_CUDA` is set, the inputs, targets, model and criterion are
    moved to the GPU first. The gradients are then cleared, the loss of the
    model's predictions against `target_output` is computed and
    back-propagated, and the optimizer takes one step.

    Returns the batch loss, copied to the CPU and converted to NumPy.
    """
    if USE_CUDA:
        image_input, target_output = image_input.cuda(), target_output.cuda()
        model, criterion = model.cuda(), criterion.cuda()

    optimizer.zero_grad()
    batch_loss = criterion(model(image_input), target_output)
    batch_loss.backward()
    optimizer.step()

    return batch_loss.data.cpu().numpy()

def load_batch(image_addresses, volatile=False, image_to_anp_tag=None):
    """
    Load a batch of images together with their one-hot ANP targets.

    Args:
        image_addresses: non-empty sequence of image file addresses.
        volatile: forwarded to `load_image` for every image.
        image_to_anp_tag: mapping from image address to ANP tag. Defaults to
            `train_image_to_anp_tag`; pass another split's mapping to load
            batches from that split.

    Returns:
        A tuple `(img_tensor, target_tensor)`: the images returned by
        `load_image` concatenated along the first dimension, and a float
        tensor holding one `anp_tag_to_vector` row per image, in the same
        order.

    Raises:
        IndexError: if `image_addresses` is empty.
    """
    if len(image_addresses) == 0:
        raise IndexError("load_batch() needs at least one image address")
    if image_to_anp_tag is None:
        image_to_anp_tag = train_image_to_anp_tag

    # One concatenation over the whole list; growing the tensor with
    # torch.cat inside a loop re-copies the batch on every iteration.
    img_tensor = torch.cat([load_image(address, volatile=volatile) for address in image_addresses])

    target_array = np.stack([anp_tag_to_vector[image_to_anp_tag[address]] for address in image_addresses])

    return img_tensor, torch.from_numpy(target_array).float()

def train(model, learning_rate=0.0001, batch_size=50, epochs=50, shuffle=True):
    """
    Train `model` on the images listed in `train_image_addresses`.

    Each epoch walks over the training images in batches of `batch_size`,
    loading them with `load_batch` and optimising with `train_pass`, using
    `nn.MultiLabelSoftMarginLoss` and an Adam optimizer. After every epoch a
    checkpoint (epoch number, model state and optimizer state) is written to
    "outputs/anp_classifier_batch_<epoch>". Once all epochs are done the
    per-epoch average losses are written to "outputs/loss_anp_classifier".

    Args:
        model: the network to train.
        learning_rate: learning rate given to Adam.
        batch_size: number of images per batch.
        epochs: number of passes over the training images.
        shuffle: when True (default) the image order is re-shuffled at the
            start of every epoch. The address list is grouped by class, so
            unshuffled batches would contain almost only one class each.
            Pass False to iterate in list order.

    Raises:
        ValueError: if there are no training images.
    """
    # defining criterion and optimizer
    criterion = nn.MultiLabelSoftMarginLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    number_of_images = len(train_image_addresses)
    # Integer ceiling division: always an int, unlike math.ceil(a / b).
    number_of_batches = (number_of_images + batch_size - 1) // batch_size
    if number_of_batches == 0:
        raise ValueError("train_image_addresses is empty; there is nothing to train on")
    indexes = np.arange(number_of_images)

    # Create the output directory up front so the first checkpoint cannot
    # fail after a whole epoch of work.
    if not os.path.isdir("outputs"):
        os.makedirs("outputs")

    loss_arr = []
    for epoch in range(epochs):
        if shuffle:
            np.random.shuffle(indexes)
        avgloss = 0.0
        start = time.time()
        for batch in range(number_of_batches):
            batch_addresses = [train_image_addresses[i] for i in indexes[batch*batch_size:(batch+1)*batch_size]]
            image_batch, target_batch = load_batch(batch_addresses)
            loss = train_pass(image_batch, target_batch, model, optimizer, criterion)
            avgloss += loss
            if batch%50 == 0:
                print ("Done Batch:", batch, "\tAverage Loss Per Batch:", avgloss/(batch+1), "\t Current Batch Loss: ", loss)
        epoch_loss = avgloss/number_of_batches
        loss_arr.append(epoch_loss)
        print ("Epoch:",epoch, "\tTime:", time.time() - start, "\tAverage Loss Per Batch::", epoch_loss)
        # Save the state of the model and optimizer actually being trained
        # (the previous code referenced undefined `decoder` objects).
        checkpoint = {'epoch': epoch ,'state_dict': model.state_dict(),'optimizer': optimizer.state_dict()}
        with open("outputs/anp_classifier_batch_"+str(epoch), "wb+") as checkpoint_file:
            torch.save(checkpoint, checkpoint_file)
    loss_arr = np.array(loss_arr)
    # Written through a file object on purpose: given a path, np.save would
    # append ".npy" and change the file name.
    with open('outputs/loss_anp_classifier', 'wb+') as loss_file:
        np.save(loss_file, loss_arr)

# Train the ANP classifier with the default hyper-parameters.
train(main_model)