In [150]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.autograd import Variable
import PIL
import numpy as np

In [151]:
# Directory holding the images to process. File names are appended to this string
# directly (plain concatenation), so the trailing slash is required.
# NOTE(review): this name shadows the builtin `dir()`; it is kept because the
# functions below reference it by this name.
dir = '../data/train/'

In [152]:
def get_faster_rcnn():
    """Build the pretrained Faster R-CNN used to extract regions from images.

    The detector is the torchvision ResNet-50 FPN variant, created with
    pretrained weights, a pretrained backbone and no trainable backbone
    layers. It is switched to evaluation mode before being returned.
    """
    detector_options = dict(
        pretrained=True,
        pretrained_backbone=True,
        trainable_backbone_layers=0,
    )
    detector = models.detection.fasterrcnn_resnet50_fpn(**detector_options)
    detector.eval()
    return detector

In [153]:
def get_images():
    """Load the images to process and convert them to tensors.

    Only a single hard-coded test image is loaded from ``dir`` for now.

    Returns:
        tuple: ``(tensors, names)`` where ``tensors`` is a list of image
        tensors produced by ``transforms.ToTensor`` and ``names`` is the list
        of the corresponding image file names, in the same order.
    """
    # TODO: load ALL of the images :(
    test_image_name = 'VizWiz_train_00000258.jpg'
    # The context manager closes the file handle once the pixels are read.
    # convert('RGB') guarantees three channels, so grayscale / RGBA / CMYK
    # files cannot break the 3-channel detector downstream.
    with PIL.Image.open(dir + test_image_name) as opened:
        image = np.array(opened.convert('RGB'))
    tensor = transforms.ToTensor()(image)
    # First element: list of tensors (transformed images);
    # second element: list of the names of those images.
    return ([tensor], [test_image_name])

In [154]:
def get_regions(images, score_threshold=0.45, model=None):
    """Detect regions in every image and keep the confident ones.

    Args:
        images: pair ``(tensors, titles)`` -- a list of image tensors followed
            by the list of their names, in the same order.
        score_threshold: a detection is kept only when its score is strictly
            greater than this value. Defaults to 0.45; it can be tuned.
        model: optional detector, called as ``model(tensors)`` and expected to
            return one dict per image with ``'boxes'`` and ``'scores'``
            tensors. When ``None`` a detector is built with
            ``get_faster_rcnn()``. Passing one in avoids rebuilding it on
            every call.

    Returns:
        dict: image name -> list of numpy arrays, one per kept region. Each
        array holds the four box values taken from ``prediction['boxes']``
        (torchvision documents that layout as ``[x1, y1, x2, y2]``).
    """
    tensors, image_titles = images[0], images[1]
    if len(tensors) == 0:
        # Nothing to detect; also avoids building the detector for no reason.
        return {}
    if model is None:
        model = get_faster_rcnn()

    # Inference only: no_grad skips building the autograd graph.
    with torch.no_grad():
        predictions = model(tensors)

    images_regions = {}
    for imgindex, prediction in enumerate(predictions):
        title = image_titles[imgindex]
        boxes = prediction['boxes']
        scores = prediction['scores']
        # Boolean mask selects every box whose score passes the threshold.
        kept = boxes[scores > score_threshold].detach().cpu().numpy()
        images_regions[title] = list(kept)
        print(title + ': ', len(images_regions[title]), ' regions')
    return images_regions

In [155]:
def get_feature_vectors(image_regions):
    """Compute a feature vector for every region of every image.

    Args:
        image_regions: dict mapping an image file name (relative to ``dir``)
            to a list of regions; each region is indexed as
            ``(x1, y1, x2, y2)``.

    Returns:
        dict: image file name -> list of feature vectors, one per region, in
        the same order as the regions. Each vector is whatever
        ``get_feature_vector`` returns for the cropped region.
    """
    images_feature_vectors = {}
    for title, regions in image_regions.items():
        # Close the file handle as soon as the pixels are loaded. convert()
        # returns an independent in-memory copy and guarantees three channels
        # for the RGB normalisation applied later.
        with PIL.Image.open(dir + title) as opened:
            image = opened.convert('RGB')
        feature_vectors = []
        for region in regions:
            x1, y1, x2, y2 = region[0], region[1], region[2], region[3]
            region_image = image.crop((x1, y1, x2, y2))
            feature_vectors.append(get_feature_vector(region_image))
        images_feature_vectors[title] = feature_vectors
    return images_feature_vectors

In [156]:
_RESNET18_CACHE = {}


def _get_resnet18():
    """Return the shared pretrained ResNet-18 in eval mode, loading it on first use."""
    if 'model' not in _RESNET18_CACHE:
        resnet = models.resnet18(pretrained = True)
        resnet.eval()
        _RESNET18_CACHE['model'] = resnet
    return _RESNET18_CACHE['model']


def get_feature_vector(image):
    """Embed one image with the output of ResNet-18's ``avgpool`` layer.

    Args:
        image: a PIL image (for example a cropped region). It is converted to
            RGB when it is in another mode, resized to 224x224 and normalised
            with the mean/std constants below before being fed to the network.

    Returns:
        numpy.ndarray: the flattened ``avgpool`` output for this image (a
        vector of length 512 for ResNet-18).
    """
    # The network is loaded once and reused; reloading pretrained weights for
    # every single region is by far the most expensive part of this function.
    model = _get_resnet18()
    layer = model._modules.get('avgpool')

    # Normalize below uses three-channel statistics, so force RGB.
    if image.mode != 'RGB':
        image = image.convert('RGB')

    scaler = transforms.Resize((224, 224))
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    to_tensor = transforms.ToTensor()

    # Transformed image with a leading batch dimension of 1.
    t_img = normalize(to_tensor(scaler(image))).unsqueeze(0)

    # The forward hook stores the output of the selected layer here.
    captured = []

    def my_hook(module_, input_, output_):
        captured.append(output_)

    h = layer.register_forward_hook(my_hook)
    try:
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            model(t_img)
    finally:
        # Always detach the hook, even if the forward pass fails, so hooks do
        # not pile up on the shared model.
        h.remove()

    # Flatten the hooked output into a 1-D vector.
    return captured[0].reshape(-1).detach().cpu().numpy()

In [157]:
# Run the whole pipeline: load the image(s), detect their regions, then compute a
# feature vector for each region. The result maps image file name -> list of
# per-region feature vectors.
images_feature_vectors = get_feature_vectors(get_regions(get_images()))

VizWiz_train_00000258.jpg:  15  regions
