In [16]:
import torch
from torch import nn
from torchvision import transforms

import cv2

In [21]:
# del new_model, resnet18, resnet50, resnet101
torch.cuda.empty_cache()

## Preprocessing

In [None]:
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

### Train

In [None]:
preprocessing = transforms.Compose([
    transforms.RandomSizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

### Val

In [None]:
preprocessing = transforms.Compose([
    transforms.Scale(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

In [7]:
INPUT_WIDTH = 224

In [8]:
normalise = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]) 

In [9]:
def preprocess(np_image_bgr):
    '''
    Preprocessing for embedder network: Flips BGR to RGB, resize, convert to torch tensor, normalise with imagenet mean and variance, reshape. Note: input image yet to be loaded to GPU through tensor.cuda()

    Parameters
    ----------
    np_image_bgr : ndarray
        (H x W x C) in BGR

    Returns
    -------
    Torch Tensor

    '''
    np_image_rgb = np_image_bgr[...,::-1]
    np_image_rgb = cv2.resize(np_image_rgb, (INPUT_WIDTH, INPUT_WIDTH))
    input_image = transforms.ToTensor()(np_image_rgb)
    input_image = normalise(input_image) 
    input_image = input_image.view(1,3,INPUT_WIDTH,INPUT_WIDTH)
    return input_image

In [94]:
from torch import nn
from pathlib import Path
import torchvision.models as models

class Resnet_Embedder(object):
    '''
    Resnet_Embedder loads a resnet pretrained on Imagenet1000, with classification layer removed, exposing the bottleneck layer, outputing a feature of size 1280. 
    '''
    def __init__(self, model_wts_path = None, depth=50, bs=8):
        if model_wts_path is None:
            pretrained=True
        else:
            pretrained=False
            assert Path(model_wts_path).is_file(),'Model path does not exists!'
            
        if depth == 18:
            full_model = models.resnet18(pretrained=pretrained)
        elif depth == 50:
            full_model = models.resnet50(pretrained=pretrained)
        elif depth == 101:
            full_model = models.resnet101(pretrained=pretrained)
        else:
            assert True,'Resnet with specified depth {} does not exist!'.format(depth)

        with torch.no_grad():
            self.model = nn.Sequential(*list(full_model.children())[:-1])
            del full_model
        self.model.cuda() #loads model to gpu
        self.model.eval() #inference mode, deactivates dropout layers 
        print('Resnet{} Embedder initialised!'.format(depth))
        self.bs = bs
        self.model.forward(torch.zeros(self.bs, 3, INPUT_WIDTH, INPUT_WIDTH).cuda()) #warmup

    def preprocess(self, np_image_bgr):
        '''
        Preprocessing for embedder network: Flips BGR to RGB, resize, convert to torch tensor, normalise with imagenet mean and variance, reshape. Note: input image yet to be loaded to GPU through tensor.cuda()

        Parameters
        ----------
        np_image_bgr : ndarray
            (H x W x C) in BGR

        Returns
        -------
        Torch Tensor

        '''
        np_image_rgb = np_image_bgr[...,::-1]
        np_image_rgb = cv2.resize(np_image_rgb, (INPUT_WIDTH, INPUT_WIDTH))
        input_image = transforms.ToTensor()(np_image_rgb)
        input_image = normalise(input_image) 
        input_image = input_image.view(1,3,INPUT_WIDTH,INPUT_WIDTH)
        return input_image

    def predict(self, np_image_bgr):
        '''
        singly inference

        Params
        ------
        np_image_bgr : ndarray
            list of (H x W x C) in BGR
        
        Returns
        ------
        features (np.array)

        '''
        input_img = self.preprocess(np_image_bgr).cuda()
        feats = self.model.forward(input_img)
        return feats[0].cpu().data.numpy().flatten()

    def predict_batch(self, list_of_np_image_bgr):
        '''
        singly inference

        Params
        ------
        list_of_np_image_bgr : list of ndarray
            list of (N x H x W x C) in BGR
        
        Returns
        ------
        list of features (np.array with dim = 1280)

        '''
        
        for i in range(0, len(list_of_np_image_bgr), self.bs):
            in_batch = preprocess(list_of_np_image_bgr[i:i+self.bs])
            res = self.model.forward(in_batch)
        
        input_img = self.preprocess(np_image_bgr).cuda()
        feats = self.model.forward(input_img)
        return feats[0].cpu().data.numpy().flatten()
    
    

In [95]:
resmodel = Resnet_Embedder(depth=50)

Resnet50 Embedder initialised!


In [76]:
import cv2
from pathlib import Path
from torch.utils.data import Dataset, DataLoader

class IndoorDataset(Dataset):
    """Indoor Scene dataset."""

    def __init__(self, root_dir, preproc=None):
        """
        Args:
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        assert Path(root_dir).is_dir()
        self.impaths = []
        for ext in ['jpg','png','jpeg']:
            self.impaths.extend(Path(root_dir).glob('*.{}'.format(ext)))
        self.preproc = preproc

    def __len__(self):
        return len(self.impaths)

    def __getitem__(self, idx):
        impath = self.impaths[idx]
        X = cv2.imread(str(impath))
        if self.preproc:
            X = self.preproc(X)
        y = '_'.join(impath.stem.split('_')[:2])

        return X, y


In [96]:
indoorDataset = IndoorDataset('/home/levan/Datasets/indoor_scenes/dsta', preproc=resmodel.preprocess)

In [97]:
import matplotlib.pyplot as plt
X, y = indoorDataset[10]
imgplot = plt.imshow(X)
print(y)
plt.show()

TypeError: Invalid dimensions for image data

In [91]:
dataloader = DataLoader(indoorDataset, batch_size=4, shuffle=False, num_workers=4)

In [92]:
next(iter(dataloader))[0].shape

torch.Size([4, 2268, 4032, 3])

In [80]:
for i, Xy in enumerate(dataloader):
    print(i)
    print(Xy)

RuntimeError: Traceback (most recent call last):
  File "/usr/local/lib/python3.5/dist-packages/torch/utils/data/dataloader.py", line 138, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/usr/local/lib/python3.5/dist-packages/torch/utils/data/dataloader.py", line 232, in default_collate
    return [default_collate(samples) for samples in transposed]
  File "/usr/local/lib/python3.5/dist-packages/torch/utils/data/dataloader.py", line 232, in <listcomp>
    return [default_collate(samples) for samples in transposed]
  File "/usr/local/lib/python3.5/dist-packages/torch/utils/data/dataloader.py", line 218, in default_collate
    return torch.stack([torch.from_numpy(b) for b in batch], 0)
RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 0. Got 2268 and 4032 in dimension 1 at /pytorch/aten/src/TH/generic/THTensorMoreMath.cpp:1307


In [37]:
input_image_path = '/home/levan/Datasets/indoor_scenes/dsta/Evan_1_0102_03_bright_light.jpg'
input_image = cv2.imread(input_image_path)

In [32]:
emb = resmodel.predict(input_image)

In [34]:
emb

array([0.43468854, 0.9910779 , 0.49240443, ..., 0.24038589, 0.6371722 ,
       0.18353225], dtype=float32)

In [39]:
emb = resmodel.predict_batch([input_image for _ in range(24)])

0
> <ipython-input-35-0b508a78bf15>(67)predict_batch()
-> for i in range(0, len(list_of_np_image_bgr), self.bs):
(Pdb) c
8
> <ipython-input-35-0b508a78bf15>(67)predict_batch()
-> for i in range(0, len(list_of_np_image_bgr), self.bs):
(Pdb) c
16
> <ipython-input-35-0b508a78bf15>(67)predict_batch()
-> for i in range(0, len(list_of_np_image_bgr), self.bs):
(Pdb) q


BdbQuit: 