In [4]:
# !pip install -r requirements.txt

In [1]:
# !pip install torch==1.10.2+cpu torchvision==0.11.3+cpu torchaudio==0.10.2+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from datasets.transforms import ReidTransforms

In [3]:
class ImageFolderWithPaths(ImageFolder):
    """Custom dataset that includes image file paths. Extends
    torchvision.datasets.ImageFolder
    """

    # override the __getitem__ method. this is the method that dataloader calls
    def __getitem__(self, index):
        # this is what ImageFolder normally returns 
        original_tuple = super(ImageFolderWithPaths, self).__getitem__(index)
        # the image file path
        path = self.imgs[index][0]
        # make a new tuple that includes original and the path
        tuple_with_path = (original_tuple + (path,))
        return tuple_with_path

def make_inference_data_loader(cfg, path, dataset_class):
    transforms_base = ReidTransforms(cfg)
    val_transforms = transforms_base.build_transforms(is_train=False)
    num_workers = cfg.DATALOADER.NUM_WORKERS
    val_set = dataset_class(path, val_transforms)
    val_loader = DataLoader(
        val_set, batch_size=cfg.TEST.IMS_PER_BATCH, shuffle=False, num_workers=num_workers
    )
    return val_loader


In [4]:
from PIL import Image
from torch.utils.data import Dataset

IMG_EXTENSIONS = (".jpg", ".jpeg", ".png", ".ppm", ".bmp", ".pgm", ".tif", ".tiff", ".webp")

from torchvision.datasets.folder import is_image_file

def pil_loader(path: str) -> Image.Image:
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, "rb") as f:
        img = Image.open(f)
        return img.convert("RGB")

    
class ImageDataset(Dataset):
    """Image Person ReID Dataset"""

    def __init__(self, dataset: list, transform = None, loader=pil_loader):
        self.dataset = dataset
        self.transform = transform
        self.loader = loader

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        img_path = self.dataset[index]
        img = self.loader(img_path)

        if self.transform is not None:
            img = self.transform(img)

        return img, '', img_path ## Hack to be consistent with ImageFolderWithPaths dataset


In [52]:
from typing import List, Callable, Dict, Union
def get_all_images(path: Union[str, List[str]]) -> List[str]:
    if os.path.isdir(path):
        print('is-dir')
        images = os.listdir(path)
        print(images)
        images = [os.path.join(path, item) for item in images if is_image_file(item)]
        return images
    elif is_image_file(path):
        return [path]
    else:
        raise Exception(f"{path} is neither a path to a valid image file nor a path to folder containing images")

In [53]:
from config import cfg
import os

In [54]:
class args():
    pass
args.config_file = "configs/320_resnet50_ibn_a.yml"
args.opts = ["TEST.ONLY_TEST", "True", "GPU_IDS", []]

In [55]:
if args.config_file != "":
    cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)

In [56]:
test_path = './test-images/gallery' # To test image taken from a single folder
# test_path = 'test-images-folder'  # To test image where each pids is stored in a seperate directory

In [57]:
images_paths = get_all_images(test_path)

is-dir
['wc2121101spa-021_06.jpg', 'rolex_gallery.jpg', 'wc2120102spa-027_04.jpg', 'wc2120102spa-027_06.jpg', 'wc2121101spa-021_04.jpg', 'apple_watch_gallery.jpg', 'wc2120102evb-011_04.jpg', 'wc2120102evb-011_05.jpg', 'wc2120102evb-011_07.jpg', 'wc2120102spa-027_05.jpg']


In [58]:
images_paths

['./test-images/gallery/wc2121101spa-021_06.jpg',
 './test-images/gallery/rolex_gallery.jpg',
 './test-images/gallery/wc2120102spa-027_04.jpg',
 './test-images/gallery/wc2120102spa-027_06.jpg',
 './test-images/gallery/wc2121101spa-021_04.jpg',
 './test-images/gallery/apple_watch_gallery.jpg',
 './test-images/gallery/wc2120102evb-011_04.jpg',
 './test-images/gallery/wc2120102evb-011_05.jpg',
 './test-images/gallery/wc2120102evb-011_07.jpg',
 './test-images/gallery/wc2120102spa-027_05.jpg']

In [59]:
# val_loader = make_inference_data_loader(cfg, test_path, ImageFolderWithPaths)
val_loader = make_inference_data_loader(cfg, images_paths, ImageDataset)

In [60]:
val_loader.dataset.dataset

['./test-images/gallery/wc2121101spa-021_06.jpg',
 './test-images/gallery/rolex_gallery.jpg',
 './test-images/gallery/wc2120102spa-027_04.jpg',
 './test-images/gallery/wc2120102spa-027_06.jpg',
 './test-images/gallery/wc2121101spa-021_04.jpg',
 './test-images/gallery/apple_watch_gallery.jpg',
 './test-images/gallery/wc2120102evb-011_04.jpg',
 './test-images/gallery/wc2120102evb-011_05.jpg',
 './test-images/gallery/wc2120102evb-011_07.jpg',
 './test-images/gallery/wc2120102spa-027_05.jpg']

In [61]:
len(val_loader)

1

In [18]:
for batch in val_loader:
#     print(batch[0], batch[1])
    print(batch[1], batch[2])

TypeError: object of type 'NoneType' has no len()

In [13]:
from train_ctl_model import CTLModel
import pytorch_lightning as pl
import torch

In [14]:
# checkpoint_path = 'reid_weights.pth' ### This is just Pytorch wegihts therefore it requires different handling
# checkpoint_path = '/home/mwieczorek/centroids-reid/logs/df1_new/320_resnet50_ibn_a/train_ctl_model/version_2/auto_checkpoints/checkpoint_119.pth'
checkpoint_path = './auto_checkpoints/checkpoint_119.pth'

In [15]:
# or call with pretrained model
# model = CTLModel.load_from_checkpoint(checkpoint_path)
# model = CTLModel.load_from_checkpoint(checkpoint_path, map_location={'cuda': 'cpu'})
model = CTLModel(cfg, num_classes=1)
model.load_state_dict(state_dict=torch.load(checkpoint_path, map_location=torch.device('cpu'))['state_dict'], strict=False)

_IncompatibleKeys(missing_keys=[], unexpected_keys=['center_loss.centers', 'fc_query.weight'])

In [16]:
# model = model.cuda()

In [17]:
def _inference(model, batch, normalize_with_bn=True):
    model.eval()
    with torch.no_grad():
        data, _, filename = batch
        _, global_feat = model.backbone(data.cuda() if torch.cuda.is_available() else data)
        if normalize_with_bn:
            global_feat = model.bn(global_feat)
        return global_feat, filename

In [18]:
import numpy as np

In [19]:
vecs = []
paths = []

for pos, x in enumerate(val_loader):
#     if pos % print_freq == 0:
#         log.info(f'Number of processed images: {pos*cfg.TEST.IMS_PER_BATCH}')
    vec, path = _inference(model, x)
    for vv, pp in zip(vec, path):
        paths.append(pp)
        vecs.append(vv.detach().cpu().numpy())

all_vecs = np.vstack(vecs)
paths = np.array(paths)
all_vecs = np.array(all_vecs)

#     return all_vecs, paths

In [20]:
paths

array(['test-images-folder/apple_watch/apple_watch_gallery.jpg',
       'test-images-folder/rolex/rolex_gallery.jpg',
       'test-images-folder/wc2120102evb-011/wc2120102evb-011_04.jpg',
       'test-images-folder/wc2120102evb-011/wc2120102evb-011_05.jpg',
       'test-images-folder/wc2120102evb-011/wc2120102evb-011_07.jpg',
       'test-images-folder/wc2120102spa-027/wc2120102spa-027_04.jpg',
       'test-images-folder/wc2120102spa-027/wc2120102spa-027_05.jpg',
       'test-images-folder/wc2120102spa-027/wc2120102spa-027_06.jpg',
       'test-images-folder/wc2121101spa-021/wc2121101spa-021_04.jpg',
       'test-images-folder/wc2121101spa-021/wc2121101spa-021_06.jpg'],
      dtype='<U59')

In [21]:
all_vecs.shape

(10, 2048)

In [22]:
os.path.split('test-images/gallery/wc2121101spa-021_06.jpg')[0]

'test-images/gallery'

In [23]:
os.path.dirname('test-images/gallery/wc2121101spa-021_06.jpg')

'test-images/gallery'

In [24]:
import os
from pathlib import Path
# import pandas as pd

In [25]:
# def create_pid_path_index_from_filename(paths):
#     paths_pids = np.array([osp.basename(item).rsplit('.', 1)[0].rsplit('_', 1)[0] for item in paths])
#     pids_set = np.array(sorted(set(paths_pids)))
#     temp = [[item, idx] for idx,item in enumerate(paths_pids)]
#     df_pids_inds = pd.DataFrame(temp)
#     df_pids_inds = df_pids_inds.groupby(0)[1].apply(list).reset_index()
#     pid_path_index = dict(zip(df_pids_inds[0], df_pids_inds[1]))

#     return pid_path_index

In [26]:
def create_pid_path_index(paths: List[str], func: Callable[[str], str]) -> Dict[str, list]: 
    paths_pids = [func(item) for item in paths] ## To jako parametr, funkcja, która wyciąga pair-id z patha
    pid2paths_index = {}
    for idx,item in enumerate(paths_pids):
        if item not in pid2paths_index:
            pid2paths_index[item] = [idx]
        else:
            pid2paths_index[item].append(idx)

    return pid2paths_index

In [27]:
create_pid_path_index(paths=paths, func=lambda x: (x).rsplit('.', 1)[0].rsplit('_', 1)[0]) ## To extract pid from filename. Example: /path/to/dir/product001_04.jpg -> pid = product001

{'test-images-folder/apple_watch/apple_watch': [0],
 'test-images-folder/rolex/rolex': [1],
 'test-images-folder/wc2120102evb-011/wc2120102evb-011': [2, 3, 4],
 'test-images-folder/wc2120102spa-027/wc2120102spa-027': [5, 6, 7],
 'test-images-folder/wc2121101spa-021/wc2121101spa-021': [8, 9]}

In [28]:
pid_path_index = create_pid_path_index(paths=paths, func=lambda x: Path(x).parent.name) ## To extract pid from parent directory of an iamge. Example: /path/to/root/001/image_04.jpg -> pid = 001

In [29]:
pid_path_index

{'apple_watch': [0],
 'rolex': [1],
 'wc2120102evb-011': [2, 3, 4],
 'wc2120102spa-027': [5, 6, 7],
 'wc2121101spa-021': [8, 9]}

In [30]:
def calculate_centroids(all_vecs, pid_path_index):
    ### CALULCATE CENTROIDS FOR EACH UNIQUE PID
    pids_centroids_inds = []
    centroids = []
    for pid, indices in pid_path_index.items():
        inds = np.array(indices)
        pids_vecs = all_vecs[inds]
        length = pids_vecs.shape[0]
        centroid = np.sum(pids_vecs,0)/length
        pids_centroids_inds.append(pid)
        centroids.append(centroid)
    centroids_arr = np.vstack(np.array(centroids))
    pids_centroids_inds = np.array(pids_centroids_inds, dtype=np.str_)

    return centroids_arr, pids_centroids_inds


In [31]:
centroids_arr, pids_centroids_inds = calculate_centroids(all_vecs, pid_path_index)

In [32]:
centroids_arr

array([[-1.7014455 ,  1.5409266 ,  1.6630719 , ...,  0.5662677 ,
        -0.41568708, -0.48824012],
       [ 0.25610152, -0.7836398 , -2.0181324 , ...,  1.0076957 ,
        -1.2709763 , -1.0479494 ],
       [ 0.477285  ,  1.8517109 , -0.08200178, ...,  0.5469231 ,
        -0.60911685, -1.4273696 ],
       [ 0.11328981, -0.5193767 , -1.0376171 , ..., -1.1393673 ,
        -0.46810064,  0.15353815],
       [-0.24071372, -1.7749814 , -2.0049882 , ...,  3.832631  ,
        -0.49882045,  1.1599592 ]], dtype=float32)

In [33]:
pids_centroids_inds

array(['apple_watch', 'rolex', 'wc2120102evb-011', 'wc2120102spa-027',
       'wc2121101spa-021'], dtype='<U16')

In [34]:
from pathlib import Path
ROOT_DIR = Path('./')
save_dir_name = 'gallery-data'
SAVE_DIR = ROOT_DIR / save_dir_name


SAVE_DIR.mkdir(exist_ok=True, parents=True)

In [35]:
np.save(SAVE_DIR / 'embeddings.npy', all_vecs)
np.save(SAVE_DIR / 'paths.npy', paths)

AttributeError: 'numpy.ndarray' object has no attribute 'norm'