In [1]:
import cv2
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms
from PIL import Image

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize((224, 224)),
        # transforms.Normalize(mean, std),
        # lambda x: torch.flip(x, [1]),
        # transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)
dataset = torchvision.datasets.ImageFolder(
    root="./data/caltech101/train",
    transform=transform,
)

valset = torchvision.datasets.ImageFolder(
    root="./data/caltech101/val",
    transform=transform,
)

testset = torchvision.datasets.ImageFolder(
    root="./data/caltech101/test",
    transform=transform,
)

dataloader = torch.utils.data.DataLoader(
    dataset, batch_size=128, shuffle=False, num_workers=2
)

valloader = torch.utils.data.DataLoader(
    valset, batch_size=128, shuffle=False, num_workers=2
)

testloader = torch.utils.data.DataLoader(
    testset, batch_size=128, shuffle=False, num_workers=2
)

In [2]:
from cbir import *
from cbir.pipeline import *

# rgb_histogram = RGBHistogram(n_bin=8, h_type="region")
# resnet = ResNetExtractor(model = "resnet18", device="cuda")
siftbow = SIFTBOWExtractor(mode="tfidf")
sift_array_store = NPArrayStore(retrieve=KNNRetrieval(metric="manhattan"))

rgb_histogram = RGBHistogram(n_bin=4, h_type="region")
color_array_store = NPArrayStore(retrieve=KNNRetrieval(metric="cosine"))

In [3]:
from tqdm import tqdm
import numpy as np
train_img = []
for images, labels in tqdm(valloader):
    images = (images.numpy().transpose(0,2,3,1) * 255).astype(np.uint8)
    train_img.append(images)
    
train_img = np.concatenate(train_img)
siftbow.fit(train_img, k=96)

100%|██████████| 11/11 [00:01<00:00,  7.48it/s]
Extracting Features: 100%|██████████| 1326/1326 [00:09<00:00, 140.35it/s]


Fit Kmeans clustering to create BOW
Fit IDF for TF-IDF Transformation
Complete Fitting SIFT BOW Extractor


In [4]:
cbir_sift = CBIR(siftbow, sift_array_store)
cbir_color = CBIR(siftbow, color_array_store)

In [5]:
for images, labels in tqdm(dataloader):
    images = (images.numpy().transpose(0,2,3,1) * 255).astype(np.uint8)
    # images = images.numpy()
    cbir_sift.indexing(images)
    cbir_color.indexing(images)

100%|██████████| 50/50 [01:43<00:00,  2.08s/it]


In [13]:
from cbir.entities.search_objects import ImageSearchObject
import torch


def ensemble_search(
    *args: list[ImageSearchObject], weights: list, datalength: int, k: int = 10
) -> list[ImageSearchObject]:
    assert len(args) == len(weights), "Arguments and weights must have same length"
    
    for arg in args:
        assert isinstance(
            arg[0], ImageSearchObject
        ), "Arguments must be list of ImageSearchObject"

    scores = torch.zeros(datalength)
    for search_list, weight in zip(args, weights):
        search_scores = torch.zeros(datalength).float()
        index_tensor = torch.tensor([i.index for i in search_list])
        value_tensor = torch.tensor([s.score for s in search_list]).float()
        search_scores = search_scores.scatter_(0, index_tensor, value_tensor) * weight
        
        scores += search_scores
    
    top = scores.topk(k)
    
    return [ImageSearchObject(index, score) for index, score in zip(top.indices, top.values)]


In [14]:
from time import time

# Retrieval
start = time()
rs = []
ground_truth = []
for images, labels in tqdm(testloader, desc="Retrieval"):
    images = (images.numpy().transpose(0, 2, 3, 1) * 255).astype(np.uint8)
    for image in images:
        rs.append(
            ensemble_search(
                cbir_sift.retrieve(image, k=100),
                cbir_color.retrieve(image, k=100),
                weights=[1.0, 0.8],
                datalength=len(dataset),
                k = 10
            )
        )
    ground_truth.extend(labels)
avg_retrieval_time = round((time() - start) / len(dataset), 6)

Retrieval: 100%|██████████| 12/12 [02:07<00:00, 10.66s/it]


In [11]:
# Evaluation
ap1 = []
hit1 = []
ap5 = []
hit5 = []
ap10 = []
hit10 = []
for r, g in zip(rs, ground_truth):
    predicted = []
    for i in r:
        predicted.append(i.index)
    class_preds = np.take(dataset.targets, predicted, axis=0)
    ap1.append(average_precision(class_preds.tolist(), [g.tolist()], 1))
    hit1.append(hit_rate(class_preds.tolist(), [g.tolist()], 1))
    ap5.append(average_precision(class_preds.tolist(), [g.tolist()], 5))
    hit5.append(hit_rate(class_preds.tolist(), [g.tolist()], 5))
    ap10.append(average_precision(class_preds.tolist(), [g.tolist()], 10))
    hit10.append(hit_rate(class_preds.tolist(), [g.tolist()], 10))

map1 = round(np.mean(ap1), 6)
avg_hit1 = round(np.mean(hit1), 6)
map5 = round(np.mean(ap5), 6)
avg_hit5 = round(np.mean(hit5), 6)
map10 = round(np.mean(ap10), 6)
avg_hit10 = round(np.mean(hit10), 6)

print(
            "map@1: ", map1,
            "map@5: ", map5,
            "map@10: ", map10,
            "hit_rate@1: ", avg_hit1,
            "hit_rate@5: ", avg_hit5,
            "hit_rate@10: ", avg_hit10,
        )

AttributeError: 'Tensor' object has no attribute 'index'

In [59]:
import pandas as pd
df = pd.read_csv("out/histogram_knn_eval.csv")

In [8]:
df.max(axis=0)

NameError: name 'df' is not defined