## Data Preparation 

In [10]:
# Import Block
import os
import time
import random
import numpy as np
import pandas as pd

from PIL import Image
from tqdm import tqdm

import torch
import torch.nn as nn
from torchvision import models, transforms
import matplotlib.pyplot as plt
from matplotlib import pyplot as plt
from matplotlib import patches
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_similarity


# Dataset Directories
data_dir = "D:\\Trade-Off_CBIR\\dataset\\Corel-1K"
data_dir_GHIM = "D:\\Trade-Off_CBIR\\dataset\\GHIM-10K"

print("Classes available (Corel-1K):", os.listdir(data_dir))
print("Classes available (GHIM-10K):", os.listdir(data_dir_GHIM))


# Device Configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


# Image Transform
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

Classes available (Corel-1K): ['africans', 'beaches', 'buildings', 'buses', 'dinosaurs', 'elephants', 'flowers', 'food', 'horses', 'mountains']
Classes available (GHIM-10K): ['aircraft', 'buildings', 'butterfly', 'cars', 'dragon fly', 'fireworks', 'flowers', 'hen', 'horses', 'insects', 'motorcycles', 'mountains', 'sea shores', 'ships', 'sunset', 'temples', 'trees', 'valleys', 'walls', 'yacht']
Using device: cuda


## Dataset: Corel-1K

### Tahap 1: Model Initialization

In [13]:
import cv2
import numpy as np
import torch
import torch.nn as nn
from skimage.feature import graycomatrix, graycoprops

class HOGBackbone:
    def __init__(self, resize_shape=(128, 128)):
        self.resize_shape = resize_shape
        self.hog = cv2.HOGDescriptor(
            _winSize=resize_shape,
            _blockSize=(16,16),
            _blockStride=(8,8),
            _cellSize=(8,8),
            _nbins=9
        )

    def __call__(self, img):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.resize(img, self.resize_shape)
        feat = self.hog.compute(img)
        return feat.flatten()


class GISTBackbone:
    def __init__(self, resize_shape=(128,128), num_blocks=4):
        self.resize_shape = resize_shape
        self.num_blocks = num_blocks
        self.thetas = [0, np.pi/6, np.pi/4, np.pi/3, np.pi/2]
        self.scales = [4, 8, 16]

    def __call__(self, img):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.resize(img, self.resize_shape)

        h, w = self.resize_shape
        bh, bw = h // self.num_blocks, w // self.num_blocks
        feats = []

        for i in range(self.num_blocks):
            for j in range(self.num_blocks):
                patch = img[i*bh:(i+1)*bh, j*bw:(j+1)*bw]
                for theta in self.thetas:
                    for k in self.scales:
                        kernel = cv2.getGaborKernel(
                            (k,k), 4.0, theta, 10.0, 0.5, 0
                        )
                        f = cv2.filter2D(patch, cv2.CV_32F, kernel)
                        feats.extend([f.mean(), f.std()])

        return np.array(feats, dtype=np.float32)


class GLCMBackbone:
    def __init__(
        self,
        resize_shape=(128,128),
        distances=(1, 2),
        angles=(0, np.pi/4, np.pi/2, 3*np.pi/4),
        levels=256,
        props=("contrast", "dissimilarity", "homogeneity", "energy", "correlation", "ASM")
    ):
        self.resize_shape = resize_shape
        self.distances = distances
        self.angles = angles
        self.levels = levels
        self.props = props

    def __call__(self, img):
        # grayscale
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.resize(img, self.resize_shape)

        # IMPORTANT: GLCM expects integer levels
        img = np.clip(img, 0, self.levels - 1).astype(np.uint8)

        glcm = graycomatrix(
            img,
            distances=self.distances,
            angles=self.angles,
            levels=self.levels,
            symmetric=True,
            normed=True
        )

        feats = []
        for prop in self.props:
            vals = graycoprops(glcm, prop)
            feats.extend(vals.flatten())

        return np.array(feats, dtype=np.float32)

In [14]:
class ClassicalContextEncoder(nn.Module):
    def __init__(self, backbone, backbone_output_dim, embedding_dim):
        super().__init__()
        self.backbone = backbone
        self.proj = nn.Linear(backbone_output_dim, embedding_dim)

    def forward(self, x):
        # x: batch of images (list of numpy images)
        feats = [self.backbone(img) for img in x]
        feats = torch.tensor(np.stack(feats), dtype=torch.float32)
        z = self.proj(feats)
        return z


class ClassicalTargetEncoder(nn.Module):
    def __init__(self, backbone, backbone_output_dim, embedding_dim):
        super().__init__()
        self.backbone = backbone
        self.proj = nn.Linear(backbone_output_dim, embedding_dim)

    def forward(self, x):
        feats = [self.backbone(img) for img in x]
        feats = torch.tensor(np.stack(feats), dtype=torch.float32)
        z = self.proj(feats)
        return z


class Predictor(nn.Module):
    def __init__(self, embedding_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(embedding_dim, embedding_dim),
            nn.ReLU(),
            nn.Linear(embedding_dim, embedding_dim)
        )

    def forward(self, z):
        return self.net(z)



# BACKBONES
hog_backbone = HOGBackbone()
glcm_backbone = GLCMBackbone()
gist_backbone = GISTBackbone()

# OUTPUT DIM (FIX, STATIC)
HOG_DIM  = len(hog_backbone(np.zeros((128,128,3), dtype=np.uint8)))
GLCM_DIM = len(glcm_backbone(np.zeros((128,128,3), dtype=np.uint8)))
GIST_DIM = len(gist_backbone(np.zeros((128,128,3), dtype=np.uint8)))

# CONTEXT ENCODERS (CLASSICAL)
context_encoders = {
    "hog":  ClassicalContextEncoder(hog_backbone,  HOG_DIM,  256),
    "glcm": ClassicalContextEncoder(glcm_backbone, GLCM_DIM, 256),
    "gist": ClassicalContextEncoder(gist_backbone, GIST_DIM, 256)
}

### Tahap 2: Feature Extraction (JOINT-EMBEDDING MODELS INITIALIZATION)

In [17]:
hog = HOGBackbone()
glcm = GLCMBackbone()
gist = GISTBackbone()

# output dim (static)
dummy = np.zeros((128,128,3), dtype=np.uint8)
HOG_DIM  = len(hog(dummy))
GLCM_DIM = len(glcm(dummy))
GIST_DIM = len(gist(dummy))

context_encoders = {
    "hog":  ClassicalContextEncoder(hog,  HOG_DIM),
    "glcm": ClassicalContextEncoder(glcm, GLCM_DIM),
    "gist": ClassicalContextEncoder(gist, GIST_DIM)
}

models_dict = {
    "hog":  {"baseline": hog,  "pretrained": context_encoders["hog"]},
    "glcm": {"baseline": glcm, "pretrained": context_encoders["glcm"]},
    "gist": {"baseline": gist, "pretrained": context_encoders["gist"]}
}


# =====================================================
# FEATURE STORAGE (SAMA PERSIS DENGAN CNN)
# =====================================================

features_dict = {
    m: {t: [] for t in ["baseline", "pretrained"]}
    for m in models_dict
}


# =====================================================
# FEATURE EXTRACTION LOOP (CNN-STYLE)
# =====================================================

for path in tqdm(img_paths, desc="Extracting Classical Features"):
    img = cv2.imread(path)

    for m_name in models_dict:
        for t_name in models_dict[m_name]:

            if t_name == "pretrained":
                feat = models_dict[m_name][t_name](img)
            else:
                feat = extract_features(models_dict[m_name][t_name], img)

            features_dict[m_name][t_name].append(feat)


# =====================================================
# L2 NORMALIZATION (SAMA PERSIS)
# =====================================================

for m_name in features_dict:
    for t_name in features_dict[m_name]:
        arr = np.array(features_dict[m_name][t_name])
        features_dict[m_name][t_name] = normalize(arr).tolist()


# =====================================================
# EXAMPLE ACCESS (SAMA PERSIS)
# =====================================================

hog_pre   = np.array(features_dict["hog"]["pretrained"])
glcm_base = np.array(features_dict["glcm"]["baseline"])
gist_base = np.array(features_dict["gist"]["baseline"])

TypeError: ClassicalContextEncoder.__init__() missing 1 required positional argument: 'embedding_dim'

### Tahap 3: Image Retrieval & Similarity Matching

### Tahap 4: Model Evaluation

## Dataset: GHIM-10K

### Tahap 2: Feature Extraction (JOINT-EMBEDDING MODELS INITIALIZATION)

### Tahap 3: Image Retrieval & Similarity Matching

### Tahap 4: Model Evaluation