In [1]:
import torch
import numpy as np

# ----------------------------------------------
# Notebook Snippet: Verificar equivalencia de backbones
# ----------------------------------------------

from image_retrieval.backbones.cvnet.cvnet_backbone import CVNetBackbone    # Ruta de ejemplo al código antiguo
from landmark_detection.resnet import ResNet              # Ruta de ejemplo al código nuevo

# Parameters (adjust these to match your actual setup):
RESNET_DEPTH    = 50      # depth passed to both backbone constructors
REDUCTION_DIM   = 2048    # reduction dimension passed to both backbone constructors
OLD_PRETRAINED_PATH = "image_retrieval/backbones/cvnet/CVPR2022_CVNet_R50.pyth"  # checkpoint loaded by the OLD code (CVNetBackbone)
NEW_PRETRAINED_PATH = "landmark_detection/CVNet_50_2048.pth"  # checkpoint loaded by the NEW code (ResNet)

In [2]:
# Old backbone: the legacy wrapper receives the checkpoint path directly
# through `pretrained_weights`.
old_backbone = CVNetBackbone(depth=RESNET_DEPTH, reduction_dim=REDUCTION_DIM, pretrained_weights=OLD_PRETRAINED_PATH)
# Last expression of the cell, so its return value (the module repr) is displayed.
# Presumably this also switches the wrapped model to inference mode, as
# nn.Module.eval() does — TODO confirm CVNetBackbone is an nn.Module.
old_backbone.eval()

CVNetBackbone(
  (model): CVNet_Rerank(
    (encoder_q): ResNet(
      (stem): ResStemIN(
        (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (pool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      )
      (s1): ResStage(
        (b1): ResBlock(
          (proj): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (f): BottleneckTransform(
            (a): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (a_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (a_relu): ReLU(inplace=True)
            (b): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (b

In [3]:
# New backbone: constructed without weights, then loaded explicitly below.
new_backbone = ResNet(RESNET_DEPTH, REDUCTION_DIM)
# The comparison needs the same weights as the old backbone.
# NOTE(review): they are read from NEW_PRETRAINED_PATH, a different file than
# OLD_PRETRAINED_PATH; this assumes both files hold the same parameters — confirm.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (consider weights_only=True where the installed PyTorch supports it).
checkpoint = torch.load(NEW_PRETRAINED_PATH, map_location="cpu")
# Accept both a wrapped checkpoint ({"state_dict": ...}) and a bare state dict.
state_dict = checkpoint.get("state_dict", checkpoint)
# strict=True makes any missing or unexpected key raise instead of being ignored.
new_backbone.load_state_dict(state_dict, strict=True)
# Last expression of the cell: switches to eval mode and displays the module repr.
new_backbone.eval()

ResNet(
  (stem): ResStemIN(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (pool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (s1): ResStage(
    (b1): ResBlock(
      (proj): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (f): BottleneckTransform(
        (a): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (a_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (a_relu): ReLU(inplace=True)
        (b): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (b_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (b_relu): ReLU(inplace=True

In [4]:
# 3) Build a random test batch.
#    A typical input size is used (224x224). The generator is seeded so that
#    Restart Kernel -> Run All feeds both backbones the same batch every time.
H, W = 224, 224
batch_size = 2  # any value >= 1 works; 2 also exercises the batch dimension
IMG_SEED = 0    # seed for the random test batch

# NumPy image batch in NHWC layout, float32 values in [0, 1)
img_np = np.random.default_rng(IMG_SEED).random((batch_size, H, W, 3), dtype=np.float32)

In [5]:
# 4) Run the same batch through both backbones:

# --- Old backbone: takes the NumPy NHWC batch and returns NumPy NHWC (n, h, w, C) ---
fm_old_nhwc = old_backbone.predict(img_np)  # shape: (batch_size, h, w, C)

# Convert to an NCHW tensor so it can be compared with the new backbone's output:
fm_old = torch.from_numpy(fm_old_nhwc).permute(0, 3, 1, 2)  # (batch_size, C, h, w)

# --- New backbone: takes an NCHW torch.Tensor ---
# Convert img_np to an NCHW torch.Tensor (permute returns a view of the same memory)
img_tensor = torch.from_numpy(img_np).permute(0, 3, 1, 2)  # (batch_size, 3, H, W)

# Gradients are not needed for a pure forward comparison.
with torch.no_grad():
    fm_new = new_backbone(img_tensor)  # (batch_size, 2048, h, w)

In [6]:
# 5) Compare the two feature maps:

print("Shape FM old (NCHW):", fm_old.shape)
print("Shape FM new (NCHW):", fm_new.shape)

# Check that they are numerically (almost) equal.
# Only atol is overridden here; torch.allclose also applies its default rtol on top.
are_close = torch.allclose(fm_old, fm_new, atol=1e-5)
# Largest element-wise absolute difference, as a Python float.
max_diff = (fm_old - fm_new).abs().max().item()

print(f"¿Outputs iguales (tol=1e-5)? {are_close}")
print(f"Diferencia máxima entre tensores: {max_diff:.6f}")

Shape FM old (NCHW): torch.Size([2, 2048, 7, 7])
Shape FM new (NCHW): torch.Size([2, 2048, 7, 7])
¿Outputs iguales (tol=1e-5)? True
Diferencia máxima entre tensores: 0.000000


In [7]:
import numpy as np
import tensorflow as tf

from image_retrieval.global_features.pooling import Pooling
from landmark_detection.pooling import SuperGlobalExtractor, RGEM_Batch, GEMp_Batch, SGEM_Batch

In [8]:
# Test parameters for the pooling comparison.
# Note: batch_size, H and W are rebound here; from this cell on H and W are the
# feature-map size, not the image size used for the backbone check.
batch_size = 2
aug = 3                 # number of augmentations (scales) per sample
N = batch_size * aug    # total number of feature maps
C = 2048
H = 7
W = 7
FM_SEED = 0             # seed for the random feature maps

# 1) Random feature maps in NHWC layout (the layout the old code consumes).
#    Seeded so that Restart Kernel -> Run All reproduces the same numbers.
fm_nhwc = np.random.default_rng(FM_SEED).random((N, H, W, C), dtype=np.float32)

In [9]:
# 1a) Old RGEM output (TF/NumPy), computed directly on the NHWC array:
pool_rgem = Pooling(method="rgem", method_params={"pr": 2.5, "size": 5})
rgem_old = pool_rgem(fm_nhwc)           # (N, H, W, C)

# 1b) New RGEM output (PyTorch), same pr/size as the old one:
rgem_new_mod = RGEM_Batch(pr=2.5, size=5)
# convert the input to an NCHW tensor
fm_nchw = torch.from_numpy(fm_nhwc).permute(0, 3, 1, 2)
with torch.no_grad():
    # permute the result back to NHWC so it lines up element-wise with rgem_old
    rgem_new = rgem_new_mod(fm_nchw).permute(0, 2, 3, 1).cpu().numpy()  # (N, H, W, C)

# Largest element-wise absolute difference between the two implementations.
print("RGEM: máximo diff =", np.max(np.abs(rgem_old - rgem_new)))

RGEM: máximo diff = 5.9604645e-08


In [10]:
# —— 2) GeM ——
#
# Both implementations receive the SAME input (`rgem_new`, the NHWC NumPy
# array from the RGEM cell), so the printed difference measures the GeM stage
# alone and does not include any RGEM discrepancy.
# `rgem_old` is deliberately left untouched: the NCHW view of the input gets
# its own name instead of overwriting it with a tensor.

# 2a) Old GeM (TF/NumPy): consumes the NHWC array
pool_gem = Pooling(method="gem", method_params={"p": 4.6, "eps": 1e-8})
gem_old = pool_gem(rgem_new)  # NumPy array, shape (N, C)

# 2b) New GeM (PyTorch): consumes the same data as an NCHW tensor
gem_new_mod = GEMp_Batch(p=4.6, eps=1e-8)
rgem_new_nchw = torch.from_numpy(rgem_new).permute(0, 3, 1, 2)  # (N, C, H, W)
with torch.no_grad():
    gem_new = gem_new_mod(rgem_new_nchw)  # (N, C)
gem_new_np = gem_new.cpu().numpy()  # back to NumPy for the comparison

print("GeM: máximo diff =", np.max(np.abs(gem_old - gem_new_np)))

GeM: máximo diff = 2.9802322e-08


In [11]:
# —— 3) SGEM ——
#
# Both branches start from the same pooled descriptors (`gem_new_np`), so the
# printed difference isolates normalisation + SGEM fusion.
# `gem_new_np` is only read here, never rebound, so the cell can be re-run:
# rebinding it to a tensor would make torch.from_numpy fail on the second run.

# 3a) Old path: L2-normalise each row (TensorFlow) + old sgem_fusion
pooled_norm_old = tf.linalg.l2_normalize(tf.convert_to_tensor(gem_new_np), axis=1).numpy()  # (N, C)
sgem_old = pool_rgem.sgem_fusion(
    descriptors=pooled_norm_old,
    aug=aug,
    mode="lp",
    p=5.0,
    eps=1e-8
)  # (batch_size, C)

# 3b) New path: L2-normalise each row (PyTorch) + new SGEM module
gem_new_t = torch.from_numpy(gem_new_np)  # tensor sharing memory with the NumPy array
pooled_norm_new = torch.nn.functional.normalize(gem_new_t, p=2, dim=1)  # (N, C)
sgem_new_mod = SGEM_Batch(ps=5.0, infinity=False, eps=1e-8)
with torch.no_grad():
    sgem_new = sgem_new_mod(pooled_norm_new, aug=aug).cpu().numpy()  # (batch_size, C)

print("SGEM: máximo diff =", np.max(np.abs(sgem_old - sgem_new)))

SGEM: máximo diff = 3.6282465e-05


In [12]:
# ===== Old code (TF + NumPy) =====
# End-to-end reference pipeline: RGEM -> GeM -> L2 normalisation -> SGEM fusion.
# NOTE(review): the input is drawn from the unseeded global NumPy RNG, so the
# numbers change on every run.
feature_maps_nhwc  = np.random.rand(N, H, W, C).astype(np.float32)
# Instantiate the RGEM and GeM poolers (same parameters as in the per-stage cells;
# this rebinds pool_rgem and pool_gem).
use_rgem = True
rgem_params = {'pr': 2.5, 'size': 5}
pool_rgem = Pooling(method="rgem", method_params=rgem_params)
pool_gem = Pooling(method="gem", method_params={'p': 4.6, 'eps': 1e-8})

# 1a) Regional-GeM on the NHWC maps (skipped when use_rgem is False)
if use_rgem:
    rgem_out = pool_rgem(feature_maps_nhwc)  # (N, H, W, C)
else:
    rgem_out = feature_maps_nhwc

# 1b) GeM global pooling: from (N, H, W, C) to (N, C)
pooled_old = pool_gem(rgem_out)  # NumPy array shape (N, C)

# 1c) Row-wise L2 normalisation
pooled_tf = tf.convert_to_tensor(pooled_old, dtype=tf.float32)
pooled_norm = tf.linalg.l2_normalize(pooled_tf, axis=1).numpy()  # (N, C)

# 1d) SGEM fusion (Scale-GeM): fuses each group of `aug` vectors into one.
#     p must match `sgem_ps` of the PyTorch extractor for the comparison to be fair.
# sgem_fusion is called on a Pooling instance; the RGEM-configured one is reused.
# Presumably the result does not depend on that instance's pooling method — TODO confirm.
pooling_for_sgem = pool_rgem
old_descriptors = pooling_for_sgem.sgem_fusion(
    descriptors=pooled_norm,
    aug=aug,
    mode="lp",
    p=10.0,
    eps=1e-8
)  # NumPy array shape (batch_size, C)

In [13]:
# ===== New code (PyTorch) =====

# Convert feature_maps_nhwc (the same input the old pipeline used) to an NCHW torch.Tensor
feature_maps_nchw = torch.from_numpy(feature_maps_nhwc).permute(0, 3, 1, 2)  # (N, C, H, W)

# Instantiate the PyTorch extractor with the same hyper-parameters as the old
# pipeline: RGEM pr=2.5/size=5, GeM p=4.6, SGEM p=10.0 in Lp mode, eps=1e-8.
new_extractor = SuperGlobalExtractor(
    rgem_pr=2.5, rgem_size=5,
    gem_p=4.6,
    sgem_ps=10.0,
    sgem_infinity=False,
    eps=1e-8
).eval()

# 2) Compute the descriptors with the new code (no gradients needed)
with torch.no_grad():
    new_descriptors = new_extractor(feature_maps_nchw, aug=aug)  # (batch_size, C)
# Back to NumPy so the result can be compared with old_descriptors.
new_descriptors_np = new_descriptors.cpu().numpy()

In [14]:
# ===== Comparison between old and new =====

print("Shape old descriptors:", old_descriptors.shape)
print("Shape new descriptors:", new_descriptors_np.shape)

# Check that they are numerically close.
# Only atol is overridden; np.allclose also applies its default rtol on top.
# This rebinds are_close / max_diff from the backbone comparison.
# NOTE(review): in the recorded run this check fails with a max difference of ~3e-5,
# and the printed arrays agree on their first row but not on the second. That pattern
# would be consistent with the value subtracted before the power (gamma) being taken
# over a different scope in the two SGEM implementations (whole tensor vs. per sample)
# — unverified, confirm against the SGEM code on both sides.
are_close = np.allclose(old_descriptors, new_descriptors_np, atol=1e-5)
max_diff = np.max(np.abs(old_descriptors - new_descriptors_np))

print(f"¿Descriptors iguales? {are_close}")
print(f"Diferencia máxima: {max_diff:.6f}")

Shape old descriptors: (2, 2048)
Shape new descriptors: (2, 2048)
¿Descriptors iguales? False
Diferencia máxima: 0.000031


In [15]:
# Display the old pipeline's descriptors for visual comparison with the new ones.
old_descriptors

array([[0.0217317 , 0.02197091, 0.02226226, ..., 0.02341955, 0.02229463,
        0.02066084],
       [0.02271119, 0.02245686, 0.02252023, ..., 0.0227776 , 0.02292699,
        0.02339281]], dtype=float32)

In [16]:
# Display the new pipeline's descriptors for visual comparison with the old ones.
new_descriptors_np

array([[0.0217317 , 0.02197091, 0.02226226, ..., 0.02341955, 0.02229463,
        0.02066084],
       [0.02273296, 0.0224573 , 0.02253044, ..., 0.02280346, 0.02294248,
        0.02340364]], dtype=float32)

In [17]:
import torch
from torch import nn

class SGEM_Debug(nn.Module):
    """
    SGEM (scale-GeM) fusion that prints every intermediate tensor.

    Intended for tracing the computation step by step next to another
    implementation; the printed labels name the operation that produced each
    tensor.

    Args:
        ps (float): Exponent of the generalized mean used in the Lp branch.
        infinity (bool): If True use the SGEM-infinity branch (normalize, then
            max over augmentations) instead of the Lp branch.
        eps (float): Added to the L2 norms in the infinity branch to avoid a
            division by zero. Not used by the Lp branch.
    """
    def __init__(self, ps=10.0, infinity=False, eps=1e-8):
        super().__init__()
        self.ps = ps
        self.infinity = infinity
        self.eps = eps

    def forward(self, x: torch.Tensor, aug: int) -> torch.Tensor:
        """
        Fuse each group of `aug` consecutive descriptors into one.

        Args:
            x (torch.Tensor): Input descriptors, shape (N, d) where N = batch_size * aug
            aug (int): Number of augmentations (scales) per sample

        Returns:
            torch.Tensor: Fused descriptors, shape (batch_size, d)

        Raises:
            ValueError: If `aug` is not positive or N is not divisible by `aug`.
        """
        N, d = x.shape
        # Guard first: aug == 0 would otherwise surface as a ZeroDivisionError and a
        # negative aug as an obscure shape error from view().
        if aug <= 0:
            raise ValueError(f"aug={aug} must be a positive integer")
        if N % aug != 0:
            raise ValueError(f"N={N} not divisible by aug={aug}")
        B = N // aug  # batch_size

        # 1) Reshape: rows are grouped as `aug` consecutive descriptors per sample
        reshaped = x.view(B, aug, d)  # (B, aug, d)
        print("reshaped (B, aug, d):")
        print(reshaped)

        if self.infinity:
            # SGEM∞: normalize each vector and max over aug
            norms = torch.norm(reshaped, p=2, dim=2, keepdim=True) + self.eps  # (B, aug, 1)
            print("norms (before normalization):")
            print(norms)
            normalized = reshaped / norms  # (B, aug, d)
            print("normalized (B, aug, d):")
            print(normalized)
            output = normalized.max(dim=1)[0]  # (B, d)
            print("output SGEM∞ (B, d):")
            print(output)
        else:
            # SGEM^p: gamma = minimum over the whole tensor (scalar), so every
            # centered value is >= 0 and the fractional root below is well defined.
            gamma = reshaped.min()  # scalar
            print("gamma (scalar):", gamma.item())

            # Center
            centered = reshaped - gamma  # (B, aug, d)
            print("centered (reshaped - gamma):")
            print(centered)

            # Power (applied to the centered values; no clamping takes place)
            x_pow = centered.pow(self.ps)  # (B, aug, d)
            print(f"x_pow (centered ^ {self.ps}):")
            print(x_pow)

            # Pool (mean over aug)
            pooled = x_pow.mean(dim=1)  # (B, d)
            print("pooled (mean over aug):")
            print(pooled)

            # Root and add gamma back
            root = pooled.pow(1.0 / self.ps)  # (B, d)
            print(f"root (pooled ^ (1/{self.ps})): ")
            print(root)

            output = root + gamma  # (B, d)
            print("output SGEM^p (root + gamma):")
            print(output)

        return output

# Trace the PyTorch SGEM computation on the normalised descriptors produced by
# the per-stage SGEM cell (pooled_norm_new).
aug = 3
# Cast to float64 before tracing; presumably to take float32 rounding out of the
# picture — TODO confirm intent.
pooled_norm_new_dbl = pooled_norm_new.double()
# ps=10.0 matches the exponent used by the full-pipeline comparison.
sgem_debug = SGEM_Debug(ps=10.0, infinity=False, eps=1e-8)
print("== Running SGEM Debug ==")
sgem_output = sgem_debug(pooled_norm_new_dbl, aug=aug)
print("Final output:")
print(sgem_output)

== Running SGEM Debug ==
reshaped (B, aug, d):
tensor([[[0.0223, 0.0236, 0.0222,  ..., 0.0231, 0.0221, 0.0235],
         [0.0210, 0.0228, 0.0238,  ..., 0.0227, 0.0232, 0.0229],
         [0.0223, 0.0221, 0.0221,  ..., 0.0222, 0.0207, 0.0220]],

        [[0.0228, 0.0232, 0.0227,  ..., 0.0230, 0.0233, 0.0230],
         [0.0210, 0.0235, 0.0213,  ..., 0.0216, 0.0220, 0.0229],
         [0.0214, 0.0198, 0.0220,  ..., 0.0238, 0.0217, 0.0220]]],
       dtype=torch.float64)
gamma (scalar): 0.017812207341194153
centered (reshaped - gamma):
tensor([[[0.0045, 0.0058, 0.0044,  ..., 0.0053, 0.0043, 0.0057],
         [0.0032, 0.0050, 0.0060,  ..., 0.0049, 0.0054, 0.0051],
         [0.0044, 0.0042, 0.0043,  ..., 0.0044, 0.0029, 0.0042]],

        [[0.0050, 0.0054, 0.0049,  ..., 0.0051, 0.0055, 0.0052],
         [0.0032, 0.0056, 0.0035,  ..., 0.0038, 0.0042, 0.0051],
         [0.0036, 0.0020, 0.0042,  ..., 0.0060, 0.0039, 0.0041]]],
       dtype=torch.float64)
x_pow (clamped ^ 10.0):
tensor([[[3.5054e-2

In [18]:
import numpy as np

# Versión NumPy de sgem_fusion con prints para debug
def sgem_fusion_numpy(descriptors, aug=1, p=10.0, eps=1e-8):
    """
    NumPy walk-through of the Lp SGEM fusion that prints each intermediate.

    Every group of `aug` consecutive rows is fused into a single row: the
    global minimum is subtracted, values are raised to `p`, averaged over the
    group, the p-th root is taken and the minimum is added back.

    Args:
        descriptors: numpy array of shape (n, d), with n = batch_size * aug.
        aug: number of rows fused into each output row.
        p: exponent of the generalized mean.
        eps: accepted in the signature but not used by this function.

    Returns:
        numpy array of shape (n // aug, d).

    Raises:
        AssertionError: if n is not divisible by aug.
    """
    def _dump(title, value):
        # Label on one line, value on the next.
        print(title)
        print(value)

    rows, dim = descriptors.shape
    assert rows % aug == 0, "n debe ser divisible por aug"

    # Group the rows: (batch_size, aug, d)
    groups = descriptors.reshape(rows // aug, aug, dim)
    _dump("NumPy - reshaped (batch_size, aug, d):", groups)

    # Lp mode: a single scalar offset taken over the whole array
    gamma = np.min(groups)
    print("NumPy - gamma (scalar):", gamma)

    shifted = groups - gamma
    _dump("NumPy - centered (reshaped - gamma):", shifted)

    powered = np.power(shifted, p)
    _dump(f"NumPy - x_pow (centered ^ {p}):", powered)

    # Average over the augmentation axis -> (batch_size, d)
    averaged = np.mean(powered, axis=1)
    _dump("NumPy - pooled (mean over aug):", averaged)

    rooted = np.power(averaged, 1.0 / p)
    _dump(f"NumPy - root (pooled ^ (1/{p})): ", rooted)

    # Undo the centering -> (batch_size, d)
    fused = rooted + gamma
    _dump("NumPy - output (root + gamma):", fused)
    return fused

In [19]:
# Ejecutar versión NumPy
print("\n=== NumPy SGEM Debug ===")
sgem_out_np = sgem_fusion_numpy(pooled_norm_old, aug=aug, p=10.0, eps=1e-8)


=== NumPy SGEM Debug ===
NumPy - reshaped (batch_size, aug, d):
[[[0.02232529 0.02356438 0.02223353 ... 0.0231483  0.02207901 0.02351354]
  [0.02097395 0.02279555 0.02376533 ... 0.02270956 0.02322488 0.02293076]
  [0.0222598  0.02205416 0.02210595 ... 0.02219017 0.02067645 0.02202351]]

 [[0.02281752 0.02323251 0.02274826 ... 0.02295719 0.02330608 0.02297578]
  [0.0209754  0.02345003 0.02134929 ... 0.02161308 0.02197118 0.02294771]
  [0.02140538 0.01983954 0.02200707 ... 0.02379593 0.021716   0.02196138]]]
NumPy - gamma (scalar): 0.01781221
NumPy - centered (reshaped - gamma):
[[[0.00451308 0.00575217 0.00442132 ... 0.00533609 0.0042668  0.00570134]
  [0.00316174 0.00498334 0.00595312 ... 0.00489735 0.00541267 0.00511855]
  [0.00444759 0.00424195 0.00429374 ... 0.00437796 0.00286424 0.0042113 ]]

 [[0.00500531 0.0054203  0.00493605 ... 0.00514499 0.00549387 0.00516357]
  [0.00316319 0.00563782 0.00353708 ... 0.00380087 0.00415897 0.0051355 ]
  [0.00359317 0.00202733 0.00419486 ... 0.0