#### ImageNet100 Evaluation

In [None]:
# Connect Google Drive
# Mount the user's Google Drive inside the Colab VM at /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')
# Make the project folder the working directory; the cells below use
# relative paths such as ./datasets/inet100 and ./best_models/output_inet.
%cd /content/gdrive/MyDrive/Colab Notebooks/Colab Notebooks/autoencoders

# Display the resolved working directory as a sanity check.
%pwd

Mounted at /content/gdrive
/content/gdrive/.shortcut-targets-by-id/1UMow24kXYpDLYgShcir7-CB3ZYQsgEih/Colab Notebooks/autoencoders


'/content/gdrive/.shortcut-targets-by-id/1UMow24kXYpDLYgShcir7-CB3ZYQsgEih/Colab Notebooks/autoencoders'

In [None]:
# Encoded test prediction
import numpy as np
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
_device_kind = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_kind)
print("Device:", device)

# PATHS
inet_root = Path("./datasets/inet100")
TEST_PT = inet_root / "test.pt"

output_inet_root = Path("./best_models/output_inet")
enc_save_root = inet_root / "encoded_test_latent"
# Create the latent output folder up front so the saves further down cannot
# fail on a missing directory.
enc_save_root.mkdir(parents=True, exist_ok=True)

assert TEST_PT.exists(), f"Missing: {TEST_PT}"

# Checkpoint location for every model: "dir" is the run folder, "ckpt" the
# file name inside it. Insertion order is AuE, VAE, VQVE, VQVA2.
inet_model_cfgs = {
    "AuE": dict(
        dir=output_inet_root / "output_inet_ae",
        ckpt="best_overall_AuE.pt",
    ),
    "VAE": dict(
        dir=output_inet_root / "output_inet_vae",
        ckpt="best_overall_VAE.pt",
    ),
    "VQVE": dict(
        dir=output_inet_root / "output_inet_vqvae",
        ckpt="best_overall_VQVE.pt",
    ),
    "VQVA2": dict(
        dir=output_inet_root / "output_inet_vqvae2",
        ckpt="best_overall_VQVA2.pt",
    ),
}

# Load the cached test tensor on the CPU and report its shape, dtype and range.
Xte = torch.load(TEST_PT, map_location="cpu", weights_only=True)
_xte_range = (float(Xte.min()), float(Xte.max()))
print("Xte:", tuple(Xte.shape), Xte.dtype, "range", _xte_range)

# Fixed-order batches (no shuffling) so saved latents line up with Xte rows.
test_ds = TensorDataset(Xte)
test_loader = DataLoader(dataset=test_ds, batch_size=256, shuffle=False)

# MODELS
class Snake(nn.Module):
    """Snake activation with a learnable scalar frequency.

    Computes ``x + (1 / a) * sin(a * x) ** 2`` where ``a = |alpha| + 1e-6``.

    Args:
        alpha: Initial value of the learnable frequency. Any real number is
            accepted; it is converted to ``float`` so that integer inputs
            (e.g. ``Snake(alpha=2)``) still produce a floating-point
            parameter that can require gradients.
    """

    def __init__(self, alpha=1.0):
        super().__init__()
        # torch.tensor(<int>) would create an int64 tensor, which nn.Parameter
        # rejects because integer tensors cannot require gradients.
        self.alpha = nn.Parameter(torch.tensor(float(alpha)))

    def forward(self, x):
        """Apply the activation element-wise and return a tensor shaped like ``x``."""
        # abs() keeps the frequency non-negative; the epsilon avoids a
        # division by zero when alpha is exactly 0.
        a = self.alpha.abs() + 1e-6
        return x + (1.0 / a) * torch.sin(a * x).pow(2)

# Shared hyperparameter defaults for the model classes defined below.
BOTTLENECK_CH = 56   # default latent channel count (`ch`) of AuE, VAE, VQVE and VQVA2
CODEBOOK_SIZE = 512  # default number of codebook entries (`K`) for VQVE and VQVA2
COMMIT_BETA = 0.25   # default `beta` weight used inside VectorQuantizer's loss
TOP_CH = 56          # default channel count (`top_ch`) of VQVA2's top-level latent

class AuE(nn.Module):
    """Plain convolutional autoencoder with Snake activations.

    The encoder takes single-channel input through one stride-1 convolution
    and two stride-2 convolutions, ending in ``ch`` channels. The decoder
    uses two stride-2 transposed convolutions followed by a 1x1 convolution
    and a tanh.

    Args:
        ch: Number of channels in the bottleneck feature map.
    """

    def __init__(self, ch=BOTTLENECK_CH):
        super().__init__()
        # Layer order (and therefore the Sequential indices used as
        # state_dict keys) must stay exactly as listed.
        encoder_layers = [
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            Snake(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=1),
            Snake(),
            nn.Conv2d(64, ch, kernel_size=4, stride=2, padding=1),
            Snake(),
        ]
        self.enc = nn.Sequential(*encoder_layers)

        decoder_layers = [
            nn.ConvTranspose2d(ch, 64, kernel_size=2, stride=2),
            Snake(),
            nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2),
            Snake(),
            nn.Conv2d(32, 1, kernel_size=1),
            nn.Tanh(),
        ]
        self.dec = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Return ``(reconstruction, {})``; the dict is empty because there are no auxiliary losses."""
        return self.dec(self.enc(x)), {}

class VAE(nn.Module):
    """Convolutional variational autoencoder with a spatial latent map.

    A shared encoder trunk (one stride-1 and one stride-2 convolution) feeds
    two 3x3 convolution heads that produce the per-position mean and
    log-variance with ``ch`` channels each.

    Args:
        ch: Number of channels of the latent mean / log-variance maps.
    """

    def __init__(self, ch=BOTTLENECK_CH):
        super().__init__()
        trunk = [
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            Snake(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=1),
            Snake(),
        ]
        self.enc = nn.Sequential(*trunk)

        # Two parallel heads on top of the shared trunk.
        self.mu = nn.Conv2d(64, ch, kernel_size=3, padding=1)
        self.logvar = nn.Conv2d(64, ch, kernel_size=3, padding=1)

        decoder_layers = [
            nn.ConvTranspose2d(ch, 64, kernel_size=2, stride=2),
            Snake(),
            nn.Conv2d(64, 32, kernel_size=3, padding=1),
            Snake(),
            nn.Conv2d(32, 1, kernel_size=1),
            nn.Tanh(),
        ]
        self.dec = nn.Sequential(*decoder_layers)

    def reparam(self, mu, logv):
        """Draw ``mu + eps * std`` with ``eps ~ N(0, I)`` and ``std = exp(0.5 * logv)``."""
        std = torch.exp(0.5 * logv)
        noise = torch.randn_like(std)
        return mu + noise * std

    def forward(self, x):
        """Encode, sample (training) or take the mean (eval), then decode.

        Returns:
            ``(reconstruction, aux)`` where ``aux`` holds ``"kld"`` (the KL
            term averaged over every latent element), ``"mu"`` and
            ``"logvar"``.
        """
        features = self.enc(x)
        mu = self.mu(features)
        lv = self.logvar(features)

        # Sampling noise is only injected in training mode; evaluation
        # decodes the deterministic mean.
        if self.training:
            z = self.reparam(mu, lv)
        else:
            z = mu

        xh = self.dec(z)
        kl_elementwise = -0.5 * (1 + lv - mu.pow(2) - lv.exp())
        kld = kl_elementwise.mean()
        return xh, {"kld": kld, "mu": mu, "logvar": lv}

class VectorQuantizer(nn.Module):
    """Nearest-neighbour vector quantiser with a learnable codebook.

    Args:
        K: Number of codebook entries.
        D: Dimensionality of each entry; must equal the channel count of the
            tensor passed to ``forward``.
        beta: Weight of the commitment term in the returned loss.
    """

    def __init__(self, K, D, beta=COMMIT_BETA):
        super().__init__()
        self.K, self.D, self.beta = K, D, beta
        self.emb = nn.Embedding(K, D)
        self.emb.weight.data.uniform_(-1.0 / D, 1.0 / D)

    def forward(self, z):
        """Quantise ``z`` and return ``(quantised, loss)``.

        Args:
            z: 4-D tensor with the ``D`` feature channels on dim 1.

        Returns:
            A tuple of the quantised tensor (same layout as ``z``, with
            gradients passed straight through to ``z``) and the scalar
            codebook-plus-commitment loss.
        """
        # Move channels last so each spatial position becomes one D-dim row.
        zf = z.permute(0, 2, 3, 1).contiguous()
        flat = zf.view(-1, self.D)

        # Squared Euclidean distance to every code, expanded as
        # ||x||^2 - 2 x.e + ||e||^2 to avoid materialising the differences.
        dist = (flat.pow(2).sum(1, keepdim=True)
                - 2 * flat @ self.emb.weight.t()
                + self.emb.weight.pow(2).sum(1))
        ind = dist.argmin(1)
        zq = self.emb(ind).view_as(zf)

        # Codebook term: moves the selected codes towards the (frozen)
        # encoder outputs, so only the codebook receives this gradient.
        codebk = ((zq - zf.detach()) ** 2).mean()
        # Commitment term: pulls the encoder outputs towards the (frozen)
        # codes. beta belongs on this term; the earlier code applied it to
        # the codebook term. The forward loss value is unchanged by the fix,
        # only which parameters receive the beta-scaled gradient.
        commit = self.beta * ((zf - zq.detach()) ** 2).mean()
        loss = codebk + commit

        # Straight-through estimator: forward uses zq, backward sees identity.
        zq = zf + (zq - zf).detach()
        return zq.permute(0, 3, 1, 2).contiguous(), loss

class VQVE(nn.Module):
    """Single-level vector-quantised autoencoder.

    Two stride-2 convolutions encode single-channel input to ``ch`` channels,
    a ``VectorQuantizer`` discretises the result, and two stride-2 transposed
    convolutions decode it.

    Args:
        ch: Channel count of the latent map (also the codebook dimension).
        K: Number of codebook entries.
        beta: ``beta`` forwarded to the ``VectorQuantizer``.
    """

    def __init__(self, ch=BOTTLENECK_CH, K=CODEBOOK_SIZE, beta=COMMIT_BETA):
        super().__init__()
        encoder_layers = [
            nn.Conv2d(1, 32, kernel_size=4, stride=2, padding=1),
            Snake(),
            nn.Conv2d(32, ch, kernel_size=4, stride=2, padding=1),
            Snake(),
        ]
        self.enc = nn.Sequential(*encoder_layers)

        self.vq = VectorQuantizer(K, ch, beta)

        # The trailing Snake before Tanh is part of the layer layout and must
        # stay so that the Sequential indices keep matching saved weights.
        decoder_layers = [
            nn.ConvTranspose2d(ch, 32, kernel_size=4, stride=2, padding=1),
            Snake(),
            nn.ConvTranspose2d(32, 1, kernel_size=4, stride=2, padding=1),
            Snake(),
            nn.Tanh(),
        ]
        self.dec = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Return ``(reconstruction, {"vq": quantiser_loss})``."""
        latent = self.enc(x)
        quantised, vq_loss = self.vq(latent)
        return self.dec(quantised), {"vq": vq_loss}

class VQVA2(nn.Module):
    """Two-level vector-quantised autoencoder (bottom and top latents).

    The bottom encoder downsamples the input twice; the top encoder
    downsamples the bottom features once more. The quantised top code is
    upsampled, added to the bottom features before they are quantised, and
    also concatenated with the quantised bottom code for decoding.

    Args:
        ch: Channel count of the bottom latent.
        top_ch: Channel count of the top latent.
        K: Number of entries in each of the two codebooks.
        beta: ``beta`` forwarded to both ``VectorQuantizer`` instances.
    """

    def __init__(self, ch=BOTTLENECK_CH, top_ch=TOP_CH, K=CODEBOOK_SIZE, beta=COMMIT_BETA):
        super().__init__()
        bottom_layers = [
            nn.Conv2d(1, 32, kernel_size=4, stride=2, padding=1),
            Snake(),
            nn.Conv2d(32, ch, kernel_size=4, stride=2, padding=1),
            Snake(),
        ]
        self.enc_b = nn.Sequential(*bottom_layers)

        top_layers = [
            nn.Conv2d(ch, top_ch, kernel_size=4, stride=2, padding=1),
            Snake(),
        ]
        self.enc_t = nn.Sequential(*top_layers)

        self.vq_t = VectorQuantizer(K, top_ch, beta)
        self.vq_b = VectorQuantizer(K, ch, beta)

        upsample_layers = [
            nn.ConvTranspose2d(top_ch, ch, kernel_size=4, stride=2, padding=1),
            Snake(),
            nn.Conv2d(ch, ch, kernel_size=3, padding=1),
            Snake(),
        ]
        self.up_t = nn.Sequential(*upsample_layers)

        # Decoder input has 2 * ch channels: quantised bottom code plus the
        # upsampled top code.
        decoder_layers = [
            nn.ConvTranspose2d(ch * 2, 64, kernel_size=4, stride=2, padding=1),
            Snake(),
            nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1),
            Snake(),
            nn.Conv2d(32, 1, kernel_size=1),
            Snake(),
            nn.Tanh(),
        ]
        self.dec = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Return ``(reconstruction, {"vq_top": ..., "vq_bottom": ...})``."""
        bottom = self.enc_b(x)
        top = self.enc_t(bottom)
        top_q, top_loss = self.vq_t(top)
        up = self.up_t(top_q)

        # The upsampled top code may come out smaller or larger than the
        # bottom map: zero-pad on the right/bottom if it is short, then crop
        # so both maps share the same spatial size.
        target_h, target_w = bottom.shape[-2:]
        short_h = max(0, target_h - up.shape[-2])
        short_w = max(0, target_w - up.shape[-1])
        if short_h > 0 or short_w > 0:
            up = F.pad(up, (0, short_w, 0, short_h))
        up = up[:, :, :target_h, :target_w]

        # Condition the bottom code on the top code before quantising it.
        bottom_q, bottom_loss = self.vq_b(bottom + up)

        decoder_in = torch.cat([bottom_q, up], dim=1)
        return self.dec(decoder_in), {"vq_top": top_loss, "vq_bottom": bottom_loss}

def build_model_by_name(name: str) -> nn.Module:
    """Instantiate a model with default hyperparameters from its short name.

    Args:
        name: One of ``"AuE"``, ``"VAE"``, ``"VQVE"`` or ``"VQVA2"``.

    Returns:
        A freshly constructed (untrained) model of the requested kind.

    Raises:
        ValueError: If ``name`` is not one of the supported names.
    """
    # Reject unknown names first so the error path never touches the classes.
    if name not in ("AuE", "VAE", "VQVE", "VQVA2"):
        raise ValueError(f"Unknown model: {name}")

    constructors = {
        "AuE": AuE,
        "VAE": VAE,
        "VQVE": VQVE,
        "VQVA2": VQVA2,
    }
    return constructors[name]()

def load_best_inet_model_for_encoding(model_name: str) -> nn.Module:
    """Build a model and load its best checkpoint for inference.

    The checkpoint location is read from ``inet_model_cfgs[model_name]``. The
    file may hold either a bare state dict or a dict with a ``"model_state"``
    entry.

    Args:
        model_name: Key into ``inet_model_cfgs`` (also passed to
            ``build_model_by_name``).

    Returns:
        The model moved to ``device`` and switched to eval mode.

    Raises:
        AssertionError: If the checkpoint file does not exist.
    """
    info = inet_model_cfgs[model_name]
    ckpt_path = info["dir"] / info["ckpt"]
    assert ckpt_path.exists(), f"Checkpoint not found: {ckpt_path}"

    # NOTE(security): weights_only=False unpickles arbitrary Python objects.
    # Only load checkpoints from a trusted source; switch to
    # weights_only=True if the files contain nothing but tensors.
    state = torch.load(ckpt_path, map_location="cpu", weights_only=False)
    sd = state["model_state"] if (isinstance(state, dict) and "model_state" in state) else state

    model = build_model_by_name(model_name)
    # strict=False tolerates key mismatches, but a mismatch means some layers
    # keep their random initialisation. Surface it instead of silently
    # producing latents from partly untrained weights.
    load_result = model.load_state_dict(sd, strict=False)
    if load_result.missing_keys or load_result.unexpected_keys:
        print(
            f"WARNING: checkpoint {ckpt_path} does not fully match {model_name}: "
            f"missing keys={list(load_result.missing_keys)}, "
            f"unexpected keys={list(load_result.unexpected_keys)}"
        )
    model.to(device).eval()
    return model

@torch.no_grad()
def encode_batch(model_name: str, model: nn.Module, xb: torch.Tensor) -> torch.Tensor:
    """Encode one batch with the encoder path of the given model.

    Runs without gradient tracking. The batch is moved to ``device`` and the
    resulting latent is returned on the CPU.

    Args:
        model_name: ``"AuE"``, ``"VAE"``, ``"VQVE"`` or ``"VQVA2"``; selects
            which sub-modules of ``model`` are used.
        model: The model matching ``model_name``.
        xb: Input batch.

    Returns:
        * AuE / VQVE: output of ``model.enc`` (for VQVE this is the
          pre-quantisation latent).
        * VAE: the latent mean ``model.mu(model.enc(xb))``.
        * VQVA2: bottom latent concatenated on the channel axis with the top
          latent upsampled (nearest neighbour) to the bottom's spatial size.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    xb = xb.to(device)

    if model_name in ("AuE", "VQVE"):
        latent = model.enc(xb)
    elif model_name == "VAE":
        latent = model.mu(model.enc(xb))
    elif model_name == "VQVA2":
        bottom = model.enc_b(xb)
        top = model.enc_t(bottom)
        # Bring the coarser top map up to the bottom resolution so the two
        # can be stacked along the channel axis.
        top_upsampled = F.interpolate(top, size=bottom.shape[-2:], mode="nearest")
        latent = torch.cat([bottom, top_upsampled], dim=1)
    else:
        raise ValueError(model_name)

    return latent.cpu()

# ENCODE FULL TEST SET + SAVE
# Maps each successfully encoded model name to the path of its saved latents.
all_latent_paths = {}

for name in ("AuE", "VAE", "VQVE", "VQVA2"):
    cfg = inet_model_cfgs[name]
    ckpt_path = cfg["dir"] / cfg["ckpt"]

    if ckpt_path.exists():
        print(f"\n=== Encoding inet100 test set – {name} ===")
        model = load_best_inet_model_for_encoding(name)

        # Encode batch by batch; the loader is unshuffled, so the stacked
        # latents keep the row order of the test tensor.
        latents = []
        for (xb,) in test_loader:
            xb = xb.float()
            z = encode_batch(name, model, xb)
            latents.append(z)

        Z = torch.cat(latents, dim=0).numpy()

        out_path = enc_save_root / f"inet100_{name}_test_latent.npy"
        np.save(out_path, Z)
        all_latent_paths[name] = str(out_path)

        print(f"  Saved {name} latents: {Z.shape} -> {out_path}")
    else:
        # Models without a checkpoint are skipped rather than aborting the run.
        print(f"\n=== Skipping {name} (missing: {ckpt_path}) ===")

print("\nAll encoded test latents saved:")
for model_name, latent_path in all_latent_paths.items():
    print(f"  {model_name}: {latent_path}")

Device: cuda
Xte: (5000, 1, 28, 28) torch.float16 range (-1.0, 1.0)

=== Encoding inet100 test set – AuE ===
  Saved AuE latents: (5000, 56, 7, 7) -> datasets/inet100/encoded_test_latent/inet100_AuE_test_latent.npy

=== Encoding inet100 test set – VAE ===
  Saved VAE latents: (5000, 56, 14, 14) -> datasets/inet100/encoded_test_latent/inet100_VAE_test_latent.npy

=== Encoding inet100 test set – VQVE ===
  Saved VQVE latents: (5000, 56, 7, 7) -> datasets/inet100/encoded_test_latent/inet100_VQVE_test_latent.npy

=== Encoding inet100 test set – VQVA2 ===
  Saved VQVA2 latents: (5000, 112, 7, 7) -> datasets/inet100/encoded_test_latent/inet100_VQVA2_test_latent.npy

All encoded test latents saved:
  AuE: datasets/inet100/encoded_test_latent/inet100_AuE_test_latent.npy
  VAE: datasets/inet100/encoded_test_latent/inet100_VAE_test_latent.npy
  VQVE: datasets/inet100/encoded_test_latent/inet100_VQVE_test_latent.npy
  VQVA2: datasets/inet100/encoded_test_latent/inet100_VQVA2_test_latent.npy


In [None]:
# Create Input train & test in numpy format
from pathlib import Path
import torch
import numpy as np

# PATHS
ROOT = Path(".")
INET_ROOT = ROOT / "datasets/inet100"

TRAIN_PT, TEST_PT = INET_ROOT / "train.pt", INET_ROOT / "test.pt"

# Both splits are written next to the source tensors inside INET_ROOT.
TRAIN_OUT_DIR = INET_ROOT / "."
TEST_OUT_DIR = INET_ROOT / "."
for _out_dir in (TRAIN_OUT_DIR, TEST_OUT_DIR):
    _out_dir.mkdir(parents=True, exist_ok=True)

TRAIN_NPY = TRAIN_OUT_DIR / "inet100_train.npy"
TEST_NPY = TEST_OUT_DIR / "inet100_test.npy"

print("Loading ImageNet100 cached tensors...")

X_train = torch.load(TRAIN_PT, map_location="cpu", weights_only=True)
X_test = torch.load(TEST_PT, map_location="cpu", weights_only=True)

print("Loaded:")
for _label, _tensor in (("  train:", X_train), ("  test :", X_test)):
    print(_label, _tensor.shape, _tensor.dtype)

# Only the rank and the single-channel axis are actually checked here.
for _tensor in (X_train, X_test):
    assert _tensor.ndim == 4 and _tensor.shape[1] == 1, "Expected (N,1,28,28)"

# Use float32 for NumPy interoperability
train_np = X_train.to(torch.float32).numpy()
test_np = X_test.to(torch.float32).numpy()

print("Converted to NumPy:")
for _label, _array in (("  train:", train_np), ("  test :", test_np)):
    print(_label, _array.shape, _array.dtype,
          "range:", (_array.min(), _array.max()))

for _target, _array in ((TRAIN_NPY, train_np), (TEST_NPY, test_np)):
    np.save(_target, _array)

print("\nSaved:")
for _target in (TRAIN_NPY, TEST_NPY):
    print(" ", _target.resolve())

Loading ImageNet100 cached tensors...
Loaded:
  train: torch.Size([130000, 1, 28, 28]) torch.float16
  test : torch.Size([5000, 1, 28, 28]) torch.float16
Converted to NumPy:
  train: (130000, 1, 28, 28) float32 range: (np.float32(-1.0), np.float32(1.0))
  test : (5000, 1, 28, 28) float32 range: (np.float32(-1.0), np.float32(1.0))

Saved:
  /content/gdrive/.shortcut-targets-by-id/1UMow24kXYpDLYgShcir7-CB3ZYQsgEih/Colab Notebooks/autoencoders/datasets/inet100/inet100_train.npy
  /content/gdrive/.shortcut-targets-by-id/1UMow24kXYpDLYgShcir7-CB3ZYQsgEih/Colab Notebooks/autoencoders/datasets/inet100/inet100_test.npy
