In [None]:
#tta_inference_resnet_autoencoder.py
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models, transforms
from torchvision.transforms import functional as TF
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import numpy as np

# --- CONFIG ---
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("✅ Using device:", DEVICE)

IMAGE_SIZE = 128  # frames are resized to IMAGE_SIZE x IMAGE_SIZE before scoring
BATCH_SIZE = 16   # frames per DataLoader batch
TEST_DIR = "/kaggle/input/pixel-play-26/Avenue_Corrupted-20251221T112159Z-3-001/Avenue_Corrupted/Dataset/testing_videos"
MODEL_PATH = None  # filled in by the search below

# Find model: first file under /kaggle/input whose name contains "model_seed"
# and ends in ".pth" (both checks are case-insensitive).
# os.walk yields directories and files in arbitrary filesystem order, so both
# are sorted to make the pick reproducible when several checkpoints are attached.
for root, dirs, files in os.walk("/kaggle/input"):
    dirs.sort()  # in-place sort controls the order in which os.walk descends
    for f in sorted(files):
        lowered = f.lower()
        if "model_seed" in lowered and lowered.endswith(".pth"):
            MODEL_PATH = os.path.join(root, f)
            break
    if MODEL_PATH:
        break

if not MODEL_PATH:
    raise FileNotFoundError("❌ Model not found — check data attachment")

# --- MODEL ---
class ResNetAutoEncoder(nn.Module):
    """Autoencoder with a ResNet-18 trunk as encoder and a transposed-conv decoder.

    The encoder is torchvision's ``resnet18`` (no pretrained weights requested)
    with its last two child modules dropped. The decoder stacks five stride-2
    ``ConvTranspose2d`` layers (512 -> 256 -> 128 -> 64 -> 32 -> 3 channels),
    with a ReLU after each of the first four and a Sigmoid after the last.
    """

    def __init__(self):
        super().__init__()

        # Encoder: every child of resnet18 except the final two.
        backbone = models.resnet18(weights=None)
        trunk = list(backbone.children())[:-2]
        self.encoder = nn.Sequential(*trunk)

        # Decoder: module order (and therefore the state_dict indices) is
        # conv, act, conv, act, ... with the Sigmoid in the last slot.
        widths = (512, 256, 128, 64, 32, 3)
        last = len(widths) - 2
        stages = []
        for i, (c_in, c_out) in enumerate(zip(widths[:-1], widths[1:])):
            stages.append(
                nn.ConvTranspose2d(
                    c_in, c_out, kernel_size=3, stride=2, padding=1, output_padding=1
                )
            )
            stages.append(nn.Sigmoid() if i == last else nn.ReLU())
        self.decoder = nn.Sequential(*stages)

    def forward(self, x):
        """Encode ``x`` and decode it again.

        Returns:
            A ``(reconstruction, features)`` tuple, where ``features`` is the
            encoder output that was fed to the decoder.
        """
        latent = self.encoder(x)
        return self.decoder(latent), latent

# --- DATASET ---
class TestDataset(Dataset):
    """Frames of the test videos, one sample per image file.

    ``root_dir`` is scanned one level deep: every sub-directory is treated as a
    video whose name must parse as an integer, and every ``.jpg`` / ``.png``
    file inside it (extension matched case-insensitively) is a frame. The frame
    number is the integer after the last underscore in the part of the file
    name that precedes the first dot. Videos and frames are visited in sorted
    name order.
    """

    def __init__(self, root_dir):
        # Each entry is (image path, video id, frame number).
        self.samples = []
        for video_name in sorted(os.listdir(root_dir)):
            video_dir = os.path.join(root_dir, video_name)
            if not os.path.isdir(video_dir):
                continue
            for frame_file in sorted(os.listdir(video_dir)):
                if not frame_file.lower().endswith(('.jpg', '.png')):
                    continue
                stem = frame_file.split('.')[0]
                frame_number = int(stem.split('_')[-1])
                self.samples.append(
                    (os.path.join(video_dir, frame_file), int(video_name), frame_number)
                )

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image_tensor, video_id, frame_number)`` for sample ``idx``."""
        image_path, video_id, frame_number = self.samples[idx]
        rgb = Image.open(image_path).convert("RGB")
        resized = transforms.Resize((IMAGE_SIZE, IMAGE_SIZE))(rgb)
        tensor = transforms.ToTensor()(resized)
        return tensor, video_id, frame_number

# --- INFERENCE ---
print("✅ Model:", MODEL_PATH)
model = ResNetAutoEncoder()
# torch.load unpickles the file. The checkpoint was located by a filename search,
# so restrict unpickling to tensors and plain containers (weights_only=True);
# a state_dict needs nothing more, and a tampered file cannot run arbitrary code.
state_dict = torch.load(MODEL_PATH, map_location=DEVICE, weights_only=True)
model.load_state_dict(state_dict)
model.to(DEVICE)  # parameters must live on the same device as the input batches
model.eval()      # inference behaviour for layers that differ between train and eval

dataset = TestDataset(TEST_DIR)
# shuffle=False keeps the frames in the dataset's sorted order.
loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

# ===== CORRECT extract_features =====
def extract_features(self, x):
    """Run ``x`` through ``self.encoder`` one child at a time.

    The first six children (indices 0-5; for the ResNet trunk used here these
    are conv1, bn1, relu, maxpool, layer1 and layer2) are applied in order,
    then the outputs of the last two (layer3 and layer4) are captured.

    Returns:
        ``(f3, f4)``: the output of ``encoder[6]`` and the output of
        ``encoder[7]`` applied to ``f3``.
    """
    for idx in range(6):
        x = self.encoder[idx](x)
    f3 = self.encoder[6](x)
    f4 = self.encoder[7](f3)
    return f3, f4

if not hasattr(model, 'extract_features'):
    # Bind the free function to this instance so it can be called as a method.
    model.extract_features = extract_features.__get__(model)

# ===== INFERENCE LOOP (Full TTA + Noise-Favoring) =====
# Test-time augmentations. Each view is scored on its own and the scores are
# averaged. The list does not depend on the batch, so it is built once.
tta_ops = [
    lambda x: x,                  # identity
    TF.hflip,                     # horizontal flip
    TF.vflip,                     # vertical flip
    lambda x: TF.rotate(x, 180),  # 180-degree rotation
]

# Score blend: pixel (L1) reconstruction error vs. feature-space error.
RECON_WEIGHT = 0.795
FEAT_WEIGHT = 0.205
# Inside the feature error: layer3 vs. layer4 mean squared difference.
LAYER3_WEIGHT = 0.4
LAYER4_WEIGHT = 0.6
# Exponent applied to the TTA-averaged score.
SCORE_GAMMA = 1.31

results = []
with torch.no_grad():
    for imgs, vids, frames in loader:
        imgs = imgs.to(DEVICE)
        batch_scores = []

        for op in tta_ops:
            a = op(imgs)

            # One encoder pass yields both feature maps. The layer4 output is
            # exactly what model(a) would hand to the decoder, so decode it
            # directly instead of running the encoder a second time.
            f3_a, f4_a = model.extract_features(a)
            recon = model.decoder(f4_a)
            f3_r, f4_r = model.extract_features(recon)

            # Per-image errors: reduce over every dimension except the batch.
            r_err = (a - recon).abs().mean([1, 2, 3])  # L1 in pixel space
            err3 = ((f3_a - f3_r) ** 2).mean([1, 2, 3])
            err4 = ((f4_a - f4_r) ** 2).mean([1, 2, 3])
            f_err = LAYER3_WEIGHT * err3 + LAYER4_WEIGHT * err4

            score = RECON_WEIGHT * r_err + FEAT_WEIGHT * f_err
            batch_scores.append(score)

        avg_score = torch.stack(batch_scores).mean(0)  # mean over the TTA views
        final_score = avg_score ** SCORE_GAMMA

        # .tolist() moves each tensor to the host in one go; converting element
        # by element would force a separate device sync for every frame.
        for v, f, s in zip(vids.tolist(), frames.tolist(), final_score.tolist()):
            results.append({"video": v, "frame": f, "score": s})

# Save
# Explicit columns keep the frame well-formed even when nothing was scored:
# an empty ``results`` list would otherwise give a column-less DataFrame and
# the "video" lookup below would raise AttributeError.
df = pd.DataFrame(results, columns=["video", "frame", "score"])
# Submission key is "<video>_<frame>".
df["Id"] = df["video"].astype(str) + "_" + df["frame"].astype(str)
output_path = "submission_top10.csv"
df[["Id", "score"]].rename(columns={"score": "Predicted"}).to_csv(output_path, index=False)

print(f"📊 Score range: {df['score'].min():.4f} → {df['score'].max():.4f}")
print(f"✅ Done! {output_path} saved.")

✅ Using device: cuda
✅ Model: /kaggle/input/new-training-model-ipynb/model_seed82.pth
📊 Score range: 0.1408 → 0.2931
✅ Done! submission_top10.csv saved.
