In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import numpy as np
import pandas as pd

In [2]:
# ===== SEED CONTROL (ADD THIS CELL) =====
import random
import numpy as np
import torch

def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global RNG, and PyTorch
    (the default generator plus all CUDA devices), then switches cuDNN to
    deterministic mode with auto-tuning (benchmark) turned off.

    Args:
        seed: Integer seed handed unchanged to each library.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


In [3]:
# Seed handed to set_seed() before the data loader and the model are created.
SEED = 82  # change this number for each run


In [4]:
# Training hyperparameters.
IMAGE_SIZE = 128  # frames are resized to IMAGE_SIZE x IMAGE_SIZE
BATCH_SIZE = 16
EPOCHS = 10
LR = 1e-4  # learning rate given to the Adam optimizer

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE = torch.device(_device_name)

print("Using device:", DEVICE)

Using device: cpu


In [5]:
class TrainFrameDataset(Dataset):
    """Dataset of individual frames stored as ``root_dir/<video>/<frame file>``.

    Every regular file found directly inside a first-level sub-directory of
    ``root_dir`` is treated as one sample. Both the video folders and the
    frames inside them are visited in sorted (lexicographic) order, so the
    index -> file mapping is identical on every run and every filesystem;
    ``os.listdir`` alone returns entries in arbitrary order, which would make
    seeded shuffling non-reproducible.

    Args:
        root_dir: Directory that contains one sub-directory per video.
        transform: Callable applied to the RGB ``PIL.Image`` of each frame;
            its return value is what ``__getitem__`` returns.

    Attributes:
        files: List of frame paths, in index order.
    """

    def __init__(self, root_dir, transform):
        self.transform = transform
        self.files = []

        for vid in sorted(os.listdir(root_dir)):
            vid_path = os.path.join(root_dir, vid)
            if not os.path.isdir(vid_path):
                continue
            # Sort the frames too: the outer sort alone does not fix the order.
            for frame_name in sorted(os.listdir(vid_path)):
                frame_path = os.path.join(vid_path, frame_name)
                # Skip nested directories; only regular files can be opened as images.
                if os.path.isfile(frame_path):
                    self.files.append(frame_path)

    def __len__(self):
        """Return the number of indexed frame files."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load frame ``idx`` as RGB and return it after applying ``transform``."""
        # convert() loads the pixel data and returns a new image, so the
        # underlying file can be closed as soon as the conversion is done.
        with Image.open(self.files[idx]) as frame:
            img = frame.convert("RGB")
        return self.transform(img)

In [6]:
TRAIN_DIR = "/kaggle/input/pixel-play-26/Avenue_Corrupted-20251221T112159Z-3-001/Avenue_Corrupted/Dataset/training_videos"

# Preprocessing: resize every frame to a fixed square, then convert it to a tensor.
train_transform = transforms.Compose(
    [
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.ToTensor(),
    ]
)

# Seed the RNGs before the dataset and loader are built.
set_seed(SEED)
train_dataset = TrainFrameDataset(TRAIN_DIR, train_transform)

# num_workers=0 keeps data loading in the main process.
loader_options = dict(batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
train_loader = DataLoader(train_dataset, **loader_options)

print("Train frames:", len(train_dataset))


Train frames: 9204


In [7]:
class ResNetAutoEncoder(nn.Module):
    """Autoencoder with a ResNet-18 trunk as encoder and a transposed-conv decoder.

    The encoder is a randomly initialised (``weights=None``) torchvision
    ResNet-18 with its last two child modules removed (presumably the global
    pooling and classifier head -- confirm against torchvision), so it emits a
    512-channel feature map. The decoder is five stride-2 transposed
    convolutions, each doubling the spatial size; for the 128x128 inputs used
    in this notebook that is 4 -> 8 -> 16 -> 32 -> 64 -> 128. A final sigmoid
    keeps the reconstruction in [0, 1].
    """

    def __init__(self):
        super().__init__()

        backbone = models.resnet18(weights=None)
        trunk = list(backbone.children())[:-2]
        self.encoder = nn.Sequential(*trunk)

        # Channel widths of the decoder, from the encoder output down to the
        # last hidden stage; the final stage maps to 3 output channels.
        widths = (512, 256, 128, 64, 32)
        stages = []
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            stages.append(self._upsample(in_ch, out_ch))
            stages.append(nn.ReLU())
        stages.append(self._upsample(widths[-1], 3))
        stages.append(nn.Sigmoid())
        self.decoder = nn.Sequential(*stages)

    @staticmethod
    def _upsample(in_ch, out_ch):
        """Build one 3x3, stride-2 transposed convolution that doubles H and W."""
        return nn.ConvTranspose2d(
            in_ch, out_ch, kernel_size=3, stride=2, padding=1, output_padding=1
        )

    def forward(self, x):
        """Return ``(reconstruction, encoder_features)`` for the batch ``x``."""
        latent = self.encoder(x)
        return self.decoder(latent), latent

In [8]:
# Re-seed immediately before the model is built and training starts.
set_seed(SEED)

model = ResNetAutoEncoder().to(DEVICE)
criterion = nn.MSELoss()  # pixel-wise reconstruction error
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

for epoch in range(EPOCHS):
    total_loss = 0
    for imgs in train_loader:
        imgs = imgs.to(DEVICE)

        # Clear gradients from the previous step before the new forward/backward pass.
        optimizer.zero_grad()

        # The autoencoder target is the input itself; the feature map is unused here.
        recon, _ = model(imgs)
        loss = criterion(recon, imgs)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the mean per-batch loss of the epoch.
    print(f"Epoch {epoch+1}/{EPOCHS} | Loss: {total_loss/len(train_loader):.6f}")


Epoch 1/10 | Loss: 0.010396
Epoch 2/10 | Loss: 0.001995
Epoch 3/10 | Loss: 0.001460
Epoch 4/10 | Loss: 0.001204
Epoch 5/10 | Loss: 0.001040
Epoch 6/10 | Loss: 0.000922
Epoch 7/10 | Loss: 0.000842
Epoch 8/10 | Loss: 0.000772
Epoch 9/10 | Loss: 0.000720
Epoch 10/10 | Loss: 0.000668


In [9]:
# The file name embeds SEED, so each seed value writes to its own file
# (relative path: it lands in the current working directory).
seed_checkpoint = f"model_seed{SEED}.pth"
torch.save(model.state_dict(), seed_checkpoint)
print("model saved")

model saved


In [10]:
# Second copy of the same weights under a fixed name; later cells reload from this path.
working_checkpoint = "/kaggle/working/resnet18_autoencoder.pth"
torch.save(model.state_dict(), working_checkpoint)
print("weights saved safely")

weights saved safely


In [11]:
# Same selection rule as the DEVICE constant defined earlier: GPU if available, else CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cpu


In [12]:
# Reload the saved weights into the existing model and switch it to inference mode.
# weights_only=True restricts unpickling to tensors and plain containers, which is
# all a state_dict needs; it avoids executing arbitrary pickled code and the
# FutureWarning that PyTorch 2.4-2.5 emits when the argument is omitted.
state_dict = torch.load(
    "/kaggle/working/resnet18_autoencoder.pth",
    map_location=device,
    weights_only=True,
)
model.load_state_dict(state_dict)
model.eval()
print("model clean & ready")

model clean & ready


In [13]:
import os

# Sanity check: confirm the checkpoint file is on disk and report its size.
path = "/kaggle/working/resnet18_autoencoder.pth"

path_exists = os.path.exists(path)
print("Exists:", path_exists)

# Size in mebibytes, or the string "NA" when the file is missing.
size_mb = os.path.getsize(path) / (1024*1024) if path_exists else "NA"
print("Size (MB):", size_mb)

Exists: True
Size (MB): 48.695467948913574


In [14]:
# Reload the saved weights, make sure the model lives on `device`, and switch to
# inference mode. weights_only=True restricts unpickling to tensors and plain
# containers, which is all a state_dict needs; it avoids executing arbitrary
# pickled code and the FutureWarning PyTorch 2.4-2.5 emits when it is omitted.
state_dict = torch.load(
    "/kaggle/working/resnet18_autoencoder.pth",
    map_location=device,
    weights_only=True,
)
model.load_state_dict(state_dict)
model.to(device)
model.eval()
print("weights loaded successfully, model ready")

weights loaded successfully, model ready


In [15]:
# Path to the `testing_videos` folder, a sibling of the `training_videos` folder used as TRAIN_DIR.
TEST_DIR = "/kaggle/input/pixel-play-26/Avenue_Corrupted-20251221T112159Z-3-001/Avenue_Corrupted/Dataset/testing_videos"