In [1]:
!pip install torch-ema

Collecting torch-ema
  Downloading torch_ema-0.3-py3-none-any.whl.metadata (415 bytes)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->torch-ema)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->torch-ema)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->torch-ema)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->torch-ema)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->torch-ema)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->torch-ema)
  Downloading nvidia_

In [2]:
import numpy as np

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import AdamW
import torchvision.transforms as T
import torchvision.transforms.functional as F
from scipy.ndimage import gaussian_filter
import torch.nn.functional as F_nn
import random
from torch_ema import ExponentialMovingAverage

from PIL import Image
from tqdm import tqdm

import time
import os

import matplotlib.pyplot as plt
import pandas as pd
import timm


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

num_epoch_head = 10       # Train classifier head first
num_epoch_finetune = 40  # Fine-tune full model

def paired_transform(color_img, depth_img, output_size=224, max_depth=10.0, apply_color_jitter=True,
                    epoch=0, total_epochs=50):
    if random.random() > 0.5:
        color_img = F.hflip(color_img)
        depth_img = F.hflip(depth_img)

    angle = random.uniform(-15, 15)
    color_img = F.rotate(color_img, angle, interpolation=Image.BILINEAR)
    depth_img = F.rotate(depth_img, angle, interpolation=Image.NEAREST)

    i, j, h, w = T.RandomResizedCrop.get_params(
        color_img, scale=(0.8, 1.0), ratio=(1.0, 1.0)
    )
    color_img = F.resized_crop(color_img, i, j, h, w, size=(output_size, output_size))
    depth_img = F.resized_crop(depth_img, i, j, h, w, size=(output_size, output_size))

    if apply_color_jitter:
        color_jitter = T.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3)
        color_img = color_jitter(color_img)

    # RGB → tensor
    color_tensor = F.to_tensor(color_img)
    color_tensor = F.normalize(color_tensor, mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])

    # Depth → tensor (meters)
    depth_np = np.array(depth_img).astype(np.float32) / 1000.0

    
    t = min(epoch / (0.3 * total_epochs), 1.0)  # ramp factor
    
   # Brightness-like scaling
    if random.random() < 0.5:
        scale = random.uniform(1 - 0.05*t, 1 + 0.05*t)
        depth_np *= scale

    # Gamma
    if random.random() < 0.5:
        gamma = random.uniform(1 - 0.2*t, 1 + 0.2*t)
        depth_norm = np.clip(depth_np / max_depth, 1e-6, 1.0)
        depth_np = (depth_norm ** gamma) * max_depth

    # Gaussian noise
    if random.random() < 0.5:
        sigma = 0.005 + 0.02*t
        depth_np += np.random.normal(0, sigma, depth_np.shape)

    # Range dropout (simulate missing band)
    if random.random() < 0.2:
        h = depth_np.shape[0]
        y0 = np.random.randint(0, h)
        band = max(1, int(0.05*h))
        depth_np[y0:y0+band, :] = 0.0

    # Gaussian blur (defocus)
    if random.random() < 0.2:
        from scipy.ndimage import gaussian_filter
        depth_np = gaussian_filter(depth_np, sigma=0.5)

    
    depth_tensor = torch.from_numpy(np.clip(depth_np, 0, max_depth)).unsqueeze(0)

    return color_tensor, depth_tensor
    

class DepthDataset(Dataset):
    def __init__(self, data_dir, paired_transform=None, max_depth=10.0):
        """
        Args:
            data_dir (str): Path to folder containing 'colors/' and 'depths/' subfolders.
            paired_transform (callable, optional): Function to apply same geometric transform to both RGB and depth.
            max_depth (float): Maximum depth value to scale depth maps (in meters).
        """
        self.data_dir = data_dir
        self.paired_transform = paired_transform
        self.max_depth = max_depth

        # Paths to color and depth folders
        self.color_dir = os.path.join(data_dir, "colors")
        self.depth_dir = os.path.join(data_dir, "depths")

        # List all RGB color images
        self.color_files = sorted([f for f in os.listdir(self.color_dir) if f.endswith("_colors.png")])
        
        self.current_epoch = num_epoch_head
        self.total_epochs = num_epoch_finetune  # default, can be updated

    def __len__(self):
        return len(self.color_files)

    def __getitem__(self, idx):
        # Load RGB image
        color_path = os.path.join(self.color_dir, self.color_files[idx])
        color_img = Image.open(color_path).convert("RGB")

        # Load corresponding depth image
        depth_file = self.color_files[idx].replace("_colors.png", "_depth.png")
        depth_path = os.path.join(self.depth_dir, depth_file)
        depth_img = Image.open(depth_path)

        # Apply paired transform if provided
        if self.paired_transform:
            color_tensor, depth_tensor = paired_transform(color_img, depth_img,
                                              epoch=self.current_epoch,
                                              total_epochs=self.total_epochs)
        else:
            # Convert RGB to tensor and normalize
            color_tensor = T.ToTensor()(color_img)
            color_tensor = T.Normalize(mean=[0.485, 0.456, 0.406],
                                       std=[0.229, 0.224, 0.225])(color_tensor)
            # Convert depth to tensor and scale to meters
            depth_np = np.array(depth_img).astype(np.float32) / 1000.0  # mm → meters
            depth_tensor = torch.from_numpy(depth_np).unsqueeze(0)
            depth_tensor = torch.clamp(depth_tensor, 0, self.max_depth)

        return color_tensor, depth_tensor

    def set_epoch(self, epoch, total_epochs=None):
        """Update current epoch for curriculum depth augmentations"""
        self.current_epoch = epoch
        if total_epochs is not None:
            self.total_epochs = total_epochs


class DepthModel(nn.Module):
    def __init__(self, backbone_name='efficientnet_b3', pretrained=True):
        super().__init__()
        
        # --------------------------
        # Encoder: pretrained CNN
        # --------------------------
        # Use features_only=True to get intermediate feature maps for decoder
        self.encoder = timm.create_model(backbone_name, pretrained=pretrained, features_only=True)
        
        # Channels of encoder feature maps at each stage
        encoder_channels = self.encoder.feature_info.channels()  # e.g., [40, 48, 136, 384]
        last_ch = encoder_channels[-1]

        # --------------------------
        # Simple decoder: upsample to original resolution
        # --------------------------
        self.decoder = nn.Sequential(
            nn.Conv2d(last_ch, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),

            nn.Conv2d(256, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),

            nn.Conv2d(128, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),

            nn.Conv2d(64, 1, kernel_size=3, padding=1)  # Output: single-channel depth map
        )

    def forward(self, x):
        # Encoder forward: returns list of feature maps at different stages
        input_size = x.shape[2:]  # height and width of **original input image**
        
        features = self.encoder(x)
        x_enc = features[-1]
        
        depth = self.decoder(x_enc)
        depth = F_nn.interpolate(depth, size=input_size, mode='bilinear', align_corners=False)
        
        return depth
        
# -------------------------
# Custom Loss (Doesn't punish loss for outliers)
# -------------------------
class DepthLoss(nn.Module):
    def __init__(self, delta=0.5, alpha=0.7):
        super().__init__()
        self.delta = delta
        self.alpha = alpha

    def huber(self, pred, target):
        diff = pred - target
        abs_diff = diff.abs()
        mask = target > 0  # ignore invalid zeros
        diff = diff[mask]
        abs_diff = abs_diff[mask]

        loss = torch.where(
            abs_diff <= self.delta,
            0.5 * diff**2,
            self.delta * (abs_diff - 0.5*self.delta)
        )
        return loss.mean()

    def si_log(self, pred, target, eps=1e-6):
        mask = target > 0
        p = torch.log(pred[mask].clamp(min=eps))
        t = torch.log(target[mask].clamp(min=eps))
        d = p - t
        return d.pow(2).mean() - d.mean().pow(2)

    def forward(self, pred, target):
        return self.alpha*self.huber(pred, target) + (1-self.alpha)*self.si_log(pred, target)


# -------------------------
# Optimizer + Cosine + Warmup
# -------------------------
def get_optimizer_scheduler(model, train_loader, num_epochs, lr=3e-4, weight_decay=1e-2, warmup_frac=0.05):
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    total_steps = len(train_loader) * num_epochs
    warmup_steps = int(total_steps * warmup_frac)

    def lr_lambda(step):
        if step < warmup_steps:
            return step / max(1, warmup_steps)
        progress = (step - warmup_steps) / max(1, total_steps - warmup_steps)
        return 0.5 * (1 + np.cos(np.pi * progress))

    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)
    return optimizer, scheduler, total_steps

# -------------------------
# EMA wrapper
# -------------------------
def get_ema(model, decay=0.999):
    return ExponentialMovingAverage(model.parameters(), decay=decay)




Using device: cuda


In [3]:


def train_depth_model(model,train_dataset, train_loader, val_loader, epochs, freeze_encoder=True,
                      epoch_offset=0, total_epochs=50, model_path="depth_model.pth", 
                      best_val_loss=float("inf")):

    criterion = DepthLoss()
    scaler = torch.amp.GradScaler()

    # Freeze encoder if needed
    if freeze_encoder:
        for p in model.encoder.parameters():
            p.requires_grad = False

    optimizer, scheduler, total_steps = get_optimizer_scheduler(model, train_loader, epochs)
    ema = get_ema(model)

    global_step = 0

    for epoch in range(epochs):
        train_dataset.set_epoch(epoch + epoch_offset, total_epochs=total_epochs)
        
        model.train()
        train_loss = 0.0

        for images, depths in tqdm(train_loader):
            images, depths = images.to(device), depths.to(device)

            # Apply epoch-aware paired transform
            # images, depths = paired_transform(images, depths, epoch=epoch + epoch_offset, total_epochs=total_epochs)

            optimizer.zero_grad()
            with torch.amp.autocast(device_type = "cuda"):
                outputs = model(images)
                loss = criterion(outputs, depths)
            scaler.scale(loss).backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()
            ema.update()
            global_step += 1

            train_loss += loss.item() * images.size(0)

        train_loss /= len(train_loader.dataset)

        # Validation with EMA weights
        ema.store()
        ema.copy_to()
        model.eval()
        val_loss, val_mae = 0.0, 0.0
        with torch.no_grad():
            for images, depths in val_loader:
                images, depths = images.to(device), depths.to(device)
                outputs = model(images)
                loss = criterion(outputs, depths)
                val_loss += loss.item() * images.size(0)
                val_mae += (outputs-depths).abs().mean().item() * images.size(0)
        val_loss /= len(val_loader.dataset)
        val_mae /= len(val_loader.dataset)
        rmse = np.sqrt(val_loss)
        ema.restore()

        print(f"Epoch {epoch+1+epoch_offset}/{total_epochs} | "
              f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | RMSE: {rmse:.4f} | MAE: {val_mae:.4f}")

        # Save best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), model_path)
            print("✅ Best model saved.")

    return best_val_loss



# --------------------------
# Run training
# --------------------------


# --------------------------
# Device
# --------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --------------------------
# Model
# --------------------------
model = DepthModel().to(device)


# --------------------------
# Datasets & Loaders
# --------------------------
train_folder = "/kaggle/input/nyu-depth-split-dataset/nyu_split/train"
valid_folder = "/kaggle/input/nyu-depth-split-dataset/nyu_split/val"
max_depth = 10

train_dataset = DepthDataset(train_folder, paired_transform, max_depth)
val_dataset = DepthDataset(valid_folder, paired_transform, max_depth)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)

# --------------------------
# Freeze backbone initially
# --------------------------
for param in model.encoder.parameters():
    param.requires_grad = False

# --------------------------
# Training Parameters
# --------------------------

MODEL_PATH = "/kaggle/working/depth_predictor.pth"

# 1. Train decoder head first
best_val_loss = train_depth_model(model, train_dataset, train_loader, val_loader, epochs=num_epoch_head, freeze_encoder=True, 
                  epoch_offset=0, total_epochs=(num_epoch_head + num_epoch_finetune), model_path= MODEL_PATH)

# 2. Fine-tune full model
for param in model.encoder.parameters():
    param.requires_grad = True
train_depth_model(model, train_dataset, train_loader, val_loader, epochs=num_epoch_finetune, freeze_encoder=False, 
                  epoch_offset=num_epoch_head, total_epochs=(num_epoch_head + num_epoch_finetune), model_path="depth_model.pth", best_val_loss = best_val_loss)

model.safetensors:   0%|          | 0.00/49.3M [00:00<?, ?B/s]

100%|██████████| 19/19 [00:16<00:00,  1.13it/s]


Epoch 1/50 | Train Loss: 3.9009 | Val Loss: 0.7338 | RMSE: 0.8566 | MAE: 1.0389
✅ Best model saved.


100%|██████████| 19/19 [00:11<00:00,  1.61it/s]


Epoch 2/50 | Train Loss: 0.6226 | Val Loss: 0.6184 | RMSE: 0.7864 | MAE: 0.9430
✅ Best model saved.


100%|██████████| 19/19 [00:11<00:00,  1.65it/s]


Epoch 3/50 | Train Loss: 0.5925 | Val Loss: 0.5616 | RMSE: 0.7494 | MAE: 0.9153
✅ Best model saved.


100%|██████████| 19/19 [00:11<00:00,  1.62it/s]


Epoch 4/50 | Train Loss: 0.5171 | Val Loss: 0.6946 | RMSE: 0.8334 | MAE: 0.8779


100%|██████████| 19/19 [00:11<00:00,  1.62it/s]


Epoch 5/50 | Train Loss: 0.5066 | Val Loss: 0.5799 | RMSE: 0.7615 | MAE: 0.8978


100%|██████████| 19/19 [00:11<00:00,  1.64it/s]


Epoch 6/50 | Train Loss: 0.5125 | Val Loss: 0.5619 | RMSE: 0.7496 | MAE: 0.8553


100%|██████████| 19/19 [00:11<00:00,  1.63it/s]


Epoch 7/50 | Train Loss: 0.4785 | Val Loss: 0.5660 | RMSE: 0.7523 | MAE: 0.8418


100%|██████████| 19/19 [00:11<00:00,  1.64it/s]


Epoch 8/50 | Train Loss: 0.4813 | Val Loss: 0.4517 | RMSE: 0.6720 | MAE: 0.8179
✅ Best model saved.


100%|██████████| 19/19 [00:11<00:00,  1.60it/s]


Epoch 9/50 | Train Loss: 0.4850 | Val Loss: 0.4936 | RMSE: 0.7026 | MAE: 0.7977


100%|██████████| 19/19 [00:11<00:00,  1.60it/s]


Epoch 10/50 | Train Loss: 0.4855 | Val Loss: 0.5129 | RMSE: 0.7161 | MAE: 0.8162


100%|██████████| 19/19 [00:12<00:00,  1.55it/s]


Epoch 11/50 | Train Loss: 0.4644 | Val Loss: 0.5604 | RMSE: 0.7486 | MAE: 0.8584


100%|██████████| 19/19 [00:12<00:00,  1.58it/s]


Epoch 12/50 | Train Loss: 0.4723 | Val Loss: 0.5190 | RMSE: 0.7204 | MAE: 0.8708


100%|██████████| 19/19 [00:12<00:00,  1.56it/s]


Epoch 13/50 | Train Loss: 0.4602 | Val Loss: 0.5398 | RMSE: 0.7347 | MAE: 0.8200


100%|██████████| 19/19 [00:12<00:00,  1.56it/s]


Epoch 14/50 | Train Loss: 0.4510 | Val Loss: 0.4671 | RMSE: 0.6834 | MAE: 0.7685


100%|██████████| 19/19 [00:12<00:00,  1.56it/s]


Epoch 15/50 | Train Loss: 0.4397 | Val Loss: 0.4046 | RMSE: 0.6361 | MAE: 0.7459
✅ Best model saved.


100%|██████████| 19/19 [00:12<00:00,  1.58it/s]


Epoch 16/50 | Train Loss: 0.4312 | Val Loss: 0.3677 | RMSE: 0.6064 | MAE: 0.6970
✅ Best model saved.


100%|██████████| 19/19 [00:11<00:00,  1.59it/s]


Epoch 17/50 | Train Loss: 0.4326 | Val Loss: 0.4083 | RMSE: 0.6390 | MAE: 0.6914


100%|██████████| 19/19 [00:12<00:00,  1.54it/s]


Epoch 18/50 | Train Loss: 0.4293 | Val Loss: 0.4231 | RMSE: 0.6505 | MAE: 0.7309


100%|██████████| 19/19 [00:12<00:00,  1.56it/s]


Epoch 19/50 | Train Loss: 0.4096 | Val Loss: 0.3851 | RMSE: 0.6205 | MAE: 0.6740


100%|██████████| 19/19 [00:11<00:00,  1.59it/s]


Epoch 20/50 | Train Loss: 0.3751 | Val Loss: 0.4322 | RMSE: 0.6574 | MAE: 0.6832


100%|██████████| 19/19 [00:12<00:00,  1.55it/s]


Epoch 21/50 | Train Loss: 0.3875 | Val Loss: 0.3690 | RMSE: 0.6075 | MAE: 0.6602


100%|██████████| 19/19 [00:12<00:00,  1.57it/s]


Epoch 22/50 | Train Loss: 0.3752 | Val Loss: 0.3890 | RMSE: 0.6237 | MAE: 0.6378


100%|██████████| 19/19 [00:12<00:00,  1.53it/s]


Epoch 23/50 | Train Loss: 0.3908 | Val Loss: 0.3825 | RMSE: 0.6185 | MAE: 0.6747


100%|██████████| 19/19 [00:12<00:00,  1.57it/s]


Epoch 24/50 | Train Loss: 0.3890 | Val Loss: 0.3907 | RMSE: 0.6250 | MAE: 0.6520


100%|██████████| 19/19 [00:12<00:00,  1.56it/s]


Epoch 25/50 | Train Loss: 0.3597 | Val Loss: 0.4156 | RMSE: 0.6446 | MAE: 0.6639


100%|██████████| 19/19 [00:12<00:00,  1.55it/s]


Epoch 26/50 | Train Loss: 0.3485 | Val Loss: 0.3723 | RMSE: 0.6101 | MAE: 0.6537


100%|██████████| 19/19 [00:12<00:00,  1.56it/s]


Epoch 27/50 | Train Loss: 0.3419 | Val Loss: 0.3670 | RMSE: 0.6058 | MAE: 0.6624
✅ Best model saved.


100%|██████████| 19/19 [00:12<00:00,  1.54it/s]


Epoch 28/50 | Train Loss: 0.3391 | Val Loss: 0.3974 | RMSE: 0.6304 | MAE: 0.6663


100%|██████████| 19/19 [00:12<00:00,  1.57it/s]


Epoch 29/50 | Train Loss: 0.3465 | Val Loss: 0.3916 | RMSE: 0.6258 | MAE: 0.6495


100%|██████████| 19/19 [00:12<00:00,  1.54it/s]


Epoch 30/50 | Train Loss: 0.3599 | Val Loss: 0.3978 | RMSE: 0.6308 | MAE: 0.6260


100%|██████████| 19/19 [00:12<00:00,  1.55it/s]


Epoch 31/50 | Train Loss: 0.3409 | Val Loss: 0.3881 | RMSE: 0.6230 | MAE: 0.6573


100%|██████████| 19/19 [00:12<00:00,  1.53it/s]


Epoch 32/50 | Train Loss: 0.3377 | Val Loss: 0.3136 | RMSE: 0.5600 | MAE: 0.5875
✅ Best model saved.


100%|██████████| 19/19 [00:11<00:00,  1.59it/s]


Epoch 33/50 | Train Loss: 0.3581 | Val Loss: 0.3921 | RMSE: 0.6262 | MAE: 0.6226


100%|██████████| 19/19 [00:12<00:00,  1.57it/s]


Epoch 34/50 | Train Loss: 0.3558 | Val Loss: 0.3608 | RMSE: 0.6007 | MAE: 0.6109


100%|██████████| 19/19 [00:12<00:00,  1.50it/s]


Epoch 35/50 | Train Loss: 0.3361 | Val Loss: 0.3718 | RMSE: 0.6097 | MAE: 0.6306


100%|██████████| 19/19 [00:13<00:00,  1.42it/s]


Epoch 36/50 | Train Loss: 0.3272 | Val Loss: 0.3362 | RMSE: 0.5798 | MAE: 0.6047


100%|██████████| 19/19 [00:13<00:00,  1.39it/s]


Epoch 37/50 | Train Loss: 0.3141 | Val Loss: 0.3818 | RMSE: 0.6179 | MAE: 0.6324


100%|██████████| 19/19 [00:12<00:00,  1.47it/s]


Epoch 38/50 | Train Loss: 0.3297 | Val Loss: 0.3748 | RMSE: 0.6122 | MAE: 0.5835


100%|██████████| 19/19 [00:13<00:00,  1.45it/s]


Epoch 39/50 | Train Loss: 0.3106 | Val Loss: 0.3284 | RMSE: 0.5731 | MAE: 0.6202


100%|██████████| 19/19 [00:13<00:00,  1.42it/s]


Epoch 40/50 | Train Loss: 0.3054 | Val Loss: 0.3439 | RMSE: 0.5864 | MAE: 0.6008


100%|██████████| 19/19 [00:13<00:00,  1.39it/s]


Epoch 41/50 | Train Loss: 0.3264 | Val Loss: 0.3344 | RMSE: 0.5783 | MAE: 0.6163


100%|██████████| 19/19 [00:12<00:00,  1.48it/s]


Epoch 42/50 | Train Loss: 0.3083 | Val Loss: 0.3879 | RMSE: 0.6228 | MAE: 0.6305


100%|██████████| 19/19 [00:12<00:00,  1.48it/s]


Epoch 43/50 | Train Loss: 0.3217 | Val Loss: 0.3518 | RMSE: 0.5932 | MAE: 0.6005


100%|██████████| 19/19 [00:12<00:00,  1.51it/s]


Epoch 44/50 | Train Loss: 0.3073 | Val Loss: 0.3378 | RMSE: 0.5812 | MAE: 0.5993


100%|██████████| 19/19 [00:12<00:00,  1.50it/s]


Epoch 45/50 | Train Loss: 0.3208 | Val Loss: 0.3288 | RMSE: 0.5734 | MAE: 0.6103


100%|██████████| 19/19 [00:13<00:00,  1.46it/s]


Epoch 46/50 | Train Loss: 0.3138 | Val Loss: 0.3487 | RMSE: 0.5905 | MAE: 0.6026


100%|██████████| 19/19 [00:13<00:00,  1.44it/s]


Epoch 47/50 | Train Loss: 0.3093 | Val Loss: 0.3843 | RMSE: 0.6199 | MAE: 0.6271


100%|██████████| 19/19 [00:13<00:00,  1.40it/s]


Epoch 48/50 | Train Loss: 0.3016 | Val Loss: 0.3613 | RMSE: 0.6011 | MAE: 0.6428


100%|██████████| 19/19 [00:13<00:00,  1.45it/s]


Epoch 49/50 | Train Loss: 0.3211 | Val Loss: 0.3356 | RMSE: 0.5793 | MAE: 0.5957


100%|██████████| 19/19 [00:13<00:00,  1.44it/s]


Epoch 50/50 | Train Loss: 0.3024 | Val Loss: 0.3020 | RMSE: 0.5496 | MAE: 0.5911
✅ Best model saved.


0.3020265868076911

In [4]:
# # --------------------------
# # Testing / Inference
# # --------------------------
# import torch
# import torch.nn as nn
# import numpy as np
# import matplotlib.pyplot as plt
# from tqdm import tqdm


# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print("Using device:", device)

# # --------------------------
# # Test Dataset & DataLoader
# # --------------------------
# test_folder = "/kaggle/input/nyu-test-colors-depths/nyu2_test_colors_depths"
# test_dataset = DepthDataset(test_folder, paired_transform, max_depth=10.0)
# test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

# # Load trained model
# test_model = DepthModel().to(device)
# test_model.load_state_dict(torch.load(
#     "/kaggle/input/depthmodel3/pytorch/default/1/depth_classifier.pth", 
#     map_location=device
# ))
# test_model.eval()

# # Loss function for evaluation
# criterion = nn.MSELoss()

# # --------------------------
# # Evaluation with tqdm
# # --------------------------
# val_loss, val_mae = 0.0, 0.0
# with torch.no_grad():
#     for images, depths in tqdm(test_loader, desc="Evaluating on test set"):
#         images, depths = images.to(device), depths.to(device)
#         outputs = test_model(images)

#         # Loss metrics
#         mse_loss = criterion(outputs, depths)
#         mae_loss = torch.mean(torch.abs(outputs - depths))

#         val_loss += mse_loss.item() * images.size(0)
#         val_mae += mae_loss.item() * images.size(0)

# val_loss /= len(test_loader.dataset)
# val_mae /= len(test_loader.dataset)
# val_rmse = np.sqrt(val_loss)

# print(f"✅ Test Results -> MSE: {val_loss:.4f}, RMSE: {val_rmse:.4f} m, MAE: {val_mae:.4f} m")

# # --------------------------
# # Visualization
# # --------------------------
# def show_predictions(model, dataset, num_samples=5):
#     model.eval()
#     fig, axes = plt.subplots(num_samples, 3, figsize=(12, 4 * num_samples))

#     for i in tqdm(range(num_samples), desc="Visualizing predictions"):
#         color, depth_gt = dataset[i]
#         color_batch = color.unsqueeze(0).to(device)

#         with torch.no_grad():
#             depth_pred = model(color_batch).cpu().squeeze().numpy()

#         depth_gt = depth_gt.squeeze().numpy()

#         # Plot RGB
#         axes[i, 0].imshow(np.transpose(color.numpy(), (1, 2, 0)))
#         axes[i, 0].set_title("RGB Input")
#         axes[i, 0].axis("off")

#         # Plot Ground Truth Depth
#         axes[i, 1].imshow(depth_gt, cmap="inferno")
#         axes[i, 1].set_title("Ground Truth Depth")
#         axes[i, 1].axis("off")

#         # Plot Predicted Depth
#         axes[i, 2].imshow(depth_pred, cmap="inferno")
#         axes[i, 2].set_title("Predicted Depth")
#         axes[i, 2].axis("off")

#     plt.tight_layout()
#     plt.show()

# # Visualize predictions from test set
# show_predictions(test_model, test_dataset, num_samples=5)