In [None]:
import torch
import numpy as np
import h5py
from torch.utils.data import DataLoader, Dataset

# Read both arrays fully into memory inside a context manager so the HDF5
# file handle is released as soon as the data has been copied out.
with h5py.File('nyu_depth_v2_labeled.mat', 'r') as mat_file:
    # Extract RGB images and depth maps
    # NOTE(review): the code below treats these as (N, 3, H, W) and (N, H, W).
    # MATLAB v7.3 files read through h5py usually come back with reversed axis
    # order, i.e. possibly (N, 3, W, H) and (N, W, H) -- print the shapes and
    # confirm before relying on the H/W order.
    rgb_images = np.array(mat_file['images'])
    depth_maps = np.array(mat_file['depths'])

# Normalize RGB images (from [0, 255] to [0, 1]); the in-place divide avoids
# allocating a second full-size float32 array.
rgb_images = rgb_images.astype(np.float32)
rgb_images /= 255.0

# Normalize depth maps by the global maximum so values fall in [0, 1].
# The guard keeps an all-zero array from producing NaNs via 0 / 0.
depth_maps = depth_maps.astype(np.float32)
max_depth = float(np.max(depth_maps))
if max_depth > 0:
    depth_maps /= max_depth

# Convert to PyTorch tensors. from_numpy shares memory with the arrays above
# instead of copying them, so the NumPy arrays must not be mutated afterwards.
# The channel dimension is added here so each depth map becomes (1, ., .).
rgb_tensors = torch.from_numpy(rgb_images)
depth_tensors = torch.from_numpy(depth_maps).unsqueeze(1)  # Shape: (N, 1, ., .)

class NYUDepthDataset(Dataset):
    """Index-aligned pairs of RGB images and depth maps.

    Item ``i`` is the tuple ``(rgb_data[i], depth_data[i])``; the two
    containers are stored as given and are not copied or transformed.
    """

    def __init__(self, rgb_data, depth_data):
        """Store the two containers after checking they have equal length.

        Args:
            rgb_data: Indexable, sized collection of RGB images.
            depth_data: Indexable, sized collection of depth maps, one per
                entry in ``rgb_data``.

        Raises:
            ValueError: If the two collections differ in length.
        """
        # Fail at construction time rather than with an IndexError deep
        # inside a training epoch.
        if len(rgb_data) != len(depth_data):
            raise ValueError(
                f"rgb_data has {len(rgb_data)} items but depth_data has "
                f"{len(depth_data)}; they must be the same length"
            )
        self.rgb_data = rgb_data
        self.depth_data = depth_data

    def __len__(self):
        """Return the number of (image, depth) pairs."""
        return len(self.rgb_data)

    def __getitem__(self, idx):
        """Return the ``(rgb, depth)`` pair stored at position ``idx``."""
        return self.rgb_data[idx], self.depth_data[idx]

# Wrap the tensors in the dataset, then serve them in shuffled mini-batches of 16.
dataset = NYUDepthDataset(rgb_data=rgb_tensors, depth_data=depth_tensors)
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=16)

In [None]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class OptimizedDepthCNN(nn.Module):
    """Small convolutional encoder-decoder mapping a 3-channel image to a 1-channel map.

    The encoder applies two 3x3 convolutions (3 -> 64 -> 128 channels), each
    followed by ReLU and a 2x2 max-pool, so height and width are halved twice
    (rounding down). The decoder applies two stride-2 transposed convolutions
    (128 -> 64 -> 1 channels), each doubling height and width, and ends in
    Tanh, so every output value lies in [-1, 1].

    When the input height/width are not multiples of 4 the output is smaller
    than the input, because pooling rounds down before the decoder doubles.
    """

    def __init__(self):
        super().__init__()

        # Shared keyword sets: a 3x3 conv that preserves spatial size, and a
        # 3x3 transposed conv that exactly doubles it.
        keep_size = dict(kernel_size=3, padding=1)
        double_size = dict(kernel_size=3, stride=2, padding=1, output_padding=1)

        downsampling_layers = [
            nn.Conv2d(3, 64, **keep_size),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, **keep_size),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.encoder = nn.Sequential(*downsampling_layers)

        upsampling_layers = [
            nn.ConvTranspose2d(128, 64, **double_size),  # learned upsampling
            nn.ReLU(),
            nn.ConvTranspose2d(64, 1, **double_size),
            # NOTE(review): Tanh spans [-1, 1]; confirm this matches the
            # range the depth targets are normalized to.
            nn.Tanh(),
        ]
        self.decoder = nn.Sequential(*upsampling_layers)

    def forward(self, x):
        """Run the encoder then the decoder on ``x`` and return the result."""
        return self.decoder(self.encoder(x))

# Build the network. Where torch.compile exists (PyTorch 2.x) use the compiled
# wrapper for speed; on older versions keep the plain eager module.
# NOTE(review): a compiled wrapper is expected to expose its parameters under
# an "_orig_mod." key prefix in state_dict() -- confirm that saved checkpoints
# still load into an uncompiled OptimizedDepthCNN.
model = OptimizedDepthCNN()
model = torch.compile(model) if hasattr(torch, "compile") else model

In [None]:
# Smooth L1 (a Huber-style loss): quadratic for errors below beta, linear above,
# averaged over all elements.
loss_function = nn.SmoothL1Loss(reduction="mean", beta=0.1)
# AdamW applies the weight decay separately from the gradient update.
optimizer = optim.AdamW(params=model.parameters(), weight_decay=1e-4, lr=1e-4)

In [None]:
import os
import time
import torch
import re

num_epochs = 1
latest_epoch = 0
# os.cpu_count() returns None when the count cannot be determined; treat that as 0.
# NOTE(review): num_workers is computed here but is not passed to the DataLoader
# anywhere in this cell.
num_workers = max(0, (os.cpu_count() or 0) // 2)  # Dynamic worker selection

# Snapshot Directory
snapshot_dir = "model_snapshots"
os.makedirs(snapshot_dir, exist_ok=True)

# Checkpoint file names; the captured number is the count of completed epochs.
model_pattern = re.compile(r"depth_model_epoch_(\d+)\.pth")

# torch.compile wraps the network and exposes the original module as
# `_orig_mod`; state_dict() on the wrapper prefixes every key with
# "_orig_mod.". Saving and loading through the unwrapped module keeps
# checkpoints usable by a plain, uncompiled OptimizedDepthCNN.
compile_prefix = "_orig_mod."
raw_model = getattr(model, "_orig_mod", model)

# List all files in the directory and keep only exact checkpoint names
# (fullmatch rejects things like "depth_model_epoch_3.pth.bak").
model_files = [f for f in os.listdir(snapshot_dir) if model_pattern.fullmatch(f)]

if model_files:
    # Extract epoch numbers and find the latest one
    latest_model = max(model_files, key=lambda f: int(model_pattern.fullmatch(f).group(1)))
    latest_epoch = int(model_pattern.fullmatch(latest_model).group(1))
    model_path = os.path.join(snapshot_dir, latest_model)

    # Load the weights onto the CPU regardless of where they were saved.
    # NOTE(review): torch.load unpickles the file -- only load checkpoints
    # from a trusted source.
    loaded_state = torch.load(model_path, map_location="cpu")
    # Accept checkpoints written from a compiled wrapper as well as plain ones.
    loaded_state = {
        (key[len(compile_prefix):] if key.startswith(compile_prefix) else key): value
        for key, value in loaded_state.items()
    }
    raw_model.load_state_dict(loaded_state)
    model.eval()

    print(f"Loaded model from: {model_path} (Epoch {latest_epoch})")


for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    start_time = time.perf_counter()

    for rgb_batch, depth_batch in dataloader:
        optimizer.zero_grad()

        output = model(rgb_batch)
        # The network output can be smaller than the input when the spatial
        # size is not a multiple of 4, so resize the target to match it.
        depth_batch_resized = torch.nn.functional.interpolate(
            depth_batch, size=(output.shape[2], output.shape[3]), mode="bilinear", align_corners=False
        )

        loss = loss_function(output, depth_batch_resized)

        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    epoch_time = time.perf_counter() - start_time
    # max(1, ...) avoids a ZeroDivisionError when the dataloader is empty.
    avg_loss = train_loss / max(1, len(dataloader))
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.6f}, Time: {epoch_time:.2f}s")

    # Save model after each epoch. The +1 numbers the file by completed epochs,
    # so a resumed run writes a NEW file instead of overwriting the checkpoint
    # it was just loaded from.
    completed_epoch = latest_epoch + epoch + 1
    checkpoint_path = f"{snapshot_dir}/depth_model_epoch_{completed_epoch}.pth"
    torch.save(raw_model.state_dict(), checkpoint_path)
    print(f"Model saved: {checkpoint_path}")

print("\nTraining complete.")

In [None]:
import cv2
import torch
import matplotlib.pyplot as plt

import os
import re

# Load a test image. cv2.imread returns None (no exception) when the file
# cannot be read, and returns channels in BGR order.
test_image_path = 'test_image_2.jpg'
test_bgr = cv2.imread(test_image_path)
if test_bgr is None:
    raise FileNotFoundError(f"Could not read test image: {test_image_path}")
show_img = test_bgr.copy()

# Resize, convert BGR -> RGB so the channel order matches the RGB training
# images, and normalize to [0, 1].
# NOTE(review): cv2.resize takes (width, height), so this yields a 480x640
# (H x W) array -- confirm that matches the orientation of the training images.
test_rgb = cv2.cvtColor(cv2.resize(test_bgr, (640, 480)), cv2.COLOR_BGR2RGB) / 255.0
test_rgb_tensor = torch.tensor(test_rgb).permute(2, 0, 1).unsqueeze(0).float()

# Pick the checkpoint with the highest epoch number in the snapshot directory.
snapshot_dir = "model_snapshots"
checkpoint_pattern = re.compile(r"depth_model_epoch_(\d+)\.pth")
checkpoints = []
if os.path.isdir(snapshot_dir):
    for file_name in os.listdir(snapshot_dir):
        found = checkpoint_pattern.fullmatch(file_name)
        if found:
            checkpoints.append((int(found.group(1)), file_name))
if not checkpoints:
    raise FileNotFoundError(
        f"No depth_model_epoch_<N>.pth checkpoint found in '{snapshot_dir}'"
    )
model_path = f"{snapshot_dir}/{max(checkpoints)[1]}"

# Predict depth with a fresh, uncompiled network.
model = OptimizedDepthCNN()
# NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
loaded_state = torch.load(model_path, map_location="cpu")
# Checkpoints saved from a torch.compile wrapper prefix every key with
# "_orig_mod."; strip it so they load into the plain module.
compile_prefix = "_orig_mod."
loaded_state = {
    (key[len(compile_prefix):] if key.startswith(compile_prefix) else key): value
    for key, value in loaded_state.items()
}
model.load_state_dict(loaded_state)
model.eval()  # Set model to evaluation mode
with torch.no_grad():
    predicted_depth = model(test_rgb_tensor).squeeze().numpy()

# Display the input image next to the predicted depth map.
fig, (ax_image, ax_depth) = plt.subplots(1, 2)
ax_image.imshow(cv2.cvtColor(show_img, cv2.COLOR_BGR2RGB))
ax_image.set_title("Test Image")
ax_depth.imshow(predicted_depth, cmap='jet')
ax_depth.set_title("Predicted Depth Map")
ax_depth.axis("off")
plt.show()
