In [1]:
import os
import cv2
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torchvision
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

In [2]:
class DepthEstimationDataset(Dataset):
    def __init__(self, images_dir, depths_dir=None, transform=None):
        """
        Dataset for depth estimation with optional depth maps
        
        Args:
            images_dir (str): Directory containing images
            depths_dir (str, optional): Directory containing depth maps
            transform (callable, optional): Optional transform to be applied on images
        """
        self.images_dir = images_dir
        self.depths_dir = depths_dir
        self.transform = transform
        
        # Get list of image filenames
        self.image_filenames = sorted(os.listdir(images_dir))
        
        # If depth directory is provided, get depth filenames
        self.depth_filenames = sorted(os.listdir(depths_dir)) if depths_dir else None
    
    def __len__(self):
        return len(self.image_filenames)
    
    def __getitem__(self, idx):
        # Load image
        img_path = os.path.join(self.images_dir, self.image_filenames[idx])
        image = Image.open(img_path).convert('RGB')
    
        # Load depth map if available
        if self.depth_filenames:
            depth_path = os.path.join(self.depths_dir, self.depth_filenames[idx])
            depth = Image.open(depth_path).convert('L')  # Convert depth to grayscale
            
            if self.transform:
                image = self.transform(image)
                depth = transforms.ToTensor()(depth)  # Convert depth to a single-channel tensor
    
            return image, depth
        else:
            if self.transform:
                image = self.transform(image)
    
            return image, self.image_filenames[idx]

In [3]:
class DepthEstimationModel(nn.Module):
    def __init__(self, pretrained=True):
        """
        Advanced Depth Estimation Model using ResNet as encoder
        
        Args:
            pretrained (bool): Use pretrained weights for encoder
        """
        super(DepthEstimationModel, self).__init__()
        
        # Use ResNet50 as encoder backbone
        backbone = torchvision.models.resnet50(pretrained=pretrained)
        
        # Encoder layers (first 4 blocks of ResNet)
        self.encoder1 = nn.Sequential(
            backbone.conv1,
            backbone.bn1,
            backbone.relu,
            backbone.maxpool
        )
        self.encoder2 = backbone.layer1
        self.encoder3 = backbone.layer2
        self.encoder4 = backbone.layer3
        self.encoder5 = backbone.layer4
        
        # Decoder layers with skip connections
        self.decoder5 = self._upconv_block(2048, 1024)
        self.decoder4 = self._upconv_block(1024, 512)
        self.decoder3 = self._upconv_block(512, 256)
        self.decoder2 = self._upconv_block(256, 64)
        self.decoder1 = nn.Sequential(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(64, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True)
        )
        
        # Final depth prediction layer
        self.final_conv = nn.Conv2d(32, 1, kernel_size=3, padding=1)
        
    def _upconv_block(self, in_channels, out_channels):
        """
        Create an upconvolution block with skip connection
        """
        return nn.Sequential(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
    
    def forward(self, x):
        """
        Forward pass through the depth estimation network
        """
        # Encoder pass
        e1 = self.encoder1(x)
        e2 = self.encoder2(e1)
        e3 = self.encoder3(e2)
        e4 = self.encoder4(e3)
        e5 = self.encoder5(e4)
        
        # Decoder pass with skip connections
        d5 = self.decoder5(e5)
        d4 = self.decoder4(d5)
        d3 = self.decoder3(d4)
        d2 = self.decoder2(d3)
        d1 = self.decoder1(d2)
        
        # Final depth prediction
        depth = self.final_conv(d1)
        
        return depth

In [4]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    """
    Train the depth estimation model with progress updates.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    train_losses, val_losses = [], []

    for epoch in range(num_epochs):
        model.train()
        train_epoch_loss = 0.0

        print(f"\nEpoch {epoch + 1}/{num_epochs} --------------------------")

        for batch_idx, (images, depths) in enumerate(train_loader):
            images, depths = images.to(device), depths.to(device)

            optimizer.zero_grad()
            predicted_depths = model(images)

            loss = criterion(predicted_depths, depths)
            loss.backward()
            optimizer.step()

            train_epoch_loss += loss.item()

            # Print batch progress every 10 batches
            if (batch_idx + 1) % 10 == 0 or batch_idx == len(train_loader) - 1:
                print(
                    f"Batch {batch_idx + 1}/{len(train_loader)} - Train Loss: {loss.item():.6f}"
                )

        # Validation phase
        model.eval()
        val_epoch_loss = 0.0

        with torch.no_grad():
            for images, depths in val_loader:
                images, depths = images.to(device), depths.to(device)
                predicted_depths = model(images)

                loss = criterion(predicted_depths, depths)
                val_epoch_loss += loss.item()

        train_loss = train_epoch_loss / len(train_loader)
        val_loss = val_epoch_loss / len(val_loader)

        train_losses.append(train_loss)
        val_losses.append(val_loss)

        # Print epoch-level progress
        print(f"Epoch {epoch + 1} Completed ✅ | Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")

    return {"train_losses": train_losses, "val_losses": val_losses}


In [5]:
def generate_predictions(model, test_loader):
    """
    Generate predictions on test dataset
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()
    
    predictions = []
    image_names = []
    
    with torch.no_grad():
        for images, filenames in test_loader:
            images = images.to(device)
            predicted_depths = model(images)
            
            # Convert predictions to numpy
            predicted_depths_np = predicted_depths.cpu().numpy()
            
            # Resize and normalize
            processed_depths = []
            for pred in predicted_depths_np:
                # Remove singleton dimensions and reshape
                pred = pred.squeeze()
                
                # Resize to 128x128
                pred_resized = cv2.resize(pred, (128, 128))
                
                # Normalize to 0-1 range
                pred_norm = (pred_resized - pred_resized.min()) / (pred_resized.max() - pred_resized.min() + 1e-6)
                
                # Convert to 8-bit image
                pred_8bit = np.uint8(pred_norm * 255)
                
                processed_depths.append(pred_8bit)
            
            predictions.extend(processed_depths)
            image_names.extend(filenames)
    
    return predictions, image_names

In [6]:
def images_to_csv_with_metadata(predictions, image_names, output_csv='predictions.csv'):
    """
    Convert depth predictions to CSV with metadata
    """
    data = []
    for idx, (pred, filename) in enumerate(zip(predictions, image_names)):
        # Flatten the image into a 1D array
        image_flat = pred.flatten()
        
        # Create row with ID, ImageID, and pixel values
        row = [idx, filename] + image_flat.tolist()
        data.append(row)
    
    # Create column names
    num_columns = len(data[0]) - 2 if data else 0
    column_names = ["id", "ImageID"] + [indx for indx in range(num_columns)]
    
    # Create DataFrame and save to CSV
    df = pd.DataFrame(data, columns=column_names)
    df.to_csv(output_csv, index=False)
    print(f"Predictions saved to {output_csv}")

In [7]:
torch.manual_seed(42)
np.random.seed(42)

In [8]:
# Data transforms
transforms_config = {
    'train': transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ]),
    'test': transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ])
}
    
# Create datasets
train_dataset = DepthEstimationDataset(
    '/kaggle/input/depth-estimation/competition-data/competition-data/training/images', 
    '/kaggle/input/depth-estimation/competition-data/competition-data/training/depths', 
    transform=transforms_config['train']
)

val_dataset = DepthEstimationDataset(
    '/kaggle/input/depth-estimation/competition-data/competition-data/validation/images', 
    '/kaggle/input/depth-estimation/competition-data/competition-data/validation/depths', 
    transform=transforms_config['test']
)

test_dataset = DepthEstimationDataset(
    '/kaggle/input/depth-estimation/competition-data/competition-data/testing/images', 
    transform=transforms_config['test']
)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [9]:
# Initialize model, loss, and optimizer
model = DepthEstimationModel()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 181MB/s] 


In [10]:
# Train the model
training_results = train_model(
    model, 
    train_loader, 
    val_loader, 
    criterion, 
    optimizer, 
    num_epochs=40
)


Epoch 1/40 --------------------------
Batch 10/209 - Train Loss: 0.048451
Batch 20/209 - Train Loss: 0.023084
Batch 30/209 - Train Loss: 0.019104
Batch 40/209 - Train Loss: 0.016305
Batch 50/209 - Train Loss: 0.018525
Batch 60/209 - Train Loss: 0.018527
Batch 70/209 - Train Loss: 0.014867
Batch 80/209 - Train Loss: 0.015225
Batch 90/209 - Train Loss: 0.012472
Batch 100/209 - Train Loss: 0.014817
Batch 110/209 - Train Loss: 0.011282
Batch 120/209 - Train Loss: 0.014483
Batch 130/209 - Train Loss: 0.020324
Batch 140/209 - Train Loss: 0.011165
Batch 150/209 - Train Loss: 0.011487
Batch 160/209 - Train Loss: 0.013081
Batch 170/209 - Train Loss: 0.011302
Batch 180/209 - Train Loss: 0.010401
Batch 190/209 - Train Loss: 0.014779
Batch 200/209 - Train Loss: 0.010706
Batch 209/209 - Train Loss: 0.012276
Epoch 1 Completed ✅ | Training Loss: 0.0207, Validation Loss: 0.0111

Epoch 2/40 --------------------------
Batch 10/209 - Train Loss: 0.011949
Batch 20/209 - Train Loss: 0.010059
Batch 30/209 

In [11]:
# Save model
torch.save(model.state_dict(), 'depth_estimation_model.pth')

In [12]:
# Generate and save predictions
predictions, image_names = generate_predictions(model, test_loader)

# Convert predictions to CSV
images_to_csv_with_metadata(predictions, image_names)

Predictions saved to predictions.csv


In [13]:
df = pd.read_csv("predictions.csv")
df

Unnamed: 0,id,ImageID,0,1,2,3,4,5,6,7,...,16374,16375,16376,16377,16378,16379,16380,16381,16382,16383
0,0,10052011.png,42,41,42,41,41,42,42,41,...,246,246,246,246,246,247,248,248,248,243
1,1,10052012.png,13,11,12,12,12,12,12,12,...,248,248,249,249,249,249,251,254,253,245
2,2,10152031.png,51,51,50,48,47,45,44,44,...,246,247,247,248,249,250,252,254,254,252
3,3,10152032.png,18,16,16,16,16,16,16,16,...,250,251,252,252,252,252,253,254,254,246
4,4,10252051.png,56,57,58,56,55,54,53,52,...,254,254,254,254,254,253,254,254,253,246
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
831,831,9751952.png,13,10,11,11,11,11,11,11,...,251,252,252,253,253,253,254,254,254,246
832,832,9851971.png,47,46,47,46,46,46,46,46,...,229,230,231,232,233,234,236,239,242,239
833,833,9851972.png,18,15,16,16,16,16,16,16,...,251,252,252,252,252,253,254,254,254,247
834,834,9951991.png,47,46,47,48,48,48,48,48,...,252,252,252,252,251,251,252,252,251,247
