# In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
import cv2




# Define dataset class
class DepthDataset(Dataset):
    """Dataset of RGB images paired by index with ``.npy`` depth maps.

    Args:
        image_paths: Paths of the images, one per sample (read with
            ``cv2.imread``).
        depth_paths: Paths of the depth maps saved as NumPy arrays, in the
            same order as ``image_paths``.
        transform: Optional callable applied to the RGB image. Its result
            must still be channels-last (H, W, C), because ``__getitem__``
            permutes the image to (C, H, W) afterwards.

    Raises:
        ValueError: If ``image_paths`` and ``depth_paths`` differ in length.
    """

    def __init__(self, image_paths, depth_paths, transform=None):
        # Samples are matched by index, so a length mismatch would otherwise
        # only surface as an IndexError (or silently unused files) mid-epoch.
        if len(image_paths) != len(depth_paths):
            raise ValueError(
                f"image_paths has {len(image_paths)} entries but depth_paths "
                f"has {len(depth_paths)}; they must match one-to-one"
            )
        self.image_paths = image_paths
        self.depth_paths = depth_paths
        self.transform = transform

    def __len__(self) -> int:
        """Return the total number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            A ``(image, depth)`` tuple of float32 tensors: the image permuted
            from (H, W, C) to (C, H, W), and the depth map with a leading
            channel axis added.

        Raises:
            OSError: If the image file is missing or cannot be decoded.
        """
        image_path = self.image_paths[idx]
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise OSError(
                f"Could not read image (missing or not a readable image): {image_path!r}"
            )
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads as BGR
        depth = np.load(self.depth_paths[idx])  # Depth maps are stored as numpy arrays

        # Apply transformations if any
        if self.transform is not None:
            image = self.transform(image)

        # as_tensor accepts both arrays and tensors, so a transform that
        # already returns a tensor does not trigger a copy-construct warning.
        image_tensor = torch.as_tensor(image, dtype=torch.float32).permute(2, 0, 1)
        depth_tensor = torch.as_tensor(depth, dtype=torch.float32).unsqueeze(0)
        return image_tensor, depth_tensor




# Define CNN model for depth estimation
class DepthEstimationCNN(nn.Module):
    """Small convolutional encoder-decoder mapping an RGB image to a depth map.

    The encoder halves the spatial resolution twice with max-pooling; the
    decoder doubles it twice with transposed convolutions and ends in a ReLU,
    so predictions are non-negative. The output has one channel and the same
    height and width as the input.
    """

    def __init__(self):
        super().__init__()
        # Encoder: Feature extraction layers
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        # Decoder: Upsampling layers for depth prediction
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 1, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
        )

    def forward(self, x):
        """Predict a depth map for ``x``.

        Args:
            x: Image tensor with 3 channels, shaped (N, 3, H, W) or (3, H, W).

        Returns:
            Tensor with 1 channel and the same height/width as ``x``.
        """
        target_size = tuple(x.shape[-2:])
        x = self.encoder(x)
        x = self.decoder(x)

        # Each max-pool floors odd sizes, while each transposed conv exactly
        # doubles, so the decoder output is smaller than the input whenever
        # H or W is not a multiple of 4. Resize only in that case so inputs
        # that already round-trip exactly are left untouched.
        if tuple(x.shape[-2:]) != target_size:
            unbatched = x.dim() == 3  # bilinear interpolate needs (N, C, H, W)
            if unbatched:
                x = x.unsqueeze(0)
            x = nn.functional.interpolate(
                x, size=target_size, mode="bilinear", align_corners=False
            )
            if unbatched:
                x = x.squeeze(0)
        return x




# Training loop
def train_model(model, dataloader, optimizer, criterion, num_epochs=10):
    """Train ``model`` in place and print the mean batch loss after each epoch.

    Args:
        model: Module to optimise; it is switched to training mode.
        dataloader: Iterable yielding ``(images, depths)`` tensor batches.
        optimizer: Optimizer that updates the parameters of ``model``.
        criterion: Loss callable invoked as ``criterion(outputs, depths)``.
        num_epochs: Number of passes over ``dataloader``.
    """
    # Put each batch on the device the model already lives on instead of
    # assuming CUDA, so the same loop works on CPU-only machines.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.train()  # Set the model to training mode
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        num_batches = 0  # counted here so loaders without __len__ also work
        for images, depths in dataloader:
            images, depths = images.to(device), depths.to(device)
            optimizer.zero_grad()  # Reset gradients
            outputs = model(images)  # Forward pass
            loss = criterion(outputs, depths)  # Compute loss
            loss.backward()  # Backward pass
            optimizer.step()  # Update model parameters
            epoch_loss += loss.item()
            num_batches += 1

        # An empty loader has no meaningful average; report nan rather than
        # dividing by zero.
        mean_loss = epoch_loss / num_batches if num_batches else float("nan")
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}")




# Main execution
if __name__ == "__main__":
    # Placeholder paths: replace with real files. Images and depth maps are
    # paired by position in the two lists.
    image_paths = ["path_to_image_1", "path_to_image_2"]  # Paths to RGB images
    depth_paths = ["path_to_depth_1.npy", "path_to_depth_2.npy"]  # Paths to depth maps

    # Create data loader
    dataset = DepthDataset(image_paths, depth_paths, transform=None)
    dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

    # Use the GPU when one is available instead of requiring CUDA outright.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize model, loss function, and optimizer
    model = DepthEstimationCNN().to(device)
    criterion = nn.MSELoss()  # Mean Squared Error loss
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train the model
    train_model(model, dataloader, optimizer, criterion)
