In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import os
import cv2
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm

In [3]:
class DepthDataset(Dataset):
    """Dataset of (RGB image, depth map) pairs addressed through a DataFrame.

    Column 0 of ``dataframe`` is read as the RGB image path and column 1 as
    the depth-map path. Both files are resized to 264x264, centre-cropped to
    256x256 and converted to tensors; the RGB tensor is additionally
    normalised with the per-channel mean/std configured in ``self.transform``.
    """

    def __init__(self, dataframe):
        self.dataframe = dataframe

        def geometry_steps():
            # Resize + centre crop + tensor conversion, shared by both pipelines.
            # A fresh list is built per call so the two pipelines own separate
            # transform objects.
            return [
                transforms.Resize((264, 264)),
                transforms.CenterCrop((256, 256)),
                transforms.ToTensor(),
            ]

        # Only the RGB input gets channel-wise normalisation.
        rgb_normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        )
        self.transform = transforms.Compose(geometry_steps() + [rgb_normalize])
        self.depth_transform = transforms.Compose(geometry_steps())

    def __len__(self):
        """Number of image/depth pairs (one per DataFrame row)."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load row ``idx`` and return ``(image_tensor, depth_tensor)``."""
        # Paths are looked up by position: column 0 = image, column 1 = depth.
        rgb_path = self.dataframe.iloc[idx, 0]
        depth_map_path = self.dataframe.iloc[idx, 1]

        rgb = Image.open(rgb_path).convert("RGB")
        # Depth maps are read as single-channel ("L") images.
        depth_map = Image.open(depth_map_path).convert("L")

        return self.transform(rgb), self.depth_transform(depth_map)

In [4]:
# Root folder of the competition data. Each split is laid out as
# <DATA_ROOT>/<split>/images and <DATA_ROOT>/<split>/depths.
DATA_ROOT = "/kaggle/input/depth-estimation/competition-data/competition-data"


def build_pairs_df(split):
    """Return a DataFrame of image/depth file paths for one data split.

    Args:
        split: Name of the split sub-folder under ``DATA_ROOT``
            (``"training"`` or ``"validation"`` in this notebook).

    Returns:
        DataFrame with columns ``image_path`` and ``depth_path`` holding full
        paths. Files are paired purely by position after sorting both
        directory listings.

    Raises:
        ValueError: If the two directories hold a different number of files,
            in which case positional pairing cannot be correct.
    """
    images_dir = os.path.join(DATA_ROOT, split, "images")
    depths_dir = os.path.join(DATA_ROOT, split, "depths")

    image_names = sorted(os.listdir(images_dir))
    depth_names = sorted(os.listdir(depths_dir))

    if len(image_names) != len(depth_names):
        raise ValueError(
            f"{split}: found {len(image_names)} images but {len(depth_names)} depth maps"
        )

    # NOTE(review): pairing assumes both listings sort into the same order —
    # confirm the image and depth files share a naming scheme.
    return pd.DataFrame({
        "image_path": [os.path.join(images_dir, name) for name in image_names],
        "depth_path": [os.path.join(depths_dir, name) for name in depth_names],
    })


# Create DataFrames for training and validation data
train_df = build_pairs_df("training")
val_df = build_pairs_df("validation")

# Create Datasets and DataLoaders
train_dataset = DepthDataset(train_df)
val_dataset = DepthDataset(val_df)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)  # No shuffling for validation

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class UNet(nn.Module):
    """U-Net style encoder-decoder producing a per-pixel map in [0, 1].

    Five double-convolution encoder stages (64, 128, 256, 512, 1024 channels)
    are separated by 2x2 max-pooling. Four decoder stages each upsample with a
    stride-2 transposed convolution, concatenate the matching encoder feature
    map along the channel axis, and apply another double-convolution block.
    A 1x1 convolution followed by a sigmoid produces the output.

    Args:
        in_channels: Number of channels of the input image. Defaults to 3,
            which reproduces the original hard-coded RGB input.
        out_channels: Number of channels of the output map. Defaults to 1,
            which reproduces the original hard-coded single-channel output.

    Note:
        The input height and width must be multiples of 16. The four pooling
        steps halve the resolution and the four transposed convolutions double
        it again; for any other size the skip-connection ``torch.cat`` fails
        on mismatched spatial dimensions.
    """

    # Group count for every GroupNorm layer; all stage widths used below
    # (64 ... 1024) are multiples of it.
    _NORM_GROUPS = 16

    def __init__(self, in_channels=3, out_channels=1):
        super().__init__()

        # Layer names, Sequential layout and creation order are kept stable so
        # state_dict keys and seeded initialisation do not change.
        self.encoder1 = self._conv_block(in_channels, 64)
        self.encoder2 = self._conv_block(64, 128)
        self.encoder3 = self._conv_block(128, 256)
        self.encoder4 = self._conv_block(256, 512)
        self.encoder5 = self._conv_block(512, 1024)

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Each decoder block takes twice the upconv width because the skip
        # feature map is concatenated on the channel axis first.
        self.upconv1 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.decoder1 = self._conv_block(1024, 512)

        self.upconv2 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.decoder2 = self._conv_block(512, 256)

        self.upconv3 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.decoder3 = self._conv_block(256, 128)

        self.upconv4 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.decoder4 = self._conv_block(128, 64)

        self.final_conv = nn.Conv2d(64, out_channels, kernel_size=1)

    @classmethod
    def _conv_block(cls, in_channels, out_channels):
        """Two 3x3 same-padding convolutions, each followed by GroupNorm and ReLU."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.GroupNorm(cls._NORM_GROUPS, out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.GroupNorm(cls._NORM_GROUPS, out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Run the network on a batch ``x`` and return the sigmoid-activated map.

        The output has the same batch size and spatial size as ``x`` and
        ``out_channels`` channels.
        """
        # Encoder: keep every stage's output for the skip connections.
        enc1 = self.encoder1(x)
        enc2 = self.encoder2(self.pool(enc1))
        enc3 = self.encoder3(self.pool(enc2))
        enc4 = self.encoder4(self.pool(enc3))
        enc5 = self.encoder5(self.pool(enc4))

        # Decoder: upsample, concatenate the matching encoder output, refine.
        dec = enc5
        stages = (
            (self.upconv1, self.decoder1, enc4),
            (self.upconv2, self.decoder2, enc3),
            (self.upconv3, self.decoder3, enc2),
            (self.upconv4, self.decoder4, enc1),
        )
        for upconv, decoder, skip in stages:
            dec = decoder(torch.cat((upconv(dec), skip), dim=1))

        return torch.sigmoid(self.final_conv(dec))

In [6]:
def loss_function(y_true, y_pred):
    """Weighted sum of mean absolute error and mean squared error.

    Returns ``10.0 * mean(|y_pred - y_true|) + 1.0 * mean(|y_pred - y_true| ** 2)``
    as a scalar tensor. Both terms depend only on the absolute difference, so
    the result is the same if the two arguments are swapped.
    """
    abs_err = torch.abs(y_pred - y_true)

    mae_term = torch.mean(abs_err)
    mse_term = torch.mean(abs_err ** 2)

    # The L1 term is weighted ten times heavier than the L2 term.
    mae_weight, mse_weight = 10.0, 1.0
    return (mae_weight * mae_term) + (mse_weight * mse_term)

In [7]:
# Training setup: use the GPU when CUDA is available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = UNet().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train for a fixed number of epochs, evaluating on the validation set after each
epochs = 12
for epoch in range(epochs):
    # --- Training pass: sum per-batch losses so an epoch mean can be reported ---
    model.train()
    total_loss = 0

    for images, depths in tqdm(train_loader):
        images = images.to(device)
        depths = depths.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        # loss_function only uses |a - b|, so the argument order does not matter
        loss = loss_function(outputs, depths)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean of the per-batch losses (not weighted by batch size)
    train_loss = total_loss / len(train_loader)

    # --- Validation pass: eval mode, no gradient tracking ---
    model.eval()
    total_val_loss = 0
    with torch.no_grad():
        for images, depths in val_loader:
            images = images.to(device)
            depths = depths.to(device)
            outputs = model(images)
            loss = loss_function(outputs, depths)
            total_val_loss += loss.item()

    val_loss = total_val_loss / len(val_loader)

    print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

100%|██████████| 836/836 [04:42<00:00,  2.96it/s]


Epoch [1/12], Train Loss: 1.0279, Val Loss: 0.9607


100%|██████████| 836/836 [03:57<00:00,  3.52it/s]


Epoch [2/12], Train Loss: 0.9333, Val Loss: 0.8715


100%|██████████| 836/836 [03:59<00:00,  3.49it/s]


Epoch [3/12], Train Loss: 0.8509, Val Loss: 0.8452


100%|██████████| 836/836 [03:58<00:00,  3.51it/s]


Epoch [4/12], Train Loss: 0.8368, Val Loss: 0.8308


100%|██████████| 836/836 [04:05<00:00,  3.41it/s]


Epoch [5/12], Train Loss: 0.8234, Val Loss: 0.8114


100%|██████████| 836/836 [03:59<00:00,  3.48it/s]


Epoch [6/12], Train Loss: 0.8123, Val Loss: 0.7942


100%|██████████| 836/836 [04:06<00:00,  3.39it/s]


Epoch [7/12], Train Loss: 0.7971, Val Loss: 0.8178


100%|██████████| 836/836 [04:05<00:00,  3.40it/s]


Epoch [8/12], Train Loss: 0.7827, Val Loss: 0.7675


100%|██████████| 836/836 [04:01<00:00,  3.46it/s]


Epoch [9/12], Train Loss: 0.7653, Val Loss: 0.7489


100%|██████████| 836/836 [04:00<00:00,  3.48it/s]


Epoch [10/12], Train Loss: 0.7538, Val Loss: 0.7485


100%|██████████| 836/836 [04:01<00:00,  3.47it/s]


Epoch [11/12], Train Loss: 0.7400, Val Loss: 0.7298


100%|██████████| 836/836 [03:58<00:00,  3.51it/s]


Epoch [12/12], Train Loss: 0.7263, Val Loss: 0.7414


In [8]:
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from PIL import Image
import os

class TestDepthDataset(Dataset):
    """Dataset over every file in ``images_dir``, yielding ``(image, filename)``.

    Files are enumerated once at construction time in sorted order. Each item
    is loaded as an RGB image and, when a ``transform`` was supplied, passed
    through it before being returned together with its bare filename.
    """

    def __init__(self, images_dir, transform=None):
        self.images_dir = images_dir
        self.transform = transform
        # Sorted so item order is the same on every run.
        self.image_files = sorted(os.listdir(images_dir))

    def __len__(self):
        """Number of files found in ``images_dir``."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image, filename)`` for the ``idx``-th file."""
        name = self.image_files[idx]
        rgb = Image.open(os.path.join(self.images_dir, name)).convert("RGB")

        # The filename travels with the image so predictions can be saved
        # under the same name.
        return (self.transform(rgb) if self.transform else rgb), name

# Define transformation
# The model is trained on inputs normalised with these ImageNet mean/std values
# (DepthDataset.transform), so the same normalisation must be applied at
# inference; without it the network sees a different input distribution.
# NOTE(review): training uses 256x256 crops while inference resizes to 128x128.
# The size is left unchanged because the saved prediction size may be what the
# submission script expects — confirm before changing it.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Initialize test dataset & loader
test_images_dir = "/kaggle/input/depth-estimation/competition-data/competition-data/testing/images"
test_dataset = TestDepthDataset(test_images_dir, transform=transform)
# batch_size=1 and no shuffling: predictions are written one file at a time
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [9]:
import torch
import numpy as np
import cv2
import os

# Folder that receives one predicted depth image per test file
output_dir = "data/sample_solution"
os.makedirs(output_dir, exist_ok=True)

# Inference only: eval mode and no gradient tracking
model.eval()

with torch.no_grad():
    for idx, (image, filename) in enumerate(test_loader):
        image = image.to(device)

        # Drop the singleton dimensions and move the result to a NumPy array
        prediction = model(image).squeeze().cpu().numpy()

        # Per-image min-max rescale to [0, 1); the epsilon guards against a
        # constant prediction (max == min)
        lowest, highest = prediction.min(), prediction.max()
        prediction = (prediction - lowest) / (highest - lowest + 1e-6)

        # Scale to 8-bit grey levels (fractional part is truncated)
        prediction = np.uint8(prediction * 255)

        # filename is a length-1 batch of names; write under the original name
        target_path = os.path.join(output_dir, filename[0])
        cv2.imwrite(target_path, prediction)


In [10]:
!python /kaggle/input/depth-estimation/imgs2csv.py

In [11]:
# rm -rf /kaggle/working/*