In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of every
# file found; the sub-directory list yielded by os.walk is not needed here.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [11]:
# torch is imported here so this cell also runs on a fresh kernel; without it
# the cell raises NameError unless a cell that imports torch was executed first.
import torch

# Explicit CPU device handle, displayed as the cell's output.
device = torch.device('cpu')
device

device(type='cpu')

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
import pandas as pd

# Release cached CUDA memory possibly left over from earlier runs in this kernel
# (NOTE(review): presumably has no effect on a CPU-only session — confirm).
torch.cuda.empty_cache()

# Define a UNet architecture for depth estimation
class UNet(nn.Module):
    """U-Net style encoder/decoder used here for depth estimation.

    The network has four encoder stages, a bottleneck and four decoder stages.
    Each encoder stage is followed by 2x2 max pooling; each decoder stage
    upsamples with a stride-2 transposed convolution, concatenates the matching
    encoder output along the channel axis (skip connection) and applies a
    double-convolution block. A final 1x1 convolution maps to ``out_channels``.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels of the output tensor.
        base_channels: Width of the first encoder stage. Every deeper stage
            doubles it (the bottleneck uses ``16 * base_channels``). The
            default of 64 gives the 64/128/256/512/1024 layout.

    Input height/width do not have to be multiples of 16: when pooling rounds a
    size down, the upsampled decoder map is zero-padded to the size of the skip
    connection before concatenation, so the output always has the same spatial
    size as the input.
    """

    def __init__(self, in_channels=1, out_channels=1, base_channels=64):
        super(UNet, self).__init__()

        c1 = base_channels
        c2, c3, c4, c5 = c1 * 2, c1 * 4, c1 * 8, c1 * 16

        # NOTE: attribute names and creation order are kept stable on purpose;
        # they define the state-dict keys of saved checkpoints.

        # Encoder
        self.enc1 = self._block(in_channels, c1)
        self.enc2 = self._block(c1, c2)
        self.enc3 = self._block(c2, c3)
        self.enc4 = self._block(c3, c4)

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Bottleneck
        self.bottleneck = self._block(c4, c5)

        # Decoder: each dec block receives upsampled + skip channels (2x width)
        self.upconv4 = nn.ConvTranspose2d(c5, c4, kernel_size=2, stride=2)
        self.dec4 = self._block(c5, c4)

        self.upconv3 = nn.ConvTranspose2d(c4, c3, kernel_size=2, stride=2)
        self.dec3 = self._block(c4, c3)

        self.upconv2 = nn.ConvTranspose2d(c3, c2, kernel_size=2, stride=2)
        self.dec2 = self._block(c3, c2)

        self.upconv1 = nn.ConvTranspose2d(c2, c1, kernel_size=2, stride=2)
        self.dec1 = self._block(c2, c1)

        self.final_conv = nn.Conv2d(c1, out_channels, kernel_size=1)

    def _block(self, in_channels, out_channels):
        """Return two (3x3 conv -> batch norm -> ReLU) layers in sequence."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    @staticmethod
    def _match_size(upsampled, skip):
        """Zero-pad ``upsampled`` so its height/width equal those of ``skip``.

        Max pooling floors odd sizes, so after the stride-2 upsampling the map
        can be one pixel short per axis. When the sizes already agree the
        tensor is returned untouched.
        """
        pad_h = skip.size(2) - upsampled.size(2)
        pad_w = skip.size(3) - upsampled.size(3)
        if pad_h == 0 and pad_w == 0:
            return upsampled
        # F.pad order for the last two dims: (left, right, top, bottom)
        return nn.functional.pad(
            upsampled,
            [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2],
        )

    def forward(self, x):
        """Run the network on ``x`` and return a map of the same spatial size."""
        # Encoder
        enc1 = self.enc1(x)
        enc2 = self.enc2(self.pool(enc1))
        enc3 = self.enc3(self.pool(enc2))
        enc4 = self.enc4(self.pool(enc3))

        # Bottleneck
        bottleneck = self.bottleneck(self.pool(enc4))

        # Decoder
        dec4 = self._match_size(self.upconv4(bottleneck), enc4)
        dec4 = torch.cat((dec4, enc4), dim=1)
        dec4 = self.dec4(dec4)

        dec3 = self._match_size(self.upconv3(dec4), enc3)
        dec3 = torch.cat((dec3, enc3), dim=1)
        dec3 = self.dec3(dec3)

        dec2 = self._match_size(self.upconv2(dec3), enc2)
        dec2 = torch.cat((dec2, enc2), dim=1)
        dec2 = self.dec2(dec2)

        dec1 = self._match_size(self.upconv1(dec2), enc1)
        dec1 = torch.cat((dec1, enc1), dim=1)
        dec1 = self.dec1(dec1)

        return self.final_conv(dec1)

# Custom Dataset for SPAD images and depth maps
class SPADDataset(Dataset):
    """Paired dataset of SPAD images and depth maps stored as PNG files.

    The file list is taken from ``spad_dir`` (names ending in ``.png``, sorted);
    the depth map for an item is looked up under the same file name inside
    ``depth_dir``. Both images are opened as 8-bit grayscale and, when a
    ``transform`` is given, passed through it one after the other.
    """

    def __init__(self, spad_dir, depth_dir, transform=None):
        self.spad_dir = spad_dir
        self.depth_dir = depth_dir
        self.transform = transform
        png_names = [entry for entry in os.listdir(spad_dir) if entry.endswith('.png')]
        png_names.sort()
        self.spad_files = png_names

    def __len__(self):
        """Number of SPAD images found in ``spad_dir``."""
        return len(self.spad_files)

    def __getitem__(self, idx):
        """Return the ``(spad_img, depth_img)`` pair at position ``idx``."""
        name = self.spad_files[idx]

        # Both inputs are converted to single-channel ('L') images.
        spad_img = Image.open(os.path.join(self.spad_dir, name)).convert('L')
        depth_img = Image.open(os.path.join(self.depth_dir, name)).convert('L')

        if not self.transform:
            return spad_img, depth_img

        spad_img = self.transform(spad_img)
        depth_img = self.transform(depth_img)
        return spad_img, depth_img

class TestSPADDataset(Dataset):
    """Unlabelled dataset of SPAD PNG images.

    Items are ``(image, filename)`` pairs so that a caller can associate each
    result with the file it came from. Files are the sorted ``.png`` entries of
    ``spad_dir``; images are opened as 8-bit grayscale and passed through
    ``transform`` when one is supplied.
    """

    def __init__(self, spad_dir, transform=None):
        self.spad_dir = spad_dir
        self.transform = transform
        png_names = [entry for entry in os.listdir(spad_dir) if entry.endswith('.png')]
        png_names.sort()
        self.spad_files = png_names

    def __len__(self):
        """Number of SPAD images found in ``spad_dir``."""
        return len(self.spad_files)

    def __getitem__(self, idx):
        """Return ``(spad_img, filename)`` for position ``idx``."""
        filename = self.spad_files[idx]

        # Open as a single-channel ('L') image.
        spad_img = Image.open(os.path.join(self.spad_dir, filename)).convert('L')

        if self.transform:
            spad_img = self.transform(spad_img)

        return spad_img, filename
        
# Training function
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, device='cuda'):
    """Train ``model`` and return it carrying the best-validation weights.

    Every epoch runs one optimisation pass over ``train_loader`` followed by a
    gradient-free pass over ``val_loader``. Whenever the mean validation loss
    improves, the weights are saved to ``best_spad_depth_model.pth`` in the
    current working directory. After the last epoch that checkpoint is loaded
    back into ``model``, so the returned model is the best one seen during this
    call rather than whatever the final epoch produced.

    Args:
        model: Network to train; it is moved to ``device``.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        num_epochs: Number of epochs to run.
        device: Device on which the model and the batches are placed.

    Returns:
        The same ``model`` object. If at least one checkpoint was written in
        this call its weights are the checkpointed ones; otherwise the weights
        are left as they were after the loop.
    """
    checkpoint_path = 'best_spad_depth_model.pth'

    model.to(device)
    best_val_loss = float('inf')
    # Guards against loading a stale file from an earlier run when no epoch of
    # this call produced a checkpoint (e.g. num_epochs == 0 or NaN losses).
    checkpoint_written = False

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0

        train_pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Train]')

        for spad_imgs, depth_maps in train_pbar:
            spad_imgs = spad_imgs.to(device)
            depth_maps = depth_maps.to(device)

            # Forward pass
            outputs = model(spad_imgs)
            loss = criterion(outputs, depth_maps)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Read the scalar once and reuse it for the sum and the progress bar
            batch_loss = loss.item()
            train_loss += batch_loss

            train_pbar.set_postfix({'loss': batch_loss})

        avg_train_loss = train_loss / len(train_loader)

        # Validation phase
        model.eval()
        val_loss = 0.0

        val_pbar = tqdm(val_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Val]')
        with torch.no_grad():
            for spad_imgs, depth_maps in val_pbar:
                spad_imgs = spad_imgs.to(device)
                depth_maps = depth_maps.to(device)

                outputs = model(spad_imgs)
                loss = criterion(outputs, depth_maps)

                batch_loss = loss.item()
                val_loss += batch_loss
                val_pbar.set_postfix({'loss': batch_loss})

        avg_val_loss = val_loss / len(val_loader)

        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')

        # Save the best model
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_written = True

    # Hand back the best weights instead of the last-epoch ones, which may have
    # a worse validation loss.
    if checkpoint_written:
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    return model

# Function to generate predictions for test set
def generate_predictions(model, test_loader, output_dir, device='cuda'):
    """Run ``model`` over ``test_loader`` and save each prediction as a PNG.

    Each output map is min-max scaled on its own to the 0-255 range and written
    to ``output_dir`` as an 8-bit image. When the loader yields the source file
    names as the second batch element, every prediction is saved under the base
    name of its source file; otherwise the name falls back to
    ``pred_<running index>.png``.

    Args:
        model: Trained network; it is moved to ``device`` and put in eval mode.
        test_loader: Iterable of ``(inputs, names)`` batches.
        output_dir: Directory for the PNG files (created if missing).
        device: Device on which inference runs.
    """
    # Make sure model and inputs live on the same device even when this
    # function is called on its own.
    model.to(device)
    model.eval()
    os.makedirs(output_dir, exist_ok=True)

    # Running index over all samples; used for fallback names and independent
    # of test_loader.batch_size (which can be None).
    sample_index = 0

    with torch.no_grad():
        for spad_imgs, names in test_loader:
            spad_imgs = spad_imgs.to(device)
            outputs = model(spad_imgs)

            # Convert to numpy and save
            for j, output in enumerate(outputs):
                depth_map = output.cpu().numpy().squeeze()

                # Normalize to 0-255 for saving as image. A constant map has a
                # zero range; it is written as all zeros instead of dividing
                # by zero and casting NaNs to uint8.
                low = depth_map.min()
                value_range = depth_map.max() - low
                if value_range > 0:
                    scaled = (depth_map - low) / value_range * 255
                else:
                    scaled = np.zeros_like(depth_map)
                depth_map = scaled.astype(np.uint8)

                # Prefer the source file name so outputs map back to inputs.
                out_name = None
                if isinstance(names, (list, tuple)) and j < len(names) and isinstance(names[j], str):
                    out_name = os.path.basename(names[j])
                if not out_name:
                    out_name = f'pred_{sample_index}.png'

                # Save as image
                img = Image.fromarray(depth_map)
                img.save(os.path.join(output_dir, out_name))
                sample_index += 1

# Main execution
def main():
    """Train the UNet on the SPAD training split and predict the test split.

    Builds the datasets and loaders from the Kaggle competition folders, trains
    with MSE loss and Adam, then writes the test predictions to the working
    directory.
    """
    # Kaggle input folders and the output folder for predictions
    train_spad_dir = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/training-images'
    train_depth_dir = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/training-depths'
    val_spad_dir = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/validation-images'
    val_depth_dir = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/validation-depths'
    test_spad_dir = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/testing-images'
    output_dir = '/kaggle/working/predictions'

    # Training configuration
    batch_size, learning_rate, num_epochs = 32, 0.001, 20

    # Every image is resized to 256x256 and converted to a tensor
    preprocess = transforms.Compose([transforms.Resize((256, 256)), transforms.ToTensor()])

    # One dataset per split, all sharing the same preprocessing
    train_set = SPADDataset(train_spad_dir, train_depth_dir, transform=preprocess)
    val_set = SPADDataset(val_spad_dir, val_depth_dir, transform=preprocess)
    test_set = TestSPADDataset(test_spad_dir, transform=preprocess)

    # Only the training split is shuffled
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size)
    test_loader = DataLoader(test_set, batch_size=batch_size)

    # Network, objective (mean squared error) and optimizer
    net = UNet(in_channels=1, out_channels=1)
    loss_fn = nn.MSELoss()
    adam = optim.Adam(net.parameters(), lr=learning_rate)

    # Use the GPU when one is available, otherwise fall back to the CPU
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    run_device = torch.device(device_name)

    net = train_model(
        net,
        train_loader,
        val_loader,
        loss_fn,
        adam,
        num_epochs=num_epochs,
        device=run_device,
    )

    # Write one prediction image per test sample
    generate_predictions(net, test_loader, output_dir, device=run_device)

    print("Training and prediction completed!")

if __name__ == "__main__":
    main()


Epoch 1/20 [Train]: 100%|██████████| 209/209 [04:17<00:00,  1.23s/it, loss=0.0483]
Epoch 1/20 [Val]: 100%|██████████| 27/27 [00:18<00:00,  1.47it/s, loss=0.0356]


Epoch [1/20], Train Loss: 0.0657, Val Loss: 0.0538


Epoch 2/20 [Train]: 100%|██████████| 209/209 [03:14<00:00,  1.08it/s, loss=0.0411]
Epoch 2/20 [Val]: 100%|██████████| 27/27 [00:10<00:00,  2.53it/s, loss=0.0359]


Epoch [2/20], Train Loss: 0.0451, Val Loss: 0.0490


Epoch 3/20 [Train]: 100%|██████████| 209/209 [03:14<00:00,  1.08it/s, loss=0.0505]
Epoch 3/20 [Val]: 100%|██████████| 27/27 [00:10<00:00,  2.58it/s, loss=0.0348]


Epoch [3/20], Train Loss: 0.0426, Val Loss: 0.0424


Epoch 4/20 [Train]: 100%|██████████| 209/209 [03:13<00:00,  1.08it/s, loss=0.0527]
Epoch 4/20 [Val]: 100%|██████████| 27/27 [00:10<00:00,  2.61it/s, loss=0.0305]


Epoch [4/20], Train Loss: 0.0397, Val Loss: 0.0430


Epoch 5/20 [Train]:  68%|██████▊   | 143/209 [02:12<01:00,  1.08it/s, loss=0.0362]

In [None]:
import matplotlib.pyplot as plt



In [3]:
import os
import cv2
import pandas as pd
import numpy as np

def images_to_csv_with_metadata(image_folder, output_csv):
    """Flatten every PNG in ``image_folder`` into one CSV row.

    Files are processed in sorted name order. Each image is read unchanged,
    resized to 128x128, min-max normalised to 0-255 (uint8) and flattened. The
    CSV has the columns ``id`` (0-based row number), ``ImageID`` (file name)
    and one column per pixel named ``0 .. N-1``. If the folder contains no PNG
    file, only the ``id,ImageID`` header is written.

    Args:
        image_folder: Directory containing the ``.png`` images.
        output_csv: Path of the CSV file to write.

    Raises:
        ValueError: If a PNG file cannot be read by OpenCV.
    """
    # Filter before enumerating so ids stay contiguous even when the folder
    # also holds non-PNG entries.
    png_names = sorted(name for name in os.listdir(image_folder) if name.endswith(".png"))

    rows = []
    for idx, filename in enumerate(png_names):
        filepath = os.path.join(image_folder, filename)

        # Read the image; cv2.imread signals failure by returning None
        image = cv2.imread(filepath, cv2.IMREAD_UNCHANGED)
        if image is None:
            raise ValueError(f"Could not read image: {filepath}")

        image = cv2.resize(image, (128, 128))
        image = image / 255.
        # The small epsilon keeps the division defined for constant images
        image = (image - np.min(image)) / (np.max(image) - np.min(image) + 1e-6)
        image = np.uint8(image * 255.)

        # Row layout: ID, ImageID (filename), then the flattened pixel values
        rows.append([idx, filename] + image.flatten().tolist())

    # Create a DataFrame
    num_columns = len(rows[0]) - 2 if rows else 0
    column_names = ["id", "ImageID"] + list(range(num_columns))
    df = pd.DataFrame(rows, columns=column_names)

    # Save to CSV
    df.to_csv(output_csv, index=False)

# Folder holding the prediction images to export
predictions_folder = "/kaggle/working/predictions"

# Destination of the generated CSV file
predictions_csv = "/kaggle/working/Final.csv"

# Convert every prediction image in the folder into one row of the CSV
images_to_csv_with_metadata(predictions_folder, predictions_csv)