In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory



# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from scipy.ndimage import median_filter, percentile_filter
import os

# Sample SPAD images passed to show_median_filtered() further down in this cell.
# These are Kaggle dataset paths; replace them with your own image paths when
# running outside that environment.
spad_image_paths = [
 '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/testing-images/10052011.png',    # first sample SPAD image
 '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/testing-images/10052012.png'       # second sample SPAD image
]

def show_median_filtered(spad_paths, filter_size=7, percentile=None):
    """Plot each SPAD image next to a rank-filtered copy of itself.

    One figure row is drawn per path: the grayscale original on the left and
    the filtered image on the right.

    Args:
        spad_paths: Sequence of image file paths that PIL can open.
        filter_size: Side length of the square filter window. It is used for
            whichever filter is selected.
        percentile: ``None`` (default) shows the median filter, matching the
            function name. A number selects
            ``scipy.ndimage.percentile_filter`` with that percentile instead,
            e.g. ``percentile=80``.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    n = len(spad_paths)
    if n == 0:
        # Nothing to draw; also avoids asking matplotlib for a zero-height figure.
        return

    plt.figure(figsize=(8, 4 * n))
    for i, path in enumerate(spad_paths):
        # Load as 8-bit grayscale and filter the array (not the PIL object).
        spad_np = np.array(Image.open(path).convert('L'))

        if percentile is None:
            filtered = median_filter(spad_np, size=filter_size)
            filtered_title = f'Median Filtered (size={filter_size})'
        else:
            filtered = percentile_filter(spad_np, percentile=percentile, size=filter_size)
            filtered_title = f'Percentile {percentile} Filtered (size={filter_size})'

        # Left column: original
        plt.subplot(n, 2, 2 * i + 1)
        plt.imshow(spad_np, cmap='gray')
        plt.title(f'Original SPAD Image\n{os.path.basename(path)}')
        plt.axis('off')

        # Right column: the filtered image that the title describes
        plt.subplot(n, 2, 2 * i + 2)
        plt.imshow(filtered, cmap='gray')
        plt.title(filtered_title)
        plt.axis('off')
    plt.tight_layout()
    plt.show()

# Render the original-vs-filtered comparison for the sample images listed above.
show_median_filtered(spad_image_paths, filter_size=3)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
from scipy.ndimage import median_filter, percentile_filter

# Clear GPU cache before starting. This runs when the cell is executed, before
# any model or data loader below is created.
torch.cuda.empty_cache()

class CoarseNetwork(nn.Module):
    """Encoder-decoder CNN mapping a 1-channel image to a 1-channel map.

    The encoder downsamples by 16 overall (a stride-2 7x7 convolution followed
    by three pooling stages) while widening 1 -> 64 -> 128 -> 256 -> 512
    channels. The decoder upsamples by 8 with three stride-2 transposed
    convolutions and projects back to one channel. For inputs whose sides are
    multiples of 16 the output therefore has half the input height and width
    (e.g. 256x256 -> 128x128).
    """

    def __init__(self):
        super().__init__()

        # Layers are appended in a fixed order so every module keeps its
        # position inside the Sequential (and hence its state_dict key).
        down = [
            nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ]
        # (in_channels, out_channels, followed_by_pool)
        for c_in, c_out, pooled in ((64, 128, True), (128, 256, True), (256, 512, False)):
            down.append(nn.Conv2d(c_in, c_out, kernel_size=3, padding=1))
            down.append(nn.BatchNorm2d(c_out))
            down.append(nn.ReLU(inplace=True))
            if pooled:
                down.append(nn.MaxPool2d(kernel_size=2, stride=2))
        self.encoder = nn.Sequential(*down)

        up = []
        for c_in, c_out in ((512, 256), (256, 128), (128, 64)):
            # Each transposed convolution doubles the spatial size.
            up.append(nn.ConvTranspose2d(c_in, c_out, kernel_size=3, stride=2,
                                         padding=1, output_padding=1))
            up.append(nn.BatchNorm2d(c_out))
            up.append(nn.ReLU(inplace=True))
        # Final projection to a single output channel (no activation).
        up.append(nn.Conv2d(64, 1, kernel_size=3, padding=1))
        self.decoder = nn.Sequential(*up)

    def forward(self, x):
        """Run the encoder then the decoder on ``x`` and return the result."""
        return self.decoder(self.encoder(x))

class FineNetwork(nn.Module):
    """Refines a coarse map using the input image as guidance.

    The image and the coarse map are concatenated along the channel axis
    (``conv1`` expects two channels in total), passed through four
    conv + batch-norm + ReLU stages that keep the spatial size, and reduced to
    one channel by a 1x1 convolution. That result is added to the coarse map,
    so the convolutional stack predicts a correction rather than the full map.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 7x7 convolution over the 2-channel (image + coarse) input
        self.conv1 = nn.Conv2d(2, 64, kernel_size=7, stride=1, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        # A single ReLU module is reused after every batch-norm layer.
        self.relu = nn.ReLU(inplace=True)

        # Stage 2: 5x5 convolution
        self.conv2 = nn.Conv2d(64, 64, kernel_size=5, stride=1, padding=2)
        self.bn2 = nn.BatchNorm2d(64)

        # Stages 3 and 4: 3x3 convolutions narrowing 64 -> 32 -> 16 channels
        self.conv3 = nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(32)

        self.conv4 = nn.Conv2d(32, 16, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(16)

        # 1x1 projection to the single-channel correction
        self.conv5 = nn.Conv2d(16, 1, kernel_size=1)

    def forward(self, img, coarse_depth):
        """Return ``coarse_depth`` (resized to ``img`` if needed) plus a learned correction."""
        # Any shape mismatch triggers a bilinear resize of the coarse map to
        # the image's spatial size before concatenation.
        if coarse_depth.shape != img.shape:
            coarse_depth = nn.functional.interpolate(
                coarse_depth, size=img.shape[2:], mode='bilinear', align_corners=False)

        features = torch.cat([img, coarse_depth], dim=1)

        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
        )
        for conv, norm in stages:
            features = self.relu(norm(conv(features)))

        correction = self.conv5(features)
        return correction + coarse_depth

class MultiScaleDepthNetwork(nn.Module):
    """Two-stage model: a coarse prediction followed by a refinement pass.

    The input is first fed to ``CoarseNetwork``; its output and the original
    input are then handed to ``FineNetwork``, whose output is returned.
    """

    def __init__(self):
        super().__init__()
        self.coarse_network = CoarseNetwork()
        self.fine_network = FineNetwork()

    def forward(self, x):
        """Return the refined prediction for the input batch ``x``."""
        return self.fine_network(x, self.coarse_network(x))

class SPADDataset(Dataset):
    """Paired SPAD-image / depth-map dataset read from two directories.

    Samples are the ``.png`` files of ``spad_dir`` in sorted order; the depth
    map for a sample is the file with the same name inside ``depth_dir``.
    The SPAD image is passed through an 80th-percentile filter with a 3x3
    window before the optional ``transform`` is applied.
    """

    def __init__(self, spad_dir, depth_dir, transform=None):
        self.spad_dir = spad_dir
        self.depth_dir = depth_dir
        self.transform = transform
        # Sorting fixes the index -> filename mapping across runs.
        self.spad_files = sorted(
            name for name in os.listdir(spad_dir) if name.endswith('.png')
        )

    def __len__(self):
        return len(self.spad_files)

    def __getitem__(self, idx):
        """Return ``(spad_img, depth_img)`` for sample ``idx``.

        Both images are loaded as 8-bit grayscale. When a transform is set it
        is applied to each image separately.
        """
        name = self.spad_files[idx]

        # SPAD input: grayscale -> percentile filter -> back to a PIL image
        raw = np.array(Image.open(os.path.join(self.spad_dir, name)).convert('L'))
        filtered = percentile_filter(raw, percentile=80, size=3)
        spad_img = Image.fromarray(filtered.astype(np.uint8))

        # Target: same filename in the depth directory
        depth_img = Image.open(os.path.join(self.depth_dir, name)).convert('L')

        if not self.transform:
            return spad_img, depth_img
        return self.transform(spad_img), self.transform(depth_img)

class TestSPADDataset(Dataset):
    """Unlabelled SPAD-image dataset used for inference.

    Samples are the ``.png`` files of ``spad_dir`` in sorted order. Each image
    goes through the same 80th-percentile, 3x3 filter as the training data and
    is returned together with its filename.
    """

    def __init__(self, spad_dir, transform=None):
        self.spad_dir = spad_dir
        self.transform = transform
        # Sorting fixes the index -> filename mapping across runs.
        self.spad_files = sorted(
            name for name in os.listdir(spad_dir) if name.endswith('.png')
        )

    def __len__(self):
        return len(self.spad_files)

    def __getitem__(self, idx):
        """Return ``(spad_img, filename)`` for sample ``idx``."""
        name = self.spad_files[idx]

        # Grayscale -> percentile filter -> back to a PIL image
        raw = np.array(Image.open(os.path.join(self.spad_dir, name)).convert('L'))
        filtered = percentile_filter(raw, percentile=80, size=3)
        spad_img = Image.fromarray(filtered.astype(np.uint8))

        if self.transform:
            spad_img = self.transform(spad_img)

        # The filename travels with the sample for use by the caller.
        return spad_img, name

def combined_loss(pred, target, alpha=0.4, beta=0.6):
    """Weighted sum of mean-squared error and mean-absolute error.

    Args:
        pred: Predicted tensor.
        target: Ground-truth tensor compared element-wise with ``pred``.
        alpha: Weight of the MSE term.
        beta: Weight of the L1 (MAE) term.

    Returns:
        Scalar tensor ``alpha * MSE + beta * MAE`` (both terms mean-reduced).
    """
    squared_term = nn.functional.mse_loss(pred, target)
    absolute_term = nn.functional.l1_loss(pred, target)
    return alpha * squared_term + beta * absolute_term

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=20, device='cuda',
                checkpoint_path='best_spad_depth_model.pth'):
    """Train ``model`` with LR decay on plateau, checkpointing and early stopping.

    After every epoch the mean validation loss drives a ``ReduceLROnPlateau``
    scheduler. Whenever it improves, the weights are written to
    ``checkpoint_path`` and remembered in memory. Training stops early after
    five consecutive epochs without improvement. Before returning, the model is
    reset to the best weights seen, so callers do not receive the (worse)
    last-epoch weights.

    Args:
        model: Network to train; moved to ``device`` in place.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar loss.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Maximum number of epochs.
        device: Device the model and batches are moved to.
        checkpoint_path: Where the best ``state_dict`` is saved.

    Returns:
        ``(model, train_losses, val_losses)``: the model carrying the best
        validation weights, plus the per-epoch mean losses.

    Raises:
        ValueError: If either loader yields no batches.
    """
    # Fail early with a clear message instead of a ZeroDivisionError when the
    # per-epoch averages are computed.
    if len(train_loader) == 0 or len(val_loader) == 0:
        raise ValueError('train_loader and val_loader must each yield at least one batch')

    model.to(device)
    best_val_loss = float('inf')
    best_state = None  # in-memory copy of the best weights seen so far
    patience = 5       # epochs without improvement tolerated before stopping
    patience_counter = 0

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        train_loss = 0.0

        train_pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Train]')

        for spad_imgs, depth_maps in train_pbar:
            spad_imgs = spad_imgs.to(device)
            depth_maps = depth_maps.to(device)

            outputs = model(spad_imgs)
            loss = criterion(outputs, depth_maps)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_pbar.set_postfix({'loss': loss.item()})

        avg_train_loss = train_loss / len(train_loader)
        train_losses.append(avg_train_loss)

        # ---- validation pass (no gradients) ----
        model.eval()
        val_loss = 0.0

        val_pbar = tqdm(val_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Val]')

        with torch.no_grad():
            for spad_imgs, depth_maps in val_pbar:
                spad_imgs = spad_imgs.to(device)
                depth_maps = depth_maps.to(device)

                outputs = model(spad_imgs)
                loss = criterion(outputs, depth_maps)

                val_loss += loss.item()
                val_pbar.set_postfix({'loss': loss.item()})

        avg_val_loss = val_loss / len(val_loader)
        val_losses.append(avg_val_loss)

        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')

        scheduler.step(avg_val_loss)

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), checkpoint_path)
            # state_dict() hands out references to the live tensors, so clone
            # them; otherwise later optimizer steps would overwrite the snapshot.
            best_state = {name: tensor.detach().clone()
                          for name, tensor in model.state_dict().items()}
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f'Early stopping after {epoch+1} epochs')
            break

    # Hand back the best weights rather than whatever the last epoch produced.
    if best_state is not None:
        model.load_state_dict(best_state)

    # The learning rate shown is the optimizer's current value, which the
    # scheduler may have reduced during training.
    plot_loss(train_losses, val_losses, train_loader.batch_size, optimizer.param_groups[0]['lr'])

    return model, train_losses, val_losses

def plot_loss(train_losses, val_losses, batch_size, learning_rate):
    """Draw per-epoch training and validation loss curves.

    The figure is written to ``loss_plot.png`` in the current directory and
    then shown. ``batch_size`` and ``learning_rate`` only appear in the title.

    Args:
        train_losses: Mean training loss per epoch.
        val_losses: Mean validation loss per epoch (same length).
        batch_size: Value reported in the title.
        learning_rate: Value reported in the title.
    """
    # Epochs are numbered from 1 on the x axis.
    epoch_axis = range(1, len(train_losses) + 1)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(epoch_axis, train_losses, label='Train Loss', marker='o')
    ax.plot(epoch_axis, val_losses, label='Validation Loss', marker='x')
    ax.set_title(f'Training and Validation Loss\nBatch Size: {batch_size}, Learning Rate: {learning_rate}')
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss')
    ax.legend()
    ax.grid(True)
    fig.savefig('loss_plot.png')
    plt.show()

def generate_predictions(model, test_loader, output_dir, device='cuda'):
    """Run ``model`` over ``test_loader`` and save each prediction as an 8-bit image.

    Every predicted map is min-max scaled on its own to the 0-255 range and
    written to ``output_dir`` under the filename supplied by the loader.

    Args:
        model: Trained network; moved to ``device`` and put in eval mode.
        test_loader: Iterable of ``(inputs, filenames)`` batches.
        output_dir: Directory for the output images; created if missing.
        device: Device used for inference.
    """
    # Make sure model and inputs share a device, as train_model() does.
    model.to(device)
    model.eval()
    os.makedirs(output_dir, exist_ok=True)

    test_pbar = tqdm(test_loader, desc='Generating predictions')

    with torch.no_grad():
        for spad_imgs, filenames in test_pbar:
            outputs = model(spad_imgs.to(device))

            for filename, output in zip(filenames, outputs):
                depth_map = output.cpu().numpy().squeeze()

                lowest = depth_map.min()
                value_range = depth_map.max() - lowest
                if value_range > 0:
                    scaled = (depth_map - lowest) / value_range * 255
                else:
                    # A constant map would divide 0 by 0 and cast NaN to
                    # uint8; write an all-zero image instead.
                    scaled = np.zeros_like(depth_map)

                img = Image.fromarray(scaled.astype(np.uint8))
                img.save(os.path.join(output_dir, filename))

def main():
    """Train the multi-scale network on the competition data and write test predictions.

    Builds the train / validation / test datasets (all resized to 256x256 and
    converted to tensors), trains with ``combined_loss`` and AdamW, then saves
    one predicted image per test file into the output directory.
    """
    # --- hyperparameters ---
    batch_size = 32
    learning_rate = 0.0002
    num_epochs = 4

    # --- data locations ---
    train_spad_dir = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/training-images'
    train_depth_dir = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/training-depths'
    val_spad_dir = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/validation-images'
    val_depth_dir = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/validation-depths'
    test_spad_dir = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/testing-images'
    output_dir = '/kaggle/working/multiscaleMODEL_batch32_epoch6_L1loss'

    # The same preprocessing is shared by inputs and depth targets.
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
    ])

    train_dataset = SPADDataset(train_spad_dir, train_depth_dir, transform=transform)
    val_dataset = SPADDataset(val_spad_dir, val_depth_dir, transform=transform)
    test_dataset = TestSPADDataset(test_spad_dir, transform=transform)

    # Only the training loader shuffles.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    model = MultiScaleDepthNetwork()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-4)

    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model, _train_history, _val_history = train_model(
        model,
        train_loader,
        val_loader,
        combined_loss,
        optimizer,
        num_epochs=num_epochs,
        device=device,
    )

    generate_predictions(model, test_loader, output_dir, device=device)

    print("Training and prediction completed!")

# Run the full train-and-predict pipeline when this code is executed directly
# rather than imported as a module.
if __name__ == "__main__":
    main()


In [None]:
import os
import cv2
import pandas as pd
import numpy as np

def images_to_csv_with_metadata(image_folder, output_csv):
    """Flatten every PNG in ``image_folder`` into one CSV row.

    Each image is resized to 128x128, min-max normalised on its own to the
    0-255 range and flattened. The CSV has the columns ``id`` (row number
    starting at 0), ``ImageID`` (filename), then one integer-named column per
    pixel value.

    Args:
        image_folder: Directory to scan; only names ending in ``.png`` are used.
        output_csv: Path of the CSV file to write.

    Raises:
        ValueError: If a PNG file cannot be decoded by OpenCV.
    """
    # Filter before enumerating so ids stay contiguous (0..N-1) even when the
    # folder also holds non-PNG entries.
    png_names = sorted(name for name in os.listdir(image_folder) if name.endswith(".png"))

    rows = []
    for idx, filename in enumerate(png_names):
        filepath = os.path.join(image_folder, filename)

        image = cv2.imread(filepath, cv2.IMREAD_UNCHANGED)
        if image is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise ValueError(f"Could not read image: {filepath}")

        image = cv2.resize(image, (128, 128))
        image = image / 255.
        # Per-image min-max normalisation; the epsilon keeps a constant image
        # from dividing by zero.
        lowest = np.min(image)
        image = (image - lowest) / (np.max(image) - lowest + 1e-6)
        image = np.uint8(image * 255.)

        # Row layout: id, filename, then the flattened pixel values
        rows.append([idx, filename] + image.flatten().tolist())

    # With no PNGs the CSV still gets its two metadata columns.
    num_columns = len(rows[0]) - 2 if rows else 0
    column_names = ["id", "ImageID"] + list(range(num_columns))
    df = pd.DataFrame(rows, columns=column_names)

    df.to_csv(output_csv, index=False)

# Folder holding the predicted depth PNGs (the same path as `output_dir` in main())
predictions_folder = "/kaggle/working/multiscaleMODEL_batch32_epoch6_L1loss"

# Output CSV path (relative, so it is resolved against the current working directory)
predictions_csv = "predictionsbymultiscalemodel_percentile_80.csv"

# Convert prediction images to CSV
images_to_csv_with_metadata(predictions_folder, predictions_csv)