In [1]:

import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, Model, optimizers
from tensorflow.keras.callbacks import ModelCheckpoint
import pandas as pd
import time
from tqdm import tqdm
import cv2

2025-04-28 03:36:13.950487: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745811374.123733      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745811374.182472      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Configuration: global hyperparameters shared by the model builders,
# the data pipeline and the training loop defined below.
BATCH_SIZE = 16  # Default batch size of create_dataset / create_test_dataset
IMG_HEIGHT = 256  # Height of the Input layers of generator and discriminator
IMG_WIDTH = 256  # Width of the Input layers of generator and discriminator
OUTPUT_CHANNELS = 1  # Depth map is single-channel
LAMBDA = 100  # Weight for L1 loss in GAN objective
EPOCHS = 2  # Number of epochs executed by train()

In [3]:
# Directory under the Kaggle working area that is created up-front for checkpoints.
# NOTE(review): train() defaults to '/kaggle/working/outputs/checkpoints', which is
# a different location -- confirm which of the two paths is actually intended.
checkpoint_dir = '/kaggle/working/checkpoints'

# Create the directory (including missing parents); no error if it already exists
os.makedirs(checkpoint_dir, exist_ok=True)

In [4]:
# Generator network (U-Net architecture)
def build_generator():
    """Assemble the U-Net generator that maps a SPAD image to a depth map.

    The encoder is a chain of eight stride-2 ``downsample`` blocks, the decoder
    a chain of seven stride-2 ``upsample`` blocks followed by a final transposed
    convolution with ``tanh`` activation. Each decoder block output is
    concatenated with the encoder output of matching resolution.

    Returns:
        A Keras ``Model`` taking ``(IMG_HEIGHT, IMG_WIDTH, 1)`` inputs and
        producing ``OUTPUT_CHANNELS`` channels with values in [-1, 1].
    """
    # SPAD images are fed as a single channel
    inputs = layers.Input(shape=[IMG_HEIGHT, IMG_WIDTH, 1])

    # Encoder: only the very first block skips batch normalisation
    down_stack = [downsample(64, 4, apply_batchnorm=False)]
    down_stack += [downsample(width, 4)
                   for width in (128, 256, 512, 512, 512, 512, 512)]

    # Decoder: the three deepest blocks use dropout
    up_stack = [upsample(512, 4, dropout=True) for _ in range(3)]
    up_stack += [upsample(width, 4) for width in (512, 256, 128, 64)]

    # Output head; tanh keeps the prediction inside [-1, 1]
    last = layers.Conv2DTranspose(
        OUTPUT_CHANNELS, 4, strides=2, padding='same', activation='tanh')

    # Run the encoder and remember every intermediate activation
    features = inputs
    encoder_outputs = []
    for block in down_stack:
        features = block(features)
        encoder_outputs.append(features)

    # Skip connections: everything except the bottleneck, deepest first
    skip_tensors = encoder_outputs[-2::-1]

    # Run the decoder, merging in the matching encoder activation each time
    for block, skip in zip(up_stack, skip_tensors):
        upsampled = block(features)
        features = layers.Concatenate()([upsampled, skip])

    return Model(inputs=inputs, outputs=last(features))


In [5]:
# Discriminator network (PatchGAN)
def build_discriminator():
    """Assemble the PatchGAN discriminator.

    The model receives the conditioning SPAD image and a (real or generated)
    depth map, concatenates them along the channel axis and emits a
    single-channel map of unnormalised logits (no final activation).

    Returns:
        A Keras ``Model`` with inputs ``[input_image, target_image]``.
    """
    init = tf.random_normal_initializer(0., 0.02)
    image_shape = [IMG_HEIGHT, IMG_WIDTH, 1]

    inp = layers.Input(shape=image_shape, name='input_image')
    tar = layers.Input(shape=image_shape, name='target_image')

    # Condition and candidate are judged jointly
    x = layers.Concatenate()([inp, tar])

    # Three stride-2 blocks; the first one has no batch normalisation
    for filters, use_batchnorm in ((64, False), (128, True), (256, True)):
        x = downsample(filters, 4, use_batchnorm)(x)

    # Explicit zero padding followed by an unpadded stride-1 convolution
    x = layers.ZeroPadding2D()(x)
    x = layers.Conv2D(512, 4, strides=1, kernel_initializer=init,
                      use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.LeakyReLU()(x)

    # Second padded stride-1 convolution produces the per-patch logits
    x = layers.ZeroPadding2D()(x)
    patch_logits = layers.Conv2D(1, 4, strides=1, kernel_initializer=init)(x)

    return Model(inputs=[inp, tar], outputs=patch_logits)

In [6]:
# Helper functions for generator and discriminator
def downsample(filters, size, apply_batchnorm=True):
    """Build one encoder block: stride-2 Conv2D -> [BatchNorm] -> LeakyReLU.

    Args:
        filters: Number of convolution filters.
        size: Convolution kernel size.
        apply_batchnorm: Insert a BatchNormalization layer after the convolution.

    Returns:
        A ``tf.keras.Sequential`` holding the block's layers.
    """
    conv = layers.Conv2D(
        filters, size, strides=2, padding='same',
        kernel_initializer=tf.random_normal_initializer(0., 0.02),
        use_bias=False)

    stages = [conv]
    if apply_batchnorm:
        stages.append(layers.BatchNormalization())
    stages.append(layers.LeakyReLU())

    return tf.keras.Sequential(stages)

In [7]:
def upsample(filters, size, dropout=False):
    """Build one decoder block: stride-2 Conv2DTranspose -> BatchNorm -> [Dropout] -> ReLU.

    Args:
        filters: Number of transposed-convolution filters.
        size: Kernel size.
        dropout: Insert a ``Dropout(0.5)`` layer after batch normalisation.

    Returns:
        A ``tf.keras.Sequential`` holding the block's layers.
    """
    deconv = layers.Conv2DTranspose(
        filters, size, strides=2, padding='same',
        kernel_initializer=tf.random_normal_initializer(0., 0.02),
        use_bias=False)

    stages = [deconv, layers.BatchNormalization()]
    if dropout:
        stages.append(layers.Dropout(0.5))
    stages.append(layers.ReLU())

    return tf.keras.Sequential(stages)

In [8]:
# Loss functions
def generator_loss(disc_generated_output, gen_output, target):
    """Compute the generator objective: adversarial term plus weighted L1 term.

    Args:
        disc_generated_output: Discriminator logits for the generated depth maps.
        gen_output: Depth maps produced by the generator.
        target: Ground-truth depth maps.

    Returns:
        Tuple ``(total_loss, gan_loss, l1_loss)`` where
        ``total_loss = gan_loss + LAMBDA * l1_loss``.
    """
    bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)

    # The generator wants its samples to be labelled "real" (all ones)
    real_labels = tf.ones_like(disc_generated_output)
    adversarial = bce(real_labels, disc_generated_output)

    # Mean absolute error between ground truth and prediction
    reconstruction = tf.reduce_mean(tf.abs(target - gen_output))

    combined = adversarial + (LAMBDA * reconstruction)
    return combined, adversarial, reconstruction

def discriminator_loss(disc_real_output, disc_generated_output):
    """Compute the discriminator objective.

    Args:
        disc_real_output: Discriminator logits for (input, ground truth) pairs.
        disc_generated_output: Discriminator logits for (input, generated) pairs.

    Returns:
        Sum of the cross-entropy of real logits against ones and the
        cross-entropy of generated logits against zeros.
    """
    bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)

    loss_on_real = bce(tf.ones_like(disc_real_output), disc_real_output)
    loss_on_fake = bce(tf.zeros_like(disc_generated_output), disc_generated_output)

    return loss_on_real + loss_on_fake

In [9]:
def load_and_preprocess_data(spad_path, depth_path):
    """Read a SPAD/depth PNG pair and scale both to the [-1, 1] range.

    Args:
        spad_path: Path of the SPAD input PNG.
        depth_path: Path of the ground-truth depth PNG.

    Returns:
        Tuple ``(spad_img, depth_img)`` of single-channel float32 tensors.
        No resizing is performed here.
    """
    def _read_scaled_png(path):
        # Decode as one channel, then map 0..255 linearly onto -1..1
        # NOTE(review): assumes 8-bit PNGs spanning 0..255 -- confirm for both
        # the SPAD frames and the depth maps.
        raw_bytes = tf.io.read_file(path)
        pixels = tf.cast(tf.image.decode_png(raw_bytes, channels=1), tf.float32)
        return (pixels / 255.0) * 2 - 1

    return _read_scaled_png(spad_path), _read_scaled_png(depth_path)

In [10]:
def random_crop(spad_img, depth_img):
    """Take the same random ``IMG_HEIGHT`` x ``IMG_WIDTH`` crop from both images.

    The two images are stacked first so that a single random offset is applied
    to both, keeping input and target aligned.

    Returns:
        Tuple ``(spad_crop, depth_crop)``.
    """
    pair = tf.stack([spad_img, depth_img], axis=0)
    crop_shape = [2, IMG_HEIGHT, IMG_WIDTH, 1]
    cropped_pair = tf.image.random_crop(pair, size=crop_shape)

    spad_crop = cropped_pair[0]
    depth_crop = cropped_pair[1]
    return spad_crop, depth_crop

def normalize(spad_img, depth_img):
    """Pass-through hook kept in the dataset pipeline.

    Scaling to [-1, 1] already happens while loading, so both images are
    handed back untouched.
    """
    untouched_pair = (spad_img, depth_img)
    return untouched_pair

# def random_jitter(spad_img, depth_img):
#     # Resize to bigger height and width
#     spad_img, depth_img = resize(spad_img, depth_img, 286, 286)
    
#     # Random crop back to the target size
#     spad_img, depth_img = random_crop(spad_img, depth_img)
    
#     if tf.random.uniform(()) > 0.5:
#         # Random mirroring
#         spad_img = tf.image.flip_left_right(spad_img)
#         depth_img = tf.image.flip_left_right(depth_img)
    
#     return spad_img, depth_img

def resize(spad_img, depth_img, height, width):
    """Resize both images to ``height`` x ``width`` with nearest-neighbour sampling.

    Returns:
        Tuple ``(spad_img, depth_img)`` of the resized images.
    """
    target_size = [height, width]
    nearest = tf.image.ResizeMethod.NEAREST_NEIGHBOR

    resized_spad = tf.image.resize(spad_img, target_size, method=nearest)
    resized_depth = tf.image.resize(depth_img, target_size, method=nearest)
    return resized_spad, resized_depth

In [11]:
def create_dataset(spad_paths, depth_paths, batch_size=BATCH_SIZE,
                   buffer_size=400, train=True):
    """Build a batched ``tf.data`` pipeline of (SPAD image, depth map) pairs.

    Args:
        spad_paths: Sequence of SPAD image file paths.
        depth_paths: Sequence of depth map file paths, aligned with ``spad_paths``.
        batch_size: Number of pairs per batch.
        buffer_size: Shuffle buffer size (only used when ``train`` is true).
        train: When true the samples are shuffled. When false (validation /
            evaluation) the original order is preserved so that runs are
            repeatable and no shuffle buffer has to be filled.

    Returns:
        A ``tf.data.Dataset`` yielding ``(spad_batch, depth_batch)`` tuples.
    """
    dataset = tf.data.Dataset.from_tensor_slices((spad_paths, depth_paths))

    # Decode and scale both images; augmentation is currently not applied.
    dataset = dataset.map(load_and_preprocess_data,
                          num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.map(normalize,
                          num_parallel_calls=tf.data.AUTOTUNE)

    # Previously the `train` flag was ignored and every dataset was shuffled.
    if train:
        dataset = dataset.shuffle(buffer_size)

    dataset = dataset.batch(batch_size)

    # Overlap input preprocessing with the training / inference step.
    return dataset.prefetch(tf.data.AUTOTUNE)

In [12]:
# Training the model
@tf.function
def train_step(generator, discriminator, generator_optimizer, discriminator_optimizer, 
               spad_images, depth_maps):
    """Perform one simultaneous optimisation step of generator and discriminator.

    Args:
        generator: Generator model.
        discriminator: Discriminator model taking ``[input, target]``.
        generator_optimizer: Optimizer applied to the generator variables.
        discriminator_optimizer: Optimizer applied to the discriminator variables.
        spad_images: Batch of SPAD inputs.
        depth_maps: Batch of ground-truth depth maps.

    Returns:
        Tuple ``(gen_total_loss, gen_gan_loss, gen_l1_loss, disc_loss)``.
    """
    # Two independent tapes so each network gets gradients of its own loss
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        fake_depths = generator(spad_images, training=True)

        # Score the real pair and the generated pair with the same discriminator
        real_logits = discriminator([spad_images, depth_maps], training=True)
        fake_logits = discriminator([spad_images, fake_depths], training=True)

        gen_total_loss, gen_gan_loss, gen_l1_loss = generator_loss(
            fake_logits, fake_depths, depth_maps)
        disc_loss = discriminator_loss(real_logits, fake_logits)

    gen_vars = generator.trainable_variables
    disc_vars = discriminator.trainable_variables

    gen_grads = gen_tape.gradient(gen_total_loss, gen_vars)
    disc_grads = disc_tape.gradient(disc_loss, disc_vars)

    generator_optimizer.apply_gradients(zip(gen_grads, gen_vars))
    discriminator_optimizer.apply_gradients(zip(disc_grads, disc_vars))

    return gen_total_loss, gen_gan_loss, gen_l1_loss, disc_loss

In [13]:
# def train(generator, discriminator, generator_optimizer, discriminator_optimizer, 
#           train_dataset, checkpoint_filepath='/kaggle/working/outputs/checkpoints'):
def train(generator, discriminator, generator_optimizer, discriminator_optimizer,
          train_dataset, val_dataset=None, checkpoint_filepath='/kaggle/working/outputs/checkpoints'):
    """Run the adversarial training loop for ``EPOCHS`` epochs.

    Args:
        generator: Generator model to train.
        discriminator: Discriminator model to train.
        generator_optimizer: Optimizer for the generator.
        discriminator_optimizer: Optimizer for the discriminator.
        train_dataset: Dataset yielding ``(spad_batch, depth_batch)`` tuples.
        val_dataset: Optional dataset of the same structure; when given, MAE,
            RMSE and SSIM are reported after every epoch.
        checkpoint_filepath: Directory that receives the checkpoints.

    Returns:
        A ``history`` dict of per-epoch lists: the mean training losses
        (``gen_total_loss``, ``gen_gan_loss``, ``gen_l1_loss``, ``disc_loss``)
        and, when ``val_dataset`` is given, ``val_mae``, ``val_rmse`` and
        ``val_ssim``.
    """
    loss_names = ('gen_total_loss', 'gen_gan_loss', 'gen_l1_loss', 'disc_loss')
    history = {name: [] for name in loss_names}
    history.update({'val_mae': [], 'val_rmse': [], 'val_ssim': []})

    for epoch in range(EPOCHS):
        print(f"\nEpoch {epoch+1}/{EPOCHS}")
        start_time = time.time()

        # Running means of the training losses for this epoch
        epoch_means = {name: tf.keras.metrics.Mean() for name in loss_names}

        # Train step
        for n, (spad_batch, depth_batch) in enumerate(train_dataset):
            gen_total_loss, gen_gan_loss, gen_l1_loss, disc_loss = train_step(
                generator, discriminator, generator_optimizer, discriminator_optimizer,
                spad_batch, depth_batch)

            step_losses = (gen_total_loss, gen_gan_loss, gen_l1_loss, disc_loss)
            for name, value in zip(loss_names, step_losses):
                epoch_means[name].update_state(value)

            if n % 10 == 0:
                print(f"  [Train] Batch {n:03d} - Gen Loss: {gen_total_loss:.4f}, "
                      f"GAN: {gen_gan_loss:.4f}, L1: {gen_l1_loss:.4f}, Disc: {disc_loss:.4f}")

        for name in loss_names:
            history[name].append(float(epoch_means[name].result().numpy()))

        # --- Validation step (optional) ---
        if val_dataset is not None:
            mae_metric = tf.keras.metrics.MeanAbsoluteError()
            mse_metric = tf.keras.metrics.MeanSquaredError()
            # One SSIM value per image, so a smaller final batch is not
            # over-weighted as it would be when averaging per-batch means.
            ssim_scores = []

            for spad_val_batch, depth_val_batch in val_dataset:
                pred_val_batch = generator(spad_val_batch, training=False)
                mae_metric.update_state(depth_val_batch, pred_val_batch)
                mse_metric.update_state(depth_val_batch, pred_val_batch)
                # Images live in [-1, 1], hence a dynamic range of 2.0
                per_image_ssim = tf.image.ssim(depth_val_batch, pred_val_batch, max_val=2.0)
                ssim_scores.extend(per_image_ssim.numpy().tolist())

            val_mae = mae_metric.result().numpy()
            val_rmse = np.sqrt(mse_metric.result().numpy())
            val_ssim = np.mean(ssim_scores) if ssim_scores else float('nan')

            history['val_mae'].append(float(val_mae))
            history['val_rmse'].append(float(val_rmse))
            history['val_ssim'].append(float(val_ssim))

            print(f"  [Val] MAE: {val_mae:.4f}, RMSE: {val_rmse:.4f}, SSIM: {val_ssim:.4f}")

        # Save checkpoint every 10 epochs or at the last one
        if (epoch + 1) % 10 == 0 or epoch == EPOCHS - 1:
            os.makedirs(checkpoint_filepath, exist_ok=True)
            checkpoint_prefix = os.path.join(checkpoint_filepath, f"ckpt_epoch_{epoch+1}")
            checkpoint = tf.train.Checkpoint(
                generator_optimizer=generator_optimizer,
                discriminator_optimizer=discriminator_optimizer,
                generator=generator,
                discriminator=discriminator)
            checkpoint.save(file_prefix=checkpoint_prefix)
            print(f"Checkpoint saved: {checkpoint_prefix}")

            # Generate and save a visualization from the first training batch
            for spad, depth in train_dataset.take(1):
                generate_and_save_images(generator, epoch + 1, spad, depth)

        print(f"Epoch {epoch+1} completed in {time.time() - start_time:.2f} seconds.")

    return history

In [14]:
def generate_and_save_images(model, epoch, spad_batch, depth_batch):
    """Save a side-by-side figure of input, ground truth and prediction.

    Only the first sample of the batch is shown. The figure is written to
    ``predictions_epoch_<epoch>.png`` in the current working directory.

    Args:
        model: Generator used to produce the prediction.
        epoch: Epoch number used in the output file name.
        spad_batch: Batch of SPAD inputs.
        depth_batch: Batch of ground-truth depth maps.
    """
    predictions = model(spad_batch, training=False)

    panels = [spad_batch[0], depth_batch[0], predictions[0]]
    titles = ['SPAD Input', 'Ground Truth Depth', 'Predicted Depth']

    # Explicit figure/axes handles instead of the implicit pyplot state, so the
    # figure that gets saved and closed is guaranteed to be this one.
    fig, axes = plt.subplots(1, 3, figsize=(15, 10))

    for ax, image, title in zip(axes, panels, titles):
        ax.set_title(title)
        # Map [-1, 1] to [0, 1] and drop the trailing channel axis so imshow
        # receives a plain 2-D array.
        ax.imshow(np.squeeze(np.asarray(image) * 0.5 + 0.5), cmap='viridis')
        ax.axis('off')

    fig.savefig(f'predictions_epoch_{epoch}.png')
    plt.close(fig)

In [15]:
# Inference function for testing on new SPAD images
def predict_depth_map(generator, spad_image_path):
    """Predict the depth map for a single SPAD image file.

    Args:
        generator: Trained generator model.
        spad_image_path: Path of the SPAD PNG.

    Returns:
        NumPy array holding the prediction rescaled from [-1, 1] to [0, 1].
    """
    raw_bytes = tf.io.read_file(spad_image_path)
    pixels = tf.image.decode_png(raw_bytes, channels=1)

    # NOTE(review): this uses tf.image.resize's default method, while the
    # resize() helper in this notebook uses nearest-neighbour -- confirm which
    # one is wanted for SPAD frames.
    pixels = tf.cast(tf.image.resize(pixels, [IMG_HEIGHT, IMG_WIDTH]), tf.float32)

    # Scale to [-1, 1] and prepend the batch axis expected by the model
    model_input = tf.expand_dims((pixels / 255.0) * 2 - 1, 0)

    first_prediction = generator(model_input, training=False)[0]

    # Back to the [0, 1] range
    return ((first_prediction + 1) / 2).numpy()

In [16]:
# # Class for img2csv functionality if the module is not available
# class Img2CSV:
#     @staticmethod
#     def convert_to_csv(image_dir, output_csv):
#         """
#         Convert depth map images to a submission CSV file.
        
#         Args:
#             image_dir (str): Directory containing depth map PNG images
#             output_csv (str): Output CSV file path
#         """
#         print(f"Converting depth maps from {image_dir} to CSV...")
#         all_data = []
        
#         # Get all PNG files in the directory
#         image_files = [f for f in os.listdir(image_dir) if f.endswith("_depth.png")]
        
#         for image_file in tqdm(image_files, desc="Processing images"):
#             # Extract the image ID (remove _depth.png)
#             image_id = image_file.replace("_depth.png", "")
            
#             # Load the depth map
#             depth_map_path = os.path.join(image_dir, image_file)
#             depth_map = plt.imread(depth_map_path)
            
#             # Handle RGB images by converting to grayscale if necessary
#             if len(depth_map.shape) > 2:
#                 depth_map = np.mean(depth_map, axis=2)
            
#             # Flatten the depth map and create rows
#             flat_depth = depth_map.flatten()
            
#             for pixel_id, depth_value in enumerate(flat_depth):
#                 all_data.append({
#                     'image_id': image_id,
#                     'pixel_id': pixel_id,
#                     'depth': depth_value  # Already in range [0, 1] if read with plt.imread
#                 })
        
#         # Create and save dataframe
#         df = pd.DataFrame(all_data)
#         df.to_csv(output_csv, index=False)
#         print(f"CSV file created: {output_csv}")

In [17]:
# # If img2csv.py is not available, use the built-in implementation
# if 'img2csv' not in globals():
#     img2csv = Img2CSV

In [18]:
# Function to evaluate the model using various metrics
def evaluate_model(generator, val_dataset):
    """
    Evaluate the model using various metrics: MAE, RMSE, SSIM

    SSIM is collected per image and averaged over all images, so a smaller
    final batch carries the correct weight (averaging per-batch means would
    over-weight it).

    Args:
        generator: Trained generator model
        val_dataset: Validation dataset yielding (spad_batch, depth_batch)

    Returns:
        Dictionary of metrics with keys 'MAE', 'RMSE' and 'SSIM'. 'SSIM' is
        NaN when the dataset yields no batches.
    """
    mae_metric = tf.keras.metrics.MeanAbsoluteError()
    mse_metric = tf.keras.metrics.MeanSquaredError()

    # One SSIM score per validation image
    ssim_scores = []

    print("Evaluating model...")
    for spad_batch, depth_batch in tqdm(val_dataset):
        # Generate predictions
        pred_batch = generator(spad_batch, training=False)

        # Update metrics
        mae_metric.update_state(depth_batch, pred_batch)
        mse_metric.update_state(depth_batch, pred_batch)

        # Images live in [-1, 1], hence a dynamic range of 2.0
        per_image_ssim = tf.image.ssim(depth_batch, pred_batch, max_val=2.0)
        ssim_scores.extend(per_image_ssim.numpy().tolist())

    # Calculate final metrics
    mae = mae_metric.result().numpy()
    rmse = np.sqrt(mse_metric.result().numpy())
    mean_ssim = np.mean(ssim_scores) if ssim_scores else float('nan')

    metrics = {
        'MAE': mae,
        'RMSE': rmse,
        'SSIM': mean_ssim
    }

    print("Evaluation metrics:")
    for metric_name, metric_value in metrics.items():
        print(f"  {metric_name}: {metric_value:.4f}")

    return metrics

In [19]:
# Function to load a trained model from checkpoint
def load_model_from_checkpoint(checkpoint_path):
    """Rebuild generator and discriminator and restore them from a checkpoint.

    Fresh models and Adam optimizers are created with the same checkpoint
    attribute names used when saving, then the checkpoint is restored with
    ``expect_partial()``.

    Args:
        checkpoint_path: Path (prefix) of the checkpoint to restore.

    Returns:
        Tuple ``(generator, discriminator)``.
    """
    generator = build_generator()
    discriminator = build_discriminator()

    restorer = tf.train.Checkpoint(
        generator_optimizer=tf.keras.optimizers.Adam(2e-4, beta_1=0.5),
        discriminator_optimizer=tf.keras.optimizers.Adam(2e-4, beta_1=0.5),
        generator=generator,
        discriminator=discriminator)

    restore_status = restorer.restore(checkpoint_path)
    restore_status.expect_partial()
    print(f"Model loaded from checkpoint: {checkpoint_path}")

    return generator, discriminator

In [20]:
# # Main function to run the entire pipeline
# def main():
#     # Initialize models
#     generator = build_generator()
#     discriminator = build_discriminator()
    
#     # Optimizers
#     generator_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5)
#     discriminator_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5)
    
#     # Dataset paths (replace with actual paths)
#     SPAD_TRAIN_PATH = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/training-images'
#     DEPTH_TRAIN_PATH = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/training-depths'
#     # Add validation dataset paths
#     SPAD_VAL_PATH = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/validation-images'
#     DEPTH_VAL_PATH = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/validation-depths'
#     SPAD_TEST_PATH = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/testing-images'
#     OUTPUT_DIR = '/kaggle/working/outputs'
#     CHECKPOINT_DIR = os.path.join(OUTPUT_DIR, 'checkpoints')
#     TEST_OUTPUT_DIR = os.path.join(OUTPUT_DIR, 'test_predictions')
    
#     # Create output directories
#     os.makedirs(OUTPUT_DIR, exist_ok=True)
#     os.makedirs(CHECKPOINT_DIR, exist_ok=True)
#     os.makedirs(TEST_OUTPUT_DIR, exist_ok=True)
    
#     # Get lists of image paths
#     spad_train_images = sorted([os.path.join(SPAD_TRAIN_PATH, filename) 
#                          for filename in os.listdir(SPAD_TRAIN_PATH) 
#                          if filename.endswith('.png')])
#     depth_train_maps = sorted([os.path.join(DEPTH_TRAIN_PATH, filename) 
#                         for filename in os.listdir(DEPTH_TRAIN_PATH) 
#                         if filename.endswith('.png')])
    
#     # Add validation image paths
#     print("Loading validation data...")
#     try:
#         spad_val_images = sorted([os.path.join(SPAD_VAL_PATH, filename) 
#                            for filename in os.listdir(SPAD_VAL_PATH) 
#                            if filename.endswith('.png')])
#         depth_val_maps = sorted([os.path.join(DEPTH_VAL_PATH, filename) 
#                           for filename in os.listdir(DEPTH_VAL_PATH) 
#                           if filename.endswith('.png')])
        
#         # Debug: Check if validation directories exist and contain images
#         print(f"Found {len(spad_val_images)} validation SPAD images and {len(depth_val_maps)} validation depth maps")
        
#         # Debug: Check if validation pairs match
#         if len(spad_val_images) != len(depth_val_maps):
#             print(f"WARNING: Validation dataset has mismatched counts - {len(spad_val_images)} SPAD images vs {len(depth_val_maps)} depth maps")
            
#         # Debug: Print first few validation image paths
#         if spad_val_images:
#             print(f"First validation SPAD image: {os.path.basename(spad_val_images[0])}")
#         if depth_val_maps:
#             print(f"First validation depth map: {os.path.basename(depth_val_maps[0])}")
            
#     except FileNotFoundError as e:
#         print(f"ERROR: Validation directory not found: {e}")
#         print("Falling back to splitting training data...")
#         # Fallback to splitting if validation directories don't exist
#         train_spad_paths, train_depth_paths, spad_val_images, depth_val_maps = split_dataset(
#             spad_train_images, depth_train_maps, val_split=0.2)
#         print(f"Fallback: {len(spad_val_images)} validation images created from training split")
    
#     print(f"Training on {len(spad_train_images)} images, validating on {len(spad_val_images)} images")
    
#     # Create datasets
#     train_dataset = create_dataset(spad_train_images, depth_train_maps, batch_size=BATCH_SIZE)
#     val_dataset = create_dataset(spad_val_images, depth_val_maps, batch_size=BATCH_SIZE, train=False)
    
#     # Debug: Check first batch from each dataset
#     for x_batch, y_batch in train_dataset.take(1):
#         print(f"Training batch shape: SPAD={x_batch.shape}, depth={y_batch.shape}")
        
#     for x_batch, y_batch in val_dataset.take(1):
#         print(f"Validation batch shape: SPAD={x_batch.shape}, depth={y_batch.shape}")
    
#     # Check if we should load from checkpoint or train from scratch
#     load_checkpoint = input("Load from checkpoint? (y/n): ").lower() == 'y'
    
#     if load_checkpoint:
#         checkpoint_path = input("Enter checkpoint path: ")
#         generator, discriminator = load_model_from_checkpoint(checkpoint_path)
#     else:
#         # Train the model
#         print("Starting model training...")
#         history = train(generator, discriminator, generator_optimizer, discriminator_optimizer, 
#                         train_dataset, val_dataset, checkpoint_filepath=CHECKPOINT_DIR)
#         print("Training completed!")
        
#         # Evaluate the model
#         print("Evaluating model on validation set...")
#         metrics = evaluate_model(generator, val_dataset)
        
#         # Save evaluation metrics
#         with open(os.path.join(OUTPUT_DIR, 'evaluation_metrics.txt'), 'w') as f:
#             for metric_name, metric_value in metrics.items():
#                 f.write(f"{metric_name}: {metric_value:.4f}\n")
    
#     # Get test images
#     test_spad_paths = sorted([os.path.join(SPAD_TEST_PATH, filename) 
#                              for filename in os.listdir(SPAD_TEST_PATH) 
#                              if filename.endswith('.png')])
    
#     print(f"Running inference on {len(test_spad_paths)} test images...")
    
#     # Create test dataset
#     test_dataset = create_test_dataset(test_spad_paths, batch_size=BATCH_SIZE)
    
#     # Generate depth maps for test data
#     predictions_dict = test(generator, test_dataset, test_spad_paths, output_dir=TEST_OUTPUT_DIR)
    
#     # Convert predictions to submission CSV
#     submission_file = convert_to_submission(predictions_dict, 
#                                            output_dir=TEST_OUTPUT_DIR, 
#                                            output_file=os.path.join(OUTPUT_DIR, "submission.csv"))
    
#     print(f"Submission file created: {submission_file}")
    
#     # Visualize a few test predictions
#     print("Creating visualization of test predictions...")
#     visualize_test_predictions(generator, test_dataset, test_spad_paths, 
#                               num_examples=5, output_dir=TEST_OUTPUT_DIR)
    
#     print("Pipeline completed successfully!")

In [21]:
def create_test_dataset(spad_paths, batch_size=BATCH_SIZE):
    """Build the inference pipeline: load each SPAD image, then batch.

    The order of ``spad_paths`` is preserved (no shuffling).

    Args:
        spad_paths: Sequence of SPAD image file paths.
        batch_size: Number of images per batch.

    Returns:
        A ``tf.data.Dataset`` yielding batches of preprocessed SPAD images.
    """
    return (
        tf.data.Dataset.from_tensor_slices(spad_paths)
        .map(load_and_preprocess_test_data, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(batch_size)
    )

def load_and_preprocess_test_data(spad_path):
    """Read one SPAD PNG and scale it to the [-1, 1] range.

    The image is decoded as a single channel and is not resized.

    Args:
        spad_path: Path of the SPAD PNG.

    Returns:
        float32 tensor with one channel.
    """
    raw_bytes = tf.io.read_file(spad_path)
    pixels = tf.cast(tf.image.decode_png(raw_bytes, channels=1), tf.float32)
    # Map 0..255 linearly onto -1..1
    return (pixels / 255.0) * 2 - 1

In [22]:
# Function to visualize test predictions
def visualize_test_predictions(generator, test_dataset, test_filenames, num_examples=5, output_dir="."):
    """
    Visualize test predictions for a few examples

    One figure is written per batch, showing up to three samples of that
    batch (SPAD input on the left, predicted depth on the right).

    Args:
        generator: Trained generator model
        test_dataset: Test dataset yielding batches of SPAD images, in the
            same order as ``test_filenames``
        test_filenames: List of test filenames
        num_examples: Maximum number of batches (figures) to visualize
        output_dir: Output directory for visualizations (created if missing)
    """
    max_rows = 3
    os.makedirs(output_dir, exist_ok=True)

    # Index of the first sample of the current batch within test_filenames.
    # Tracked from the real batch sizes instead of assuming BATCH_SIZE, so the
    # titles stay correct for datasets built with another batch size.
    offset = 0

    for i, spad_batch in enumerate(test_dataset):
        if i >= num_examples:
            break

        batch_len = int(spad_batch.shape[0])

        # Generate predictions
        predicted_depths = generator(spad_batch, training=False)

        # Denormalize from [-1, 1] to [0, 1]
        spad_images = np.asarray((spad_batch + 1) / 2)
        predicted_depths = np.asarray((predicted_depths + 1) / 2)

        # A fresh figure per batch: previously only the first batch got the
        # intended size because the single pre-built figure was closed after it.
        n_rows = min(max_rows, batch_len)
        fig, axes = plt.subplots(n_rows, 2, figsize=(12, 8), squeeze=False)
        for ax in axes.ravel():
            ax.axis('off')

        for j in range(n_rows):
            idx = offset + j
            if idx >= len(test_filenames):
                break

            filename = os.path.basename(test_filenames[idx])

            axes[j, 0].set_title(f"SPAD Input: {filename}")
            axes[j, 0].imshow(np.squeeze(spad_images[j]), cmap='gray')

            axes[j, 1].set_title("Predicted Depth")
            axes[j, 1].imshow(np.squeeze(predicted_depths[j]), cmap='viridis')

        fig.tight_layout()
        fig.savefig(os.path.join(output_dir, f"test_predictions_batch_{i}.png"))
        plt.close(fig)

        offset += batch_len

In [23]:
# main()

In [24]:
# /kaggle/working/checkpoints

In [25]:
def test(generator, test_dataset, test_filenames, output_dir="."):
    """Run inference over the test set and save one PNG per image.

    Args:
        generator: Trained generator model.
        test_dataset: Dataset yielding batches of SPAD images, in the same
            order as ``test_filenames``.
        test_filenames: List of test image paths; the base name without
            ``.png`` is used as image id.
        output_dir: Directory receiving ``<image_id>.png`` (created if missing).

    Returns:
        Dict mapping image id to the predicted depth map as a NumPy array
        rescaled from [-1, 1] to [0, 1].
    """
    predictions_dict = {}  # Dictionary to store predictions
    os.makedirs(output_dir, exist_ok=True)

    # Index of the first sample of the current batch within test_filenames.
    # Tracked from the real batch sizes instead of assuming BATCH_SIZE, so ids
    # stay aligned for datasets built with another batch size.
    offset = 0

    for spad_batch in test_dataset:
        # Generate predictions and denormalize them to [0, 1]
        predicted_depths = generator(spad_batch, training=False)
        predicted_depths = ((predicted_depths + 1) / 2).numpy()
        batch_len = predicted_depths.shape[0]

        for j in range(batch_len):
            idx = offset + j
            if idx >= len(test_filenames):
                break

            filename = os.path.basename(test_filenames[idx])
            image_id = filename.replace(".png", "")

            depth_map = predicted_depths[j]
            predictions_dict[image_id] = depth_map  # Store prediction

            # NOTE(review): the PNG is written through the viridis colormap,
            # i.e. as a colour image, not as a grayscale depth map -- confirm
            # this is what downstream consumers expect.
            output_path = os.path.join(output_dir, f"{image_id}.png")
            plt.imsave(output_path, np.squeeze(depth_map), cmap='viridis')

        offset += batch_len

    return predictions_dict

In [26]:
def convert_to_submission(image_folder, output_csv):
    """Flatten every PNG in a folder into one row of a submission CSV.

    Intended to mirror ``images_to_csv_with_metadata`` (not visible here).
    Each image is resized to 128x128, min-max normalised, scaled to uint8
    and flattened into the columns ``"0"``, ``"1"``, ... after ``id`` and
    ``ImageID``.

    Args:
        image_folder (str): Path to folder containing prediction images.
        output_csv (str): Path to save the final submission CSV. A bare
            file name (no directory part) is written to the current
            working directory.

    Returns:
        str: Path to the saved submission CSV (``output_csv`` unchanged).

    Raises:
        ValueError: If a ``.png`` file in the folder cannot be decoded.
    """
    data = []

    # idx comes from enumerate() over the full sorted listing, so it is the
    # position among all directory entries, not a count of PNG files.
    for idx, filename in enumerate(sorted(os.listdir(image_folder))):
        if not filename.endswith(".png"):
            continue

        filepath = os.path.join(image_folder, filename)
        # IMREAD_UNCHANGED keeps the channels stored in the file, so a
        # multi-channel PNG yields more than 128 * 128 pixel columns.
        image = cv2.imread(filepath, cv2.IMREAD_UNCHANGED)
        if image is None:
            # imread signals failure by returning None instead of raising.
            raise ValueError(f"Could not read image: {filepath}")

        image = cv2.resize(image, (128, 128))
        # Rescale, then stretch to the full [0, 1] range; the epsilon guards
        # against division by zero on constant images.
        image = image / 255.0
        image = (image - np.min(image)) / (np.max(image) - np.min(image) + 1e-6)
        image = np.uint8(image * 255.0)

        # Row layout: [id, ImageID, pixel0, pixel1, ...]
        data.append([idx, filename] + image.flatten().tolist())

    num_pixels = len(data[0]) - 2 if data else 0
    column_names = ["id", "ImageID"] + [str(i) for i in range(num_pixels)]
    submission_df = pd.DataFrame(data, columns=column_names)

    # dirname is '' for a bare file name, and os.makedirs('') raises.
    output_parent = os.path.dirname(output_csv)
    if output_parent:
        os.makedirs(output_parent, exist_ok=True)

    submission_df.to_csv(output_csv, index=False)

    print(f" Submission file saved to: {output_csv}")
    return output_csv

In [27]:
# Build the two networks (generator first, then discriminator)
generator, discriminator = build_generator(), build_discriminator()

# One Adam optimizer per network, identical hyper-parameters
generator_optimizer = tf.keras.optimizers.Adam(learning_rate=2e-4, beta_1=0.5)
discriminator_optimizer = tf.keras.optimizers.Adam(learning_rate=2e-4, beta_1=0.5)

# Input locations of the competition data (adjust when running elsewhere)
SPAD_TRAIN_PATH = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/training-images'
DEPTH_TRAIN_PATH = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/training-depths'
SPAD_VAL_PATH = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/validation-images'
DEPTH_VAL_PATH = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/validation-depths'
SPAD_TEST_PATH = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/testing-images'

# Output locations: everything lives below OUTPUT_DIR
OUTPUT_DIR = '/kaggle/working/outputs'
CHECKPOINT_DIR = os.path.join(OUTPUT_DIR, 'checkpoints')
TEST_OUTPUT_DIR = os.path.join(OUTPUT_DIR, 'test_predictions')
VISUAL_OUTPUT_DIR = os.path.join(OUTPUT_DIR, 'test_visualizations')

# Make sure every output directory exists before anything is written
for _output_dir in (OUTPUT_DIR, CHECKPOINT_DIR, TEST_OUTPUT_DIR, VISUAL_OUTPUT_DIR):
    os.makedirs(_output_dir, exist_ok=True)


# Sorted PNG paths of the training inputs and of their depth maps
spad_train_images = sorted(
    os.path.join(SPAD_TRAIN_PATH, name)
    for name in os.listdir(SPAD_TRAIN_PATH)
    if name.endswith('.png')
)
depth_train_maps = sorted(
    os.path.join(DEPTH_TRAIN_PATH, name)
    for name in os.listdir(DEPTH_TRAIN_PATH)
    if name.endswith('.png')
)

# Load the validation file lists. If the dedicated validation directories are
# missing, carve a validation split out of the training data instead.
print("Loading validation data...")
try:
    spad_val_images = sorted([os.path.join(SPAD_VAL_PATH, filename) 
                       for filename in os.listdir(SPAD_VAL_PATH) 
                       if filename.endswith('.png')])
    depth_val_maps = sorted([os.path.join(DEPTH_VAL_PATH, filename) 
                      for filename in os.listdir(DEPTH_VAL_PATH) 
                      if filename.endswith('.png')])
    
    # Debug: report how many validation files were found
    print(f"Found {len(spad_val_images)} validation SPAD images and {len(depth_val_maps)} validation depth maps")
    
    # Debug: inputs and targets are paired by position, so the counts must match
    if len(spad_val_images) != len(depth_val_maps):
        print(f"WARNING: Validation dataset has mismatched counts - {len(spad_val_images)} SPAD images vs {len(depth_val_maps)} depth maps")
        
    # Debug: show the first file of each list
    if spad_val_images:
        print(f"First validation SPAD image: {os.path.basename(spad_val_images[0])}")
    if depth_val_maps:
        print(f"First validation depth map: {os.path.basename(depth_val_maps[0])}")
        
except FileNotFoundError as e:
    print(f"ERROR: Validation directory not found: {e}")
    print("Falling back to splitting training data...")
    # Assumes split_dataset returns the training part first, as the names in
    # this unpacking indicate -- confirm against its definition.
    train_spad_paths, train_depth_paths, spad_val_images, depth_val_maps = split_dataset(
        spad_train_images, depth_train_maps, val_split=0.2)
    # Train only on the remaining part of the split. Without this the model
    # would still be trained on the samples that were just held out for
    # validation, and the summary below would report the wrong count.
    spad_train_images, depth_train_maps = train_spad_paths, train_depth_paths
    print(f"Fallback: {len(spad_val_images)} validation images created from training split")

print(f"Training on {len(spad_train_images)} images, validating on {len(spad_val_images)} images")

# Input pipelines for training and validation
train_dataset = create_dataset(
    spad_train_images, depth_train_maps, batch_size=BATCH_SIZE)
val_dataset = create_dataset(
    spad_val_images, depth_val_maps, batch_size=BATCH_SIZE, train=False)

# Sanity check: print the shapes of one batch from each pipeline
for x_batch, y_batch in train_dataset.take(1):
    print(f"Training batch shape: SPAD={x_batch.shape}, depth={y_batch.shape}")

for x_batch, y_batch in val_dataset.take(1):
    print(f"Validation batch shape: SPAD={x_batch.shape}, depth={y_batch.shape}")

# Set to True to restore the networks from a checkpoint instead of training;
# the checkpoint path is then requested interactively.
load_checkpoint = False
if not load_checkpoint:
    # Train from scratch
    print("Starting model training...")
    history = train(
        generator, discriminator,
        generator_optimizer, discriminator_optimizer,
        train_dataset, val_dataset,
        checkpoint_filepath=CHECKPOINT_DIR,
    )
    print("Training completed!")

    # Score the trained generator on the validation data
    print("Evaluating model on validation set...")
    metrics = evaluate_model(generator, val_dataset)

    # Persist the metrics, one "name: value" line each
    metrics_path = os.path.join(OUTPUT_DIR, 'evaluation_metrics.txt')
    with open(metrics_path, 'w') as f:
        f.writelines(
            f"{metric_name}: {metric_value:.4f}\n"
            for metric_name, metric_value in metrics.items()
        )
else:
    checkpoint_path = input("Enter checkpoint path: ")
    generator, discriminator = load_model_from_checkpoint(checkpoint_path)



I0000 00:00:1745811386.655034      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Loading validation data...
Found 836 validation SPAD images and 836 validation depth maps
First validation SPAD image: 01.png
First validation depth map: 01.png
Training on 6686 images, validating on 836 images
Training batch shape: SPAD=(16, 256, 256, 1), depth=(16, 256, 256, 1)
Validation batch shape: SPAD=(16, 256, 256, 1), depth=(16, 256, 256, 1)
Starting model training...

Epoch 1/2


E0000 00:00:1745811404.609457      31 meta_optimizer.cc:966] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape infunctional_15_1/sequential_8_1/dropout_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
I0000 00:00:1745811405.408435      91 cuda_dnn.cc:529] Loaded cuDNN version 90300


  [Train] Batch 000 - Gen Loss: 65.5629, GAN: 0.8078, L1: 0.6476, Disc: 1.6827
  [Train] Batch 010 - Gen Loss: 49.2997, GAN: 0.7750, L1: 0.4852, Disc: 1.5114
  [Train] Batch 020 - Gen Loss: 29.2474, GAN: 0.7523, L1: 0.2850, Disc: 1.4562
  [Train] Batch 030 - Gen Loss: 30.0710, GAN: 0.8091, L1: 0.2926, Disc: 1.5872
  [Train] Batch 040 - Gen Loss: 29.1735, GAN: 0.7412, L1: 0.2843, Disc: 1.4263
  [Train] Batch 050 - Gen Loss: 31.3994, GAN: 0.7942, L1: 0.3061, Disc: 1.4926
  [Train] Batch 060 - Gen Loss: 33.8142, GAN: 0.7509, L1: 0.3306, Disc: 1.4162
  [Train] Batch 070 - Gen Loss: 24.8264, GAN: 0.7515, L1: 0.2407, Disc: 1.3737
  [Train] Batch 080 - Gen Loss: 28.8464, GAN: 0.7295, L1: 0.2812, Disc: 1.3739
  [Train] Batch 090 - Gen Loss: 26.9510, GAN: 0.7995, L1: 0.2615, Disc: 1.4692
  [Train] Batch 100 - Gen Loss: 29.2263, GAN: 0.7198, L1: 0.2851, Disc: 1.3812
  [Train] Batch 110 - Gen Loss: 21.9750, GAN: 0.7696, L1: 0.2121, Disc: 1.3627
  [Train] Batch 120 - Gen Loss: 20.0127, GAN: 0.8000

E0000 00:00:1745811539.721283      31 meta_optimizer.cc:966] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape infunctional_15_1/sequential_8_1/dropout_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


  [Val] MAE: 0.3127, RMSE: 0.4819, SSIM: 0.5043
Epoch 1 completed in 156.94 seconds.

Epoch 2/2
  [Train] Batch 000 - Gen Loss: 34.0834, GAN: 0.9620, L1: 0.3312, Disc: 0.8262
  [Train] Batch 010 - Gen Loss: 29.9438, GAN: 0.8195, L1: 0.2912, Disc: 1.1860
  [Train] Batch 020 - Gen Loss: 26.6512, GAN: 1.1487, L1: 0.2550, Disc: 0.9704
  [Train] Batch 030 - Gen Loss: 26.6534, GAN: 1.0196, L1: 0.2563, Disc: 1.2460
  [Train] Batch 040 - Gen Loss: 28.1243, GAN: 1.3319, L1: 0.2679, Disc: 0.7023
  [Train] Batch 050 - Gen Loss: 29.2357, GAN: 1.4598, L1: 0.2778, Disc: 0.9668
  [Train] Batch 060 - Gen Loss: 27.3224, GAN: 0.9617, L1: 0.2636, Disc: 0.8327
  [Train] Batch 070 - Gen Loss: 34.4192, GAN: 1.0376, L1: 0.3338, Disc: 0.8503
  [Train] Batch 080 - Gen Loss: 28.4201, GAN: 1.4162, L1: 0.2700, Disc: 0.7409
  [Train] Batch 090 - Gen Loss: 22.9360, GAN: 1.4197, L1: 0.2152, Disc: 0.7594
  [Train] Batch 100 - Gen Loss: 22.2931, GAN: 1.1264, L1: 0.2117, Disc: 0.8306
  [Train] Batch 110 - Gen Loss: 24.

100%|██████████| 53/53 [00:04<00:00, 10.65it/s]

Evaluation metrics:
  MAE: 0.3089
  RMSE: 0.4792
  SSIM: 0.5079





In [28]:
# Sorted PNG paths of the test inputs
test_spad_paths = sorted(
    os.path.join(SPAD_TEST_PATH, name)
    for name in os.listdir(SPAD_TEST_PATH)
    if name.endswith('.png')
)

print(f"Running inference on {len(test_spad_paths)} test images...")

# Input pipeline for inference
test_dataset = create_test_dataset(test_spad_paths, batch_size=BATCH_SIZE)

# Predict a depth map per test image; the images are written to TEST_OUTPUT_DIR
predictions_dict = test(
    generator, test_dataset, test_spad_paths, output_dir=TEST_OUTPUT_DIR)

# Build the submission CSV from the image files written above
submission_csv_path = os.path.join(OUTPUT_DIR, "submission.csv")
submission_file = convert_to_submission(
    image_folder=TEST_OUTPUT_DIR, output_csv=submission_csv_path)
print(f"Submission file created: {submission_file}")

# Save side-by-side figures for a handful of test samples
print("Creating visualization of test predictions...")
visualize_test_predictions(
    generator, test_dataset, test_spad_paths,
    num_examples=5, output_dir=VISUAL_OUTPUT_DIR)

print("Pipeline completed successfully!")

Running inference on 836 test images...
 Submission file saved to: /kaggle/working/outputs/submission.csv
Submission file created: /kaggle/working/outputs/submission.csv
Creating visualization of test predictions...
Pipeline completed successfully!


In [29]:
# print(predicted_depths[j].numpy().shape)

In [30]:
# # Get test images
# test_spad_paths = sorted([os.path.join(SPAD_TEST_PATH, filename) 
#                          for filename in os.listdir(SPAD_TEST_PATH) 
#                          if filename.endswith('.png')])

# print(f"Running inference on {len(test_spad_paths)} test images...")

# # Create test dataset
# test_dataset = create_test_dataset(test_spad_paths, batch_size=BATCH_SIZE)

# # Generate depth maps for test data
# predictions_dict = test(generator, test_dataset, test_spad_paths, output_dir=TEST_OUTPUT_DIR)

# # Convert predictions to submission CSV
# submission_file = convert_to_submission(predictions_dict, 
#                                        output_dir=TEST_OUTPUT_DIR, 
#                                        output_file=os.path.join(OUTPUT_DIR, "submission.csv"))

# print(f"Submission file created: {submission_file}")

# # Visualize a few test predictions
# print("Creating visualization of test predictions...")
# visualize_test_predictions(generator, test_dataset, test_spad_paths, 
#                           num_examples=5, output_dir=TEST_OUTPUT_DIR)

# print("Pipeline completed successfully!")