In [1]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model, callbacks
# Import specific ResNet version if needed, depends on TensorFlow/Keras versions
# from tensorflow.keras.applications.resnet import ResNet34 # Example if needed

from tqdm.keras import TqdmCallback # Import TqdmCallback
from tqdm import tqdm # Import standard tqdm for custom loops

2025-04-29 11:19:32.644375: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745925572.856597      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745925572.916111      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# ======================
# Configuration
# ======================
IMG_WIDTH = 256
IMG_HEIGHT = 256
IMG_CHANNELS = 1 # SPAD images are grayscale/single channel

In [3]:
TRAIN_SPAD_DIR = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/training-images' # Adjust folder names if needed
TRAIN_GT_DIR = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/training-depths' # Adjust folder names if needed
VAL_SPAD_DIR = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/validation-images'   # Adjust folder names if needed
VAL_GT_DIR = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/validation-depths'   # Adjust folder names if needed
TEST_SPAD_DIR = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/testing-images' # Adjust folder names if needed
BASE_PREDICTION_OUTPUT_DIR = '/kaggle/working/Predictons'

In [4]:
# Training parameters
BATCH_SIZE = 16
EPOCHS = 50
LEARNING_RATE = 3e-4 # Used with AdamW in the original snippet
SEQ_LENGTH = 8 

In [5]:
# Prediction saving frequency
SAVE_PREDICTIONS_FREQ = 5 # Save predictions every N epochs

# For numerical stability with log
EPSILON = 1e-7 # Use slightly larger epsilon for log(1-p)

# ======================
# Data Loading Helpers
# ======================

def load_input_image(filepath):
    """
    Loads the input image (assumed to be averaged binary or single frame result),
    resizes, converts to [0, 1], and calculates phi_tau = -log(1 - integrated).
    NOTE: This assumes the input filepath points directly to a single 8-bit PNG
    which is the result of averaging 'seq_length' binary frames (implicitly).
    """
    img = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # print(f"Error loading image: {filepath}") # Avoid spamming console
        return None
    img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_AREA)

    # Assuming img is already the result of averaging 'seq_length' binary frames
    # Scale to [0, 1]
    integrated = img.astype(np.float32) / 255.0

    # Apply the Poisson correction to estimate rate * seq_length
    # Clamp to avoid log(0) and log(<0)
    integrated_clipped = np.clip(integrated, EPSILON, 1.0 - EPSILON)
    phi_tau = -np.log(1.0 - integrated_clipped) # Ensure 1.0 is float

    phi_tau = np.expand_dims(phi_tau, -1) # Add channel dimension
    return phi_tau

def load_gt_depth(filepath):
    """
    Loads a 16-bit single-channel ground truth depth image, resizes,
    and normalizes to [0, 1].
    """
    # Load as 16-bit grayscale (cv2.IMREAD_UNCHANGED or -1)
    depth = cv2.imread(filepath, cv2.IMREAD_UNCHANGED)
    if depth is None:
        # print(f"Error loading depth: {filepath}") # Avoid spamming console
        return None
    depth = cv2.resize(depth, (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_AREA)

    # Assuming 16-bit depth, normalize to [0, 1]
    depth = depth.astype(np.float32) / 65535.0

    depth = np.expand_dims(depth, -1) # Add channel dimension

    return depth # Return normalized linear depth

def get_file_pairs(spad_dir, gt_dir):
    """Helper to list matching SPAD and GT depth files."""
    spad_files = sorted([f for f in os.listdir(spad_dir) if f.endswith('.png')])
    gt_files = sorted([f for f in os.listdir(gt_dir) if f.endswith('.png')])
    
    # Match files based on filename (assuming they have the same base name)
    spad_basenames = {os.path.splitext(f)[0]: f for f in spad_files}
    gt_basenames = {os.path.splitext(f)[0]: f for f in gt_files}
    
    common_basenames = list(set(spad_basenames.keys()) & set(gt_basenames.keys()))
    common_basenames.sort() # Ensure consistent order
    
    spad_paths = [os.path.join(spad_dir, spad_basenames[b]) for b in common_basenames]
    gt_paths = [os.path.join(gt_dir, gt_basenames[b]) for b in common_basenames]

    if len(spad_paths) != len(spad_files) or len(gt_paths) != len(gt_files):
         print(f"Warning: Found {len(spad_files)} SPAD files and {len(gt_files)} GT files in {spad_dir}/{gt_dir}, but only {len(spad_paths)} pairs matched filenames.")

    return spad_paths, gt_paths

In [6]:
# ======================
# Data Pipeline (Generator)
# ======================
class SPADGenerator(tf.keras.utils.Sequence):
    def __init__(self, spad_paths, gt_paths, batch_size=BATCH_SIZE):
        self.spad_paths = spad_paths
        self.gt_paths = gt_paths
        self.batch_size = batch_size
        self.on_epoch_end()

    def __len__(self):
        """Denotes the number of batches per epoch."""
        return len(self.spad_paths) // self.batch_size

    def __getitem__(self, index):
        """Generate one batch of data."""
        # Get batch file paths
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        batch_spad_paths = [self.spad_paths[k] for k in indexes]
        batch_gt_paths = [self.gt_paths[k] for k in indexes]

        # Generate data
        return self.__data_generation(batch_spad_paths, batch_gt_paths)

    def on_epoch_end(self):
        """Updates indexes after each epoch."""
        self.indexes = np.arange(len(self.spad_paths))
        np.random.shuffle(self.indexes)

    def __data_generation(self, batch_spad_paths, batch_gt_paths):
        """Generates data containing batch_size samples."""
        X = np.empty((len(batch_spad_paths), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.float32)
        y = np.empty((len(batch_gt_paths), IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.float32)

        loaded_count = 0
        for i, (spad_path, gt_path) in enumerate(zip(batch_spad_paths, batch_gt_paths)):
            phi_tau_img = load_input_image(spad_path)
            gt_depth_norm = load_gt_depth(gt_path)

            if phi_tau_img is None or gt_depth_norm is None:
                 # Handle loading errors - ideally log this.
                 # For simplicity, skip this sample. This might lead to a slightly smaller batch
                 # if multiple samples fail in a batch. A robust generator handles this.
                 continue # Skip this sample if loading fails

            X[loaded_count,] = phi_tau_img
            y[loaded_count,] = gt_depth_norm # This is normalized linear depth [0,1]
            loaded_count += 1

        # Return only the successfully loaded samples
        if loaded_count < len(batch_spad_paths):
             # print(f"Warning: Batch size reduced from {len(batch_spad_paths)} to {loaded_count} due to loading errors.")
             pass # Avoid spamming console

        return X[:loaded_count], y[:loaded_count]

# ======================
# Hybrid U-Net Architecture
# ======================
def build_spadnet(input_shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)):
    inputs = layers.Input(shape=input_shape)

    # Use ResNet-34 if available, otherwise use a similar structure or simplify
    # Note: ResNet34 is not a standard Keras application. ResNet50/101/152 are.
    # If ResNet34 is specifically needed and not available, you'd need to implement it
    # or find a custom implementation.
    # Let's assume a ResNet model that takes 1 channel input and doesn't have the top layer.
    # We'll use ResNet50 as an example, adapting it for 1 input channel.
    # If your environment *does* have ResNet34 available via tf.keras.applications.resnet, uncomment that import.

    # --- Coarse Path (Using adapted ResNet50 as an example) ---
    # Note: Adapting a pre-trained ResNet for 1 channel input might require loading weights manually
    # or training from scratch. Using weights=None here means training from scratch.
    try:
        # Attempt to use a standard ResNet (like 50) and adapt its input layer
        base_resnet = tf.keras.applications.ResNet50(
            include_top=False, weights=None, input_shape=input_shape
        )
        # The first conv layer in ResNet50 is typically 7x7, 64 filters, stride 2
        # It expects 3 input channels. We need to replace or adapt it for 1 channel.
        # A common trick is to load pre-trained weights (if weights='imagenet'),
        # average the first conv kernel weights across input channels, and use that.
        # Since weights=None, we just need a 1-channel input layer connected to the rest.

        # Get output from the base ResNet before global pooling
        # This gives the highest-level feature map from the ResNet body
        coarse_backbone_output = base_resnet.output # Shape (?, H/32, W/32, 2048 for ResNet50)

        # Apply Global Average Pooling
        coarse_feat_global = layers.GlobalAveragePooling2D()(coarse_backbone_output) # Shape (?, 2048)

        # Dense layers on the global feature (as in user's snippet)
        coarse_feat_dense = layers.Dense(512, activation='relu')(coarse_feat_global) # Shape (?, 512)

    except ImportError:
        print("Warning: ResNet50 not found or could not be adapted. Building a simpler coarse path.")
        # Fallback: A few conv layers and then global pooling
        x_coarse = layers.Conv2D(32, 5, strides=2, activation='relu', padding='same')(inputs)
        x_coarse = layers.Conv2D(64, 3, strides=2, activation='relu', padding='same')(x_coarse)
        x_coarse = layers.Conv2D(128, 3, strides=2, activation='relu', padding='same')(x_coarse)
        coarse_feat_global = layers.GlobalAveragePooling2D()(x_coarse)
        coarse_feat_dense = layers.Dense(512, activation='relu')(coarse_feat_global)

    # --- Fine Path (U-Net) ---
    # Standard U-Net encoder
    conv1 = layers.Conv2D(32, 3, activation='relu', padding='same')(inputs)
    conv1 = layers.Conv2D(32, 3, activation='relu', padding='same')(conv1) # Shape (?, 256, 256, 32)
    pool1 = layers.MaxPooling2D()(conv1) # Shape (?, 128, 128, 32)

    conv2 = layers.Conv2D(64, 3, activation='relu', padding='same')(pool1)
    conv2 = layers.Conv2D(64, 3, activation='relu', padding='same')(conv2) # Shape (?, 128, 128, 64)
    pool2 = layers.MaxPooling2D()(conv2) # Shape (?, 64, 64, 64)

    conv3 = layers.Conv2D(128, 3, activation='relu', padding='same')(pool2)
    conv3 = layers.Conv2D(128, 3, activation='relu', padding='same')(conv3) # Shape (?, 64, 64, 128)
    pool3 = layers.MaxPooling2D()(conv3) # Shape (?, 32, 32, 128)

    conv4 = layers.Conv2D(256, 3, activation='relu', padding='same')(pool3)
    conv4 = layers.Conv2D(256, 3, activation='relu', padding='same')(conv4) # Shape (?, 32, 32, 256)
    # No dropout here in the user's snippet, but adding it can help prevent overfitting
    # drop4 = layers.Dropout(0.5)(conv4)

    # Bottleneck
    bottleneck = layers.MaxPooling2D()(conv4) # Shape (?, 16, 16, 256)
    bottleneck = layers.Conv2D(512, 3, activation='relu', padding='same')(bottleneck) # Shape (?, 16, 16, 512)
    bottleneck = layers.Conv2D(512, 3, activation='relu', padding='same')(bottleneck) # Shape (?, 16, 16, 512) # Added extra conv for symmetry/capacity


    # --- Fusion ---
    # Fuse the global coarse_feat_dense with the spatial bottleneck features
    # coarse_feat_dense shape: (?, 512)
    # bottleneck shape: (?, 16, 16, 512)

    # Reshape global feature to (batch_size, 1, 1, 512)
    coarse_feat_reshaped = layers.Reshape((1, 1, 512))(coarse_feat_dense)

    # Spatially replicate the global feature to match bottleneck spatial dimensions
    # Get dynamic H, W from bottleneck
    bottleneck_spatial_shape = tf.shape(bottleneck)[1:3]
    coarse_feat_replicated = tf.tile(coarse_feat_reshaped, [1, bottleneck_spatial_shape[0], bottleneck_spatial_shape[1], 1])

    # Concatenate replicated global feature with bottleneck feature
    # Results in shape (?, 16, 16, 512 + 512 = 1024)
    fused = layers.Concatenate()([bottleneck, coarse_feat_replicated])


    # --- Decoder ---
    # Upsample and concatenate with skip connections
    up6 = layers.Conv2DTranspose(256, 2, strides=(2, 2), padding='same')(fused) # Shape (?, 32, 32, 256)
    merge6 = layers.concatenate([conv4, up6], axis=3) # Skip connection from conv4 (shape ?, 32, 32, 256). Result shape (?, 32, 32, 512)
    conv6 = layers.Conv2D(256, 3, activation='relu', padding='same')(merge6)
    conv6 = layers.Conv2D(256, 3, activation='relu', padding='same')(conv6) # Shape (?, 32, 32, 256)

    up7 = layers.Conv2DTranspose(128, 2, strides=(2, 2), padding='same')(conv6) # Shape (?, 64, 64, 128)
    merge7 = layers.concatenate([conv3, up7], axis=3) # Skip connection from conv3 (shape ?, 64, 64, 128). Result shape (?, 64, 64, 256)
    conv7 = layers.Conv2D(128, 3, activation='relu', padding='same')(merge7)
    conv7 = layers.Conv2D(128, 3, activation='relu', padding='same')(conv7) # Shape (?, 64, 64, 128)

    up8 = layers.Conv2DTranspose(64, 2, strides=(2, 2), padding='same')(conv7) # Shape (?, 128, 128, 64)
    merge8 = layers.concatenate([conv2, up8], axis=3) # Skip connection from conv2 (shape ?, 128, 128, 64). Result shape (?, 128, 128, 128)
    conv8 = layers.Conv2D(64, 3, activation='relu', padding='same')(merge8)
    conv8 = layers.Conv2D(64, 3, activation='relu', padding='same')(conv8) # Shape (?, 128, 128, 64)

    up9 = layers.Conv2DTranspose(32, 2, strides=(2, 2), padding='same')(conv8) # Shape (?, 256, 256, 32)
    merge9 = layers.concatenate([conv1, up9], axis=3) # Skip connection from conv1 (shape ?, 256, 256, 32). Result shape (?, 256, 256, 64)
    conv9 = layers.Conv2D(32, 3, activation='relu', padding='same')(merge9)
    conv9 = layers.Conv2D(32, 3, activation='relu', padding='same')(conv9) # Shape (?, 256, 256, 32)

    # Output layer predicts normalized linear depth [0,1]
    outputs = layers.Conv2D(1, 1, activation='sigmoid', dtype='float32')(conv9) # Ensure float32 output

    return Model(inputs=inputs, outputs=outputs)

In [7]:
# ======================
# Prediction Saving Function (Loop-based for simplicity)
# ======================
def save_predictions_for_test_set_loop(model, test_spad_paths, output_dir):
    """Predicts on test set and saves scaled depth PNGs using a loop."""
    os.makedirs(output_dir, exist_ok=True)

    for i, spad_filepath in enumerate(tqdm(test_spad_paths, desc=f"Saving to {os.path.basename(output_dir)}")):
        phi_tau_img = load_input_image(spad_filepath)
        if phi_tau_img is None:
            # Error already printed in load_input_image
            continue

        img_batch = np.expand_dims(phi_tau_img, axis=0) # Add batch dimension

        # Predict (model outputs normalized linear depth [0,1])
        predicted_depth_norm = model.predict(img_batch, verbose=0)[0] # Remove batch dimension

        # --- Scaling for PNG Save (0-255) ---
        # The model output is [0,1] linear depth.
        # imgs2csv.py expects 0-255 uint8 PNGs, which it will then MIN-MAX scale to [0,1]
        # Saving the direct [0,1] output scaled to [0,255] is standard for this.
        scaled_uint8_depth = np.clip(predicted_depth_norm * 255.0, 0, 255).astype(np.uint8)

        # Remove the channel dimension for cv2.imwrite
        scaled_uint8_depth = np.squeeze(scaled_uint8_depth)

        # Save the prediction with the original filename
        output_filename = os.path.basename(spad_filepath)
        output_filepath = os.path.join(output_dir, output_filename)
        cv2.imwrite(output_filepath, scaled_uint8_depth)


# ======================
# Custom Callback for Periodic Saving
# ======================
class PeriodicPredictionSaverCallback(callbacks.Callback):
    def __init__(self, test_spad_paths, save_freq, base_output_dir):
        super().__init__()
        self.test_spad_paths = test_spad_paths
        self.save_freq = save_freq
        self.base_output_dir = base_output_dir

    def on_epoch_end(self, epoch, logs=None):
        # Save predictions every self.save_freq epochs (starting from epoch 0, so +1 for display)
        if (epoch + 1) % self.save_freq == 0:
            print(f"\nSaving predictions after epoch {epoch + 1}...")
            epoch_output_dir = os.path.join(self.base_output_dir, f"epoch_{epoch+1:03d}")
            save_predictions_for_test_set_loop( # Use the loop-based saver
                self.model, # The model instance is accessible via self.model
                self.test_spad_paths,
                epoch_output_dir
            )

In [8]:
# ======================
# Training Configuration
# ======================
def main():
    # --- Prepare Data File Lists ---
    print("Preparing data file lists...")

    # Get file lists for training and validation
    train_spad_paths, train_gt_paths = get_file_pairs(TRAIN_SPAD_DIR, TRAIN_GT_DIR)
    val_spad_paths, val_gt_paths = get_file_pairs(VAL_SPAD_DIR, VAL_GT_DIR)

    # Test data only needs input SPAD paths
    test_spad_paths = sorted([os.path.join(TEST_SPAD_DIR, f) for f in os.listdir(TEST_SPAD_DIR) if f.endswith('.png')])

    print(f"Training samples: {len(train_spad_paths)}")
    print(f"Validation samples: {len(val_spad_paths)}")
    print(f"Test samples: {len(test_spad_paths)}")

    if not train_spad_paths or not val_spad_paths or not test_spad_paths:
        print("\nError: One or more data directories are empty or paths are incorrect!")
        print("Please check DATA_DIR and subfolder names (TRAIN_SPAD_DIR, etc.)")
        return # Exit if data is not found

    # Data generators
    train_gen = SPADGenerator(train_spad_paths, train_gt_paths, batch_size=BATCH_SIZE)
    val_gen = SPADGenerator(val_spad_paths, val_gt_paths, batch_size=BATCH_SIZE)

    # Model setup
    print("Building model...")
    model = build_spadnet(input_shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))

    print("Compiling model...")
    # Compile using user's specified loss and metric
    model.compile(
        optimizer=tf.keras.optimizers.AdamW(LEARNING_RATE),
        loss='mse', # Mean Squared Error on normalized [0,1] depth
        metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')] # Use 'rmse' for monitoring
    )

    # Optional: Print model summary
    # model.summary()

    # --- Set up Callbacks ---
    print("Setting up callbacks...")
    # Checkpoint needs .keras extension in modern Keras
    checkpoint_filepath = 'best_model.keras'

    callbacks_list = [
        callbacks.ModelCheckpoint(
            checkpoint_filepath,
            save_best_only=True,
            monitor='val_rmse', # Monitor validation RMSE
            mode='min',
            verbose=0 # Keep checkpoint callback quiet
        ),
        PeriodicPredictionSaverCallback(
             test_spad_paths=test_spad_paths,
             save_freq=SAVE_PREDICTIONS_FREQ, # Save every N epochs
             base_output_dir=BASE_PREDICTION_OUTPUT_DIR,
        ),
        callbacks.ReduceLROnPlateau(
            monitor='val_rmse', # Monitor validation RMSE
            factor=0.5, # Reduce learning rate by half
            patience=3, # If val_rmse doesn't improve for 3 epochs
            mode='min',
            min_lr=1e-6 # Minimum learning rate
        ),
        callbacks.EarlyStopping(
            monitor='val_rmse',
            patience=10, # Stop if val_rmse doesn't improve for 10 epochs
            mode='min',
            restore_best_weights=True # Use weights from the best epoch
        ),
        TqdmCallback(verbose=1) # Progress bar callback
    ]

    # --- Training ---
    print("Starting training...")
    # Removed workers/use_multiprocessing to avoid TypeError
    history = model.fit(
        train_gen,
        validation_data=val_gen,
        epochs=EPOCHS,
        callbacks=callbacks_list,
        verbose=0 # Let TqdmCallback handle verbosity
    )

    print("Training finished.")

    # --- Final Prediction and Saving ---
    # Load the best model weights saved by ModelCheckpoint
    # Note: EarlyStopping with restore_best_weights=True often makes this redundant,
    # but explicitly loading ensures you use the best saved model state, especially
    # if training finished without early stopping.
    if os.path.exists(checkpoint_filepath):
        print(f"Loading best model weights from {checkpoint_filepath}")
        model.load_weights(checkpoint_filepath)
    else:
        print("No checkpoint found. Using model state after last epoch.")

    print("\nGenerating final predictions with the best model...")
    final_output_dir = os.path.join(BASE_PREDICTION_OUTPUT_DIR, "final_best_model")
    save_predictions_for_test_set_loop( # Use the loop-based saver
        model,
        test_spad_paths,
        final_output_dir
    )
    print(f"Final predictions saved to {final_output_dir}")


    # --- Submission Instructions ---
    print("\n--- Submission Step ---")
    print("1. Ensure you have the imgs2csv.py script provided by the challenge.")
    print(f"2. The predicted depth maps are saved in subfolders of: {BASE_PREDICTION_OUTPUT_DIR}")
    print("3. Choose the folder you want to submit (e.g., 'final_best_model' for the best model, or 'epoch_XXX' for periodic saves).")
    print("4. Open a terminal/notebook cell and run the conversion script, pointing to the chosen folder:")
    # Adjust the path to where your imgs2csv.py is located on Kaggle
    print(f"   python /kaggle/input/spad-depth-challenge/imgs2csv.py {final_output_dir} submission.csv") # **ADJUST THIS PATH**
    print("5. Submit the generated 'submission.csv' file to Kaggle.")
    print("\nNote: Replace '/kaggle/input/spad-depth-challenge/imgs2csv.py' with the actual path to the imgs2csv.py script on Kaggle.")
    print("----------------------")


if __name__ == "__main__":
    # Set default float type for consistency
    tf.keras.backend.set_floatx('float32')
    main()

Preparing data file lists...
Training samples: 6686
Validation samples: 836
Test samples: 836
Building model...


I0000 00:00:1745925832.644277      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


ValueError: A KerasTensor cannot be used as input to a TensorFlow function. A KerasTensor is a symbolic placeholder for a shape and dtype, used when constructing Keras Functional models or Keras Functions. You can only use it as input to a Keras layer or a Keras operation (from the namespaces `keras.layers` and `keras.operations`). You are likely doing something like:

```
x = Input(...)
...
tf_fn(x)  # Invalid.
```

What you should do instead is wrap `tf_fn` in a layer:

```
class MyLayer(Layer):
    def call(self, x):
        return tf_fn(x)

x = MyLayer()(x)
```


In [9]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model, callbacks
from tqdm.keras import TqdmCallback
from tqdm import tqdm

# ======================
# Configuration
# ======================
IMG_WIDTH = 256
IMG_HEIGHT = 256
IMG_CHANNELS = 1 # SPAD images are grayscale/single channel

# Kaggle input directory structure
TRAIN_SPAD_DIR = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/training-images'
TRAIN_GT_DIR = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/training-depths'
VAL_SPAD_DIR = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/validation-images'
VAL_GT_DIR = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/validation-depths'
TEST_SPAD_DIR = '/kaggle/input/ee-5179-modern-computer-vision-course-competition/competition-data/testing-images'
BASE_PREDICTION_OUTPUT_DIR = '/kaggle/working/predictions_png'

BATCH_SIZE = 16
EPOCHS = 50
LEARNING_RATE = 3e-4
SEQ_LENGTH = 8
SAVE_PREDICTIONS_FREQ = 5
EPSILON = 1e-7

# ======================
# Data Loading Helpers
# ======================

def load_input_image(filepath):
    img = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
    if img is None:
        return None
    img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_AREA)
    integrated = img.astype(np.float32) / 255.0
    integrated_clipped = np.clip(integrated, EPSILON, 1.0 - EPSILON)
    phi_tau = -np.log(1.0 - integrated_clipped)
    phi_tau = np.expand_dims(phi_tau, -1)
    return phi_tau

def load_gt_depth(filepath):
    depth = cv2.imread(filepath, cv2.IMREAD_UNCHANGED)
    if depth is None:
        return None
    depth = cv2.resize(depth, (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_AREA)
    depth = depth.astype(np.float32) / 65535.0
    depth = np.expand_dims(depth, -1)
    return depth

def get_file_pairs(spad_dir, gt_dir):
    spad_files = sorted([f for f in os.listdir(spad_dir) if f.endswith('.png')])
    gt_files = sorted([f for f in os.listdir(gt_dir) if f.endswith('.png')])
    spad_basenames = {os.path.splitext(f)[0]: f for f in spad_files}
    gt_basenames = {os.path.splitext(f)[0]: f for f in gt_files}
    common_basenames = list(set(spad_basenames.keys()) & set(gt_basenames.keys()))
    common_basenames.sort()
    spad_paths = [os.path.join(spad_dir, spad_basenames[b]) for b in common_basenames]
    gt_paths = [os.path.join(gt_dir, gt_basenames[b]) for b in common_basenames]
    if len(spad_paths) != len(spad_files) or len(gt_paths) != len(gt_files):
        print(f"Warning: Found {len(spad_files)} SPAD files and {len(gt_files)} GT files in {spad_dir}/{gt_dir}, but only {len(spad_paths)} pairs matched filenames.")
    return spad_paths, gt_paths

# ======================
# Data Pipeline (Generator)
# ======================
class SPADGenerator(tf.keras.utils.Sequence):
    def __init__(self, spad_paths, gt_paths, batch_size=BATCH_SIZE):
        self.spad_paths = spad_paths
        self.gt_paths = gt_paths
        self.batch_size = batch_size
        self.on_epoch_end()

    def __len__(self):
        return len(self.spad_paths) // self.batch_size

    def __getitem__(self, index):
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        batch_spad_paths = [self.spad_paths[k] for k in indexes]
        batch_gt_paths = [self.gt_paths[k] for k in indexes]
        return self.__data_generation(batch_spad_paths, batch_gt_paths)

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.spad_paths))
        np.random.shuffle(self.indexes)

    def __data_generation(self, batch_spad_paths, batch_gt_paths):
        X = np.empty((len(batch_spad_paths), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.float32)
        y = np.empty((len(batch_gt_paths), IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.float32)
        loaded_count = 0
        for i, (spad_path, gt_path) in enumerate(zip(batch_spad_paths, batch_gt_paths)):
            phi_tau_img = load_input_image(spad_path)
            gt_depth_norm = load_gt_depth(gt_path)
            if phi_tau_img is None or gt_depth_norm is None:
                continue
            X[loaded_count,] = phi_tau_img
            y[loaded_count,] = gt_depth_norm
            loaded_count += 1
        return X[:loaded_count], y[:loaded_count]

# ======================
# Hybrid U-Net Architecture
# ======================
def build_spadnet(input_shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)):
    inputs = layers.Input(shape=input_shape)
    # --- Fine Path (U-Net) ---
    conv1 = layers.Conv2D(32, 3, activation='relu', padding='same')(inputs)
    conv1 = layers.Conv2D(32, 3, activation='relu', padding='same')(conv1)
    pool1 = layers.MaxPooling2D()(conv1)
    conv2 = layers.Conv2D(64, 3, activation='relu', padding='same')(pool1)
    conv2 = layers.Conv2D(64, 3, activation='relu', padding='same')(conv2)
    pool2 = layers.MaxPooling2D()(conv2)
    conv3 = layers.Conv2D(128, 3, activation='relu', padding='same')(pool2)
    conv3 = layers.Conv2D(128, 3, activation='relu', padding='same')(conv3)
    pool3 = layers.MaxPooling2D()(conv3)
    conv4 = layers.Conv2D(256, 3, activation='relu', padding='same')(pool3)
    conv4 = layers.Conv2D(256, 3, activation='relu', padding='same')(conv4)
    bottleneck = layers.MaxPooling2D()(conv4)
    bottleneck = layers.Conv2D(512, 3, activation='relu', padding='same')(bottleneck)
    bottleneck = layers.Conv2D(512, 3, activation='relu', padding='same')(bottleneck)
    # Decoder
    up6 = layers.Conv2DTranspose(256, 2, strides=(2, 2), padding='same')(bottleneck)
    merge6 = layers.concatenate([conv4, up6], axis=3)
    conv6 = layers.Conv2D(256, 3, activation='relu', padding='same')(merge6)
    conv6 = layers.Conv2D(256, 3, activation='relu', padding='same')(conv6)
    up7 = layers.Conv2DTranspose(128, 2, strides=(2, 2), padding='same')(conv6)
    merge7 = layers.concatenate([conv3, up7], axis=3)
    conv7 = layers.Conv2D(128, 3, activation='relu', padding='same')(merge7)
    conv7 = layers.Conv2D(128, 3, activation='relu', padding='same')(conv7)
    up8 = layers.Conv2DTranspose(64, 2, strides=(2, 2), padding='same')(conv7)
    merge8 = layers.concatenate([conv2, up8], axis=3)
    conv8 = layers.Conv2D(64, 3, activation='relu', padding='same')(merge8)
    conv8 = layers.Conv2D(64, 3, activation='relu', padding='same')(conv8)
    up9 = layers.Conv2DTranspose(32, 2, strides=(2, 2), padding='same')(conv8)
    merge9 = layers.concatenate([conv1, up9], axis=3)
    conv9 = layers.Conv2D(32, 3, activation='relu', padding='same')(merge9)
    conv9 = layers.Conv2D(32, 3, activation='relu', padding='same')(conv9)
    outputs = layers.Conv2D(1, 1, activation='sigmoid', dtype='float32')(conv9)
    return Model(inputs=inputs, outputs=outputs)

# ======================
# Prediction Saving Function (Loop-based for simplicity)
# ======================
def save_predictions_for_test_set_loop(model, test_spad_paths, output_dir):
    os.makedirs(output_dir, exist_ok=True)
    for i, spad_filepath in enumerate(tqdm(test_spad_paths, desc=f"Saving to {os.path.basename(output_dir)}")):
        phi_tau_img = load_input_image(spad_filepath)
        if phi_tau_img is None:
            continue
        img_batch = np.expand_dims(phi_tau_img, axis=0)
        predicted_depth_norm = model.predict(img_batch, verbose=0)[0]
        scaled_uint8_depth = np.clip(predicted_depth_norm * 255.0, 0, 255).astype(np.uint8)
        scaled_uint8_depth = np.squeeze(scaled_uint8_depth)
        output_filename = os.path.basename(spad_filepath)
        output_filepath = os.path.join(output_dir, output_filename)
        cv2.imwrite(output_filepath, scaled_uint8_depth)

# ======================
# Custom Callback for Periodic Saving
# ======================
class PeriodicPredictionSaverCallback(callbacks.Callback):
    def __init__(self, test_spad_paths, save_freq, base_output_dir):
        super().__init__()
        self.test_spad_paths = test_spad_paths
        self.save_freq = save_freq
        self.base_output_dir = base_output_dir
    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % self.save_freq == 0:
            print(f"\nSaving predictions after epoch {epoch + 1}...")
            epoch_output_dir = os.path.join(self.base_output_dir, f"epoch_{epoch+1:03d}")
            save_predictions_for_test_set_loop(
                self.model,
                self.test_spad_paths,
                epoch_output_dir
            )

# ======================
# Training Configuration
# ======================
def main():
    print("Preparing data file lists...")
    train_spad_paths, train_gt_paths = get_file_pairs(TRAIN_SPAD_DIR, TRAIN_GT_DIR)
    val_spad_paths, val_gt_paths = get_file_pairs(VAL_SPAD_DIR, VAL_GT_DIR)
    test_spad_paths = sorted([os.path.join(TEST_SPAD_DIR, f) for f in os.listdir(TEST_SPAD_DIR) if f.endswith('.png')])
    print(f"Training samples: {len(train_spad_paths)}")
    print(f"Validation samples: {len(val_spad_paths)}")
    print(f"Test samples: {len(test_spad_paths)}")
    if not train_spad_paths or not val_spad_paths or not test_spad_paths:
        print("\nError: One or more data directories are empty or paths are incorrect!")
        print("Please check DATA_DIR and subfolder names (TRAIN_SPAD_DIR, etc.)")
        return
    train_gen = SPADGenerator(train_spad_paths, train_gt_paths, batch_size=BATCH_SIZE)
    val_gen = SPADGenerator(val_spad_paths, val_gt_paths, batch_size=BATCH_SIZE)
    print("Building model...")
    model = build_spadnet(input_shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
    print("Compiling model...")
    model.compile(
        optimizer=tf.keras.optimizers.AdamW(LEARNING_RATE),
        loss='mse',
        metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')]
    )
    print("Setting up callbacks...")
    checkpoint_filepath = 'best_model.keras'
    callbacks_list = [
        callbacks.ModelCheckpoint(
            checkpoint_filepath,
            save_best_only=True,
            monitor='val_rmse',
            mode='min',
            verbose=0
        ),
        PeriodicPredictionSaverCallback(
            test_spad_paths=test_spad_paths,
            save_freq=SAVE_PREDICTIONS_FREQ,
            base_output_dir=BASE_PREDICTION_OUTPUT_DIR,
        ),
        callbacks.ReduceLROnPlateau(
            monitor='val_rmse',
            factor=0.5,
            patience=3,
            mode='min',
            min_lr=1e-6
        ),
        callbacks.EarlyStopping(
            monitor='val_rmse',
            patience=10,
            mode='min',
            restore_best_weights=True
        ),
        TqdmCallback(verbose=1)
    ]
    print("Starting training...")
    history = model.fit(
        train_gen,
        validation_data=val_gen,
        epochs=EPOCHS,
        callbacks=callbacks_list,
        verbose=0
    )
    print("Training finished.")
    if os.path.exists(checkpoint_filepath):
        print(f"Loading best model weights from {checkpoint_filepath}")
        model.load_weights(checkpoint_filepath)
    else:
        print("No checkpoint found. Using model state after last epoch.")
    print("\nGenerating final predictions with the best model...")
    final_output_dir = os.path.join(BASE_PREDICTION_OUTPUT_DIR, "final_best_model")
    save_predictions_for_test_set_loop(
        model,
        test_spad_paths,
        final_output_dir
    )
    print(f"Final predictions saved to {final_output_dir}")
    print("\n--- Submission Step ---")
    print("1. Ensure you have the imgs2csv.py script provided by the challenge.")
    print(f"2. The predicted depth maps are saved in subfolders of: {BASE_PREDICTION_OUTPUT_DIR}")
    print("3. Choose the folder you want to submit (e.g., 'final_best_model' for the best model, or 'epoch_XXX' for periodic saves).")
    print("4. Open a terminal/notebook cell and run the conversion script, pointing to the chosen folder:")
    print(f"   python /kaggle/input/spad-depth-challenge/imgs2csv.py {final_output_dir} submission.csv")
    print("5. Submit the generated 'submission.csv' file to Kaggle.")
    print("\nNote: Replace '/kaggle/input/spad-depth-challenge/imgs2csv.py' with the actual path to the imgs2csv.py script on Kaggle.")
    print("----------------------")

if __name__ == "__main__":
    tf.keras.backend.set_floatx('float32')
    main()


Preparing data file lists...
Training samples: 6686
Validation samples: 836
Test samples: 836
Building model...
Compiling model...
Setting up callbacks...


0epoch [00:00, ?epoch/s]

0batch [00:00, ?batch/s]

Starting training...


  self._warn_if_super_not_called()
I0000 00:00:1745926137.658788     114 service.cc:148] XLA service 0x79f598003a00 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1745926137.659697     114 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1745926138.561033     114 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1745926154.544686     114 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.



Saving predictions after epoch 5...




Saving to epoch_005:   0%|          | 0/836 [00:00<?, ?it/s][A[A

Saving to epoch_005:   0%|          | 1/836 [00:01<20:44,  1.49s/it][A[A

Saving to epoch_005:   0%|          | 3/836 [00:01<06:06,  2.27it/s][A[A

Saving to epoch_005:   1%|          | 5/836 [00:01<03:25,  4.05it/s][A[A

Saving to epoch_005:   1%|          | 7/836 [00:01<02:20,  5.91it/s][A[A

Saving to epoch_005:   1%|          | 9/836 [00:02<01:48,  7.62it/s][A[A

Saving to epoch_005:   1%|▏         | 11/836 [00:02<01:29,  9.21it/s][A[A

Saving to epoch_005:   2%|▏         | 13/836 [00:02<01:19, 10.36it/s][A[A

Saving to epoch_005:   2%|▏         | 15/836 [00:02<01:10, 11.66it/s][A[A

Saving to epoch_005:   2%|▏         | 17/836 [00:02<01:04, 12.73it/s][A[A

Saving to epoch_005:   2%|▏         | 19/836 [00:02<01:00, 13.60it/s][A[A

Saving to epoch_005:   3%|▎         | 21/836 [00:02<00:57, 14.19it/s][A[A

Saving to epoch_005:   3%|▎         | 23/836 [00:02<00:57, 14.15it/s][A[A

Saving to e


Saving predictions after epoch 10...




Saving to epoch_010:   0%|          | 0/836 [00:00<?, ?it/s][A[A

Saving to epoch_010:   0%|          | 2/836 [00:00<00:59, 14.05it/s][A[A

Saving to epoch_010:   0%|          | 4/836 [00:00<00:53, 15.69it/s][A[A

Saving to epoch_010:   1%|          | 6/836 [00:00<00:51, 16.27it/s][A[A

Saving to epoch_010:   1%|          | 8/836 [00:00<00:51, 16.14it/s][A[A

Saving to epoch_010:   1%|          | 10/836 [00:00<00:50, 16.34it/s][A[A

Saving to epoch_010:   1%|▏         | 12/836 [00:00<00:49, 16.59it/s][A[A

Saving to epoch_010:   2%|▏         | 14/836 [00:00<00:49, 16.59it/s][A[A

Saving to epoch_010:   2%|▏         | 16/836 [00:00<00:49, 16.64it/s][A[A

Saving to epoch_010:   2%|▏         | 18/836 [00:01<00:48, 16.76it/s][A[A

Saving to epoch_010:   2%|▏         | 20/836 [00:01<00:48, 16.87it/s][A[A

Saving to epoch_010:   3%|▎         | 22/836 [00:01<00:47, 16.97it/s][A[A

Saving to epoch_010:   3%|▎         | 24/836 [00:01<00:47, 17.01it/s][A[A

Saving to 

Training finished.
Loading best model weights from best_model.keras

Generating final predictions with the best model...


Saving to final_best_model: 100%|██████████| 836/836 [00:50<00:00, 16.65it/s]

Final predictions saved to /kaggle/working/predictions_png/final_best_model

--- Submission Step ---
1. Ensure you have the imgs2csv.py script provided by the challenge.
2. The predicted depth maps are saved in subfolders of: /kaggle/working/predictions_png
3. Choose the folder you want to submit (e.g., 'final_best_model' for the best model, or 'epoch_XXX' for periodic saves).
4. Open a terminal/notebook cell and run the conversion script, pointing to the chosen folder:
   python /kaggle/input/spad-depth-challenge/imgs2csv.py /kaggle/working/predictions_png/final_best_model submission.csv
5. Submit the generated 'submission.csv' file to Kaggle.

Note: Replace '/kaggle/input/spad-depth-challenge/imgs2csv.py' with the actual path to the imgs2csv.py script on Kaggle.
----------------------





In [13]:
import os
import cv2
import pandas as pd
import numpy as np

def images_to_csv_with_metadata(image_folder, output_csv):
    # Initialize an empty list to store image data and metadata
    data = []

    # Loop through all images in the folder
    for idx, filename in enumerate(sorted(os.listdir(image_folder))):
        if filename.endswith(".png"):
            filepath = os.path.join(image_folder, filename)
            # Read the image
            image = cv2.imread(filepath, cv2.IMREAD_UNCHANGED)
            image = cv2.resize(image, (128, 128))
            image = image / 255.
            image = (image - np.min(image)) / (np.max(image) - np.min(image) + 1e-6)
            image = np.uint8(image * 255.)
            # Flatten the image into a 1D array
            image_flat = image.flatten()
            # Add ID, ImageID (filename), and pixel values
            row = [idx, filename] + image_flat.tolist()
            data.append(row)
    
    # Create a DataFrame
    num_columns = len(data[0]) - 2 if data else 0
    column_names = ["id", "ImageID"] + [indx for indx in range(num_columns)]
    df = pd.DataFrame(data, columns=column_names)

    # Save to CSV
    df.to_csv(output_csv, index=False)

# Paths for prediction and ground truth images
predictions_folder = "/kaggle/working/predictions_png/epoch_010"

# Output CSV paths
predictions_csv = "/kaggle/working/predictions_10.csv"

# Convert prediction images to CSV
images_to_csv_with_metadata(predictions_folder, predictions_csv)