Task 1 - Single-image depth estimation. This can be achieved with either a CNN or a traditional (non-learning) method.

In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# Define directories
RGB_DIR = './data/rgb/'
DEPTH_DIR = './data/depth/'

# Target size as (width, height), the order cv2.resize expects. The network defined
# later in this notebook defaults to a 96x96x3 input, so every image is resized to match.
IMG_SIZE = (96, 96)


def _read_resized(path, flags, interpolation):
    """Read one image with OpenCV and resize it to IMG_SIZE.

    Raises FileNotFoundError when OpenCV cannot decode the file, because
    cv2.imread reports failure by returning None rather than raising.
    """
    image = cv2.imread(path, flags)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    return cv2.resize(image, IMG_SIZE, interpolation=interpolation)


# Load dataset filenames
rgb_files = sorted([os.path.join(RGB_DIR, f) for f in os.listdir(RGB_DIR) if f.endswith('.png')])
depth_files = sorted([os.path.join(DEPTH_DIR, f) for f in os.listdir(DEPTH_DIR) if f.endswith('.png')])

# Assuming that the images are matched, i.e., rgb_files[i] corresponds to depth_files[i]
# (pairing relies purely on sorted filename order -- verify against the dataset layout).
assert len(rgb_files) == len(depth_files), "Mismatch in dataset sizes"
assert rgb_files, "No .png images found in " + RGB_DIR

# Load images into memory.
# OpenCV decodes colour images as BGR; convert to RGB so the arrays match their name
# and display correctly with matplotlib.
rgb_images = [
    cv2.cvtColor(_read_resized(f, cv2.IMREAD_COLOR, cv2.INTER_AREA), cv2.COLOR_BGR2RGB)
    for f in rgb_files
]
# Assuming depth images are 8-bit grayscale. Nearest-neighbour resizing avoids
# inventing intermediate depth values across object boundaries.
depth_images = [
    _read_resized(f, cv2.IMREAD_GRAYSCALE, cv2.INTER_NEAREST)
    for f in depth_files
]

# Convert to numpy arrays and normalize to [0, 1]. float32 halves memory compared with
# the float64 that a plain division of a uint8 array would produce.
rgb_images = np.stack(rgb_images).astype(np.float32) / 255.0
# Add a trailing channel axis so targets are (N, H, W, 1), like the model's output.
depth_images = (np.stack(depth_images).astype(np.float32) / 255.0)[..., np.newaxis]

# Split dataset into training, validation, and testing sets (60% / 20% / 20%)
X_train, X_temp, y_train, y_temp = train_test_split(rgb_images, depth_images, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

print("Training set size:", len(X_train))
print("Validation set size:", len(X_val))
print("Test set size:", len(X_test))


In [1]:
import tensorflow as tf
from tensorflow.keras import layers

def depth_estimation_model(input_shape=(96, 96, 3)):
    """Build a small convolutional encoder-decoder that maps an image to a depth map.

    The encoder halves the spatial resolution three times (8x in total) and the
    decoder doubles it three times, so the output has the same height and width
    as the input whenever both are divisible by 8.

    Args:
        input_shape: (height, width, channels) of the input image.

    Returns:
        An uncompiled tf.keras.Model whose output has a single channel with
        values in (0, 1) from the final sigmoid activation.
    """
    inputs = layers.Input(shape=input_shape)

    # Encoder: each conv + 2x2 max-pool stage halves height and width.
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
    x = layers.MaxPooling2D((2, 2))(x)

    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2, 2))(x)

    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2, 2))(x)

    # Decoder: one stride-2 transposed convolution per pooling stage. With only two
    # upsampling steps the output would be half the input resolution (48x48 for a
    # 96x96 input) and could not be compared against full-size depth targets.
    x = layers.Conv2DTranspose(128, (3, 3), strides=(2, 2), activation='relu', padding='same')(x)
    x = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), activation='relu', padding='same')(x)
    x = layers.Conv2DTranspose(32, (3, 3), strides=(2, 2), activation='relu', padding='same')(x)

    # Output depth map, sigmoid activation to ensure output is between 0 and 1 (black and white)
    outputs = layers.Conv2DTranspose(1, (3, 3), activation='sigmoid', padding='same')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model

# Build the network with its default 96x96x3 input and train it with pixel-wise MSE.
model = depth_estimation_model()
model.compile(optimizer='adam', loss='mean_squared_error')

# summary() prints the table itself and returns None, so wrapping it in print()
# would emit a stray "None" after the table.
model.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 96, 96, 3)]       0         
                                                                 
 conv2d (Conv2D)             (None, 96, 96, 64)        1792      
                                                                 
 max_pooling2d (MaxPooling2  (None, 48, 48, 64)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 48, 48, 128)       73856     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 24, 24, 128)       0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 24, 24, 256)       295168

In [None]:
# Train, evaluate and visualise the model using the splits created by the
# data-loading cell: X_train/y_train, X_val/y_val and X_test/y_test.
import matplotlib.pyplot as plt

# Training
epochs = 20
batch_size = 32
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=epochs, batch_size=batch_size)

# Evaluation
val_loss = model.evaluate(X_val, y_val, verbose=1)
print(f"Validation MSE Loss: {val_loss}")

# Testing
predicted_depths = model.predict(X_test)

# Visualization (Optional: To visualize a test image and its predicted depth map)
idx = 0  # Change this to view different test samples
plt.figure(figsize=(10,4))

plt.subplot(1, 2, 1)
plt.imshow(X_test[idx])
plt.title("Original Image")

plt.subplot(1, 2, 2)
# Drop the singleton channel axis instead of hard-coding the 96x96 resolution.
plt.imshow(predicted_depths[idx].squeeze(), cmap='gray')
plt.title("Predicted Depth")

plt.show()
