In [7]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models, Input
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator

In [20]:
# Root folder of the dataset capture; every path below is derived from it.
DATASET_ROOT = './gazebo_dataset_01172026/'

# Label table (current_image, destination_image, direction).
CSV_PATH = DATASET_ROOT + 'labels.csv'

# Image folders. Each directory constant ends with a trailing slash.
IMG_DIR = DATASET_ROOT + 'images/'
EDGE_DIR = DATASET_ROOT + 'edge_detection_results_01192026/'
EDGE_CTRD_DIR = DATASET_ROOT + 'edge_detection_with_centroid_results_01192026/'

In [9]:
# Load the label table: each row names a current image, a destination image
# and the direction string that links them.
df = pd.read_csv(CSV_PATH)
print(f"Total samples: {len(df)}")
print(f"\nFirst few rows:\n{df.head()}")
# Class-balance check: number of rows per direction value.
print(f"\nDirection counts:\n{df['direction'].value_counts()}")

Total samples: 9716

First few rows:
                               current_image  \
0  Loc0-102ed7ec84c44be3b4066caccff2011e.png   
1  Loc0-102ed7ec84c44be3b4066caccff2011e.png   
2  Loc0-102ed7ec84c44be3b4066caccff2011e.png   
3  Loc0-102ed7ec84c44be3b4066caccff2011e.png   
4  Loc0-102ed7ec84c44be3b4066caccff2011e.png   

                           destination_image direction  
0  Loc0-5107f16132e14cbbae95826a39aa0643.png     right  
1  Loc0-ad13f58f6f9549b48af9a145c8398fde.png     right  
2  Loc9-fb497088983647238d06767871bef8f7.png  backward  
3  Loc9-057e8f35ac974ad487b4ca23310cb397.png  backward  
4  Loc9-15467f62ac4f489194c3adb5d3fea27e.png  backward  

Direction counts:
direction
forward     2462
backward    2452
left        2415
right       2387
Name: count, dtype: int64


In [10]:
# Integer class id for each direction string.
direction_map = {'forward': 0, 'backward': 1, 'left': 2, 'right': 3}

# Map first, validate second: Series.map() turns every value that is missing
# from the dictionary into NaN, which would otherwise slip into the labels
# unnoticed and only surface later as a training failure.
direction_labels = df['direction'].map(direction_map)
unmapped_values = df.loc[direction_labels.isna(), 'direction'].unique().tolist()
if unmapped_values:
    raise ValueError(f"Unexpected values in 'direction' column: {unmapped_values}")

# All rows are mapped at this point, so the column can be a plain integer dtype.
df['direction_label'] = direction_labels.astype('int64')
shard_size = len(df)
print(f"first few rows after direction mapping:\n{df.head()}")

first few rows after direction mapping:
                               current_image  \
0  Loc0-102ed7ec84c44be3b4066caccff2011e.png   
1  Loc0-102ed7ec84c44be3b4066caccff2011e.png   
2  Loc0-102ed7ec84c44be3b4066caccff2011e.png   
3  Loc0-102ed7ec84c44be3b4066caccff2011e.png   
4  Loc0-102ed7ec84c44be3b4066caccff2011e.png   

                           destination_image direction  direction_label  
0  Loc0-5107f16132e14cbbae95826a39aa0643.png     right                3  
1  Loc0-ad13f58f6f9549b48af9a145c8398fde.png     right                3  
2  Loc9-fb497088983647238d06767871bef8f7.png  backward                1  
3  Loc9-057e8f35ac974ad487b4ca23310cb397.png  backward                1  
4  Loc9-15467f62ac4f489194c3adb5d3fea27e.png  backward                1  


In [11]:
# Shuffle every row with a fixed seed and renumber the index from 0.
df_shuffled = (
    df.sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# The "shard" is currently the whole shuffled table.
shard = df_shuffled.iloc[:]

# 80/20 train/validation split of the shard, seeded for repeatability.
# NOTE(review): the same current_image file appears in several rows, so a
# row-level split may put one image in both sets; confirm this is acceptable.
train_df, val_df = train_test_split(
    shard,
    test_size=0.2,
    random_state=42,
)


In [17]:
# In-memory store of decoded images, keyed by file path.
image_cache = {}


def load_image_cached(img_path):
    """Return the image at ``img_path`` as an array, decoding it at most once.

    On a cache miss the file is loaded with a 128x128 target size, converted
    to an array and divided by 255.0; the result is stored in ``image_cache``
    under ``img_path``. On a hit the stored object itself is returned.
    """
    # Fast path: already decoded during an earlier call.
    if img_path in image_cache:
        return image_cache[img_path]

    pixels = img_to_array(load_img(img_path, target_size=(128, 128)))
    scaled = pixels / 255.0
    image_cache[img_path] = scaled
    return scaled

In [21]:
def create_dataset(dataframe, image_dir, batch_size=32):
    """Load the current/destination image pairs and labels listed in ``dataframe``.

    Args:
        dataframe: Table with ``current_image``, ``destination_image`` and
            ``direction_label`` columns.
        image_dir: Folder to read images from. When it refers to ``EDGE_DIR``
            or ``EDGE_CTRD_DIR`` the file stem gets the ``_hed`` / ``_hed_c``
            suffix respectively; any other folder uses the plain stem.
        batch_size: Unused; kept so existing calls remain valid.

    Returns:
        Tuple ``(current_images, dest_images, labels)`` of NumPy arrays with
        one entry per row of ``dataframe``, in row order.
    """
    # Compare normalised paths so that a missing trailing slash (or a
    # redundant "./") on image_dir still selects the right suffix.
    requested_dir = os.path.normpath(image_dir)
    if requested_dir == os.path.normpath(EDGE_DIR):
        suffix = '_hed'
    elif requested_dir == os.path.normpath(EDGE_CTRD_DIR):
        suffix = '_hed_c'
    else:
        suffix = ''

    def variant_path(file_name):
        # splitext removes whatever extension is present instead of assuming
        # it is exactly four characters long; the variant is read as .png.
        stem = os.path.splitext(file_name)[0]
        return os.path.join(image_dir, stem + suffix + '.png')

    current_images = []
    dest_images = []
    # Iterate the two name columns directly; iterrows() would build a Series
    # per row only to read these fields back out of it.
    for current_name, dest_name in zip(dataframe['current_image'],
                                       dataframe['destination_image']):
        current_images.append(load_image_cached(variant_path(current_name)))
        dest_images.append(load_image_cached(variant_path(dest_name)))

    labels = np.array(dataframe['direction_label'].tolist())

    return np.array(current_images), np.array(dest_images), labels

In [22]:
# Build three training variants from the same rows of train_df: images from
# IMG_DIR, from EDGE_DIR and from EDGE_CTRD_DIR. The label arrays of all three
# come from the same dataframe column, so y_train_hed and y_train_hed_c hold
# the same values as y_train.
print("Loading training data...")
X_train_current, X_train_dest, y_train = create_dataset(train_df, IMG_DIR)

X_train_current_hed, X_train_dest_hed, y_train_hed = create_dataset(train_df, EDGE_DIR)

X_train_current_hed_c, X_train_dest_hed_c, y_train_hed_c = create_dataset(train_df, EDGE_CTRD_DIR)

# Same three variants for the validation rows.
print("Loading validation data...")
X_val_current, X_val_dest, y_val = create_dataset(val_df, IMG_DIR)

X_val_current_hed, X_val_dest_hed, y_val_hed = create_dataset(val_df, EDGE_DIR)

X_val_current_hed_c, X_val_dest_hed_c, y_val_hed_c = create_dataset(val_df, EDGE_CTRD_DIR)

# Sanity check on the IMG_DIR training arrays only.
print(f"\nTraining data shapes:")
print(f"Current images: {X_train_current.shape}")
print(f"Destination images: {X_train_dest.shape}")
print(f"Labels: {y_train.shape}")

Loading training data...
Loading validation data...

Training data shapes:
Current images: (7772, 128, 128, 3)
Destination images: (7772, 128, 128, 3)
Labels: (7772,)


In [23]:
from tensorflow.keras import layers, models

def create_model(num_classes=4, input_shape=(128, 128, 3)):
    """Build a two-input CNN classifier over (current image, destination image).

    Each input runs through its own convolutional branch. The branches do NOT
    share weights: ``cnn_branch`` instantiates fresh layers on every call.
    The pooled features of both branches are concatenated and passed to a
    small dense head.

    Args:
        num_classes: Number of units in the softmax output layer.
        input_shape: ``(height, width, channels)`` of both image inputs.
            Defaults to the previously hard-coded ``(128, 128, 3)``.

    Returns:
        An uncompiled Keras ``Model`` whose inputs are
        ``[current_image, destination_image]``.
    """
    # Inputs
    current_input = layers.Input(shape=input_shape, name='current_image')
    dest_input = layers.Input(shape=input_shape, name='destination_image')

    # Simple CNN branch: three conv stages, then global average pooling
    # reduces each feature map to one value per channel.
    def cnn_branch(x):
        x = layers.Conv2D(32, 3, activation="relu")(x)
        x = layers.MaxPooling2D()(x)
        x = layers.Conv2D(64, 3, activation="relu")(x)
        x = layers.MaxPooling2D()(x)
        x = layers.Conv2D(128, 3, activation="relu")(x)
        x = layers.GlobalAveragePooling2D()(x)
        return x

    # Both branches (separate layer instances, see docstring)
    current_features = cnn_branch(current_input)
    dest_features = cnn_branch(dest_input)

    # Combine
    combined = layers.concatenate([current_features, dest_features])

    # Dense head
    x = layers.Dense(16, activation='relu')(combined)
    output = layers.Dense(num_classes, activation='softmax')(x)

    # Model
    model = models.Model(inputs=[current_input, dest_input], outputs=output)
    return model


In [43]:
# Fix the Python, NumPy and TensorFlow random seeds before the layers are
# created so that weight initialisation is repeatable after a kernel restart.
tf.keras.utils.set_random_seed(42)

model_rgb = create_model(num_classes=4)
model_rgb.compile(
    optimizer='adam',
    # Sparse loss: the targets are integer class ids, not one-hot vectors.
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [44]:
# Train the two-input model; the input list order is [current, destination].
# In the run recorded below, accuracy stays near 0.25 and loss near 1.386
# (ln 4) for every epoch shown, i.e. the model does not move off a uniform
# four-class guess.
history_rgb = model_rgb.fit(
    [X_train_current, X_train_dest],
    y_train,
    validation_data=([X_val_current, X_val_dest], y_val),
    epochs=15,
    batch_size=8
)

Epoch 1/15
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 58ms/step - accuracy: 0.2515 - loss: 1.3871 - val_accuracy: 0.2438 - val_loss: 1.3864
Epoch 2/15
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 59ms/step - accuracy: 0.2545 - loss: 1.3865 - val_accuracy: 0.2438 - val_loss: 1.3863
Epoch 3/15
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 59ms/step - accuracy: 0.2500 - loss: 1.3864 - val_accuracy: 0.2438 - val_loss: 1.3863
Epoch 4/15
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 59ms/step - accuracy: 0.2496 - loss: 1.3864 - val_accuracy: 0.2438 - val_loss: 1.3864
Epoch 5/15
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 60ms/step - accuracy: 0.2517 - loss: 1.3864 - val_accuracy: 0.2438 - val_loss: 1.3866
Epoch 6/15
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 59ms/step - accuracy: 0.2545 - loss: 1.3865 - val_accuracy: 0.2438 - val_loss: 1.3863
Epoch 7/15
[1m9

In [24]:
def rgb_encoder(input_shape):
    """Return a Keras model named "RGB_Encoder" that embeds one image.

    Architecture: Conv(32) -> MaxPool -> Conv(64) -> MaxPool -> Conv(128)
    -> GlobalAveragePooling. All convolutions use 3x3 kernels, ReLU and
    'same' padding.
    """
    conv_opts = dict(kernel_size=3, activation='relu', padding='same')

    image_in = Input(shape=input_shape)
    features = image_in
    # Two conv + pool stages, created in the same order as the filter counts.
    for n_filters in (32, 64):
        features = layers.Conv2D(n_filters, **conv_opts)(features)
        features = layers.MaxPooling2D()(features)
    # Final conv stage has no pooling of its own; global pooling follows.
    features = layers.Conv2D(128, **conv_opts)(features)
    embedding = layers.GlobalAveragePooling2D()(features)
    return models.Model(image_in, embedding, name="RGB_Encoder")


In [25]:
def hed_encoder(input_shape):
    """Return a Keras model named "HED_Encoder" that embeds one image.

    Architecture: Conv(16) -> MaxPool -> Conv(32) -> GlobalAveragePooling,
    with 3x3 kernels, ReLU and 'same' padding on both convolutions.
    """
    conv_opts = dict(kernel_size=3, activation='relu', padding='same')

    edge_in = Input(shape=input_shape)
    stage1 = layers.Conv2D(16, **conv_opts)(edge_in)
    stage1 = layers.MaxPooling2D()(stage1)
    stage2 = layers.Conv2D(32, **conv_opts)(stage1)
    embedding = layers.GlobalAveragePooling2D()(stage2)
    return models.Model(edge_in, embedding, name="HED_Encoder")


In [38]:
def build_siamese_model(H=128, W=128, num_classes=4):
    """Build and compile the four-input siamese direction classifier.

    The same ``rgb_encoder`` and ``hed_encoder`` instances are applied to both
    the current and the destination images, so encoder weights are shared
    between the two sides. The classifier head sees the two feature vectors
    plus their signed and absolute difference.

    Args:
        H: Input image height.
        W: Input image width.
        num_classes: Number of units in the softmax output layer. Defaults to
            the previously hard-coded 4.

    Returns:
        A compiled Keras ``Model`` (Adam, sparse categorical cross-entropy,
        accuracy metric) whose inputs are, in order,
        ``[current_rgb, current_hed, dest_rgb, dest_hed]``.
    """
    # Inputs (all four are H x W x 3)
    current_rgb = Input(shape=(H, W, 3))
    current_hed = Input(shape=(H, W, 3))
    dest_rgb = Input(shape=(H, W, 3))
    dest_hed = Input(shape=(H, W, 3))

    # One encoder per modality, reused for both sides (shared weights).
    rgb_enc = rgb_encoder((H, W, 3))
    hed_enc = hed_encoder((H, W, 3))

    curr_feat = layers.Concatenate()([
        rgb_enc(current_rgb),
        hed_enc(current_hed)
    ])

    dest_feat = layers.Concatenate()([
        rgb_enc(dest_rgb),
        hed_enc(dest_hed)
    ])

    # Signed difference keeps the sign of the change from current to
    # destination; the absolute difference keeps only its magnitude.
    diff = layers.Subtract()([dest_feat, curr_feat])
    # NOTE(review): a Lambda wrapping a Python lambda may need extra care if
    # the model is ever saved and reloaded; confirm if persistence is needed.
    abs_diff = layers.Lambda(lambda x: tf.abs(x))(diff)

    final_feat = layers.Concatenate()([
        curr_feat, dest_feat, diff, abs_diff
    ])

    # Classifier head
    x = layers.Dense(256, activation='relu')(final_feat)
    x = layers.Dropout(0.4)(x)
    x = layers.Dense(128, activation='relu')(x)
    output = layers.Dense(num_classes, activation='softmax')(x)

    model = models.Model(
        inputs=[current_rgb, current_hed, dest_rgb, dest_hed],
        outputs=output
    )

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

In [39]:
# Fix the Python, NumPy and TensorFlow random seeds before the layers are
# created so that weight initialisation is repeatable after a kernel restart.
tf.keras.utils.set_random_seed(42)
model_siamese = build_siamese_model()

In [28]:
# Confirm the four input arrays have matching shapes before they are passed
# to the four-input model.
print("current RGB:", X_train_current.shape)
print("current HED:", X_train_current_hed.shape)
print("dest RGB:", X_train_dest.shape)
print("dest HED:", X_train_dest_hed.shape)

current RGB: (7772, 128, 128, 3)
current HED: (7772, 128, 128, 3)
dest RGB: (7772, 128, 128, 3)
dest HED: (7772, 128, 128, 3)


In [29]:
# Input order must match the model definition in build_siamese_model:
# current RGB, current edge, destination RGB, destination edge.
siamese_train_inputs = [
    X_train_current,
    X_train_current_hed,
    X_train_dest,
    X_train_dest_hed,
]
siamese_val_inputs = [
    X_val_current,
    X_val_current_hed,
    X_val_dest,
    X_val_dest_hed,
]

history = model_siamese.fit(
    siamese_train_inputs,
    y_train,
    validation_data=(siamese_val_inputs, y_val),
    epochs=15,
    batch_size=8,
)


Epoch 1/15
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 88ms/step - accuracy: 0.2467 - loss: 1.3875 - val_accuracy: 0.2623 - val_loss: 1.3861
Epoch 2/15
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 83ms/step - accuracy: 0.2524 - loss: 1.3872 - val_accuracy: 0.2438 - val_loss: 1.3864
Epoch 3/15
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 81ms/step - accuracy: 0.2877 - loss: 1.3739 - val_accuracy: 0.3354 - val_loss: 1.3376
Epoch 4/15
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 82ms/step - accuracy: 0.3453 - loss: 1.3290 - val_accuracy: 0.3801 - val_loss: 1.2781
Epoch 5/15
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 82ms/step - accuracy: 0.4030 - loss: 1.2509 - val_accuracy: 0.4537 - val_loss: 1.1716
Epoch 6/15
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 82ms/step - accuracy: 0.4870 - loss: 1.1024 - val_accuracy: 0.5252 - val_loss: 1.0331
Epoch 7/15
[1m9

In [30]:
# Print the layer-by-layer structure and parameter counts of the siamese model.
model_siamese.summary()

In [41]:
# Fix the Python, NumPy and TensorFlow random seeds before the layers are
# created so that weight initialisation is repeatable after a kernel restart.
tf.keras.utils.set_random_seed(42)
model_siamese_centroid = build_siamese_model()

In [42]:
# Same training setup as the plain edge model, but the edge inputs come from
# the EDGE_CTRD_DIR arrays. Input order follows build_siamese_model:
# current RGB, current edge, destination RGB, destination edge.
centroid_train_inputs = [
    X_train_current,
    X_train_current_hed_c,
    X_train_dest,
    X_train_dest_hed_c,
]
centroid_val_inputs = [
    X_val_current,
    X_val_current_hed_c,
    X_val_dest,
    X_val_dest_hed_c,
]

history2 = model_siamese_centroid.fit(
    centroid_train_inputs,
    y_train,
    validation_data=(centroid_val_inputs, y_val),
    epochs=15,
    batch_size=8,
)

Epoch 1/15
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 89ms/step - accuracy: 0.2433 - loss: 1.3878 - val_accuracy: 0.2438 - val_loss: 1.3869
Epoch 2/15
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 82ms/step - accuracy: 0.2524 - loss: 1.3873 - val_accuracy: 0.2541 - val_loss: 1.3861
Epoch 3/15
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 81ms/step - accuracy: 0.2984 - loss: 1.3637 - val_accuracy: 0.3467 - val_loss: 1.3088
Epoch 4/15
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 82ms/step - accuracy: 0.3680 - loss: 1.2880 - val_accuracy: 0.3796 - val_loss: 1.2651
Epoch 5/15
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 82ms/step - accuracy: 0.4391 - loss: 1.1774 - val_accuracy: 0.4835 - val_loss: 1.1020
Epoch 6/15
[1m972/972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 82ms/step - accuracy: 0.5425 - loss: 1.0041 - val_accuracy: 0.5885 - val_loss: 0.9202
Epoch 7/15
[1m9