In [1]:
import os
import pandas as pd

# Root folder of the augmented ECG images. Expected layout:
#   <data_dir>/person_<N>/<image files>
# NOTE(review): absolute, machine-specific path; move it to a config cell
# before sharing the notebook.
data_dir = r"C:/Users/manog/Downloads/ecg_augmented"

# Parallel lists: filepaths[i] is an image path, labels[i] its person id.
filepaths = []
labels = []

for person_folder in sorted(os.listdir(data_dir)):
    person_path = os.path.join(data_dir, person_folder)
    if not os.path.isdir(person_path):
        continue

    # The label is the integer after the first underscore ('person_7' -> 7).
    # Folders that do not follow that scheme (e.g. '.ipynb_checkpoints')
    # used to abort the whole cell; they are now reported and skipped.
    try:
        label = int(person_folder.split('_')[1])
    except (IndexError, ValueError):
        print(f"Skipping folder with unexpected name: {person_folder!r}")
        continue

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # row order of the DataFrame identical from run to run.
    for img_file in sorted(os.listdir(person_path)):
        if img_file.lower().endswith(('.png', '.jpg', '.jpeg')):
            filepaths.append(os.path.join(person_path, img_file))
            labels.append(label)

# One row per image: full path plus integer person label.
df = pd.DataFrame({
    'filename': filepaths,
    'label': labels
})

print(f"Total images: {len(df)}")
print(f"Unique classes (persons): {df['label'].nunique()}")

# Show first few rows
df.head()


Total images: 1796
Unique classes (persons): 90


Unnamed: 0,filename,label
0,C:/Users/manog/Downloads/ecg_augmented\person_...,0
1,C:/Users/manog/Downloads/ecg_augmented\person_...,0
2,C:/Users/manog/Downloads/ecg_augmented\person_...,0
3,C:/Users/manog/Downloads/ecg_augmented\person_...,0
4,C:/Users/manog/Downloads/ecg_augmented\person_...,0


In [2]:
import os
import pandas as pd

# Same scan as the first cell, with a per-folder debug print and a guarded
# label parse so that oddly named folders are reported instead of crashing.
data_dir = r"C:/Users/manog/Downloads/ecg_augmented"

filepaths = []
labels = []

for person_folder in sorted(os.listdir(data_dir)):
    print("Found folder:", person_folder)  # Debug print to check folder names
    person_path = os.path.join(data_dir, person_folder)
    if not os.path.isdir(person_path):
        continue

    # Folder names are expected to look like 'person_0', 'person_1', ...
    try:
        label = int(person_folder.split('_')[1])
    except Exception as e:
        print(f"Error parsing label from folder '{person_folder}': {e}")
        continue

    # Keep only image files, preserving the order os.listdir() returned.
    person_images = [
        os.path.join(person_path, img_file)
        for img_file in os.listdir(person_path)
        if img_file.lower().endswith(('.png', '.jpg', '.jpeg'))
    ]
    filepaths.extend(person_images)
    labels.extend([label] * len(person_images))

df = pd.DataFrame({'filename': filepaths, 'label': labels})

print(f"Total images: {len(df)}")
print(f"Unique classes (persons): {df['label'].nunique()}")

df.head()


Found folder: person_0
Found folder: person_1
Found folder: person_10
Found folder: person_11
Found folder: person_12
Found folder: person_13
Found folder: person_14
Found folder: person_15
Found folder: person_16
Found folder: person_17
Found folder: person_18
Found folder: person_19
Found folder: person_2
Found folder: person_20
Found folder: person_21
Found folder: person_22
Found folder: person_23
Found folder: person_24
Found folder: person_25
Found folder: person_26
Found folder: person_27
Found folder: person_28
Found folder: person_29
Found folder: person_3
Found folder: person_30
Found folder: person_31
Found folder: person_32
Found folder: person_33
Found folder: person_34
Found folder: person_35
Found folder: person_36
Found folder: person_37
Found folder: person_38
Found folder: person_39
Found folder: person_4
Found folder: person_40
Found folder: person_41
Found folder: person_42
Found folder: person_43
Found folder: person_44
Found folder: person_45
Found folder: person_

Unnamed: 0,filename,label
0,C:/Users/manog/Downloads/ecg_augmented\person_...,0
1,C:/Users/manog/Downloads/ecg_augmented\person_...,0
2,C:/Users/manog/Downloads/ecg_augmented\person_...,0
3,C:/Users/manog/Downloads/ecg_augmented\person_...,0
4,C:/Users/manog/Downloads/ecg_augmented\person_...,0


In [3]:
import numpy as np
from sklearn.utils import shuffle

def create_pairs(df, num_pairs_per_class=20, seed=None):
    """Sample positive (same label) and negative (different label) filename pairs.

    Args:
        df: DataFrame with a 'filename' column and a 'label' column.
        num_pairs_per_class: number of positive pairs and number of negative
            pairs drawn for every label.
        seed: optional integer. When given, sampling uses a private
            ``np.random.RandomState(seed)`` so the result is reproducible;
            when None the global ``np.random`` state is used, as before.

    Returns:
        (pairs, pair_labels): ``pairs`` has shape (n, 2) and holds filenames;
        ``pair_labels`` has shape (n,) with 1 for positive pairs and 0 for
        negative pairs.

    Labels with fewer than two images contribute no positive pairs, and no
    negative pairs are produced when only one label exists. Both situations
    previously raised ValueError inside ``np.random.choice``.
    """
    # np.random (module) and RandomState expose the same choice() API.
    rng = np.random if seed is None else np.random.RandomState(seed)

    labels = df['label'].unique()
    # Build the label -> filenames lookup once instead of per sampled pair.
    images_by_label = {
        key: group['filename'].values for key, group in df.groupby('label')
    }

    pairs = []
    pair_labels = []

    for label in labels:
        imgs = images_by_label[label]

        # Positive pairs: two distinct images of the same label.
        if len(imgs) >= 2:
            for _ in range(num_pairs_per_class):
                i1, i2 = rng.choice(len(imgs), 2, replace=False)
                pairs.append([imgs[i1], imgs[i2]])
                pair_labels.append(1)

        # Negative pairs: one image of this label, one of a random other label.
        other_labels = labels[labels != label]
        if len(other_labels) > 0:
            for _ in range(num_pairs_per_class):
                neg_label = rng.choice(other_labels)
                neg_imgs = images_by_label[neg_label]
                i1 = rng.choice(len(imgs))
                i2 = rng.choice(len(neg_imgs))
                pairs.append([imgs[i1], neg_imgs[i2]])
                pair_labels.append(0)

    # reshape keeps the (n, 2) contract even when no pair could be built.
    return np.array(pairs).reshape(-1, 2), np.array(pair_labels)

# Draw 20 positive and 20 negative filename pairs per label from `df`.
# `pairs` and `pair_labels` are module-level names reused by later cells.
pairs, pair_labels = create_pairs(df, num_pairs_per_class=20)

# Sanity check: total count and the first sampled pair with its label.
print(f"Total pairs: {len(pairs)}")
print(f"Sample pair and label: {pairs[0]}, Label: {pair_labels[0]}")


Total pairs: 3600
Sample pair and label: ['C:/Users/manog/Downloads/ecg_augmented\\person_0\\person_0_aug_0_4413.png'
 'C:/Users/manog/Downloads/ecg_augmented\\person_0\\person_0_aug_0_8881.png'], Label: 1


In [4]:
import tensorflow as tf

IMG_SIZE = (224, 224)  # Target (height, width) passed to tf.image.resize

def preprocess_image(file_path):
    """Read an image file and return it resized to IMG_SIZE and scaled by 1/255.

    The file is decoded with ``tf.image.decode_png`` forced to 3 channels.
    NOTE(review): the dataset scan also accepts .jpg/.jpeg files; confirm
    that this decoder handles them or switch to a format-agnostic decoder.
    """
    raw_bytes = tf.io.read_file(file_path)
    decoded = tf.image.decode_png(raw_bytes, channels=3)  # or decode_jpeg if jpeg images
    resized = tf.image.resize(decoded, IMG_SIZE)
    # Divide by 255 so 8-bit pixel values land in [0, 1].
    return resized / 255.0

def preprocess_pair(file_path_1, file_path_2, label):
    """Load both images of a pair and return ``((img1, img2), label)``.

    Each path goes through ``preprocess_image``; the label is passed through
    unchanged. The nested tuple matches the two-input model signature.
    """
    loaded = (preprocess_image(file_path_1), preprocess_image(file_path_2))
    return loaded, label

# Convert numpy pairs and labels to tf.data Dataset
def create_dataset(pairs, labels, batch_size=32, shuffle=True):
    """Build a batched tf.data pipeline of ``((img1, img2), label)`` elements.

    Args:
        pairs: array of shape (n, 2) holding the two file paths of each pair.
        labels: array of shape (n,) with the pair labels.
        batch_size: number of pairs per batch.
        shuffle: whether to shuffle the pairs.

    Returns:
        A ``tf.data.Dataset`` that yields preprocessed, batched pairs.

    Shuffling happens on the (path, path, label) triples *before* decoding,
    with a buffer covering the whole dataset. The earlier version shuffled
    decoded images with a 1024-element buffer, which kept up to 1024 decoded
    pairs in memory and only mixed elements locally.
    """
    dataset = tf.data.Dataset.from_tensor_slices((pairs[:, 0], pairs[:, 1], labels))
    if shuffle:
        # buffer_size must be at least 1, even for an empty input.
        dataset = dataset.shuffle(buffer_size=max(len(labels), 1))
    dataset = dataset.map(preprocess_pair, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset

# Create dataset from the sampled filename pairs (shuffle defaults to True).
batch_size = 32
dataset = create_dataset(pairs, pair_labels, batch_size=batch_size)

# Preview dataset batch shapes: pull a single batch and print the shapes of
# both image tensors and of the label tensor.
for (img1, img2), label in dataset.take(1):
    print(f"Image 1 batch shape: {img1.shape}")
    print(f"Image 2 batch shape: {img2.shape}")
    print(f"Label batch shape: {label.shape}")


Image 1 batch shape: (32, 224, 224, 3)
Image 2 batch shape: (32, 224, 224, 3)
Label batch shape: (32,)


In [5]:
from tensorflow.keras import layers, Model, Input
import tensorflow.keras.backend as K

def build_embedding_model(input_shape=(224, 224, 3)):
    """Return the CNN that maps one image to a 128-dimensional embedding.

    Architecture: three 'same'-padded ReLU convolutions (64@7x7, 128@5x5,
    256@3x3) with 2x2 max pooling after the first two, global average
    pooling, then a 128-unit ReLU dense layer. The model is named "embedding".
    """
    image_in = Input(shape=input_shape)

    # Layers are created in application order and then applied one by one.
    stages = (
        layers.Conv2D(64, (7,7), activation='relu', padding='same'),
        layers.MaxPooling2D(2),
        layers.Conv2D(128, (5,5), activation='relu', padding='same'),
        layers.MaxPooling2D(2),
        layers.Conv2D(256, (3,3), activation='relu', padding='same'),
        layers.GlobalAveragePooling2D(),
        layers.Dense(128, activation='relu'),  # Embedding vector size 128
    )

    features = image_in
    for stage in stages:
        features = stage(features)

    return Model(image_in, features, name="embedding")

def build_siamese_network(input_shape=(224,224,3)):
    """Return a two-input Siamese model that outputs a similarity probability.

    Both inputs pass through one shared embedding model; the element-wise
    absolute difference of the two embeddings feeds a single sigmoid unit.
    The model is named "siamese_network" and is returned uncompiled.
    """
    # One Input per branch of the pair.
    branch_inputs = [Input(shape=input_shape), Input(shape=input_shape)]

    # A single encoder instance, so both branches share weights.
    shared_encoder = build_embedding_model(input_shape)
    embeddings = [shared_encoder(branch) for branch in branch_inputs]

    # Element-wise |emb1 - emb2|
    abs_difference = layers.Lambda(
        lambda tensors: K.abs(tensors[0] - tensors[1])
    )(embeddings)

    # Sigmoid unit turns the difference vector into a similarity score.
    similarity = layers.Dense(1, activation='sigmoid')(abs_difference)

    return Model(inputs=branch_inputs, outputs=similarity, name="siamese_network")

# Build model with the default (224, 224, 3) input shape.
# NOTE(review): `siamese_model` is reassigned by later cells in this
# notebook; whichever of those cells ran last determines the live model.
siamese_model = build_siamese_network()

# Compile model with binary crossentropy loss and an optimizer
# (the output is one sigmoid unit, the labels are 0/1).
siamese_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Model summary
siamese_model.summary()





In [6]:
import tensorflow as tf
import numpy as np
import cv2
import random
import os

class SiameseDataGenerator(tf.keras.utils.Sequence):
    """Keras Sequence that samples genuine/impostor image pairs on the fly.

    Every batch is drawn at random from the DataFrame, so the batch index
    passed to ``__getitem__`` does not select specific rows.

    Fixes compared with the previous version:
      * ``label_to_indices`` is rebuilt whenever the frame is reshuffled.
        Before, the map was built once and the frame was then shuffled and
        re-indexed, so "genuine" pairs pointed at rows of other persons.
      * A label with a single image no longer causes an endless loop when a
        genuine pair is requested; an impostor pair is produced instead.
      * Fewer than two labels is rejected up front (impostor sampling could
        never terminate).
      * Unreadable image files raise a clear ValueError.
    """

    def __init__(self, df, batch_size=32, img_size=(224,224), shuffle=True):
        """
        df: DataFrame with columns ['filename', 'label']
        batch_size: number of pairs per batch
        img_size: size tuple handed to cv2.resize
        shuffle: reshuffle the rows of df at the end of every epoch

        Raises ValueError if df contains fewer than two distinct labels.
        """
        super().__init__()
        self.df = df.reset_index(drop=True)
        self.batch_size = batch_size
        self.img_size = img_size
        self.shuffle = shuffle

        if self.df['label'].nunique() < 2:
            raise ValueError(
                "SiameseDataGenerator needs at least two distinct labels "
                "to build impostor pairs"
            )

        # label -> list of row indices in self.df; filled by on_epoch_end().
        self.label_to_indices = {}
        self._labels = []
        self.on_epoch_end()

    def __len__(self):
        # Number of batches per epoch
        return len(self.df) // self.batch_size

    def _rebuild_label_index(self):
        # Must run after every reset_index(), otherwise stored row numbers
        # refer to the previous row order.
        groups = self.df.groupby('label').groups
        self.label_to_indices = {label: rows.to_list() for label, rows in groups.items()}
        self._labels = list(self.label_to_indices.keys())

    def on_epoch_end(self):
        if self.shuffle:
            self.df = self.df.sample(frac=1).reset_index(drop=True)
        self._rebuild_label_index()

    def _load_image(self, path):
        # cv2.imread signals failure by returning None instead of raising.
        img = cv2.imread(path)
        if img is None:
            raise ValueError(f"Failed to load image at path: {path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, self.img_size)
        return img / 255.0

    def __getitem__(self, idx):
        batch_img1 = []
        batch_img2 = []
        batch_labels = []

        while len(batch_img1) < self.batch_size:
            # Randomly pick first image and its label
            idx1 = random.randint(0, len(self.df) - 1)
            img1_path = self.df.loc[idx1, 'filename']
            label1 = self.df.loc[idx1, 'label']
            same_label_rows = self.label_to_indices[label1]

            # 50/50 genuine vs impostor; a genuine pair additionally needs a
            # second image of the same label, otherwise fall back to impostor.
            if random.random() < 0.5 and len(same_label_rows) > 1:
                # Genuine pair (same label, different row)
                idx2 = idx1
                while idx2 == idx1:
                    idx2 = random.choice(same_label_rows)
                label = 1
            else:
                # Impostor pair (different labels); terminates because at
                # least two labels are guaranteed by __init__.
                label = 0
                label2 = label1
                while label2 == label1:
                    label2 = random.choice(self._labels)
                idx2 = random.choice(self.label_to_indices[label2])

            img2_path = self.df.loc[idx2, 'filename']

            batch_img1.append(self._load_image(img1_path))
            batch_img2.append(self._load_image(img2_path))
            batch_labels.append(label)

        return [np.array(batch_img1), np.array(batch_img2)], np.array(batch_labels)

# Usage:
# Assuming 'df' is your DataFrame with 'filename' and 'label' columns from the augmented data
# (built by the dataset-scanning cells at the top of the notebook).

# `shuffle` is left at its default (True).
batch_size = 32
train_generator = SiameseDataGenerator(df, batch_size=batch_size, img_size=(224,224))



In [7]:
import tensorflow as tf
from tensorflow.keras import layers, models, Input, Model

def create_base_encoder(input_shape=(224,224,3), embedding_dim=128):
    """Return the shared CNN encoder producing L2-normalised embeddings.

    Three 'same'-padded ReLU convolutions (64@7x7, 128@5x5, 256@3x3) with
    default max pooling after the first two, global average pooling, a linear
    dense layer of ``embedding_dim`` units and a final L2 normalisation along
    axis 1. The model is named 'base_encoder'.
    """
    image_in = Input(shape=input_shape)

    # Layers in application order.
    pipeline = [
        layers.Conv2D(64, (7,7), activation='relu', padding='same'),
        layers.MaxPooling2D(),
        layers.Conv2D(128, (5,5), activation='relu', padding='same'),
        layers.MaxPooling2D(),
        layers.Conv2D(256, (3,3), activation='relu', padding='same'),
        layers.GlobalAveragePooling2D(),
        layers.Dense(embedding_dim),
        layers.Lambda(lambda t: tf.math.l2_normalize(t, axis=1)),  # Normalize embeddings
    ]

    embedding = image_in
    for layer in pipeline:
        embedding = layer(embedding)

    return Model(image_in, embedding, name='base_encoder')

# Input pairs: one Input tensor per image of the pair.
input_a = Input(shape=(224,224,3))
input_b = Input(shape=(224,224,3))

# Shared encoder: a single instance applied to both inputs, so the two
# branches use the same weights.
encoder = create_base_encoder()

encoded_a = encoder(input_a)
encoded_b = encoder(input_b)

# Compute absolute difference between embeddings (element-wise).
L1_layer = layers.Lambda(lambda tensors: tf.abs(tensors[0] - tensors[1]))
L1_distance = L1_layer([encoded_a, encoded_b])

# Final decision layer: one sigmoid unit on the difference vector.
outputs = layers.Dense(1, activation='sigmoid')(L1_distance)

# Define the Siamese network
# NOTE(review): this rebinds `siamese_model`, replacing the model built in
# the earlier cell.
siamese_model = Model(inputs=[input_a, input_b], outputs=outputs)

siamese_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
siamese_model.summary()


In [9]:
def create_image_pairs(df, num_pairs_per_person=10):
    """Build shuffled genuine/impostor pairs of *loaded* images.

    Args:
        df: DataFrame with 'filename' and 'label' columns.
        num_pairs_per_person: genuine pairs and impostor pairs drawn per label.

    Returns:
        (X1, X2, y): arrays of first images, second images and pair labels
        (1 = genuine, 0 = impostor).

    Relies on ``random``, ``shuffle`` (sklearn.utils) and
    ``load_and_preprocess_image`` being defined by earlier cells.
    NOTE(review): ``load_and_preprocess_image`` is not defined in the visible
    part of this notebook; confirm it exists before Restart & Run All.
    NOTE(review): every pair is loaded eagerly, so memory grows with the
    number of pairs; presumably each image is a full-size float array.

    Changes: the per-label filename lists are computed once instead of
    re-filtering the DataFrame for every impostor pair, and a dataset with a
    single label no longer crashes in ``random.choice`` on an empty list
    (it simply yields no impostor pairs).
    """
    # label -> list of filenames, computed once.
    images_by_label = {
        label: group['filename'].tolist() for label, group in df.groupby('label')
    }
    # Keep first-appearance order so seeded runs sample exactly as before.
    all_labels = [lbl for lbl in df['label'].unique().tolist() if lbl in images_by_label]

    pairs = []
    labels = []

    for label, images in images_by_label.items():
        # Genuine pairs
        if len(images) >= 2:
            for _ in range(num_pairs_per_person):
                img1, img2 = random.sample(images, 2)
                pairs.append((load_and_preprocess_image(img1), load_and_preprocess_image(img2)))
                labels.append(1)

        # Impostor pairs
        other_labels = [lbl for lbl in all_labels if lbl != label]
        if not other_labels:
            continue

        for _ in range(num_pairs_per_person):
            other_label = random.choice(other_labels)
            other_img = random.choice(images_by_label[other_label])
            img1 = random.choice(images)
            pairs.append((load_and_preprocess_image(img1), load_and_preprocess_image(other_img)))
            labels.append(0)

    # Shuffle pairs and labels together
    pairs, labels = shuffle(pairs, labels, random_state=42)

    X1 = np.array([pair[0] for pair in pairs])
    X2 = np.array([pair[1] for pair in pairs])
    y = np.array(labels)

    return X1, X2, y

# Create the image pairs: 20 genuine and 20 impostor pairs per label.
# X1 / X2 hold loaded images (not file paths); y_pairs holds 1/0 labels.
X1, X2, y_pairs = create_image_pairs(df, num_pairs_per_person=20)

# Genuine pairs are labelled 1, so summing the labels counts them.
print(f"Total Pairs: {len(y_pairs)}, Genuine: {sum(y_pairs)}, Impostor: {len(y_pairs) - sum(y_pairs)}")


Total Pairs: 3600, Genuine: 1800, Impostor: 1800


In [10]:
from tensorflow.keras import layers, Model, Input

def build_embedding_model(input_shape):
    """Return a small CNN that maps one image to a 128-dimensional vector.

    Three 3x3 'same'-padded ReLU convolutions (32, 64, 128 filters) with
    default max pooling after the first two, global average pooling, a
    128-unit ReLU dense layer and dropout of 0.3.

    NOTE: this definition has the same name as the embedding builder in an
    earlier cell and replaces it once this cell runs; unlike that one,
    ``input_shape`` has no default here.
    """
    inp = Input(shape=input_shape)

    # Layers in application order.
    blocks = [
        layers.Conv2D(32, (3,3), activation='relu', padding='same'),
        layers.MaxPooling2D(),
        layers.Conv2D(64, (3,3), activation='relu', padding='same'),
        layers.MaxPooling2D(),
        layers.Conv2D(128, (3,3), activation='relu', padding='same'),
        layers.GlobalAveragePooling2D(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.3),
    ]

    features = inp
    for block in blocks:
        features = block(features)

    return Model(inputs=inp, outputs=features)


In [11]:
import tensorflow as tf

def build_siamese_model(input_shape):
    """Build and compile a two-input Siamese verification model.

    Both inputs share one embedding model. The absolute difference of the
    embeddings goes through Dense(64, relu), Dropout(0.3) and a sigmoid unit.
    The returned model is compiled with Adam, binary cross-entropy and the
    accuracy metric.
    """
    # Encoder first, then the two inputs: same creation order as before.
    shared_encoder = build_embedding_model(input_shape)
    pair_inputs = [Input(shape=input_shape), Input(shape=input_shape)]
    pair_embeddings = [shared_encoder(tensor) for tensor in pair_inputs]

    # Compute absolute difference
    head = layers.Lambda(lambda x: tf.abs(x[0] - x[1]))(pair_embeddings)

    # Small classification head on top of the difference vector.
    head = layers.Dense(64, activation='relu')(head)
    head = layers.Dropout(0.3)(head)
    probability = layers.Dense(1, activation='sigmoid')(head)

    siamese = Model(inputs=pair_inputs, outputs=probability)
    siamese.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return siamese


In [12]:
# Build the compiled Siamese model for 224x224 three-channel inputs and show
# its layers. This rebinds `siamese_model` once more.
input_shape = (224, 224, 3)
siamese_model = build_siamese_model(input_shape)
siamese_model.summary()


In [13]:
from sklearn.model_selection import train_test_split
import numpy as np

# Split the image pairs into training and validation (80/20), stratified on
# the pair label so both splits keep the genuine/impostor ratio.
# At this point X1 / X2 are arrays of loaded images, so X1_train etc. hold
# image arrays rather than file paths.
X1_train, X1_val, X2_train, X2_val, y_train, y_val = train_test_split(
    X1, X2, y_pairs, test_size=0.2, random_state=42, stratify=y_pairs
)


In [15]:
from tensorflow.keras.utils import Sequence
import cv2
import numpy as np

class SiamesePairGenerator(Sequence):
    """Keras Sequence that loads pre-built image-path pairs batch by batch.

    Args:
        X1_paths, X2_paths: sequences of image file paths (first / second
            image of every pair).
        y_labels: pair labels aligned with the two path sequences.
        batch_size: number of pairs per batch (the last batch may be smaller).
        img_size: size tuple handed to cv2.resize.
    """

    def __init__(self, X1_paths, X2_paths, y_labels, batch_size=32, img_size=(224, 224)):
        super().__init__()
        self.X1_paths = X1_paths
        self.X2_paths = X2_paths
        self.y = y_labels
        self.batch_size = batch_size
        self.img_size = img_size

    def __len__(self):
        # ceil so the final, partial batch is included
        return int(np.ceil(len(self.y) / self.batch_size))

    def __getitem__(self, idx):
        start = idx * self.batch_size
        stop = start + self.batch_size

        X1 = [self._load_image(img) for img in self.X1_paths[start:stop]]
        X2 = [self._load_image(img) for img in self.X2_paths[start:stop]]
        batch_y = self.y[start:stop]

        return [np.array(X1), np.array(X2)], np.array(batch_y)

    def _load_image(self, path):
        """Read one image as float32 RGB in [0, 1], resized to img_size.

        Raises:
            TypeError: if ``path`` is not a string / path-like object (for
                example when decoded image arrays are passed by mistake).
            ValueError: if OpenCV cannot read the file.
        """
        if not isinstance(path, (str, os.PathLike)):
            raise TypeError(
                f"Expected an image file path, got {type(path).__name__}; "
                "pass file paths, not decoded image arrays"
            )
        path = os.fspath(path)
        # cv2.imread returns None (no exception) for missing/unreadable files.
        img = cv2.imread(path)
        if img is None:
            raise ValueError(f"Failed to load image at path: {path}")
        # OpenCV loads BGR; convert so channel order matches the other
        # loaders in this notebook, which produce RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, self.img_size)
        img = img.astype('float32') / 255.0
        return img


In [21]:
class SiamesePairGenerator(Sequence):
    """Keras Sequence that loads pre-built image-path pairs batch by batch.

    Args:
        X1_paths, X2_paths: sequences of image file paths (first / second
            image of every pair).
        y_labels: pair labels aligned with the two path sequences.
        batch_size: number of pairs per batch (the last batch may be smaller).
        img_size: size tuple handed to cv2.resize.

    ``_load_image`` used to sit at module level (wrong indentation), so
    ``self._load_image`` raised AttributeError in ``__getitem__``. It is a
    method again, and the per-image debug print has been dropped in favour
    of explicit errors.
    """

    def __init__(self, X1_paths, X2_paths, y_labels, batch_size=32, img_size=(224, 224)):
        super().__init__()
        self.X1_paths = X1_paths
        self.X2_paths = X2_paths
        self.y = y_labels
        self.batch_size = batch_size
        self.img_size = img_size

    def __len__(self):
        # ceil so the final, partial batch is included
        return int(np.ceil(len(self.y) / self.batch_size))

    def __getitem__(self, idx):
        start = idx * self.batch_size
        stop = start + self.batch_size

        X1 = [self._load_image(img) for img in self.X1_paths[start:stop]]
        X2 = [self._load_image(img) for img in self.X2_paths[start:stop]]
        batch_y = self.y[start:stop]

        return [np.array(X1), np.array(X2)], np.array(batch_y)

    def _load_image(self, path):
        """Read one image as float32 RGB in [0, 1], resized to img_size.

        Raises:
            TypeError: if ``path`` is not a string / path-like object (for
                example when decoded image arrays are passed by mistake).
            ValueError: if OpenCV cannot read the file.
        """
        if not isinstance(path, (str, os.PathLike)):
            raise TypeError(
                f"Expected an image file path, got {type(path).__name__}; "
                "pass file paths, not decoded image arrays"
            )
        path = os.fspath(path)
        # cv2.imread returns None (no exception) for missing/unreadable files.
        img = cv2.imread(path)
        if img is None:
            raise ValueError(f"Failed to load image at path: {path}")
        # OpenCV loads BGR; convert so channel order matches the other
        # loaders in this notebook, which produce RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, self.img_size)
        img = img.astype('float32') / 255.0
        return img


In [22]:

# Diagnostic cell: inspect what the train split actually contains.
# The path-based generator needs file paths, but the split in scope here was
# made from arrays of loaded images, so the elements are ndarrays (see the
# recorded output), not strings.
print(f"Type of X1_train: {type(X1_train)}")             # Should be numpy.ndarray or list
print(f"Type of first element in X1_train: {type(X1_train[0])}")  # Should be str or path-like
print(f"Sample from X1_train: {X1_train[0]}")            # Should print a valid filepath string

print(f"Type of X2_train: {type(X2_train)}")
print(f"Type of first element in X2_train: {type(X2_train[0])}")
print(f"Sample from X2_train: {X2_train[0]}")

print(f"Type of y_train: {type(y_train)}")
print(f"Sample y_train: {y_train[:5]}")


Type of X1_train: <class 'numpy.ndarray'>
Type of first element in X1_train: <class 'numpy.ndarray'>
Sample from X1_train: [[[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  ...
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  ...
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  ...
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 ...

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  ...
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  ...
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  ...
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]]
Type of X2_train: <class 'numpy.ndarray'>
Type of first element in X2_train: <class 'numpy.ndarray'>
Sample from X2_train: [[[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  ...
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  ...
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  ...
  [1. 1. 1.]
  

In [24]:
import os
import numpy as np

# Template: exhaustive pair generation from a person -> image-paths mapping.
# The dictionary below is a placeholder; the literal `...` entries stand for
# "more paths" and are kept as written.
person_to_images = {
    0: ['person_0_img1.png', 'person_0_img2.png', ...],
    1: ['person_1_img1.png', 'person_1_img2.png', ...],
    # ...
}

X1_paths = []
X2_paths = []
y_pairs = []

# Positive pairs: every unordered pair of images belonging to one person.
for images in person_to_images.values():
    for position, first_image in enumerate(images):
        for second_image in images[position + 1:]:
            X1_paths.append(first_image)
            X2_paths.append(second_image)
            y_pairs.append(1)

# Negative pairs: every image of one person against every image of each
# later person, so each unordered person pair is visited once.
persons = list(person_to_images.keys())
for position, person_a in enumerate(persons):
    for person_b in persons[position + 1:]:
        for img1 in person_to_images[person_a]:
            for img2 in person_to_images[person_b]:
                X1_paths.append(img1)
                X2_paths.append(img2)
                y_pairs.append(0)

# Labels as a numpy array (the path lists stay plain Python lists).
y_pairs = np.array(y_pairs)



In [25]:
import os
import glob

# Root of the augmented dataset (absolute, machine-specific path).
base_dir = r'C:\Users\manog\Downloads\ecg_augmented'

# One folder per person: person_0 ... person_89.
all_person_folders = list(
    os.path.join(base_dir, f'person_{i}') for i in range(90)
)

# Collect all image paths by person.
# Keys are the folder paths; values are the PNG files found in each folder
# (an empty list when the folder is missing or holds no PNGs).
person_to_images = {
    folder: glob.glob(os.path.join(folder, '*.png'))
    for folder in all_person_folders
}


In [26]:
import random

X1_paths = []
X2_paths = []
y_pairs = []

# Genuine pairs: every unordered pair of images of the same person.
for person, images in person_to_images.items():
    for i in range(len(images)):
        for j in range(i+1, len(images)):
            X1_paths.append(images[i])
            X2_paths.append(images[j])
            y_pairs.append(1)

# Impostor pairs: as many random cross-person pairs as there are genuine
# pairs, so the two classes are balanced.
num_impostor_pairs = len(X1_paths)  # balance number of pairs

# Only persons that actually have images can take part. Sampling from an
# empty image list used to raise IndexError in random.choice().
persons = [person for person, images in person_to_images.items() if images]

if num_impostor_pairs > 0 and len(persons) < 2:
    raise ValueError(
        "Need images for at least two persons to sample impostor pairs; "
        f"found {len(persons)}"
    )

for _ in range(num_impostor_pairs):
    # Two distinct persons, then one random image of each.
    p1, p2 = random.sample(persons, 2)
    img1 = random.choice(person_to_images[p1])
    img2 = random.choice(person_to_images[p2])
    X1_paths.append(img1)
    X2_paths.append(img2)
    y_pairs.append(0)


In [27]:
import numpy as np
from sklearn.model_selection import train_test_split

# Convert the path / label lists to numpy arrays so they can be indexed with
# index arrays by the batch generator.
X1_paths = np.array(X1_paths)
X2_paths = np.array(X2_paths)
y_pairs = np.array(y_pairs)

# 80/20 split over *pairs*, stratified on the pair label.
# NOTE(review): the split is made per pair, not per person or per image, so
# the same image (and the same person) can appear in both splits; validation
# accuracy may therefore be optimistic. Confirm this is acceptable.
X1_train, X1_val, X2_train, X2_val, y_train, y_val = train_test_split(
    X1_paths, X2_paths, y_pairs, test_size=0.2, random_state=42, stratify=y_pairs
)


In [28]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing.image import load_img, img_to_array

class SiamesePairGenerator(Sequence):
    """Keras Sequence yielding ``([X1, X2], y)`` batches from image-path pairs.

    Args:
        X1_paths, X2_paths: image file paths of the first / second image of
            each pair (any sequence; converted to numpy arrays).
        y: pair labels aligned with the paths.
        batch_size: number of pairs per batch.
        img_size: target size passed to ``load_img``.
        shuffle: reshuffle the pair order at construction and after each epoch.

    Raises:
        ValueError: if the three inputs do not have the same length.

    Only full batches are produced: ``__len__`` rounds down, so up to
    ``batch_size - 1`` pairs are not visited in an epoch.
    """

    def __init__(self, X1_paths, X2_paths, y, batch_size=32, img_size=(224, 224), shuffle=True):
        super().__init__()
        # np.asarray lets plain lists work with the index-array lookups in
        # __getitem__.
        self.X1_paths = np.asarray(X1_paths)
        self.X2_paths = np.asarray(X2_paths)
        self.y = np.asarray(y)
        if not (len(self.X1_paths) == len(self.X2_paths) == len(self.y)):
            raise ValueError(
                "X1_paths, X2_paths and y must have the same length, got "
                f"{len(self.X1_paths)}, {len(self.X2_paths)} and {len(self.y)}"
            )
        self.batch_size = batch_size
        self.img_size = img_size
        self.shuffle = shuffle
        self.indexes = np.arange(len(self.y))
        self.on_epoch_end()

    def __len__(self):
        # Number of batches per epoch
        return int(np.floor(len(self.y) / self.batch_size))

    def __getitem__(self, index):
        # Generate batch indexes
        batch_indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        # Select data for this batch
        batch_X1_paths = self.X1_paths[batch_indexes]
        batch_X2_paths = self.X2_paths[batch_indexes]
        batch_y = self.y[batch_indexes]

        # Load and preprocess images
        X1 = np.array([self._load_and_preprocess(img_path) for img_path in batch_X1_paths])
        X2 = np.array([self._load_and_preprocess(img_path) for img_path in batch_X2_paths])

        return [X1, X2], batch_y

    def on_epoch_end(self):
        # Shuffle indexes after each epoch
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def _load_and_preprocess(self, img_path):
        # Load image as PIL image, resized to img_size
        img = load_img(img_path, target_size=self.img_size)
        # Convert to numpy array
        img_array = img_to_array(img)
        # Normalize pixels between 0 and 1
        img_array = img_array / 255.0
        return img_array


In [29]:
# Batch size and image size shared by both generators.
batch_size = 32
img_size = (224, 224)  # or your model's input size

# Training pairs are reshuffled each epoch (generator default); validation
# pairs keep a fixed order.
train_gen = SiamesePairGenerator(X1_train, X2_train, y_train, batch_size=batch_size, img_size=img_size)
val_gen = SiamesePairGenerator(X1_val, X2_val, y_val, batch_size=batch_size, img_size=img_size, shuffle=False)
