<h1>Preprocessing</h1>

<h2>Data generator<h2>

In [4]:
import numpy as np
import cv2
from tensorflow.keras.utils import Sequence

class MultiLabelDataGenerator(Sequence):
    def __init__(self, dataframe, batch_size, target_size, augment=False):
        """
        Args:
            dataframe: A pandas DataFrame containing 'image_path' and label columns.
            batch_size: Number of samples per batch.
            target_size: Tuple (width, height) for resizing images.
            augment: Boolean flag to apply data augmentation.
        """
        self.dataframe = dataframe
        self.batch_size = batch_size
        self.target_size = target_size
        self.augment = augment

    def __len__(self):
        # Number of batches per epoch
        return int(np.ceil(len(self.dataframe) / self.batch_size))

    def __getitem__(self, idx):
        # Fetch a batch of data
        batch_data = self.dataframe.iloc[idx * self.batch_size:(idx + 1) * self.batch_size]
        images = []
        labels = []

        for _, row in batch_data.iterrows():
            # Load image
            img = cv2.imread(row['image_path'])
            img = cv2.resize(img, self.target_size) / 255.0  # Normalize to [0, 1]

            # Optional: Data augmentation
            if self.augment:
                img = self._augment_image(img)

            images.append(img)
            labels.append(row.iloc[1:].values.astype('float32'))  # Convert label row to binary array

        return np.array(images), np.array(labels)

    def _augment_image(self, image):
        # Example augmentation: horizontal flip
        if np.random.rand() < 0.5:
            image = cv2.flip(image, 1)
        return image


In [5]:
import pandas as pd

train_data = pd.read_csv('train_labels.csv')
valid_data = pd.read_csv('valid_labels.csv')

train_gen = MultiLabelDataGenerator(train_data, batch_size=16, target_size=(640, 640), augment=True)
val_gen = MultiLabelDataGenerator(valid_data, batch_size=16, target_size=(640, 640))

In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Define the number of labels/classes (10 in this case)
num_labels = 10

# Create the model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(640, 640, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_labels, activation='sigmoid')  # Sigmoid for multi-label classification
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Display the model summary
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=20,
    steps_per_epoch=len(train_gen),
    validation_steps=len(val_gen)
)


  self._warn_if_super_not_called()


Epoch 1/20
[1m  3/163[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m55:59[0m 21s/step - accuracy: 0.1458 - loss: 1.9958 