DATA PREPROCESSING

In [3]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

Creating a img augmentation generator- Keras

In [4]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
val_datagen = ImageDataGenerator(rescale=1./255)

test_datagen = ImageDataGenerator(rescale=1./255)

Creates batches of images from directory (along with labels)

In [5]:
train_generator = train_datagen.flow_from_directory(
    r"C:\Users\suhan\Desktop\bin\train",
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary'
)

val_generator = val_datagen.flow_from_directory(
    r"C:\Users\suhan\Desktop\bin\valid",
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary'
)

test_generator = test_datagen.flow_from_directory(
    r"C:\Users\suhan\Desktop\bin\test",
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary'
)

Found 3360 images belonging to 2 classes.
Found 49 images belonging to 2 classes.
Found 185 images belonging to 2 classes.


Performing 3-fold cross validation (since dataset is smaller, this improves model's generalisability)

In [6]:
import os
import shutil
import numpy as np
from sklearn.model_selection import KFold

In [7]:
BASE_PATH = r"C:\Users\suhan\Desktop\bin"
FOLDS = 3
TARGET_PATH = r"C:\Users\suhan\Desktop\bin\folds"

In [8]:
# create fold funct
def create_folds(base_path, num_folds=FOLDS):
    train_path = os.path.join(base_path, "train")
    folds_dir = os.path.join(TARGET_PATH)
    if not os.path.exists(folds_dir):
        os.makedirs(folds_dir)

    all_images = []
    for class_dir in os.listdir(train_path):
        class_dir_path = os.path.join(train_path, class_dir)
        if os.path.isdir(class_dir_path):
            for img in os.listdir(class_dir_path):
                all_images.append((os.path.join(class_dir_path, img), class_dir))

    np.random.shuffle(all_images)
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

    fold_dirs = []
    for fold in range(num_folds):
        fold_train_dir = os.path.join(folds_dir, f"fold_{fold + 1}_train")
        fold_valid_dir = os.path.join(folds_dir, f"fold_{fold + 1}_valid")

        for d in [fold_train_dir, fold_valid_dir]:
            if not os.path.exists(d):
                os.makedirs(d)
                for class_dir in os.listdir(train_path):
                    os.makedirs(os.path.join(d, class_dir), exist_ok=True)

        fold_dirs.append((fold_train_dir, fold_valid_dir))

    for fold, (train_idx, valid_idx) in enumerate(kf.split(all_images)):
        train_images = [all_images[i] for i in train_idx]
        valid_images = [all_images[i] for i in valid_idx]

        train_dir, valid_dir = fold_dirs[fold]

        for image_path, class_dir in train_images:
            dst_dir = os.path.join(train_dir, class_dir)
            shutil.copy(image_path, dst_dir)

        for image_path, class_dir in valid_images:
            dst_dir = os.path.join(valid_dir, class_dir)
            shutil.copy(image_path, dst_dir)

    print("Folds created and images copied.")


#calling fold funct
create_folds(BASE_PATH)

Folds created and images copied.


Defining the model

In [9]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

def build_model():
    base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(150, 150, 3))
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(1, activation='sigmoid')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    
    model.compile(optimizer=Adam(learning_rate=1e-4), 
                  loss='binary_crossentropy', 
                  metrics=['accuracy'])
    return model

Running the model over the generated folds

In [None]:
def cross_validate(num_folds=FOLDS):
    fold_accuracies = []

    for fold in range(num_folds):
        print(f"Training Fold {fold + 1}")
        
        # data genes for this fold
        train_datagen = ImageDataGenerator(
            rescale=1./255,
            rotation_range=40,
            width_shift_range=0.2,
            height_shift_range=0.2,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True,
            fill_mode='nearest'
        )
        
        val_datagen = ImageDataGenerator(rescale=1./255)
        
        train_dir = os.path.join(TARGET_PATH, f"fold_{fold + 1}_train")
        val_dir = os.path.join(TARGET_PATH, f"fold_{fold + 1}_valid")
        
        train_generator = train_datagen.flow_from_directory(
            train_dir,
            target_size=(150, 150),
            batch_size=32,
            class_mode='binary'
        )
        
        val_generator = val_datagen.flow_from_directory(
            val_dir,
            target_size=(150, 150),
            batch_size=32,
            class_mode='binary'
        )
        
        model = build_model()

        history = model.fit(
            train_generator,
            steps_per_epoch=train_generator.samples // train_generator.batch_size,
            epochs=10,
            validation_data=val_generator,
            validation_steps=val_generator.samples // val_generator.batch_size,
            callbacks=[EarlyStopping(monitor='val_loss', patience=3)]
        )
        
        val_loss, val_acc = model.evaluate(val_generator, steps=val_generator.samples // val_generator.batch_size)
        fold_accuracies.append(val_acc)
        
        print(f"Fold {fold + 1} - Validation Accuracy: {val_acc}")

    print(f"Mean Validation Accuracy across folds: {np.mean(fold_accuracies)}")

#run
cross_validate()

Training Fold 1
Found 3347 images belonging to 2 classes.
Found 2935 images belonging to 2 classes.
Epoch 1/10


  self._warn_if_super_not_called()


[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m153s[0m 1s/step - accuracy: 0.7146 - loss: 0.5550 - val_accuracy: 0.5055 - val_loss: 0.6928
Epoch 2/10
[1m  1/104[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:32[0m 902ms/step - accuracy: 0.8125 - loss: 0.4561

  self.gen.throw(value)


[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8125 - loss: 0.4561 - val_accuracy: 0.5217 - val_loss: 0.6928
Epoch 3/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 1s/step - accuracy: 0.8812 - loss: 0.2912 - val_accuracy: 0.5055 - val_loss: 0.6958
Epoch 4/10
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 1.0000 - loss: 0.0997 - val_accuracy: 0.4783 - val_loss: 0.6954
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 273ms/step - accuracy: 0.5107 - loss: 0.6952
Fold 1 - Validation Accuracy: 0.5054945349693298
Training Fold 2
Found 3352 images belonging to 2 classes.
Found 2938 images belonging to 2 classes.
Epoch 1/10
[1m 56/104[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m53s[0m 1s/step - accuracy: 0.6469 - loss: 0.6159

Performance evaluation

In [None]:
from sklearn.metrics import classification_report

# Generate predictions on the test set
test_generator.reset()  # Reset the test generator to ensure correct predictions
predictions = final_model.predict(test_generator, steps=test_generator.samples // test_generator.batch_size + 1)
predicted_classes = (predictions > 0.5).astype(int)

# True labels
true_classes = test_generator.classes
class_labels = list(test_generator.class_indices.keys())  # List of class names

# Generate classification report
report = classification_report(true_classes, predicted_classes, target_names=class_labels)
print(report)
