In [15]:
import tensorflow as tf
from tensorflow.keras.layers import (
    Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D
)
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import backend as K

from sklearn.metrics import (
    confusion_matrix, ConfusionMatrixDisplay, roc_curve, auc, f1_score
)
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

In [16]:
# Silence every Python warning for the rest of the kernel session.
# NOTE(review): this blanket filter also hides deprecation and numerical
# warnings raised by the libraries imported above; consider narrowing it
# to specific categories once the notebook is stable.
warnings.filterwarnings("ignore")

In [None]:
# Set paths to your datasets
# NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
train_dir = r"D:\Documents\cnn\Data Set 1\train"
val_dir = r"D:\Documents\cnn\Data Set 1\val"
test_dir = r"D:\Documents\cnn\Data Set 1\test"

# Tunable settings, named once so comments cannot drift away from the code.
# Only a fraction of each directory is used: asking a generator for its
# 'validation' subset yields just the `validation_split` share of the files.
# NOTE(review): earlier comments described both fractions as 20%; the values
# below are the ones the code has always used. Confirm which was intended.
TRAIN_FRACTION = 0.1     # share of train_dir used for training
VAL_FRACTION = 0.3       # share of val_dir used for validation
IMAGE_SIZE = (299, 299)  # Input size for Inception V3 and ResNet50
BATCH_SIZE = 32

# ImageDataGenerator for training and validation (pixels rescaled to [0, 1])
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=TRAIN_FRACTION)
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=VAL_FRACTION)
test_datagen = ImageDataGenerator(rescale=1./255)  # Test set handled separately

# Load the TRAIN_FRACTION share of the training dataset
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation'  # the split share is the part we keep
)

# Load the VAL_FRACTION share of the validation dataset
val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation'  # the split share is the part we keep
)

# Manually sample 20% of the test dataset
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# File types accepted as images; anything else (e.g. Thumbs.db) is skipped.
TEST_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.ppm', '.tif', '.tiff')
TEST_SAMPLE_FRACTION = 0.2
TEST_SAMPLE_SEED = 42  # fixed so the sampled subset is the same on every run

# Get all test file paths.
# Class folders are taken in sorted order so that class_index follows the same
# alphanumeric folder ordering that flow_from_directory uses for the training
# labels; os.listdir alone returns entries in arbitrary order.
test_classes = sorted(
    entry for entry in os.listdir(test_dir)
    if os.path.isdir(os.path.join(test_dir, entry))
)
test_filepaths = []
test_labels = []

for class_index, class_name in enumerate(test_classes):
    class_dir = os.path.join(test_dir, class_name)
    files = sorted(
        file for file in os.listdir(class_dir)
        if file.lower().endswith(TEST_IMAGE_EXTENSIONS)
    )
    filepaths = [os.path.join(class_dir, file) for file in files]
    test_filepaths.extend(filepaths)
    test_labels.extend([class_index] * len(files))

# Randomly sample 20% of the test dataset (without replacement, seeded)
total_test_samples = len(test_filepaths)
test_sample_rng = np.random.default_rng(TEST_SAMPLE_SEED)
sample_indices = test_sample_rng.choice(
    total_test_samples,
    int(TEST_SAMPLE_FRACTION * total_test_samples),
    replace=False,
)
sampled_filepaths = [test_filepaths[i] for i in sample_indices]
sampled_labels = [test_labels[i] for i in sample_indices]

# Load sampled test images
def preprocess_image(filepath, target_size=(299, 299)):
    """Read one image file and return its pixel array divided by 255.

    Args:
        filepath: Path of the image file to load.
        target_size: Size handed to ``load_img`` for resizing.

    Returns:
        The ``img_to_array`` result of the loaded image, divided by 255.0.
    """
    resized = load_img(filepath, target_size=target_size)
    return img_to_array(resized) / 255.0  # Rescale to [0, 1]

# Decode every sampled file into one array; the labels become an array too.
sampled_images = np.array(list(map(preprocess_image, sampled_filepaths)))
sampled_labels = np.array(sampled_labels)

# Create a test generator from the sampled data
class TestSubsetGenerator(tf.keras.utils.Sequence):
    """Serve pre-loaded images and labels to Keras in fixed-size batches.

    Args:
        images: Array of images; sliced along its first axis.
        labels: Array of labels, one per image, in the same order.
        batch_size: Maximum number of samples per batch (the last batch may
            be smaller).

    Raises:
        ValueError: If ``images`` and ``labels`` differ in length or
            ``batch_size`` is not positive.
    """

    def __init__(self, images, labels, batch_size):
        # Let the Keras base class initialise its own state first.
        super().__init__()
        if len(images) != len(labels):
            raise ValueError(
                f"images and labels must have the same length, "
                f"got {len(images)} and {len(labels)}"
            )
        if batch_size < 1:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        self.images = images
        self.labels = labels
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return int(np.ceil(len(self.images) / self.batch_size))

    def __getitem__(self, index):
        """Return the ``(images, labels)`` pair for batch number ``index``."""
        # Slicing past the end would silently return empty arrays, so plain
        # `for batch in generator` iteration could never stop; fail loudly.
        if not 0 <= index < len(self):
            raise IndexError(f"batch index {index} out of range for {len(self)} batches")
        start = index * self.batch_size
        end = min(start + self.batch_size, len(self.images))
        return self.images[start:end], self.labels[start:end]

test_subset_generator = TestSubsetGenerator(sampled_images, sampled_labels, batch_size=32)

In [18]:
# Function to calculate F1 score
def f1_metric(y_true, y_pred):
    """Compute the F1 score of one batch using backend tensor ops only.

    Predictions are rounded to 0/1 before counting. The value is computed
    from the tensors passed in, i.e. per batch, so when Keras averages it
    over batches it can differ from the F1 score of the whole dataset.

    Args:
        y_true: Ground-truth labels (0 or 1), any numeric dtype.
        y_pred: Predicted probabilities.

    Returns:
        A scalar tensor with the F1 score averaged over output columns.
    """
    # Cast before any arithmetic so integer labels do not clash with float
    # predictions, and give the labels the predictions' shape so a (batch,)
    # label vector cannot broadcast against (batch, 1) predictions.
    y_pred = K.round(K.cast(y_pred, K.floatx()))
    y_true = K.reshape(K.cast(y_true, K.floatx()), K.shape(y_pred))

    tp = K.sum(y_true * y_pred, axis=0)        # True positives
    fp = K.sum((1 - y_true) * y_pred, axis=0)  # False positives
    fn = K.sum(y_true * (1 - y_pred), axis=0)  # False negatives

    # epsilon keeps the ratios finite when a batch has no positives.
    precision = tp / (tp + fp + K.epsilon())
    recall = tp / (tp + fn + K.epsilon())
    f1 = 2 * (precision * recall) / (precision + recall + K.epsilon())
    return K.mean(f1)

# Load Inception V3 model
def _inception_preprocess(images):
    """Turn [0, 1] pixels into the input scaling InceptionV3 expects."""
    # The data pipeline rescales images by 1/255; undo that before applying
    # the preprocessing that belongs to the pretrained ImageNet weights.
    return tf.keras.applications.inception_v3.preprocess_input(images * 255.0)

inception_base = tf.keras.applications.InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=(299, 299, 3)
)

# Freeze the base model
inception_base.trainable = False

# Add preprocessing and the custom classification head
inception_model = tf.keras.Sequential([
    tf.keras.Input(shape=(299, 299, 3)),
    tf.keras.layers.Lambda(_inception_preprocess),
    inception_base,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(1, activation='sigmoid')  # Binary classification
])

# Compile the Inception model; evaluate() cannot run on an uncompiled model.
# Metric order matters: results are later read by position
# (loss, accuracy, precision, recall, f1).
inception_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), f1_metric]
)

# Train the new head; without this the sigmoid layer keeps its random
# initial weights and the test predictions are meaningless.
inception_history = inception_model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=10  # Adjust as needed
)

In [None]:
# Define the custom F1 metric
def f1_metric(y_true, y_pred):
    """Compute the F1 score of one batch using backend tensor ops only.

    Predictions are rounded to 0/1 before counting. The value is computed
    from the tensors passed in, i.e. per batch, so when Keras averages it
    over batches it can differ from the F1 score of the whole dataset.

    Args:
        y_true: Ground-truth labels (0 or 1), any numeric dtype.
        y_pred: Predicted probabilities.

    Returns:
        A scalar tensor with the F1 score averaged over output columns.
    """
    # Cast before any arithmetic so integer labels do not clash with float
    # predictions, and give the labels the predictions' shape so a (batch,)
    # label vector cannot broadcast against (batch, 1) predictions.
    y_pred = K.round(K.cast(y_pred, K.floatx()))  # Round predictions to 0 or 1
    y_true = K.reshape(K.cast(y_true, K.floatx()), K.shape(y_pred))

    tp = K.sum(y_true * y_pred, axis=0)        # True positives
    fp = K.sum((1 - y_true) * y_pred, axis=0)  # False positives
    fn = K.sum(y_true * (1 - y_pred), axis=0)  # False negatives

    # epsilon keeps the ratios finite when a batch has no positives.
    precision = tp / (tp + fp + K.epsilon())
    recall = tp / (tp + fn + K.epsilon())
    f1 = 2 * (precision * recall) / (precision + recall + K.epsilon())
    return K.mean(f1)

# Define Custom CNN Model
def create_custom_cnn(input_shape):
    """Build the uncompiled custom CNN used for binary classification.

    The network stacks four Conv2D(3x3, relu) + MaxPooling2D(2x2) stages with
    32, 64, 128 and 256 filters, then Flatten, Dense(512, relu), Dropout(0.5)
    and a single sigmoid output unit.

    Args:
        input_shape: Shape of one input image, passed to the first Conv2D.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    stack = []

    for stage, filters in enumerate((32, 64, 128, 256)):
        # Only the first convolution declares the input shape.
        first_layer_kwargs = {'input_shape': input_shape} if stage == 0 else {}
        stack.append(layers.Conv2D(filters, (3, 3), activation='relu', **first_layer_kwargs))
        stack.append(layers.MaxPooling2D(pool_size=(2, 2)))

    stack.append(layers.Flatten())
    stack.append(layers.Dense(512, activation='relu'))
    stack.append(layers.Dropout(0.5))
    stack.append(layers.Dense(1, activation='sigmoid'))  # Binary classification

    return tf.keras.Sequential(stack)

# Create Custom CNN model
custom_cnn_model = create_custom_cnn(input_shape=(299, 299, 3))  # Input size for your custom CNN

# Compile it; the metric order fixes the positions in evaluate() results.
custom_cnn_compile_options = dict(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), f1_metric],
)
custom_cnn_model.compile(**custom_cnn_compile_options)

# Train Custom CNN
custom_cnn_history = custom_cnn_model.fit(
    x=train_generator,
    epochs=10,  # Adjust as needed
    validation_data=val_generator,
)

# Load ResNet50 model
def _resnet_preprocess(images):
    """Turn [0, 1] RGB pixels into the input format ResNet50 expects."""
    # The data pipeline rescales images by 1/255; undo that before applying
    # the preprocessing that belongs to the pretrained ImageNet weights.
    return tf.keras.applications.resnet50.preprocess_input(images * 255.0)

resnet_base = tf.keras.applications.ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(299, 299, 3)
)

# Freeze the base model
resnet_base.trainable = False

# Add preprocessing and the custom classification head
resnet_model = tf.keras.Sequential([
    tf.keras.Input(shape=(299, 299, 3)),
    tf.keras.layers.Lambda(_resnet_preprocess),
    resnet_base,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(1, activation='sigmoid')  # Binary classification
])

# Compile the ResNet model; the metric order fixes the positions in
# evaluate() results.
resnet_compile_options = dict(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), f1_metric],
)
resnet_model.compile(**resnet_compile_options)

# Train ResNet50
resnet_history = resnet_model.fit(
    x=train_generator,
    epochs=10,  # Adjust as needed
    validation_data=val_generator,
)

In [None]:
# Function to get predictions from a generator
def get_predictions(model, generator):
    """Run ``model.predict`` on every batch of ``generator`` exactly once.

    Args:
        model: Object exposing ``predict(x, verbose=0)``.
        generator: Source of batches. Each batch is a tuple whose first item
            is the model input. Sized, indexable sources are read by index
            for one full pass; anything else is iterated as-is.

    Returns:
        The per-batch predictions concatenated along the first axis.

    Raises:
        ValueError: If the generator yields no batches.
    """
    # Some batch iterators repeat forever when looped over directly, so walk
    # indexable sources by position to guarantee a single pass.
    if hasattr(generator, "__len__") and hasattr(generator, "__getitem__"):
        batches = (generator[i] for i in range(len(generator)))
    else:
        batches = generator

    # batch[0] instead of tuple unpacking: batches may carry extra items
    # such as sample weights. verbose=0 avoids one progress bar per batch.
    predictions = [model.predict(batch[0], verbose=0) for batch in batches]
    if not predictions:
        raise ValueError("generator produced no batches to predict on")
    return np.concatenate(predictions)

# The sampled test data lives in `test_subset_generator`; no object named
# `test_generator` is ever created in this notebook.

# Get predictions for Inception V3
y_pred_inception = get_predictions(inception_model, test_subset_generator).flatten()

# Get predictions for Custom CNN
y_pred_custom_cnn = get_predictions(custom_cnn_model, test_subset_generator).flatten()

# Get predictions for ResNet50
y_pred_resnet = get_predictions(resnet_model, test_subset_generator).flatten()

# Get true labels, in the same order the batches are served
y_true = test_subset_generator.labels

In [None]:
# Calculate ROC Curve and AUC
def plot_roc_curve(y_true, y_pred, model_name):
    """Add one model's ROC curve, with its AUC in the legend, to the current axes.

    Args:
        y_true: Ground-truth binary labels.
        y_pred: Predicted scores for the positive class.
        model_name: Name shown in the legend entry.
    """
    false_positive_rate, true_positive_rate, _thresholds = roc_curve(y_true, y_pred)
    roc_auc = auc(false_positive_rate, true_positive_rate)

    axes = plt.gca()
    axes.plot(false_positive_rate, true_positive_rate, label=f'{model_name} (AUC = {roc_auc:.2f})')
    axes.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
    axes.set_xlim([0.0, 1.0])
    axes.set_ylim([0.0, 1.05])
    axes.set_xlabel('False Positive Rate')
    axes.set_ylabel('True Positive Rate')
    axes.set_title('Receiver Operating Characteristic')
    axes.legend(loc='lower right')

# Plot ROC Curves: all three models share one figure
plt.figure(figsize=(12, 6))
for scores, label in (
    (y_pred_inception, 'Inception V3'),
    (y_pred_custom_cnn, 'Custom CNN'),
    (y_pred_resnet, 'ResNet50'),
):
    plot_roc_curve(y_true, scores, label)
plt.show()

# Evaluate the models on the test dataset
# The sampled test data lives in `test_subset_generator`; no object named
# `test_generator` is ever created in this notebook.
# evaluate() needs a compiled model and returns [loss, *metrics] in the
# order the metrics were given to compile().
inception_test_score = inception_model.evaluate(test_subset_generator)
custom_cnn_test_score = custom_cnn_model.evaluate(test_subset_generator)
resnet_test_score = resnet_model.evaluate(test_subset_generator)

# Print evaluation metrics
def print_evaluation_results(model_name, test_score):
    """Print accuracy, precision, recall and F1 for one model.

    Args:
        model_name: Label prefixed to every printed line.
        test_score: Indexable sequence; positions 1-4 are read as accuracy,
            precision, recall and F1 score (position 0 is not printed).
    """
    report = (
        (f"{model_name} Test Accuracy:", 1),
        (f"{model_name} Test Precision:", 2),
        (f"{model_name} Test Recall:", 3),
        (f"{model_name} Test F1 Score:", 4),
    )
    for heading, position in report:
        print(heading, test_score[position])

# Report each model's test metrics in turn.
for evaluated_name, evaluated_score in (
    ("Inception V3", inception_test_score),
    ("Custom CNN", custom_cnn_test_score),
    ("ResNet50", resnet_test_score),
):
    print_evaluation_results(evaluated_name, evaluated_score)

In [None]:
# Compute confusion matrices
def plot_confusion_matrix(y_true, y_pred, model_name, class_names=None):
    """Plot the confusion matrix of one model's rounded predictions.

    Args:
        y_true: Ground-truth integer class labels.
        y_pred: Predicted probabilities; rounded to the nearest class label.
        model_name: Name shown in the plot title.
        class_names: Display names ordered by class index. Defaults to the
            module-level ``test_classes`` list, whose positions are the
            integer labels assigned to the test files.
    """
    if class_names is None:
        class_names = test_classes
    class_names = list(class_names)

    y_pred_classes = np.round(y_pred).astype(int)  # Convert probabilities to class labels
    # Pin the label set so the matrix keeps one row/column per class even
    # when a class is absent from the sampled labels or predictions.
    cm = confusion_matrix(y_true, y_pred_classes, labels=list(range(len(class_names))))
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
    disp.plot(cmap=plt.cm.Blues)
    plt.title(f'Confusion Matrix for {model_name}')
    plt.show()

# Plot confusion matrices for each model
for model_scores, model_label in (
    (y_pred_inception, 'Inception V3'),
    (y_pred_custom_cnn, 'Custom CNN'),
    (y_pred_resnet, 'ResNet50'),
):
    plot_confusion_matrix(y_true, model_scores, model_label)