### Breast Cancer Detection using EfficientNet | Dataset: CBIS-DDSM

## Import Required Libraries

In [None]:
import sys
import os
from os import listdir
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
import pandas as pd
import PIL 
import random
random.seed(100)
np.random.seed(100)

# Display some images
import matplotlib.image as mpimg

import tensorflow as tf

import cv2

from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
# Import necessary TensorFlow libraries

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam, AdamW
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import plot_model

# from tensorflow.keras.applications.resnet50 import ResNet50

from tensorflow.keras.layers import GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.models import Model

from tensorflow.keras.layers import Input, Dense, Dropout
# from tensorflow.keras.applications.densenet import DenseNet169
import keras
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.utils import to_categorical

from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix
import warnings

# Suppress all warnings globally
warnings.filterwarnings("ignore")

In [None]:
# get the current working directory
current_working_directory = os.getcwd()

# print output to the console
print(current_working_directory)

# output will look something similar to this on a macOS system
# /Users/dionysialemonaki/Documents/my-projects/python-project

##  Data Acquisition

In [None]:
# Provide the correct path to the CSV file
csv_path = '/kaggle/input/cbis-ddsm-breast-cancer-image-dataset/csv/meta.csv'

# Read the CSV file into a DataFrame
df_meta = pd.read_csv(csv_path)

# Display the DataFrame
df_meta

In [None]:
dicom_data = pd.read_csv('/kaggle/input/cbis-ddsm-breast-cancer-image-dataset/csv/dicom_info.csv')
dicom_data.head()

### Exploratory Data Analysis

In [None]:
dicom_data.info()

In [None]:
dicom_data.SeriesDescription.unique()

In [None]:
image_dir = '/kaggle/input/cbis-ddsm-breast-cancer-image-dataset/jpeg'
full_mammogram_images = dicom_data[dicom_data.SeriesDescription == 'full mammogram images'].image_path
cropped_images = dicom_data[dicom_data.SeriesDescription == 'cropped images'].image_path
roi_mask_images = dicom_data[dicom_data.SeriesDescription == 'ROI mask images'].image_path

full_mammogram_images = full_mammogram_images.apply(lambda x: x.replace('CBIS-DDSM/jpeg', image_dir))
cropped_images = cropped_images.apply(lambda x: x.replace('CBIS-DDSM/jpeg', image_dir))
roi_mask_images = roi_mask_images.apply(lambda x: x.replace('CBIS-DDSM/jpeg', image_dir))
full_mammogram_images.iloc[0]

In [None]:
full_mammogram_images.shape

In [None]:
cropped_images.iloc[0]

In [None]:
cropped_images.shape

In [None]:
roi_mask_images.iloc[0]

In [None]:
roi_mask_images.shape

In [None]:
# Build one lookup table per image category, mapping the name of the first
# folder below `image_dir` to the image's full path.
def _series_folder(path):
    """Return the first path component of `path` below `image_dir`.

    The component is located relative to `image_dir` rather than by a fixed
    position in the "/"-split path, so the keys stay the same if `image_dir`
    is moved to a location with a different number of path components.
    """
    return os.path.relpath(path, image_dir).split(os.sep)[0]


# If several images share a folder, the last one seen wins (as with plain
# dict assignment in a loop).
full_mammogram_dict = {_series_folder(path): path for path in full_mammogram_images}
cropped_dict = {_series_folder(path): path for path in cropped_images}
roi_mask_dict = {_series_folder(path): path for path in roi_mask_images}

In [None]:
next(iter((full_mammogram_dict.items())))

In [None]:
sys.getsizeof(full_mammogram_dict)

In [None]:
next(iter((cropped_dict.items())))

In [None]:
sys.getsizeof(cropped_images)

In [None]:
next(iter((roi_mask_dict.items())))

In [None]:
sys.getsizeof(roi_mask_images)

Read the mass and calcification case-description CSV files (train and test)

In [None]:
mass_train_data = pd.read_csv('/kaggle/input/cbis-ddsm-breast-cancer-image-dataset/csv/mass_case_description_train_set.csv')
mass_test_data = pd.read_csv('/kaggle/input/cbis-ddsm-breast-cancer-image-dataset/csv/mass_case_description_test_set.csv')
calc_train_data = pd.read_csv('/kaggle/input/cbis-ddsm-breast-cancer-image-dataset/csv/calc_case_description_train_set.csv')
calc_test_data = pd.read_csv('/kaggle/input/cbis-ddsm-breast-cancer-image-dataset/csv/calc_case_description_test_set.csv')

In [None]:
mass_train_data.head()

In [None]:
# mass_data = mass_train_data.append(mass_test_data)
mass_test_data.head()

In [None]:
calc_train_data.head()

In [None]:
calc_test_data.head()

In [None]:
# Count the number of benign and malignant cases in the training set
train_counts = mass_train_data['pathology'].value_counts()

# Count the number of benign and malignant cases in the test set
test_counts = mass_test_data['pathology'].value_counts()

# Display the results
print("Mass Training Set:")
print(train_counts)

print("\nMass Test Set:")
print(test_counts)

In [None]:
# Count the number of benign and malignant cases in the training set
train_counts = calc_train_data['pathology'].value_counts()

# Count the number of benign and malignant cases in the test set
test_counts = calc_test_data['pathology'].value_counts()

# Display the results
print("Calc Training Set:")
print(train_counts)

print("\nCalc Test Set:")
print(test_counts)

Fix the image paths in the mass datasets

In [None]:
def fix_image_path_mass(dataset):
    """Replace the path columns of `dataset` in place with known JPEG paths.

    Columns at positions 11, 12 and 13 are looked up in
    `full_mammogram_dict`, `cropped_dict` and `roi_mask_dict` respectively,
    using the third "/"-separated component of the stored path as the key.
    Cells whose key is not present in the matching table are left unchanged.
    Returns None.
    """
    # (column position, lookup table) pairs, processed left to right per row
    column_lookups = (
        (11, full_mammogram_dict),
        (12, cropped_dict),
        (13, roi_mask_dict),
    )

    for row_pos, record in enumerate(dataset.values):
        for col_pos, lookup in column_lookups:
            folder_name = record[col_pos].split("/")[2]
            if folder_name in lookup:
                dataset.iloc[row_pos, col_pos] = lookup[folder_name]

In [None]:
fix_image_path_mass(mass_train_data)

In [None]:
fix_image_path_mass(mass_test_data)

In [None]:
mass_train_data

In [None]:
mass_test_data

Fix the image paths in the calcification datasets

In [None]:
def fix_image_path_calc(dataset):
    """Rewrite the three image-path columns of `dataset` in place.

    For every row, the third "/"-separated component of the value at column
    positions 11, 12 and 13 is used as a key into `full_mammogram_dict`,
    `cropped_dict` and `roi_mask_dict` respectively. When the key is found,
    the cell is overwritten with the mapped path; otherwise it is untouched.
    Returns None.
    """

    def _swap_if_known(row_pos, col_pos, stored_path, lookup):
        # Overwrite one cell when its folder name is a key of `lookup`
        folder_name = stored_path.split("/")[2]
        if folder_name in lookup:
            dataset.iloc[row_pos, col_pos] = lookup[folder_name]

    for row_pos, record in enumerate(dataset.values):
        _swap_if_known(row_pos, 11, record[11], full_mammogram_dict)
        _swap_if_known(row_pos, 12, record[12], cropped_dict)
        _swap_if_known(row_pos, 13, record[13], roi_mask_dict)

In [None]:
fix_image_path_calc(calc_train_data)

In [None]:
calc_train_data

In [None]:
# Use the calcification helper for the calcification test set, matching how
# calc_train_data is handled.
fix_image_path_calc(calc_test_data)

In [None]:
calc_test_data

### Data Cleaning

In [None]:
# check unique values in pathology column
mass_train_data.pathology.unique()

In [None]:
calc_train_data.pathology.unique()

In [None]:
mass_train_data.info()

In [None]:
calc_train_data.info()

In [None]:
# rename columns
mass_train = mass_train_data.rename(columns={'left or right breast': 'left_or_right_breast',
                                           'image view': 'image_view',
                                           'abnormality id': 'abnormality_id',
                                           'abnormality type': 'abnormality_type',
                                           'mass shape': 'mass_shape',
                                           'mass margins': 'mass_margins',
                                           'image file path': 'image_file_path',
                                           'cropped image file path': 'cropped_image_file_path',
                                           'ROI mask file path': 'ROI_mask_file_path'})

mass_train.head()

In [None]:
# rename columns
calc_train = calc_train_data.rename(columns={'left or right breast': 'left_or_right_breast',
                                             'breast density':'breast_density',
                                           'image view': 'image_view',
                                           'abnormality id': 'abnormality_id',
                                           'abnormality type': 'abnormality_type',
                                           'calc type': 'calc_type',
                                           'calc distribution': 'calc_distribution',
                                           'image file path': 'image_file_path',
                                           'cropped image file path': 'cropped_image_file_path',
                                           'ROI mask file path': 'ROI_mask_file_path'})

calc_train.head()

In [None]:
# check for null values
mass_train.isnull().sum()

In [None]:
calc_train.isnull().sum()

In [None]:
# Fill missing mass descriptors with the next valid value further down the
# column (backward fill). Series.bfill() replaces the deprecated
# fillna(method='bfill') spelling.
mass_train['mass_shape'] = mass_train['mass_shape'].bfill()
mass_train['mass_margins'] = mass_train['mass_margins'].bfill()

# Re-check null counts; backward fill cannot fill NaNs at the very end of a column
mass_train.isnull().sum()

In [None]:
# Fill missing calcification descriptors with the next valid value further
# down the column (backward fill). Series.bfill() replaces the deprecated
# fillna(method='bfill') spelling.
calc_train['calc_type'] = calc_train['calc_type'].bfill()
calc_train['calc_distribution'] = calc_train['calc_distribution'].bfill()

# Re-check null counts; backward fill cannot fill NaNs at the very end of a column
calc_train.isnull().sum()

In [None]:
mass_test_data.isnull().sum()

In [None]:
calc_test_data.isnull().sum()

In [None]:
# check for column names in mass_test
print(mass_test_data.columns,'\n')

# renaming the columns
mass_test = mass_test_data.rename(columns={'left or right breast': 'left_or_right_breast',
                                           'image view': 'image_view',
                                           'abnormality id': 'abnormality_id',
                                           'abnormality type': 'abnormality_type',
                                           'mass shape': 'mass_shape',
                                           'mass margins': 'mass_margins',
                                           'image file path': 'image_file_path',
                                           'cropped image file path': 'cropped_image_file_path',
                                           'ROI mask file path': 'ROI_mask_file_path'})

# renamed columns
mass_test.columns

In [None]:
# check for column names in mass_test
print(calc_test_data.columns,'\n')

# renaming the columns
calc_test = calc_test_data.rename(columns={'left or right breast': 'left_or_right_breast',
                                           'breast density':'breast_density',
                                           'image view': 'image_view',
                                           'abnormality id': 'abnormality_id',
                                           'abnormality type': 'abnormality_type',
                                           'calc type': 'calc_type',
                                           'calc distribution': 'calc_distribution',
                                           'image file path': 'image_file_path',
                                           'cropped image file path': 'cropped_image_file_path',
                                           'ROI mask file path': 'ROI_mask_file_path'})

# renamed columns
calc_test.columns

In [None]:
# Fill missing calcification descriptors in the test set with the next valid
# value further down the column (backward fill). Series.bfill() replaces the
# deprecated fillna(method='bfill') spelling.
calc_test['calc_type'] = calc_test['calc_type'].bfill()
calc_test['calc_distribution'] = calc_test['calc_distribution'].bfill()
# Re-check null counts; backward fill cannot fill NaNs at the very end of a column
calc_test.isnull().sum()

### Data Visualization

In [None]:
# quantitative summary of features
mass_train.describe()

In [None]:
calc_train.describe()

In [None]:
# check datasets shape
print(f'Shape of mass_train: {mass_train.shape}')
print(f'Shape of mass_test: {mass_test.shape}')

In [None]:
# check datasets shape
print(f'Shape of calc_train: {calc_train.shape}')
print(f'Shape of calc_test: {calc_test.shape}')

In [None]:
# Combined pathology distribution of the mass and calcification training sets.
# Series.add(..., fill_value=0) keeps a category that occurs in only one of
# the two sets; a plain `+` would turn its count into NaN.
value = mass_train['pathology'].value_counts().add(
    calc_train['pathology'].value_counts(), fill_value=0)
plt.figure(figsize=(8,6))

plt.pie(value, labels=value.index, autopct='%1.1f%%')
# The chart shows pathology classes for both abnormality types, not mass types
plt.title('Pathology Distribution (Mass + Calcification Training Sets)', fontsize=12)
plt.show()

In [None]:
# Assuming mass_train and calc_train are your DataFrames

# Set the color palette for mass_train
mass_palette = sns.color_palette("viridis", n_colors=len(mass_train['assessment'].unique()))
sns.countplot(data=mass_train, y='assessment', hue='pathology', palette=mass_palette)
plt.title('Count Plot for mass_train')
plt.show()

# Set the color palette for calc_train
calc_palette = sns.color_palette("magma", n_colors=len(calc_train['assessment'].unique()))
sns.countplot(data=calc_train, y='assessment', hue='pathology', palette=calc_palette)
plt.title('Count Plot for calc_train')
plt.show()

In [None]:
plt.figure(figsize=(8, 6))
sns.countplot(data=mass_train, x='subtlety', palette='magma', hue='subtlety')
plt.title('Breast Cancer Mass Subtlety', fontsize=12)
plt.xlabel('Subtlety Grade')
plt.ylabel('Count')
plt.show()

In [None]:
plt.figure(figsize=(8, 6))
sns.countplot(data=calc_train, x='subtlety', palette='viridis', hue='subtlety')
plt.title('Breast Cancer Calc Subtlety', fontsize=12)
plt.xlabel('Subtlety Grade')
plt.ylabel('Count')
plt.show()

In [None]:
# view breast mass shape distribution against pathology
plt.figure(figsize=(8,6))

sns.countplot(mass_train, x='mass_shape', hue='pathology')
plt.title('Mass Shape Distribution by Pathology', fontsize=14)
plt.xlabel('Mass Shape')
plt.xticks(rotation=30, ha='right')
plt.ylabel('Pathology Count')
plt.legend()
plt.show()

In [None]:
plt.figure(figsize=(12, 8))

sns.countplot(data=calc_train, y='calc_type', hue='pathology', palette='viridis')
plt.title('Calcification Type Distribution by Pathology', fontsize=14)
plt.xlabel('Pathology Count')
plt.ylabel('Calc Type')

# Adjust the rotation of the y-axis labels
plt.yticks(rotation=0, ha='right')

# Move the legend outside the plot for better visibility
plt.legend(loc='upper right', bbox_to_anchor=(1.25, 1))

plt.show()

In [None]:
# breast density against pathology
plt.figure(figsize=(8,6))

sns.countplot(mass_train, x='breast_density', hue='pathology')
plt.title('Breast Density vs Pathology\n\n1: fatty || 2: Scattered Fibroglandular Density\n3: Heterogenously Dense || 4: Extremely Dense',
          fontsize=14)
plt.xlabel('Density Grades')
plt.ylabel('Count')
plt.legend()

plt.show()

In [None]:
# breast density against pathology
plt.figure(figsize=(8,6))

sns.countplot(calc_train, x='breast_density', hue='pathology')
plt.title('Breast Density vs Pathology\n\n1: fatty || 2: Scattered Fibroglandular Density\n3: Heterogenously Dense || 4: Extremely Dense',
          fontsize=14)
plt.xlabel('Density Grades')
plt.ylabel('Count')
plt.legend()

plt.show()

In [None]:
mass_train.head()

In [None]:
calc_train.head()

In [None]:
import matplotlib.image as mpimg

def display_images(column, number, dataset=None):
    """Show the first `number` images referenced by `column` in one row of plots.

    Parameters
    ----------
    column : str
        Name of the column that holds the image file paths.
    number : int
        How many leading rows of the frame to display.
    dataset : pandas.DataFrame, optional
        Frame to read from; defaults to the module-level `mass_train`.
        Its 'pathology' column is used as the title of each image.

    Each path is printed before it is loaded. A path that does not exist on
    disk is reported and its subplot is left empty.
    """
    frame = mass_train if dataset is None else dataset

    # squeeze=False keeps `axes` two-dimensional, so indexing also works
    # when only one image is requested
    fig, axes = plt.subplots(1, number, figsize=(15, 5), squeeze=False)

    # Use the running position for the subplot, not the DataFrame index label,
    # so frames whose index does not start at 0 are handled as well
    for position, (_, row) in enumerate(frame.head(number).iterrows()):
        image_path = row[column]
        print(image_path)
        if os.path.exists(image_path):
            image = mpimg.imread(image_path)
            ax = axes[0, position]
            ax.imshow(image, cmap='gray')
            ax.set_title(f"{row['pathology']}")
            ax.axis('off')
        else:
            print(f"File not found: {image_path}")

    plt.tight_layout()
    plt.show()

print('Mass Training Dataset\n\n')
print('Full Mammograms:\n')
display_images('image_file_path', 5)
print('Cropped Mammograms:\n')
display_images('cropped_image_file_path', 5)
print('ROI Images:\n')
display_images('ROI_mask_file_path', 5)

In [None]:
def display_images(column, number, dataset=None):
    """Show the first `number` images referenced by `column` in one row of plots.

    Parameters
    ----------
    column : str
        Name of the column that holds the image file paths.
    number : int
        How many leading rows of the frame to display.
    dataset : pandas.DataFrame, optional
        Frame to read from; defaults to the module-level `calc_train`.
        Its 'pathology' column is used as the title of each image.

    Each path is printed before it is loaded. A path that does not exist on
    disk is reported and its subplot is left empty.
    """
    frame = calc_train if dataset is None else dataset

    # squeeze=False keeps `axes` two-dimensional, so indexing also works
    # when only one image is requested
    fig, axes = plt.subplots(1, number, figsize=(15, 5), squeeze=False)

    # Use the running position for the subplot, not the DataFrame index label,
    # so frames whose index does not start at 0 are handled as well
    for position, (_, row) in enumerate(frame.head(number).iterrows()):
        image_path = row[column]
        print(image_path)
        if os.path.exists(image_path):
            image = mpimg.imread(image_path)
            ax = axes[0, position]
            ax.imshow(image, cmap='gray')
            ax.set_title(f"{row['pathology']}")
            ax.axis('off')
        else:
            print(f"File not found: {image_path}")

    plt.tight_layout()
    plt.show()


print('Calcification Trianing Dataset\n\n')
print('Full Mammograms:\n')
display_images('image_file_path', 5)
print('Cropped Mammograms:\n')
display_images('cropped_image_file_path', 5)
print('ROI Images:\n')
display_images('ROI_mask_file_path', 5)

In [None]:
# Merge the four case-description frames into a single dataset.
# ignore_index=True gives the result a fresh 0..N-1 index. Without it every
# source frame keeps its own 0-based labels, so the same label appears in
# several rows and label-based selection no longer identifies a single row.
mass_calc = pd.concat([mass_train, mass_test, calc_train, calc_test], axis=0, ignore_index=True)

# Target image size; the first two entries are used as the resize height and width
target_size = (224, 224, 3)

In [None]:
mass_calc

### Image Preprocessing

In [None]:
_GPU_MEMORY_GROWTH_CONFIGURED = False


def _configure_gpu_memory_growth():
    """Enable memory growth on every visible GPU; does the work only once."""
    global _GPU_MEMORY_GROWTH_CONFIGURED
    if _GPU_MEMORY_GROWTH_CONFIGURED:
        return

    physical_devices = tf.config.list_physical_devices('GPU')
    if not physical_devices:
        print("Warning: No GPU devices found. Using CPU.")
    else:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
    _GPU_MEMORY_GROWTH_CONFIGURED = True


def image_processor(image_path, target_size, base_directory="/kaggle/input/cbis-ddsm-breast-cancer-image-dataset/jpeg"):
    """Load one JPEG, convert it to float32 and resize it.

    Parameters
    ----------
    image_path : str or os.PathLike
        Path of the image to load. Anything else (for example a NaN cell)
        is counted as skipped.
    target_size : sequence
        Only the first two entries are used, as the resize height and width.
    base_directory : str
        The image is processed only if its absolute path starts with this
        prefix; otherwise it is counted as skipped.

    Returns
    -------
    tuple
        `(image_array, skipped_count, processed_count)`. For a skipped image
        this is `(None, 1, 0)`. For a processed image it is
        `(array, 0, 1)`, where `array` is a float32 NumPy array decoded with
        three channels and resized to the requested height and width.
    """
    # Non-path inputs cannot be resolved; treat them like any other skipped row
    if not isinstance(image_path, (str, os.PathLike)):
        return None, 1, 0

    absolute_image_path = os.path.abspath(image_path)

    # Paths outside the dataset directory were never mapped to a JPEG
    if not absolute_image_path.startswith(base_directory):
        return None, 1, 0

    # Device setup is needed only before the first TensorFlow op, not per image
    _configure_gpu_memory_growth()

    image = tf.io.read_file(absolute_image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    # convert_image_dtype rescales integer pixel values when converting to float32
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    image = tf.image.resize(image, (target_size[0], target_size[1]))

    return image.numpy(), 0, 1

In [None]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

# Apply preprocessor to train data
result = mass_calc['image_file_path'].apply(lambda x: pd.Series(image_processor(x, target_size), index=['image_array', 'skipped_count', 'processed_count']))

# Extract processed images, skipped count, and processed count from the result
mass_calc[['processed_images', 'skipped_count', 'processed_count']] = result

# Filter out rows where processed_images is None
mass_calc = mass_calc.dropna(subset=['processed_images'])

In [None]:
# Convert the processed_images column to an array
X_resized = np.array(mass_calc['processed_images'].tolist())

# Create a binary mapping
class_mapper = {'MALIGNANT': 1, 'BENIGN': 0, 'BENIGN_WITHOUT_CALLBACK': 0}

# Apply class mapper to pathology column
mass_calc['labels'] = mass_calc['pathology'].replace(class_mapper)

# Check the number of classes in the dataset
num_classes = len(mass_calc['labels'].unique())

In [None]:
skipped_sum = mass_calc['skipped_count'].sum() 

print(f"Total skipped count: {skipped_sum}")

processed_count = mass_calc['processed_count'].sum() 

print(f"Total processed count: {processed_count}")

In [None]:
mass_calc['processed_count'].sum()

In [None]:
mass_calc

In [None]:
print('Number of Classes:', num_classes)

In [None]:
mass_calc.shape

### Train Test Split

In [None]:
unique_labels_count = mass_calc['labels'].nunique()
print("Total number of unique labels:", unique_labels_count)

In [None]:
filled_labels_count = mass_calc['labels'].count()

print("Total number of filled labels:", filled_labels_count)

In [None]:
print(len(X_resized))
print(len(mass_calc['labels'].values))

In [None]:
mass_calc = mass_calc.dropna(subset=['labels'])

In [None]:
print(len(X_resized))
print(len(mass_calc['labels'].values))

In [None]:
# Assuming mass_calc is your DataFrame and 'labels' is the column of interest
mass_calc = mass_calc.dropna(subset=['labels'])

In [None]:
# Rebuild the image array from the rows that survived the label filtering, so
# that image i always belongs to label i.
# Selecting with X_resized[mass_calc.index] would treat DataFrame index labels
# as array positions, which pairs images with the wrong rows whenever the
# index is not exactly 0..N-1 (e.g. repeated labels left over from
# concatenation, or gaps left by dropna).
X_resized = np.array(mass_calc['processed_images'].tolist())

In [None]:
print(len(X_resized))
print(len(mass_calc['labels'].values))

In [None]:
# Split data into train, test, and validation sets 
X_train, X_temp, y_train, y_temp = train_test_split(X_resized, mass_calc['labels'].values, test_size=0.2, random_state=42, 
                                                    shuffle=True, stratify=mass_calc['labels'].values)

In [None]:
# Split data into train, test, and validation sets 
# X_train, X_temp, y_train, y_temp = train_test_split(X_resized, mass_calc['labels'].values, test_size=0.2, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=0.2, random_state=42, shuffle=True, stratify=y_temp)

# Convert integer labels to one-hot encoded labels
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)
y_val = to_categorical(y_val, num_classes)

In [None]:
# Number of images in each set
num_train_images = X_train.shape[0]
num_test_images = X_test.shape[0]
num_val_images = X_val.shape[0]

print("Number of images in the training set:", num_train_images)
print("Number of images in the testing set:", num_test_images)
print("Number of images in the validation set:", num_val_images)

### Data Augmentation

In [None]:
# Data augmentation for the training images.
# No `rescale` here: image_processor() already converts the pixels to float32
# via tf.image.convert_image_dtype, and the validation/test arrays are passed
# to the model without going through a generator. Dividing by 255 again would
# put only the training batches on a different scale from the data the model
# is validated and tested on.
# NOTE(review): the Keras documentation states that EfficientNet models expect
# raw [0, 255] pixel inputs - confirm which scale the whole pipeline should use.
train_datagen = ImageDataGenerator(rotation_range=20,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   vertical_flip=True,
                                   fill_mode='nearest')

In [None]:
# Apply augmentation to training data
train_data_augmented = train_datagen.flow(X_train, y_train, batch_size=32)

In [None]:
from keras.applications import EfficientNetB2

# ImageNet-pretrained EfficientNetB2 used as a feature extractor: the
# classification top is left out (include_top=False).
# NOTE(review): the Keras documentation states that EfficientNet models expect
# raw [0, 255] pixel inputs - confirm the arrays fed to this model match.
base_model = EfficientNetB2(
    include_top=False,
    input_tensor=Input(shape=(224, 224, 3)),
    weights="imagenet",
    classes=2,
)

# Keep every backbone layer fixed so only the new head is trained
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# Classification head: pooling -> wide dense layer -> batch norm -> 2-way softmax
x = base_model.output
for head_layer in (
    GlobalAveragePooling2D(),
    Dense(2096, activation='silu'),
    BatchNormalization(),
):
    x = head_layer(x)
predictions = Dense(2, activation='softmax')(x)

# Full model: backbone input through to the softmax output
model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
model.summary()

## Model Report

* Number of layers - 4 layers added on top of the EfficientNetB2 backbone (GlobalAveragePooling2D, Dense, BatchNormalization, Dense), of which 2 are dense layers
    The Global Average Pooling layer is needed because the backbone is loaded without its classification top (include_top=False)
    The last dense layer is needed to produce the output for the 2 classes
* Number of units in each dense layer - 2096 and 2 units respectively
    The 2096-unit layer was added so that the model can learn complex patterns
* Total number of trainable parameters - 2961650 - as given in the model summary


In [None]:
# Cosine learning-rate schedule for the optimizer.
# NOTE(review): with CosineDecay's default alpha the rate decays to zero after
# `decay_steps` optimizer steps - confirm that 1000 steps and a starting rate
# of 0.05 are intended for the planned number of epochs.
decay_steps = 1000
initial_learning_rate = 0.05
lr_decayed_fn = keras.optimizers.schedules.CosineDecay(
    initial_learning_rate, decay_steps)

# Compile the model.
# The network ends in a 2-unit softmax and the targets are one-hot encoded,
# so categorical cross-entropy is the matching loss (binary cross-entropy is
# meant for independent sigmoid outputs).
model.compile(optimizer=AdamW(learning_rate=lr_decayed_fn),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Check the output shape of your model
print(model.output_shape)

In [None]:
# Define the EarlyStopping callback
early_stopping = EarlyStopping(
   monitor='val_loss',  # Monitor validation loss
   patience=5,  
   min_delta=0,  
   mode='min', 
   verbose=1,  
   restore_best_weights=True
)



In [None]:
# Train the model on the augmented batches and validate on the held-out arrays.
# `validation_steps` is not passed: the validation data is a pair of NumPy
# arrays, which Keras evaluates in full each epoch. Asking for len(X_val)
# validation batches would request far more batches than the arrays contain.
history = model.fit(
   train_data_augmented,
   epochs=30,
   validation_data=(X_val, y_val),
   steps_per_epoch=len(X_train) // 32,  # 32 = batch size used by train_data_augmented
   callbacks=[early_stopping]  # stop early when val_loss stops improving
)

### Plotting the results

In [None]:
#plot to visualize the loss and accuracy against number of epochs
plt.figure(figsize=(18,8))

plt.suptitle('Loss and Accuracy Plots', fontsize=18)

plt.subplot(1,2,1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.legend()
plt.xlabel('Number of epochs', fontsize=15)
plt.ylabel('Loss', fontsize=15)

plt.subplot(1,2,2)
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.legend()
plt.xlabel('Number of epochs', fontsize=14)
plt.ylabel('Accuracy', fontsize=14)
plt.show()

In [None]:
from sklearn.metrics import roc_curve, auc

# Use the trained model to predict probabilities for the test set
y_pred_prob = model.predict(X_test)

# Calculate the ROC curve
fpr, tpr, thresholds = roc_curve(y_test[:, 1], y_pred_prob[:, 1])
roc_auc = auc(fpr, tpr)

# Plot the ROC curve
plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc='lower right')
plt.show()

# Print the AUC score
print(f'AUC: {roc_auc:.2f}')

In [None]:
model.evaluate(X_test,y_test)

#### Test Accuracy = 56.65%
#### Test Loss = 0.6892

### Confusion matrix for the testing dataset

In [None]:
from sklearn.metrics import confusion_matrix
Y_pred = model.predict(X_test)
Y_pred_classes = np.argmax(Y_pred,axis = 1) 
Y_true = np.argmax(y_test,axis = 1) 

confusion_mtx = confusion_matrix(Y_true, Y_pred_classes) 
f,ax = plt.subplots(figsize=(8, 8))
sns.heatmap(confusion_mtx, annot=True, linewidths=0.01,cmap="BuPu",linecolor="gray", fmt= '.1f',ax=ax)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

In [None]:

# Calculate metrics
accuracy = accuracy_score(Y_true, Y_pred_classes)
recall = recall_score(Y_true, Y_pred_classes)
precision = precision_score(Y_true, Y_pred_classes)
f1 = f1_score(Y_true, Y_pred_classes)
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test F1 Score: {f1:.4f}")

In [None]:
history_dict = history.history

# plot training loss vs validation loss
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']
acc = history_dict['accuracy']

epochs = range(1, len(acc) + 1)

In [None]:
plt.plot(epochs, loss_values, 'b', label='Training Loss')
plt.plot(epochs, val_loss_values, 'r', label='Validation Loss')
plt.title('Training and Validation Loss', fontsize=12)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Plot training vs. validation accuracy per epoch
val_acc_values = history_dict['val_accuracy']
acc = history_dict['accuracy']

plt.plot(epochs, acc, 'b', label='Training Accuracy')
plt.plot(epochs, val_acc_values, 'r', label='Validation Accuracy')
plt.title('Training and Validation Accuracy', fontsize=12)
plt.xlabel('Epochs')
# The curves on this figure are accuracies, so label the axis accordingly
plt.ylabel('Accuracy')
plt.legend()
plt.show()