CHEST-XRAY DETECTION

In [6]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import LabelEncoder

In [7]:
# Root folder of the chest X-ray dataset; the 'train', 'test' and 'val'
# split folders used later in the notebook are resolved relative to this path.
data_dir = '/kaggle/input/chest-xray-pneumonia/chest_xray'

In [8]:

# Step 1: Preprocess the dataset
def load_dataset(directory):
    """Load every readable image below ``directory`` into memory.

    ``directory`` is expected to contain one sub-directory per class; the
    sub-directory name becomes the label of every image found inside it.

    Args:
        directory: Path of a folder whose immediate sub-directories are the
            class folders.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays. ``images`` holds each
        image resized to 256x256 as returned by ``cv2.imread``/``cv2.resize``;
        ``labels`` holds the matching class-folder names. Both arrays are
        empty when no readable image is found.
    """
    images = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split done later) reproducible.
    for label in sorted(os.listdir(directory)):
        label_dir = os.path.join(directory, label)
        # Stray files next to the class folders (e.g. '.DS_Store') are not
        # classes and would make os.listdir(label_dir) raise.
        if not os.path.isdir(label_dir):
            continue
        for image_file in sorted(os.listdir(label_dir)):
            image_path = os.path.join(label_dir, image_file)
            image = cv2.imread(image_path)
            # cv2.imread signals an unreadable / non-image file by returning
            # None instead of raising; skip it rather than crash in resize.
            if image is None:
                continue
            # Resize the images to a consistent size
            images.append(cv2.resize(image, (256, 256)))
            labels.append(label)
    return np.array(images), np.array(labels)

In [9]:
# Resolve the three split folders that sit directly under data_dir.
train_dir, test_dir, val_dir = (
    os.path.join(data_dir, split_name) for split_name in ('train', 'test', 'val')
)

In [10]:
# Read each split into memory as an (images, labels) pair, in the order
# train -> test -> val.
(x_train, y_train), (x_test, y_test), (x_val, y_val) = (
    load_dataset(split_dir) for split_dir in (train_dir, test_dir, val_dir)
)

In [11]:
# Rescale pixel intensities to float32 by dividing by 255
# (presumably mapping 8-bit values into [0, 1]).
x_train = np.asarray(x_train, dtype=np.float32) / 255.0
x_test = np.asarray(x_test, dtype=np.float32) / 255.0
x_val = np.asarray(x_val, dtype=np.float32) / 255.0

In [12]:

# Turn the class-name strings into integer ids. The encoder is fitted on the
# training labels only and then applied unchanged to all three splits.
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
y_train, y_test, y_val = (
    label_encoder.transform(split_labels)
    for split_labels in (y_train, y_test, y_val)
)

In [13]:
# Split the dataset into training and validation sets (80% / 20%).
# NOTE: this rebinds x_val / y_val, so the arrays previously loaded from
# val_dir are no longer referenced after this cell runs.
# stratify=y_train keeps the class proportions identical in both parts, so the
# validation metrics are not skewed by an unlucky draw of the minority class.
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=42,
    stratify=y_train,
)

In [14]:

# Step 2: Build the CNN model
# Two convolution + max-pooling stages feed a flattened 64-unit dense layer
# and a single sigmoid output unit.
model_CNN = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Print a summary of the layers that make up model_CNN.
model_CNN.summary()

In [None]:
# Step 3: Train the model
# Binary cross-entropy matches the single sigmoid output unit; accuracy is
# tracked on both the training data and the (x_val, y_val) validation split.
model_CNN.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
history = model_CNN.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    batch_size=32,
    epochs=10,
)

In [17]:
#from sklearn.svm import SVC
# Assuming model_CNN is already trained
# Extract features from the CNN model
#from tensorflow.keras import Model

# Define a new model to extract features from the CNN layers
#feature_extractor = Model(inputs=model_CNN.input, outputs=model_CNN.get_layer('flatten').output)

# Extract features from the trained CNN for train, validation, and test sets
#x_train_features = feature_extractor.predict(x_train)
#x_val_features = feature_extractor.predict(x_val)
#x_test_features = feature_extractor.predict(x_test)


# Initialize and train the SVM
#svm = SVC(kernel='linear')  # You can change the kernel as per your requirement
#svm.fit(x_train_features, y_train)

# Evaluate SVM on the validation set
#svm_val_accuracy = svm.score(x_val_features, y_val)
#print('SVM Validation Accuracy:', svm_val_accuracy)

# Evaluate SVM on the test set
#svm_test_accuracy = svm.score(x_test_features, y_test)
#print('SVM Test Accuracy:', svm_test_accuracy)

In [None]:
# Evaluate the trained CNN on the test split, streamed from disk in batches.
img_width = 256
img_height = 256
test_datagen = ImageDataGenerator(rescale=1.0/255.0)
test_generator1 = test_datagen.flow_from_directory(
    # The metrics below are reported as *test* metrics, so read the test
    # folder (the original pointed at val_dir).
    test_dir,
    # Keras expects target_size as (height, width).
    target_size=(img_height, img_width),
    batch_size=32,
    class_mode='binary',
    # Keep a fixed batch order so repeated evaluations are deterministic.
    shuffle=False,
)
# NOTE(review): the generator decodes images with Keras' own loader, whereas
# the training arrays were read with cv2 - confirm both pipelines yield the
# same pixel values for these images.

loss1, accuracy1 = model_CNN.evaluate(test_generator1)
print('Test Loss:', loss1)
print('Test Accuracy:', accuracy1)

In [None]:
# precision_score / recall_score are not imported anywhere else in the
# notebook; without this import the cell fails with NameError on a fresh kernel.
from sklearn.metrics import precision_score, recall_score

# Predict probabilities on validation data
y_pred_prob = model_CNN.predict(x_val)
# Threshold probabilities to get predicted classes
y_pred = (y_pred_prob > 0.5).astype(int)

# Calculate precision and recall (positive class = label id 1)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)

print('Precision:', precision)
print('Recall:', recall)


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt  # Importing matplotlib.pyplot module

# Get the predicted probabilities for the validation set
y_pred_prob = model_CNN.predict(x_val)
# Convert probabilities to class labels.
# The model has a single sigmoid output, so y_pred_prob has one column and
# np.argmax(..., axis=1) would always return 0 (every sample predicted as
# class 0). Threshold the probability at 0.5 instead.
y_pred = (y_pred_prob.ravel() > 0.5).astype(int)

# Compute confusion matrix (rows: true labels, columns: predicted labels)
conf_matrix = confusion_matrix(y_val, y_pred)

# Plot confusion matrix, using the encoder's class names as tick labels
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=label_encoder.classes_,
            yticklabels=label_encoder.classes_)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()




In [None]:
from sklearn.metrics import f1_score

# Score the validation set and binarise the sigmoid outputs at 0.5.
y_pred = model_CNN.predict(x_val)
y_pred = np.where(y_pred > 0.5, 1, 0)

# F1 combines precision and recall of the positive class into one number.
f1score = f1_score(y_val, y_pred)
print('F1 Score:', f1score)




In [None]:
from sklearn.metrics import roc_curve, auc

# Step 4: Make predictions on the validation set
y_pred_val = model_CNN.predict(x_val)

# Step 5: Compute ROC curve and AUC for each class
# LabelEncoder assigns ids in sorted order, so the normal class is 0 and the
# pneumonia class is 1; the sigmoid output is therefore the pneumonia score.
# The direct curve belongs to pneumonia, the inverted one to normal (the
# original had the two legend labels swapped).
fpr_pneumonia, tpr_pneumonia, _ = roc_curve(y_val, y_pred_val)
roc_auc_pneumonia = auc(fpr_pneumonia, tpr_pneumonia)

fpr_normal, tpr_normal, _ = roc_curve(1 - y_val, 1 - y_pred_val)  # Inverting labels and scores for the normal class
roc_auc_normal = auc(fpr_normal, tpr_normal)

# Step 6: Plot ROC curve for both classes
plt.figure(figsize=(8, 6))
plt.plot(fpr_normal, tpr_normal, color='blue', lw=2, label='Normal (AUC = %0.2f)' % roc_auc_normal)
plt.plot(fpr_pneumonia, tpr_pneumonia, color='red', lw=2, label='Pneumonia (AUC = %0.2f)' % roc_auc_pneumonia)
plt.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--')  # chance-level diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()




In [None]:
import matplotlib.pyplot as plt

# One figure per training curve. Each row gives: training key, validation key,
# title, y-axis label, legend corner and output file name.
for train_key, val_key, plot_title, y_label, legend_loc, out_file in (
    ('accuracy', 'val_accuracy', 'CNN Model Accuracy', 'Accuracy', 'lower right', 'Accuracy.png'),
    ('loss', 'val_loss', 'CNN Model Loss', 'Loss', 'upper right', 'Loss.png'),
):
    beingsaved = plt.figure(figsize=(7, 5))
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(plot_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend(['Train', 'Validation'], loc=legend_loc)
    # Save before show(), while the figure is still open.
    beingsaved.savefig(out_file, format='png', dpi=600, bbox_inches="tight")
    plt.show()


In [None]:
from sklearn.model_selection import KFold


def _build_cnn():
    """Return a freshly initialised, compiled CNN with the notebook's architecture."""
    cnn = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 3)),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    cnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return cnn


def _plot_fold_curve(fold_history, metric, title, ylabel, legend_loc, out_file):
    """Plot train vs. validation ``metric`` per epoch, save it to ``out_file`` and show it."""
    fig = plt.figure(figsize=(7, 5))
    plt.plot(fold_history.history[metric])
    plt.plot(fold_history.history[f'val_{metric}'])
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.legend(['Train', 'Validation'], loc=legend_loc)
    plt.savefig(out_file, format='png', dpi=600, bbox_inches="tight")
    plt.show()
    # Release the figure explicitly so figures do not pile up across folds.
    plt.close(fig)


# Define the number of folds
num_folds = 5
# A fixed random_state makes the shuffled fold assignment reproducible
# between runs (same seed as the train/validation split).
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=42)

# Lists to store accuracy and loss for each fold:
#   fold_accuracy / fold_loss         -> metrics on the fold's own held-out part
#   fold_val_accuracy / fold_val_loss -> metrics on the fixed (x_val, y_val) split
fold_accuracy = []
fold_val_accuracy = []
fold_loss = []
fold_val_loss = []

# Loop through each fold
for fold, (train_idx, val_idx) in enumerate(kfold.split(x_train, y_train), 1):
    print(f'Fold {fold}/{num_folds}')

    # Split the data into train and validation sets for this fold
    x_fold_train, x_fold_val = x_train[train_idx], x_train[val_idx]
    y_fold_train, y_fold_val = y_train[train_idx], y_train[val_idx]

    # Build a fresh model per fold so no weights leak between folds. It is
    # kept in fold-local names so the model_CNN / history objects produced by
    # the main training run are not overwritten.
    fold_model = _build_cnn()

    # Train the model for this fold
    fold_history = fold_model.fit(
        x_fold_train,
        y_fold_train,
        epochs=20,
        batch_size=32,
        validation_data=(x_fold_val, y_fold_val),
    )

    # Evaluate model on validation data for this fold
    loss, accuracy = fold_model.evaluate(x_fold_val, y_fold_val)
    fold_loss.append(loss)
    fold_accuracy.append(accuracy)

    # Evaluate model on the fixed hold-out validation split (x_val, y_val),
    # which is never part of any fold
    val_loss, val_accuracy = fold_model.evaluate(x_val, y_val)
    fold_val_loss.append(val_loss)
    fold_val_accuracy.append(val_accuracy)

    # Plot accuracy versus epoch for this fold
    _plot_fold_curve(
        fold_history,
        'accuracy',
        f'CNN Model Accuracy - Fold {fold}',
        'Accuracy',
        'lower right',
        f'Accuracy_fold_{fold}.png',
    )

    # Plot loss versus epoch for this fold
    _plot_fold_curve(
        fold_history,
        'loss',
        f'CNN Model Loss - Fold {fold}',
        'Loss',
        'upper right',
        f'Loss_fold_{fold}.png',
    )

# Print the mean and standard deviation of metrics across all folds
print(f'Mean Accuracy across {num_folds} folds:', np.mean(fold_accuracy))
print(f'Std Deviation of Accuracy across {num_folds} folds:', np.std(fold_accuracy))
print(f'Mean Validation Accuracy across {num_folds} folds:', np.mean(fold_val_accuracy))
print(f'Std Deviation of Validation Accuracy across {num_folds} folds:', np.std(fold_val_accuracy))
print(f'Mean Loss across {num_folds} folds:', np.mean(fold_loss))
print(f'Std Deviation of Loss across {num_folds} folds:', np.std(fold_loss))
print(f'Mean Validation Loss across {num_folds} folds:', np.mean(fold_val_loss))
print(f'Std Deviation of Validation Loss across {num_folds} folds:', np.std(fold_val_loss))
