<a href="https://colab.research.google.com/github/kendraliu/Pneumonia-Prediction/blob/main/nn_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
"""from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)"""

import pandas as pd
import numpy as np
import random
import cv2
import os
%matplotlib inline
from IPython.display import Image, SVG
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

Image counts per split of the chest X-ray dataset ("pna" = pneumonia):

/test: 234 normal, 390 pna

/val: 8 normal, 8 pna

/train: 1341 normal, 3875 pna

In [2]:
def normalArray(dataPath, dataImage, dataLabel):
    """Load every readable image in ``dataPath`` as a NORMAL sample (label 0).

    Each entry is read with ``cv2.imread``, resized to 224x224 and scaled to
    [0, 1]. Entries OpenCV cannot decode (for example a stray ``.DS_Store``
    file) are reported and skipped instead of crashing inside ``cv2.resize``.

    Args:
        dataPath: Directory whose entries are the image files.
        dataImage: List the processed images are appended to (mutated in place).
        dataLabel: List that receives one ``0`` per loaded image (mutated in place).

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays built from the two lists.
    """
    for filename in os.listdir(dataPath):
        img_path = os.path.join(dataPath, filename)
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread returns None instead of raising when it cannot decode a file
            print("Skipping unreadable file:", img_path)
            continue
        image = cv2.resize(image, (224, 224))  # Resize to a common size
        image = image / 255.0  # Normalize pixel values to [0, 1]
        dataImage.append(image)
        dataLabel.append(0)
    dataImage = np.array(dataImage)
    dataLabel = np.array(dataLabel)
    return dataImage, dataLabel

In [3]:
def pnaArray(dataPath, dataImage, dataLabel):
    """Load every readable image in ``dataPath`` as a PNEUMONIA sample (label 1).

    Each entry is read with ``cv2.imread``, resized to 224x224 and scaled to
    [0, 1]. Entries OpenCV cannot decode (for example a stray ``.DS_Store``
    file) are reported and skipped instead of crashing inside ``cv2.resize``.

    Args:
        dataPath: Directory whose entries are the image files.
        dataImage: List the processed images are appended to (mutated in place).
        dataLabel: List that receives one ``1`` per loaded image (mutated in place).

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays built from the two lists.
    """
    for filename in os.listdir(dataPath):
        img_path = os.path.join(dataPath, filename)
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread returns None instead of raising when it cannot decode a file
            print("Skipping unreadable file:", img_path)
            continue
        image = cv2.resize(image, (224, 224))  # Resize to a common size
        image = image / 255.0  # Normalize pixel values to [0, 1]
        dataImage.append(image)
        dataLabel.append(1)
    dataImage = np.array(dataImage)
    dataLabel = np.array(dataLabel)
    return dataImage, dataLabel

In [4]:
def fitHistory(fit):
    """Draw the training accuracy and loss curves stored in ``fit.history``.

    Args:
        fit: Object whose ``history`` attribute can be turned into a DataFrame
            with ``accuracy`` and ``loss`` columns, one row per epoch.

    Returns:
        None. Two figures are shown via ``plt.show()``.
    """
    per_epoch = pd.DataFrame(fit.history)
    # Shift the 0-based row index so the x axis reads epoch 1, 2, 3, ...
    per_epoch.index = per_epoch.index + 1

    curves = (
        ("accuracy", "Accuracy History", "Accuracy"),
        ("loss", "Loss History", "Loss"),
    )
    for column, heading, y_label in curves:
        per_epoch.plot(y=column)
        plt.title(heading)
        plt.xlabel("Epoch")
        plt.ylabel(y_label)
        plt.show()

In [5]:
def calculate_dnn_predict_classes(model, X_val):
    """Turn the model's predicted scores into hard 0/1 class labels.

    Args:
        model: Object exposing ``predict(X)``.
        X_val: Inputs forwarded unchanged to ``model.predict``.

    Returns:
        Integer array shaped like the prediction output: 1 where the score is
        strictly greater than 0.5, otherwise 0.
    """
    scores = model.predict(X_val)
    above_threshold = scores > 0.5
    return above_threshold.astype(int)

def dnnEval(model, X_val, y_val):
    """Evaluate a binary classifier on a labelled set and print a metric summary.

    Prints the loss/accuracy reported by ``model.evaluate``, then the accuracy,
    confusion matrix and classification report computed from the
    0.5-thresholded predictions, and finally a one-row table of per-class rates.

    Args:
        model: Compiled model exposing ``evaluate`` and ``predict``;
            ``evaluate`` must return exactly ``(loss, accuracy)``.
        X_val: Inputs passed to the model.
        y_val: True binary labels (0 = normal, 1 = pneumonia).

    Returns:
        None. All results are printed.
    """
    val_loss, val_accuracy = model.evaluate(X_val, y_val)
    print("Validation Loss:", val_loss)
    print("Validation Accuracy:", val_accuracy)

    # Flatten the (n, 1) prediction column so it lines up with the 1-D labels
    dnn_predict_classes = np.ravel(calculate_dnn_predict_classes(model, X_val))

    # Calculate and print metrics
    accuracy = accuracy_score(y_val, dnn_predict_classes)
    print("Accuracy:", accuracy)

    # Pin the label order so the matrix is always 2x2 ([[TN, FP], [FN, TP]]),
    # even when one class is absent from both y_val and the predictions.
    cm = confusion_matrix(y_val, dnn_predict_classes, labels=[0, 1])
    (tn_count, fp_count), (fn_count, tp_count) = cm
    actual_neg = tn_count + fp_count
    actual_pos = fn_count + tp_count

    # Every rate is normalised by the size of its *actual* class, so
    # tp + fn == 1 and fp + tn == 1. NaN marks a class with no samples.
    nan = float("nan")
    tp = tp_count / actual_pos if actual_pos else nan
    fn = fn_count / actual_pos if actual_pos else nan
    fp = fp_count / actual_neg if actual_neg else nan
    tn = tn_count / actual_neg if actual_neg else nan

    print("Confusion Matrix:")
    print(cm)

    print("Classification Report:")
    print(classification_report(y_val, dnn_predict_classes))

    valSummary = pd.DataFrame({
    "accuracy": [accuracy],
    "true_positive": [tp],
    "false_negative": [fn],
    "false positive": [fp],
    "true_negative": [tn]
    })
    print(valSummary)

In [6]:
#def

In [7]:
# Show the working directory; the relative dataset paths used in this notebook resolve against it.
!pwd

/content


In [8]:
# Mount Google Drive at /content/drive so the dataset stored under MyDrive is readable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
# Dataset root inside the mounted Drive. The path is relative, so it resolves
# against the working directory (/content on Colab). Edit this one line to
# point the notebook at a different copy of the data.
DATA_ROOT = "drive/MyDrive/Pneumonia-Prediction/chest_xray"

# One folder per split and class.
testNormal = f"{DATA_ROOT}/test/NORMAL"
testPNA = f"{DATA_ROOT}/test/PNEUMONIA"
valNormal = f"{DATA_ROOT}/val/NORMAL"
valPNA = f"{DATA_ROOT}/val/PNEUMONIA"
trainNormal = f"{DATA_ROOT}/train/NORMAL"
# NOTE(review): the training pneumonia images appear to be split across
# numbered folders and only the first is used here — confirm this is intended.
trainPNA1 = f"{DATA_ROOT}/train/PNEUMONIA1"
#trainPNA2 = f"{DATA_ROOT}/train/PNEUMONIA2"
#trainPNA3 = f"{DATA_ROOT}/train/PNEUMONIA3"

In [10]:
# Sanity check: list the validation NORMAL folder and confirm it holds only image files.
# A hidden macOS ".DS_Store" entry is not an image, so the loaders would try to
# read and resize it like any other file.
os.listdir(valNormal) # check that there is no stray ".DS_Store" in the directory
# If there is one, run from the dataset root: find . -name ".DS_Store" -type f -delete
# (running it when none exists is harmless, so it can be run unconditionally)

['NORMAL2-IM-1440-0001.jpeg',
 'NORMAL2-IM-1438-0001.jpeg',
 'NORMAL2-IM-1431-0001.jpeg',
 'NORMAL2-IM-1427-0001.jpeg',
 'NORMAL2-IM-1437-0001.jpeg',
 'NORMAL2-IM-1442-0001.jpeg',
 'NORMAL2-IM-1436-0001.jpeg',
 'NORMAL2-IM-1430-0001.jpeg']

In [11]:
# images are X, labels are y
testNormalimage = []
testNormalLabel = []
testPNAimage = []
testPNALabel = []
valNormalimage = []
valNormalLabel = []
valPNAimage = []
valPNALabel = []
trainNormalimage = []
trainNormalLabel = []
trainPNAimage1 = []
trainPNALabel1= []


In [None]:
# Pass fresh lists instead of the names being rebound: after the first run
# `trainNormalimage` holds an ndarray, so feeding it back in would fail on `.append`.
trainNormalimage, trainNormalLabel = normalArray(trainNormal, [], [])
trainNormalimage.shape

In [None]:
# Pass fresh lists instead of the names being rebound: after the first run
# `trainPNAimage1` holds an ndarray, so feeding it back in would fail on `.append`.
trainPNAimage1, trainPNALabel1 = pnaArray(trainPNA1, [], [])
trainPNAimage1.shape

In [None]:
# Stack NORMAL then PNEUMONIA training images along the sample axis (axis 0 is the default).
X_train = np.concatenate([trainNormalimage, trainPNAimage1])
X_train.shape

In [None]:
# Labels in the same NORMAL-then-PNEUMONIA order as X_train.
y_train = np.concatenate([trainNormalLabel, trainPNALabel1])
y_train

In [None]:
# The label count should match the first dimension of X_train.
y_train.shape

In [None]:
# Pass fresh lists instead of the names being rebound: after the first run
# `valNormalimage` holds an ndarray, so feeding it back in would fail on `.append`.
valNormalimage, valNormalLabel = normalArray(valNormal, [], [])
valNormalimage.shape

In [None]:
# Pass fresh lists instead of the names being rebound: after the first run
# `valPNAimage` holds an ndarray, so feeding it back in would fail on `.append`.
valPNAimage, valPNALabel = pnaArray(valPNA, [], [])
valPNAimage.shape

In [None]:
# Inspect the labels: every entry should be 1, the value pnaArray assigns.
valPNALabel

In [None]:
# Stack NORMAL then PNEUMONIA validation images along the sample axis (axis 0 is the default).
X_val = np.concatenate([valNormalimage, valPNAimage])

In [None]:
# Labels in the same NORMAL-then-PNEUMONIA order as X_val.
y_val = np.concatenate([valNormalLabel, valPNALabel])
y_val

In [None]:
# Pass fresh lists instead of the names being rebound: after the first run
# `testNormalimage` holds an ndarray, so feeding it back in would fail on `.append`.
testNormalimage, testNormalLabel = normalArray(testNormal, [], [])
testNormalimage.shape

In [None]:
# Pass fresh lists instead of the names being rebound: after the first run
# `testPNAimage` holds an ndarray, so feeding it back in would fail on `.append`.
testPNAimage, testPNALabel = pnaArray(testPNA, [], [])
testPNAimage.shape

In [None]:
# Stack NORMAL then PNEUMONIA test images along the sample axis (axis 0 is the default).
X_test = np.concatenate([testNormalimage, testPNAimage])
X_test.shape

In [None]:
# Labels in the same NORMAL-then-PNEUMONIA order as X_test.
y_test = np.concatenate([testNormalLabel, testPNALabel])
y_test.shape

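Resplit: pool the original val and test sets, then divide the pooled data into 40% validation and 60% test.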

In [None]:
# Pool the original val and test folders class by class...
combinedValTestNormalImage = np.concatenate([valNormalimage, testNormalimage])
combinedValTestPNAImage = np.concatenate([valPNAimage, testPNAimage])
combinedValTestNormalLabel = np.concatenate([valNormalLabel, testNormalLabel])
combinedValTestPNALabel = np.concatenate([valPNALabel, testPNALabel])

# ...then join the two classes (NORMAL first) into one feature array and one label array.
combinedXValTest = np.concatenate([combinedValTestNormalImage, combinedValTestPNAImage])
combinedyValTest = np.concatenate([combinedValTestNormalLabel, combinedValTestPNALabel])

# 40% of the pool becomes the new validation set and 60% the new test set;
# the fixed random_state keeps the partition the same on every run.
X_val40, X_test60, y_val40, y_test60 = train_test_split(
    combinedXValTest,
    combinedyValTest,
    test_size=0.6,
    random_state=42,
)
print(X_val40.shape, X_test60.shape, y_val40.shape, y_test60.shape, sep="\n")

In [None]:
# Disabled test-set subsampling, wrapped in a string literal so none of it runs.
# As the only expression in the cell, the string itself is echoed as the cell output.
"""#test
random_index = random.sample(range(len(X_test60)), 80)
X_test_sampled = np.array([X_test60[i] for i in random_index])
y_test_sampled = np.array([y_test60[i] for i in random_index])
print(X_test_sampled.shape)
print(y_test_sampled.shape)"""

In [None]:
#val
# Draw a small validation subset. A private, seeded generator makes the same
# rows come out on every run; min() keeps the draw valid for a small pool.
random_index = random.Random(42).sample(range(len(X_val40)), min(30, len(X_val40)))
# Integer-list indexing picks the chosen rows in the sampled order.
X_val_sampled = X_val40[random_index]
y_val_sampled = y_val40[random_index]
print(X_val_sampled.shape)
print(y_val_sampled)

In [None]:
# Draw a small training subset. A private, seeded generator makes the same
# rows come out on every run; min() keeps the draw valid for a small pool.
random_index = random.Random(42).sample(range(len(X_train)), min(150, len(X_train)))
# Integer-list indexing picks the chosen rows in the sampled order.
X_train_sampled = X_train[random_index]
y_train_sampled = y_train[random_index]
print(X_train_sampled.shape)
print(y_train_sampled.shape)

In [None]:
# Flatten every image into a single feature row so it can feed a Dense layer.
X_train_sampled_reshaped = X_train_sampled.reshape(len(X_train_sampled), -1)
print(X_train_sampled_reshaped.shape)

X_val_sampled_reshaped = X_val_sampled.reshape(len(X_val_sampled), -1)
print(X_val_sampled_reshaped.shape)

# Disabled test-set equivalent, kept as an inert string literal.
"""X_test_sampled_reshaped = X_test_sampled.reshape(X_test_sampled.shape[0], -1)
print(X_test_sampled_reshaped.shape)"""

# DNN

In [None]:
# Start from an empty Sequential container; the layers are attached in the next cell.
dnnModel = tf.keras.models.Sequential()

In [None]:
#testing nn
# Build the whole network in one expression so re-running this cell yields a
# fresh model instead of stacking a second copy of the layers onto the old one.
dnnModel = tf.keras.models.Sequential([
    # Input width is taken from the flattened training data (224 * 224 * 3 = 150528
    # for the images produced by the loaders) rather than hard-coded.
    tf.keras.layers.Dense(
        units=15,
        activation=tf.keras.layers.LeakyReLU(alpha=0.2),
        input_dim=X_train_sampled_reshaped.shape[1],
    ),
    tf.keras.layers.Dense(units=7, activation=tf.keras.layers.LeakyReLU(alpha=0.2)),
    #tf.keras.layers.Dense(units=2, activation=tf.keras.layers.LeakyReLU(alpha=0.2)),
    # Single sigmoid unit: the output is the predicted probability of class 1.
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

In [None]:
print("train: ", X_train_sampled_reshaped.shape[0])
print("val: ", X_val_sampled_reshaped.shape[0])
#print("test: ", X_test_sampled_reshaped.shape[0])
# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line to the output.
dnnModel.summary()

dnnModel.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train on the sampled subset; the returned history feeds fitHistory().
fit8 = dnnModel.fit(X_train_sampled_reshaped, y_train_sampled, epochs=100)

In [None]:
# Plot the training accuracy and loss per epoch for the run above.
fitHistory(fit8)

In [None]:
# Score the trained model on the sampled validation subset (flattened the same way as the training data).
dnnEval(dnnModel, X_val_sampled_reshaped, y_val_sampled)

In [None]:
# Show the first sampled validation image (unflattened, still height x width x channels).
first_val_image = X_val_sampled[0]
plt.imshow(first_val_image, cmap=plt.cm.Greys)

In [None]:
# Confirm the sampled validation images are still in image layout (not flattened).
X_val_sampled.shape

In [None]:
# `predictions` is not defined anywhere else in the notebook, so compute it
# here: one sigmoid output (probability of class 1) per sampled validation image.
predictions = dnnModel.predict(X_val_sampled_reshaped)

for i in range(len(X_val_sampled)):
    image = X_val_sampled[i]
    # Scalar probability of the positive class for this image
    prediction = float(predictions[i][0])

    plt.figure(figsize=(8, 4))

    # Display the image
    plt.subplot(1, 2, 1)
    plt.imshow(image, cmap=plt.cm.Greys)
    plt.axis('off')
    plt.title('Image')

    # Display the prediction: two bars need two heights, P(negative) and P(positive)
    plt.subplot(1, 2, 2)
    plt.bar(['Negative', 'Positive'], [1 - prediction, prediction], color=['blue', 'orange'])
    plt.ylabel('Prediction Probability')
    plt.ylim([0, 1])
    plt.title('Prediction')

    plt.tight_layout()
    plt.show()

In [None]:
# Predict once, outside the loop: the model output does not depend on `i`.
# The raw sigmoid output is plotted so the "Prediction Probability" axis shows
# an actual probability rather than a thresholded 0/1 class.
probabilities = dnnModel.predict(X_val_sampled_reshaped)

for i in range(1,2):
    image = X_val_sampled_reshaped[i]
    positive_prob = float(probabilities[i][0])
    true_label = y_val_sampled[i]

    plt.figure(figsize=(10, 4))

    # Display the image
    plt.subplot(1, 3, 1)
    plt.imshow(image.reshape(224, 224, 3), cmap=plt.cm.Greys)  # Reshape the image back to (224, 224, 3)
    plt.axis('off')
    plt.title('Image')

    # Display the prediction
    plt.subplot(1, 3, 2)
    plt.bar(['Negative', 'Positive'], [1 - positive_prob, positive_prob], color=['blue', 'orange'])
    plt.ylabel('Prediction Probability')
    plt.ylim([0, 1])
    plt.title('Prediction')

    # Display the true label as a one-hot pair of bars
    plt.subplot(1, 3, 3)
    plt.bar(['Negative', 'Positive'], [1 - true_label, true_label], color=['blue', 'orange'])
    plt.ylabel('True Label')
    plt.ylim([0, 1])
    plt.title('True Label')

    plt.tight_layout()
    plt.show()