### 1. Import Libraries

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Activation, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator, image
from tensorflow.keras.applications.vgg16 import VGG16

### 2. Data Preparation

In [None]:
# Class labels; a label's position in this list is used as its numeric class id.
CATEGORIES = ['NORMAL', 'PNEUMONIA']

# Root folder of the training split; it is joined with each category name.
DataDir = r'C:\Users\rasik\Downloads\X-Ray Kaggle data\chest_xray\train'

# Filled with [image_array, class_id] pairs while the images are loaded.
training_data = []

# Every image is resized to img_size x img_size pixels.
img_size = 100
def create_training_data():
    """Load the training images into the module-level ``training_data`` list.

    For every label in ``CATEGORIES`` the folder ``DataDir/<label>`` is
    listed, each file is read as a grayscale image, resized to
    ``img_size`` x ``img_size`` and appended to ``training_data`` as
    ``[image_array, class_index]``, where ``class_index`` is the label's
    position in ``CATEGORIES``.

    Files that OpenCV cannot decode are skipped; the number of skipped
    files is printed once at the end instead of being silently ignored.

    Returns:
        None. The result is accumulated in ``training_data``.
    """
    skipped = 0
    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(DataDir, category)

        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            try:
                img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img_array is None:
                    # cv2.imread reports an undecodable file by returning
                    # None rather than raising.
                    skipped += 1
                    continue
                new_array = cv2.resize(img_array, (img_size, img_size))
            except cv2.error as err:
                # Keep loading best-effort, but say which file failed and why.
                print(f"create_training_data: skipping {img_path}: {err}")
                skipped += 1
                continue
            training_data.append([new_array, class_num])

    if skipped:
        print(f"create_training_data: skipped {skipped} unreadable file(s)")
# Load the training images, then randomise their order.
create_training_data()
import random
random.shuffle(training_data)

# Splitting the features (image arrays) from the labels (class ids).
X = [features for features, _ in training_data]
y = [label for _, label in training_data]

y = np.array(y)
# Reshape to (n_samples, img_size, img_size, 1): one channel per image.
X = np.array(X).reshape(-1, img_size, img_size, 1)

In [None]:
# Filled with [image_array, class_id] pairs for the validation split.
validation_data = []

# Root folder of the validation split; it is joined with each category name.
DataDir_val = r'C:\Users\rasik\Downloads\X-Ray Kaggle data\chest_xray\val'
def create_validating_data():
    """Load the validation images into the module-level ``validation_data`` list.

    For every label in ``CATEGORIES`` the folder ``DataDir_val/<label>`` is
    listed, each file is read as a grayscale image, resized to
    ``img_size`` x ``img_size`` and appended to ``validation_data`` as
    ``[image_array, class_index]``, where ``class_index`` is the label's
    position in ``CATEGORIES``.

    Files that OpenCV cannot decode are skipped; the number of skipped
    files is printed once at the end instead of being silently ignored.

    Returns:
        None. The result is accumulated in ``validation_data``.
    """
    skipped = 0
    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(DataDir_val, category)

        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            try:
                img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img_array is None:
                    # cv2.imread reports an undecodable file by returning
                    # None rather than raising.
                    skipped += 1
                    continue
                new_array = cv2.resize(img_array, (img_size, img_size))
            except cv2.error as err:
                # Keep loading best-effort, but say which file failed and why.
                print(f"create_validating_data: skipping {img_path}: {err}")
                skipped += 1
                continue
            validation_data.append([new_array, class_num])

    if skipped:
        print(f"create_validating_data: skipped {skipped} unreadable file(s)")
# Load the validation images, then randomise their order.
create_validating_data()
import random
random.shuffle(validation_data)

# Separate the image arrays from their class ids.
X_val = [features for features, _ in validation_data]
y_val = [label for _, label in validation_data]

y_val = np.array(y_val)
# Reshape to (n_samples, img_size, img_size, 1): one channel per image.
X_val = np.array(X_val).reshape(-1, img_size, img_size, 1)

# Divide pixel values by 255 for both splits. X is overwritten in place, so
# re-running this cell without reloading X would scale it a second time.
X = X / 255.0
# The scaled validation images live under the lower-case name x_val;
# X_val keeps the unscaled array.
x_val = X_val / 255.0

### 3. Build CNN Model from Scratch

In [None]:
# CNN trained from scratch: three Conv -> ReLU -> MaxPool -> Dropout stages
# followed by a small dense head with a single sigmoid output.
model = Sequential()

# Stage 1: the input shape is taken from the training array (all axes but the batch axis).
model.add(Conv2D(64, (3, 3), input_shape=X.shape[1:]))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(.2))

# Stage 2
model.add(Conv2D(128, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(.2))

# Stage 3
model.add(Conv2D(256, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(.2))

# Classifier head
model.add(Flatten())
model.add(Dense(64))
# The hidden dense layer needs a non-linearity: without one, Dense(64) and
# Dense(1) compose into a single linear map and the extra layer adds nothing.
model.add(Activation('relu'))

model.add(Dropout(.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

# Binary cross-entropy pairs with the single sigmoid output (labels are 0/1).
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# `history` is kept so the per-epoch metrics can be plotted afterwards.
history = model.fit(X, y, batch_size=4, epochs=10, validation_data=(x_val, y_val))

### 4. Evaluation

In [None]:
# Evaluate the CNN trained from scratch (`model`) on the test split.
test_dir = r'C:\Users\rasik\Downloads\X-Ray Kaggle data\chest_xray\test'

# Class labels; a label's position in the list is its numeric class id.
CATEGORIES = ["NORMAL", "PNEUMONIA"]

# Side length (pixels) the test images are resized to.
img_size = 100

# Test images and their class ids, filled in the same order.
X_test = []
y_test = []
skipped_files = 0

# Walk each category folder and load its images as resized grayscale arrays.
for class_num, category in enumerate(CATEGORIES):
    path = os.path.join(test_dir, category)
    for img in os.listdir(path):
        img_path = os.path.join(path, img)
        try:
            img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img_array is None:
                # cv2.imread reports an undecodable file by returning None
                # rather than raising.
                skipped_files += 1
                continue
            new_array = cv2.resize(img_array, (img_size, img_size))
        except cv2.error as err:
            # Keep loading best-effort, but say which file failed and why.
            print(f"Skipping {img_path}: {err}")
            skipped_files += 1
            continue
        X_test.append(new_array)
        y_test.append(class_num)

if skipped_files:
    print(f"Skipped {skipped_files} unreadable test file(s)")

# Convert to arrays; images get a trailing single-channel axis.
X_test = np.array(X_test).reshape(-1, img_size, img_size, 1)
y_test = np.array(y_test)

# Divide pixel values by 255, as was done for the training images.
X_test = X_test / 255.0

# Loss and accuracy on the test split.
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)

print('Test accuracy:', test_acc)

20/20 - 4s - 178ms/step - accuracy: 0.8590 - loss: 0.7382
Test accuracy: 0.8589743375778198

In [None]:
import matplotlib.pyplot as plt

# (history key, legend label) for each curve; all four share one axis.
curves = [
    ('accuracy', 'train accuracy'),
    ('val_accuracy', 'val accuracy'),
    ('loss', 'train loss'),
    ('val_loss', 'val loss'),
]
for metric_key, curve_label in curves:
    plt.plot(history.history[metric_key], label=curve_label)

plt.legend()
plt.title("Training & Validation Metrics")
plt.xlabel("Epoch")
plt.ylabel("Value")
plt.grid()
plt.show()

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Display names, in class-id order (0, 1).
class_names = ['Normal', 'Pneumonia']

# Score the validation images; sigmoid outputs above 0.5 become class 1.
y_pred = model.predict(x_val)
y_pred_classes = (y_pred > 0.5).astype("int32")

# Confusion matrix of true vs. predicted class ids, drawn as a heatmap.
cm = confusion_matrix(y_val, y_pred_classes)

sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Per-class precision / recall / F1 summary.
print(classification_report(y_val, y_pred_classes, target_names=class_names))

### CNN From Scratch:
-  ✔ Validation Accuracy: ~0.9375
-  ✔ Test Accuracy: ~0.8589

### 5. Hyperparameter Optimization

In [None]:
def build_model(hp):
    """Build and compile the CNN for one hyperparameter-search trial.

    The network is four Conv(3x3, ReLU) + MaxPool(2x2) stages with 64, 128,
    256 and 512 filters on a fixed (100, 100, 1) input, followed by a
    flatten, one ReLU dense layer, dropout and a single sigmoid output.

    Args:
        hp: Hyperparameter container supplied by the tuner. Three values are
            drawn from it: ``dense_units`` (64 or 128), ``dropout_rate``
            (0.3 to 0.5 in steps of 0.1) and ``learning_rate``
            (1e-3, 1e-4 or 5e-4).

    Returns:
        The model, compiled with Adam, binary cross-entropy loss and the
        accuracy metric.
    """
    # First stage carries the input shape; its filter count is fixed (not tuned).
    stack = [
        Conv2D(
            filters=64,
            kernel_size=(3, 3),
            activation='relu',
            input_shape=(100, 100, 1),
        ),
        MaxPooling2D(pool_size=(2, 2)),
    ]

    # Stages 2-4 differ only in their filter count.
    for n_filters in (128, 256, 512):
        stack.append(Conv2D(n_filters, (3, 3), activation='relu'))
        stack.append(MaxPooling2D(pool_size=(2, 2)))

    stack.append(Flatten())

    # Tunable dense head.
    stack.append(Dense(units=hp.Choice('dense_units', [64, 128]), activation='relu'))
    stack.append(Dropout(hp.Float('dropout_rate', 0.3, 0.5, step=0.1)))

    # Single sigmoid unit for the binary decision.
    stack.append(Dense(1, activation='sigmoid'))

    model = Sequential(stack)

    optimizer = Adam(learning_rate=hp.Choice('learning_rate', [1e-3, 1e-4, 5e-4]))
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    return model
from keras_tuner import RandomSearch

class MyTuner(RandomSearch):
    """RandomSearch variant that also samples the batch size and epoch count."""

    def run_trial(self, trial, *args, **kwargs):
        """Run one trial with a sampled ``batch_size`` and ``epochs``.

        Both values are drawn from the trial's hyperparameters and written
        into ``kwargs`` (replacing any caller-supplied values) before the
        call is delegated to ``RandomSearch.run_trial``; presumably the base
        tuner forwards them to ``model.fit``.

        Returns:
            Whatever ``RandomSearch.run_trial`` returns.
        """
        space = trial.hyperparameters

        # Dict literals evaluate in order, so batch_size is registered before epochs.
        fit_overrides = {
            'batch_size': space.Choice('batch_size', [4, 8, 16]),
            'epochs': space.Choice('epochs', [10, 15, 20]),
        }
        kwargs.update(fit_overrides)

        return super().run_trial(trial, *args, **kwargs)
from tensorflow.keras.optimizers import Adam
# Random search over the space declared in build_model plus the batch-size
# and epoch choices added by MyTuner.run_trial; trials are ranked by
# validation accuracy.
tuner = MyTuner(
    build_model,
    directory='hpo_dir',
    project_name='pneumonia_cnn_batchsize_epochs',
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=1,
)

from tensorflow.keras.callbacks import EarlyStopping

# Ends a trial early when validation loss stops improving (patience of 3 epochs).
early_stop = EarlyStopping(patience=3, monitor='val_loss')

# Hold-out data used both during the search and for the final check below.
holdout = (x_val, y_val)

tuner.search(
    X,
    y,
    validation_data=holdout,
    callbacks=[early_stop],
)

# Top-ranked model and the hyperparameters that produced it.
best_model = tuner.get_best_models(num_models=1)[0]
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]

print("✅ Best Hyperparameters:")
print(f"Batch Size: {best_hp.get('batch_size')}")
print(f"Epochs: {best_hp.get('epochs')}")
print(f"Learning Rate: {best_hp.get('learning_rate')}")
print(f"Dense Units: {best_hp.get('dense_units')}")
print(f"Dropout: {best_hp.get('dropout_rate')}")

# Score the selected model on the same validation data used for selection.
loss, acc = best_model.evaluate(*holdout)
print(f"Validation Accuracy: {acc:.4f}")

In [None]:
# Evaluate the tuned CNN (`best_model`) on the test split.
test_dir = r'C:\Users\rasik\Downloads\X-Ray Kaggle data\chest_xray\test'

# Class labels; a label's position in the list is its numeric class id.
CATEGORIES = ["NORMAL", "PNEUMONIA"]

# Side length (pixels) the test images are resized to.
img_size = 100

# Test images and their class ids, filled in the same order.
X_test = []
y_test = []
skipped_files = 0

# Walk each category folder and load its images as resized grayscale arrays.
for class_num, category in enumerate(CATEGORIES):
    path = os.path.join(test_dir, category)
    for img in os.listdir(path):
        img_path = os.path.join(path, img)
        try:
            img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img_array is None:
                # cv2.imread reports an undecodable file by returning None
                # rather than raising.
                skipped_files += 1
                continue
            new_array = cv2.resize(img_array, (img_size, img_size))
        except cv2.error as err:
            # Keep loading best-effort, but say which file failed and why.
            print(f"Skipping {img_path}: {err}")
            skipped_files += 1
            continue
        X_test.append(new_array)
        y_test.append(class_num)

if skipped_files:
    print(f"Skipped {skipped_files} unreadable test file(s)")

# Convert to arrays; images get a trailing single-channel axis.
X_test = np.array(X_test).reshape(-1, img_size, img_size, 1)
y_test = np.array(y_test)

# Divide pixel values by 255 before scoring.
X_test = X_test / 255.0

# Loss and accuracy of the tuned model on the test split.
test_loss, test_acc = best_model.evaluate(X_test, y_test, verbose=2)

print('Test accuracy:', test_acc)



### CNN + HPO:
-  ✔ Validation Accuracy: ~1.0000
-  ✔ Test Accuracy: ~0.8429

### 6. Transfer Learning with VGG16

In [None]:
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Side length (pixels) of the RGB images fed to VGG16.
img_size = 224

# ImageNet-pretrained VGG16 without its fully connected classifier.
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(img_size, img_size, 3),
)

# Keep every pretrained layer fixed; only the new head below is trained.
for layer in base_model.layers:
    layer.trainable = False

# New classification head: three shrinking ReLU dense layers, dropout,
# then one sigmoid unit.
x = Flatten()(base_model.output)
for units in (256, 128, 64):
    x = Dense(units, activation='relu')(x)
x = Dropout(0.3)(x)
predictions = Dense(1, activation='sigmoid')(x)

# Full model: VGG16 input through to the new sigmoid output.
model = Model(inputs=base_model.input, outputs=predictions)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Training images are rescaled and augmented; validation images are only rescaled.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
val_datagen = ImageDataGenerator(rescale=1./255)

# Data folders, each read with one sub-folder per class.
train_dir = r'C:\Users\rasik\Downloads\X-Ray Kaggle data\chest_xray\train'
val_dir = r'C:\Users\rasik\Downloads\X-Ray Kaggle data\chest_xray\val'

# Options shared by every directory iterator in this cell.
flow_options = dict(
    target_size=(img_size, img_size),
    batch_size=32,
    class_mode='binary',
)
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
val_generator = val_datagen.flow_from_directory(val_dir, **flow_options)

# Train the head.
model.fit(train_generator, epochs=10, validation_data=val_generator)

# Score on the test split; shuffle is off so batches follow directory order.
test_dir = r'C:\Users\rasik\Downloads\X-Ray Kaggle data\chest_xray\test'
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(test_dir, shuffle=False, **flow_options)
test_loss, test_acc = model.evaluate(test_generator)
print('Test accuracy:', test_acc)

### VGG16 Transfer Learning (Frozen):
-  ✔ Validation Accuracy: ~0.9247
-  ✔ Test Accuracy: ~0.9247


### 7. Fine Tune VGG16

In [None]:
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense,Dropout,Activation,Flatten, Conv2D,MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
# Side length (pixels) of the RGB images fed to VGG16.
img_size = 224

# ImageNet-pretrained VGG16 without its fully connected classifier.
base_model = VGG16(input_shape=(img_size, img_size, 3), include_top=False, weights='imagenet')

# Freeze the whole base first. Layers are trainable by default, so marking
# only the last four as trainable would leave every layer trainable and
# fine-tune the entire network instead of just its top.
for layer in base_model.layers:
    layer.trainable = False

# Unfreeze only last 4 layers
for layer in base_model.layers[-4:]:
    layer.trainable = True

# New classification head: three ReLU dense layers, dropout, one sigmoid unit.
x = Flatten()(base_model.output)
x = Dense(256, activation='relu')(x)
x = Dense(128, activation='relu')(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.3)(x)
predictions = Dense(1, activation='sigmoid')(x)

# Full model: VGG16 input through to the new sigmoid output.
model1 = Model(inputs=base_model.input, outputs=predictions)

# Training images are rescaled and augmented; validation images are only rescaled.
train_datagen = ImageDataGenerator(rescale=1./255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True)
val_datagen = ImageDataGenerator(rescale=1./255)

# Data folders, each read with one sub-folder per class.
train_dir = r'C:\Users\rasik\Downloads\X-Ray Kaggle data\chest_xray\train'
val_dir = r'C:\Users\rasik\Downloads\X-Ray Kaggle data\chest_xray\val'

train_generator = train_datagen.flow_from_directory(train_dir, target_size=(img_size, img_size), batch_size=32, class_mode='binary')
val_generator = val_datagen.flow_from_directory(val_dir, target_size=(img_size, img_size), batch_size=32, class_mode='binary')

# Compile after the trainable flags are set. The small learning rate keeps
# updates to the unfrozen pretrained weights gentle.
model1.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Stop when validation loss has not improved for 2 epochs and roll back to
# the best weights seen.
model1.fit(
    train_generator,
    epochs=5,
    validation_data=val_generator,
    callbacks=[EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)]
)

# Score on the test split; shuffle is off so batches follow directory order.
test_dir = r'C:\Users\rasik\Downloads\X-Ray Kaggle data\chest_xray\test'
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(test_dir, target_size=(img_size, img_size), batch_size=32, class_mode='binary', shuffle=False)
test_loss, test_acc = model1.evaluate(test_generator)
print('Test accuracy:', test_acc)

### VGG16 Fine-Tuned:
-   ✔ Validation Accuracy: 1.00
-   ✔ Test Accuracy: 0.924


### 8. Save Model

In [None]:
# Write the fine-tuned VGG16 classifier (model1) to disk as an .h5 file.
model1.save(filepath="pneumonia_vgg16_model.h5")

### 📈 Final Analysis Summary
 --------------------------------------------------

### Model Comparisons:
 ------------------
🔹 CNN from Scratch:
    - Validation Accuracy: ~93.75%
    - Test Accuracy: ~85.89%

🔹 CNN + HPO:
     - Validation Accuracy: 100.00% (overfitting suspected)
     - Test Accuracy: ~84.29%

 🔹 VGG16 Transfer Learning (Frozen):
    - Validation Accuracy: ~92.47%
    - Test Accuracy: ~92.47%

 🔹 VGG16 Fine-Tuned:
    - Validation Accuracy: 1.00
    - Test Accuracy: 92.4%

 📊 Insights:
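 - HPO raised validation accuracy to 100% but test accuracy fell (~85.89% → ~84.29%), which suggests the tuned model overfit the validation data.
 - Transfer learning with a frozen VGG16 gave the highest test accuracy (~92.47%), even without fine-tuning.
 - Fine-tuning is expected to improve domain adaptation at the cost of longer training; in this run it matched the frozen model on the test set (~92.4%) rather than exceeding it.
 - The best balance of accuracy and generalization comes from VGG16 transfer learning, with selective fine-tuning performing on par with the frozen base.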
