In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adamax
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt

print('Modules loaded')


Modules loaded


In [2]:
# Root folder that holds one sub-directory per class; each image is labelled
# with the name of the directory that directly contains it.
# Set the TB_DATA_DIR environment variable to point the notebook at another
# copy of the dataset without editing this cell.
data_dir = os.environ.get(
    "TB_DATA_DIR",
    r"D:\CAPSTONE 400A\final_dataset\Tuberculosis\TB_Chest_Radiography_Database\image7500f",
)

# Only files with these (case-insensitive) extensions are indexed, so stray
# files such as Thumbs.db never become labelled rows.
# NOTE(review): intended to mirror the formats Keras' flow_from_dataframe
# accepts by default -- confirm against the installed Keras version.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.ppm', '.tif', '.tiff')


def collect_labeled_files(root_dir, extensions=IMAGE_EXTENSIONS):
    """Walk ``root_dir`` and return the image paths with their folder labels.

    Args:
        root_dir: Directory to scan recursively.
        extensions: Tuple of lower-case file suffixes to keep.

    Returns:
        ``(filepaths, labels)`` -- two parallel lists. ``labels[i]`` is the
        basename of the directory containing ``filepaths[i]``. Both lists are
        empty when ``root_dir`` does not exist or holds no matching files.
    """
    found_paths, found_labels = [], []
    for root, dirs, files in os.walk(root_dir):
        # Sorting in place makes os.walk descend in a stable order, so the
        # row order (and therefore the seeded train/test split) does not
        # depend on the filesystem's listing order.
        dirs.sort()
        label = os.path.basename(root)
        for file in sorted(files):
            if not file.lower().endswith(extensions):
                continue
            found_paths.append(os.path.join(root, file))
            found_labels.append(label)
    return found_paths, found_labels


filepaths, labels = collect_labeled_files(data_dir)

df = pd.DataFrame({'filepaths': filepaths, 'labels': labels})
labels = df['labels']
print(df)


                                              filepaths        labels
0     D:\CAPSTONE 400A\final_dataset\Tuberculosis\TB...        Normal
1     D:\CAPSTONE 400A\final_dataset\Tuberculosis\TB...        Normal
2     D:\CAPSTONE 400A\final_dataset\Tuberculosis\TB...        Normal
3     D:\CAPSTONE 400A\final_dataset\Tuberculosis\TB...        Normal
4     D:\CAPSTONE 400A\final_dataset\Tuberculosis\TB...        Normal
...                                                 ...           ...
6995  D:\CAPSTONE 400A\final_dataset\Tuberculosis\TB...  Tuberculosis
6996  D:\CAPSTONE 400A\final_dataset\Tuberculosis\TB...  Tuberculosis
6997  D:\CAPSTONE 400A\final_dataset\Tuberculosis\TB...  Tuberculosis
6998  D:\CAPSTONE 400A\final_dataset\Tuberculosis\TB...  Tuberculosis
6999  D:\CAPSTONE 400A\final_dataset\Tuberculosis\TB...  Tuberculosis

[7000 rows x 2 columns]


In [3]:
# --- Input pipeline settings ---
img_size = (224, 224)              # (height, width) every image is resized to
channels = 3                       # RGB
img_shape = (*img_size, channels)  # full input shape handed to the network
batch_size = 32

# --- Training settings ---
epochs = 5
NUM_CLASSES = 2                    # size of the softmax output layer


In [4]:
def get_callbacks(model_name):
    """Build the list of Keras callbacks used for a training run.

    Args:
        model_name: Tag inserted into the checkpoint file name; the checkpoint
            is written to ``model.{model_name}.h5`` in the working directory.

    Returns:
        A list ``[ModelCheckpoint, ReduceLROnPlateau, EarlyStopping]``.
    """
    # save_best_only=True is what makes monitor/mode meaningful: without it the
    # file is rewritten after every epoch and ends up holding the *last* epoch
    # rather than the one with the highest validation accuracy.
    checkpoint = ModelCheckpoint(
        filepath=f'model.{model_name}.h5',
        monitor='val_accuracy',
        mode='max',
        save_best_only=True,
        verbose=1,
    )
    # Multiply the learning rate by 0.2 after 3 epochs without a val_loss
    # improvement, never going below 1e-6.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6, verbose=1)
    # NOTE(review): the notebook trains for epochs = 5, so a patience of 5
    # looks like it can never trigger in that configuration -- confirm intent.
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)
    return [checkpoint, reduce_lr, early_stopping]


In [5]:
def build_model():
    """Create and compile the EfficientNetB3-based classifier.

    The ImageNet-initialised backbone (without its top) is left fully
    trainable and followed by global average pooling, dropout, a 256-unit
    ReLU layer, dropout again, and a ``NUM_CLASSES``-way softmax named
    ``'final'``. Reads ``img_shape`` and ``NUM_CLASSES`` from the notebook's
    configuration cell.

    Returns:
        The compiled Keras ``Model`` (Adamax with learning rate 0.001,
        categorical cross-entropy loss, accuracy metric).
    """
    backbone = tf.keras.applications.EfficientNetB3(
        include_top=False,
        weights='imagenet',
        input_shape=img_shape,
    )
    backbone.trainable = True  # fine-tune every backbone layer

    # Classification head stacked on the backbone's feature map.
    pooled = GlobalAveragePooling2D()(backbone.output)
    pooled = Dropout(0.45)(pooled)
    hidden = Dense(256, activation='relu')(pooled)
    hidden = Dropout(0.45)(hidden)
    predictions = Dense(NUM_CLASSES, activation='softmax', name='final')(hidden)

    model = Model(inputs=backbone.input, outputs=predictions)
    model.compile(
        optimizer=Adamax(0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model


In [6]:
# Hold out 20% of the rows, keeping the label proportions equal in both parts;
# the fixed random_state makes the split repeatable for a given row order.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=123,
    stratify=df['labels'],
)


In [7]:
# No augmentation or rescaling is configured on either generator.
tr_gen, ts_gen = ImageDataGenerator(), ImageDataGenerator()

# Arguments shared by the training and test iterators.
flow_options = dict(
    x_col='filepaths',
    y_col='labels',
    target_size=img_size,
    class_mode='categorical',
    color_mode='rgb',
    batch_size=batch_size,
)

train_gen = tr_gen.flow_from_dataframe(train_df, shuffle=True, **flow_options)
# The test iterator is not shuffled; later cells compare its predictions
# position-by-position with test_gen.classes.
test_gen = ts_gen.flow_from_dataframe(test_df, shuffle=False, **flow_options)

# Class-name -> index mapping chosen by the training iterator.
g_dict = train_gen.class_indices
classes = list(g_dict)
class_count = len(classes)


Found 5600 validated image filenames belonging to 2 classes.
Found 1400 validated image filenames belonging to 2 classes.


In [None]:
model = build_model()
# Tag the checkpoint with the architecture build_model() actually creates
# (EfficientNetB3); the previous 'Xception_model' tag mislabelled the file.
callbacks = get_callbacks('EfficientNetB3_model')

# NOTE(review): test_gen is used as the validation set here and is evaluated
# again in the following cells, so checkpoint selection / LR scheduling see the
# same images the final metrics are reported on. Consider carving a separate
# validation split out of train_df.
history = model.fit(
    train_gen,
    epochs=epochs,
    validation_data=test_gen,
    callbacks=callbacks,
)


Epoch 1/5
 25/175 [===>..........................] - ETA: 1:47:14 - loss: 0.0948 - accuracy: 0.9588

In [None]:
# Score the model on *every* held-out image.
# Flooring len(test_df) / 32 (1400 // 32 = 43 steps) skipped the final partial
# batch, i.e. 24 images; len(test_gen) counts that last batch as well and
# follows the iterator's own batch size instead of a second hard-coded 32.
ts_length = len(test_df)
test_batch_size = test_gen.batch_size
test_steps = len(test_gen)

test_score = model.evaluate(test_gen, steps=test_steps, verbose=1)
# test_score is [loss, accuracy], matching the metrics the model was compiled with.
print("Test Loss: ", test_score[0])
print("Test Accuracy: ", test_score[1])


In [None]:
# Predict class probabilities for the test iterator, reduce each row to its
# most probable class index, and print per-class precision/recall/F1.
preds = model.predict(test_gen)
y_pred = preds.argmax(axis=1)

print("Classification Report")
report = classification_report(test_gen.classes, y_pred, target_names=classes)
print(report)


In [None]:
# Training curves: accuracy on the left, loss on the right, one line each for
# the training and validation values recorded per epoch.
curves = history.history
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Accuracy panel
acc_ax.plot(curves['accuracy'], label='train_accuracy')
acc_ax.plot(curves['val_accuracy'], label='val_accuracy')
acc_ax.set_title('Model Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

# Loss panel
loss_ax.plot(curves['loss'], label='train_loss')
loss_ax.plot(curves['val_loss'], label='val_loss')
loss_ax.set_title('Model Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

fig.tight_layout()
plt.show()


In [None]:
# Confusion matrix of true vs. predicted class indices, labelled with the
# class names so the heatmap is readable on its own.
cm = confusion_matrix(test_gen.classes, y_pred)
cm_df = pd.DataFrame(cm, index=classes, columns=classes)

cm_fig, cm_ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues', ax=cm_ax)
cm_ax.set_title('Confusion Matrix')
cm_ax.set_ylabel('Actual')
cm_ax.set_xlabel('Predicted')
plt.show()
