In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import xgboost as xgb
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")








In [None]:
print('Modules loaded')

# Root folder of the dataset. The name of the folder that directly contains a
# file is used as that file's class label.
data_dir = r"D:\CAPSTONE 400A\final_dataset\Tuberculosis\TB_Chest_Radiography_Database\image7500f"
if not os.path.isdir(data_dir):
    # os.walk() yields nothing for a missing folder, which would otherwise
    # surface much later as a confusing error inside train_test_split.
    raise FileNotFoundError(f"Dataset directory not found: {data_dir}")

# Only keep image files so stray metadata/thumbnail files do not become
# samples (or even a spurious extra class).
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.ppm', '.tif', '.tiff')

filepaths = []
labels = []

for root, dirs, files in os.walk(data_dir):
    # Sort in place so the traversal order (and therefore the seeded split
    # below) does not depend on the filesystem's listing order.
    dirs.sort()
    for file in sorted(files):
        if not file.lower().endswith(image_extensions):
            continue
        filepaths.append(os.path.join(root, file))
        labels.append(os.path.basename(root))

if not filepaths:
    raise ValueError(f"No image files found under: {data_dir}")

# One row per image: absolute path + class label.
df = pd.DataFrame({'filepaths': filepaths, 'labels': labels})
labels = df['labels']
print(df)

# Image/batch configuration shared by the generators and the feature extractor.
batch_size = 32
img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)

# Stratified 80/20 split so both sets keep the class proportions of the full data.
train_df, test_df = train_test_split(df, test_size=0.2, stratify=df['labels'], random_state=123)

In [None]:
def build_feature_extractor():
    """Build a frozen VGG19 model that outputs globally average-pooled features.

    The network is created with ImageNet weights and without its classification
    head, using the notebook-level ``img_shape`` as input shape. Its weights are
    frozen, and a GlobalAveragePooling2D layer is applied to the convolutional
    output so that each image yields a single feature vector.

    Returns:
        A Keras ``Model`` mapping an image batch to pooled feature vectors.
    """
    base_model = tf.keras.applications.VGG19(input_shape=img_shape, include_top=False, weights='imagenet')
    base_model.trainable = False  # Freeze the base model
    pooled = GlobalAveragePooling2D()(base_model.output)
    return Model(inputs=base_model.input, outputs=pooled)


# Instantiate at notebook level. Previously this assignment was indented after
# the function's `return`, so it never ran and `feature_extractor` was undefined.
feature_extractor = build_feature_extractor()

In [None]:

# The ImageNet VGG19 weights were trained on inputs transformed by the model's
# own preprocess_input, so apply it to every image instead of feeding raw
# 0-255 RGB pixels to the frozen network.
vgg19_preprocess = tf.keras.applications.vgg19.preprocess_input

tr_gen = ImageDataGenerator(preprocessing_function=vgg19_preprocess)
ts_gen = ImageDataGenerator(preprocessing_function=vgg19_preprocess)

# shuffle=False on both generators: batches are served in dataframe order.
train_gen = tr_gen.flow_from_dataframe(train_df, x_col='filepaths', y_col='labels',
                                       target_size=img_size, class_mode='categorical',
                                       color_mode='rgb', shuffle=False, batch_size=batch_size)

test_gen = ts_gen.flow_from_dataframe(test_df, x_col='filepaths', y_col='labels',
                                      target_size=img_size, class_mode='categorical',
                                      color_mode='rgb', shuffle=False, batch_size=batch_size)

def extract_features(generator, sample_count, extractor=None):
    """Run a feature extractor over batches from ``generator`` and collect the results.

    Args:
        generator: Iterable yielding ``(inputs_batch, labels_batch)`` pairs. It may
            be endless; iteration stops once ``sample_count`` rows are collected.
        sample_count: Number of samples to collect.
        extractor: Object with a ``predict(inputs_batch)`` method. Defaults to the
            notebook-level ``feature_extractor``.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays whose first dimension is
        ``sample_count``. The trailing dimensions are taken from the first batch
        instead of being hard-coded.
    """
    model = feature_extractor if extractor is None else extractor

    features = None
    labels = None
    filled = 0  # number of rows written so far

    if sample_count > 0:
        for inputs_batch, labels_batch in generator:
            features_batch = np.asarray(model.predict(inputs_batch))
            labels_batch = np.asarray(labels_batch)

            if features is None:
                # Allocate lazily so the widths follow the actual model/label
                # output rather than assumed constants.
                features = np.zeros((sample_count,) + features_batch.shape[1:])
                labels = np.zeros((sample_count,) + labels_batch.shape[1:])

            # Track a running offset (not i * batch_size) so short batches and
            # generators with a different batch size are handled correctly, and
            # never write past the end of the buffers.
            take = min(len(features_batch), sample_count - filled)
            features[filled:filled + take] = features_batch[:take]
            labels[filled:filled + take] = labels_batch[:take]
            filled += take

            if filled >= sample_count:
                break

    if features is None:
        # No batch was seen: fall back to the shapes this function historically
        # returned (512 pooled VGG19 features, 2 one-hot classes).
        features = np.zeros((sample_count, 512))
        labels = np.zeros((sample_count, 2))

    return features, labels

# Ask each generator how many samples it actually serves: flow_from_dataframe
# can drop rows with invalid image files, in which case the dataframe length
# would overstate the count and the generator would wrap around.
train_features, train_labels = extract_features(train_gen, train_gen.samples)
test_features, test_labels = extract_features(test_gen, test_gen.samples)

print("Train features shape:", train_features.shape)
print("Test features shape:", test_features.shape)


In [None]:
# Convert one-hot encoded labels to single integer labels.
# The ndim guard keeps this cell idempotent: re-running it after the labels are
# already 1-D would otherwise fail on `axis=1`.
if train_labels.ndim > 1:
    train_labels = np.argmax(train_labels, axis=1)
if test_labels.ndim > 1:
    test_labels = np.argmax(test_labels, axis=1)

# Wrap features + integer labels in XGBoost's DMatrix containers.
dtrain = xgb.DMatrix(train_features, label=train_labels)
dtest = xgb.DMatrix(test_features, label=test_labels)


In [None]:
# Booster hyper-parameters for a 2-class softmax classifier.
params = {
    'max_depth': 3,
    'eta': 0.1,
    'objective': 'multi:softmax',
    'num_class': 2,
    'min_child_weight': 1,
    'gamma': 0,
    'subsample': 1,
    'colsample_bytree': 1
}
num_rounds = 300   # upper bound on boosting rounds
early_stop = 10    # stop after this many rounds without improvement

# Early stopping selects the number of rounds, so it must not look at the test
# set (that would leak test data into model selection and inflate the reported
# metrics). Hold out a stratified validation split from the training features.
fit_features, val_features, fit_labels, val_labels = train_test_split(
    train_features, train_labels, test_size=0.2, stratify=train_labels, random_state=123
)
dtrain_fit = xgb.DMatrix(fit_features, label=fit_labels)
dval = xgb.DMatrix(val_features, label=val_labels)

bst = xgb.train(params, dtrain_fit, num_rounds, evals=[(dval, 'eval')], early_stopping_rounds=early_stop)


In [None]:
# multi:softmax predictions come back as float class ids; cast to int so they
# have the same dtype as the integer ground-truth labels.
preds = bst.predict(dtest).astype(int)

# Class names ordered by their integer index, plus the matching index list.
# Passing the indices explicitly keeps the report and matrix aligned with the
# names even if a class happens to be absent from the test labels/predictions.
class_names = sorted(train_gen.class_indices, key=train_gen.class_indices.get)
class_ids = [train_gen.class_indices[name] for name in class_names]

print("Classification Report")
print(classification_report(test_labels, preds, labels=class_ids, target_names=class_names))

# Rows are actual classes, columns are predicted classes.
cm = confusion_matrix(test_labels, preds, labels=class_ids)
cm_df = pd.DataFrame(cm, index=class_names, columns=class_names)

plt.figure(figsize=(8, 6))
sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()


In [None]:
# Overall accuracy of the predictions against the test labels, shown as a percentage.
accuracy = accuracy_score(y_true=test_labels, y_pred=preds)
print(f"Accuracy: {accuracy * 100:.2f}%")
