In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import StratifiedKFold
import xgboost as xgb

# Define constants
IMAGE_SIZE = 224
NUM_CLASSES = 2
EPOCHS = 2
BATCH_SIZE = 32
NUM_FOLDS = 2
DATA_DIR = "D:/CAPSTONE 400A/final_dataset/Tuberculosis/TB_Chest_Radiography_Database/New_folder"

# Get the list of image filenames for each class
normal_images = [os.path.join(DATA_DIR, "Normal", filename) for filename in os.listdir(os.path.join(DATA_DIR, "Normal"))]
tb_images = [os.path.join(DATA_DIR, "Tuberculosis", filename) for filename in os.listdir(os.path.join(DATA_DIR, "Tuberculosis"))]

# Create DataFrame to use with ImageDataGenerator
df_normal = pd.DataFrame({'filename': normal_images, 'class': 'Normal'})
df_tb = pd.DataFrame({'filename': tb_images, 'class': 'Tuberculosis'})
df = pd.concat([df_normal, df_tb], ignore_index=True)

# K-fold cross-validation
skf = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True)

# Initialize lists to store results
all_true_labels = []
all_predicted_labels = []

# Iterate over folds
for fold, (train_index, test_index) in enumerate(skf.split(X=df['filename'], y=df['class'])):
    train_df = df.iloc[train_index]
    test_df = df.iloc[test_index]

    # Create data generators
    train_datagen = ImageDataGenerator()

    train_generator = train_datagen.flow_from_dataframe(
        train_df,
        x_col='filename',
        y_col='class',
        target_size=(IMAGE_SIZE, IMAGE_SIZE),
        batch_size=BATCH_SIZE,
        class_mode='binary'
    )

    test_generator = train_datagen.flow_from_dataframe(
        test_df,
        x_col='filename',
        y_col='class',
        target_size=(IMAGE_SIZE, IMAGE_SIZE),
        batch_size=BATCH_SIZE,
        class_mode='binary'
    )

    # Define model
    base_model = tf.keras.applications.EfficientNetB3(weights='imagenet', include_top=False, input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    model = Sequential([
        base_model,
        GlobalAveragePooling2D(),
        Dense(256, activation='relu'),
        Dropout(0.45),
        Dense(NUM_CLASSES, activation='softmax')
    ])

    # Freeze pre-trained layers
    base_model.trainable = False

    # Compile model
    model.compile(optimizer=Adam(0.0001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Train model
    history = model.fit(
        train_generator,
        steps_per_epoch=len(train_generator),
        epochs=EPOCHS,
        validation_data=test_generator,
        validation_steps=len(test_generator)
    )

    # Evaluate model
    test_loss, test_accuracy = model.evaluate(test_generator, steps=len(test_generator))
    print(f"Test Accuracy: {test_accuracy}")

    


Found 2100 validated image filenames belonging to 2 classes.
Found 2100 validated image filenames belonging to 2 classes.
Epoch 1/2
Epoch 2/2
Test Accuracy: 0.9319047331809998
Found 2100 validated image filenames belonging to 2 classes.
Found 2100 validated image filenames belonging to 2 classes.
Epoch 1/2
Epoch 2/2
Test Accuracy: 0.9642857313156128


In [2]:
# Feature Extraction
train_features = base_model.predict(train_generator)
test_features = base_model.predict(test_generator)

   



In [3]:
 # Reshape features
train_features_flat = train_features.reshape(train_features.shape[0], -1)
test_features_flat = test_features.reshape(test_features.shape[0], -1)

   

In [10]:
import xgboost as xgb

# Define subset size
subset_size = 100  # Adjust this value as needed

# Take a subset of the training features and classes
train_features_subset = train_features_flat[:subset_size]
train_classes_subset = train_generator.classes[:subset_size]

try:
    # Train XGBoost model
    xgb_model = xgb.XGBClassifier()
    xgb_model.fit(train_features_subset, train_classes_subset)
    
    # If training completes successfully
    print("XGBoost model trained successfully!")

except MemoryError:
    print("MemoryError: Unable to allocate memory. Please reduce the dataset size or optimize data representation.")

except Exception as e:
    print("An error occurred during training:", e)


XGBoost model trained successfully!


In [11]:
# Evaluate XGBoost model
xgb_predictions = xgb_model.predict(test_features_flat)
xgb_accuracy = np.mean(xgb_predictions == test_generator.classes)
print("XGBoost Accuracy:", xgb_accuracy)

XGBoost Accuracy: 0.8333333333333334
