In [5]:
import os, shutil, random
from sklearn.model_selection import train_test_split

# CHANGE THESE PATHS
DATASET_ROOT = r"D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/Dataset of Tuberculosis Chest X-rays Images"       # raw dataset (Normal/ TB/)
OUTPUT_ROOT  = r"D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/train test split" # where new folders will be created

CLASSES = ["Normal", "TB"]
SPLITS = (0.7, 0.15, 0.15)  # train, val, test

def make_dirs():
    for split in ["train", "val", "test"]:
        for cls in CLASSES:
            os.makedirs(os.path.join(OUTPUT_ROOT, split, cls), exist_ok=True)

def split_and_copy():
    for cls in CLASSES:
        files = [os.path.join(DATASET_ROOT, cls, f) for f in os.listdir(os.path.join(DATASET_ROOT, cls))]
        train_files, temp = train_test_split(files, test_size=(1-SPLITS[0]), random_state=42)
        val_files, test_files = train_test_split(temp, test_size=SPLITS[2]/(SPLITS[1]+SPLITS[2]), random_state=42)

        for f in train_files: shutil.copy(f, os.path.join(OUTPUT_ROOT, "train", cls))
        for f in val_files:   shutil.copy(f, os.path.join(OUTPUT_ROOT, "val", cls))
        for f in test_files:  shutil.copy(f, os.path.join(OUTPUT_ROOT, "test", cls))

if __name__ == "__main__":
    make_dirs()
    split_and_copy()
    print("✅ Dataset split into train/val/test at:", OUTPUT_ROOT)


✅ Dataset split into train/val/test at: D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/train test split


In [5]:
import tensorflow as tf

In [9]:
# Change this to your split dataset path
DATA_DIR = r"D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/train test split"
IMG_SIZE = (224, 224)   # Resize all images to 224x224
BATCH_SIZE = 32

# 1️⃣ Load datasets from folder structure
train_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR + "/train",
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode="categorical"  # one-hot for multi-class
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR + "/val",
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode="categorical"
)

test_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR + "/test",
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode="categorical"
)

# 2️⃣ Normalize pixel values (scale 0–255 → 0–1)
normalization_layer = tf.keras.layers.Rescaling(1./255)

train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
val_ds   = val_ds.map(lambda x, y: (normalization_layer(x), y))
test_ds  = test_ds.map(lambda x, y: (normalization_layer(x), y))

# 3️⃣ Data Augmentation (apply only on training set)
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.RandomZoom(0.1),
    tf.keras.layers.RandomContrast(0.1),
])

train_ds = train_ds.map(lambda x, y: (data_augmentation(x, training=True), y))

# 4️⃣ Improve performance with caching & prefetching
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds   = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds  = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

print("✅ Data ready: train, val, test sets created!")


Found 2913 files belonging to 2 classes.
Found 881 files belonging to 2 classes.
Found 877 files belonging to 2 classes.
✅ Data ready: train, val, test sets created!


In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50, VGG16, EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import matplotlib.pyplot as plt
import numpy as np
import os

In [4]:
# Paths (Change these accordingly)
train_dir = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/train test split/train"
val_dir   = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/train test split/val"
test_dir  = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/train test split/test"

In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define paths
train_dir = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/train test split/train"
val_dir = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/train test split/test"

# Preprocessing with conversion: grayscale -> RGB
datagen = ImageDataGenerator(rescale=1./255)

# Training data
train_generator = datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),   # Resize to match transfer learning model
    color_mode="rgb",         # Convert grayscale -> RGB
    batch_size=32,
    class_mode="binary"       # Change to 'categorical' if >2 classes
)

# Validation data
val_generator = datagen.flow_from_directory(
    val_dir,
    target_size=(224, 224),
    color_mode="rgb",         # Convert grayscale -> RGB
    batch_size=32,
    class_mode="binary"
)

Found 2913 images belonging to 2 classes.
Found 877 images belonging to 2 classes.


In [3]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50, VGG16, EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import matplotlib.pyplot as plt
import numpy as np
import os

In [5]:
# Paths (Change these accordingly)
train_dir = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/train test split/train"
val_dir   = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/train test split/val"
test_dir  = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/train test split/test"

In [7]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50, VGG16, EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# -----------------------------
# Use your existing generators
# -----------------------------
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255).flow_from_directory(
    "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/train test split/train",
    target_size=(224, 224),
    color_mode="rgb",  # already RGB
    batch_size=32,
    class_mode="binary"
)

val_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255).flow_from_directory(
    "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/train test split/val",
    target_size=(224, 224),
    color_mode="rgb",  # already RGB
    batch_size=32,
    class_mode="binary",
    shuffle=False
)

# -----------------------------
# Model builder function
# -----------------------------
def build_transfer_model(model_name):
    if model_name == "ResNet50":
        base_model = ResNet50(weights="imagenet", include_top=False, input_shape=(224,224,3))
    elif model_name == "VGG16":
        base_model = VGG16(weights="imagenet", include_top=False, input_shape=(224,224,3))
    elif model_name == "EfficientNetB0":
        base_model = EfficientNetB0(weights="imagenet", include_top=False, input_shape=(224,224,3))
    else:
        raise ValueError("Unsupported model name")

    base_model.trainable = False
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.3)(x)
    output = Dense(1, activation="sigmoid")(x)

    model = Model(inputs=base_model.input, outputs=output)
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model

# -----------------------------
# Train and evaluate models
# -----------------------------
models_list = ["ResNet50", "VGG16", "EfficientNetB0"]
results = {}

for name in models_list:
    print(f"\n🔹 Training {name}...\n")
    model = build_transfer_model(name)
    
    # Train
    model.fit(train_generator, validation_data=val_generator, epochs=5)  # adjust epochs if needed
    
    # Predictions
    y_true = val_generator.classes
    y_pred_prob = model.predict(val_generator).ravel()
    y_pred = (y_pred_prob > 0.5).astype(int)
    
    # Metrics
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred)
    rec = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    roc = roc_auc_score(y_true, y_pred_prob)
    
    results[name] = {"accuracy": acc, "precision": prec, "recall": rec, "f1_score": f1, "roc_auc": roc}
    
    print(f"\n📊 {name} Evaluation:")
    print(f"Accuracy: {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall: {rec:.4f}")
    print(f"F1-Score: {f1:.4f}")
    print(f"ROC-AUC: {roc:.4f}")
    
    # Save model
    os.makedirs("saved_models", exist_ok=True)
    model.save(f"saved_models/{name}_tb_model.h5")

# -----------------------------
# Select best model
# -----------------------------
best_model_name = max(results, key=lambda x: results[x]["accuracy"])
print(f"\n🏆 Best Model: {best_model_name} with Accuracy: {results[best_model_name]['accuracy']:.4f}")


Found 2913 images belonging to 2 classes.
Found 881 images belonging to 2 classes.

🔹 Training ResNet50...



  self._warn_if_super_not_called()


Epoch 1/5
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m209s[0m 2s/step - accuracy: 0.7707 - loss: 0.5407 - val_accuracy: 0.8252 - val_loss: 0.4613
Epoch 2/5
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 2s/step - accuracy: 0.8215 - loss: 0.4844 - val_accuracy: 0.8252 - val_loss: 0.4565
Epoch 3/5
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 2s/step - accuracy: 0.8156 - loss: 0.4761 - val_accuracy: 0.8252 - val_loss: 0.4522
Epoch 4/5
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 2s/step - accuracy: 0.8233 - loss: 0.4579 - val_accuracy: 0.8252 - val_loss: 0.4485
Epoch 5/5
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 2s/step - accuracy: 0.8139 - loss: 0.4685 - val_accuracy: 0.8252 - val_loss: 0.4725
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 2s/step





📊 ResNet50 Evaluation:
Accuracy: 0.8252
Precision: 0.8252
Recall: 1.0000
F1-Score: 0.9042
ROC-AUC: 0.9501

🔹 Training VGG16...

Epoch 1/5
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m407s[0m 4s/step - accuracy: 0.7860 - loss: 0.4985 - val_accuracy: 0.8252 - val_loss: 0.3888
Epoch 2/5
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m408s[0m 4s/step - accuracy: 0.8325 - loss: 0.3772 - val_accuracy: 0.8263 - val_loss: 0.3352
Epoch 3/5
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m408s[0m 4s/step - accuracy: 0.8289 - loss: 0.3401 - val_accuracy: 0.8456 - val_loss: 0.2928
Epoch 4/5
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m403s[0m 4s/step - accuracy: 0.8519 - loss: 0.2980 - val_accuracy: 0.8774 - val_loss: 0.2603
Epoch 5/5
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m406s[0m 4s/step - accuracy: 0.8782 - loss: 0.2658 - val_accuracy: 0.8888 - val_loss: 0.2369
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 3s/ste




📊 VGG16 Evaluation:
Accuracy: 0.8888
Precision: 0.8831
Recall: 0.9972
F1-Score: 0.9367
ROC-AUC: 0.9833

🔹 Training EfficientNetB0...

Epoch 1/5
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 1s/step - accuracy: 0.8206 - loss: 0.4818 - val_accuracy: 0.8252 - val_loss: 0.4653
Epoch 2/5
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 1s/step - accuracy: 0.8196 - loss: 0.4798 - val_accuracy: 0.8252 - val_loss: 0.4655
Epoch 3/5
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 1s/step - accuracy: 0.8191 - loss: 0.4800 - val_accuracy: 0.8252 - val_loss: 0.4652
Epoch 4/5
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 1s/step - accuracy: 0.8254 - loss: 0.4663 - val_accuracy: 0.8252 - val_loss: 0.4640
Epoch 5/5
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 1s/step - accuracy: 0.8334 - loss: 0.4560 - val_accuracy: 0.8252 - val_loss: 0.4638
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 




📊 EfficientNetB0 Evaluation:
Accuracy: 0.8252
Precision: 0.8252
Recall: 1.0000
F1-Score: 0.9042
ROC-AUC: 0.8481

🏆 Best Model: VGG16 with Accuracy: 0.8888
