In [1]:
import os
import random
import shutil

# Paths
normal_path = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/Dataset of Tuberculosis Chest X-rays Images/Normal"
tb_path = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/Dataset of Tuberculosis Chest X-rays Images/TB"
balanced_tb_path = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/Blanced dataset/TB"
balanced_normal_path = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/Blanced dataset/Normal"

os.makedirs(balanced_tb_path, exist_ok=True)
os.makedirs(balanced_normal_path, exist_ok=True)

# Get files
normal_files = os.listdir(normal_path)
tb_files = os.listdir(tb_path)

# Copy normal (all 514)
for file in normal_files:
    shutil.copy(os.path.join(normal_path, file), balanced_normal_path)

# Randomly sample TB to 514
tb_sample = random.sample(tb_files, 514)
for file in tb_sample:
    shutil.copy(os.path.join(tb_path, file), balanced_tb_path)

print("✅ Balanced dataset created with 514 Normal and 514 TB images.")


✅ Balanced dataset created with 514 Normal and 514 TB images.


In [3]:
import os, shutil, random
from sklearn.model_selection import train_test_split

# CHANGE THESE PATHS
DATASET_ROOT = r"D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/Blanced dataset"       # raw dataset (Normal/ TB/)
OUTPUT_ROOT  = r"D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/Train Val Test Split" # where new folders will be created

CLASSES = ["Normal", "TB"]
SPLITS = (0.7, 0.15, 0.15)  # train, val, test

def make_dirs():
    for split in ["train", "val", "test"]:
        for cls in CLASSES:
            os.makedirs(os.path.join(OUTPUT_ROOT, split, cls), exist_ok=True)

def split_and_copy():
    for cls in CLASSES:
        files = [os.path.join(DATASET_ROOT, cls, f) for f in os.listdir(os.path.join(DATASET_ROOT, cls))]
        train_files, temp = train_test_split(files, test_size=(1-SPLITS[0]), random_state=42)
        val_files, test_files = train_test_split(temp, test_size=SPLITS[2]/(SPLITS[1]+SPLITS[2]), random_state=42)

        for f in train_files: shutil.copy(f, os.path.join(OUTPUT_ROOT, "train", cls))
        for f in val_files:   shutil.copy(f, os.path.join(OUTPUT_ROOT, "val", cls))
        for f in test_files:  shutil.copy(f, os.path.join(OUTPUT_ROOT, "test", cls))

if __name__ == "__main__":
    make_dirs()
    split_and_copy()
    print("✅ Dataset split into train/val/test at:", OUTPUT_ROOT)


✅ Dataset split into train/val/test at: D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/Train Val Test Split


In [5]:
import tensorflow as tf

In [7]:
# Change this to your split dataset path
DATA_DIR = r"D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/Train Val Test Split"
IMG_SIZE = (224, 224)   # Resize all images to 224x224
BATCH_SIZE = 32

# 1️⃣ Load datasets from folder structure
train_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR + "/train",
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode="categorical"  # one-hot for multi-class
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR + "/val",
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode="categorical"
)

test_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR + "/test",
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode="categorical"
)

# 2️⃣ Normalize pixel values (scale 0–255 → 0–1)
normalization_layer = tf.keras.layers.Rescaling(1./255)

train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
val_ds   = val_ds.map(lambda x, y: (normalization_layer(x), y))
test_ds  = test_ds.map(lambda x, y: (normalization_layer(x), y))

# 3️⃣ Data Augmentation (apply only on training set)
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.RandomZoom(0.1),
    tf.keras.layers.RandomContrast(0.1),
])

train_ds = train_ds.map(lambda x, y: (data_augmentation(x, training=True), y))

# 4️⃣ Improve performance with caching & prefetching
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds   = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds  = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

print("✅ Data ready: train, val, test sets created!")


Found 718 files belonging to 2 classes.
Found 154 files belonging to 2 classes.
Found 156 files belonging to 2 classes.
✅ Data ready: train, val, test sets created!


In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50, VGG16, EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import matplotlib.pyplot as plt
import numpy as np
import os

In [4]:
# Paths (Change these accordingly)
train_dir = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/train test split/train"
val_dir   = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/train test split/val"
test_dir  = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/train test split/test"

In [9]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define paths
train_dir = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/Train Val Test Split/train"
val_dir = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/Train Val Test Split/val"

# Preprocessing with conversion: grayscale -> RGB
datagen = ImageDataGenerator(rescale=1./255)

# Training data
train_generator = datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),   # Resize to match transfer learning model
    color_mode="rgb",         # Convert grayscale -> RGB
    batch_size=32,
    class_mode="binary"       # Change to 'categorical' if >2 classes
)

# Validation data
val_generator = datagen.flow_from_directory(
    val_dir,
    target_size=(224, 224),
    color_mode="rgb",         # Convert grayscale -> RGB
    batch_size=32,
    class_mode="binary"
)

Found 718 images belonging to 2 classes.
Found 154 images belonging to 2 classes.


In [11]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50, VGG16, EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import matplotlib.pyplot as plt
import numpy as np
import os

In [13]:
# Paths (Change these accordingly)
train_dir = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/Train Val Test Split/train"
val_dir   = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/Train Val Test Split/val"
test_dir  = "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/Train Val Test Split/test"

In [15]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50, VGG16, EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# -----------------------------
# Use your existing generators
# -----------------------------
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255).flow_from_directory(
    "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/Train Val Test Split/train",
    target_size=(224, 224),
    color_mode="rgb",  # already RGB
    batch_size=32,
    class_mode="binary"
)

val_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255).flow_from_directory(
    "D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/Dataset/Train Val Test Split/test",
    target_size=(224, 224),
    color_mode="rgb",  # already RGB
    batch_size=32,
    class_mode="binary",
    shuffle=False
)

# -----------------------------
# Model builder function
# -----------------------------
def build_transfer_model(model_name):
    if model_name == "ResNet50":
        base_model = ResNet50(weights="imagenet", include_top=False, input_shape=(224,224,3))
    elif model_name == "VGG16":
        base_model = VGG16(weights="imagenet", include_top=False, input_shape=(224,224,3))
    elif model_name == "EfficientNetB0":
        base_model = EfficientNetB0(weights="imagenet", include_top=False, input_shape=(224,224,3))
    else:
        raise ValueError("Unsupported model name")

    base_model.trainable = False
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.3)(x)
    output = Dense(1, activation="sigmoid")(x)

    model = Model(inputs=base_model.input, outputs=output)
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model

# -----------------------------
# Train and evaluate models
# -----------------------------
models_list = ["ResNet50", "VGG16", "EfficientNetB0"]
results = {}

for name in models_list:
    print(f"\n🔹 Training {name}...\n")
    model = build_transfer_model(name)
    
    # Train
    model.fit(train_generator, validation_data=val_generator, epochs=5)  # adjust epochs if needed
    
    # Predictions
    y_true = val_generator.classes
    y_pred_prob = model.predict(val_generator).ravel()
    y_pred = (y_pred_prob > 0.5).astype(int)
    
    # Metrics
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred)
    rec = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    roc = roc_auc_score(y_true, y_pred_prob)
    
    results[name] = {"accuracy": acc, "precision": prec, "recall": rec, "f1_score": f1, "roc_auc": roc}
    
    print(f"\n📊 {name} Evaluation:")
    print(f"Accuracy: {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall: {rec:.4f}")
    print(f"F1-Score: {f1:.4f}")
    print(f"ROC-AUC: {roc:.4f}")
    
    # Save model
    os.makedirs("D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/save models", exist_ok=True)
    model.save(f"D:/Project/Guvi_Project/Tuberculosis Detection Using Deep Learning/save models/{name}_tb_model.h5")

# -----------------------------
# Select best model
# -----------------------------
best_model_name = max(results, key=lambda x: results[x]["accuracy"])
print(f"\n🏆 Best Model: {best_model_name} with Accuracy: {results[best_model_name]['accuracy']:.4f}")


Found 718 images belonging to 2 classes.
Found 156 images belonging to 2 classes.

🔹 Training ResNet50...



  self._warn_if_super_not_called()


Epoch 1/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 2s/step - accuracy: 0.4969 - loss: 0.7475 - val_accuracy: 0.5000 - val_loss: 0.6964
Epoch 2/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 2s/step - accuracy: 0.5572 - loss: 0.7025 - val_accuracy: 0.5449 - val_loss: 0.6871
Epoch 3/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 2s/step - accuracy: 0.5354 - loss: 0.6958 - val_accuracy: 0.4936 - val_loss: 0.6850
Epoch 4/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 2s/step - accuracy: 0.5086 - loss: 0.7155 - val_accuracy: 0.5000 - val_loss: 0.6836
Epoch 5/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 2s/step - accuracy: 0.4704 - loss: 0.7099 - val_accuracy: 0.6346 - val_loss: 0.6799
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2s/step





📊 ResNet50 Evaluation:
Accuracy: 0.6346
Precision: 0.5814
Recall: 0.9615
F1-Score: 0.7246
ROC-AUC: 0.9111

🔹 Training VGG16...

Epoch 1/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 4s/step - accuracy: 0.5138 - loss: 0.7158 - val_accuracy: 0.7885 - val_loss: 0.6385
Epoch 2/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 4s/step - accuracy: 0.6758 - loss: 0.6453 - val_accuracy: 0.9038 - val_loss: 0.5952
Epoch 3/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 4s/step - accuracy: 0.7864 - loss: 0.5894 - val_accuracy: 0.8910 - val_loss: 0.5599
Epoch 4/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 4s/step - accuracy: 0.8490 - loss: 0.5458 - val_accuracy: 0.8910 - val_loss: 0.5293
Epoch 5/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 4s/step - accuracy: 0.8476 - loss: 0.5235 - val_accuracy: 0.9103 - val_loss: 0.5021
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 3s/step





📊 VGG16 Evaluation:
Accuracy: 0.9103
Precision: 0.9444
Recall: 0.8718
F1-Score: 0.9067
ROC-AUC: 0.9574

🔹 Training EfficientNetB0...

Epoch 1/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 1s/step - accuracy: 0.4989 - loss: 0.6983 - val_accuracy: 0.5000 - val_loss: 0.6971
Epoch 2/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 1s/step - accuracy: 0.5077 - loss: 0.6979 - val_accuracy: 0.5000 - val_loss: 0.6953
Epoch 3/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 1s/step - accuracy: 0.4650 - loss: 0.7069 - val_accuracy: 0.5000 - val_loss: 0.6932
Epoch 4/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 1s/step - accuracy: 0.4977 - loss: 0.7044 - val_accuracy: 0.5000 - val_loss: 0.6940
Epoch 5/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 1s/step - accuracy: 0.4765 - loss: 0.7025 - val_accuracy: 0.5000 - val_loss: 0.6941








[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2s/step 


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



📊 EfficientNetB0 Evaluation:
Accuracy: 0.5000
Precision: 0.0000
Recall: 0.0000
F1-Score: 0.0000
ROC-AUC: 0.9202

🏆 Best Model: VGG16 with Accuracy: 0.9103
