# Background 🤢
Tuberculosis, or TB, is a highly contagious disease caused by the bacteria 'Mycobacterium tuberculosis'. A deadly disease that affects the lungs and spreads to every other organ in the body, it can be spread through the air when someone speaks, coughs, or sneezes, and results in severe coughing, fever, pain, weight loss, night sweats, coughing up blood, and potentially death when left untreated. 

As someone who knows people who have died from TB, I have created a convolutional neural network to accurately diagnose patients using chest X-rays, with user [Tawsifur Rahman](https://www.kaggle.com/tawsifurrahman)'s "Tuberculosis (TB) Chest X-ray Database" dataset, in an attempt to potentially save lives by preventing patients with TB from going untreated and gaining the symptoms mentioned above.

This dataset contains 3500 samples of chest X-rays of patients without tuberculosis, and 700 of patients with tuberculosis, that all look similar to the image below, which I will be using to train this CNN.

![](https://storage.googleapis.com/kagglesdsdata/datasets/891819/2332307/TB_Chest_Radiography_Database/Normal/Normal-1019.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=databundle-worker-v2%40kaggle-161607.iam.gserviceaccount.com%2F20240713%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240713T160233Z&X-Goog-Expires=345600&X-Goog-SignedHeaders=host&X-Goog-Signature=81e3c3b9b9fa4dfc6654ee819f45aa2a24fd64e87130250359a86f23723efcc10185744e2eb48faf4f82833472a62c4bb2488ed8452ad9bc8ae1e35d46b12e35e845e4ee5be597e3d326fae4b3a5b9e6c0afa958c0d405ae07f03ee0ed70b144d9f2859a8f37529cf2190f5d79df12587c1ad049075555cce2e0561289cacb6c4a9f1a20905092913a2879c439119184186d2f0dc115dd057413f36ec40fbdedaafdac108c0cc62ec31102803194787ac4897ba10359e3422f919932ff72e9e68a936b2c85add5b102f2490b7c4fb1991a4b2957943a0ac2e761c32ce32fef9c0eba91399b394fa799c73017847b9205231fa9f083875579a6da2cd890c31355)

# Image Processing 🖼️
The main problem with the data I wanted to fix was the class imbalance (3500 Normal images vs. 700 TB images).
In the code below I first read all the image files with OpenCV in and transformed them into a format suitable for SMOTE upsampling to even out the class counts.

In [1]:
#Importing the necessary libraries:
import cv2 as cv
import numpy as np
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
import os

In [2]:
#Initializing the values needed for all the image files
normaldir = '/kaggle/input/tuberculosis-tb-chest-xray-dataset/TB_Chest_Radiography_Database/Normal'
tbdir = '/kaggle/input/tuberculosis-tb-chest-xray-dataset/TB_Chest_Radiography_Database/Tuberculosis'
images = []
labels = []
imagesize = 256

In [3]:
#Storing all the image directories in the 'images' array and corresponding them to either 1 for TB images or 0 for normal images.
for x in os.listdir(normaldir):
    imagedir = os.path.join(normaldir, x)
    image = cv.imread(imagedir, cv.IMREAD_GRAYSCALE)
    image = cv.resize(image, (imagesize, imagesize))
    images.append(image)
    labels.append(0)
    
for y in os.listdir(tbdir):
    imagedir = os.path.join(tbdir, y)
    image = cv.imread(imagedir, cv.IMREAD_GRAYSCALE)
    image = cv.resize(image, (imagesize, imagesize))
    images.append(image)
    labels.append(1)

In [4]:
#Converting to NumPy arrays since they have more features than regular lists
images = np.array(images)
labels = np.array(labels)

#Splitting the images and labels into training and testing sets, then normalizing the values within them for computational efficiency (from 0-255 scale to 0-1 scale)
imagetrain, imagetest, labeltrain, labeltest = train_test_split(images, labels, test_size=0.3, random_state=42)
imagetrain = (imagetrain.astype('float32'))/255
imagetest = (imagetest.astype('float32'))/255

In [5]:
#Flattening the image array into 2D (making it [2940 images] x [all the pixels of the image in just one 1D array]) to be suitable for SMOTE oversampling
imagetrain = imagetrain.reshape(2940, (imagesize*imagesize))

#Performing oversampling
smote = SMOTE(random_state=42)
imagetrain, labeltrain = smote.fit_resample(imagetrain, labeltrain)

#Unflattening the images now to use them for convolutional neural network (4914 images of 256x256 size, with 1 color channel (grayscale, as compared to RGB with 3 color channels))
imagetrain = imagetrain.reshape(-1, imagesize, imagesize, 1)
print(imagetrain.shape)

(4914, 256, 256, 1)


In [6]:
#Classes balanced - equal counts of each label
print(np.unique(labeltrain, return_counts=True))

(array([0, 1]), array([2457, 2457]))


# CNN Time 🧠
Using Tensorflow's Sequential API for CNN modeling to diagnose all the patients in the testing set with a high accuracy.

In [7]:
#Importing the necessary libraries
import tensorflow as tf
import keras
from keras import layers
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

In [8]:
#The CNN model has 3 convolutional layers, each followed by pooling to summarize the features found by the layer, starting with 16 and multiplying by 2 each time for computational efficiency, as bits are structured in powers of 2. 3x3 filters and ReLU activation used.
cnn = keras.Sequential(
    [
    #Input layer, same shape as all the images (256x256x1):
    keras.Input(shape=(imagesize, imagesize, 1)),
    
    #1st convolutional layer:
    Conv2D(16, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    
    #2nd convolutional layer:
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    
    #3rd convolutional layer:
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    
    #Flattening layer for the dense layers:
    Flatten(),
    
    #1st dense layer following the convolutional layers:
    Dense(64, activation='relu'),
    
    #Dropout layer with heavy dropout rate to avoid overfitting in the large-ish dataset
    Dropout(0.5),
    
    #Output layer that squeezes each image to either 0 or 1 with sigmoid activation
    Dense(1, activation='sigmoid')
    ]
)

In [9]:
#Compiling the model with parameters best suited for the task at hand:
cnn.compile(
    loss='binary_crossentropy', #Best for binary classification
    optimizer = keras.optimizers.Adam(learning_rate=0.001), #Good starting LR for dataset of this size
    metrics=['accuracy'], #Looking for accuracy
)

In [10]:
#Fitting the model, with the ReduceLROnPlateau callback added to it to reduce the learning rate to take smaller steps in increasing the accuracy whenever the learning rate plateaus (goes in the wrong direction)
#Doing this with patience=1, meaning it will perform this if it even plateaus for one epoch, since only 10 epochs are used
#factor=0.1 means that for every time the learning rate is reduced, it is reduced by a factor of 0.1 - it also won't go lower than 0.00001
from keras.callbacks import ReduceLROnPlateau
reduce_lr = ReduceLROnPlateau(monitor='accuracy', factor=0.1, patience=1, min_lr=0.00001, verbose=1)

#Fitting the model w/ the callback. ON VS CODE, batch size of 16 makes each epoch take around a minute in this case w/ good accuracy, making the whole training process 10 min, but on Kaggle it should take longer due to less computational resources:
cnn.fit(imagetrain, labeltrain, batch_size=16, epochs=10, verbose=2, callbacks = [reduce_lr])

Epoch 1/10
308/308 - 10s - 32ms/step - accuracy: 0.8864 - loss: 0.2704 - learning_rate: 0.0010
Epoch 2/10
308/308 - 3s - 9ms/step - accuracy: 0.9571 - loss: 0.1177 - learning_rate: 0.0010
Epoch 3/10
308/308 - 3s - 9ms/step - accuracy: 0.9784 - loss: 0.0627 - learning_rate: 0.0010
Epoch 4/10
308/308 - 3s - 9ms/step - accuracy: 0.9825 - loss: 0.0531 - learning_rate: 0.0010
Epoch 5/10
308/308 - 3s - 9ms/step - accuracy: 0.9900 - loss: 0.0273 - learning_rate: 0.0010
Epoch 6/10

Epoch 6: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
308/308 - 3s - 9ms/step - accuracy: 0.9888 - loss: 0.0349 - learning_rate: 0.0010
Epoch 7/10
308/308 - 3s - 9ms/step - accuracy: 0.9949 - loss: 0.0159 - learning_rate: 1.0000e-04
Epoch 8/10
308/308 - 3s - 9ms/step - accuracy: 0.9969 - loss: 0.0095 - learning_rate: 1.0000e-04
Epoch 9/10
308/308 - 3s - 9ms/step - accuracy: 0.9980 - loss: 0.0073 - learning_rate: 1.0000e-04
Epoch 10/10

Epoch 10: ReduceLROnPlateau reducing learning rate to 1.00

<keras.src.callbacks.history.History at 0x7e1e2b69f640>

In [11]:
#Evaluating the data w/ multiple types of metrics
print('TESTING DATA:')
cnn.evaluate(imagetest, labeltest, batch_size=32, verbose=2)

print('ADVANCED TESTING METRICS:')
from sklearn.metrics import classification_report, confusion_matrix
predictions = cnn.predict(imagetest, batch_size=32)
predicted_labels = (predictions > 0.5).astype('int32')
print(classification_report(labeltest, predicted_labels))
print(confusion_matrix(labeltest, predicted_labels))

TESTING DATA:
40/40 - 2s - 47ms/step - accuracy: 0.9833 - loss: 0.1139
ADVANCED TESTING METRICS:
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step
              precision    recall  f1-score   support

           0       0.98      1.00      0.99      1043
           1       0.98      0.92      0.95       217

    accuracy                           0.98      1260
   macro avg       0.98      0.96      0.97      1260
weighted avg       0.98      0.98      0.98      1260

[[1039    4]
 [  17  200]]


Very good accuracy and loss values, and great balance of precision between the 0 and 1 classes.

In [12]:
cnn.save("tb_model.keras")

# Conclusion 🤓
Though there are obviously better models out there for diagnosing TB from x-ray scans, this model can hopefully work as a starting point to show some of you guys how using a CNN for medical purposes works and how it can benefit our society. Hopefully one day, a 100% accurate TB diagnosis model can be made and properly implemented in the healthcare system so that deaths from TB can be avoided altogether.

**But overall, thanks for reading this notebook - it means a lot. If you liked it please make sure to leave an upvote, and check out my other work as well!**

In [14]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

def predict_tumor(model, img_path, img_size=240):
    """
    Loads and preprocesses an image, runs inference using the model,
    and returns the predicted class and confidence.
    
    Assumes the model outputs a single probability (e.g., probability of TB).
    If the probability is above 0.5, the image is predicted as "TB Detected";
    otherwise "Normal".
    
    This version uses img_size=240 so that the flattened input has 240×240=57600 elements.
    """
    # Load and resize the image in grayscale mode to match model training.
    img = load_img(img_path, target_size=(img_size, img_size), color_mode="grayscale")
    # Convert the image to array and normalize pixel values to [0,1]
    img_array = img_to_array(img) / 255.0
    # Expand dims to create a batch of 1.
    img_array = np.expand_dims(img_array, axis=0)
    
    # Debug: print the preprocessed image shape.
    print("Preprocessed image shape:", img_array.shape)
    
    # Get prediction probability from the model.
    pred_prob = model.predict(img_array)[0][0]
    
    # Interpret the prediction.
    if pred_prob > 0.5:
        label = "TB Detected"
        confidence = pred_prob
    else:
        label = "Normal"
        confidence = 1 - pred_prob
    
    return label, confidence

# --- Example Usage ---
# Update these paths as needed:
model_path = "/kaggle/working/tb_model.keras"  # Path to your saved model file.
sample_img_path = "/kaggle/input/cvjdvddvdvd/Tuberculosis-12.png"  # Path to a sample image file.

# Check that the files exist.
if not os.path.isfile(model_path):
    raise FileNotFoundError(f"Model file not found: {model_path}")
if not os.path.isfile(sample_img_path):
    raise FileNotFoundError(f"Sample image not found: {sample_img_path}")

# Load the model.
model = load_model(model_path)
print("Model loaded from:", model_path)

# Get prediction using img_size=240 to match the model's expected input.
predicted_label, confidence = predict_tumor(model, sample_img_path, img_size=240)
print(f"Prediction: {predicted_label} | Confidence: {confidence:.4f}")

# Display the image with prediction overlay.
img_disp = cv2.imread(sample_img_path)
# Convert the image to grayscale for consistent display.
img_disp = cv2.cvtColor(img_disp, cv2.COLOR_BGR2GRAY)
plt.imshow(img_disp, cmap='gray')
plt.title(f"Predicted: {predicted_label} | Confidence: {confidence:.4f}")
plt.axis("off")
plt.show()


Model loaded from: /kaggle/working/tb_model.keras


ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "dense" is incompatible with the layer: expected axis -1 of input shape to have value 57600, but received input with shape (1, 43264)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(1, 224, 224, 1), dtype=float32)
  • training=False
  • mask=None

In [22]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Load the model
model_path = "/kaggle/working/tb_model.keras"
if not os.path.isfile(model_path):
    raise FileNotFoundError(f"Model file not found: {model_path}")
model = load_model(model_path)

def predict_tb(model, img_path, img_size=24):
    """
    Loads and preprocesses an image, then predicts using the TB detection model.
    This function handles both CNN and dense input models.
    """
    # Use the model's overall input shape instead of the first layer's
    input_shape = model.input_shape  # e.g., (None, height, width, channels) or (None, features)
    
    # Determine target size and color mode based on input shape
    if len(input_shape) == 2:
        # Dense model expects flattened input: input_shape = (None, features)
        needed_pixels = input_shape[1]
        # Determine target size based on known cases:
        if needed_pixels == 576:  # 24x24x1
            target_size = 24
            color_mode = "grayscale"
        elif needed_pixels == 1728:  # 24x24x3
            target_size = 24
            color_mode = "rgb"
        elif needed_pixels == 256:  # 16x16x1
            target_size = 16
            color_mode = "grayscale"
        elif needed_pixels == 768:  # 16x16x3
            target_size = 16
            color_mode = "rgb"
        else:
            # Default: assume grayscale and derive target size
            color_mode = "grayscale"
            target_size = int(np.sqrt(needed_pixels))
    else:
        # Convolutional model expects multi-dimensional input: e.g., (None, height, width, channels)
        if input_shape[1] is not None:
            target_size = input_shape[1]
        else:
            target_size = img_size
        # Determine color mode based on number of channels
        if input_shape[-1] == 3:
            color_mode = "rgb"
        else:
            color_mode = "grayscale"
    
    # Load and preprocess the image
    img = load_img(img_path, target_size=(target_size, target_size), color_mode=color_mode)
    img_array = img_to_array(img) / 255.0

    # Reshape based on model type: flatten for dense inputs, add batch dimension for conv inputs.
    if len(input_shape) == 2:
        # Flatten for dense layer
        img_array = img_array.reshape(1, -1)
    else:
        # Keep dimensions for conv layer
        img_array = np.expand_dims(img_array, axis=0)
    
    # Make prediction
    predictions = model.predict(img_array)
    
    # Handle different output formats (assume single probability output)
    pred_prob = predictions[0][0]
    if pred_prob < 0.5:
        return "Normal", 1 - pred_prob
    else:
        return "TB Detected", pred_prob

# Update the image path
img_path = "/kaggle/input/dvgdgdggdgd/Normal-64.png"
if not os.path.isfile(img_path):
    raise FileNotFoundError(f"Image file not found: {img_path}")

# Run prediction
pred_class, confidence = predict_tb(model, img_path, img_size=24)
print(f"Prediction: {pred_class} | Confidence: {confidence:.4f}")

# Display the image



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 187ms/step
Prediction: Normal | Confidence: 1.0000
