### The example is derived from [AI-sarwar-praject](https://www.kaggle.com/code/laraibakhtar/ai-sarwar-praject), a Kaggle notebook.

This notebook is based on a Kaggle example, [AI-sarwar-praject](https://www.kaggle.com/code/laraibakhtar/ai-sarwar-praject). The Kaggle notebook code was distributed under the [Apache 2.0 License](http://www.apache.org/licenses/LICENSE-2.0). As the license requires, a copy of the license text is included in the same directory as this notebook, in the file [AI-SARWAR-LICENSE](AI-SARWAR-LICENSE).

### Setup

You first need to run the code in [make_h5py.ipynb](make_h5py.ipynb) to generate the image database this model uses for training.


In [None]:
# import pandas as pd
import numpy as np
import os
import tensorflow as tf
# import cv2
import h5py
# from PIL import Image
import keras
from keras.layers import Dense,Input, InputLayer, Flatten
from keras.models import Sequential, Model, load_model
from  matplotlib import pyplot as plt
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.metrics import classification_report, confusion_matrix,f1_score

In [None]:
# View some samples of the dataset: one row of randomly chosen images per
# class folder (Normal on the first row, Tuberculosis on the second).
import random

SAMPLES_PER_CLASS = 5
class_folders = [
    r'../TB_Chest_Radiography_Database/Normal',
    r'../TB_Chest_Radiography_Database/Tuberculosis',
]

plt.figure(figsize=(20,9))
for row, folder in enumerate(class_folders):
    # List the directory once per class instead of once per displayed image.
    files = os.listdir(folder)
    # Sample without replacement so the same image is never shown twice;
    # clamp the count so a folder with fewer files does not raise.
    chosen = random.sample(files, min(SAMPLES_PER_CLASS, len(files)))
    for col, file in enumerate(chosen):
        image_path = os.path.join(folder, file)
        img = mpimg.imread(image_path)
        ax = plt.subplot(len(class_folders), SAMPLES_PER_CLASS,
                         row * SAMPLES_PER_CLASS + col + 1)
        ax.title.set_text(file)
        # cmap only affects single-channel images (it is ignored for RGB(A)
        # data), so grayscale X-rays are not drawn with the false-colour default.
        plt.imshow(img, cmap='gray')
plt.tight_layout()
plt.show()

In [None]:
# Read the image database from disk (presumably the file generated by
# make_h5py.ipynb -- see the Setup section).
dataset_dir = "../TB_Chest_Radiography_Database"
with h5py.File(f"{dataset_dir}/tb-xraydb.h5py", "r") as database:
    # Slicing with [:] reads each dataset fully into memory, so the arrays
    # stay usable after the file is closed at the end of the `with` block.
    images, labels = database['images'][:], database['labels'][:]

In [None]:
from sklearn.preprocessing import LabelEncoder

# Map every distinct class label to an integer id. Note that this is integer
# encoding (one number per sample), not one-hot encoding.
label_encoder = LabelEncoder().fit(labels)
encoded_class_names = label_encoder.transform(labels)

In [None]:
from sklearn.model_selection import train_test_split


# Split dataset into training and testing sets (33% held out for testing;
# the fixed random_state keeps the split reproducible between runs).
# NOTE(review): the split is not stratified; if the classes are imbalanced,
# consider passing stratify=encoded_class_names -- confirm before changing,
# since it alters which samples land in each split.
X_train, X_test, y_train, y_test = train_test_split(images, encoded_class_names, test_size=0.33, random_state=42)

# Make sure everything downstream receives NumPy arrays (no copy is made when
# the inputs already are arrays).
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

# Check the shapes of the datasets. Each caption names the array that is
# actually printed next to it.
print("Train images shape:", X_train.shape)
print("Train labels shape:", y_train.shape)
print("Test images shape:", X_test.shape)
print("Test labels shape:", y_test.shape)

In [None]:
# Size the output layer from the classes the label encoder actually found,
# rather than hard-coding a class count that does not match the dataset.
num_classes = len(label_encoder.classes_)

# Small CNN: two Conv -> LeakyReLU -> MaxPool -> Dropout stages, global max
# pooling over the spatial dimensions, one dense hidden layer, and a softmax
# output with one unit per class.
model = keras.Sequential([
    # Input height/width are taken from the training images; a single
    # channel is declared explicitly.
    keras.Input(shape=(X_train.shape[1], X_train.shape[2], 1)),
    keras.layers.Conv2D(128, (3, 3)),
    keras.layers.LeakyReLU(alpha=0.02),

    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Dropout(0.25),

    keras.layers.Conv2D(128, (3, 3)),
    keras.layers.LeakyReLU(alpha=0.02),

    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Dropout(0.25),

    keras.layers.GlobalMaxPooling2D(),

    keras.layers.Dense(512),
    keras.layers.LeakyReLU(alpha=0.02),
    keras.layers.Dropout(0.5),

    keras.layers.Dense(num_classes),
    keras.layers.Activation('softmax')
])

# The targets are integer class ids (from LabelEncoder), not one-hot vectors,
# so the sparse variant of categorical cross-entropy is the matching loss.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()

In [None]:
# Compile the model with Adam driven by an exponentially decaying learning
# rate: starts at initial_lr and is multiplied by 0.9 every 10000 steps.
initial_lr = 0.0001
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_lr, decay_steps=10000, decay_rate=0.9
)
optimizer = Adam(learning_rate=lr_schedule)
# Pass the configured optimizer object; the string 'adam' would create a
# fresh default Adam and silently discard the schedule built above.
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Stop training once the validation loss has failed to improve for 5
# consecutive epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=5)

# 20% of the training data is held back for validation. The callback must be
# handed to fit() explicitly, otherwise early stopping never takes effect.
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    validation_split=0.2,
    callbacks=[early_stopping],
)

In [None]:
# Plotting the training history
def plot_history(history):
    """Show training and validation accuracy and loss curves side by side.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            the 'accuracy', 'val_accuracy', 'loss' and 'val_loss' series
            (in this notebook, the value returned by ``model.fit``).
    """
    # (training key, validation key, panel title, y-axis label) per panel.
    panels = (
        ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Model loss', 'Loss'),
    )

    plt.figure(figsize=(12, 5))
    for position, (train_key, val_key, title, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[train_key])
        plt.plot(history.history[val_key])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'], loc='upper left')

    plt.show()

plot_history(history)

In [None]:
# Evaluate on the held-out test set: the model outputs one score per class
# and the highest-scoring class becomes the predicted label.
class_scores = model.predict(X_test)
predicted_labels = np.argmax(class_scores, axis=1)
#predicted_class_names = label_encoder.inverse_transform(predicted_labels)

# Macro-averaged F1 weights every class equally regardless of its frequency.
f1 = f1_score(y_test, predicted_labels, average='macro')
print("F1 Score:", f1)

report = classification_report(y_test, predicted_labels)
print("Classification Report:")
print(report)

import seaborn as sns

# Confusion matrix drawn as an annotated heatmap (rows: true labels,
# columns: predicted labels).
tick_names = ['NORM', 'TB']
conf_matrix = confusion_matrix(y_test, predicted_labels)
plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=tick_names,
    yticklabels=tick_names,
)
plt.title('Confusion Matrix')
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()