## Neural network for identifying lung diseases from a chest X-ray dataset

In [4]:
# Importing necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import kagglehub
import os
from PIL import Image


In [5]:
# Dataset location. Defaults to the original local checkout; set the
# CHEST_XRAY_DATASET_PATH environment variable to run the notebook on another
# machine without editing this cell.
path = os.environ.get(
    "CHEST_XRAY_DATASET_PATH",
    r"C:\HTL\ARX\5AHETS\Chest-X-Ray-Disease-Identifying\2. Dataset\chest-xray-dataset",
)

# Size every image is resized to (keeps the feature vector reasonable:
# 128 * 128 = 16384 grayscale values per sample).
IMG_SIZE = (128, 128)

# Class folder name -> integer label used for training.
CLASS_MAP = {"Normal": 0, "Pneumonia": 1, "Tuberculosis": 2}

In [6]:
# Helper: load images from a class folder and return flattened arrays + labels
def load_folder(folder, label, size=IMG_SIZE):
    """Load every readable image in *folder* as a flattened grayscale vector.

    Args:
        folder: Directory holding the image files of a single class.
        label: Class label attached to every image loaded from this folder.
        size: Target size passed to ``Image.resize``.

    Returns:
        A ``(X, y)`` pair of lists. ``X`` holds one flattened array per
        successfully loaded image; ``y`` holds ``label`` once per such image.
        Files that cannot be read as images are skipped with a warning.
    """
    import warnings

    X, y = [], []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order stable so any seeded shuffling downstream is reproducible.
    for fname in sorted(os.listdir(folder)):
        fp = os.path.join(folder, fname)
        if not os.path.isfile(fp):
            continue
        try:
            # The context manager releases the file handle as soon as the
            # pixel data has been copied into the array.
            with Image.open(fp) as img:
                gray = img.convert("L").resize(size)  # grayscale + resize
                arr = np.array(gray).flatten()
        except Exception as exc:
            # Deliberately best-effort: one corrupt or non-image file must not
            # abort the whole load, but report it instead of hiding it.
            warnings.warn(f"Skipping unreadable image {fp!r}: {exc}")
            continue
        X.append(arr)
        y.append(label)
    return X, y

In [None]:
# Load train + val folders (combined); pass other subset names to include e.g. test
def load_dataset(base_path, subsets=("train", "val")):
    """Load all class folders of the given subsets into feature/label arrays.

    For each subset name and each class in ``CLASS_MAP`` the folder
    ``base_path/<subset>/<class>`` is loaded with ``load_folder``.

    Args:
        base_path: Root directory of the dataset.
        subsets: Names of the subset sub-directories to combine.

    Returns:
        A ``(X, y)`` pair of NumPy arrays built from the collected samples
        and their labels. Folders that do not exist are skipped with a
        warning.
    """
    import warnings

    X_all, y_all = [], []
    for s in subsets:
        for cls, lbl in CLASS_MAP.items():
            folder = os.path.join(base_path, s, cls)
            if not os.path.isdir(folder):
                # A missing folder usually means a wrong path or layout; say so
                # instead of silently returning fewer (or zero) samples.
                warnings.warn(f"Class folder not found, skipping: {folder!r}")
                continue
            Xi, yi = load_folder(folder, lbl)
            X_all.extend(Xi)
            y_all.extend(yi)
    return np.array(X_all), np.array(y_all)

X, y = load_dataset(path, subsets=("train", "val"))
# Fail fast with a clear message: an empty dataset would otherwise only show
# up later as an obscure error from the train/test split.
if X.size == 0:
    raise RuntimeError(
        f"No images were loaded from {path!r}; check the dataset path and folder layout."
    )
print("Loaded samples:", X.shape, y.shape)

In [None]:
# Hold out 20% of the loaded samples for evaluation. The split is stratified
# on y and uses a fixed seed so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Standardise the features: the scaler is fitted on the training part only and
# the same transformation is then applied to both parts.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Fit a small multilayer perceptron: one hidden layer of 200 units, Adam
# solver, at most 50 iterations, progress printed during training.
mlp = MLPClassifier(
    hidden_layer_sizes=(200,),
    solver="adam",
    max_iter=50,
    random_state=42,
    verbose=True,
)
mlp.fit(X_train, y_train)

In [None]:
# Evaluate on the held-out split
y_pred = mlp.predict(X_test)

# Order the class names by their integer label and pass the label ids
# explicitly. Without `labels`, the report raises when a class is absent from
# y_test/y_pred, and the name/label pairing relies on dict insertion order.
class_names = sorted(CLASS_MAP, key=CLASS_MAP.get)
class_ids = [CLASS_MAP[name] for name in class_names]

print("\nClassification Report:")
print(classification_report(y_test, y_pred, labels=class_ids, target_names=class_names))
# Same explicit labels so the matrix rows/columns always follow class_names.
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=class_ids))