In [4]:
import os
import glob
import cv2
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Seed handed to both the train/test split and the MLP so runs are repeatable
RANDOM_STATE = 69

# Folder that load_dataset scans; each immediate sub-folder is one class
DATA_ROOT = "data/ct_sections"

# Side length in pixels that every image is resized to (kept small so the
# flattened vector stays a manageable input for an MLP)
IMG_SIZE = 64

In [None]:
import kagglehub

# Fetch the dataset through kagglehub and keep the location it returns.
# NOTE(review): `path` is not read by the loading cell, which scans DATA_ROOT
# instead -- confirm DATA_ROOT points at the downloaded class folders.
path = kagglehub.dataset_download(
    "kabil007/lungcancer4types-imagedataset"
)
print("Path to dataset files:", path)

In [5]:
def load_dataset(root_dir, img_size=64, patterns=("*.png",)):
    """Load a folder-per-class image dataset as flattened grayscale vectors.

    Every immediate sub-directory of ``root_dir`` is treated as one class.
    Classes are numbered in sorted-name order. Each matching image is read
    as grayscale, resized to ``img_size x img_size``, divided by 255 and
    flattened. Files that OpenCV cannot decode are skipped.

    Parameters
    ----------
    root_dir : str
        Directory whose immediate sub-directories are the classes.
    img_size : int, default 64
        Side length (pixels) of the resized square image.
    patterns : str or iterable of str, default ("*.png",)
        Glob pattern(s) selecting image files inside each class folder,
        e.g. ``("*.png", "*.jpg")``.

    Returns
    -------
    X : np.ndarray of float32, shape (n_images, img_size * img_size)
        One flattened image per row. The array stays 2-D even when no
        image is found, so downstream code sees a consistent feature axis.
    y : np.ndarray of int64, shape (n_images,)
        Class index of each row of ``X``.
    class_names : list of str
        Class folder names; position ``i`` is the name for label ``i``.

    Raises
    ------
    FileNotFoundError
        Propagated from ``os.listdir`` when ``root_dir`` does not exist.
    """
    # A lone string would otherwise be iterated character by character.
    if isinstance(patterns, str):
        patterns = (patterns,)

    class_names = sorted(
        entry for entry in os.listdir(root_dir)
        if os.path.isdir(os.path.join(root_dir, entry))
    )

    features = []
    labels = []
    for class_idx, cls_name in enumerate(class_names):
        # Escape the directory part so glob metacharacters such as "[" in a
        # folder name are matched literally instead of as a pattern.
        class_dir = glob.escape(os.path.join(root_dir, cls_name))

        # The set drops files matched by more than one pattern; sorting makes
        # the sample order independent of filesystem enumeration order, which
        # a seeded train/test split needs to be reproducible.
        img_paths = sorted({
            found
            for pattern in patterns
            for found in glob.glob(os.path.join(class_dir, pattern))
        })

        for img_path in img_paths:
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # Unreadable or corrupt file: skip it, keep loading the rest.
                continue

            img = cv2.resize(img, (img_size, img_size))
            features.append((img.astype(np.float32) / 255.0).ravel())
            labels.append(class_idx)

    # reshape keeps the feature axis even for an empty dataset (0 rows).
    X = np.array(features, dtype=np.float32).reshape(-1, img_size * img_size)
    y = np.array(labels, dtype=np.int64)
    return X, y, class_names


# Read every class folder under DATA_ROOT into flattened feature vectors
X, y, class_names = load_dataset(root_dir=DATA_ROOT, img_size=IMG_SIZE)

print("Dataset shape:", X.shape, "Labels shape:", y.shape)
print("Classes:", class_names)


FileNotFoundError: [WinError 3] The system cannot find the path specified: 'data/ct_sections'

In [3]:
# Hold out 20% of the samples for testing; stratifying on y keeps the class
# proportions the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=RANDOM_STATE
)

# Fit the scaler (zero mean, unit variance per feature) on the training split
# only, then apply that same transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

NameError: name 'X' is not defined

In [6]:
mlp = MLPClassifier(
    # --- architecture: two hidden layers ---
    hidden_layer_sizes=(512, 256),
    activation='relu',
    # --- optimisation ---
    solver='adam',
    learning_rate_init=1e-3,
    batch_size=64,
    shuffle=True,
    max_iter=50,                 # raise this if the model underfits
    alpha=1e-4,                  # L2 regularization strength
    # --- early stopping: 10% of the training data is held out as validation ---
    early_stopping=True,
    validation_fraction=0.1,
    n_iter_no_change=10,
    # --- reproducibility / logging ---
    random_state=RANDOM_STATE,
    verbose=True,                # print training progress
)

mlp.fit(X_train_scaled, y_train)

NameError: name 'X_train_scaled' is not defined

In [7]:
# Predict on the held-out, already-scaled test split
y_pred = mlp.predict(X_test_scaled)

# Heading and body are separated by a newline, matching two separate prints
print(
    "Classification report:",
    classification_report(y_test, y_pred, target_names=class_names),
    sep="\n",
)
print("Confusion matrix:", confusion_matrix(y_test, y_pred), sep="\n")

NameError: name 'X_test_scaled' is not defined

In [None]:
def predict_ct_section(img_path, model, scaler, img_size=64, class_names=None):
    """Classify a single image file with a fitted model and scaler.

    The image is read as grayscale, resized to ``img_size x img_size``,
    divided by 255, flattened to one row, passed through
    ``scaler.transform`` and then ``model.predict_proba``.

    Parameters
    ----------
    img_path : str
        Path of the image to classify.
    model : object
        Fitted classifier exposing ``predict_proba``. If it also exposes an
        integer-valued ``classes_`` attribute, that is used to translate the
        winning probability column into a class label.
    scaler : object
        Fitted transformer exposing ``transform``.
    img_size : int, default 64
        Side length the image is resized to.
    class_names : sequence of str or None, default None
        Names indexed by integer class label. When ``None`` or empty, the
        integer label itself is returned.

    Returns
    -------
    pred_class : str or int
        ``class_names[label]`` when names are available, else the label.
    probs : np.ndarray
        Probability row returned by ``model.predict_proba``.

    Raises
    ------
    ValueError
        If the image cannot be read.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError(f"Could not read image: {img_path}")

    img = cv2.resize(img, (img_size, img_size))
    x = (img.astype(np.float32) / 255.0).reshape(1, -1)

    probs = model.predict_proba(scaler.transform(x))[0]
    pred_idx = np.argmax(probs)

    # predict_proba columns are ordered like model.classes_, which differs
    # from 0..n-1 when some label never appeared in the training data. Map the
    # column back to its label; non-integer labels keep the column index.
    label = pred_idx
    model_classes = getattr(model, "classes_", None)
    if model_classes is not None and isinstance(
        model_classes[pred_idx], (int, np.integer)
    ):
        label = model_classes[pred_idx]

    # Explicit None/length check: truth-testing a NumPy array of names raises.
    if class_names is not None and len(class_names) > 0:
        pred_class = class_names[label]
    else:
        pred_class = label
    return pred_class, probs


# NOTE(review): this looks like a placeholder file name -- point it at a real
# CT section image before running.
test_img = "some_ct_section.png"

pred_class, probs = predict_ct_section(
    test_img, mlp, scaler, img_size=IMG_SIZE, class_names=class_names
)
print("Predicted class:", pred_class)
print("Probabilities:", probs)