In [1]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from skimage.feature import hog
import mahotas

# -----------------------------
# 1. Data Loading
# -----------------------------
base_path = '../Database/'
categories = ['Lung_Opacity', 'Normal', 'Viral Pneumonia']
data, labels = [], []

for category in categories:
    path = os.path.join(base_path, category)
    for img_name in os.listdir(path):
        img_path = os.path.join(path, img_name)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # Load in grayscale
        if img is None:
            continue
        img = cv2.resize(img, (128, 128))  # Resize to fixed size
        data.append(img)
        labels.append(category)

data = np.array(data)
labels = np.array(labels)

# Encode labels to numeric values
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, random_state=42
)

# -----------------------------
# 2. (Optional) Preprocessing Function
# -----------------------------
def preprocess_image(image):
    """Apply histogram equalization and sharpening."""
    hist_eq = cv2.equalizeHist(image)
    kernel = np.array([[0, -1, 0],
                       [-1, 5, -1],
                       [0, -1, 0]])
    sharpened = cv2.filter2D(hist_eq, -1, kernel)
    return sharpened

# For best performance per the paper, use original images.
# Uncomment below to use preprocessed images instead:
# X_train_proc = np.array([preprocess_image(img) for img in X_train])
# X_test_proc  = np.array([preprocess_image(img) for img in X_test])
X_train_proc = X_train  # Using original images
X_test_proc  = X_test

# -----------------------------
# 3. Feature Extraction Functions
# -----------------------------
# Enhanced SIFT: Compute both mean and standard deviation of the descriptors.
def extract_sift_features(images, feat_len=128):
    sift = cv2.SIFT_create()
    features = []
    for img in images:
        keypoints, descriptors = sift.detectAndCompute(img, None)
        if descriptors is None:
            # If no descriptors are found, use zeros for both mean and std
            feat = np.zeros(feat_len * 2)
        else:
            feat_mean = descriptors.mean(axis=0)
            feat_std  = descriptors.std(axis=0)
            feat = np.hstack([feat_mean, feat_std])
        features.append(feat)
    return np.array(features)

# HOG Features (using standard parameters)
def extract_hog_features(images):
    features = []
    for img in images:
        hog_features = hog(
            img, orientations=9, pixels_per_cell=(8, 8),
            cells_per_block=(2, 2), block_norm='L2-Hys', feature_vector=True
        )
        features.append(hog_features)
    return np.array(features)

# LBP Features: Compute a normalized histogram from LBP codes.
def extract_lbp_features(images, num_points=16, radius=2, n_bins=18):
    features = []
    for img in images:
        # Compute LBP codes
        lbp = mahotas.features.lbp(img, radius=radius, points=num_points, ignore_zeros=False)
        # Compute histogram over a fixed number of bins and normalize it
        hist, _ = np.histogram(lbp, bins=np.arange(0, n_bins + 1), density=True)
        features.append(hist)
    return np.array(features)

# -----------------------------
# 4. Extract and Combine Features
# -----------------------------
# Extract features for training set
sift_train = extract_sift_features(X_train_proc)        # Each becomes 256-d vector
hog_train  = extract_hog_features(X_train_proc)          # Dimension depends on HOG parameters
lbp_train  = extract_lbp_features(X_train_proc)          # n_bins (18) features

# Combine features (concatenate along axis=1)
X_train_features = np.hstack((sift_train, hog_train, lbp_train))

# Extract features for test set
sift_test = extract_sift_features(X_test_proc)
hog_test  = extract_hog_features(X_test_proc)
lbp_test  = extract_lbp_features(X_test_proc)
X_test_features = np.hstack((sift_test, hog_test, lbp_test))

# -----------------------------
# 5. Feature Scaling
# -----------------------------
scaler = StandardScaler()
X_train_features = scaler.fit_transform(X_train_features)
X_test_features = scaler.transform(X_test_features)

# -----------------------------
# 6. Model Training with Hyperparameter Tuning
# -----------------------------
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'max_iter': [1000, 2000, 3000]  # Increased iterations to help convergence
}

grid = GridSearchCV(
    LogisticRegression(random_state=42, solver='lbfgs'),
    param_grid, cv=5, n_jobs=-1
)
grid.fit(X_train_features, y_train)
print("Best parameters:", grid.best_params_)
model = grid.best_estimator_

# -----------------------------
# 7. Evaluation
# -----------------------------
y_pred = model.predict(X_test_features)
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))



STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Best parameters: {'C': 0.01, 'max_iter': 500}
Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.83      0.84       253
           1       0.81      0.81      0.81       231
           2       0.97      0.97      0.97       211

    accuracy                           0.87       695
   macro avg       0.87      0.87      0.87       695
weighted avg       0.87      0.87      0.87       695

Accuracy: 0.8676258992805755


In [None]:
# -----------------------------
# 8. Prediction on a New Image
# -----------------------------
def preprocess_and_extract(image):
    # Optionally, preprocess the image. Here we use the original.
    # image = preprocess_image(image)
    sift_feat = extract_sift_features([image])
    hog_feat  = extract_hog_features([image])
    lbp_feat  = extract_lbp_features([image])
    features = np.hstack((sift_feat, hog_feat, lbp_feat))
    return scaler.transform(features)

new_image = cv2.imread('../Sample_Test/VN1/5.jpeg', cv2.IMREAD_GRAYSCALE)
new_image = cv2.resize(new_image, (128, 128))
new_features = preprocess_and_extract(new_image)
prediction = model.predict(new_features)
print("Predicted Class:", label_encoder.inverse_transform(prediction))
