In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from skimage.feature import hog
import mahotas

# -----------------------------
# 1. Data Loading
# -----------------------------
base_path = '../Database/'
categories = ['Lung_Opacity', 'Normal', 'Viral Pneumonia']
data, labels = [], []

for category in categories:
    path = os.path.join(base_path, category)
    for img_name in os.listdir(path):
        img_path = os.path.join(path, img_name)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # Load in grayscale
        img = cv2.resize(img, (128, 128))  # Resize to fixed size
        data.append(img)
        labels.append(category)

data = np.array(data)
labels = np.array(labels)

# Encode labels into numbers
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, random_state=42
)

# -----------------------------
# 2. (Optional) Preprocessing Function
# -----------------------------
def preprocess_image(image):
    """Apply histogram equalization and sharpening."""
    hist_eq = cv2.equalizeHist(image)
    kernel = np.array([[0, -1, 0],
                       [-1, 5, -1],
                       [0, -1, 0]])
    sharpened = cv2.filter2D(hist_eq, -1, kernel)
    return sharpened

# For best performance per [main.pdf], use original images.
# Uncomment below if you want to use preprocessed images:
# X_train_processed = np.array([preprocess_image(img) for img in X_train])
# X_test_processed  = np.array([preprocess_image(img) for img in X_test])
# Here, we use original images:
X_train_processed = X_train
X_test_processed  = X_test

# -----------------------------
# 3. Feature Extraction Functions
# -----------------------------
# Updated SIFT: Compute the average descriptor (128-d) per image.
def extract_sift_features(images, feat_len=128):
    sift = cv2.SIFT_create()
    features = []
    for img in images:
        keypoints, descriptors = sift.detectAndCompute(img, None)
        if descriptors is None:
            feat = np.zeros(feat_len)
        else:
            # Compute the mean descriptor over all keypoints.
            feat = descriptors.mean(axis=0)
        features.append(feat)
    return np.array(features)

# HOG Features (using standard parameters)
def extract_hog_features(images):
    features = []
    for img in images:
        hog_features = hog(
            img, orientations=9, pixels_per_cell=(8, 8),
            cells_per_block=(2, 2), block_norm='L2-Hys', visualize=False
        )
        features.append(hog_features)
    return np.array(features)

# Updated LBP: Compute a normalized histogram from LBP codes.
def extract_lbp_features(images, num_bins=18):
    features = []
    for img in images:
        # Compute LBP with radius=2 and 16 points (as in your code)
        lbp = mahotas.features.lbp(img, radius=2, points=16, ignore_zeros=False)
        # Compute histogram; bins are from 0 to num_bins
        hist, _ = np.histogram(lbp, bins=np.arange(0, num_bins + 1), density=True)
        features.append(hist)
    return np.array(features)

# -----------------------------
# 4. Extract and Combine Features
# -----------------------------
# Extract features for training set
sift_features = extract_sift_features(X_train_processed)
hog_features  = extract_hog_features(X_train_processed)
lbp_features  = extract_lbp_features(X_train_processed)
X_train_features = np.hstack((sift_features, hog_features, lbp_features))

# Extract features for test set
sift_test_features = extract_sift_features(X_test_processed)
hog_test_features  = extract_hog_features(X_test_processed)
lbp_test_features  = extract_lbp_features(X_test_processed)
X_test_features = np.hstack((sift_test_features, hog_test_features, lbp_test_features))

# -----------------------------
# 5. Feature Scaling
# -----------------------------
scaler = StandardScaler()
X_train_features = scaler.fit_transform(X_train_features)
X_test_features = scaler.transform(X_test_features)

# -----------------------------
# 6. Model Training with Hyperparameter Tuning
# -----------------------------
param_grid = {
    'C': [0.1, 1, 10, 100],
    'max_iter': [100, 500, 1000]
}
grid = GridSearchCV(
    LogisticRegression(random_state=42, solver='lbfgs'),
    param_grid, cv=5, n_jobs=-1
)
grid.fit(X_train_features, y_train)
print("Best parameters:", grid.best_params_)
model = grid.best_estimator_

# -----------------------------
# 7. Evaluation
# -----------------------------
y_pred = model.predict(X_test_features)
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))



Best parameters: {'C': 0.1, 'max_iter': 100}
Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.83      0.83       253
           1       0.81      0.81      0.81       231
           2       0.96      0.98      0.97       211

    accuracy                           0.87       695
   macro avg       0.87      0.87      0.87       695
weighted avg       0.87      0.87      0.87       695

Accuracy: 0.8661870503597122


In [2]:
# -----------------------------
# 8. Prediction on a New Image
# -----------------------------
def preprocess_and_extract(image):
    # For prediction, you can choose to use the original or preprocessed image.
    # Here we use the original image.
    sift_feat = extract_sift_features([image])
    hog_feat  = extract_hog_features([image])
    lbp_feat  = extract_lbp_features([image])
    features = np.hstack((sift_feat, hog_feat, lbp_feat))
    return scaler.transform(features)

new_image = cv2.imread('../Sample_Test/VN1/5.jpeg', cv2.IMREAD_GRAYSCALE)
new_image = cv2.resize(new_image, (128, 128))
new_features = preprocess_and_extract(new_image)
prediction = model.predict(new_features)
print("Predicted Class:", label_encoder.inverse_transform(prediction))


Predicted Class: ['Viral Pneumonia']
