In [None]:
import os
import cv2
import numpy as np
import mahotas
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from skimage.feature import hog
from scipy.ndimage import rotate

# ====================================
# 1. Data Loading and Augmentation
# ====================================

# Root folder of the dataset; load_images() expects one sub-folder per category.
base_path = 'C:/Users/Aakansh/Lung Classification/Lung-X-Ray-Project/Database/'
# Sub-folder names, which are also used as the class labels.
categories = ['Lung_Opacity', 'Normal', 'Viral Pneumonia']
# Index-aligned accumulators filled in place by load_images().
data = []
labels = []

def load_images():
    """Read every category's images into the module-level ``data``/``labels`` lists.

    For each name in ``categories`` the folder ``base_path/<category>`` is
    scanned. Each entry is read as a grayscale image, resized to 128x128 and
    appended to ``data``; the category name is appended to ``labels`` at the
    same index. Entries for which ``cv2.imread`` returns ``None`` are skipped.

    Directory entries are visited in sorted order. ``os.listdir`` returns
    names in arbitrary order, which would make the sample order (and hence
    the seeded train/test split further down) vary between runs or machines.
    """
    for category in categories:
        path = os.path.join(base_path, category)
        for img_name in sorted(os.listdir(path)):
            img_path = os.path.join(path, img_name)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # load as grayscale
            if img is None:
                # Nothing usable was read for this entry; skip it.
                continue
            data.append(cv2.resize(img, (128, 128)))
            labels.append(category)

# A simple augmentation: rotation, horizontal flip, and shifting
def augment_image(img):
    """Return the original image together with three augmented variants.

    Args:
        img: 2-D image array (``img.shape`` is unpacked into rows and
            columns, so exactly two dimensions are required).

    Returns:
        A list of four arrays in this order: the original image, a copy
        rotated by a random angle in [-10, 10) degrees, a horizontally
        flipped copy, and a copy translated by a random offset in [-5, 5)
        pixels along each axis. All variants keep the input's shape and dtype.
    """
    augmented = [img]

    # Rotate by a random angle between -10 and 10 degrees.
    angle = np.random.uniform(-10, 10)
    rotated = rotate(img, angle, reshape=False, mode='nearest')
    # rotate() already returns values in the input's own range and dtype, so
    # no rescaling is needed. The previous `rotated * 255` wrapped around for
    # uint8 images (x * 255 mod 256) and corrupted this variant.
    augmented.append(rotated.astype(img.dtype, copy=False))

    # Horizontal flip (flip code 1 = around the vertical axis).
    augmented.append(cv2.flip(img, 1))

    # Shift (translate) the image by a random sub-5-pixel offset.
    rows, cols = img.shape
    shift_x = np.random.uniform(-5, 5)
    shift_y = np.random.uniform(-5, 5)
    M = np.float32([[1, 0, shift_x], [0, 1, shift_y]])
    augmented.append(cv2.warpAffine(img, M, (cols, rows)))

    return augmented

print("Loading images...")
load_images()

# Turn the accumulated Python lists into arrays
data = np.array(data)
labels = np.array(labels)

# Map category names to integer class ids
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Hold out 20% of the samples for testing; the rest is used for training
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels_encoded,
    test_size=0.2,
    random_state=42,
)

# Expand the training set only: every training image contributes all of the
# variants returned by augment_image(), each carrying the source image's label
print("Augmenting training images...")
augmented_images = []
augmented_labels = []
for sample, target in zip(X_train, y_train):
    variants = augment_image(sample)
    augmented_images += variants
    augmented_labels += [target] * len(variants)

X_train_aug = np.array(augmented_images)
y_train_aug = np.array(augmented_labels)

# ====================================
# 2. Preprocessing
# ====================================

def preprocess_image(image):
    """
    Equalize the image histogram, then sharpen the equalized image.

    Returns the result of convolving the equalized image with a 3x3
    sharpening kernel (centre weight 5, -1 for the four direct neighbours).
    """
    sharpen_kernel = np.array([
        [0, -1, 0],
        [-1, 5, -1],
        [0, -1, 0],
    ])
    equalized = cv2.equalizeHist(image)
    # ddepth=-1: the output keeps the same depth as the input
    return cv2.filter2D(equalized, -1, sharpen_kernel)

# Run the same preprocessing over the augmented training set and the test set
X_train_processed = np.array(list(map(preprocess_image, X_train_aug)))
X_test_processed = np.array(list(map(preprocess_image, X_test)))

# ====================================
# 3. Feature Extraction Functions
# ====================================

def extract_sift_features(images, max_features=256):
    """
    Build one fixed-length SIFT vector per image.

    Each image's descriptor matrix is flattened; the result is zero-padded
    on the right or cut off so that it holds exactly `max_features` values.
    Note that `max_features` therefore counts scalar values of the flattened
    matrix, not keypoints. Images without descriptors yield an all-zero vector.
    """
    detector = cv2.SIFT_create()
    vectors = []
    for image in images:
        _, desc = detector.detectAndCompute(image, None)
        if desc is None:
            # No keypoints detected: stand in a single all-zero descriptor row
            desc = np.zeros((1, 128))
        vector = desc.flatten()
        shortfall = max_features - len(vector)
        if shortfall > 0:
            vector = np.pad(vector, (0, shortfall), mode='constant')
        else:
            vector = vector[:max_features]
        vectors.append(vector)
    return np.array(vectors)

def extract_hog_features(images):
    """
    Compute a HOG feature vector for every image and stack them in an array.

    Uses 9 orientations, 4x4-pixel cells (a small cell size for more detail),
    2x2-cell blocks and L1-sqrt block normalization.
    """
    hog_params = dict(
        orientations=9,
        pixels_per_cell=(4, 4),
        cells_per_block=(2, 2),
        block_norm='L1-sqrt',
        visualize=False,
        feature_vector=True,
    )
    return np.array([hog(image, **hog_params) for image in images])

def extract_lbp_features(images, radius=3, points=16):
    """
    Compute mahotas LBP features for every image and stack them in an array.

    `radius` and `points` are forwarded unchanged to `mahotas.features.lbp`.
    Note: the default for `points` was lowered from 24 to 16 to avoid a
    MemoryError.
    """
    histograms = [
        mahotas.features.lbp(image, radius=radius, points=points, ignore_zeros=False)
        for image in images
    ]
    return np.array(histograms)

# ====================================
# 4. Extract and Combine Features
# ====================================

print("Extracting features from training images...")
sift_train = extract_sift_features(X_train_processed, max_features=256)
hog_train = extract_hog_features(X_train_processed)
lbp_train = extract_lbp_features(X_train_processed, radius=3, points=16)

# Place the three descriptor families side by side: one row per image
X_train_features = np.hstack([sift_train, hog_train, lbp_train])

print("Extracting features from test images...")
sift_test = extract_sift_features(X_test_processed, max_features=256)
hog_test = extract_hog_features(X_test_processed)
lbp_test = extract_lbp_features(X_test_processed, radius=3, points=16)

# Same column layout as the training matrix
X_test_features = np.hstack([sift_test, hog_test, lbp_test])

# ====================================
# 5. Scaling Features
# ====================================

# Learn the per-feature [0, 1] scaling from the training features only,
# then apply that same scaling to both training and test features
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(X_train_features)
X_train_scaled = scaler.transform(X_train_features)
X_test_scaled = scaler.transform(X_test_features)

# ====================================
# 6. Train the Classifier
# ====================================

print("Training Logistic Regression classifier...")
model = LogisticRegression(
    penalty='l2',
    C=20,
    solver='saga',
    max_iter=2000,
    random_state=42,
    n_jobs=-1,
)
model.fit(X_train_scaled, y_train_aug)

# ====================================
# 7. Model Evaluation
# ====================================

# Score the held-out test set
y_pred = model.predict(X_test_scaled)
print("Classification Report:")
report = classification_report(y_test, y_pred)
print(report)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# ====================================
# 8. Predict on a New Image
# ====================================

def preprocess_and_extract(image):
    """
    Turn a single image into a scaled feature row ready for `model.predict`.

    The image is resized to 128x128 and preprocessed, then its SIFT, HOG and
    LBP features are concatenated in that order and passed through the
    module-level `scaler`.
    """
    prepared = preprocess_image(cv2.resize(image, (128, 128)))
    # The extractors operate on collections of images, so wrap the single image
    batch = [prepared]
    combined = np.hstack((
        extract_sift_features(batch, max_features=256),
        extract_hog_features(batch),
        extract_lbp_features(batch, radius=3, points=16),
    ))
    return scaler.transform(combined)

# Example: classify a single image read from disk
new_image_path = 'C:/Users/Aakansh/Lung Classification/Sample_Test/LO/3.jpg'
new_image = cv2.imread(new_image_path, cv2.IMREAD_GRAYSCALE)
if new_image is None:
    # imread gave nothing back for this path
    print("New image not found at", new_image_path)
else:
    new_features = preprocess_and_extract(new_image)
    prediction = model.predict(new_features)
    # Translate the numeric class id back into its category name
    predicted_class = label_encoder.inverse_transform(prediction)
    print("Predicted Class:", predicted_class)

Loading images...
Augmenting training images...
Extracting features from training images...
Extracting features from test images...
Training Logistic Regression classifier...
