In [1]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd
from google.colab import drive
drive.mount('/content/drive')



# Load images and labels
base_path = '/content/drive/My Drive/Lung X-Ray Project/Database'
categories = ['Lung_Opacity', 'Normal', 'Viral Pneumonia']
data, labels = [], []

for category in categories:
    path = os.path.join(base_path, category)
    label = category
    for img_name in os.listdir(path):
        img_path = os.path.join(path, img_name)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # Load in grayscale
        img = cv2.resize(img, (128, 128))  # Resize to a fixed size
        data.append(img)
        labels.append(label)

# Convert to arrays
data = np.array(data)
labels = np.array(labels)

# Encode labels
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# New Section

In [2]:
def preprocess_image(image):
    # Histogram equalization
    hist_eq = cv2.equalizeHist(image)

    # Sharpening using a kernel
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    sharpened = cv2.filter2D(hist_eq, -1, kernel)

    return sharpened

X_train_processed = np.array([preprocess_image(img) for img in X_train])
X_test_processed = np.array([preprocess_image(img) for img in X_test])


In [3]:
!pip install mahotas

import mahotas


# SIFT
def extract_sift_features(images, max_features=128):
    sift = cv2.SIFT_create()
    features = []
    for img in images:
        keypoints, descriptors = sift.detectAndCompute(img, None)
        if descriptors is None:
            # If no descriptors are found, use a zero vector
            descriptors = np.zeros((1, max_features))
        # Flatten descriptors and truncate or pad to max_features
        flattened = descriptors.flatten()
        if len(flattened) < max_features:
            # Pad with zeros if fewer features are found
            flattened = np.pad(flattened, (0, max_features - len(flattened)), mode='constant')
        else:
            # Truncate to the first max_features
            flattened = flattened[:max_features]
        features.append(flattened)
    return np.array(features)


# HOG
def extract_hog_features(images):
    from skimage.feature import hog
    features = []
    for img in images:
        hog_features = hog(img, orientations=9, pixels_per_cell=(8, 8),
                           cells_per_block=(2, 2), block_norm='L2-Hys', visualize=False)
        features.append(hog_features)
    return np.array(features)

# LBP
def extract_lbp_features(images):
    features = []
    for img in images:
        lbp = mahotas.features.lbp(img, radius=1, points=8, ignore_zeros=False)
        features.append(lbp)
    return np.array(features)

# Combine features
sift_features = extract_sift_features(X_train_processed)
hog_features = extract_hog_features(X_train_processed)
lbp_features = extract_lbp_features(X_train_processed)

X_train_features = np.hstack((sift_features, hog_features, lbp_features))

# Repeat for test data
sift_test_features = extract_sift_features(X_test_processed)
hog_test_features = extract_hog_features(X_test_processed)
lbp_test_features = extract_lbp_features(X_test_processed)

X_test_features = np.hstack((sift_test_features, hog_test_features, lbp_test_features))


Collecting mahotas
  Downloading mahotas-1.4.18-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (14 kB)
Downloading mahotas-1.4.18-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.8/5.8 MB[0m [31m38.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: mahotas
Successfully installed mahotas-1.4.18


In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Create and train the model
model = LogisticRegression(max_iter=500, random_state=42)
model.fit(X_train_features, y_train)

# Predict on the test set
y_pred = model.predict(X_test_features)

# Evaluate the model
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))


Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.65      0.68       248
           1       0.68      0.71      0.70       238
           2       0.78      0.82      0.80       209

    accuracy                           0.72       695
   macro avg       0.72      0.73      0.73       695
weighted avg       0.72      0.72      0.72       695

Accuracy: 0.7223021582733813


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [12]:


# Preprocess and extract features for a new image
def preprocess_and_extract(image):
    image = preprocess_image(image)
    sift_features = extract_sift_features([image])
    hog_features = extract_hog_features([image])
    lbp_features = extract_lbp_features([image])
    return np.hstack((sift_features, hog_features, lbp_features))

# Load a new image
new_image = cv2.imread('/content/drive/My Drive/Lung X-Ray Project/Sample_Test/VN/3.jpg', cv2.IMREAD_GRAYSCALE)
new_image = cv2.resize(new_image, (128, 128))
new_features = preprocess_and_extract(new_image)

# Predict
prediction = model.predict(new_features)
print("Predicted Class:", label_encoder.inverse_transform(prediction))


Predicted Class: ['Viral Pneumonia']


In [13]:
import joblib

# Save the trained Logistic Regression model
# content/drive/My Drive/Lung X-Ray Project/Sample_Test/VN/3.jpg
joblib.dump(model, '/content/drive/My Drive/Lung X-Ray Project/Models/modelv1.pkl')

['/content/drive/My Drive/Lung X-Ray Project/Models/modelv1.pkl']

In [15]:
modelv1 = joblib.load('/content/drive/My Drive/Lung X-Ray Project/Models/modelv1.pkl')


In [17]:
modellol = joblib.load('/content/drive/My Drive/Lung X-Ray Project/Models/logistic_regression.pkl')


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [19]:


# Preprocess and extract features for a new image
def preprocess_and_extract(image):
    image = preprocess_image(image)
    sift_features = extract_sift_features([image])
    hog_features = extract_hog_features([image])
    lbp_features = extract_lbp_features([image])
    return np.hstack((sift_features, hog_features, lbp_features))

# Load a new image
new_image = cv2.imread('/content/drive/My Drive/Lung X-Ray Project/Sample_Test/VN/3.jpg', cv2.IMREAD_GRAYSCALE)
new_image = cv2.resize(new_image, (128, 128))
new_features = preprocess_and_extract(new_image)

# Predict
prediction = modelv1.predict(new_features)
prediction2 = modellol.predict(new_features)

print("Predicted Class:", label_encoder.inverse_transform(prediction))
print("Predicted Class 2 :", label_encoder.inverse_transform(prediction2))

Predicted Class: ['Viral Pneumonia']
Predicted Class 2 : ['Viral Pneumonia']
