In [31]:
import glob
import os

import cv2
import joblib
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

In [32]:
# Define the image loading function
def load_data(data_dir, class_names=('0', '1')):
    """Collect image file paths and their class labels from a directory tree.

    The expected layout is one sub-directory per class directly under
    ``data_dir`` (by default ``data_dir/0`` and ``data_dir/1``). Every regular
    file inside a class directory is taken as one sample; nested directories
    are skipped. Nothing is decoded here - only paths are gathered.

    Parameters
    ----------
    data_dir : str or os.PathLike
        Root directory that contains the class sub-directories.
    class_names : sequence of str, optional
        Names of the class sub-directories to scan, in the order they should
        appear in the output. Each name doubles as the label of the files
        found inside it. Directories that do not exist are skipped.

    Returns
    -------
    images : numpy.ndarray
        1-D string array of file paths.
    labels : numpy.ndarray
        1-D string array of class names, aligned element-wise with ``images``.
    """
    images = []
    labels = []

    for label in class_names:
        label_dir = os.path.join(data_dir, label)
        if not os.path.isdir(label_dir):
            continue
        # os.listdir() yields entries in arbitrary order; sorting keeps the
        # sample order reproducible across runs and machines.
        for img_file in sorted(os.listdir(label_dir)):
            img_path = os.path.join(label_dir, img_file)
            if os.path.isfile(img_path):
                images.append(img_path)
                labels.append(label)

    # An explicit string dtype keeps empty results as string arrays instead of
    # NumPy's default float64 for an empty list.
    return np.array(images, dtype=str), np.array(labels, dtype=str)

In [33]:
# Create the CNN feature extractor
class CNNFeatureExtractor(BaseEstimator, TransformerMixin):
    """Scikit-learn transformer that turns image paths into CNN feature vectors.

    Each image is read from disk, converted to grayscale, contrast-enhanced
    with CLAHE, expanded back to three channels, resized to 224x224 and scaled
    to the [0, 1] range before being passed through ``model``. The model
    output is flattened to one row per image.

    Parameters
    ----------
    model : object
        Object exposing a ``predict(batch)`` method that accepts a float32
        array of shape ``(n_images, 224, 224, 3)`` and returns an array whose
        first axis indexes the images.
    """

    def __init__(self, model):
        self.model = model

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the extractor has no state to learn."""
        return self

    def transform(self, X):
        """Load, preprocess and embed every image listed in ``X``.

        Parameters
        ----------
        X : iterable of str
            Paths of the image files to process.

        Returns
        -------
        numpy.ndarray
            2-D array with one flattened feature vector per input image.

        Raises
        ------
        ValueError
            If a path cannot be read or decoded as an image.
        """
        # The CLAHE operator does not depend on the image, so build it once.
        clahe = cv2.createCLAHE()

        img_array = []
        for img_path in X:
            img = cv2.imread(img_path)
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than deep inside cvtColor.
            if img is None:
                raise ValueError(
                    f"Could not read image (missing file or unsupported format): {str(img_path)!r}"
                )

            # Grayscale + CLAHE for local contrast enhancement
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            gray = clahe.apply(gray)

            # Back to three identical channels and resize to the model input size
            img = cv2.resize(cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR), (224, 224))

            # Scale pixel values to [0, 1].
            # NOTE(review): ImageNet-pretrained Keras backbones usually ship a
            # matching preprocess_input; confirm plain 1/255 scaling is intended.
            img_array.append(img.astype('float32') / 255.0)

        # Extract features for all images with a single predict call
        features = self.model.predict(np.array(img_array))

        return features.reshape(features.shape[0], -1)

In [34]:
# Pre-trained backbone: ImageNet-weighted ResNet50 without its top (classification) layers
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

# Training image paths together with their class labels
X_train, y_train = load_data('images/original')

# Fit the label encoder on the training labels, then map them to integers
le = LabelEncoder().fit(y_train)
y_train_encoded = le.transform(y_train)

In [35]:
# Optional sanity check (disabled): run the feature extractor on its own to
# inspect the shape of the extracted feature matrix before training.
# base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# feature_extractor = CNNFeatureExtractor(base_model)
# sample_image_paths = X_train
# feature_extractor.fit(sample_image_paths)
# features = feature_extractor.transform(sample_image_paths)
# print(f"Extracted features shape: {features.shape}")

In [36]:
# Two-stage pipeline: CNN feature extraction followed by an RBF-kernel SVM classifier
pipeline = make_pipeline(
    CNNFeatureExtractor(base_model),
    SVC(C=1, kernel='rbf', gamma='scale'),
)

In [None]:
# Train the pipeline
# fit() passes the training image paths through the CNN feature extractor and
# then fits the SVC on the resulting feature vectors.
pipeline.fit(X_train, y_train_encoded)

# Save the pipeline and label encoder
# Both paths are relative, so the files are written to the current working directory.
# NOTE(review): the pipeline holds the Keras base_model, so the network is pickled
# as well - confirm the installed TensorFlow/Keras version supports pickling models.
joblib.dump(pipeline, 'model.pkl')
joblib.dump(le, 'label_encoder.pkl')

In [38]:
# Reload the fitted pipeline and label encoder from the files written by the training cell
# (this rebinds the `pipeline` and `le` names).
# NOTE: joblib.load unpickles the file, which can execute arbitrary code - only
# load artifacts you created yourself or otherwise trust.
# NOTE: unpickling the pipeline requires the CNNFeatureExtractor class to be
# defined in the current session before this cell runs.
pipeline = joblib.load('model.pkl')
le = joblib.load('label_encoder.pkl')

In [39]:
# Load the test data
# The path is assembled with os.path.join so it resolves on POSIX systems as
# well as on Windows (a literal backslash-separated string only works on Windows).
X_test, y_test = load_data(
    os.path.join('test images', 'Actualmed-COVID-chestxray-dataset-master', 'categories')
)

In [40]:
# Encode labels
# Use transform(), not fit_transform(): the encoder must keep the mapping it
# learned from the training labels. Refitting on the test labels could assign
# different integers (e.g. if the test set lacks a class) and silently corrupt
# every metric. transform() also raises on labels never seen during training.
y_test_encoded = le.transform(y_test)

In [None]:
# Predict encoded class labels for the test images; the pipeline first extracts
# CNN features from the image paths and then applies the SVC.
y_pred = pipeline.predict(X_test)

In [27]:
# Evaluate performance: compare the predicted labels with the encoded ground truth.
# precision/recall/F1 use scikit-learn's binary defaults, i.e. class 1 is the positive class.
accuracy = accuracy_score(y_test_encoded, y_pred)
precision = precision_score(y_test_encoded, y_pred)
recall = recall_score(y_test_encoded, y_pred)
f1 = f1_score(y_test_encoded, y_pred)

In [None]:
# Report the evaluation metrics, one per line, rounded to four decimal places
print(
    f'Accuracy: {accuracy:.4f}',
    f'Precision: {precision:.4f}',
    f'Recall: {recall:.4f}',
    f'F1 Score: {f1:.4f}',
    sep='\n',
)

In [None]:
# For AUC we need a continuous score per sample rather than hard class labels.
# Despite the variable name, decision_function returns the SVC's decision
# values, not probabilities; roc_auc_score only needs a score that ranks samples.
# Note: this call sends X_test through the whole pipeline again, including
# the CNN feature extraction step.
y_pred_proba = pipeline.decision_function(X_test) 
auc = roc_auc_score(y_test_encoded, y_pred_proba)

In [None]:
# Report the ROC AUC rounded to four decimal places
print(f'AUC: {auc:.4f}')