In [5]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Maximum Entropy Thresholding
def maximum_entropy_threshold(img_gray):
    hist = cv2.calcHist([img_gray], [0], None, [256], [0, 256])
    hist = hist.ravel() / hist.sum()  
    cdf = hist.cumsum() 
    entropy = -np.cumsum(hist * np.log2(hist + 1e-8)) 

    max_entropy = float('-inf')
    threshold = 0
    for i in range(256):
        ent_b = entropy[i] / cdf[i] if cdf[i] > 0 else 0
        ent_f = (entropy[-1] - entropy[i]) / (1 - cdf[i]) if (1 - cdf[i]) > 0 else 0
        ent_sum = ent_b + ent_f
        if ent_sum > max_entropy:
            max_entropy = ent_sum
            threshold = i

    return threshold

# Nonlinear Contrast Stretching
def nonlinear_contrast_stretching(img):
    min_val, max_val = np.percentile(img, [2, 98])  
    img_stretched = np.clip((img - min_val) * (255 / (max_val - min_val + 1e-8)), 0, 255)
    return img_stretched.astype(np.uint8)

#  Preprocess an Image
def preprocess_all_steps(image_path):
    
    img = cv2.imread(image_path)

    # Convert to Grayscale
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Apply Maximum Entropy Thresholding
    threshold = maximum_entropy_threshold(gray_img)
    _, entropy_img = cv2.threshold(gray_img, threshold, 255, cv2.THRESH_BINARY)

    # Apply Edge Detection (Canny)
    edges = cv2.Canny(gray_img, threshold1=50, threshold2=150)

    # Apply Nonlinear Contrast Stretching
    contrast_img = nonlinear_contrast_stretching(gray_img)

    return gray_img, entropy_img, edges, contrast_img

# Function: Process Dataset and Save Results
def process_dataset_and_save(dataset_path, output_path):
    class_names = ['normal', 'cataract', 'diabetic_retinopathy', 'glaucoma']

    for class_name in class_names:
        class_folder = os.path.join(dataset_path, class_name)
        output_class_folder = os.path.join(output_path, class_name)
        os.makedirs(output_class_folder, exist_ok=True)

        for filename in os.listdir(class_folder):
            img_path = os.path.join(class_folder, filename)
            if os.path.isfile(img_path):
                # Apply all preprocessing steps
                gray_img, entropy_img, edges, contrast_img = preprocess_all_steps(img_path)

                # Save results
                cv2.imwrite(os.path.join(output_class_folder, f"{filename}_gray.png"), gray_img)
                cv2.imwrite(os.path.join(output_class_folder, f"{filename}_entropy.png"), entropy_img)
                cv2.imwrite(os.path.join(output_class_folder, f"{filename}_edges.png"), edges)
                cv2.imwrite(os.path.join(output_class_folder, f"{filename}_contrast.png"), contrast_img)

# Define paths
dataset_path = '/Users/siblingsmac/Desktop/dataset/'  # Input dataset folder path
output_path = '/Users/siblingsmac/Desktop/pre'  # Output folder path

# Process dataset
process_dataset_and_save(dataset_path, output_path)
print("Preprocessing completed and results saved.")

# Load and preprocess images BEFORE splitting
images, labels = load_and_preprocess_dataset(dataset_path)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# Print dataset shapes
print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")
print(f"Training labels shape: {y_train.shape}")
print(f"Testing labels shape: {y_test.shape}")

Preprocessing completed and results saved.
Training data shape: (3373, 64, 64, 3)
Testing data shape: (844, 64, 64, 3)
Training labels shape: (3373,)
Testing labels shape: (844,)


In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization

def build_model(input_shape=(224, 224, 3), num_classes=4):
    model = Sequential([
        # Layer 1: Convolution + BatchNorm + ReLU + MaxPooling
        Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling2D((2, 2)),

        # Layer 2: Convolution + BatchNorm + ReLU + MaxPooling
        Conv2D(64, (3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),

        # Layer 3: Convolution + BatchNorm + ReLU + MaxPooling
        Conv2D(128, (3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),

        # Flatten + Fully Connected Layers
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),  # Regularization
        Dense(num_classes, activation='softmax')  # Output layer
    ])
    return model

# Initialize model
model = build_model()
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
from keras.models import Model
from keras.utils import to_categorical
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
import cv2
import numpy as np

y_train = to_categorical(y_train, num_classes=4) 
y_test = to_categorical(y_test, num_classes=4)

print("One-hot encoded training labels shape:", y_train.shape)
print("One-hot encoded testing labels shape:", y_test.shape)

def resize_images(images, target_size=(224, 224)):
    resized_images = [cv2.resize(img, target_size) for img in images]
    return np.array(resized_images)

# Resize training and testing images to match the input shape of the model
X_train_resized = resize_images(X_train)
X_test_resized = resize_images(X_test)

# Define the CNN model (without the final classification layer)
input_layer = Input(shape=(224, 224, 3))  # Input shape for your images
x = Conv2D(32, (3, 3), activation='relu')(input_layer)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D((2, 2))(x)
x = Flatten()(x)  # Flatten to extract features

# Create the model that outputs features 
feature_extractor = Model(inputs=input_layer, outputs=x)

# Use the model to extract features from training and testing data
X_train_features = feature_extractor.predict(X_train_resized)  
X_test_features = feature_extractor.predict(X_test_resized)    
# Check the shape of the extracted features (for verification)
print("Shape of extracted training features:", X_train_features.shape)
print("Shape of extracted testing features:", X_test_features.shape)

One-hot encoded training labels shape: (3373, 4)
One-hot encoded testing labels shape: (844, 4)
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 90ms/step
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 81ms/step
Shape of extracted training features: (3373, 186624)
Shape of extracted testing features: (844, 186624)


In [8]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# X_train_features and X_test_features are extracted from CNN
print("Shape of extracted training features:", X_train_features.shape)
print("Shape of extracted testing features:", X_test_features.shape)

print("One-hot encoded training labels shape before flattening:", y_train.shape)
print("One-hot encoded testing labels shape before flattening:", y_test.shape)

# Convert (3373, 4, 4) → (3373, 16) → (3373,) class labels
y_train = y_train.reshape(y_train.shape[0], -1).argmax(axis=1)
y_test = y_test.reshape(y_test.shape[0], -1).argmax(axis=1)

print("Flattened training labels shape:", y_train.shape)  
print("Flattened testing labels shape:", y_test.shape)  

# Step 3: Standardize the features 
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_features)  # Fit and transform on train
X_test_scaled = scaler.transform(X_test_features)  # Transform on test

svm_model = SVC(class_weight="balanced")  # Handles class imbalance
svm_model.fit(X_train_scaled, y_train)

# Step 5: Evaluate the SVM model
y_pred = svm_model.predict(X_test_scaled)

# Print classification report and accuracy score
print("SVM Classification Report:")
print(classification_report(y_test, y_pred))
print("SVM Accuracy Score:", accuracy_score(y_test, y_pred))

Shape of extracted training features: (3373, 186624)
Shape of extracted testing features: (844, 186624)
One-hot encoded training labels shape before flattening: (3373, 4)
One-hot encoded testing labels shape before flattening: (844, 4)
Flattened training labels shape: (3373,)
Flattened testing labels shape: (844,)
SVM Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.84      0.80       245
           1       0.79      0.63      0.70       203
           2       0.99      0.96      0.98       204
           3       0.60      0.68      0.63       192

    accuracy                           0.78       844
   macro avg       0.79      0.78      0.78       844
weighted avg       0.79      0.78      0.78       844

SVM Accuracy Score: 0.7796208530805687


In [10]:
from sklearn.pipeline import Pipeline
import joblib

# Create a pipeline with the feature extractor (CNN) and the trained SVM classifier
nn_svm_pipeline = Pipeline([
    ('feature_extractor', feature_extractor),  # CNN for feature extraction
    ('svm_classifier', svm_model)  # Trained SVM classifier
])

# Save the entire pipeline
joblib.dump(nn_svm_pipeline, "/Users/siblingsmac/Desktop/nn_svm_pipeline.pkl")
#joblib.dump(nn_svm_pipeline, "nn_svm_pipeline.pkl")



['nn_svm_pipeline.pkl']

In [1]:
import joblib

model_path = "/Users/siblingsmac/Desktop/nn_svm_pipeline.pkl"
nn_svm_pipeline = joblib.load(model_path)

print(type(nn_svm_pipeline))  # Check the type of the loaded object


python(14289) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


<class 'sklearn.pipeline.Pipeline'>
