In [1]:
import numpy as np
import pandas as pd
import cv2
import os
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, BatchNormalization, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import tensorflow as tf

IMG_SIZE = (224, 224)  
BATCH_SIZE = 32
RANDOM_STATE = 42

IMAGES_FOLDER = "/kaggle/input/glaucoma-datasets/G1020/Images"
df = pd.read_csv("/kaggle/input/glaucoma-datasets/G1020/G1020.csv")

def load_data(df, IMG_SIZE):
    images = []
    labels = []
    
    for _, row in df.iterrows():
        img_name = row['imageID']
        label = row['binaryLabels']
        img_path = os.path.join(IMAGES_FOLDER, img_name)
        if os.path.exists(img_path):
            img = cv2.imread(img_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, IMG_SIZE)
            images.append(img)
            labels.append(row['binaryLabels'])
    
    return np.array(images), np.array(labels)

images, labels = load_data(df, IMG_SIZE)
images = images.astype('float32') / 255.0

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    images, labels, test_size=0.2, stratify=labels, random_state=RANDOM_STATE
)

datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True
)

augmented_images = []
augmented_labels = []
for x, y in zip(X_train, y_train):
    for _ in range(3):  
        augmented_images.append(datagen.random_transform(x))
        augmented_labels.append(y)
        
X_train_aug = np.concatenate([X_train, np.array(augmented_images, dtype=np.float32)])
y_train_aug = np.concatenate([y_train, np.array(augmented_labels)])


# Feature extraction with ResNet50
def extract_features(images):
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(*IMG_SIZE, 3))
    x = GlobalAveragePooling2D()(base_model.output)
    
    feature_extractor = Model(inputs=base_model.input, outputs=x)
    features = feature_extractor.predict(images, batch_size=BATCH_SIZE, verbose=1)
    return features

print("Extracting training features...")
train_features = extract_features(X_train)

print("Extracting test features...")
test_features = extract_features(X_test)

# Normalize features
scaler = StandardScaler()
train_features_aug = extract_features(X_train_aug)
X_train_features_aug = scaler.fit_transform(train_features_aug)
X_test_features = scaler.transform(test_features)

svm_params = {'C': [1, 10, 100], 'gamma': [0.001, 0.01, 0.1], 'kernel': ['rbf']}
svm_grid = GridSearchCV(SVC(probability=True), svm_params, cv=5, n_jobs=-1, verbose=2)
svm_grid.fit(X_train_features_aug, y_train_aug)
print(f"Best SVM parameters: {svm_grid.best_params_}")

# svm_random = RandomizedSearchCV(SVC(probability=True), svm_params, n_iter=5, cv=3, n_jobs=-1, verbose=2, random_state=RANDOM_STATE)
# svm_random.fit(X_train_features_aug, y_train_aug)
# print(f"Best SVM parameters: {svm_random.best_params_}")

rf = RandomForestClassifier(n_estimators=200, random_state=RANDOM_STATE)
rf.fit(X_train_features_aug, y_train_aug)

y_pred = svm_grid.predict(X_test_features)
y_prob = svm_grid.predict_proba(X_test_features)[:, 1]

print("\nClassification Report (SVM):")
print(classification_report(y_test, y_pred, target_names=["Normal", "Glaucoma"]))

print(f"\nSVM Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print(f"SVM AUC-ROC: {roc_auc_score(y_test, y_prob):.4f}")

y_pred_rf = rf.predict(X_test_features)
y_prob_rf = rf.predict_proba(X_test_features)[:, 1]

print("\nClassification Report (Random Forest):")
print(classification_report(y_test, y_pred_rf, target_names=["Normal", "Glaucoma"]))

print(f"\nRandom Forest Accuracy: {accuracy_score(y_test, y_pred_rf):.4f}")
print(f"Random Forest AUC-ROC: {roc_auc_score(y_test, y_prob_rf):.4f}")

Extracting training features...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 191ms/step
Extracting test features...
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 662ms/step
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 38ms/step
Fitting 5 folds for each of 9 candidates, totalling 45 fits
Best SVM parameters: {'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}

Classification Report (SVM):
              precision    recall  f1-score   support

      Normal       0.72      0.98      0.83       145
    Glaucoma       0.50      0.05      0.09        59

    accuracy                           0.71       204
   macro avg       0.61      0.52      0.46       204
weighted avg       0.65      0.71      0.62       204


SVM Acc