In [1]:
import os  # helps you to interact with operating system, 
# managing files and directories 
# finding current directories, 
# working with environment variables
import pandas as pd  # for dataframe 
import numpy as np  # handaling arrays
import cv2  # used for image processing and computer vision
# read and write files, Object detections(faces) and more 
import matplotlib.pyplot as plt 
from tqdm import tqdm # shows the level of progress
import random 
import concurrent.futures # for parallel execution of tasks 
import seaborn as sns
import pickle
from sklearn.model_selection import train_test_split

from sklearn.decomposition import PCA
from joblib import dump, load
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Working on images that were received after preprocessing them

In [2]:
# Root folder of the pre-processed dataset. Set the DR_DATA_DIR environment
# variable to run the notebook on another machine; when it is unset the
# original location is used, so existing behaviour is unchanged.
# NOTE(review): the previewed `image_path` values in the CSVs are absolute
# paths as well, so relocating the dataset presumably also requires
# regenerating the CSVs - confirm.
base_dir = os.environ.get(
    "DR_DATA_DIR",
    "/Users/meetsudra/Downloads/DiabeticRetino/archive/dr_unified_v2/dr_unified_v2",
)

# One CSV per split, all stored directly under base_dir.
train_csv = os.path.join(base_dir, "ptrain.csv")
val_csv = os.path.join(base_dir, "pval.csv")
test_csv = os.path.join(base_dir, "ptest.csv")

# Load each split into its own DataFrame.
train_df = pd.read_csv(train_csv)
val_df = pd.read_csv(val_csv)
test_df = pd.read_csv(test_csv)

In [8]:
# Only the final expression of a cell is rendered, so the shape is printed
# explicitly and the row preview is kept as the last expression.
print(test_df.shape)
test_df.head()

(9242, 2)

In [10]:
# Only the final expression of a cell is rendered, so the shape is printed
# explicitly and the row preview is kept as the last expression.
print(train_df.shape)
train_df.head()

(73932, 2)

In [11]:
# Preview the first five rows of the validation split.
val_df.head(5)

Unnamed: 0,image_path,label
0,/Users/meetsudra/Downloads/DiabeticRetino/arch...,0
1,/Users/meetsudra/Downloads/DiabeticRetino/arch...,0
2,/Users/meetsudra/Downloads/DiabeticRetino/arch...,0
3,/Users/meetsudra/Downloads/DiabeticRetino/arch...,1
4,/Users/meetsudra/Downloads/DiabeticRetino/arch...,0


In [12]:
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB5
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.efficientnet import preprocess_input
from tqdm import tqdm

# ImageNet-pretrained EfficientNetB5 used for feature extraction: the
# classification head is omitted and global average pooling collapses the
# final feature map into a single vector per image.
base_model = EfficientNetB5(
    include_top=False,
    weights="imagenet",
    pooling="avg",
)

In [16]:
def _embed_batch(model, images):
    """Run ``model`` once on a list of equally sized image arrays.

    Returns a 2-D array with one flattened feature row per input image.
    """
    batch = preprocess_input(np.stack(images, axis=0))
    # A single forward pass over the whole batch; calling model.predict() once
    # per image pays its per-call setup cost for every sample.
    outputs = model.predict_on_batch(batch)
    return np.asarray(outputs).reshape(len(images), -1)


def extract_features(df, model=None, target_size=(456, 456), batch_size=32):
    """Embed every image listed in ``df`` with a CNN backbone.

    Parameters
    ----------
    df : DataFrame with an ``image_path`` column (file paths) and a ``label``
        column.
    model : Keras model used for the forward pass. Defaults to the
        notebook-level ``base_model``.
    target_size : (height, width) each image is resized to while loading.
        The default matches the previously hard-coded value.
    batch_size : number of images sent through the model per forward pass.

    Returns
    -------
    (features, labels) : ``features`` has one row per successfully loaded
        image, in ``df`` order; ``labels`` holds the matching labels. If no
        image could be loaded, ``features`` is an empty ``(0, 0)`` array.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.

    Notes
    -----
    Images that fail to load are reported and skipped (best effort), so the
    output may contain fewer rows than ``df``. Errors raised by the model
    itself are NOT swallowed: a broken model would otherwise fail identically
    on every image and silently produce an empty result.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")
    if model is None:
        model = base_model

    feature_chunks = []   # one (n_in_batch, feature_dim) array per batch
    labels = []           # labels of images already embedded
    pending_images = []   # loaded images waiting for the next forward pass
    pending_labels = []
    skipped = 0

    for img_path, label in tqdm(zip(df["image_path"], df["label"]), total=len(df), desc="Extracting Features"):
        try:
            img = image.load_img(img_path, target_size=target_size)
            img_array = image.img_to_array(img)
        except Exception as e:
            # Unreadable / missing image: report it and keep going.
            skipped += 1
            print(f"Error processing {img_path}: {e}")
            continue

        pending_images.append(img_array)
        pending_labels.append(label)

        if len(pending_images) == batch_size:
            feature_chunks.append(_embed_batch(model, pending_images))
            labels.extend(pending_labels)
            pending_images, pending_labels = [], []

    # Flush the final, possibly smaller, batch.
    if pending_images:
        feature_chunks.append(_embed_batch(model, pending_images))
        labels.extend(pending_labels)

    if skipped:
        print(f"Skipped {skipped} of {len(df)} images that could not be loaded.")

    if not feature_chunks:
        return np.empty((0, 0), dtype=np.float32), np.array(labels)
    return np.concatenate(feature_chunks, axis=0), np.array(labels)

In [17]:
def _load_or_extract(split_name, split_df):
    """Return ``(features, labels)`` for one split, reusing saved arrays when present.

    Feature extraction takes hours, so the result of each split is written to
    ``X_<split>.npy`` / ``y_<split>.npy`` as soon as that split finishes and is
    loaded from there on later runs. The cache is not invalidated
    automatically: delete the files to force a fresh extraction.
    """
    features_path = f"X_{split_name}.npy"
    labels_path = f"y_{split_name}.npy"

    if os.path.exists(features_path) and os.path.exists(labels_path):
        return np.load(features_path), np.load(labels_path)

    features, labels = extract_features(split_df)
    np.save(features_path, features)
    np.save(labels_path, labels)
    return features, labels


X_train, y_train = _load_or_extract("train", train_df)
X_val, y_val = _load_or_extract("val", val_df)
X_test, y_test = _load_or_extract("test", test_df)

Extracting Features: 100%|██████████| 73932/73932 [7:56:56<00:00,  2.58it/s]   
Extracting Features: 100%|██████████| 9241/9241 [1:19:18<00:00,  1.94it/s]
Extracting Features: 100%|██████████| 9242/9242 [56:40<00:00,  2.72it/s]


In [18]:
# Write every feature matrix and label vector to the working directory so
# the extraction step does not have to be repeated.
_npy_outputs = [
    ("X_train.npy", X_train),
    ("y_train.npy", y_train),
    ("X_val.npy", X_val),
    ("y_val.npy", y_val),
    ("X_test.npy", X_test),
    ("y_test.npy", y_test),
]
for _npy_name, _npy_array in _npy_outputs:
    np.save(_npy_name, _npy_array)

In [19]:
# Read the saved feature matrices and label vectors back from disk,
# one (features, labels) pair per split.
X_train, y_train = np.load("X_train.npy"), np.load("y_train.npy")
X_val, y_val = np.load("X_val.npy"), np.load("y_val.npy")
X_test, y_test = np.load("X_test.npy"), np.load("y_test.npy")

# Shapes of every split.
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

# Peek at the first five training feature vectors and their labels.
print("\nFirst 5 feature vectors from X_train:", X_train[:5], sep="\n")
print("\nFirst 5 labels from y_train:", y_train[:5], sep="\n")

# Count how many training samples fall into each class.
unique_labels, counts = np.unique(y_train, return_counts=True)
print("\nClass distribution in y_train:")
for label, count in zip(unique_labels, counts):
    print(f"Class {label}: {count} samples")


X_train shape: (73932, 2048), y_train shape: (73932,)
X_val shape: (9241, 2048), y_val shape: (9241,)
X_test shape: (9242, 2048), y_test shape: (9242,)

First 5 feature vectors from X_train:
[[ 0.35464016 -0.11336438 -0.00797085 ... -0.18172437 -0.16587104
  -0.13923632]
 [ 0.4410764  -0.09153248 -0.08678316 ... -0.18042731 -0.01045698
  -0.06213193]
 [ 0.2633043  -0.12626693 -0.05649288 ... -0.21565202 -0.16273029
  -0.16317786]
 [ 0.75709766 -0.05302758 -0.13664187 ... -0.07646815 -0.13231465
  -0.1226446 ]
 [ 0.33083418 -0.21344388  0.09940313 ... -0.1499331  -0.02238587
  -0.10861953]]

First 5 labels from y_train:
[1 2 0 0 0]

Class distribution in y_train:
Class 0: 55119 samples
Class 1: 3701 samples
Class 2: 12105 samples
Class 3: 1005 samples
Class 4: 2002 samples


In [22]:
# Keep as many principal components as needed to retain 95% of the variance.
pca = PCA(n_components=0.95, svd_solver='full')

# Fit the projection on the training split only, then apply the same
# projection to validation and test so no information leaks from them.
X_train_pca = pca.fit_transform(X_train)
X_val_pca = pca.transform(X_val)
X_test_pca = pca.transform(X_test)

# Print the new shape of datasets
print(f"X_train shape after PCA: {X_train_pca.shape}")
print(f"X_val shape after PCA: {X_val_pca.shape}")
print(f"X_test shape after PCA: {X_test_pca.shape}")

# Save the reduced features
np.save("X_train_pca.npy", X_train_pca)
np.save("X_val_pca.npy", X_val_pca)
np.save("X_test_pca.npy", X_test_pca)

# Persist the fitted projection as well: a classifier trained on the reduced
# features can only be applied to new feature vectors if they are passed
# through this exact PCA first.
dump(pca, "pca.joblib")

X_train shape after PCA: (73932, 234)
X_val shape after PCA: (9241, 234)
X_test shape after PCA: (9242, 234)


In [23]:
from sklearn.svm import SVC

# Load the PCA-transformed features together with the saved labels, so this
# cell is a self-contained resume point after a kernel restart and the
# features and labels always come from the same saved run.
X_train_pca = np.load("X_train_pca.npy")
X_val_pca = np.load("X_val_pca.npy")
X_test_pca = np.load("X_test_pca.npy")
y_train = np.load("y_train.npy")
y_val = np.load("y_val.npy")
y_test = np.load("y_test.npy")

# Features and labels live in separate files; fail fast on misaligned data
# instead of discovering it after hours of training.
for _split, _features, _labels in (
    ("train", X_train_pca, y_train),
    ("val", X_val_pca, y_val),
    ("test", X_test_pca, y_test),
):
    if len(_features) != len(_labels):
        raise ValueError(
            f"{_split}: {len(_features)} feature rows but {len(_labels)} labels"
        )

# Initialize SVM classifier with RBF kernel
# NOTE(review): the training labels are heavily skewed toward class 0 (see the
# class distribution printed when the features were loaded); consider
# class_weight="balanced" if minority-class recall matters.
# NOTE(review): probability=True makes fitting considerably slower; only
# predict() is used in the visible cells - confirm predict_proba is needed.
svm_classifier = SVC(kernel="rbf", C=1.0, gamma="scale", probability=True, random_state=42)

# Train the SVM classifier. The single-iteration tqdm bar cannot show real
# progress; it only reports the total wall-clock time of the fit.
print("Training SVM model...")
for _ in tqdm(range(1), desc="Training Progress"):
    svm_classifier.fit(X_train_pca, y_train)

# Save the trained model
dump(svm_classifier, "svm_model.joblib")
print("SVM model saved successfully.")

Training SVM model...


Training Progress: 100%|██████████| 1/1 [2:48:36<00:00, 10116.60s/it]

SVM model saved successfully.





In [24]:
# Score the fitted SVM on the validation split (overall accuracy).
y_val_pred = svm_classifier.predict(X_val_pca)
val_accuracy = accuracy_score(y_true=y_val, y_pred=y_val_pred)
print(f"Validation Accuracy: {val_accuracy:.4f}")

Validation Accuracy: 0.7977


In [25]:
# Score the fitted SVM on the held-out test split (overall accuracy).
y_test_pred = svm_classifier.predict(X_test_pca)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_test_pred)
print(f"Test Accuracy: {test_accuracy:.4f}")

Test Accuracy: 0.7983


In [26]:
# Per-class precision / recall / F1 for the test-set predictions.
print("\nClassification Report on Test Set:")
test_report = classification_report(y_true=y_test, y_pred=y_test_pred)
print(test_report)


Classification Report on Test Set:
              precision    recall  f1-score   support

           0       0.81      0.99      0.89      6890
           1       0.58      0.06      0.11       463
           2       0.64      0.29      0.40      1513
           3       0.43      0.02      0.05       126
           4       0.73      0.24      0.36       250

    accuracy                           0.80      9242
   macro avg       0.64      0.32      0.36      9242
weighted avg       0.77      0.80      0.75      9242

