In [3]:
import os
import os
import re
import cv2
import json
import time
import shutil
import random
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input
from sklearn.ensemble import VotingClassifier


import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torch.optim import Adam
from torchvision import models
from torch.autograd import Variable
from google.colab import drive

%matplotlib inline

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device set done")

device set done


In [None]:
# drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
cls_dict = {
    "Keratoconus": 0,
    "Normal":1,
    "Suspect":2
}
class_list = list(cls_dict.keys())

In [6]:
def metadata(parnt_dir):
    data_dict = {
        "CT_A" : [],
        "EC_A" : [],
        "EC_P" : [],
        "Elv_A" : [],
        "Elv_P" : [],
        "Sag_A" : [],
        "Sag_P" : [],
        "label": []
    }

    columns = list(data_dict.keys())
    for cls_ in class_list:
        cases_in_pth = os.listdir(os.path.join(parnt_dir, cls_))

        for case in cases_in_pth:
            case_list = os.listdir(os.path.join(parnt_dir, cls_, case))
            for col in columns[:-1]:
                r = re.compile(f".*{col}")
                filename = list(filter(r.match, case_list))[0]
                data_dict[col].append(os.path.join(parnt_dir, cls_, case, filename))
            data_dict["label"].append(cls_dict[cls_])

    df = pd.DataFrame.from_dict(data_dict)
    return df

In [7]:
df_train = metadata("/content/drive/MyDrive/Colab Notebooks/kera/Train_Validation sets")
df_test = metadata("/content/drive/MyDrive/Colab Notebooks/kera/Independent Test Set")

In [8]:
print(df_train.shape[0])
print(df_test.shape[0])

423
150


In [9]:
rows_to_delete = np.random.choice(df_train.index, size=123, replace=False)
df_train = df_train.drop(rows_to_delete)
print(df_train.shape[0])

300


In [10]:
rows_to_delete = np.random.choice(df_test.index, size=25, replace=False)
df_test = df_test.drop(rows_to_delete)
print(df_test.shape[0])

125


In [None]:
# df = pd.concat([df_train, df_test], ignore_index=True)
# print(df_train.shape[0])
# print(df_test.shape[0])
# print(df.shape[0])

In [11]:
def load_and_preprocess_img(img_path):
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_array = preprocess_input(img_array)
    return img_array

In [12]:
X_train_raw = np.array([np.array([load_and_preprocess_img(img_path) for img_path in row]) for row in df_train[['CT_A', 'EC_A', 'EC_P', 'Elv_A', 'Elv_P', 'Sag_A', 'Sag_P']].values])
X_train = X_train_raw.reshape((X_train_raw.shape[0] * X_train_raw.shape[1],) + X_train_raw.shape[2:])

In [13]:
y_train_raw = df_train['label'].values
y_train = [ele for ele in y_train_raw for _ in range(7)]


In [14]:
print(len(X_train))
print(len(y_train))

2100
2100


In [15]:
X_test_raw = np.array([np.array([load_and_preprocess_img(img_path) for img_path in row]) for row in df_test[['CT_A', 'EC_A', 'EC_P', 'Elv_A', 'Elv_P', 'Sag_A', 'Sag_P']].values])
X_test = X_test_raw.reshape((X_test_raw.shape[0] * X_test_raw.shape[1],) + X_test_raw.shape[2:])

In [16]:
y_test_raw = df_test['label'].values
y_test = [ele for ele in y_test_raw for _ in range(7)]

In [17]:
print(len(X_test))
print(len(y_test))

875
875


In [18]:
resnet_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

In [19]:
X_train_features = resnet_model.predict(X_train)



In [20]:
X_train_features_flatten = X_train_features.reshape(X_train_features.shape[0], -1)

In [21]:
svm_model = SVC(kernel='linear')

In [22]:
svm_model.fit(X_train_features_flatten, y_train)

In [23]:
X_test_features = resnet_model.predict(X_test)



In [24]:
X_test_features_flatten = X_test_features.reshape(X_test_features.shape[0], -1)

In [25]:
rf_classifier = RandomForestClassifier(n_estimators=100)
rf_classifier.fit(X_train_features_flatten, y_train)

In [27]:
svm_predictions = svm_model.predict(X_test_features_flatten)

In [36]:
rf_prediction = rf_classifier.predict(X_test_features_flatten)

In [28]:
voting_clf = VotingClassifier(estimators=[('svm', svm_model), ('random_forest', rf_classifier)], voting='hard')

In [29]:
voting_clf.fit(X_test_features_flatten, y_test)

In [30]:
majority_voting_predictions = voting_clf.predict(X_test_features_flatten)

In [38]:
svm_accuracy = accuracy_score(y_test, svm_predictions)
rf_accuracy = accuracy_score(y_test, rf_prediction)
majority_voting_accuracy = accuracy_score(y_test, majority_voting_predictions)

In [39]:
print("SVM Accuracy:", svm_accuracy)
print("Random forest Accuracy:", rf_accuracy)
print("Majority Voting Accuracy:", majority_voting_accuracy)

SVM Accuracy: 0.5851428571428572
Random forest Accuracy: 0.5554285714285714
Majority Voting Accuracy: 0.9954285714285714


In [35]:
conf_matrix_svm = confusion_matrix(y_test, svm_predictions)
print("Confusion Matrix for SVM:")
print(conf_matrix_svm)

Confusion Matrix for SVM:
[[198   6  97]
 [  0  51 236]
 [  3  21 263]]


In [41]:
conf_matrix_rf = confusion_matrix(y_test, rf_prediction)
print("Confusion Matrix for Random Forest:")
print(conf_matrix_rf)

Confusion Matrix for Random Forest:
[[211   5  85]
 [ 10  66 211]
 [ 41  37 209]]


In [None]:
conf_matrix_mv = confusion_matrix(y_test, majority_voting_predictions)
print("Confusion Matrix:")
print(conf_matrix_mv)

Confusion Matrix:
[[328   0   8]
 [  0 287   0]
 [  0   4 248]]


In [49]:
from sklearn.metrics import precision_score, recall_score

svm_precision = precision_score(y_test, svm_predictions, average='weighted')
svm_recall = recall_score(y_test, svm_predictions, average='weighted')

rf_precision = precision_score(y_test, rf_prediction, average='weighted')
rf_recall = recall_score(y_test, rf_prediction, average='weighted')

voting_precision = precision_score(y_test, majority_voting_predictions, average='weighted')
voting_recall = recall_score(y_test, majority_voting_predictions, average='weighted')

print("SVM Precision:", svm_precision)
print("SVM Recall:", svm_recall)

print("Random Forest Precision:", rf_precision)
print("Random Forest Recall:", rf_recall)

print("Voting Precision:", voting_precision)
print("Voting Recall:", voting_recall)

SVM Precision: 0.6980654651368866
SVM Recall: 0.5851428571428572
Random Forest Precision: 0.6132291470368411
Random Forest Recall: 0.5554285714285714
Voting Precision: 0.9954285714285714
Voting Recall: 0.9954285714285714
