In [4]:
import numpy as np
import os
from os.path import join
from glob import glob
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Flatten, GlobalAveragePooling2D
from keras.applications.resnet50 import preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from sklearn.metrics import mean_squared_error, mean_absolute_error, roc_auc_score, classification_report, confusion_matrix

from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

from sklearn import svm
from sklearn.mixture import GaussianMixture
from sklearn.ensemble import IsolationForest
from sklearn.isotonic import IsotonicRegression
import re

In [5]:
path_train= 'Dataset/Train/'
path_test= 'Dataset/Test/'

In [7]:
Train_images_path= [join(path_train,filename) for filename in os.listdir(path_train)]
Test_images_path= [join(path_test,filename) for filename in os.listdir(path_test)]

In [9]:
image_size = 224

In [10]:
def read_and_prep_images(img_paths, img_height=image_size, img_width=image_size):
    imgs = [load_img(img_path, target_size=(img_height, img_width)) for img_path in img_paths]
    img_array = np.array([img_to_array(img) for img in imgs])

    output = preprocess_input(img_array)
    return(output)

In [11]:
X_train= read_and_prep_images(Train_images_path)
X_test= read_and_prep_images(Test_images_path)

In [8]:
resnet_model = ResNet50(weights='imagenet',input_shape=(224, 224, 3),include_top = False, pooling = 'avg')
resnet_model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 112, 112, 64) 256         conv1[0][0]                      
___________________________________________________________________________________________

In [12]:
X_train= resnet_model.predict(X_train)

In [13]:
X_test= resnet_model.predict(X_test)

In [14]:
ss = StandardScaler()
ss.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [15]:
X_train = ss.transform(X_train)
X_test= ss.transform(X_test)

print(len(X_train), len(X_test))

1833 3104


In [16]:
pca = PCA(n_components=512, whiten=True)
pca = pca.fit(X_train)
print('Explained variance percentage = %0.2f' % sum(pca.explained_variance_ratio_))

X_train = pca.transform(X_train)
X_test = pca.transform(X_test)

Explained variance percentage = 0.95


In [17]:
oc_svm_clf = svm.OneClassSVM(gamma=0.001, kernel='rbf', nu=0.08)  # Obtained using grid search
if_clf = IsolationForest(contamination=0.08, max_features=1.0, max_samples=1.0, n_estimators=40)  # Obtained using grid search

oc_svm_clf.fit(X_train)
if_clf.fit(X_train)



IsolationForest(behaviour='old', bootstrap=False, contamination=0.08,
                max_features=1.0, max_samples=1.0, n_estimators=40, n_jobs=None,
                random_state=None, verbose=0, warm_start=False)

In [18]:
oc_svm_preds = oc_svm_clf.predict(X_test)
if_preds = if_clf.predict(X_test)



In [20]:
results= pd.DataFrame({
    'Path': [os.path.basename(i) for i in Test_images_path ],
    'oc_svm_preds': [0 if x == -1 else 1 for x in oc_svm_preds] ,
    'if_preds': [0 if x == -1 else 1 for x in if_preds]
})

In [21]:
results.sample(5)

Unnamed: 0,Path,oc_svm_preds,if_preds
2756,test (725).jpeg,0,0
2554,test (561).jpeg,0,0
791,test (1472).jpeg,0,0
1426,test (199).jpeg,0,0
231,test (1016).jpeg,0,0


In [22]:
import pandas as pd
df = pd.read_csv('Dataset/test_labels.csv')

In [23]:
result= results.merge(df)
result.head()
result.to_csv('Dataset/result.csv')

In [24]:
print('roc auc score: if_preds')
if_preds= result['if_preds']
actual= result['is_Fundus']
print(roc_auc_score(actual, if_preds))
print(classification_report(actual, if_preds))
print(confusion_matrix(actual, if_preds))
plt.show()

roc auc score: if_preds
0.9794652010958166
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3005
           1       0.98      0.96      0.97        99

    accuracy                           1.00      3104
   macro avg       0.99      0.98      0.98      3104
weighted avg       1.00      1.00      1.00      3104

[[3003    2]
 [   4   95]]


In [25]:
print('roc auc score: oc_svm_preds')
oc_svm_preds=result['oc_svm_preds']
actual=result['is_Fundus']
print(roc_auc_score(actual, oc_svm_preds))
print(classification_report(actual, oc_svm_preds))
print(confusion_matrix(actual, oc_svm_preds))
plt.show()

roc auc score: oc_svm_preds
0.9747474747474747
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3005
           1       1.00      0.95      0.97        99

    accuracy                           1.00      3104
   macro avg       1.00      0.97      0.99      3104
weighted avg       1.00      1.00      1.00      3104

[[3005    0]
 [   5   94]]
