In [1]:
from pathlib import Path
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from skimage.measure import *
from skimage.filters import *
from skimage.feature import *
from skimage.transform import *
from skimage.morphology import *
from sklearn.linear_model import LogisticRegression
from sklearn import svm, metrics
from sklearn.metrics import roc_curve
from sklearn import metrics
from sklearn.metrics import *
import skimage
from skimage.util import *
import math
import numpy as np
import matplotlib.pyplot as plt
from skimage import draw
from util import *
import opsfaz as faz
import random
import os
import cv2
from tqdm import tqdm
import natsort
import glob
import pdb
import pickle

# Reproducibility: seed every random source used by this notebook.
def set_seed(seed):
    """Seed NumPy's global RNG and the ``random`` module, and export PYTHONHASHSEED.

    Parameters
    ----------
    seed : int
        Value handed to ``np.random.seed`` and ``random.seed``; its string
        form is written to the ``PYTHONHASHSEED`` environment variable.

    Notes
    -----
    NOTE(review): Python reads ``PYTHONHASHSEED`` at interpreter start-up, so
    the assignment presumably only influences processes started afterwards,
    not string hashing inside the already-running kernel — confirm if hash
    determinism matters here.
    """
    # Same order as before: NumPy first, then the stdlib generator.
    for seeder in (np.random.seed, random.seed):
        seeder(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)


set_seed(17)

def roundness(img):
    """Return the roundness of the single region contained in ``img``.

    Roundness is ``4 * pi * area / convex_perimeter ** 2``, where ``area`` is
    the region area reported by ``regionprops`` and ``convex_perimeter`` is the
    perimeter of the convex hull image of ``img``.

    Parameters
    ----------
    img : ndarray
        Mask image. It is cast to ``int`` and interpreted as a label image, so
        every distinct non-zero integer value counts as a separate region.

    Returns
    -------
    float
        The roundness value.

    Raises
    ------
    ValueError
        If the label image does not contain exactly one region.
    """
    regions = regionprops(img.astype(int))
    if len(regions) != 1:
        # A plain string cannot be raised (that itself fails with TypeError),
        # so signal the problem with a real exception type.
        raise ValueError(
            f'Expected exactly one labelled region, found {len(regions)}.'
        )
    convex = convex_hull_image(img)
    convex_perimeter = perimeter(convex)

    return (4 * math.pi * regions[0].area) / convex_perimeter ** 2

def solidity(img):
    """Return the solidity of the single region contained in ``img``.

    Solidity is the region area divided by the area of its convex hull, both
    taken from ``regionprops``.

    Parameters
    ----------
    img : ndarray
        Mask image. It is cast to ``int`` and interpreted as a label image, so
        every distinct non-zero integer value counts as a separate region.

    Returns
    -------
    float
        ``area(region) / area(convex hull)``.

    Raises
    ------
    ValueError
        If either the mask or its convex hull image does not contain exactly
        one region.
    """
    regions = regionprops(img.astype(int))
    if len(regions) != 1:
        # A plain string cannot be raised (that itself fails with TypeError),
        # so signal the problem with a real exception type.
        raise ValueError(
            f'Expected exactly one labelled region, found {len(regions)}.'
        )
    convex = convex_hull_image(img)
    convex_regions = regionprops(convex.astype(int))
    if len(convex_regions) != 1:
        raise ValueError(
            'Expected exactly one region in the convex hull image, '
            f'found {len(convex_regions)}.'
        )

    return regions[0].area / convex_regions[0].area

def eccentricity(img):
    """Return the minor/major axis-length ratio of the single region in ``img``.

    Despite the name, the value returned is ``minor_axis_length /
    major_axis_length`` as reported by ``regionprops``, not the conventional
    ellipse eccentricity.

    Parameters
    ----------
    img : ndarray
        Mask image. It is cast to ``int`` and interpreted as a label image, so
        every distinct non-zero integer value counts as a separate region.

    Returns
    -------
    float
        The axis-length ratio.

    Raises
    ------
    ValueError
        If the label image does not contain exactly one region.
    """
    regions = regionprops(img.astype(int))
    if len(regions) != 1:
        # A plain string cannot be raised (that itself fails with TypeError),
        # so signal the problem with a real exception type.
        raise ValueError(
            f'Expected exactly one labelled region, found {len(regions)}.'
        )

    return regions[0].minor_axis_length / regions[0].major_axis_length

def compactness(img):
    """Return the compactness of the single region contained in ``img``.

    Compactness is ``4 * pi * area / perimeter ** 2``, using the region area
    from ``regionprops`` and the perimeter of ``img`` itself (not of its convex
    hull).

    Parameters
    ----------
    img : ndarray
        Mask image. It is cast to ``int`` and interpreted as a label image, so
        every distinct non-zero integer value counts as a separate region.

    Returns
    -------
    float
        The compactness value.

    Raises
    ------
    ValueError
        If the label image does not contain exactly one region.
    """
    regions = regionprops(img.astype(int))
    if len(regions) != 1:
        # A plain string cannot be raised (that itself fails with TypeError),
        # so signal the problem with a real exception type.
        raise ValueError(
            f'Expected exactly one labelled region, found {len(regions)}.'
        )

    return (4 * math.pi * regions[0].area) / (perimeter(img)) ** 2

In [90]:
def _load_clinical_table_ex():
    """Read the hold-out clinical spreadsheet and patch gaps in its first 15 columns.

    The file ``AD_clinical_Holdout.xlsx`` is looked up in the parent of the
    current working directory. In the first 15 columns every missing cell is
    replaced by the value of the row directly above it (one row only); the
    remaining columns are left untouched.
    """
    parent_dir = '/'.join(os.path.dirname(os.path.realpath("__file__")).split('/')[:-1])
    table = pd.read_excel(parent_dir + "/AD_clinical_Holdout.xlsx")
    right = table[table.columns[:15].tolist()]
    left = table[table.columns[15:].tolist()]
    right = right.fillna(right.shift(1))
    return pd.concat([right, left], axis=1)


def make_df_ex(data, label):
    """Build the feature table for a list of hold-out mask images.

    For every image the function measures the largest-contour area after a
    region-growing / morphological clean-up pass, computes the four shape
    descriptors on the normalised input mask, and looks up the patient's age
    and sex in the clinical spreadsheet.

    Parameters
    ----------
    data : iterable of path-like
        Image files to process. The patient number is parsed from the file
        name: the part before the first space, after its last underscore.
    label : object
        Class label stored in the ``label`` column for every row.

    Returns
    -------
    df : pandas.DataFrame
        One row per image with columns ``roundness``, ``solidity``,
        ``eccentricity``, ``compactness``, ``label``, ``area``, ``age`` and
        ``gender``.
    fold : pandas.DataFrame
        The patient number of every image, stored both in column ``0`` and in
        column ``scheme_num``.

    Raises
    ------
    ValueError
        If an image cannot be read by ``cv2.imread``.
    IndexError
        If the patient number is not present in the clinical spreadsheet.
    """
    # Explicit dispatch table instead of looking the functions up in globals().
    shape_features = {
        'roundness': roundness,
        'solidity': solidity,
        'eccentricity': eccentricity,
        'compactness': compactness,
    }
    cols = {name: [] for name in shape_features}
    area_lst = []
    age_lst = []
    gender_lst = []
    scheme_lst = []
    label_lst = []

    mm = 3  # NOTE(review): presumably the side length of the scan in millimetres — confirm
    precision = 0
    clinical = None  # loaded on first use so an empty `data` never touches the disk

    for img_path in tqdm(data):
        mask = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
        if mask is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError(f'cv2.imread could not read an image from {img_path}')
        mask = mask / 255
        size = mask.shape

        # First pass: area of the contour selected by higest_contour on the raw mask.
        imOCT = np.zeros((size[0], size[1]), np.float64)
        contours, _ = cv2.findContours(mask.astype('uint8'), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        cogidos = []
        cnt, cogidos = higest_contour(contours, cogidos)
        m = cv2.contourArea(cnt)
        fazAreainMM = m * (mm * mm) / (size[0] * size[1])

        # Filled drawing of that contour, rescaled to 0/1, used as the seed image.
        im = np.zeros((size[0], size[1]), np.uint8)
        cv2.drawContours(im, [cnt], 0, (255, 255, 255), -1)
        im = im[:] / 255

        # Region growing followed by morphological opening and closing.
        reg = region_growing(imOCT, im * 1.0, fazAreainMM, 0, 4, precision)
        reg = morph('open', reg, 3)
        reg = morph('closed', reg, 3)

        # Second pass: re-measure the area on the cleaned-up region.
        image1 = cv2.convertScaleAbs(reg)
        contours, _ = cv2.findContours(image1, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        cogidos = []
        cnt, cogidos = higest_contour(contours, cogidos)
        m = cv2.contourArea(cnt)
        area = m * (mm * mm) / (size[0] * size[1])

        # The shape descriptors are computed on the normalised input mask,
        # not on the region-grown image.
        for name, feature in shape_features.items():
            cols[name].append(feature(mask))

        file_name = os.path.basename(str(img_path))
        num = int(file_name.split(" ")[0].split("_")[-1])

        if clinical is None:
            # The spreadsheet does not depend on the image, so read it once
            # instead of once per file.
            clinical = _load_clinical_table_ex()

        patient_rows = clinical[clinical["patients"] == num]
        if patient_rows.empty:
            raise IndexError(f'patient {num} ({img_path}) not found in the clinical table')

        area_lst.append(area)
        scheme_lst.append(int(num))
        age_lst.append(int(patient_rows["Age"].iloc[0]))
        gender_lst.append(int(patient_rows["Sex"].iloc[0]))
        label_lst.append(label)

    df = pd.DataFrame(cols)
    fold = pd.DataFrame(scheme_lst)
    df['label'] = label_lst
    df['area'] = area_lst
    df["age"] = age_lst
    df["gender"] = gender_lst
    fold["scheme_num"] = scheme_lst

    return df, fold

In [91]:
# Gather the hold-out image paths for both classes. The folders live in the
# parent of the current working directory.
_parent_dir_ex = '/'.join(os.path.dirname(os.path.realpath("__file__")).split('/')[:-1])

# NOTE(review): the substring tests below run against the full path, which
# already contains the folder name ("AD" / "SCD"), so they never reject a
# file. Confirm whether the file name was meant to be tested instead.
AD_scp_ex = []
for i in Path(_parent_dir_ex + "/Holdout_proposed_AI/AD").iterdir():
    if 'AD' not in str(i):
        continue
    AD_scp_ex.append(i)

SCD_scp_ex = []
for i in Path(_parent_dir_ex + "/Holdout_proposed_AI/SCD").iterdir():
    if 'SCD' not in str(i):
        continue
    SCD_scp_ex.append(i)

# Turn each path list into (feature frame, patient-number frame).
AD_scp_ex, fold_num1_ex = make_df_ex(AD_scp_ex, "AD_scp")
SCD_scp_ex, fold_num2_ex = make_df_ex(SCD_scp_ex, "SCD_scp")

# Patient numbers side by side with their class label.
fold_AD_ex = pd.concat([fold_num1_ex, AD_scp_ex["label"]], axis=1).reset_index(drop=True)
fold_SCD_ex = pd.concat([fold_num2_ex, SCD_scp_ex["label"]], axis=1).reset_index(drop=True)

# Row order is SCD first, then AD, in both the fold table and the feature table.
fold_ex = pd.concat([fold_SCD_ex, fold_AD_ex], ignore_index=True)
fold_dup_ex = fold_ex.drop_duplicates().reset_index(drop=True)

# Drop every column that contains a missing value.
df_ex = pd.concat([SCD_scp_ex, AD_scp_ex], ignore_index=True).dropna(axis=1)

# Model inputs: every remaining column except the class label.
cols_ex = [column for column in df_ex.columns.tolist() if column != 'label']

100%|██████████| 29/29 [00:09<00:00,  3.15it/s]
100%|██████████| 16/16 [00:04<00:00,  3.26it/s]


In [92]:
#Proposed
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMClassifier
from sklearn.metrics import *
import warnings
import joblib
# Load the five saved models and score each one on the hold-out set.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load weight files that come from a trusted source.
warnings.filterwarnings('ignore')

le = LabelEncoder()
labels_ex = le.fit_transform(fold_ex["label"])

acc_sum = score_sum = 0
auc_lst, y_lst, pred_lst, label_lst, fold_num = [], [], [], [], []

# Per-model statistics derived from the confusion matrix.
spec_lst, sens_lst, acc_lst = [], [], []

set_seed(0)
for i in range(5):
    lr_clf = joblib.load(
        '/'.join(os.path.dirname(os.path.realpath("__file__")).split('/')[:-1])
        + '/FAZ_code/Proposed_weight/' + 'lgb_' + str(i) + '.pkl'
    )
    X_test, y_test = df_ex[cols_ex], labels_ex

    pred = lr_clf.predict(X_test)
    preds = lr_clf.predict_proba(X_test)

    # AUC is computed from the probability column of encoded class 1.
    score = roc_auc_score(y_test, preds[:, 1])
    auc_lst.append(float(score))

    # Both lists receive the same ground-truth labels.
    y_lst.extend(y_test)
    label_lst.extend(y_test)

    # Stored probabilities are those of encoded class 0.
    pred_lst.extend(preds[:, 0])
    fold_num.append(preds[:, 0])

    # The flattened matrix is ordered (true 0, pred 0), (true 0, pred 1),
    # (true 1, pred 0), (true 1, pred 1), so this unpacking treats encoded
    # label 0 as the positive class.
    tp, fn, fp, tn = confusion_matrix(y_test, pred).ravel()
    sens_lst.append(tp / (tp + fn))
    spec_lst.append(tn / (tn + fp))
    acc_lst.append((tn + tp) / (tn + fp + fn + tp))

spec, sens, acc = np.array(spec_lst), np.array(sens_lst), np.array(acc_lst)
print(f'Sensitivity:{sens.mean():.3f}± {sens.std():.3f}, Specificity:{spec.mean():.3f}± {spec.std():.3f}, Accuracy:{acc.mean():.3f}± {acc.std():.3f}, AUC:{np.array(auc_lst).mean():.3f}± {np.array(auc_lst).std():.3f} ')



Sensitivity:0.545± 0.051, Specificity:0.838± 0.064, Accuracy:0.649± 0.043, AUC:0.720± 0.048 
