# EigenFaces with Normalized Cross Correlation (NCC) data

In [None]:
import numpy as np
from nilearn import image
from nilearn.image import get_data
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from arch.bootstrap import IIDBootstrap
from scipy.stats import kurtosis
import math
import re
import time
import json
import glob
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, classification_report,f1_score,roc_auc_score,recall_score
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import roc_curve,auc

In [None]:
# Root folder of the studies and the semicolon-separated label table.
path = '/dicom/'
label_path = 'with_category.csv'
indicative = "000"  # prefix placed in front of every study id

# Study folder name = prefix + id rendered as text; study directory = root + folder name + '/'.
df_labels = pd.read_csv(label_path, sep=';')
df_labels = df_labels.assign(id1=lambda d: indicative + d['id'].astype(str))
df_labels = df_labels.assign(path=lambda d: path + d['id1'] + '/')

# Display the path stored under index label 1 as a quick sanity check.
df_labels['path'][1]
# glob.glob(path + "/*")

In [None]:
# Studies excluded because of previous surgeries or artifacts.
skip = [
    '00020023096', '00020029351', '00030026189', '00030037155',
    '00030037273', '00050009584', '00050002822', '00050004047',
    '00050004076', '00050004345', '00050004468', '00050004755',
]
# Keep only the rows whose id1 is not in the exclusion list.
df_labels = df_labels.loc[~df_labels['id1'].isin(skip)]

# Eigenfunction by hemispheres

In [None]:
# Study directories per class: rows with label 1 feed the healthy set, label 0 the pathologic set.
path_health = df_labels[df_labels.label == 1].path
path_pathologic = df_labels[df_labels.label == 0].path
space = 'hemispheres'

# Volume dimensions handed to the loader for the selected space.
if space == 'hemispheres':
    nx, ny, nz = 45, 109, 91
elif space == 'all':
    nx, ny, nz = 91, 109, 91
else:
    # Fail loudly rather than silently reusing nx/ny/nz left over from a previous cell.
    raise ValueError("unknown space: %r" % (space,))

# getDataFromDirectory is defined elsewhere; the result is indexed by 'right'/'left' in later cells.
dat_healt = getDataFromDirectory('/dicom/', path_health.values, space, nx, ny, nz, 'mean')
dat_pathologic = getDataFromDirectory('/dicom/', path_pathologic.values, space, nx, ny, nz, 'mean')

In [None]:
# Report how many entries each class holds under the 'right' key.
print('health neuroimaging: ',len(dat_healt['right']))
print('pathologic neuroimaging: ',len(dat_pathologic['right']))

In [None]:
# imagCov (defined elsewhere) returns a pair that is unpacked as (right, left) for each class.
# Presumably these are covariance matrices, one per hemisphere — TODO confirm against imagCov.
heat_cov_right, heat_cov_left = imagCov(dat_healt,'hemispheres')
path_cov_right, path_cov_left = imagCov(dat_pathologic,'hemispheres')

In [None]:
# Display the shape of the healthy left-hemisphere matrix returned by imagCov.
heat_cov_left.shape

In [None]:
# Singular values of each matrix returned by imagCov.
# Only the singular values are used downstream, so skip computing U and V (compute_uv=False).
s_h_right = np.linalg.svd(heat_cov_right, compute_uv=False)
s_h_left = np.linalg.svd(heat_cov_left, compute_uv=False)
s_p_right = np.linalg.svd(path_cov_right, compute_uv=False)
s_p_left = np.linalg.svd(path_cov_left, compute_uv=False)

In [None]:
# CompNum (defined elsewhere) is given the singular values and a 0.99 threshold;
# presumably it returns the number of components needed to reach it — TODO confirm.
comp_healt_rigth = CompNum(s_h_right, 0.99)
comp_healt_left = CompNum(s_h_left, 0.99)
comp_patho_right = CompNum(s_p_right, 0.99)
comp_patho_left = CompNum(s_p_left, 0.99)

# Each label names the hemisphere of the value printed next to it.
print("Healthy Right Components:", comp_healt_rigth,
      "\nHealthy Left Components:", comp_healt_left,
      "\nPathology Right Components:", comp_patho_right,
      "\nPathologic Left Components:", comp_patho_left)

In [None]:
# Component counts ordered [right, left], matching the order in which imagPCA's result is unpacked.
n_comp_healt = [comp_healt_rigth,comp_healt_left]
# imagPCA is defined elsewhere; the returned objects expose components_ and transform in later cells.
heat_pca_right, heat_pca_left = imagPCA(dat_healt,n_comp_healt,'hemispheres')

In [None]:
# Display the shape of the components_ array of the healthy right-hemisphere PCA.
heat_pca_right.components_.shape

In [None]:
# Display the shape of the components_ array of the healthy left-hemisphere PCA.
heat_pca_left.components_.shape

In [None]:
# Component counts ordered [right, left], matching the order in which imagPCA's result is unpacked.
n_comp_patho = [comp_patho_right, comp_patho_left]
# Pass the space explicitly, as the healthy call does, instead of relying on imagPCA's default.
patho_pca_right, patho_pca_left = imagPCA(dat_pathologic, n_comp_patho, 'hemispheres')

In [None]:
# Display the shape of the components_ array of the pathologic right-hemisphere PCA.
patho_pca_right.components_.shape

In [None]:
# Display the shape of the components_ array of the pathologic left-hemisphere PCA.
patho_pca_left.components_.shape

## Data projection

In [None]:
# Project every class/hemisphere onto the PCA fitted for that same class and hemisphere.
# NOTE(review): healthy and pathologic data are therefore projected onto different bases.
healt_right_projected = heat_pca_right.transform(dat_healt['right'])
healt_left_projected = heat_pca_left.transform(dat_healt['left'])
patho_right_projected = patho_pca_right.transform(dat_pathologic['right'])
patho_left_projected = patho_pca_left.transform(dat_pathologic['left'])

In [None]:
# Each label names the hemisphere of the projection whose shape is printed next to it.
print("Healthy Right Projected Shape:", healt_right_projected.shape,
      "\nHealthy Left Projected Shape:", healt_left_projected.shape,
      "\nPathology Right Projected Shape:", patho_right_projected.shape,
      "\nPathologic Left Projected Shape:", patho_left_projected.shape)

## Hemispheres comparison

In [None]:
# hemisCOmpar (defined elsewhere) returns three values; only the third one is kept.
# The meaning of the last argument (2) is set by hemisCOmpar — TODO confirm.
_ ,_, diff_healthy = hemisCOmpar(healt_right_projected,healt_left_projected,2)
_ ,_, diff_pathology = hemisCOmpar(patho_right_projected,patho_left_projected,2)

In [None]:
# Quantiles of diff_healthy at 2.5%, 5%, 25%, 50%, 75%, 95% and 97.5%.
np.quantile(diff_healthy,[0.025,0.05,0.25,0.5,0.75,0.95,0.975])

In [None]:
# Quantiles of diff_pathology at 2.5%, 5%, 25%, 50%, 75%, 95% and 97.5%.
np.quantile(diff_pathology,[0.025,0.05,0.25,0.5,0.75,0.95,0.975])

In [None]:
# Overlay both densities on the same axes; labels plus a legend identify which curve is which
# (red = diff_healthy, blue = diff_pathology).
p1 = sns.kdeplot(diff_healthy, shade=True, color="r", label="healthy")
p1 = sns.kdeplot(diff_pathology, shade=True, color="b", label="pathologic")
p1.legend();

# Eigenfunction all

In [None]:
# Study directories per class: rows with label 1 feed the healthy set, label 0 the pathologic set.
path_health = df_labels[df_labels.label == 1].path
path_pathologic = df_labels[df_labels.label == 0].path
space = 'all'

# Volume dimensions handed to the loader for the selected space.
if space == 'hemispheres':
    nx, ny, nz = 45, 109, 91
elif space == 'all':
    nx, ny, nz = 91, 109, 91
else:
    # Fail loudly rather than silently reusing nx/ny/nz left over from a previous cell.
    raise ValueError("unknown space: %r" % (space,))

# getDataFromDirectory is defined elsewhere; the result is indexed by 'data' in later cells.
dat_healt = getDataFromDirectory('/dicom/', path_health.values, space, nx, ny, nz, 'mean')
dat_pathologic = getDataFromDirectory('/dicom/', path_pathologic.values, space, nx, ny, nz, 'mean')

In [None]:
import os

# Every entry of the Eigen image folder except the macOS '.DS_Store' file
# becomes a study directory path with a trailing slash.
Eigen_path = "root/Eigen_imagenes/"
Eigen_studies = os.listdir(Eigen_path)
list_est_Eigen = [
    Eigen_path + entry + '/'
    for entry in Eigen_studies
    if entry != '.DS_Store'
]
# Uses space/nx/ny/nz as set by the loading cell that ran before this one.
Eigen_data = getDataFromDirectory('/Eigen_imagenes/', list_est_Eigen, space, nx, ny, nz, 'mean')

In [None]:
# Report how many entries each dataset holds under the 'data' key.
print('health neuroimaging: ',len(dat_healt['data']))
print('pathologic neuroimaging: ',len(dat_pathologic['data']))
print('healty neuroimaging by Eigenfaces: ',len(Eigen_data['data']))

## Random selection of healthy data

In [None]:
%%time
# mainRgress is defined elsewhere. Judging by the names it is unpacked into, it returns
# cross-validation results, per-model indicators, accuracies and component counts — TODO confirm.
# Inputs: the Eigen dataset with a 0.99 threshold, then the healthy and pathologic datasets.
results_cv,res_indicators,res_accuracy,n_componentes = mainRgress(Eigen_data['data'],0.99,dat_healt['data'],dat_pathologic['data'],n_iterEigen=200,
                                       size_eigen_sample=0.6,scoring='accuracy',n_splits=10,n_iter_fitModels=20)

In [None]:
# Mean of the component counts returned by mainRgress.
np.mean(n_componentes)


In [None]:
# Plot the cross-validation results for the 'accuracy' score (CV_plot is defined elsewhere).
CV_plot(results_cv,'accuracy')

In [None]:
# Groups used for the per-indicator analysis; each entry is passed to modelIndicators below.
# NOTE(review): the spellings are presumably lookup keys produced by mainRgress — do not "fix" them here.
vari = ['Helthy','Pathologic','micro avg','macro avg','weighted avg']

## Decision trees

In [None]:
# Summarise the CART indicators returned by mainRgress, one group at a time.
CART = res_indicators['CART']
CART['indicator'] = CART.index  # expose the index as the column named in the calls below
for v in vari:
    res_ind = modelIndicators(CART, v, 'indicator')
    print(v, '\n', res_ind, '\n')
acc(res_accuracy, 'CART')

## Support vector machine

In [None]:
# Summarise the SVM indicators returned by mainRgress, one group at a time.
SVM = res_indicators['SVM']
SVM['indicator'] = SVM.index  # expose the index as the column named in the calls below
for v in vari:
    res_ind = modelIndicators(SVM, v, 'indicator')
    print(v, '\n', res_ind, '\n')
acc(res_accuracy, 'SVM')

## Random forest

In [None]:
# Summarise the RF indicators returned by mainRgress, one group at a time.
RF = res_indicators['RF']
RF['indicator'] = RF.index  # expose the index as the column named in the calls below
for v in vari:
    res_ind = modelIndicators(RF, v, 'indicator')
    print(v, '\n', res_ind, '\n')
acc(res_accuracy, 'RF')

## Stochastic gradient boosting

In [None]:
# Summarise the SGB indicators returned by mainRgress, one group at a time.
SGB = res_indicators['SGB']
SGB['indicator'] = SGB.index  # expose the index as the column named in the calls below
for v in vari:
    res_ind = modelIndicators(SGB, v, 'indicator')
    print(v, '\n', res_ind, '\n')
acc(res_accuracy, 'SGB')

# Best model: SGB

In [None]:
# imagCov (defined elsewhere) applied to the whole-volume data.
# NOTE: the "healthy" matrix is computed from the Eigen dataset, not from dat_healt.
heat_cov = imagCov(Eigen_data['data'],space='all')
path_cov = imagCov(dat_pathologic['data'],space='all')

In [None]:
# Singular values of each matrix returned by imagCov.
# Only the singular values are used downstream, so skip computing U and V (compute_uv=False).
s_h = np.linalg.svd(heat_cov, compute_uv=False)
s_p = np.linalg.svd(path_cov, compute_uv=False)

In [None]:
# CompNum (defined elsewhere) is called with a 0.90 threshold here, versus 0.99 in the hemisphere analysis.
comp_healt = CompNum(s_h,0.90)
comp_patho = CompNum(s_p,0.90)

print("Healthy Components:", comp_healt,
     "\nPathology Components:", comp_patho)

In [None]:
# Select the larger of the two component counts, wrapped in a one-element list
# because imagPCA is called with a list of counts.
n_comp = [max(comp_healt, comp_patho)]
n_comp

In [None]:
# Fit the whole-volume PCA on the Eigen dataset (imagPCA is defined elsewhere).
heat_pca = imagPCA(Eigen_data['data'],n_comp,'all')

In [None]:
# Display the shape of the components_ array of the whole-volume PCA.
heat_pca.components_.shape

## Data projection

### Projection of pathologic data onto the healthy space

In [None]:
# Project both classes with the same PCA (the one fitted on the Eigen dataset).
healtONhealt = heat_pca.transform(dat_healt['data'])
patholONhealt = heat_pca.transform(dat_pathologic['data'])

In [None]:
# Show the shapes of the two projected datasets.
print("Healthy Projected Shape:", healtONhealt.shape,
     "\nPathology Projected Shape:", patholONhealt.shape)

## Model 1: pathologic data projected onto the healthy space

In [None]:
# Targets for the classifier: 0 for every healthy row, 1 for every pathologic row.
# NOTE: this is the reverse of the df_labels encoding used to pick the studies (label 1 = healthy).
h = np.zeros(healtONhealt.shape[0])
p = np.ones(patholONhealt.shape[0])

# Stack the healthy rows first, then the pathologic rows, for both features and targets.
x = np.vstack((healtONhealt, patholONhealt))
y = np.hstack((h, p))

# Same data as a frame, with the target appended as column 'y'.
df = pd.DataFrame(x).assign(y=y)

In [None]:
# Display the shape of the combined frame (features plus the 'y' column).
df.shape

In [None]:
# Stratified 70/30 split. No random_state is passed (it is left commented out),
# so the split — and every metric below — changes from run to run.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, stratify=y)#,random_state=0)

In [None]:
# Standardise the features: the scaler is fitted on the training split only,
# then the same transformation is applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## Stochastic Gradient Boosting

In [None]:
# Gradient boosting classifier with 100 estimators, fitted on the scaled training split.
# NOTE(review): no subsample is passed, so the default applies; scikit-learn documents that
# only subsample < 1.0 gives *stochastic* gradient boosting — confirm this matches the intent.
# NOTE(review): no random_state is set, so the fit is not reproducible.
SGB = GradientBoostingClassifier(n_estimators=100)
SGB.fit(X_train, y_train)

In [None]:
# Hard class predictions for the test split.
y_test_pred = SGB.predict(X_test)

In [None]:
# Display the test-set accuracy of the fitted classifier.
accuracy_score(y_true=y_test, y_pred=y_test_pred)

In [None]:
# Per-class precision/recall/F1. target_names follow the label order: 0 (healthy), 1 (pathologic).
# "Helthy" is left as spelled to match the group name used in the per-indicator analysis;
# the second name is spelled 'Pathologic' to match that same list.
print(classification_report(y_true=y_test, y_pred=y_test_pred, target_names=["Helthy", "Pathologic"]))

In [None]:
# Confusion matrix of the test split, drawn as an annotated heatmap.
plt.figure()
plt.title("Heatmap")

# Cross-tabulate actual against predicted labels.
classes_dict = {
    'Actual': y_test.tolist(),
    'Predicted': y_test_pred.tolist(),
}
classes_df = pd.DataFrame(classes_dict, columns=["Actual", "Predicted"])
conf_matrix = pd.crosstab(
    classes_df['Actual'],
    classes_df['Predicted'],
    rownames=['Actual'],
    colnames=['Predicted'],
)

ax = sns.heatmap(conf_matrix, annot=True, cmap='Blues', fmt='.0f')
# Flip both axes so the ordering of the classes is reversed on each of them.
ax.invert_yaxis()
ax.invert_xaxis()

In [None]:
# ROC needs a continuous score per sample. Feeding it the hard 0/1 predictions gives a
# single operating point and a misleading AUC, so use the predicted probability of class 1.
y_test_score = SGB.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_test_score)
roc_auc = auc(fpr, tpr)

In [None]:
# ROC curve of the classifier against the chance diagonal.
plt.figure(num=1, figsize=(10, 5))
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')  # chance line
plt.plot(
    fpr,
    tpr,
    color='darkorange',
    lw=2,
    label='ROC curve (area = %0.2f)' % roc_auc,
)
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve')
plt.legend(loc='best')
plt.show()