In [None]:
import sys
sys.path.append('../functions/')
from imageProcess import *
from functionsAndTest import *
import numpy as np
from nilearn import image
from nilearn.image import get_data
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from arch.bootstrap import IIDBootstrap
from scipy.stats import kurtosis
import re
import time
import json
import glob
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, classification_report,f1_score,roc_auc_score,recall_score
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import roc_curve,auc

In [None]:
# Location of the DICOM studies and of the CSV that carries the labels.
path = '/dicom/'
label_path = 'with_category.csv'
# Prefix prepended to every numeric id to build the study folder name.
indicative = "000"

# The label file is semicolon-separated.
df_labels = pd.read_csv(label_path, sep=';')
# id1: prefixed id as text; path: folder of the study (trailing slash included).
df_labels = df_labels.assign(id1=indicative + df_labels['id'].astype(str))
df_labels = df_labels.assign(path=path + df_labels['id1'] + '/')

In [None]:
# Studies excluded from the analysis because of previous surgeries or artifacts.
skip = [
    '00020023096', '00020029351', '00030026189', '00030037155',
    '00030037273', '00050009584', '00050002822', '00050004047',
    '00050004076', '00050004345', '00050004468', '00050004755',
]
# Keep only the rows whose prefixed id is not in the exclusion list.
df_labels = df_labels.loc[~df_labels['id1'].isin(skip)]

# Get images and collapse data

In [None]:
# Study folders per group: label 1 is treated as healthy, label 0 as pathologic.
path_health = df_labels.loc[df_labels['label'] == 1, 'path']
path_pathologic = df_labels.loc[df_labels['label'] == 0, 'path']

# Dimensions and space name passed straight through to the loader.
nx, ny, nz = 45, 109, 91
space = 'mask'

# NOTE(review): getDataFromDirectory comes from the local helper modules; the
# meaning of the final 'mean' argument is not visible here -- confirm there.
dat_healt = getDataFromDirectory(path_health.values, space, nx, ny, nz, 'mean')
dat_pathologic = getDataFromDirectory(path_pathologic.values, space, nx, ny, nz, 'mean')

In [None]:
# Report how many entries each group holds under its 'right' key.
print('%s %d' % ('health neuroimaging: ', len(dat_healt['right'])))
print('%s %d' % ('pathologic neuroimaging: ', len(dat_pathologic['right'])))

# Eigenfaces

In [None]:
# Central-tendency result for each group, requested with the 'mean' option.
# imagCentralTend is a helper from the local modules; its two return values are
# unpacked as (right, left) following the target names -- TODO confirm the order.
heat_cent_right, heat_cent_left = imagCentralTend(dat_healt,'mean')
path_cent_right,path_cent_left = imagCentralTend(dat_pathologic,'mean')

In [None]:
# Per-group results of imagCov (helper from the local modules), unpacked as
# (right, left) following the target names. Presumably covariance matrices of
# the image data -- verify against the helper's definition.
heat_cov_right, heat_cov_left = imagCov(dat_healt)
path_cov_right, path_cov_left = imagCov(dat_pathologic)

In [None]:
# Sanity check: dimensions of the second value imagCov returned for the healthy group.
heat_cov_left.shape

In [None]:
# Singular values of each group/hemisphere matrix returned by imagCov.
# Only the spectrum is consumed afterwards (by CompNum), so compute_uv=False
# skips building the U and V factors, which saves time and memory on large
# matrices. It also avoids binding the throw-away name `_` at notebook level.
s_h_right = np.linalg.svd(heat_cov_right, compute_uv=False)
s_h_left = np.linalg.svd(heat_cov_left, compute_uv=False)
s_p_right = np.linalg.svd(path_cov_right, compute_uv=False)
s_p_left = np.linalg.svd(path_cov_left, compute_uv=False)

In [None]:
# Ask CompNum for a component count per singular-value spectrum, always with
# the same 0.99 threshold.
# NOTE(review): presumably 0.99 is the fraction of variance to retain -- confirm
# against CompNum in the local helper modules.
comp_healt_rigth, comp_healt_left, comp_patho_right, comp_patho_left = (
    CompNum(spectrum, 0.99)
    for spectrum in (s_h_right, s_h_left, s_p_right, s_p_left)
)

print(
    "Healthy right Components:", comp_healt_rigth,
    "\nHealthy left Components:", comp_healt_left,
    "\nPathology right Components:", comp_patho_right,
    "\nPathologic left Components:", comp_patho_left,
)

In [None]:
# Component counts for the healthy group, listed as [right, left], handed to
# imagPCA (helper from the local modules) together with the healthy data.
# The two returned objects are unpacked as (right, left) -- TODO confirm order.
n_comp_healt = [comp_healt_rigth,comp_healt_left]
heat_pca_right, heat_pca_left = imagPCA(dat_healt,n_comp_healt)

In [None]:
# Inspect the shape of the components_ attribute of the healthy right-hemisphere model.
# assumes imagPCA returns fitted scikit-learn PCA objects, i.e. (n_components, n_features) -- TODO confirm
heat_pca_right.components_.shape

In [None]:
# Inspect the shape of the components_ attribute of the healthy left-hemisphere model.
# assumes imagPCA returns fitted scikit-learn PCA objects, i.e. (n_components, n_features) -- TODO confirm
heat_pca_left.components_.shape

In [None]:
# Component counts for the pathologic group, listed as [right, left], handed to
# imagPCA (helper from the local modules) together with the pathologic data.
# The two returned objects are unpacked as (right, left) -- TODO confirm order.
n_comp_patho = [comp_patho_right,comp_patho_left]
patho_pca_right, patho_pca_left = imagPCA(dat_pathologic,n_comp_patho)

In [None]:
# Inspect the shape of the components_ attribute of the pathologic right-hemisphere model.
# assumes imagPCA returns fitted scikit-learn PCA objects, i.e. (n_components, n_features) -- TODO confirm
patho_pca_right.components_.shape

In [None]:
# Inspect the shape of the components_ attribute of the pathologic left-hemisphere model.
# assumes imagPCA returns fitted scikit-learn PCA objects, i.e. (n_components, n_features) -- TODO confirm
patho_pca_left.components_.shape

## Data projection

In [None]:
# Project each group/hemisphere dataset with the model that was fitted for
# that same group and hemisphere.
healt_right_projected, healt_left_projected = (
    heat_pca_right.transform(dat_healt['right']),
    heat_pca_left.transform(dat_healt['left']),
)
patho_right_projected, patho_left_projected = (
    patho_pca_right.transform(dat_pathologic['right']),
    patho_pca_left.transform(dat_pathologic['left']),
)

In [None]:
# One caption/shape pair per line; the explicit single-space separator matches
# print's default, and the embedded "\n" prefixes start each following row.
print(
    "Healthy right Projected Shape:", healt_right_projected.shape,
    "\nHealthy left Projected Shape:", healt_left_projected.shape,
    "\nPathology right Projected Shape:", patho_right_projected.shape,
    "\nPathologic left Projected Shape:", patho_left_projected.shape,
    sep=" ",
)

## Hemispheres comparison

In [None]:
# Compare the right and left projections within each group. hemisCOmpar (helper
# from the local modules) returns three values; only the third is kept and the
# first two are discarded.
# NOTE(review): the meaning of the trailing argument 2 is not visible here --
# confirm against the helper's definition.
_ ,_, diff_healthy = hemisCOmpar(healt_right_projected,healt_left_projected,2)
_ ,_, diff_pathology = hemisCOmpar(patho_right_projected,patho_left_projected,2)

In [None]:
# Overlay the kernel density estimates of the hemisphere differences for both
# groups on one axis so the two distributions can be compared directly.
f, ax1 = plt.subplots(1, 1, figsize=(10, 5))
sns.kdeplot(diff_healthy, ax=ax1, label="Healthy", shade=True, color="b")
sns.kdeplot(diff_pathology, ax=ax1, label="Pathologic", shade=True, color="r")
# A KDE curve is a probability density, not a count of observations.
ax1.set(xlabel='Distance difference', ylabel='Density')
# Draw the legend explicitly: newer seaborn releases attach `label=` to the
# artist but do not add the legend themselves. Harmless on older releases.
ax1.legend()
plt.show()

# Model 1

In [None]:
# Class labels for the classifier: 0 for every healthy sample, 1 for every
# pathologic sample.
h = np.zeros(len(diff_healthy))
p = np.ones(len(diff_pathology))

# Stack feature values and labels in the same order (healthy first).
x = np.concatenate([diff_healthy, diff_pathology])
y = np.concatenate([h, p])

# Tabular view of the same data: label column 'y' followed by feature column 'x'.
df = pd.DataFrame(y, columns=['y']).assign(x=x)

In [None]:
# Hold out 30% of the samples for testing, keeping the class proportions equal
# in both parts (stratify=y). The fixed random_state makes the split, and every
# metric computed from it, reproducible under Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, stratify=y, random_state=42
)

In [None]:
# prepare models: (short label, unfitted estimator) pairs; the label is reused
# in the printed report and as the tick label of the comparison plot.
models = [
    ('LR', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC()),
    ('RF', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('AGB', AdaBoostClassifier(n_estimators=100, random_state=42)),
    ('SGB', GradientBoostingClassifier(n_estimators=100, random_state=42)),
]

# evaluate each model in turn
results = []
names = []
scoring = 'accuracy'
# random_state only has an effect when shuffle=True; recent scikit-learn
# releases raise ValueError if a seed is given without shuffling. With an
# integer seed every split() call yields the same folds, so one splitter can be
# shared and all models are scored on identical partitions.
kfold = KFold(n_splits=10, shuffle=True, random_state=2020)
for name, model in models:
    # The single feature is reshaped to a column: scikit-learn expects 2-D X.
    cv_results = cross_val_score(model, X_train.reshape(-1, 1), y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    # Mean accuracy over the folds, with the standard deviation in parentheses.
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

In [None]:
# One box per model showing the spread of its cross-validation scores.
fig, ax = plt.subplots(figsize=(10, 5))
ax.boxplot(results)
ax.set_xticklabels(names)
ax.set(xlabel='Model', ylabel='Accuracy')
plt.show()

In [None]:
# Fit a support vector classifier with default hyper-parameters on the training
# split. The single feature is reshaped to a column because scikit-learn
# expects a 2-D feature matrix.
classifier = SVC()
classifier.fit(X_train.reshape(-1, 1), y_train)

In [None]:
# Predict the class of every held-out sample (feature reshaped to a column, as in fit).
y_test_pred = classifier.predict(X_test.reshape(-1, 1))

In [None]:
# Fraction of held-out samples whose predicted class matches the true class.
accuracy_score(y_true=y_test, y_pred=y_test_pred)

In [None]:
# Per-class precision, recall and F1 on the held-out split. target_names follow
# the sorted label order: 0 was assigned to healthy samples and 1 to pathologic
# ones when the label vector was built.
print(classification_report(y_true=y_test, y_pred=y_test_pred, target_names=["Healthy", "Pathologic"]))

In [None]:
# Confusion matrix of the held-out predictions, drawn as an annotated heatmap.
plt.figure()
plt.title("Heatmap")

# Pair every true label with its prediction, then count each combination.
classes_dict = dict(Actual=y_test.tolist(), Predicted=y_test_pred.tolist())
classes_df = pd.DataFrame(classes_dict, columns=["Actual", "Predicted"])
conf_matrix = pd.crosstab(
    index=classes_df['Actual'],
    columns=classes_df['Predicted'],
    rownames=['Actual'],
    colnames=['Predicted'],
)

# fmt='.0f' prints the counts without decimals.
ax = sns.heatmap(conf_matrix, annot=True, cmap='Blues', fmt='.0f')
# Reverse the default ordering of both axes.
ax.invert_yaxis()
ax.invert_xaxis()