## Classification algorithm into CAN and non-CAN patients
### Load All parameters and classification into DF

In [81]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import pyarrow.feather as feather
from tqdm import tqdm
from scipy.stats import mannwhitneyu
import matplotlib.pyplot as plt

In [4]:
def check_clean_valid(path):
    """Only indicates that the folder is valid if it contains a file named 'CleanParameters.json'
    Input: Filepath
    Output: True if the folder indexed by the filepath contains a file called CleanParameters.json, False otherwise"""
    return os.path.exists(path + '/CleanParameters.json')

In [7]:
def collect_row(parapath, patient, count):
    """Extracts information about the snippet from the snippet metadata file and inputs, and returns a dictionary ready to be added as a row in a DataFrame
    Input: snippet folder path, patient ID, number of the snippet for this patient
    Output: Dictionary with information about snippet"""
    with open(parapath + '/CleanParameters.json') as json_file:
        row = json.load(json_file)
        json_file.close()
        
    row['Patient'] = patient
    row['Count'] = count[-3:]
    print(row)
    return row

In [92]:
def lookup_patient(paradf, root):
    """Adds patient classifications (diabetic vs control, CAD vs no CAD) by...
    ...looking up values in the small metadata file (sdf) associated with the...
    ..database and editing the parameter dataframe accordingly
    Input: DataFrame of Parameters (paradf), root of the database (root)
    Output: Edited paradf"""
    # Load the Small Metadata file
    #with open(root + '\\SMeta.json') as json_file:
    with open(root + '/SMeta.json') as json_file:
        sdf = pd.read_json(json_file, orient = 'index')
        json_file.close()
    
    # Add DM/C classification
    paradf['CAN'] = (sdf.loc[paradf['Patient']]['CAN Symptoms']).reset_index(drop=True)
    
    # Add CAN/Not classification
    paradf['Diabetic'] = (sdf.loc[paradf['Patient']]['Diabetic']).reset_index(drop=True)
    
    return paradf

In [93]:
root = '/Volumes/GoogleDrive/.shortcut-targets-by-id/1sZgDE1M3o-bDINfAQYEXl_vqsKs03WxD/Molecool/Databases/Database1'

In [94]:
rows = []

patients = sorted(os.listdir(root))
for pat in tqdm(patients):     #Looping throught the folder Sxxxx
    if not pat.endswith(('.json', 'ini','\r')):
        path_pat = root+'/'+pat
        folders = sorted(os.listdir(path_pat))
        for folder in folders:    #Looping through the different ecg types 
            if not folder.endswith('.ini') and folder != 'Icon\r':
                path_folder = path_pat + '/' +folder   
                snippets = sorted(os.listdir(path_folder))
                for snip_folder in snippets:  #Looping through the snippet folders
                    if not snip_folder.endswith(('.json', '.ftr', '.ini','\r')):
                        print("In snippet folder " + snip_folder)
                        
                        ### NOW IN CORRECT FOLDER ###
                        path_snip =path_folder + '/' + snip_folder
                        if check_clean_valid(path_snip):
                            rows.append(collect_row(path_snip, pat, snip_folder))
                            print('\n\n')

 43%|██████████████████                        | 24/56 [00:00<00:00, 116.55it/s]

In snippet folder Snippet000
In snippet folder Snippet001
{'SDNN': 9.388711176710585, 'RMSSD': 3.7275644651843733, 'PNN50': 0.0, 'LF': 6.972244720053225, 'HF': 5.05258479487378, 'LFHF': 1.3799362114866596, 'Patient': 'S0250', 'Count': '001'}



In snippet folder Snippet002
In snippet folder Snippet000
In snippet folder Snippet000
{'SDNN': 19.93502977681763, 'RMSSD': 11.883778830332387, 'PNN50': 0.0, 'LF': 13.341491259583142, 'HF': 19.309388679624874, 'LFHF': 0.6909328659203482, 'Patient': 'S0273', 'Count': '000'}



In snippet folder Snippet001
{'SDNN': 23.382226136845293, 'RMSSD': 13.006798668165926, 'PNN50': 0.0, 'LF': 19.026568875603836, 'HF': 24.836171091050836, 'LFHF': 0.7660830168165349, 'Patient': 'S0273', 'Count': '001'}



In snippet folder Snippet002
{'SDNN': 63.1055109941366, 'RMSSD': 82.24981476252546, 'PNN50': 0.3546099290780142, 'LF': 13.725900026784812, 'HF': 24.00461959671393, 'LFHF': 0.5718024387549051, 'Patient': 'S0273', 'Count': '002'}



In snippet folder Snippet00

 68%|████████████████████████████▌             | 38/56 [00:00<00:00, 124.08it/s]

In snippet folder Snippet000
In snippet folder Snippet001
In snippet folder Snippet000
In snippet folder Snippet000
In snippet folder Snippet001
In snippet folder Snippet002
In snippet folder Snippet000
In snippet folder Snippet001
In snippet folder Snippet002
{'SDNN': 39.00482090802957, 'RMSSD': 55.15865854007237, 'PNN50': 0.2976190476190476, 'LF': 8.453702497903148, 'HF': 33.68820677944441, 'LFHF': 0.25093952175161066, 'Patient': 'S0366', 'Count': '002'}



In snippet folder Snippet003
In snippet folder Snippet000
In snippet folder Snippet001
{'SDNN': 95.18739956131294, 'RMSSD': 23.223453755800968, 'PNN50': 1.3550135501355014, 'LF': 64.8545595204144, 'HF': 46.647248018711885, 'LFHF': 1.3903190922302406, 'Patient': 'S0368', 'Count': '001'}



In snippet folder Snippet002
In snippet folder Snippet000
{'SDNN': 34.44213214880959, 'RMSSD': 12.232309388306282, 'PNN50': 0.6369426751592357, 'LF': 31.81360220854277, 'HF': 20.59657984193521, 'LFHF': 1.5446060682254337, 'Patient': 'S0372', 'Cou

100%|██████████████████████████████████████████| 56/56 [00:00<00:00, 111.37it/s]

{'SDNN': 25.239195613835783, 'RMSSD': 31.14668587435317, 'PNN50': 5.442176870748299, 'LF': 19.766269701949913, 'HF': 47.46720042194582, 'LFHF': 0.4164195386760422, 'Patient': 'S0427', 'Count': '001'}



In snippet folder Snippet002
{'SDNN': 31.287439327134503, 'RMSSD': 24.413111231467404, 'PNN50': 4.2105263157894735, 'LF': 23.47350802293822, 'HF': 45.422595914143834, 'LFHF': 0.51678041623396, 'Patient': 'S0427', 'Count': '002'}



In snippet folder Snippet003
In snippet folder Snippet004
{'SDNN': 28.237802346237082, 'RMSSD': 23.554149214214455, 'PNN50': 3.4129692832764507, 'LF': 24.638612267856956, 'HF': 40.90001513548919, 'LFHF': 0.6024108349651413, 'Patient': 'S0427', 'Count': '004'}



In snippet folder Snippet000
In snippet folder Snippet001
In snippet folder Snippet002
In snippet folder Snippet003
{'SDNN': 58.01693667846641, 'RMSSD': 78.82147895828484, 'PNN50': 0.3105590062111801, 'LF': 12.354673004681919, 'HF': 31.484445079633424, 'LFHF': 0.3924056140558716, 'Patient': 'S0430', '




In [95]:
def clean_sMeta(root):
    """Reformats data in small MetaData file and saves it.
    Input: Filepath for the folder the small MetaData file is in"""
    #with open(root + '\\SMeta.json') as json_file:
    with open(root + '/SMeta.json') as json_file:
        sdf = pd.read_json(json_file, orient = 'index')
        
        # Set CAN classification to bool through mapping
        #d = {'NO': False, 'YES': True}
        #sdf['CAN Symptoms'] = sdf['Neuropathy AUTONOMIC SYMPTOMS'].map(d)
        sdf['CAN Symptoms'] = sdf['CAN']
        
        # Set DM/Control classification to bool through mapping
        # d = {'DM': True, 'CONTROL': False}
        sdf['Diabetic'] = sdf['Diabetic']
        
        # Drop unnecessary columns
        sdf.drop(columns=['Diabetic'])
        sdf.drop(columns=['CAN'])
        
        # Save edited dataframe
        sdf.to_json((root + '/SMeta.json'), orient='index')
        json_file.close()
    return

In [96]:
clean_sMeta(root)

In [97]:
df = pd.DataFrame(rows)

In [98]:
lookup_patient(df,root)

Unnamed: 0,SDNN,RMSSD,PNN50,LF,HF,LFHF,Patient,Count,CAN,Diabetic
0,9.388711,3.727564,0.0,6.972245,5.052585,1.379936,S0250,1,False,
1,19.93503,11.883779,0.0,13.341491,19.309389,0.690933,S0273,0,True,
2,23.382226,13.006799,0.0,19.026569,24.836171,0.766083,S0273,1,True,
3,63.105511,82.249815,0.35461,13.7259,24.00462,0.571802,S0273,2,True,
4,30.022832,17.002152,1.612903,20.888081,14.231384,1.467748,S0282,2,False,
5,8.048376,1.886577,0.0,1.94878,3.007181,0.648042,S0283,2,True,
6,44.351059,62.74066,0.299401,6.989108,14.514825,0.481515,S0288,1,False,
7,8.737276,3.430068,0.0,5.611292,4.736355,1.184728,S0300,1,False,
8,31.674669,43.212536,0.301205,9.99972,8.540746,1.170825,S0310,0,True,
9,11.532271,2.453353,0.0,12.794287,4.941024,2.5894,S0317,0,True,


### Logistic Regression

In [155]:
X = df.iloc[:,0:6].values
y = df.iloc[:,8].values

In [173]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, random_state = 0)

In [174]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [175]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)

LogisticRegression(random_state=0)

In [176]:
y_pred = classifier.predict(X_test)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[False False]
 [False False]
 [False False]
 [False False]
 [False False]
 [False False]
 [False False]
 [ True  True]
 [False False]
 [False  True]
 [False False]
 [False False]]


In [177]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[10  0]
 [ 1  1]]


0.9166666666666666

### K-Nearest Neighbours (K-NN)

In [178]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
classifier.fit(X_train, y_train)

KNeighborsClassifier()

In [179]:
y_pred = classifier.predict(X_test)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[False False]
 [False False]
 [False False]
 [False False]
 [ True False]
 [False False]
 [False False]
 [False  True]
 [False False]
 [ True  True]
 [False False]
 [False False]]


In [180]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[9 1]
 [1 1]]


0.8333333333333334

### Support Vector Machine

In [181]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'linear', random_state = 0)
classifier.fit(X_train, y_train)

SVC(kernel='linear', random_state=0)

In [182]:
y_pred = classifier.predict(X_test)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[False False]
 [False False]
 [False False]
 [False False]
 [False False]
 [False False]
 [False False]
 [ True  True]
 [False False]
 [False  True]
 [False False]
 [False False]]


In [183]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[10  0]
 [ 1  1]]


0.9166666666666666

### Naive Bayes

In [184]:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)

GaussianNB()

In [185]:
y_pred = classifier.predict(X_test)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[False False]
 [False False]
 [False False]
 [False False]
 [ True False]
 [False False]
 [ True False]
 [ True  True]
 [False False]
 [ True  True]
 [False False]
 [False False]]


In [186]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[8 2]
 [0 2]]


0.8333333333333334

### Random Forest Classification

In [187]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
classifier.fit(X_train, y_train)

RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)

In [188]:
y_pred = classifier.predict(X_test)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[False False]
 [False False]
 [False False]
 [False False]
 [ True False]
 [False False]
 [False False]
 [ True  True]
 [False False]
 [False  True]
 [False False]
 [False False]]


In [189]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[9 1]
 [1 1]]


0.8333333333333334