In [27]:
import scipy.io
from pylab import *
from matplotlib import *
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import display
from operator import add
pd.options.mode.chained_assignment = None

In [28]:
def convert_to_df(file):
    # use scipy to load matlab file
    matlab_data = scipy.io.loadmat('data/' + str(file))
    
    # create dataframe and transpose
    df = pd.DataFrame(matlab_data['all_data']).T
    
    # create column headers
    df.columns = [
        'Mean Area Under Heart Beat',
        'Mean R-to-R Peak Interval',
        'Heart Rate',
        'Peak to Peak Blood Pressure',
        'Systolic Blood Pressure',
        'Diastolic Blood Pressure',
        'Pulse Pressure'
    ]
    
    # add labels column
    df['Golden Alarms'] = matlab_data['all_labels'][0]
    
    return df

filenames = ['1_a41178.mat', '2_a42126.mat', '3_a40076.mat', \
             '4_a40050.mat', '5_a41287.mat', '6_a41846.mat', \
             '7_a41846.mat', '8_a42008.mat', '9_a41846.mat']

patient_data = [convert_to_df(file) for file in filenames]

# floor all data
for df in patient_data:
    for c in df.columns:
        df[c] = df[c].apply(np.floor)
        df[c] = df[c].apply(int)

In [29]:
# create arrays to hold train and test dataframes for each patient
train_data = []
test_data = []

for df in patient_data:
    # get split index
    total = len(df)
    split_idx = int(total * 2 / 3)
    
    # append [0, split_idx) to train_data
    train_data.append(df.head(split_idx))
    
    # append [split_idx, total) to test data
    test_data.append(df.tail(total - split_idx))

In [30]:
prior_probabilities = pd.DataFrame(columns=['PH0', 'PH1'])

for i in range(9):
    PH1 = train_data[i]['Golden Alarms'].sum() / len(train_data[i]['Golden Alarms'])
    PH0 = 1 - PH1
    prior_probabilities.loc[i] = [PH0, PH1]

# for index, df in enumerate(train_data):
#     PH1 = df['Golden Alarms'].sum() / len(df['Golden Alarms'])
#     PH0 = 1 - PH1
#     print('Patient %d \tP(H0): %f \tP(H1): %f' % (index, PH0, PH1))

prior_probabilities

Unnamed: 0,PH0,PH1
0,0.973482,0.026518
1,0.983653,0.016347
2,0.997905,0.002095
3,0.997507,0.002493
4,0.998954,0.001046
5,0.97823,0.02177
6,0.982897,0.017103
7,0.99221,0.00779
8,0.97823,0.02177


In [31]:
df = train_data[0]
df = df[df['Golden Alarms'] == 1]

# 9 x 6 matrix of dataframes
likelihood_matrix = []

for patient in train_data:
    # get patient data for each hypothesis
    h1 = patient[patient['Golden Alarms'] == 1]
    h0 = patient[patient['Golden Alarms'] == 0]
        
    # generate likelihood matrix for each patient
    patient_likelihood_matrix = []

    # drop golden alarms
    columns = patient.columns.drop('Golden Alarms')

    for col in columns:
        # get unique value counts for each feature
        # scale by length of patient dataframe hypothesis to get probability
        h0_val_counts = h0[col].value_counts() / len(h0)
        h1_val_counts = h1[col].value_counts() / len(h1)
        
        patient_likelihood_dataframe = pd.DataFrame([h0_val_counts, h1_val_counts]).T
        patient_likelihood_dataframe.columns = ['%s H0' % col, '%s H1' % col]
        
        patient_likelihood_dataframe.fillna(0, inplace=True)
        
        # append to patient likelihood matrix
        patient_likelihood_matrix.append(patient_likelihood_dataframe)
        
    # append patient likelihood matrix to likelihood matrix
    likelihood_matrix.append(patient_likelihood_matrix)

In [32]:
features = [
    'Mean Area Under Heart Beat',
    'Mean R-to-R Peak Interval',
    'Heart Rate',
    'Peak to Peak Blood Pressure',
    'Systolic Blood Pressure',
    'Diastolic Blood Pressure',
    'Pulse Pressure'
]

bars = []
for patient_index, patient in enumerate(likelihood_matrix):
    for feature_index, feature in enumerate(patient):
        fig, ax = plt.subplots(figsize=(15, 10))
        ax.bar(feature.index - 0.2, feature[feature.columns[0]], width=0.4, alpha=0.5, color='green')
        ax.bar(feature.index + 0.2, feature[feature.columns[1]], width=0.4, alpha=0.5, color='blue')
        ax.legend(['H0', 'H1'])
        ax.set_title('Patient %d - %s' % (patient_index, columns[feature_index]))
        bars.append(ax)

# for bar in bars:
#     plt.show()



In [33]:
for patient_index, patient in enumerate(likelihood_matrix):
    for feature_index, feature in enumerate(patient):
        feature_h0 = feature[feature.columns[0]]
        feature_h1 = feature[feature.columns[1]]
        prior_h0 = prior_probabilities['PH0'].loc[patient_index]
        prior_h1 = prior_probabilities['PH1'].loc[patient_index]
        
        feature['ML']  = (feature_h1 >= feature_h0).astype(int)
        feature['MAP'] = (prior_h1 * feature_h1 >= prior_h0 * feature_h0).astype(int)
        
        
df = likelihood_matrix[0][0]
df

Unnamed: 0,Mean Area Under Heart Beat H0,Mean Area Under Heart Beat H1,ML,MAP
-8,0.001075,0.0,0,0
-7,0.000717,0.0,0,0
-6,0.020072,0.0,0,0
-5,0.650538,0.0,0,0
-4,0.135125,0.0,0,0
-3,0.041219,0.0,0,0
-2,0.036918,0.013158,0,0
-1,0.029391,0.026316,0,0
0,0.02724,0.026316,0,0
1,0.017921,0.092105,1,0


In [34]:
def lookup_MAP(patient_index, feature_index, value):
    df = likelihood_matrix[patient_index][feature_index]['MAP']
    return df.loc[value] if value in df.index else 0
def lookup_ML(patient_index, feature_index, value):
    df = likelihood_matrix[patient_index][feature_index]['ML']
    return df.loc[value] if value in df.index else 0

In [96]:
generated_alarms = []
for patient_index, patient in enumerate(test_data):
    generated_alarms.append(pd.DataFrame())
    for feature_index, feature in enumerate(features):
        df = generated_alarms[patient_index]
        df[['%s ML'  % str(feature)]] = patient[[feature]].applymap(lambda row: lookup_ML(patient_index,feature_index, row))
        df[['%s MAP' % str(feature)]] = patient[[feature]].applymap(lambda row: lookup_MAP(patient_index,feature_index, row))
    df['Golden Alarms'] = patient['Golden Alarms']

In [192]:
def get_cond_prob_matrix(patient_index):
    missed_detection_ml = []
    length = len(generated_alarms[patient_index].columns)-1
    for i in range(0, length, 2):
        df = generated_alarms[0]
        df = df[df['Golden Alarms'] == 1]
        golden = len(df)
        col = [df.columns[i]]
        df = df[col]
        df = df[df[col] == 0].dropna()
        missed_detection_ml.append(len(df)/golden)

    missed_detection_map = []
    for i in range(1, length, 2):
        df = generated_alarms[patient_index]
        df = df[df['Golden Alarms'] == 1]
        golden = len(df)
        col = [df.columns[i]]
        df = df[col]
        df = df[df[col] == 0].dropna()
        missed_detection_map.append(len(df)/golden)

    false_alarm_ml = []
    for i in range(0, length, 2):
        df = generated_alarms[patient_index]
        df = df[df['Golden Alarms'] == 0]
        golden = len(df)
        col = [df.columns[i]]
        df = df[col]
        df = df[df[col] == 1].dropna()
        false_alarm_ml.append(len(df)/golden)

    false_alarm_map = []
    for i in range(1, length, 2):
        df = generated_alarms[patient_index]
        df = df[df['Golden Alarms'] == 0]
        golden = len(df)
        col = [df.columns[i]]
        df = df[col]
        df = df[df[col] == 1].dropna()
        false_alarm_map.append(len(df)/golden)  

    conditional_probability_matrix = pd.DataFrame([missed_detection_ml, missed_detection_map, false_alarm_ml, false_alarm_map]).T
    conditional_probability_matrix.columns = ['Missed Detection ML', 'Missed Detection MAP', 'False Alarm ML', 'False Alarm MAP']
    conditional_probability_matrix.index = features

    ph0 = prior_probabilities.loc[0][0]
    ph1 = prior_probabilities.loc[0][1]

    conditional_probability_matrix['P(Error) ML']  = ph0 * conditional_probability_matrix['False Alarm ML'] + \
                                                     ph1 * conditional_probability_matrix['Missed Detection ML']
    conditional_probability_matrix['P(Error) MAP'] = ph0 * conditional_probability_matrix['False Alarm MAP'] + \
                                                     ph1 * conditional_probability_matrix['Missed Detection MAP']
    
    return conditional_probability_matrix




In [198]:
conditional_probability_matrix = []
for patient_index, patient in enumerate(test_data):
    conditional_probability_matrix.append(get_cond_prob_matrix(patient_index))
    df = conditional_probability_matrix[patient_index]
    df = df.T
    df['Min'] = df.min(axis=1)
    df = df.T
    display(df)


Unnamed: 0,Missed Detection ML,Missed Detection MAP,False Alarm ML,False Alarm MAP,P(Error) ML,P(Error) MAP
Mean Area Under Heart Beat,0.068966,0.827586,0.034188,0.002849,0.03511,0.024719
Mean R-to-R Peak Interval,0.551724,1.0,0.075499,0.0,0.088127,0.026518
Heart Rate,0.586207,0.931034,0.042735,0.0,0.057147,0.024689
Peak to Peak Blood Pressure,0.551724,0.931034,0.075499,0.0,0.088127,0.024689
Systolic Blood Pressure,0.931034,1.0,0.059117,0.0,0.082238,0.026518
Diastolic Blood Pressure,0.689655,1.0,0.383191,0.0,0.391318,0.026518
Pulse Pressure,0.517241,1.0,0.205128,0.0,0.213405,0.026518
Min,0.068966,0.827586,0.034188,0.0,0.03511,0.024689


Unnamed: 0,Missed Detection ML,Missed Detection MAP,False Alarm ML,False Alarm MAP,P(Error) ML,P(Error) MAP
Mean Area Under Heart Beat,0.068966,1.0,0.98717,0.0,0.962822,0.026518
Mean R-to-R Peak Interval,0.551724,1.0,0.168924,0.0,0.179075,0.026518
Heart Rate,0.586207,1.0,0.09052,0.0,0.103665,0.026518
Peak to Peak Blood Pressure,0.551724,1.0,0.053457,0.001426,0.06667,0.027906
Systolic Blood Pressure,0.931034,1.0,0.285103,0.0,0.302232,0.026518
Diastolic Blood Pressure,0.689655,1.0,0.044904,0.0,0.062001,0.026518
Pulse Pressure,0.517241,1.0,0.188881,0.0,0.197588,0.026518
Min,0.068966,1.0,0.044904,0.0,0.062001,0.026518


Unnamed: 0,Missed Detection ML,Missed Detection MAP,False Alarm ML,False Alarm MAP,P(Error) ML,P(Error) MAP
Mean Area Under Heart Beat,0.068966,1.0,0.002797,0.0,0.004552,0.026518
Mean R-to-R Peak Interval,0.551724,1.0,0.002098,0.0,0.016673,0.026518
Heart Rate,0.586207,1.0,0.017483,0.0,0.032564,0.026518
Peak to Peak Blood Pressure,0.551724,1.0,0.002098,0.0,0.016673,0.026518
Systolic Blood Pressure,0.931034,1.0,0.018182,0.0,0.042389,0.026518
Diastolic Blood Pressure,0.689655,1.0,0.023776,0.0,0.041434,0.026518
Pulse Pressure,0.517241,1.0,0.374825,0.0,0.378602,0.026518
Min,0.068966,1.0,0.002098,0.0,0.004552,0.026518


Unnamed: 0,Missed Detection ML,Missed Detection MAP,False Alarm ML,False Alarm MAP,P(Error) ML,P(Error) MAP
Mean Area Under Heart Beat,0.068966,1.0,0.991009,0.0,0.966558,0.026518
Mean R-to-R Peak Interval,0.551724,1.0,0.013986,0.000999,0.028246,0.02749
Heart Rate,0.586207,1.0,0.064935,0.0,0.078758,0.026518
Peak to Peak Blood Pressure,0.551724,1.0,0.000999,0.0,0.015603,0.026518
Systolic Blood Pressure,0.931034,1.0,0.031968,0.0,0.055809,0.026518
Diastolic Blood Pressure,0.689655,1.0,0.232767,0.0,0.244883,0.026518
Pulse Pressure,0.517241,1.0,0.00999,0.0,0.023441,0.026518
Min,0.068966,1.0,0.000999,0.0,0.015603,0.026518


Unnamed: 0,Missed Detection ML,Missed Detection MAP,False Alarm ML,False Alarm MAP,P(Error) ML,P(Error) MAP
Mean Area Under Heart Beat,0.068966,1.0,0.06499,0.0,0.065095,0.026518
Mean R-to-R Peak Interval,0.551724,1.0,0.071279,0.0,0.084019,0.026518
Heart Rate,0.586207,1.0,0.317261,0.0,0.324393,0.026518
Peak to Peak Blood Pressure,0.551724,1.0,0.05311,0.000699,0.066332,0.027198
Systolic Blood Pressure,0.931034,1.0,0.07058,0.0,0.093397,0.026518
Diastolic Blood Pressure,0.689655,1.0,0.122991,0.0,0.138018,0.026518
Pulse Pressure,0.517241,1.0,0.265549,0.0,0.272223,0.026518
Min,0.068966,1.0,0.05311,0.0,0.065095,0.026518


Unnamed: 0,Missed Detection ML,Missed Detection MAP,False Alarm ML,False Alarm MAP,P(Error) ML,P(Error) MAP
Mean Area Under Heart Beat,0.068966,1.0,0.613087,0.0,0.598658,0.026518
Mean R-to-R Peak Interval,0.551724,1.0,0.372688,0.0,0.377436,0.026518
Heart Rate,0.586207,1.0,0.375533,0.0,0.38112,0.026518
Peak to Peak Blood Pressure,0.551724,1.0,0.204125,0.000711,0.213343,0.02721
Systolic Blood Pressure,0.931034,0.842105,0.239687,0.006401,0.25802,0.028562
Diastolic Blood Pressure,0.689655,1.0,0.217639,0.0,0.230156,0.026518
Pulse Pressure,0.517241,1.0,0.337127,0.0,0.341903,0.026518
Min,0.068966,0.842105,0.204125,0.0,0.213343,0.026518


Unnamed: 0,Missed Detection ML,Missed Detection MAP,False Alarm ML,False Alarm MAP,P(Error) ML,P(Error) MAP
Mean Area Under Heart Beat,0.068966,1.0,0.428775,0.0,0.419234,0.026518
Mean R-to-R Peak Interval,0.551724,1.0,0.333333,0.0,0.339125,0.026518
Heart Rate,0.586207,1.0,0.262821,0.0,0.271396,0.026518
Peak to Peak Blood Pressure,0.551724,0.965517,0.258547,0.001425,0.266321,0.02699
Systolic Blood Pressure,0.931034,1.0,0.084758,0.0,0.107199,0.026518
Diastolic Blood Pressure,0.689655,1.0,0.150997,0.0,0.165281,0.026518
Pulse Pressure,0.517241,1.0,0.279202,0.0,0.285515,0.026518
Min,0.068966,0.965517,0.084758,0.0,0.107199,0.026518


Unnamed: 0,Missed Detection ML,Missed Detection MAP,False Alarm ML,False Alarm MAP,P(Error) ML,P(Error) MAP
Mean Area Under Heart Beat,0.068966,1.0,0.848703,0.0,0.828026,0.026518
Mean R-to-R Peak Interval,0.551724,1.0,0.023055,0.0,0.037074,0.026518
Heart Rate,0.586207,1.0,0.070605,0.0,0.084278,0.026518
Peak to Peak Blood Pressure,0.551724,0.916667,0.193084,0.002882,0.202594,0.027113
Systolic Blood Pressure,0.931034,1.0,0.393372,0.001441,0.407629,0.027921
Diastolic Blood Pressure,0.689655,1.0,0.638329,0.0,0.63969,0.026518
Pulse Pressure,0.517241,1.0,0.410663,0.0,0.413489,0.026518
Min,0.068966,0.916667,0.023055,0.0,0.037074,0.026518


Unnamed: 0,Missed Detection ML,Missed Detection MAP,False Alarm ML,False Alarm MAP,P(Error) ML,P(Error) MAP
Mean Area Under Heart Beat,0.068966,1.0,0.613087,0.0,0.598658,0.026518
Mean R-to-R Peak Interval,0.551724,1.0,0.372688,0.0,0.377436,0.026518
Heart Rate,0.586207,1.0,0.375533,0.0,0.38112,0.026518
Peak to Peak Blood Pressure,0.551724,1.0,0.204125,0.000711,0.213343,0.02721
Systolic Blood Pressure,0.931034,0.842105,0.239687,0.006401,0.25802,0.028562
Diastolic Blood Pressure,0.689655,1.0,0.217639,0.0,0.230156,0.026518
Pulse Pressure,0.517241,1.0,0.337127,0.0,0.341903,0.026518
Min,0.068966,0.842105,0.204125,0.0,0.213343,0.026518


In [140]:
# def lookup_ML(patient_index, feature_index, value):
#     df = likelihood_matrix[patient_index][feature_index]['ML']
#     return df.loc[value] if value in df.index else 0

def lookup_ML_test (patient_index, row):
    alarm = 0
    for feature_index in range(len(features)):
        df = likelihood_matrix[patient_index][feature_index]['ML']
        value = row[feature_index]
        if int(value) in df.index:
            alarm += df.loc[value]
    return 1 if alarm > 3 else 0

def lookup_MAP_test (patient_index, row):
    alarm = 0
    for feature_index in range(len(features)):
        df = likelihood_matrix[patient_index][feature_index]['MAP']
        value = row[feature_index]
        if int(value) in df.index:
            alarm += df.loc[value]
    return 1 if alarm > 0 else 0

In [199]:
for patient_index, patient in enumerate(test_data):
    patient['ML']  = patient.apply(lambda row: lookup_ML_test(patient_index=patient_index, row=row), axis=1)
    patient['MAP'] = patient.apply(lambda row: lookup_MAP_test(patient_index=patient_index, row=row), axis=1)

# test_data[0]

Unnamed: 0,Mean Area Under Heart Beat,Mean R-to-R Peak Interval,Heart Rate,Peak to Peak Blood Pressure,Systolic Blood Pressure,Diastolic Blood Pressure,Pulse Pressure,Golden Alarms,ML,MAP
2866,2,77,96,78,82,62,19,1,1,0
2867,-2,97,78,86,85,64,20,0,0,0
2868,2,89,84,97,82,63,19,0,0,0
2869,-3,89,84,85,83,65,18,0,0,0
2870,-4,90,84,80,83,65,18,0,0,0
2871,-3,103,72,95,84,65,19,0,0,0
2872,-5,98,78,96,85,61,23,0,0,0
2873,-5,92,78,100,84,63,20,0,0,0
2874,-5,98,78,100,82,60,22,0,0,0
2875,-5,96,72,98,83,62,21,0,0,0


In [82]:
corrcoef = []
for i in range(9):
    temp = []
    for j in range(9):
        a = patient_data[i]['Heart Rate']
        b = patient_data[j]['Heart Rate']
        length = min(len(a), len(b))
        temp.append(abs(np.corrcoef(np.asarray(a[:length]), np.asarray(b[:length]))[0][1]))
    corrcoef.append(temp)
df = pd.DataFrame(corrcoef)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,1.0,0.083897,0.188648,0.046239,0.035651,0.146867,0.167403,0.151857,0.146867
1,0.083897,1.0,0.415132,0.067249,0.110244,0.010139,0.343511,0.142679,0.010139
2,0.188648,0.415132,1.0,0.051025,0.212478,0.031632,0.58361,0.270494,0.031632
3,0.046239,0.067249,0.051025,1.0,0.070834,0.100789,0.02885,0.138294,0.100789
4,0.035651,0.110244,0.212478,0.070834,1.0,0.094013,0.096442,0.193773,0.094013
5,0.146867,0.010139,0.031632,0.100789,0.094013,1.0,0.072926,0.313802,1.0
6,0.167403,0.343511,0.58361,0.02885,0.096442,0.072926,1.0,0.220698,0.072926
7,0.151857,0.142679,0.270494,0.138294,0.193773,0.313802,0.220698,1.0,0.313802
8,0.146867,0.010139,0.031632,0.100789,0.094013,1.0,0.072926,0.313802,1.0


In [42]:
for col in df.columns:
    print('Patient %d\t%f' % (col, df[col].sum()-1))

Patient 0	0.967429
Patient 1	1.182990
Patient 2	1.784650
Patient 3	0.604068
Patient 4	0.907448
Patient 5	1.770169
Patient 6	1.586366
Patient 7	1.745399
Patient 8	1.770169


Patient 3 has the least total correlation with the other patients, implying that their data is problematic and is an outlier compared to other patients.  A high total correlation means that any given patient is closely related to the rest of the patients

In [203]:
# Task 2.2
# Method 1
for df in conditional_probability_matrix:
    display(df)



Unnamed: 0,Missed Detection ML,Missed Detection MAP,False Alarm ML,False Alarm MAP,P(Error) ML,P(Error) MAP
Mean Area Under Heart Beat,0.068966,0.827586,0.034188,0.002849,0.03511,0.024719
Mean R-to-R Peak Interval,0.551724,1.0,0.075499,0.0,0.088127,0.026518
Heart Rate,0.586207,0.931034,0.042735,0.0,0.057147,0.024689
Peak to Peak Blood Pressure,0.551724,0.931034,0.075499,0.0,0.088127,0.024689
Systolic Blood Pressure,0.931034,1.0,0.059117,0.0,0.082238,0.026518
Diastolic Blood Pressure,0.689655,1.0,0.383191,0.0,0.391318,0.026518
Pulse Pressure,0.517241,1.0,0.205128,0.0,0.213405,0.026518


Unnamed: 0,Missed Detection ML,Missed Detection MAP,False Alarm ML,False Alarm MAP,P(Error) ML,P(Error) MAP
Mean Area Under Heart Beat,0.068966,1.0,0.98717,0.0,0.962822,0.026518
Mean R-to-R Peak Interval,0.551724,1.0,0.168924,0.0,0.179075,0.026518
Heart Rate,0.586207,1.0,0.09052,0.0,0.103665,0.026518
Peak to Peak Blood Pressure,0.551724,1.0,0.053457,0.001426,0.06667,0.027906
Systolic Blood Pressure,0.931034,1.0,0.285103,0.0,0.302232,0.026518
Diastolic Blood Pressure,0.689655,1.0,0.044904,0.0,0.062001,0.026518
Pulse Pressure,0.517241,1.0,0.188881,0.0,0.197588,0.026518


Unnamed: 0,Missed Detection ML,Missed Detection MAP,False Alarm ML,False Alarm MAP,P(Error) ML,P(Error) MAP
Mean Area Under Heart Beat,0.068966,1.0,0.002797,0.0,0.004552,0.026518
Mean R-to-R Peak Interval,0.551724,1.0,0.002098,0.0,0.016673,0.026518
Heart Rate,0.586207,1.0,0.017483,0.0,0.032564,0.026518
Peak to Peak Blood Pressure,0.551724,1.0,0.002098,0.0,0.016673,0.026518
Systolic Blood Pressure,0.931034,1.0,0.018182,0.0,0.042389,0.026518
Diastolic Blood Pressure,0.689655,1.0,0.023776,0.0,0.041434,0.026518
Pulse Pressure,0.517241,1.0,0.374825,0.0,0.378602,0.026518


Unnamed: 0,Missed Detection ML,Missed Detection MAP,False Alarm ML,False Alarm MAP,P(Error) ML,P(Error) MAP
Mean Area Under Heart Beat,0.068966,1.0,0.991009,0.0,0.966558,0.026518
Mean R-to-R Peak Interval,0.551724,1.0,0.013986,0.000999,0.028246,0.02749
Heart Rate,0.586207,1.0,0.064935,0.0,0.078758,0.026518
Peak to Peak Blood Pressure,0.551724,1.0,0.000999,0.0,0.015603,0.026518
Systolic Blood Pressure,0.931034,1.0,0.031968,0.0,0.055809,0.026518
Diastolic Blood Pressure,0.689655,1.0,0.232767,0.0,0.244883,0.026518
Pulse Pressure,0.517241,1.0,0.00999,0.0,0.023441,0.026518


Unnamed: 0,Missed Detection ML,Missed Detection MAP,False Alarm ML,False Alarm MAP,P(Error) ML,P(Error) MAP
Mean Area Under Heart Beat,0.068966,1.0,0.06499,0.0,0.065095,0.026518
Mean R-to-R Peak Interval,0.551724,1.0,0.071279,0.0,0.084019,0.026518
Heart Rate,0.586207,1.0,0.317261,0.0,0.324393,0.026518
Peak to Peak Blood Pressure,0.551724,1.0,0.05311,0.000699,0.066332,0.027198
Systolic Blood Pressure,0.931034,1.0,0.07058,0.0,0.093397,0.026518
Diastolic Blood Pressure,0.689655,1.0,0.122991,0.0,0.138018,0.026518
Pulse Pressure,0.517241,1.0,0.265549,0.0,0.272223,0.026518


Unnamed: 0,Missed Detection ML,Missed Detection MAP,False Alarm ML,False Alarm MAP,P(Error) ML,P(Error) MAP
Mean Area Under Heart Beat,0.068966,1.0,0.613087,0.0,0.598658,0.026518
Mean R-to-R Peak Interval,0.551724,1.0,0.372688,0.0,0.377436,0.026518
Heart Rate,0.586207,1.0,0.375533,0.0,0.38112,0.026518
Peak to Peak Blood Pressure,0.551724,1.0,0.204125,0.000711,0.213343,0.02721
Systolic Blood Pressure,0.931034,0.842105,0.239687,0.006401,0.25802,0.028562
Diastolic Blood Pressure,0.689655,1.0,0.217639,0.0,0.230156,0.026518
Pulse Pressure,0.517241,1.0,0.337127,0.0,0.341903,0.026518


Unnamed: 0,Missed Detection ML,Missed Detection MAP,False Alarm ML,False Alarm MAP,P(Error) ML,P(Error) MAP
Mean Area Under Heart Beat,0.068966,1.0,0.428775,0.0,0.419234,0.026518
Mean R-to-R Peak Interval,0.551724,1.0,0.333333,0.0,0.339125,0.026518
Heart Rate,0.586207,1.0,0.262821,0.0,0.271396,0.026518
Peak to Peak Blood Pressure,0.551724,0.965517,0.258547,0.001425,0.266321,0.02699
Systolic Blood Pressure,0.931034,1.0,0.084758,0.0,0.107199,0.026518
Diastolic Blood Pressure,0.689655,1.0,0.150997,0.0,0.165281,0.026518
Pulse Pressure,0.517241,1.0,0.279202,0.0,0.285515,0.026518


Unnamed: 0,Missed Detection ML,Missed Detection MAP,False Alarm ML,False Alarm MAP,P(Error) ML,P(Error) MAP
Mean Area Under Heart Beat,0.068966,1.0,0.848703,0.0,0.828026,0.026518
Mean R-to-R Peak Interval,0.551724,1.0,0.023055,0.0,0.037074,0.026518
Heart Rate,0.586207,1.0,0.070605,0.0,0.084278,0.026518
Peak to Peak Blood Pressure,0.551724,0.916667,0.193084,0.002882,0.202594,0.027113
Systolic Blood Pressure,0.931034,1.0,0.393372,0.001441,0.407629,0.027921
Diastolic Blood Pressure,0.689655,1.0,0.638329,0.0,0.63969,0.026518
Pulse Pressure,0.517241,1.0,0.410663,0.0,0.413489,0.026518


Unnamed: 0,Missed Detection ML,Missed Detection MAP,False Alarm ML,False Alarm MAP,P(Error) ML,P(Error) MAP
Mean Area Under Heart Beat,0.068966,1.0,0.613087,0.0,0.598658,0.026518
Mean R-to-R Peak Interval,0.551724,1.0,0.372688,0.0,0.377436,0.026518
Heart Rate,0.586207,1.0,0.375533,0.0,0.38112,0.026518
Peak to Peak Blood Pressure,0.551724,1.0,0.204125,0.000711,0.213343,0.02721
Systolic Blood Pressure,0.931034,0.842105,0.239687,0.006401,0.25802,0.028562
Diastolic Blood Pressure,0.689655,1.0,0.217639,0.0,0.230156,0.026518
Pulse Pressure,0.517241,1.0,0.337127,0.0,0.341903,0.026518


The top 2 features with the lowest ML Errors are the Mean Area Under Heart Beat and Peak to Peak Blood Pressure

In [218]:
# Task 2.2
# Method 2

def get_golden_correlation(patient_index):
    length = len(generated_alarms[patient_index].columns)-1
    
    golden_corr_ml = []
    for i in range(0, length, 2):
        df = generated_alarms[patient_index]
        col = [df.columns[i]]
        a = np.asarray(df[df.columns[i]])
        b = np.asarray(df['Golden Alarms'])
        golden_corr_ml.append(abs(np.corrcoef(a,b)[0][1]))

    golden_corr_map = []
    for i in range(1, length, 2):
        df = generated_alarms[patient_index]
        col = [df.columns[i]]
        a = np.asarray(df[df.columns[i]])
        b = np.asarray(df['Golden Alarms'])
        corr = np.corrcoef(a,b)
        print(corr)
        golden_corr_map.append(abs(corr[0][1]))
        
    golden_correlation_matrix = pd.DataFrame([golden_corr_ml, golden_corr_map]).T
    golden_correlation_matrix.columns = ['Golden Correlation ML', 'Golden Correlation MAP']
    golden_correlation_matrix.index = features
    
    return golden_correlation_matrix
    
get_golden_correlation(0)


[[ 1.          0.30223307]
 [ 0.30223307  1.        ]]
[[ nan  nan]
 [ nan   1.]]
[[ 1.          0.26012359]
 [ 0.26012359  1.        ]]
[[ 1.          0.26012359]
 [ 0.26012359  1.        ]]
[[ nan  nan]
 [ nan   1.]]
[[ nan  nan]
 [ nan   1.]]
[[ nan  nan]
 [ nan   1.]]


  c /= stddev[:, None]
  c /= stddev[None, :]


Unnamed: 0,Golden Correlation ML,Golden Correlation MAP
Mean Area Under Heart Beat,0.567048,0.302233
Mean R-to-R Peak Interval,0.190222,
Heart Rate,0.239182,0.260124
Peak to Peak Blood Pressure,0.190222,0.260124
Systolic Blood Pressure,0.005871,
Diastolic Blood Pressure,0.021114,
Pulse Pressure,0.095855,


In [216]:
# Task 2.2
# Method 3

# Eyeball the display(df) and look for three pair of features with lowest coefficient
sum75 = 0
sum24 = 0
sum67 = 0

# Go through each patient and compare their pair of feature coefficient
sum_corrcoef = []
for patient in patient_data:
    corrcoef = []
    for feature1 in features:
        temp = []
        for feature2 in features:
            a = patient[feature1]
            b = patient[feature2]
            length = min(len(a), len(b))
            temp.append(abs(np.corrcoef(np.asarray(a[:length]), np.asarray(b[:length]))[0][1]))
        corrcoef.append(temp)
        
    df = pd.DataFrame(corrcoef)
    sum75 += df[3][5]
    sum24 += df[2][4]
    sum67 += df[6][1]
    df.columns = features
    df.index = features
    
    display(df)
print ("Coefficients of sum75 =", sum75, "sum24 =", sum24 , "sum67 =", sum67)
print ("Lowest coefficient feature is 7")

Unnamed: 0,Mean Area Under Heart Beat,Mean R-to-R Peak Interval,Heart Rate,Peak to Peak Blood Pressure,Systolic Blood Pressure,Diastolic Blood Pressure,Pulse Pressure
Mean Area Under Heart Beat,1.0,0.315048,0.282044,0.226446,0.107965,0.024806,0.113643
Mean R-to-R Peak Interval,0.315048,1.0,0.804726,0.561284,0.17243,0.143498,0.295156
Heart Rate,0.282044,0.804726,1.0,0.476743,0.159752,0.121443,0.265736
Peak to Peak Blood Pressure,0.226446,0.561284,0.476743,1.0,0.290605,0.219339,0.48099
Systolic Blood Pressure,0.107965,0.17243,0.159752,0.290605,1.0,0.534694,0.848593
Diastolic Blood Pressure,0.024806,0.143498,0.121443,0.219339,0.534694,1.0,0.022784
Pulse Pressure,0.113643,0.295156,0.265736,0.48099,0.848593,0.022784,1.0


Unnamed: 0,Mean Area Under Heart Beat,Mean R-to-R Peak Interval,Heart Rate,Peak to Peak Blood Pressure,Systolic Blood Pressure,Diastolic Blood Pressure,Pulse Pressure
Mean Area Under Heart Beat,1.0,0.499053,0.42097,0.168729,0.166002,0.071358,0.140917
Mean R-to-R Peak Interval,0.499053,1.0,0.792553,0.408618,0.010993,0.425492,0.139418
Heart Rate,0.42097,0.792553,1.0,0.159685,0.027883,0.271842,0.123528
Peak to Peak Blood Pressure,0.168729,0.408618,0.159685,1.0,0.19472,0.426695,0.350404
Systolic Blood Pressure,0.166002,0.010993,0.027883,0.19472,1.0,0.197383,0.934654
Diastolic Blood Pressure,0.071358,0.425492,0.271842,0.426695,0.197383,1.0,0.151708
Pulse Pressure,0.140917,0.139418,0.123528,0.350404,0.934654,0.151708,1.0


Unnamed: 0,Mean Area Under Heart Beat,Mean R-to-R Peak Interval,Heart Rate,Peak to Peak Blood Pressure,Systolic Blood Pressure,Diastolic Blood Pressure,Pulse Pressure
Mean Area Under Heart Beat,1.0,0.288218,0.309608,0.306603,0.33871,0.314172,0.331914
Mean R-to-R Peak Interval,0.288218,1.0,0.962443,0.78588,0.424297,0.427998,0.383624
Heart Rate,0.309608,0.962443,1.0,0.745969,0.439884,0.449887,0.392019
Peak to Peak Blood Pressure,0.306603,0.78588,0.745969,1.0,0.385049,0.486721,0.253459
Systolic Blood Pressure,0.33871,0.424297,0.439884,0.385049,1.0,0.950003,0.953993
Diastolic Blood Pressure,0.314172,0.427998,0.449887,0.486721,0.950003,1.0,0.817643
Pulse Pressure,0.331914,0.383624,0.392019,0.253459,0.953993,0.817643,1.0


Unnamed: 0,Mean Area Under Heart Beat,Mean R-to-R Peak Interval,Heart Rate,Peak to Peak Blood Pressure,Systolic Blood Pressure,Diastolic Blood Pressure,Pulse Pressure
Mean Area Under Heart Beat,1.0,0.230739,0.142438,0.308936,0.11909,0.155408,0.22034
Mean R-to-R Peak Interval,0.230739,1.0,0.314174,0.409013,0.191364,0.217867,0.047113
Heart Rate,0.142438,0.314174,1.0,0.605269,0.00272,0.348164,0.231515
Peak to Peak Blood Pressure,0.308936,0.409013,0.605269,1.0,0.095359,0.218592,0.241947
Systolic Blood Pressure,0.11909,0.191364,0.00272,0.095359,1.0,0.312266,0.780963
Diastolic Blood Pressure,0.155408,0.217867,0.348164,0.218592,0.312266,1.0,0.339649
Pulse Pressure,0.22034,0.047113,0.231515,0.241947,0.780963,0.339649,1.0


Unnamed: 0,Mean Area Under Heart Beat,Mean R-to-R Peak Interval,Heart Rate,Peak to Peak Blood Pressure,Systolic Blood Pressure,Diastolic Blood Pressure,Pulse Pressure
Mean Area Under Heart Beat,1.0,0.193632,0.171585,0.057565,0.058252,0.015911,0.127632
Mean R-to-R Peak Interval,0.193632,1.0,0.881191,0.112546,0.554764,0.49729,0.538603
Heart Rate,0.171585,0.881191,1.0,0.018538,0.511693,0.449883,0.503481
Peak to Peak Blood Pressure,0.057565,0.112546,0.018538,1.0,0.334995,0.482611,0.13863
Systolic Blood Pressure,0.058252,0.554764,0.511693,0.334995,1.0,0.932254,0.927375
Diastolic Blood Pressure,0.015911,0.49729,0.449883,0.482611,0.932254,1.0,0.739517
Pulse Pressure,0.127632,0.538603,0.503481,0.13863,0.927375,0.739517,1.0


Unnamed: 0,Mean Area Under Heart Beat,Mean R-to-R Peak Interval,Heart Rate,Peak to Peak Blood Pressure,Systolic Blood Pressure,Diastolic Blood Pressure,Pulse Pressure
Mean Area Under Heart Beat,1.0,0.692761,0.726668,0.083331,0.078092,0.265169,0.456101
Mean R-to-R Peak Interval,0.692761,1.0,0.842136,0.070246,0.102714,0.145726,0.354293
Heart Rate,0.726668,0.842136,1.0,0.138871,0.09875,0.125938,0.321389
Peak to Peak Blood Pressure,0.083331,0.070246,0.138871,1.0,0.473154,0.621188,0.022998
Systolic Blood Pressure,0.078092,0.102714,0.09875,0.473154,1.0,0.798345,0.674208
Diastolic Blood Pressure,0.265169,0.145726,0.125938,0.621188,0.798345,1.0,0.129228
Pulse Pressure,0.456101,0.354293,0.321389,0.022998,0.674208,0.129228,1.0


Unnamed: 0,Mean Area Under Heart Beat,Mean R-to-R Peak Interval,Heart Rate,Peak to Peak Blood Pressure,Systolic Blood Pressure,Diastolic Blood Pressure,Pulse Pressure
Mean Area Under Heart Beat,1.0,0.670962,0.74841,0.107181,0.015866,0.318313,0.457258
Mean R-to-R Peak Interval,0.670962,1.0,0.838706,0.055578,0.037342,0.197167,0.326693
Heart Rate,0.74841,0.838706,1.0,0.101848,0.063381,0.182347,0.356107
Peak to Peak Blood Pressure,0.107181,0.055578,0.101848,1.0,0.55418,0.612958,0.130243
Systolic Blood Pressure,0.015866,0.037342,0.063381,0.55418,1.0,0.816533,0.621975
Diastolic Blood Pressure,0.318313,0.197167,0.182347,0.612958,0.816533,1.0,0.097676
Pulse Pressure,0.457258,0.326693,0.356107,0.130243,0.621975,0.097676,1.0


Unnamed: 0,Mean Area Under Heart Beat,Mean R-to-R Peak Interval,Heart Rate,Peak to Peak Blood Pressure,Systolic Blood Pressure,Diastolic Blood Pressure,Pulse Pressure
Mean Area Under Heart Beat,1.0,0.268943,0.178425,0.054178,0.036867,0.016442,0.063886
Mean R-to-R Peak Interval,0.268943,1.0,0.415567,0.197813,0.263915,0.21242,0.202311
Heart Rate,0.178425,0.415567,1.0,0.474554,0.129947,0.13167,0.270836
Peak to Peak Blood Pressure,0.054178,0.197813,0.474554,1.0,0.158383,0.171799,0.336628
Systolic Blood Pressure,0.036867,0.263915,0.129947,0.158383,1.0,0.688044,0.850508
Diastolic Blood Pressure,0.016442,0.21242,0.13167,0.171799,0.688044,1.0,0.209976
Pulse Pressure,0.063886,0.202311,0.270836,0.336628,0.850508,0.209976,1.0


Unnamed: 0,Mean Area Under Heart Beat,Mean R-to-R Peak Interval,Heart Rate,Peak to Peak Blood Pressure,Systolic Blood Pressure,Diastolic Blood Pressure,Pulse Pressure
Mean Area Under Heart Beat,1.0,0.692761,0.726668,0.083331,0.078092,0.265169,0.456101
Mean R-to-R Peak Interval,0.692761,1.0,0.842136,0.070246,0.102714,0.145726,0.354293
Heart Rate,0.726668,0.842136,1.0,0.138871,0.09875,0.125938,0.321389
Peak to Peak Blood Pressure,0.083331,0.070246,0.138871,1.0,0.473154,0.621188,0.022998
Systolic Blood Pressure,0.078092,0.102714,0.09875,0.473154,1.0,0.798345,0.674208
Diastolic Blood Pressure,0.265169,0.145726,0.125938,0.621188,0.798345,1.0,0.129228
Pulse Pressure,0.456101,0.354293,0.321389,0.022998,0.674208,0.129228,1.0


Coefficients of sum75 = 3.86109064184 sum24 = 1.53275986961 sum67 = 2.64150426314
Lowest coefficient feature is 7
