## Error Analysis for CAD Predictions (BERT Augmented)

In [1]:
import os
import string
import random
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt

In [2]:
# SK-learn libraries for evaluation.
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score


In [3]:
import numpy as np

### Data File Names

* Test files with Labels and Filenames : /data_for_bert_sent/test_files_with_labels/*_testfile.csv
* Bert label mapping /data_for_bert_sent/test_files_with_labels/*_labelmapping.csv
* BERT evaluation /data_for_bert_sent/BERT_run_results/*_eval_results.txt


In [4]:
print(os.path.dirname(os.path.abspath('__file__')))

C:\Users\Kalyan\Documents\Anu\W266 - NLP\Final Project\lheart-disease-risk-prediction\Code


### CAD Indicator

In [5]:
# read in the test files with labels

CI_test = pd.read_csv("data_for_bert_augmented/test_files_with_labels/cad_ind_testfile.csv")

In [6]:
CI_test.rename( columns={'Unnamed: 0' :'sentenceId'}, inplace=True )

In [7]:
CI_test.head(10)

Unnamed: 0,sentenceId,sentence,label,file
0,0,Record date: 2080-02-18,Other,110-03.xml
1,1,SDU JAR Admission Note,Other,110-03.xml
2,2,Name: \t Yosef Villegas,Other,110-03.xml
3,3,MR:\t8249813,Other,110-03.xml
4,4,DOA: \t2/17/80,Other,110-03.xml
5,5,PCP: Gilbert Perez,Other,110-03.xml
6,6,Attending: YBARRA,Other,110-03.xml
7,7,CODE: FULL,Other,110-03.xml
8,8,HPI: 70 yo M with NIDDM admitted for cath aft...,test,110-03.xml
9,9,Pt has had increasing CP and SOB on exertion f...,Other,110-03.xml


In [8]:
CI_test['label'].value_counts()

Other      20642
event        296
mention      293
symptom      113
test          69
Name: label, dtype: int64

In [9]:
# read in the test results captured for BERT Augmented CAD Indicator model and specify columns as the actual file has no header
bert_aug_CI_results = pd.read_csv("data_for_bert_augmented/bert_augmented_run_results/bert_aug_data_output_data_cad_ind_output_results_test_results.tsv", sep='\t', header=None)
                               
bert_aug_CI_results.columns=["Class0", "Class1", "Class2", "Class3", "Class4"]

In [10]:
bert_aug_CI_results.head()

Unnamed: 0,Class0,Class1,Class2,Class3,Class4
0,0.999926,2.4e-05,2.3e-05,1.4e-05,1.3e-05
1,0.999926,2e-05,2.7e-05,1.6e-05,1e-05
2,0.999272,0.000397,0.000152,2.3e-05,0.000156
3,0.999398,0.000318,0.000139,2e-05,0.000124
4,0.999571,0.00022,0.000102,2e-05,8.7e-05


In [11]:
bert_aug_CI_results['predClass'] = bert_aug_CI_results.idxmax(axis=1)

In [12]:
bert_aug_CI_results.head()

Unnamed: 0,Class0,Class1,Class2,Class3,Class4,predClass
0,0.999926,2.4e-05,2.3e-05,1.4e-05,1.3e-05,Class0
1,0.999926,2e-05,2.7e-05,1.6e-05,1e-05,Class0
2,0.999272,0.000397,0.000152,2.3e-05,0.000156,Class0
3,0.999398,0.000318,0.000139,2e-05,0.000124,Class0
4,0.999571,0.00022,0.000102,2e-05,8.7e-05,Class0


In [13]:
bert_aug_CI_results['predClass'].value_counts()

Class0    20641
Class2      329
Class1      270
Class3      126
Class4       47
Name: predClass, dtype: int64

In [14]:
def CI_set_labels(classlabel):
    if (classlabel=='Class1'):
        return 'event'
    elif (classlabel=='Class2'):
        return 'mention'
    elif (classlabel=='Class3'):
        return 'symptom'
    elif (classlabel=='Class4'):
        return 'test'
    else:
        return 'Other'

In [15]:
bert_aug_CI_results['predLabel'] = bert_aug_CI_results['predClass'].apply(CI_set_labels)

bert_aug_CI_results.head(10)


Unnamed: 0,Class0,Class1,Class2,Class3,Class4,predClass,predLabel
0,0.999926,2.4e-05,2.3e-05,1.4e-05,1.3e-05,Class0,Other
1,0.999926,2e-05,2.7e-05,1.6e-05,1e-05,Class0,Other
2,0.999272,0.000397,0.000152,2.3e-05,0.000156,Class0,Other
3,0.999398,0.000318,0.000139,2e-05,0.000124,Class0,Other
4,0.999571,0.00022,0.000102,2e-05,8.7e-05,Class0,Other
5,0.999926,2.1e-05,2.9e-05,1.5e-05,1e-05,Class0,Other
6,0.999921,2.5e-05,2.9e-05,1.3e-05,1.2e-05,Class0,Other
7,0.999928,2.1e-05,2.6e-05,1.4e-05,1e-05,Class0,Other
8,0.999854,6.1e-05,3.9e-05,1.4e-05,3.2e-05,Class0,Other
9,0.99992,2e-05,2.7e-05,2.3e-05,1e-05,Class0,Other


In [16]:
# validating the counts by label
bert_aug_CI_results['predLabel'].value_counts()

Other      20641
mention      329
event        270
symptom      126
test          47
Name: predLabel, dtype: int64

In [17]:
CI_combined = pd.concat([CI_test, bert_aug_CI_results], axis=1)

In [18]:
CI_combined.head()

Unnamed: 0,sentenceId,sentence,label,file,Class0,Class1,Class2,Class3,Class4,predClass,predLabel
0,0,Record date: 2080-02-18,Other,110-03.xml,0.999926,2.4e-05,2.3e-05,1.4e-05,1.3e-05,Class0,Other
1,1,SDU JAR Admission Note,Other,110-03.xml,0.999926,2e-05,2.7e-05,1.6e-05,1e-05,Class0,Other
2,2,Name: \t Yosef Villegas,Other,110-03.xml,0.999272,0.000397,0.000152,2.3e-05,0.000156,Class0,Other
3,3,MR:\t8249813,Other,110-03.xml,0.999398,0.000318,0.000139,2e-05,0.000124,Class0,Other
4,4,DOA: \t2/17/80,Other,110-03.xml,0.999571,0.00022,0.000102,2e-05,8.7e-05,Class0,Other


In [19]:
CI_combined[CI_combined['predLabel']!='Other']

Unnamed: 0,sentenceId,sentence,label,file,Class0,Class1,Class2,Class3,Class4,predClass,predLabel
66,66,70 yo M with multiple cardiac risk factors and...,symptom,110-03.xml,0.000793,0.000240,0.000303,0.998302,0.000362,Class3,symptom
86,86,"71 yo M with CAD, s/p CABG x 4 in 3/80.",event,110-04.xml,0.000804,0.993561,0.004396,0.000401,0.000837,Class1,event
98,98,Coronary artery disease : s/p CABG x ...,event,110-04.xml,0.001814,0.003055,0.994300,0.000270,0.000561,Class2,mention
157,157,"Sternal pain-- non-exertional, reproducible by...",event,110-04.xml,0.001314,0.996738,0.000688,0.000601,0.000660,Class1,event
161,161,Pericarditis a possibility (he had post-op per...,event,110-04.xml,0.001491,0.996681,0.000750,0.000558,0.000520,Class1,event
180,180,65-year-old male with known history of CAD who...,mention,111-04.xml,0.002081,0.000973,0.996085,0.000404,0.000457,Class2,mention
192,192,"PAST MEDICAL HISTORY: Hypertension, diabetes,...",mention,111-04.xml,0.002119,0.000964,0.996061,0.000422,0.000434,Class2,mention
251,251,"Prior to his pacemaker placement, an exercise ...",Other,112-03.xml,0.397554,0.004942,0.000649,0.587603,0.009252,Class3,symptom
253,253,The test was terminated for 7/10 substernal ch...,test,112-03.xml,0.000901,0.000225,0.000318,0.998172,0.000384,Class3,symptom
289,289,He complained of fatigue and exertional throat...,test,112-04.xml,0.000908,0.000529,0.000285,0.000495,0.997784,Class4,test


In [20]:
CI_test_labels = CI_combined['label']
CI_pred_labels = CI_combined['predLabel']
print(type(CI_test_labels))
print(type(CI_pred_labels))

<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>


In [21]:
accuracy_score(CI_test_labels, CI_pred_labels)

0.9766496987811143

In [22]:
print(classification_report(CI_pred_labels, CI_test_labels))

              precision    recall  f1-score   support

       Other       0.99      0.99      0.99     20641
       event       0.56      0.61      0.59       270
     mention       0.84      0.75      0.79       329
     symptom       0.43      0.39      0.41       126
        test       0.29      0.43      0.34        47

   micro avg       0.98      0.98      0.98     21413
   macro avg       0.62      0.63      0.62     21413
weighted avg       0.98      0.98      0.98     21413



In [23]:
unique_label = np.unique(CI_test_labels)
print(pd.DataFrame(confusion_matrix(CI_test_labels, CI_pred_labels, labels=unique_label), 
                   index=['true:{:}'.format(x) for x in unique_label], 
                   columns=['pred:{:}'.format(x) for x in unique_label]))

              pred:Other  pred:event  pred:mention  pred:symptom  pred:test
true:Other         20432          81            38            68         23
true:event            87         166            35             4          4
true:mention          25          19           246             3          0
true:symptom          60           1             3            49          0
true:test             37           3             7             2         20


In [24]:
 CI_combined[(CI_combined['label']!='Other') & (CI_combined['predLabel']=='Other')]

Unnamed: 0,sentenceId,sentence,label,file,Class0,Class1,Class2,Class3,Class4,predClass,predLabel
8,8,HPI: 70 yo M with NIDDM admitted for cath aft...,test,110-03.xml,0.999854,0.000061,0.000039,0.000014,0.000032,Class0,Other
12,12,MIBI was read as positive for moderate to seve...,test,110-03.xml,0.987264,0.004448,0.000225,0.000443,0.007619,Class0,Other
60,60,The ECG is positive for ischemia.,test,110-03.xml,0.999758,0.000137,0.000039,0.000019,0.000048,Class0,Other
62,62,Findings are consistent with moderate to sever...,test,110-03.xml,0.999890,0.000046,0.000027,0.000017,0.000019,Class0,Other
68,68,"\tIschemia: Hx angina, MIBI positive for infer...",symptom,110-03.xml,0.999115,0.000452,0.000253,0.000029,0.000151,Class0,Other
94,94,The pain does not remind him of his sx prior t...,event,110-04.xml,0.999881,0.000046,0.000030,0.000024,0.000019,Class0,Other
182,182,"walking, took 2 nitro and the pain got better.",mention,111-04.xml,0.999917,0.000025,0.000029,0.000017,0.000013,Class0,Other
184,184,repeat episode relived by nitro again.,mention,111-04.xml,0.999922,0.000023,0.000029,0.000014,0.000012,Class0,Other
198,198,PAST SURGICAL HISTORY: Angioplasty with multi...,event,111-04.xml,0.998454,0.001185,0.000233,0.000019,0.000109,Class0,Other
257,257,He tells me that he underwent testing at Wheat...,test,112-03.xml,0.999806,0.000100,0.000037,0.000016,0.000041,Class0,Other


We see from the confusion matrix that there the CAD classes are confused with the "Other" class much more often than with the other CAD classes.  Though we augmented the data, we still see sparcity of CAD class compared to "Other"imbalance in the classes.  

### CAD Time

In [25]:
# read in the test files with labels

CT_test = pd.read_csv("data_for_bert_augmented/test_files_with_labels/cad_tim_testfile.csv")

In [26]:
CT_test.rename( columns={'Unnamed: 0' :'sentenceId'}, inplace=True )

In [27]:
CT_test.head(10)

Unnamed: 0,sentenceId,sentence,label,file
0,0,Record date: 2080-02-18,Other,110-03.xml
1,1,SDU JAR Admission Note,Other,110-03.xml
2,2,Name: \t Yosef Villegas,Other,110-03.xml
3,3,MR:\t8249813,Other,110-03.xml
4,4,DOA: \t2/17/80,Other,110-03.xml
5,5,PCP: Gilbert Perez,Other,110-03.xml
6,6,Attending: YBARRA,Other,110-03.xml
7,7,CODE: FULL,Other,110-03.xml
8,8,HPI: 70 yo M with NIDDM admitted for cath aft...,before DCT,110-03.xml
9,9,Pt has had increasing CP and SOB on exertion f...,Other,110-03.xml


In [29]:
# read in the test results captured for BERT Augmented CAD model and specify columns as the actual file has no header
bert_aug_CT_results = pd.read_csv("data_for_bert_augmented/bert_augmented_run_results/bert_aug_data_output_data_cad_time_output_results_test_results.tsv", sep='\t', header=None)
                               
bert_aug_CT_results.columns=["Class0", "Class1", "Class2", "Class3"]

In [30]:
bert_aug_CT_results.head()

Unnamed: 0,Class0,Class1,Class2,Class3
0,0.999842,1.7e-05,0.000123,1.8e-05
1,0.99985,1.6e-05,0.000117,1.8e-05
2,0.999801,1.8e-05,0.000158,2.3e-05
3,0.999784,1.9e-05,0.000176,2.1e-05
4,0.999831,1.8e-05,0.00013,2.1e-05


In [31]:
bert_aug_CT_results['predClass'] = bert_aug_CT_results.idxmax(axis=1)

In [32]:
bert_aug_CT_results.head()

Unnamed: 0,Class0,Class1,Class2,Class3,predClass
0,0.999842,1.7e-05,0.000123,1.8e-05,Class0
1,0.99985,1.6e-05,0.000117,1.8e-05,Class0
2,0.999801,1.8e-05,0.000158,2.3e-05,Class0
3,0.999784,1.9e-05,0.000176,2.1e-05,Class0
4,0.999831,1.8e-05,0.00013,2.1e-05,Class0


In [33]:
bert_aug_CT_results['predClass'].value_counts()

Class0    20667
Class2      540
Class1      106
Class3      100
Name: predClass, dtype: int64

In [34]:
def CT_set_labels(classlabel):
    if (classlabel=='Class1'):
        return 'after dct'
    elif (classlabel=='Class2'):
        return 'before dct'
    elif (classlabel=='Class3'):
        return 'during dct'
    else:
        return 'Other'

In [35]:
bert_aug_CT_results['predLabel'] = bert_aug_CI_results['predClass'].apply(CT_set_labels)

bert_aug_CT_results.head(10)


Unnamed: 0,Class0,Class1,Class2,Class3,predClass,predLabel
0,0.999842,1.7e-05,0.000123,1.8e-05,Class0,Other
1,0.99985,1.6e-05,0.000117,1.8e-05,Class0,Other
2,0.999801,1.8e-05,0.000158,2.3e-05,Class0,Other
3,0.999784,1.9e-05,0.000176,2.1e-05,Class0,Other
4,0.999831,1.8e-05,0.00013,2.1e-05,Class0,Other
5,0.999863,1.7e-05,0.000102,1.8e-05,Class0,Other
6,0.999853,1.7e-05,0.000112,1.7e-05,Class0,Other
7,0.99987,1.6e-05,9.7e-05,1.7e-05,Class0,Other
8,0.999235,2.2e-05,0.00071,3.3e-05,Class0,Other
9,0.999778,1.9e-05,0.000183,1.9e-05,Class0,Other


In [36]:
# validating the counts by label
bert_aug_CT_results['predLabel'].value_counts()

Other         20688
before dct      329
after dct       270
during dct      126
Name: predLabel, dtype: int64

In [37]:
CT_combined = pd.concat([CT_test, bert_aug_CT_results], axis=1)

In [38]:
CT_combined.head()

Unnamed: 0,sentenceId,sentence,label,file,Class0,Class1,Class2,Class3,predClass,predLabel
0,0,Record date: 2080-02-18,Other,110-03.xml,0.999842,1.7e-05,0.000123,1.8e-05,Class0,Other
1,1,SDU JAR Admission Note,Other,110-03.xml,0.99985,1.6e-05,0.000117,1.8e-05,Class0,Other
2,2,Name: \t Yosef Villegas,Other,110-03.xml,0.999801,1.8e-05,0.000158,2.3e-05,Class0,Other
3,3,MR:\t8249813,Other,110-03.xml,0.999784,1.9e-05,0.000176,2.1e-05,Class0,Other
4,4,DOA: \t2/17/80,Other,110-03.xml,0.999831,1.8e-05,0.00013,2.1e-05,Class0,Other


In [40]:
CT_combined[CT_combined['predLabel']!='Other']

Unnamed: 0,sentenceId,sentence,label,file,Class0,Class1,Class2,Class3,predClass,predLabel
66,66,70 yo M with multiple cardiac risk factors and...,before DCT,110-03.xml,0.001055,0.000730,0.996982,0.001233,Class2,during dct
86,86,"71 yo M with CAD, s/p CABG x 4 in 3/80.",before DCT,110-04.xml,0.000986,0.001878,0.996392,0.000743,Class2,after dct
98,98,Coronary artery disease : s/p CABG x ...,before DCT,110-04.xml,0.000768,0.943808,0.053853,0.001571,Class1,before dct
157,157,"Sternal pain-- non-exertional, reproducible by...",before DCT,110-04.xml,0.001902,0.001069,0.996298,0.000731,Class2,after dct
161,161,Pericarditis a possibility (he had post-op per...,before DCT,110-04.xml,0.004307,0.000988,0.994101,0.000604,Class2,after dct
180,180,65-year-old male with known history of CAD who...,after DCT,111-04.xml,0.001942,0.003586,0.993722,0.000751,Class2,before dct
192,192,"PAST MEDICAL HISTORY: Hypertension, diabetes,...",before DCT,111-04.xml,0.001885,0.007107,0.990262,0.000746,Class2,before dct
251,251,"Prior to his pacemaker placement, an exercise ...",Other,112-03.xml,0.618555,0.000750,0.379248,0.001447,Class0,during dct
253,253,The test was terminated for 7/10 substernal ch...,before DCT,112-03.xml,0.001610,0.000782,0.996431,0.001178,Class2,during dct
291,291,"He received a 3mm stent, postdilated to 3.5mm,...",before DCT,112-04.xml,0.004824,0.000704,0.993678,0.000793,Class2,after dct


In [41]:
CT_test_labels = CT_combined['label']
CT_pred_labels = CT_combined['predLabel']



In [42]:
accuracy_score(CT_test_labels, CT_pred_labels)

0.955260822864615

In [43]:
print(classification_report(CT_pred_labels, CT_test_labels))

              precision    recall  f1-score   support

       Other       0.99      0.99      0.99     20688
   after DCT       0.00      0.00      0.00         0
   after dct       0.00      0.00      0.00       270
  before DCT       0.00      0.00      0.00         0
  before dct       0.00      0.00      0.00       329
  during DCT       0.00      0.00      0.00         0
  during dct       0.00      0.00      0.00       126

   micro avg       0.96      0.96      0.96     21413
   macro avg       0.14      0.14      0.14     21413
weighted avg       0.96      0.96      0.96     21413



  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [44]:
unique_label = np.unique(CT_test_labels)
print(pd.DataFrame(confusion_matrix(CT_test_labels, CT_pred_labels, labels=unique_label), 
                   index=['true:{:}'.format(x) for x in unique_label], 
                   columns=['pred:{:}'.format(x) for x in unique_label]))

                 pred:Other  pred:after DCT  pred:before DCT  pred:during DCT
true:Other            20455               0                0                0
true:after DCT            6               0                0                0
true:before DCT         193               0                0                0
true:during DCT          34               0                0                0


Here the consusion matrix shows that all of the CAD classes have been confused with "Other" class in the prediction.  Let's see the examples:

In [46]:
CT_combined[(CT_combined['label']!='Other') & (CT_combined['predLabel']=='Other')]

Unnamed: 0,sentenceId,sentence,label,file,Class0,Class1,Class2,Class3,predClass,predLabel
8,8,HPI: 70 yo M with NIDDM admitted for cath aft...,before DCT,110-03.xml,0.999235,0.000022,0.000710,0.000033,Class0,Other
12,12,MIBI was read as positive for moderate to seve...,before DCT,110-03.xml,0.995930,0.000060,0.003940,0.000071,Class0,Other
60,60,The ECG is positive for ischemia.,before DCT,110-03.xml,0.985374,0.000127,0.014360,0.000139,Class0,Other
62,62,Findings are consistent with moderate to sever...,before DCT,110-03.xml,0.999231,0.000032,0.000698,0.000039,Class0,Other
68,68,"\tIschemia: Hx angina, MIBI positive for infer...",before DCT,110-03.xml,0.999664,0.000042,0.000254,0.000040,Class0,Other
94,94,The pain does not remind him of his sx prior t...,before DCT,110-04.xml,0.804428,0.000567,0.194260,0.000744,Class0,Other
182,182,"walking, took 2 nitro and the pain got better.",before DCT,111-04.xml,0.999683,0.000022,0.000273,0.000022,Class0,Other
184,184,repeat episode relived by nitro again.,before DCT,111-04.xml,0.999844,0.000016,0.000124,0.000016,Class0,Other
198,198,PAST SURGICAL HISTORY: Angioplasty with multi...,before DCT,111-04.xml,0.802693,0.000435,0.196281,0.000591,Class0,Other
257,257,He tells me that he underwent testing at Wheat...,before DCT,112-03.xml,0.997462,0.000051,0.002432,0.000056,Class0,Other
