In [1]:
"""
Error analysis module
Input: the outputs of classification models on the dataframe. 
Specifically: result_dict = {"predictions": all_predictions, "references": all_references, "filenames": all_filenames, 'input_ids': all_input_ids} where:
all_predictions is the binary predictions for the file, 0 or 1
all_references is the binary references (ground truth) for the file, 0 or 1
all_filenames is the name of the folder (discharge summary et.c)
input ids is the tokenized, encoded versions of the original sentences (must decode here
Functions:
- error by folder: retrieve number of errors per folder 
- error length: retrieve average sentence length for correct, incorrect classifications:
- error by words: retrieve most common words present in incorrect/correct sentences 
- error by model: for result_dict from each model, which errors are present in all models?
- error by drug: given list of all drug strings, which drugs are most common in errors? 
""";


In [1]:
import pickle
def import_data(path):
    with open(path, 'rb') as f:
        result_dict = pickle.load(f)
    #print(result_dict['decoded_sentences'][0])
        return(result_dict)


def export_data(data, path):
    f = open(path+".pkl",'w')
    f.close()
    f = open(path+".pkl", "wb")
    pickle.dump(data, f)
    f.close()
    
T5_result_dict = import_data('PT_T5_Classifier_results_dict_TEST_SET_v1.pkl')
  
BERT_result_dict = import_data('PT_BERT_Classifier_results_dict_TEST_SET_v1.pkl')

SVM_result_dict = import_data('SVM_predictions.pkl')

T5_int_list = [int(tensor.numpy()) for tensor in T5_result_dict['row_numbers']]
print("biggest value in T5 (should match number of vals in test set")
print(max(T5_int_list))



BERT_int_list = [int(tensor.numpy()) for tensor in BERT_result_dict['row_numbers']]
print("biggest value in BERT (should match number of vals in test set")
print(max(BERT_int_list))

biggest value in T5 (should match number of vals in test set
4080
biggest value in BERT (should match number of vals in test set
4080


In [3]:
def get_correct_incorrect(results_dict):
    import pandas as pd
    df = pd.DataFrame({
        'sentence': results_dict["decoded_sentences"],
        'prediction': results_dict["predictions"],
        'reference': results_dict["references"]
    })
    # Assuming you have a DataFrame named 'df' with columns 'sentence', 'prediction', and 'reference'
    # Create a new DataFrame for correct and incorrect predictions
    correct_df = pd.DataFrame(columns=['correct'])
    incorrect_df = pd.DataFrame(columns=['incorrect'])

    # Iterate through each row in the original DataFrame
    for index, row in df.iterrows():
        if row['prediction'] == row['reference']:
            correct_df = correct_df.append({'correct': row['sentence']}, ignore_index=True)
        else:
            incorrect_df = incorrect_df.append({'incorrect': row['sentence']}, ignore_index=True)

    return({"correct": correct_df, "incorrect":incorrect_df})

"""
result_dict = import_data('PT_T5_Classifier_results_dict.pkl')
dict1 = get_correct_incorrect(result_dict)
print(dct)
# Specify the path and filename for the Excel file
import csv
with open('T5_corect_incorrect.csv', 'w') as output:
    writer = csv.writer(output)
    for key, value in dict1.items():
        writer.writerow([key, value])

        
result_dict = import_data('PT_BERT_Classifier_results_dict.pkl')
dict1 = get_correct_incorrect(result_dict)
print(dct)
# Specify the path and filename for the Excel file
with open('BERT_corect_incorrect.csv', 'w') as o:
    writer = csv.writer(o, quoting=csv.QUOTE_NONNUMERIC)
    for key, value in dict1.items():
        writer.writerow([key, value])
""";

In [4]:
def error_by_folder(results_dict):
    import pandas as pd

    # Create a DataFrame with filenames, predictions, and references
    df = pd.DataFrame({
        'filename': result_dict["filenames"],
        'prediction': result_dict["predictions"],
        'reference': result_dict["references"]
    })

    # Identify correct and incorrect predictions
    df['correct'] = (df['prediction'] == df['reference'])

    # Group by filename and calculate total correct and incorrect predictions
    summary_df = df.groupby('filename')['correct'].value_counts().unstack(fill_value=0).reset_index()

    # Rename columns for clarity
    summary_df.columns = ['filename', 'incorrect_predictions',  'correct_predictions']

    return(summary_df)


In [5]:
def error_by_length_char(result_dict):
    #returns average length in characters of all incorrect sentence,  correct sentence
    correct_sum = 0
    correct_length = 0
    
    incorrect_sum = 0
    incorrect_length = 0

    
    for i in range(len(result_dict['decoded_sentences'])):
        #get correct sentences
        if(result_dict['predictions'][i] == result_dict['references'][i]):
            correct_length += 1
            correct_sum += len(result_dict['decoded_sentences'][i])
            
        #Get incorrect
        else:
            incorrect_sum += len(result_dict['decoded_sentences'][i])
            incorrect_length += 1
            
    average_correct_length_char = correct_sum / correct_length
    average_incorrect_length_char = incorrect_sum / incorrect_length

    return({"average_incorrect_length_char": average_incorrect_length_char, "average_correct_length_char":average_correct_length_char})
        
print("BERT")
print(error_by_length_char(BERT_result_dict))

print("T5")
print(error_by_length_char(T5_result_dict))

BERT
{'average_incorrect_length_char': 317.3205128205128, 'average_correct_length_char': 129.6912101910828}
T5
{'average_incorrect_length_char': 225.77710843373495, 'average_correct_length_char': 123.31468710089399}


In [6]:
import nltk
from collections import Counter

def process_sentence(sentence, stopwords):
    # Tokenize sentence, remove punctuation, and convert to lowercase
    words = [word.strip(".,?!").lower() for word in sentence.split()]
    # Remove stopwords and return the remaining words
    return [word for word in words if word not in stopwords]

def analyze_sentences(correct_sentences, incorrect_sentences, stopwords=[]):
    correct_counts = Counter()
    incorrect_counts = Counter()

    # Process correct sentences
    for sentence in correct_sentences:
        sentence = ' '.join(sentence)
        words = process_sentence(sentence, stopwords)
        correct_counts.update(words)

    # Process incorrect sentences
    for sentence in incorrect_sentences:
        sentence = ' '.join(sentence)
        words = process_sentence(sentence, stopwords)
        incorrect_counts.update(words)

    # Return sorted results
    return {
        'correct': correct_counts.most_common(),
        'incorrect': incorrect_counts.most_common()
    }


def get_common_words(result_dict):
    
    from nltk.corpus import stopwords


    correctness_df = get_correct_incorrect(result_dict)
    display(correctness_df['correct'])
    display(correctness_df['incorrect'])


    correct_sentences = correctness_df['correct'].values.tolist()
    incorrect_sentences = correctness_df['incorrect'].values.tolist()
    #print(correct_sentences[0])
    stopwords = stopwords.words('english')
    stopwords += (['-', 'ml', 'po', 'mg', 'pt'])
    #stopwords = []


    result = analyze_sentences(correct_sentences, incorrect_sentences, stopwords)
    print("Most common words in correct sentences:", result['correct'][0:31])
    print("\nMost common words in incorrect sentences:", result['incorrect'][0:31])

nltk.download('stopwords')

print("BERT")
print(get_common_words(BERT_result_dict))

print("T5")
print(get_common_words(T5_result_dict))

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/jsheikh2/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


BERT


Unnamed: 0,correct
0,"pmh : - dm2 : diagnosed 2080, initially took a..."
1,"the patient was given 1l of ns, 500 mg levoflo..."
2,"received a total of 3 u ffp, labetolol then ni..."
3,will attempt [ * * female first name ( un ) * ...
4,2.
...,...
3920,- hct check q6 - transfuse prbcs to keep hct >...
3921,an u / s of r. breast showed no discrete mass.
3922,events - - off cvvhd for? citrate toxicity.
3923,"cortisone injections at pain clinic, last 1 - ..."


Unnamed: 0,incorrect
0,ct confirmed dvt and hit was + so we have d / ...
1,"acute renal failure : given elevated bun, coul..."
2,active issues remain : 1. ) septic shock remai...
3,# gi bleed : patient s / p 4u prbc s on ( 2u o...
4,7 ) hyponatremia : improving.
...,...
151,allergies : dicloxacillin ain ; last dose of a...
152,she admits to not taking her lipitor - worrrie...
153,""" chief complaint : 24 hour events : [ * * 3 -..."
154,rr b / w 20 - 25. pt unresponsive on 20mcg of ...


Most common words in correct sentences: [('*', 5110), (':', 4919), ('/', 2510), ('(', 1816), (')', 1764), ('[', 1323), (']', 1315), ('_', 727), ('1', 705), ('2', 636), ('3', 464), ('5', 411), ('4', 354), ('%', 337), (';', 322), ('12', 315), ('patient', 295), ('7', 291), ('history', 275), ('0', 275), ('10', 266), ('#', 249), ('9', 248), ('p', 244), ('8', 242), ('c', 241), ('pain', 235), ('11', 230), ('6', 224), ('pm', 210), ('last', 209)]

Most common words in incorrect sentences: [('*', 702), (':', 454), ('/', 262), (')', 191), ('(', 176), ('[', 163), (']', 161), ('p', 61), ('2', 49), ('1', 47), (';', 40), ('history', 39), ('#', 37), ('3', 37), ('5', 35), ('likely', 32), ('renal', 31), ('last', 31), ('12', 29), ('c', 28), ('>', 28), ('urine', 28), ('+', 26), ('given', 26), ('medications', 26), ('continue', 25), ('cx', 23), ('7', 23), ('heparin', 22), ('patient', 22), ('%', 22)]
None
T5


Unnamed: 0,correct
0,No history of osteomyelitis.
1,Suction q 4 hrs for small amt.
2,There is lateral superior displacement of the ...
3,Rel to betrayal by mother at age 13.
4,BAL similar [**1-14**] resp viral screen neg [...
...,...
3910,""""
3911,"Shock: Now hemodynamically stable, off of pres..."
3912,History of Present Illness: 76 year old male w...
3913,"Patent left main, apical LAD with 99% lesion w..."


Unnamed: 0,incorrect
0,Suspect increased HgbA1c reflects recent cessa...
1,He noted that he has swallowed a lot of blood ...
2,# GI bleed: Patient s/p 4u pRBC s on (2u on [*...
3,- reorient - avoid sedating meds CAD: - hold A...
4,Review of Systems: above Psych: Referred to ur...
...,...
161,"Reports of copious diarrhea X days, guiac posi..."
162,-Continue holding Coumadin until bleeding issu...
163,She gets hives from both.
164,"If the patient tolerates the transition, the g..."


Most common words in correct sentences: [('patient', 287), ('pm', 219), ('pain', 195), ('s/p', 192), ('given', 165), ('2', 163), ('blood', 156), ('1', 156), ('continue', 154), ('#', 154), ('icu', 147), ('history:', 146), ('total', 146), ('history', 142), ('last', 142), ('medications:', 142), ('/', 139), ('**]', 139), ('noted', 132), ('c', 132), ('24', 131), ('review', 127), ('medical', 126), ('name', 123), ('family', 122), ('admission', 122), ('since', 122), ('dose', 120), ('right', 115), ('care', 110), ('fluid', 108)]

Most common words in incorrect sentences: [('given', 31), ('cx', 22), ('+', 22), ('likely', 21), ('heparin', 18), ('hold', 17), ('vre', 17), ('without', 16), ('daily', 16), ('setting', 15), ('qd', 15), ('urine', 15), ('blood', 14), ('acute', 14), ('history', 14), ('patient', 13), ('s/p', 13), ('stable', 13), ('change', 13), ('1', 13), ('continue', 13), ('w/', 13), ('ct', 12), ('per', 12), ('name', 12), ('>', 12), ('10', 12), ('##', 12), ('neg', 12), ('sedation', 12), ('

In [7]:
import pandas as pd
def get_common_incorrect_words(T5_result_dict, BERT_result_dict):
    T5_incorrect = get_correct_incorrect(T5_result_dict)['incorrect']
    display(T5_incorrect)
    print(type(T5_incorrect))
    T5_incorrect.rename(columns={'incorrect': 'T5_incorrect'}, inplace=True)

    BERT_incorrect = get_correct_incorrect(BERT_result_dict)['incorrect']
    display(BERT_incorrect)
    BERT_incorrect.rename(columns={'incorrect': 'BERT_incorrect'}, inplace=True)

    print(type(BERT_incorrect))
    
    merged_df = pd.concat([T5_incorrect, BERT_incorrect], axis=1)
    excel_file_path = 'actual_output.xlsx'
    merged_df.to_excel(excel_file_path, index=False)

    
get_common_incorrect_words(T5_result_dict, BERT_result_dict)

Unnamed: 0,incorrect
0,Suspect increased HgbA1c reflects recent cessa...
1,He noted that he has swallowed a lot of blood ...
2,# GI bleed: Patient s/p 4u pRBC s on (2u on [*...
3,- reorient - avoid sedating meds CAD: - hold A...
4,Review of Systems: above Psych: Referred to ur...
...,...
161,"Reports of copious diarrhea X days, guiac posi..."
162,-Continue holding Coumadin until bleeding issu...
163,She gets hives from both.
164,"If the patient tolerates the transition, the g..."


<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,incorrect
0,ct confirmed dvt and hit was + so we have d / ...
1,"acute renal failure : given elevated bun, coul..."
2,active issues remain : 1. ) septic shock remai...
3,# gi bleed : patient s / p 4u prbc s on ( 2u o...
4,7 ) hyponatremia : improving.
...,...
151,allergies : dicloxacillin ain ; last dose of a...
152,she admits to not taking her lipitor - worrrie...
153,""" chief complaint : 24 hour events : [ * * 3 -..."
154,rr b / w 20 - 25. pt unresponsive on 20mcg of ...


<class 'pandas.core.frame.DataFrame'>


In [8]:
#IMPROVEMENT FROM SVM
#Get the sentences that were misclassified by SVM AND NOT (BERT, T5)
SVM_only_wrong = []

from tqdm import tqdm
def SVM_difference(SVM_result_dict, T5_result_dict, BERT_result_dict):
    print(T5_result_dict.keys())
    print(len(T5_result_dict['row_numbers']))
    
    print(SVM_result_dict.keys())
    
    """
    FALSE: T5_result_dict['row_numbers'][i] == BERT_result_dict['row_numbers'][i] == SVM_predictions[i] 
    
    the lists did not keep their order; train_dataframe[i] does not equal BERT or SVM or T5[i]
    to compare the same i: treat SVM_ground_truth[i] as i, treat T5/BERT_result_dict[row number] as i
    
            this method allows you to iterate over the same original element on all 3 dicts
        T5_ground_truth = 0 
        BERT_ground_truth =  0
        SVM_ground_truth = SVM_result_dict['ground_truth'][i]
        
        for j in range(len((T5_result_dict['row_numbers']))):
            if T5_result_dict['row_numbers'][j] == i:
                #this means that T5[j] == SVM[i]
                T5_ground_truth = T5_result_dict['references'][j]
        for k in range(len((BERT_result_dict['row_numbers']))):
            if BERT_result_dict['row_numbers'][k] == i:
                #this means that BERT[k] == SVM[i]
                BERT_ground_truth = BERT_result_dict['references'][k]
        
        if not (BERT_ground_truth == T5_ground_truth and BERT_ground_truth == SVM_ground_truth):
            print("here")
    """
    #list of sentences that SVM got wrong, T5 AND BERT got right
    for i in tqdm(range(len(SVM_result_dict['ground_truth']))):
            T5_result = 0 
            BERT_result =  0
            SVM_result = SVM_result_dict['predictions'][i]
            ground_truth = SVM_result_dict['ground_truth'][i]
            sentence = ''
            for j in range(len((T5_result_dict['row_numbers']))):
                if T5_result_dict['row_numbers'][j] == i:
                    sentence = T5_result_dict['decoded_sentences'][j]
                    #this means that T5[j] == SVM[i]
                    T5_result = T5_result_dict['predictions'][j]
            for k in range(len((BERT_result_dict['row_numbers']))):
                if BERT_result_dict['row_numbers'][k] == i:
                    #this means that BERT[k] == SVM[i]
                    BERT_result = BERT_result_dict['predictions'][k]
            #If SVM got wrong
            if not SVM_result == ground_truth:
                #If T5 and BERT got right
                if(T5_result == BERT_result and T5_result == ground_truth):
                    SVM_only_wrong.append(sentence)
                    
    with open('errors_ONLY_SVM.txt', 'w') as f:
        for line in SVM_only_wrong:
            f.write(f"{line}\n\n")
    return(SVM_only_wrong)
        
SVM_difference(SVM_result_dict, T5_result_dict, BERT_result_dict)

dict_keys(['predictions', 'references', 'filenames', 'input_ids', 'row_numbers', 'decoded_sentences'])
4081
dict_keys(['predictions', 'ground_truth'])


100%|██████████| 4081/4081 [01:15<00:00, 54.09it/s]


['2+ albumin, 1+ blood, nitrite (-), 1+ WBC, 0-2 casts, moderate bacteria, moderate squamous cells, 5-10 WBCs, 20-50 WBCs Radiology: CXR: s/p CABG, low lung volumes, no pulmonary edema Cardiogram: NSR @ 85 BPM, primary AV delay (PR interval 260 ms), LAD, RBBB, TWI in leads V1-V2 Assessment: 58 year-old man with a history of CAD CHF, and OSA who presents with weakness, weight loss, SOB, and ARF in the setting of increased diuretic use.',
 '2+ albumin, 1+ blood, nitrite (-), 1+ WBC, 0-2 casts, moderate bacteria, moderate squamous cells, 5-10 WBCs, 20-50 WBCs Radiology: CXR: s/p CABG, low lung volumes, no pulmonary edema Cardiogram: NSR @ 85 BPM, primary AV delay (PR interval 260 ms), LAD, RBBB, TWI in leads V1-V2 Assessment: 58 year-old man with a history of CAD CHF, and OSA who presents with weakness, weight loss, SOB, and ARF in the setting of increased diuretic use.',
 '2+ albumin, 1+ blood, nitrite (-), 1+ WBC, 0-2 casts, moderate bacteria, moderate squamous cells, 5-10 WBCs, 20-50 W

In [3]:
#HARDEST SENTENCES
#Get the sentences that were misclassified by SVM AND BERT AND T5
SVM_correct = []
SVM_wrong = []
correct = []
all_wrong = []
from tqdm import tqdm
def hard_sentences(SVM_predictions, T5_result_dict, BERT_result_dict):
    print(T5_result_dict.keys())
    print(len(T5_result_dict['row_numbers']))
    
    print(SVM_result_dict.keys())
    
    #list of sentences that SVM AND T5 AND BERT got wrong
    wrong = []
    for i in tqdm(range(len(SVM_result_dict['ground_truth']))):
            T5_result = 0 
            BERT_result =  0
            SVM_result = SVM_result_dict['predictions'][i]
            ground_truth = SVM_result_dict['ground_truth'][i]
            sentence = ''
            for j in range(len((T5_result_dict['row_numbers']))):
                if T5_result_dict['row_numbers'][j] == i:
                    sentence = T5_result_dict['decoded_sentences'][j]
                    #this means that T5[j] == SVM[i]
                    T5_result = T5_result_dict['predictions'][j]
            for k in range(len((BERT_result_dict['row_numbers']))):
                if BERT_result_dict['row_numbers'][k] == i:
                    #this means that BERT[k] == SVM[i]
                    BERT_result = BERT_result_dict['predictions'][k]
            #If all got wrong
            if not (SVM_result == ground_truth) and not (T5_result == BERT_result) and not (T5_result == ground_truth):
                wrong.append(sentence)
                all_wrong.append({'sentence':sentence, 'ground_truth':ground_truth})
            if (SVM_result == ground_truth) and (T5_result == BERT_result) and (T5_result == ground_truth):
                correct.append(sentence)
            if (SVM_result == ground_truth):
                SVM_correct.append(sentence)
            if not (SVM_result == ground_truth):
                SVM_wrong.append({'sentence':sentence, 'ground_truth':ground_truth})
    with open('errors_all.txt', 'w') as f:
        for line in wrong:
            f.write(f"{line}\n\n")
    return(wrong)
        
hard_sentences(SVM_result_dict, T5_result_dict, BERT_result_dict)

dict_keys(['predictions', 'references', 'filenames', 'input_ids', 'row_numbers', 'decoded_sentences'])
4081
dict_keys(['predictions', 'ground_truth'])


100%|██████████| 4081/4081 [01:17<00:00, 52.61it/s]


['. # Angioedema: C/w angioedema.',
 "He's D/Ced his HCTZ because of light headedene Past medical history: 1. CAD: -Cath 1/73: 80% prox RCA stenosis w/ thrombus, mod D1 stenosis, clear Lcx.",
 'He started her on elavil to improve her sleep, but nora stopped 2/2 elevated bs. Does feel that improved sleep significantly improves her symptoms.',
 "Gstain and Ag screen were negative - Possibly start antibiotics if ABG's worsen #?",
 'S/p multiple courses of chemo complicated by bleomycin lung toxicity, s/p autogeneic stem cell transplant in [**2097**] and allo BMT in [**2098**].',
 'This phenomenon of opioid hyperalgesia is described more frequently in the oncology literature.',
 'pt seems mildly sedated today after receiving oxycodone.',
 'Most commonly associated with ACE-I. Does have a history of similar episode; acquired C1 inhibitor deficiency possible - familial less likely given age and absence of family hx.',
 'ALLERGIES: Sulfa.',
 'ARF: -responding to slow hydration, will aim for e

In [15]:
#Get SVM misclass nature
for i in SVM_wrong:
    sentence = i['sentence']
    #Get SVM misclass
    if(sentence.split(' ')[0] == 'Past'):
        if(sentence.split(' ')[1] == 'Medical/Surgical'):
            print(sentence)
            print(i['ground_truth'])

all_false_positives = 0 
all_false_negatives = 0
for i in all_wrong:
    sentence = i['sentence']
    #correct answer was 0 (noADE), all got 1 
    if(i['ground_truth'] == 0):
        all_false_negatives +=1
    if(i['ground_truth'] == 1):
        all_false_positives +=1
print('all false positives:', all_false_positives)
print('all false negatives:', all_false_negatives)
print('number of sentences incorrectly classified by all models:',len(all_wrong))

Past Medical/Surgical History: -Pulmonary Emboli 3/93, unprovoked, diagnosed when Pt desaturated perioperatively after an excision of a right neck mass, on coumadin since then, hypercoag panel neg, limited by coumadin -Episode of Dysarthria and Left sided weakness 4/93, resolved, negative MRA/MRI, negative bubble study, therapeutic on coumadin at the time -Diabetes Mellitus, HgA1c 6.6, on glucophage -Hypertension, on Lisinopril, no ACS episodes, no h/o ETT, Echo in 7/90 EF 70% -Promyelocytic Leukemia, dx 2090, tx ended 2091 -Tardive Dyskinesia secondary to perphenazine, 2 years -Reactive Airway Disease, recently diagnosed in Jan 2093 Allergies: Perphenazine-Tardive Dyskinesia Medications: Metaxalone 800 mg PO tid Advair Diskus 100/50 1 puff INH bid Lisinopril 20 mg PO Qdaily Glucophage 850 mg PO tid Prilosec 20 mg PO Qdaily Atorvastatin 10 mg PO Qhs ASA 81 mg PO Qdaily MVI, 1 tablet PO Qdaily Albuterol Inhaler 2 puff Q4 prn Risperidone 2 mg PO Qdaily Clozapine 150 mg PO Qhs Reserpine 1

In [10]:
#Goal: determine how BERT/T5 improves on SVM. Show diff. between SVM correct/incorrect
#get avg length of correct / incorrect sentence 
#get avg word length of correct / incorrect sentence 

SVM_wrong_count = 0
for i in SVM_wrong:
    SVM_wrong_count += len(i)
print('avg sentence length (in chars) of incorrectly classified SVM sentence')
print(SVM_wrong_count / len(SVM_wrong))


SVM_correct_count = 0
for i in SVM_correct:
    SVM_correct_count += len(i)
print('avg sentence length (in chars) of correctly classified SVM sentence')
print(SVM_correct_count / len(SVM_correct))

print("BERT")
print(error_by_length_char(BERT_result_dict))

print("T5")
print(error_by_length_char(T5_result_dict))


avg sentence length (in chars) of incorrectly classified SVM sentence
210.60925449871465
avg sentence length (in chars) of correctly classified SVM sentence
118.72399783315277
BERT
{'average_incorrect_length_char': 317.3205128205128, 'average_correct_length_char': 129.6912101910828}
T5
{'average_incorrect_length_char': 225.77710843373495, 'average_correct_length_char': 123.31468710089399}


In [11]:
with open('errors_all.txt', encoding='utf-8') as f:
    length = 0
    count = 0
    for line in f:
        length += len(line)
        count+=1
    print("average length of incorrectly classified sentneces by all models")
    print(length / count)
    print('number of incorrectly classified sentences by all models')
    print(count)
    
avg_length_SVM_correct = 0

length_correct = 0
count_correct = 0
for line in correct:
    length_correct += len(line)
    count_correct += 1
    
print('\naverage length of correctly classified sentences by all models')
print(length_correct / count_correct)
print('number of correctly classified sentences by all models')
print(len(correct))

avg_word_length_SVM_correct = 0
avg_word_length_SVM_incorrect = 0


#print((SVM_correct[0]))
for i in SVM_correct:
    word_list = i.split()
    if len(word_list) <= 0:
        continue
    total_avg = sum( map(len, word_list) ) / len(word_list)
    avg_word_length_SVM_correct += total_avg 
    avg_word_length_SVM_correct /= 2
    
print('\navg word length in correctly classified SVM sentences')
print(avg_word_length_SVM_correct)
print('number of correctly classified SVM sentences:')
print(len(SVM_correct))

for i in SVM_only_wrong:
    word_list = i.split()     
    total_avg = sum( map(len, word_list) ) / len(word_list)
    avg_word_length_SVM_incorrect += total_avg 
    avg_word_length_SVM_incorrect /= 2

print('\navg word length in incorrectly classified SVM sentences')
print(avg_word_length_SVM_incorrect)
print('number of sentences only incorrectly classified by SVM')
print(len(SVM_only_wrong))

with open('errors_ONLY_SVM.txt', encoding='utf-8') as f:
    length = 0
    count = 0
    for line in f:
        length += len(line)
        count+=1
        
    print("\naverage character length of incorrectly classified SVM sentences")
    print(length / count)

SVM_length = 0
SVM_count = 0
for i in SVM_correct:
    SVM_count += 1
    SVM_length += len(i)
print("average character length of correctly classified sentence by SVM")
print(SVM_length / SVM_count)


average length of incorrectly classified sentneces by all models
112.546875
number of incorrectly classified sentences by all models
64

average length of correctly classified sentences by all models
111.35507246376811
number of correctly classified sentences by all models
3588

avg word length in correctly classified SVM sentences
5.4754042630560935
number of correctly classified SVM sentences:
3692

avg word length in incorrectly classified SVM sentences
4.80457346285786
number of sentences only incorrectly classified by SVM
269

average character length of incorrectly classified SVM sentences
113.5278810408922
average character length of correctly classified sentence by SVM
118.72399783315277


In [12]:

import pandas as pd
import hashlib

def get_uncommon_errors_between_models(model_outputs: list[dict]):
    
    correct_df = pd.DataFrame({'correct': []})
    incorrect_df = pd.DataFrame({'incorrect': []})
    counter = 0
    for results_dict in model_outputs:
        correctness_df = get_correct_incorrect(results_dict)
        incorrect_sentences = correctness_df['incorrect']
        temp2 = pd.DataFrame()

        temp2['incorrect'] = incorrect_sentences['incorrect']
        if(counter == 0):
            incorrect_df = temp2
        else:
            incorrect_df = pd.merge(incorrect_df, temp2, on='incorrect', how='outer')
        counter += 1

    display(incorrect_df)
    return(incorrect_df)

import pandas as pd

def get_most_common_errors_between_models(model_outputs: list[dict]):
    
    correct_df = pd.DataFrame({'correct': []})
    incorrect_df = pd.DataFrame({'incorrect': []})
    counter = 0
    for results_dict in model_outputs:
        correctness_df = get_correct_incorrect(results_dict)
        incorrect_sentences = correctness_df['incorrect']
        df = pd.DataFrame.from_dict([correctness_df])

        df.to_excel(f'results1_{counter}.xlsx', index=False)

        temp2 = pd.DataFrame()

        temp2['incorrect'] = incorrect_sentences['incorrect']
        if(counter == 0):
            incorrect_df = temp2
        else:
            incorrect_df = pd.merge(incorrect_df, temp2, on='incorrect', how='inner')
        counter += 1

    display(incorrect_df)
    return(incorrect_df)

T5_result_dict = import_data('PT_T5_Classifier_results_dict.pkl')
BERT_result_dict = import_data('PT_BERT_Classifier_results_dict.pkl')

common_incorrect_df = get_most_common_errors_between_models([T5_result_dict, BERT_result_dict])
uncommon_incorrect_df = get_uncommon_errors_between_models([T5_result_dict, BERT_result_dict])

print(len(common_incorrect_df))
print(len(uncommon_incorrect_df))

#35export_data(incorrect_df, 'common_errors_BERT_T5')


length = 0
count = 0
"""
for i in common_incorrect_df['incorrect']:
    print(i)
    length += len(i)
    #print(length)
    print('\n')
    count += 1
    
print("avg length: ", length/count)
"""

FileNotFoundError: [Errno 2] No such file or directory: 'PT_T5_Classifier_results_dict.pkl'

In [None]:
duplicate_strings = uncommon_incorrect_df[uncommon_incorrect_df.duplicated('incorrect')]

# Display the rows with duplicate strings
print("Rows with duplicate strings:")
print(duplicate_strings)


In [None]:
display(get_correct_incorrect(T5_result_dict)['incorrect'])

display(get_correct_incorrect(BERT_result_dict)['incorrect'])

In [None]:
pd.options.display.max_colwidth = 1000
print(get_correct_incorrect(T5_result_dict)['incorrect'].iloc[4])
print(get_correct_incorrect(BERT_result_dict)['incorrect'].iloc[1])

#print(get_correct_incorrect(BERT_result_dict)['incorrect',1])

Difference is due to tokenization -- we encode it, classify, then decode it to perform error analysis; when encoding, it seems like some characters are toknized differently. 

In [3]:
#Ensemble approach:  UNION
from tqdm import tqdm
def union_ensemble_approach(SVM_result_dict, T5_result_dict, BERT_result_dict):
    print(SVM_result_dict.keys())
    print(T5_result_dict.keys())
    print(BERT_result_dict.keys())
    
    print(len(SVM_result_dict['predictions']))
    print(len(T5_result_dict['predictions']))
    print(len(BERT_result_dict['predictions']))
    union_agreement_predictions = []
    for i in tqdm(range(len(SVM_result_dict['ground_truth']))):
            T5_result = 0 
            BERT_result =  0
            SVM_result = SVM_result_dict['predictions'][i]
            ground_truth = SVM_result_dict['ground_truth'][i]
            sentence = ''
            for j in range(len((T5_result_dict['row_numbers']))):
                if T5_result_dict['row_numbers'][j] == i:
                    sentence = T5_result_dict['decoded_sentences'][j]
                    #this means that T5[j] == SVM[i]
                    T5_result = T5_result_dict['predictions'][j]
            for k in range(len((BERT_result_dict['row_numbers']))):
                if BERT_result_dict['row_numbers'][k] == i:
                    #this means that BERT[k] == SVM[i]
                    BERT_result = BERT_result_dict['predictions'][k]
                    
            #if any choose minority 
            #print('sentence: ', sentence)
            if ( BERT_result == 1 or SVM_result == 1 or T5_result == 1):
                union_agreement_predictions.append(1)
            else:
                union_agreement_predictions.append(0)
                
    return(union_agreement_predictions )

union_agreement_predictions = union_ensemble_approach(SVM_result_dict, T5_result_dict, BERT_result_dict)


dict_keys(['predictions', 'ground_truth'])
dict_keys(['predictions', 'references', 'filenames', 'input_ids', 'row_numbers', 'decoded_sentences'])
dict_keys(['predictions', 'references', 'filenames', 'input_ids', 'row_numbers', 'decoded_sentences'])
4081
4081
4081


100%|██████████| 4081/4081 [01:17<00:00, 52.79it/s]


In [32]:
from sklearn.metrics import accuracy_score, classification_report

classification_report_result = classification_report(SVM_result_dict['ground_truth'], union_agreement_predictions, digits=3)
print(classification_report_result)

              precision    recall  f1-score   support

           0      0.983     0.916     0.948      3788
           1      0.424     0.795     0.553       293

    accuracy                          0.908      4081
   macro avg      0.703     0.856     0.751      4081
weighted avg      0.943     0.908     0.920      4081



In [43]:
def avg_sentence_length_ensemble(SVM_result_dict, T5_result_dict, union_agreement_predictions):
    print(SVM_result_dict.keys())
    correctly_classified = []
    incorrectly_classified = [] 
    for i in tqdm(range(len(SVM_result_dict['ground_truth']))):
        sentence = ''
        for j in range(len((T5_result_dict['row_numbers']))):
                if T5_result_dict['row_numbers'][j] == i:
                    sentence = T5_result_dict['decoded_sentences'][j]
                    
        if(SVM_result_dict['ground_truth'][i] == union_agreement_predictions[i]):
            correctly_classified.append(sentence)
        else:
            incorrectly_classified.append(sentence)
    return({'correct': correctly_classified, 'incorrect':incorrectly_classified})
            
temp = avg_sentence_length_ensemble(SVM_result_dict,T5_result_dict, union_agreement_predictions)

dict_keys(['predictions', 'ground_truth'])


100%|██████████| 4081/4081 [00:42<00:00, 95.99it/s]


In [44]:
cor_length = len(temp['correct'])
cor_counter = 0
for i in temp['correct']:
    cor_counter += len(i)
print('avg len correct ensemble union: ', str(cor_counter/cor_length))
print('num correct sentences: ', cor_length)
incor_length = len(temp['incorrect'])
incor_counter = 0
for i in temp['incorrect']:
    incor_counter += len(i)
print('avg len incorrect ensemble union: ', str(incor_counter/incor_length))
print('num incorrect sentences: ', incor_length)

avg len correct ensemble union:  120.69330453563715
num correct sentences:  3704
avg len incorrect ensemble union:  194.18567639257296
num incorrect sentences:  377


In [4]:
#Ensemble approach: if 2/ 3 models agree, then do 2/3 models. Else, don't.
from tqdm import tqdm
def majority_ensemble_approach(SVM_result_dict, T5_result_dict, BERT_result_dict):
    print(SVM_result_dict.keys())
    print(T5_result_dict.keys())
    print(BERT_result_dict.keys())
    
    print(len(SVM_result_dict['predictions']))
    print(len(T5_result_dict['predictions']))
    print(len(BERT_result_dict['predictions']))
    majority_agreement_predictions = []
    for i in tqdm(range(len(SVM_result_dict['ground_truth']))):
            T5_result = 0 
            BERT_result =  0
            SVM_result = SVM_result_dict['predictions'][i]
            ground_truth = SVM_result_dict['ground_truth'][i]
            sentence = ''
            for j in range(len((T5_result_dict['row_numbers']))):
                if T5_result_dict['row_numbers'][j] == i:
                    sentence = T5_result_dict['decoded_sentences'][j]
                    #this means that T5[j] == SVM[i]
                    T5_result = T5_result_dict['predictions'][j]
            for k in range(len((BERT_result_dict['row_numbers']))):
                if BERT_result_dict['row_numbers'][k] == i:
                    #this means that BERT[k] == SVM[i]
                    BERT_result = BERT_result_dict['predictions'][k]
                    
            #If 2/3 agree
            #print('sentence: ', sentence)
            if ( BERT_result == SVM_result):
                majority_agreement_predictions.append(BERT_result)
            elif (SVM_result == T5_result):
                majority_agreement_predictions.append(SVM_result)
            elif(T5_result == BERT_result):
                majority_agreement_predictions.append(T5_result)
                
    return(majority_agreement_predictions)

majority_agreement_predictions = majority_ensemble_approach(SVM_result_dict, T5_result_dict, BERT_result_dict)


dict_keys(['predictions', 'ground_truth'])
dict_keys(['predictions', 'references', 'filenames', 'input_ids', 'row_numbers', 'decoded_sentences'])
dict_keys(['predictions', 'references', 'filenames', 'input_ids', 'row_numbers', 'decoded_sentences'])
4081
4081
4081


100%|██████████| 4081/4081 [01:16<00:00, 53.32it/s]


In [41]:
temp2 = avg_sentence_length_ensemble(SVM_result_dict,T5_result_dict, majority_agreement_predictions)

dict_keys(['predictions', 'ground_truth'])


100%|██████████| 4081/4081 [00:42<00:00, 96.57it/s]


In [42]:
cor_length = len(temp2['correct'])
cor_counter = 0
for i in temp2['correct']:
    cor_counter += len(i)
print('avg len correct ensemble majority: ', str(cor_counter/cor_length))
print('num correct sentences: ', cor_length)

incor_length = len(temp2['incorrect'])
incor_counter = 0
for i in temp2['incorrect']:
    incor_counter += len(i)
print('avg len incorrect ensemble majority: ', str(incor_counter/incor_length))
print('num incorrect sentences: ', incor_length)

avg len correct ensemble majority:  123.62569974554707
num correct sentences:  3930
avg len incorrect ensemble majority:  227.86092715231788
num incorrect sentences:  151


In [5]:
#HARDEST SENTENCES
#Get the sentences that were misclassified by SVM AND BERT AND T5
ensemble_all_correct = {'sentence':[], 'ground_truth':[]}
ensemble_all_wrong = {'sentence':[], 'ground_truth':[]}
from tqdm import tqdm
def hard_sentences(SVM_result_dict, union_agreement_predictions, majority_agreement_predictions, T5_result_dict, BERT_result_dict):
    print(T5_result_dict.keys())
    print(len(T5_result_dict['row_numbers']))
    print(SVM_result_dict.keys())
    
    #list of sentences that SVM AND T5 AND BERT AND ensembles got wrong
    wrong = []
    for i in tqdm(range(len(SVM_result_dict['ground_truth']))):
            T5_result = 0 
            BERT_result =  0
            SVM_result = SVM_result_dict['predictions'][i]
            ground_truth = SVM_result_dict['ground_truth'][i]
            sentence = ''
            for j in range(len((T5_result_dict['row_numbers']))):
                if T5_result_dict['row_numbers'][j] == i:
                    sentence = T5_result_dict['decoded_sentences'][j]
                    #this means that T5[j] == SVM[i]
                    T5_result = T5_result_dict['predictions'][j]
            for k in range(len((BERT_result_dict['row_numbers']))):
                if BERT_result_dict['row_numbers'][k] == i:
                    #this means that BERT[k] == SVM[i]
                    BERT_result = BERT_result_dict['predictions'][k]
                    
            majority_ensemble_result = majority_agreement_predictions[i]
            union_ensemble_result = union_agreement_predictions[i]
            #If all got wrong
            if (
                not (SVM_result == ground_truth)
                and not (BERT_result == ground_truth) 
                and not (T5_result == ground_truth)
                and not (union_ensemble_result == ground_truth)
                and not (majority_ensemble_result == ground_truth)
            ):
                print(sentence)
                print(ground_truth,'\n\n')
                ensemble_all_wrong['sentence'].append(sentence) 
                ensemble_all_wrong['ground_truth'].append(ground_truth)
            #if all got right
            if (
                (SVM_result == ground_truth)
                and (BERT_result == ground_truth) 
                and (T5_result == ground_truth)
                and (union_ensemble_result == ground_truth)
                and (majority_ensemble_result == ground_truth)
            ):
                #print(ground_truth,'\n\n')
                ensemble_all_correct['sentence'].append(sentence) 
                ensemble_all_correct['ground_truth'].append(ground_truth)

    with open('errors_all_ensemble.txt', 'w') as f:
        for line in ensemble_all_wrong:
            f.write(f"{line}\n\n")
    return({'wrong':ensemble_all_wrong, 'right':ensemble_all_correct})
        
temp = hard_sentences(SVM_result_dict, union_agreement_predictions, majority_agreement_predictions, T5_result_dict, BERT_result_dict)
ensemble_all_wrong = temp['wrong']
ensemble_all_correct = temp['right']

dict_keys(['predictions', 'references', 'filenames', 'input_ids', 'row_numbers', 'decoded_sentences'])
4081
dict_keys(['predictions', 'ground_truth'])


  0%|          | 6/4081 [00:00<01:15, 53.85it/s]

His angioedema resolved completely by the end of the day and he was discharged.
1 


HISTORY OF PRESENT ILLNESS: This is a 58-year-old male who presented with a one-week history of fatigue, lightheadedness, and black tarry stools with occasional bright red blood per rectum.
1 


HISTORY OF PRESENT ILLNESS: This is a 58-year-old male who presented with a one-week history of fatigue, lightheadedness, and black tarry stools with occasional bright red blood per rectum.
1 


HISTORY OF PRESENT ILLNESS: This is a 58-year-old male who presented with a one-week history of fatigue, lightheadedness, and black tarry stools with occasional bright red blood per rectum.
1 


HISTORY OF PRESENT ILLNESS: This is a 58-year-old male who presented with a one-week history of fatigue, lightheadedness, and black tarry stools with occasional bright red blood per rectum.
1 


Neurontin (patient self-discontinued secondary to sedative side effects).
1 




  0%|          | 12/4081 [00:00<01:17, 52.81it/s]

He was extubated on POD 3, after which he experienced post-op delerium, which improved with Haldol and discontinuation of narcotics.
1 


Admission Date: [**2100-9-22**] Discharge Date: [**2100-9-28**] Date of Birth: [**2019-1-26**] Sex: F Service: MEDICINE Allergies: Penicillins / Vicodin / Cipro / Polysporin Attending:[**First Name3 (LF) 1145**] Chief Complaint: bright red blood per rectum Major Surgical or Invasive Procedure: None History of Present Illness: 81 year old female hx of Afib on coumadin, diverticulosis, and myasthenia [**Last Name (un) 2902**] presented to PCP [**Name Initial (PRE) 151**] 2 d BRBPR.
1 




  0%|          | 18/4081 [00:00<01:16, 52.86it/s]

He disliked the Flomax because it seemed to cause polyuria and generally made him feel 'off' in an unidentifiable way.
1 


He disliked the Flomax because it seemed to cause polyuria and generally made him feel 'off' in an unidentifiable way.
1 




  1%|          | 24/4081 [00:00<01:16, 52.84it/s]

The patient developed weakness and continued to feel weak and lose weight despite discontinuing Zaroxolyn on 01/05/29 under the advise of Dr. Haynes.
1 


The patient developed weakness and continued to feel weak and lose weight despite discontinuing Zaroxolyn on 01/05/29 under the advise of Dr. Haynes.
1 




  1%|          | 48/4081 [00:00<01:15, 53.20it/s]

Pt. has now completed 4 weeks antibiotics with renal failure during course of vancomycin.
1 


She tried it during the day but too sleepy.
1 


S: His pain is gone although he continues to have a bad taste in his mouth since he has been on antibiotics.
1 




  1%|▏         | 54/4081 [00:01<01:16, 52.89it/s]

(2) Diarrhea. S: He has continued to have several soft, dark bowel movements a day while he has been taking the antibiotic.
1 


(2) Diarrhea. S: He has continued to have several soft, dark bowel movements a day while he has been taking the antibiotic.
1 


(5) Diabetes. S: He stopped glyburide as directed last week when his 4 p.m. blood sugar was 44.
1 


Review of Systems: above Psych: Referred to urologist at KJH, Dr. Xayachack (? sp) because of absent libido.
1 


The loss of libido precedes the Celexa and the possibility of its relationship to Neoral or CellCept has been raised.
1 


His last AIC was 4.4% in 9/95.
1 




  2%|▏         | 72/4081 [00:01<01:15, 52.92it/s]

4. Fatigue.
1 


Record date: 2079-01-14 PICH 9 89 James Street, Suite 9 GREENWOOD, NORA Lucerne, KY 66922 60005814 (838) 627-3027 01/14/79 CHIEF COMPLAINT: Ms. Greenwood comes in to urgent care clinic for hives after taking Keflex.
1 


She said shortly after stopping antibiotic her symptoms of hives and itchiness have improved very quickly. She is still complaining of purulent drainage around her groin.
1 


She said shortly after stopping antibiotic her symptoms of hives and itchiness have improved very quickly. She is still complaining of purulent drainage around her groin.
1 




  3%|▎         | 108/4081 [00:02<01:14, 53.03it/s]

"TITLE: Palliative Care Consult Resident Follow Up Per discussion with the [**Hospital Unit Name 44**] team, pt was transferred overnight after having been found unresponsive, with decreased respiratory rate, CO2 retaining.
1 


"TITLE: Palliative Care Consult Resident Follow Up Per discussion with the [**Hospital Unit Name 44**] team, pt was transferred overnight after having been found unresponsive, with decreased respiratory rate, CO2 retaining.
1 




  3%|▎         | 126/4081 [00:02<01:15, 52.48it/s]

-Continue Co-reg -D/C ACE-I -Negative fluid balance as possible with Torsemide Patient is full code and will continue to work to stablize CHF regimen.
1 


He noted that he has swallowed a lot of blood and had large clots coming from his nose and OP.
1 


He noted that he has swallowed a lot of blood and had large clots coming from his nose and OP.
1 


He also noted some black stools out of his ostomy.
1 




  4%|▎         | 144/4081 [00:02<01:15, 52.49it/s]

Levophed changed to neosynephrine to limit tachycardia (see flowsheet).
1 


His son called EMS and he was transported to [**Hospital 1200**] Hospital where CT san showed interparenchymal hemorrhage.
1 


Impaired Skin Integrity Assessment: Diffuse red raised rash general body surfaces, lighter on back, pt denies itch, no drainage.
1 


Impaired Skin Integrity Assessment: Diffuse red raised rash general body surfaces, lighter on back, pt denies itch, no drainage.
1 


RR b/w 20-25.Pt unresponsive on 20mcg of propofol.
1 




  4%|▍         | 168/4081 [00:03<01:14, 52.76it/s]

If the patient tolerates the transition, the goal would be to replace fentanyl with methadone to provide adequate pain relief, remove the fentanyl as a possible stimulus of pain and therefore reduce anxiety and/or agitation that may be occurring from such requiring higher sedation doses.
1 


If the patient tolerates the transition, the goal would be to replace fentanyl with methadone to provide adequate pain relief, remove the fentanyl as a possible stimulus of pain and therefore reduce anxiety and/or agitation that may be occurring from such requiring higher sedation doses.
1 


Also, reported to be difficult to wake up seemingly under deep sedation on current regimen.
1 


There is always the possibility of tolerance or hyperalgesia (most often reported in oncology literature).
1 


There is always the possibility of tolerance or hyperalgesia (most often reported in oncology literature).
1 




  5%|▌         | 222/4081 [00:04<01:12, 53.11it/s]

Thrombocytopenia: Platelets 68 on presentation.
1 


Thrombocytopenia: Platelets 68 on presentation.
1 


Consistent with severity and timing of prior post chemo thrombocytopenia.
1 


Suspect large part of this may be marrow suppression secondary to chemotherapy, as has no signs of bleeding (guaiac negative in ED).
1 


Pruritis: Per husband, and as noted on skin examination, this has been a major complaint over the last week.
1 


Glycemic Control: N/A Lines: Portacath (2 ports) Prophylaxis: DVT: supratherapeutic on coumadin, pneumoboot to left leg Stress ulcer: PPI Communication: Comments: Husband cell: [**Telephone/Fax (1) 8563**] Code status: DNR/DNI, Pressors okay.
1 




  6%|▌         | 234/4081 [00:04<01:12, 53.11it/s]

She continues to be quite agitated and complains of pruritus.
1 


- CT Torso [**9-11**] unchanged compared to prior - Tele monitoring - continued to spike fevers on prolonged Vanc, Zosyn, Cipro (start date 7.26), d/c these abx [**9-13**] - Bronchoscopy [**9-12**], unimpressive for infection, GS just 1+ polys.
1 


Etiology of ARF felt be be contrast dye related as per renal.
1 




  6%|▋         | 264/4081 [00:04<01:11, 53.12it/s]

7) Hyponatremia: improving.
1 


Could also be secondary to liver disease - free H20 restrict - check serum/urine osms 8) HIV: on HAART as outpatient; currently being held in the setting of acute hepatitis - cont to hold HAART for now 9) Macrocytic anemia: likely in the setting of ETOH abuse - thiamine/folate repletion - guaiac all stools 10) CAD s/p PCI: on ASA daily, no b-blocker.
1 


# GI bleed: Patient s/p 4u pRBC s on (2u on [**12-28**], 2u on [**12-30**]) for likely UGIB in setting of anticoagulation (Heparin gtt, ASA, Plavix, Integrillin).
1 


DEEP VENOUS THROMBOSIS (DVT), LOWER EXTREMITY: Supratherapeutic PTT, adjust heparin accordingly.
1 


- monitor creatinine - renally dose medication - urine electrolytes Hypoxia: Occurred periprocedure.
1 




  7%|▋         | 276/4081 [00:05<01:11, 53.01it/s]

Aplastic anemia.
1 


5. Iron overload.
1 


Son more decisive than daughter who is still coming to terms with this - pigtail continues on water seal - transient hypotension to 77 at 1700, 2 hours after lasix 20mg IV with 800cc output.
1 




  7%|▋         | 288/4081 [00:05<01:11, 53.01it/s]

- no need for emergent tracheostomy as airway currently stable, will rediscuss if necessary for planned treatment of head and neck cancer - consider therapeutic thoracentesis if pleural effusion worsens - repeat bronchoscopy today to clear secretions - CVL placed to assess intravascular volume - bilateral effusions likely due to fluid resuscitation in the setting of low albumin.
1 


Had frequent runs of polymorphic VT with no symptoms on floor, broke with isoproterenol gtt.
1 




 25%|██▍       | 1020/4081 [00:19<00:58, 52.68it/s]

Past Medical History: CAD s/p STEMI in 2077 s/p 8 stents including LAD Systolic CHF Class Ia with EF 20% s/p BiV ICD placement DDD St. Jude (05/13/2081) Diabetes on glypizide HLP HTN Medications at home (confirmed w/pt): ASA 81 mg daily Coumadin as prescribed daily Coreg 12.5 mg bid Dipyridamole 75 mg tid Doxycycline 100 mg bid Esomesoprazole 40 mg bid Glipizide XL 10 mg daily Simvastatin 40 mg daily Lexapro 10 mg daily Polymyxin drops Vitamin D Allergies: DIGOXIN - General Intolerance SH: The patient lives with his wife.
0 




 65%|██████▌   | 2664/4081 [00:50<00:26, 52.80it/s]

Diarrhea Assessment: Has been periods of loose stool during his course of hospitalization.
0 




 66%|██████▌   | 2694/4081 [00:50<00:26, 52.57it/s]

Now persistently requiring mechanical ventilation because of ongoing sedation, likely because of Phenobarbital.
0 




 80%|███████▉  | 3252/4081 [01:01<00:15, 52.82it/s]

currently INR 1.5, PTT 27 and pt off gtt since afternoon of [**7-28**]. Pt s/p total of 3U PRBCs since [**7-28**] with inappropriate bump in Hct following 2U PRBCs today on [**7-29**] : 21.7 --> 23.9 -Continue holding Heparin gtt and coumadin given continued bleeding into gluteal hematoma -Serial exams to eval for compartment syndrome; Ortho following -If pt develops foot drop, contact Ortho for possible hematoma evacuation.
0 


INR subtherapeutic, PTT normal currently since off heparin gtt.
0 


-Continue Statin -Bblocker on hold until acute issues resolve -No ACE for now given ACE allergy/no [**Last Name (un) 239**] given ARF -No nitrates for now #Acute Renal Failure Improving gradually following hypotensive episode earlier this admission.
0 




 81%|████████  | 3306/4081 [01:02<00:14, 52.89it/s]

- Hold off on diuresis (patient is autodiuresing) - Uptitrate beta-blocker yesterday - Continue holding [**Last Name (un) 239**] given renal failure #.
0 




100%|██████████| 4081/4081 [01:16<00:00, 53.13it/s]


In [14]:
import pandas as pd
occurrence_counts = pd.Series(T5_result_dict['decoded_sentences']).value_counts()
average_occurrence_count = occurrence_counts.mean()
print(f"On average, a given sentence occurs {average_occurrence_count:.2f} times across all classes in the test set.")
print(f"There are {len(T5_result_dict['decoded_sentences'])} sentences across all classes in the test set.")

print('\n\nnum of incorrectly classified sentences by all models:', len(ensemble_all_wrong['sentence']))
occurence_count2 = pd.Series(ensemble_all_wrong['sentence']).value_counts()
average_occurrence_count2 = occurence_count2.mean()

print(f"On average, a given sentence occurs {average_occurrence_count2:.2f} times across all classes.")
print(f"There are {len(ensemble_all_wrong['sentence'])} sentences across all classes in the test set.\n\n")

print("False negatives:")
for i in range(len(ensemble_all_wrong['sentence'])):
    #if it should have been ADE, class 1
    if(ensemble_all_wrong['ground_truth'][i] == 1):
        #that means it was a false negative
        print(ensemble_all_wrong['sentence'][i])
        
with open('incorrect_by_all_false_negs.txt', 'w') as f:
    for line in ensemble_all_wrong['sentence']:
        f.write(f"{line}\n\n")
    
count = 0
with open('all_correct_true_pos.txt', 'w') as f:
    for i in range(len(ensemble_all_correct['sentence'])):
        if(ensemble_all_correct['ground_truth'][i] == 1):
            count += 1
            f.write(f"{ensemble_all_correct['sentence'][i]}\n\n")
print(count, "num of true positives")

On average, a given sentence occurs 1.07 times across all classes in the test set.
There are 4081 sentences across all classes in the test set.


num of incorrectly classified sentences by all models: 67
On average, a given sentence occurs 1.24 times across all classes.
There are 67 sentences across all classes in the test set.


False negatives:
His angioedema resolved completely by the end of the day and he was discharged.
HISTORY OF PRESENT ILLNESS: This is a 58-year-old male who presented with a one-week history of fatigue, lightheadedness, and black tarry stools with occasional bright red blood per rectum.
HISTORY OF PRESENT ILLNESS: This is a 58-year-old male who presented with a one-week history of fatigue, lightheadedness, and black tarry stools with occasional bright red blood per rectum.
HISTORY OF PRESENT ILLNESS: This is a 58-year-old male who presented with a one-week history of fatigue, lightheadedness, and black tarry stools with occasional bright red blood per rectum.
H