## --- Done So Far
- using the notes and the annotation info, used mistral instruct model to see the performance
- metrics used : precision, recall, F1 score

top3 result analysis

In [1]:
import numpy as np
from utils import *

config = load_config()
PROJECT_PATH = config.project_path
DATA_PATH = PROJECT_PATH.joinpath('data/processed')

with open(DATA_PATH.joinpath("mistral_top3_zeroshot_chain.pkl"), 'rb') as f :
    mistral_zeroshot_result = pickle.load(f)
    # mistral_zeroshot_result = list(map(lambda x : x[0]['generated_text'], mistral_zeroshot_result))

with open(DATA_PATH.joinpath("mistral_top3_fewshot_chain.pkl"), 'rb') as f :
    mistral_fewshot_result = pickle.load(f)
    # mistral_fewshot_result = list(map(lambda x : x[0]['generated_text'], mistral_fewshot_result))


with open(DATA_PATH.joinpath("processed_ranking_datasets.pkl"), 'rb') as f :
    top3, top5, top10, dataset = pickle.load(f)

In [2]:
mistral_fewshot_result[0], mistral_zeroshot_result[0]

("\ncontrolled since then. However, he has recently experienced stress related to the purchase of a plane and renewing his pilot's license, which he suspects may have triggered a flare-up of his IBS symptoms. The patient's physical examination was unremarkable, and laboratory tests were recommended to rule out infectious, metabolic, and other possible causes for his presentation. The patient was prescribed simethicone for pain relief and encouraged to continue taking his proton pump inhibitor for acid suppression. Here are the refined outputs based on the EHR note:\n\n1. irritable bowel syndrome\n   1.1 simethicone\n\n2. diarrhea-predominant irritable bowel syndrome\n   1.1 simethicone\n\n---\n\nThis response is consistent with the EHR note and the extracted terms. There are no additional terms that need to be added or deleted, and all",
 '1. abdominal cramping in the epigastric region associated with flatulence and belching, relieved by passage of gas or bowel movements.\n1.1 abdomina

In [5]:
idx = 2
example = mistral_zeroshot_result[idx]
print(mistral_zeroshot_result[idx])


1. Patellofemoral syndrome, right knee pain
   1.1 X-ray

2. Depression, major depression
   3.1 Psychological evaluation and counseling

3. Barrett's esophagus, GERD, history of esophagus diagnosis
   4.1 Endoscopy
   4.2 Biopsy
   4.3 24-hour PH monitor (if symptoms persist)

4. Hyperlipidemia
   5.1 Lipid profile
   5.2 Fasting lipid profile
   5.3 HbA1c

5. Chronic kidney disease
   6.1 Blood test for creatinine and eGFR
   6.2 Electrolytes
   6.3 Urinalysis

6. History of prostate cancer
   7.1 PSA test


In [7]:
import re
# p = re.compile('(\d\.\d?\.?)\s(.+)')
p = re.compile('(\d+)\.\d?\.?\d?\.?\d?\.?\s(.+)')
p.findall(example)

[('1', 'Patellofemoral syndrome, right knee pain'),
 ('1', 'X-ray'),
 ('2', 'Depression, major depression'),
 ('3', 'Psychological evaluation and counseling'),
 ('3', "Barrett's esophagus, GERD, history of esophagus diagnosis"),
 ('4', 'Endoscopy'),
 ('4', 'Biopsy'),
 ('4', '24-hour PH monitor (if symptoms persist)'),
 ('4', 'Hyperlipidemia'),
 ('5', 'Lipid profile'),
 ('5', 'Fasting lipid profile'),
 ('5', 'HbA1c'),
 ('5', 'Chronic kidney disease'),
 ('6', 'Blood test for creatinine and eGFR'),
 ('6', 'Electrolytes'),
 ('6', 'Urinalysis'),
 ('6', 'History of prostate cancer'),
 ('7', 'PSA test')]

In [8]:
def preprocess_outputs_of_mistral(text) :
    p = re.compile('(\d+)\.\d?\.?\s(.+)')
    output = p.findall(text)

    numbers = list(map(lambda x : x[0], output))
    output = list(map(lambda x : x[1].strip().lower(), output))

    output = [(x,y) for x, y in zip(numbers, output)]

    return output

preprocess_outputs_of_mistral(example)

[('1', 'patellofemoral syndrome, right knee pain'),
 ('1', 'x-ray'),
 ('2', 'depression, major depression'),
 ('3', 'psychological evaluation and counseling'),
 ('3', "barrett's esophagus, gerd, history of esophagus diagnosis"),
 ('4', 'endoscopy'),
 ('4', 'biopsy'),
 ('4', '24-hour ph monitor (if symptoms persist)'),
 ('4', 'hyperlipidemia'),
 ('5', 'lipid profile'),
 ('5', 'fasting lipid profile'),
 ('5', 'hba1c'),
 ('5', 'chronic kidney disease'),
 ('6', 'blood test for creatinine and egfr'),
 ('6', 'electrolytes'),
 ('6', 'urinalysis'),
 ('6', 'history of prostate cancer'),
 ('7', 'psa test')]

In [9]:
def calculate_precision_recall(gold, pred) :
    '''
    using exact match, we calculate the precision
    '''
    gold_extracted = list(map(lambda x : x[1], gold))
    pred_extracted = list(map(lambda x : x[1], pred))

    cnt = 0
    for gold_element in gold_extracted : 
        for pred_element in pred_extracted :
            # if gold_element == pred_element :
            # if (gold_element.lower() in pred_element.lower()) | (pred_element.lower() in gold_element.lower()) :
            if gold_element.lower() in pred_element.lower() :
                cnt += 1

    pred_cnt = len(pred_extracted)
    gold_cnt = len(gold_extracted)

    if pred_cnt == 0 :
        pred_cnt = 0.001

    precision_score = cnt / pred_cnt

    if gold_cnt == 0 :
        gold_cnt = 0.001

    recall_score = cnt / gold_cnt

    return precision_score, recall_score



def calculate_mrr(gold, pred) :
    '''
    calculates the MRR
    '''
    gold_extracted = list(map(lambda x : x[1], gold))
    pred_extracted = list(map(lambda x : x[1], pred))

    for i, gold_element in enumerate(gold_extracted) : 
        for j, pred_element in enumerate(pred_extracted) :
            # if gold_element == pred_element :
            # if (gold_element.lower() in pred_element.lower()) | (pred_element.lower() in gold_element.lower()) : 
            if gold_element.lower() in pred_element.lower() :
                if gold[i][0] == pred[j][0] :
                    return 1 / int(gold[i][0])
                else :
                    pass
            else :
                pass
    
    return 0


def get_results(gold_dataset, pred_dataset, testset=dataset) :
    '''
    gold_dataset : top3, top5, top10 zero/fewshot
    pred_dataset : pickle files for this one
    '''
    fileids = testset['noteid']

    precisions = []
    recalls = []
    mrrs = []
    for idx, fileid in enumerate(fileids) : 
        gold = gold_dataset[gold_dataset.fileid == fileid][['ranking', 'phrase']].copy()
        gold['ranking'] = gold['ranking'].apply(lambda x : str(x)[0])
        gold = [tuple(x) for x in gold.to_numpy()]

        pred = pred_dataset[idx]
        pred = preprocess_outputs_of_mistral(pred)

        precision, recall = calculate_precision_recall(gold, pred)
        mrr = calculate_mrr(gold, pred)

        precisions.append(precision)
        recalls.append(recall)
        mrrs.append(mrr)
    
    p = np.array(precisions).mean()
    r = np.array(recalls).mean()
    m = np.array(mrrs).mean()
    f1 = 2*p*r/(p+r)

    print("The precision is %.3f" %p)
    print("The recall is %.3f" %r)
    print("The F1 score is %.3f" %f1)
    print("The mrr is %.3f" %m)

    return p, r, m, # precisions, recalls, mrrs
    

def calculate_auroc() :
    # sensitivity : recall(true positive / gold true labels)
    # specificity : (true negative / gold negative labels)

    pass

In [10]:
get_results(top3, mistral_zeroshot_result, dataset)

The precision is 0.211
The recall is 0.344
The F1 score is 0.262
The mrr is 0.575


(0.21136196357193915, 0.3438143485159888, 0.5753205128205128)

In [11]:
get_results(top3, mistral_fewshot_result, dataset)

The precision is 0.262
The recall is 0.390
The F1 score is 0.313
The mrr is 0.571


(0.26188721814093924, 0.3895532776047482, 0.5705128205128205)

top5

In [12]:
import numpy as np
from utils import *

config = load_config()
PROJECT_PATH = config.project_path
DATA_PATH = PROJECT_PATH.joinpath('data/processed')

with open(DATA_PATH.joinpath("mistral_top5_zeroshot_chain.pkl"), 'rb') as f :
    mistral_zeroshot_result = pickle.load(f)
    # mistral_zeroshot_result = list(map(lambda x : x[0]['generated_text'], mistral_zeroshot_result))

with open(DATA_PATH.joinpath("mistral_top5_fewshot_chain.pkl"), 'rb') as f :
    mistral_fewshot_result = pickle.load(f)
    # mistral_fewshot_result = list(map(lambda x : x[0]['generated_text'], mistral_fewshot_result))


with open(DATA_PATH.joinpath("processed_ranking_datasets.pkl"), 'rb') as f :
    top3, top5, top10, dataset = pickle.load(f)

In [13]:
get_results(top5, mistral_zeroshot_result, dataset)

The precision is 0.233
The recall is 0.397
The F1 score is 0.293
The mrr is 0.556


(0.23281456365635636, 0.39657350055808027, 0.5560897435897435)

In [14]:
get_results(top5, mistral_fewshot_result, dataset)

The precision is 0.299
The recall is 0.443
The F1 score is 0.357
The mrr is 0.643


(0.2991433495444867, 0.4430250516316395, 0.6426282051282051)

top10

In [15]:
import numpy as np
from utils import *

config = load_config()
PROJECT_PATH = config.project_path
DATA_PATH = PROJECT_PATH.joinpath('data/processed')

with open(DATA_PATH.joinpath("mistral_top10_zeroshot_chain.pkl"), 'rb') as f :
    mistral_zeroshot_result = pickle.load(f)
    # mistral_zeroshot_result = list(map(lambda x : x[0]['generated_text'], mistral_zeroshot_result))

with open(DATA_PATH.joinpath("mistral_top10_fewshot_chain.pkl"), 'rb') as f :
    mistral_fewshot_result = pickle.load(f)
    # mistral_fewshot_result = list(map(lambda x : x[0]['generated_text'], mistral_fewshot_result))


with open(DATA_PATH.joinpath("processed_ranking_datasets.pkl"), 'rb') as f :
    top3, top5, top10, dataset = pickle.load(f)

In [16]:
idx = 7
example = mistral_zeroshot_result[idx]
print(mistral_zeroshot_result[idx])


1. Epigastric pain, previous H. pylori infection
1.1 H. pylori infection diagnosis
1.1.1 H. pylori stool test
2. H. pylori infection
2.1 H. pylori stool test
3. Fluctuating diarrhea and constipation, previous H. pylori infection
3.1 Stool test for infection
4. Hypothyroidism, status post thyroidectomy
5. Hypertension
6. Hyperlipidemia
7. Morbid obesity
8. Anxiety
9. Migraines
10. Polycystic ovary syndrome

10.1 Ultrasound
10.2 CBC
10.3 TSH, Free T4, Reverse T3 tests
11. Previous cholelithiasis
11.1 Ultrasound.


In [17]:
get_results(top10, mistral_zeroshot_result, dataset)

The precision is 0.240
The recall is 0.496
The F1 score is 0.323
The mrr is 0.584


(0.2396670906811919, 0.4958888462516921, 0.5843749999999999)

In [18]:
get_results(top10, mistral_fewshot_result, dataset)

The precision is 0.232
The recall is 0.464
The F1 score is 0.310
The mrr is 0.516


(0.2324591656396136, 0.4638632493131244, 0.5163461538461538)

problem
1. data issue? partially?
2. Due to no training?

### === Using Similarity Measures
- levenshtein
- ngram

In [25]:
import numpy as np
from utils import *

config = load_config()
PROJECT_PATH = config.project_path
DATA_PATH = PROJECT_PATH.joinpath('data/processed')

with open(DATA_PATH.joinpath("mistral_top3_zeroshot.pkl"), 'rb') as f :
    mistral_zeroshot_result = pickle.load(f)

with open(DATA_PATH.joinpath("mistral_top3_fewshot.pkl"), 'rb') as f :
    mistral_fewshot_result = pickle.load(f)

with open(DATA_PATH.joinpath("processed_ranking_datasets.pkl"), 'rb') as f :
    top3, top5, top10, dataset = pickle.load(f)

In [26]:
# levenshtein

text = mistral_zeroshot_result[4]
preprocess_outputs_of_mistral(text)

[('1', 'severe abdominal pain, nausea, and vomiting (alcoholic hepatitis)'),
 ('1', 'abdominal ultrasound or ct scan'),
 ('1', 'comprehensive panel and doa-9 to confirm alcohol cessation'),
 ('2', 'chronic low back pain'),
 ('2', 'check cpe (chronic pain evaluation)'),
 ('2', 'fasting lipids'),
 ('3', 'hypertension (bp elevated')]

In [26]:
print(dataset.iloc[4]['text'])

NPOV 

67 year old male with HTN, Chronic Alcohol Abuse in remission and Chronic low bac pain transferring in from Dr. name name service. 
Patient says he was recently d/c from a hospital and is confused about what medications to take; ""they are too many"". 
Denies needing an eye opener drink and says he has quit alcohol use. 
Patient says he had a drink, developed severe abdominal pain, nausea and vomiting that resulted in his hospitalization. 
No Nausea/Vomiting, No Dyspepsia, No Pain, No early satiety, No Diarrhea, No Constipation, No Cramps, No Hematochezia, No Melena, Has GERD 
No CP, No PNDs, No Orthopnea, No Edema, No Dizziness/Syncope, No Palpitations 
Low back pain is chronic and arose form a work-related injury. Patient has since retired. 
Has Pain, Has Stiffness, No Weakness, No Cramping 
Review of Systems: 
General: 
No Wt loss, No Fatigue, No Fevers, No Chills, No Night sweats 
Eyes: 
No Vision change/loss, No Headaches, No Discharge, No Pain 

No Hearing loss, No Dischar

In [None]:
# LCS (Longest Common Subsequence)


ngram
[thelink](https://webdocs.cs.ualberta.ca/~kondrak/papers/spire05.pdf)

In [44]:
!pip3 install ngram

Collecting ngram
  Downloading ngram-4.0.3-py3-none-any.whl.metadata (3.5 kB)
Downloading ngram-4.0.3-py3-none-any.whl (24 kB)
Installing collected packages: ngram
Successfully installed ngram-4.0.3


In [1]:
import numpy as np
from utils import *

config = load_config()
PROJECT_PATH = config.project_path
DATA_PATH = PROJECT_PATH.joinpath('data/processed')

with open(DATA_PATH.joinpath("mistral_top3_zeroshot.pkl"), 'rb') as f :
    mistral_zeroshot_result = pickle.load(f)

with open(DATA_PATH.joinpath("mistral_top3_fewshot.pkl"), 'rb') as f :
    mistral_fewshot_result = pickle.load(f)

with open(DATA_PATH.joinpath("processed_ranking_datasets.pkl"), 'rb') as f :
    top3, top5, top10, dataset = pickle.load(f)

In [2]:
top3[top3.fileid == 'cancer.report28.txt']

Unnamed: 0,fileid,phrase,color,ranking
0,cancer.report28.txt,rheumatoid arthritis,y,1.0
1,cancer.report28.txt,rituximan,g,1.1
2,cancer.report28.txt,osteoporosis,y,2.0
3,cancer.report28.txt,denosumab,g,2.1
4,cancer.report28.txt,hypercalcemic,g,2.2


In [3]:
len(mistral_zeroshot_result)

105

In [4]:
print(mistral_zeroshot_result[0])


1. Abdominal cramping and distention (associated with IBS)
1.1 No new specific medical tests mentioned
1.2 Consider CBC, BMP, and LFTs to rule out infectious, metabolic, and other causes.
1.2.1 CBC
1.2.2 BMP
1.2.3 LFTs

2. Intentionally induced weight loss
2.1 No medical tests


In [5]:
top3[top3.fileid == 'cancer.report28.txt'][['ranking', 'phrase']]

Unnamed: 0,ranking,phrase
0,1.0,rheumatoid arthritis
1,1.1,rituximan
2,2.0,osteoporosis
3,2.1,denosumab
4,2.2,hypercalcemic


In [6]:
dataset.head()

Unnamed: 0,category,noteid,text
0,liver_failure,liver_failure.report37286.txt,This is a 50-year-old male with a history of d...
1,liver_failure,liver_failure.report41972.txt,Very high a1c and glucose please follow up in ...
2,liver_failure,liver_failure.report51432.txt,name is a lovely just turned 65-year-old gentl...
3,liver_failure,liver_failure.report55225.txt,name is a lovely 53-year-old gentleman who I h...
4,liver_failure,liver_failure.report60517.txt,"NPOV \n\n67 year old male with HTN, Chronic Al..."


In [11]:
from ngram import NGram
import re

def preprocess_outputs_of_mistral(text) :
    p = re.compile('(\d)\.\d?\s(.+)')
    output = p.findall(text)

    numbers = list(map(lambda x : x[0], output))
    output = list(map(lambda x : x[1].lower(), output))

    output = [(x,y) for x, y in zip(numbers, output)]

    return output


def ngram_similarity(gold, pred, n) :

    score = 0  
    cnt = 0
    for i, g in gold :
        for j, p in pred :
            score = NGram.compare(g,p,N=n)
            cnt += 1
    
    return score / cnt


def get_results(gold_dataset, pred_dataset, testset=dataset, n = 3) :
    '''
    gold_dataset : top3, top5, top10 zero/fewshot
    pred_dataset : pickle files for this one
    '''
    fileids = testset['noteid']

    precisions = []
    recalls = []
    mrrs = []

    similarities = []
    for idx, fileid in enumerate(fileids) : 
        gold = gold_dataset[gold_dataset.fileid == fileid][['ranking', 'phrase']].copy()
        gold['ranking'] = gold['ranking'].apply(lambda x : str(x)[0])

        # list of tuples : [(1, cancer), (1, cancer test 1)]
        gold = [tuple(x) for x in gold.to_numpy()]

        pred = pred_dataset[idx]
        pred = preprocess_outputs_of_mistral(pred)

        similarity = ngram_similarity(gold, pred, n=n)
        similarities.append(similarity)
    

    similarities = np.array(similarities)
    print("The similarity is ", similarities.mean())
    return similarities

In [12]:
out = get_results(top3, mistral_zeroshot_result, dataset, 3)

NameError: name 'example' is not defined

In [59]:
out[-20:]

array([0.00049603, 0.00059524, 0.00061275, 0.00066845, 0.00068681,
       0.00069444, 0.00078125, 0.00083333, 0.00085227, 0.00086806,
       0.0009058 , 0.00103648, 0.00126263, 0.00135135, 0.00138889,
       0.00145349, 0.00166667, 0.00173611, 0.00231481, 0.00551471])