In [1]:
import numpy as np
import pandas as pd
import logging
import os
import glob
import regex as re
import torch
import argparse
import random
import itertools
import ast
import sys
import ast
from tqdm import tqdm
import warnings

from sklearn.model_selection import train_test_split
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers import pipeline
from datasets import load_dataset, get_dataset_split_names

warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
res = pd.read_csv("ade_flan_fewshot_results.csv")
res.shape

(854, 2)

In [20]:
results = res.copy()

error_count = 0

for ix, row in results.iterrows():
    
    try:
        response = ast.literal_eval(row["response"])
    except:
        error_count += 1
        print ("Error on index: ", ix)
        print (row["response"])
        results.drop(ix, inplace=True)

Error on index:  25
[["acute promyelocytic leukemia"], ["acute focal myositis"], ["acute promyelocytic leukemia"], ["acute focal myositis"], ["acute promyelocytic leukemia"], ["acute focal myositis"], ["acute promyelocytic leukemia"], ["acute focal myositis"], ["acute promyelocytic leukemia"], ["acute focal myositis"], ["acute promyelocytic leukemia"], ["acute focal myositis"], ["acute promyelocytic leukemia"], ["acute focal myositis"], ["acute promyelocy
Error on index:  28
[["Adrenaline dacryolith"], ["Adrenaline dacryolith"], ["Adrenaline dacryolith"], ["Adrenaline dacryolith"], ["Adrenaline dacryolith"], ["Adrenaline dacryolith"], ["Adrenaline dacryolith"], ["Adrenaline dacryolith"], ["Adrenaline dacryolith"], ["Adrenaline dacryolith"], ["Adrenaline dacryolith"], ["Adrenaline dacryolith"], ["Adrenaline dacryolith"], ["Adrenaline dacryolith"], ["
Error on index:  30
[["extrapyramidal symptoms"], ["haloperidol"], ["extrapyramidal symptoms"], ["haloperidol"], ["extrapyramidal symptoms

In [21]:
ast.literal_eval(results.iloc[25]["response"])

[['capecitabine', 'multifocal leukoencephalopathy']]

In [30]:
nc_relations_counter = 0
nc_relation_ml = []
for ix, row in results.iterrows():
    response = ast.literal_eval(row["response"])
    nc_relations = []
    for p in response:
        if len(p) != 2:
            nc_relations_counter += 1
            nc_relations.append(p)
    if (len(nc_relations) > 0):
        results.drop(ix, inplace=True)
        nc_relation_ml.append(nc_relations)
        print (nc_relations)

[["Crohn's disease"], ['AZA'], ['Parvovirus B19']]
[["Crohn's disease"], ['Copaxone'], ['Multiple sclerosis']]
[["Meckel's diverticulum"], ['metformin'], ['obese patient']]
[['posthypoglycemic hyperglycemia'], ['posthypoglycemic hyperglycemia'], ['rebound hyperglycemia']]
[['transient central diabetes insipidus'], ['nephrogenic diabetes insipidus'], ['lithium'], ['transient central diabetes insipidus']]
[['hyperpigmentation'], ['ribavirin'], ['hyperpigmentation'], ['IFN'], ['hyperpigmentation']]
[['mania'], ['fluoxetine'], ['mania'], ['fluoxetine'], ['mania'], ['fluoxetine'], ['mania']]
[['Pneumocystis jiroveci pneumonia'], ["Kaposi's sarcoma"], ['Corticosteroids']]
[['pseudoephedrine', 'brompheniramine', 'dextromethorphan']]
[['Ecstasy'], ['poisoning']]
[['hypoalbuminemia'], ['thrombocytopenia']]
[['acute myeloid leukemia'], ['etoposide'], ['haemophagocytic lymphohistiocytosis']]
[['Granulomatous anterior uveitis'], ['Metripranolol'], ['Granulomatous anterior uveitis'], ['Metripranolo

In [31]:
len(nc_relation_ml)

136

In [32]:
results.shape

(683, 2)

In [39]:
results = results.apply(lambda x: x.astype(str).str.lower())

In [3]:
dataset = load_dataset('ade_corpus_v2', 'Ade_corpus_v2_drug_ade_relation')
ade_dict = {
    "text" : dataset['train']['text'],
    "drug" : dataset['train']['drug'],
    "effect": dataset['train']['effect'],
    "index" : dataset['train']['indexes']
}
df = pd.DataFrame(ade_dict, index=None)
df.shape
df = df.apply(lambda x: x.astype(str).str.lower())

Found cached dataset ade_corpus_v2 (/home/wadhwa.s/.cache/huggingface/datasets/ade_corpus_v2/Ade_corpus_v2_drug_ade_relation/1.0.0/940d61334dbfac6b01ac5d00286a2122608b8dc79706ee7e9206a1edb172c559)
100%|██████████| 1/1 [00:00<00:00, 370.78it/s]


In [4]:
res.head()

Unnamed: 0,input,response
0,OBJECTIVE: We report a case of cutaneous KS le...,"[[""CML"",""imatinib""]]"
1,We present the case of an adult Crohn's diseas...,"[[""Crohn's disease""], [""AZA""], [""Parvovirus B1..."
2,We describe a renal transplant recipient maint...,"[[""cyclosporine"",""prednisone""]]"
3,"We report the case of a young man, affected by...","[[""leflunomide"",""rheumatoid arthritis""]]"
4,Acute pancreatitis in a child with idiopathic ...,"[[""5-aminosalicylic acid"",""acute pancreatitis""]]"


In [50]:
results = res.copy()
gold_col = []
for ix, row in results.iterrows():
    gold = df.loc[df['text'] == row["input"]]
    true_set = []
    for i, r in gold.iterrows():
        true_set.append([r["drug"], r["effect"]])
    print (gold.shape)
    print (row["input"])
    # print ("GENERATED: ", ast.literal_eval(row["response"]))
    print ("TRUE: ", true_set)
    gold_col.append(true_set)
    print ("\n-------------------------------------------\n")

(1, 4)
objective: we report a case of cutaneous ks lesions in a patient affected by cml treated with imatinib.
GENERATED:  [['cml', 'imatinib']]
TRUE:  [['imatinib', 'cutaneous ks lesions']]

-------------------------------------------

(2, 4)
we describe a renal transplant recipient maintained on cyclosporine and prednisone developing nocardia asteroides brain abscess.
GENERATED:  [['cyclosporine', 'prednisone']]
TRUE:  [['cyclosporine', 'nocardia asteroides brain abscess'], ['prednisone', 'nocardia asteroides brain abscess']]

-------------------------------------------

(3, 4)
we report the case of a young man, affected by rheumatoid arthritis who developed a rapid-onset short-of-breath, hemoptysis, and severe weakness, about 2 weeks after the administration of leflunomide.
GENERATED:  [['leflunomide', 'rheumatoid arthritis']]
TRUE:  [['leflunomide', 'hemoptysis'], ['leflunomide', 'severe weakness'], ['leflunomide', 'short-of-breath']]

-------------------------------------------

(

(1, 4)
hepatobiliary disorders associated with orally administered terbinafine have rarely been reported.
GENERATED:  [['terbinafine', 'hepatobiliary disorders']]
TRUE:  [['terbinafine', 'hepatobiliary disorders']]

-------------------------------------------

(2, 4)
this case report describes an adolescent with severe lupus erythematosus who received cyclophosphamide (cy) paired with taste (cod liver oil) and smell (rose perfume) as conditioned stimuli.
GENERATED:  [['cyclophosphamide', 'adverse event']]
TRUE:  [['cy', 'severe lupus erythematosus'], ['cyclophosphamide', 'severe lupus erythematosus']]

-------------------------------------------

(3, 4)
hepatitis with bridging fibrosis and reversible hepatic insufficiency in a woman with rheumatoid arthritis taking methotrexate.
GENERATED:  [['methotrexate', 'hepatitis']]
TRUE:  [['methotrexate', 'bridging fibrosis'], ['methotrexate', 'hepatitis'], ['methotrexate', 'reversible hepatic insufficiency']]

---------------------------------

(1, 4)
a 66-year-old mand suffering from severe coronary heart disease took digoxin with suicidal intent an was treated for the ensuing complete atrioventricular block with digoxin-specific antibody fragments.
GENERATED:  [['digoxin', 'suicidal intent']]
TRUE:  [['digoxin', 'complete atrioventricular block']]

-------------------------------------------

(3, 4)
when the acute manic state is characterized by marked psychotic symptoms and intense anxiety, it may be associated with increased vulnerability to the development of severe lithium neurotoxicity.
GENERATED:  [['manic state', 'intense anxiety']]
TRUE:  [['lithium', 'intense anxiety'], ['lithium', 'neurotoxicity'], ['lithium', 'psychotic symptoms']]

-------------------------------------------

(1, 4)
subfulminant hepatitis b after infliximab in crohn's disease: need for hbv-screening?
GENERATED:  [['infliximab', 'subfulminant hepatitis b']]
TRUE:  [['infliximab', 'subfulminant hepatitis b']]

-------------------------------------

(1, 4)
immunosuppression elicited by the extensive administration of prednisolone was suspected for the initiation of the generalized mite infestation.
GENERATED:  [['prednisolone', 'infection']]
TRUE:  [['prednisolone', 'mite infestation']]

-------------------------------------------

(1, 4)
this article describes a patient with suspected ciprofloxacin-induced interstitial nephritis.
GENERATED:  [['ciprofloxacin', 'interstitial nephritis']]
TRUE:  [['ciprofloxacin', 'interstitial nephritis']]

-------------------------------------------

(1, 4)
according to the naranjo et al. adverse-reaction probability scale, enoxaparin was the probable cause of hepatotoxicity in this patient.
GENERATED:  [['enoxaparin', 'hepatotoxicity']]
TRUE:  [['enoxaparin', 'hepatotoxicity']]

-------------------------------------------

(2, 4)
to our knowledge, we describe the first reported case of isolated paresthesia and peripheral neuropathy, without systemic involvement, secondary to sulindac administrat

(1, 4)
conclusion: there may be an association between raloxifene and the development of malignant mixed mesodermal tumor.
GENERATED:  [['raloxifene', 'malignant mixed mullerian tumor']]
TRUE:  [['raloxifene', 'malignant mixed mesodermal tumor']]

-------------------------------------------

(1, 4)
to our knowledge, this case is the first published report of levofloxacin-induced ten.
GENERATED:  [['levofloxacin', 'ten']]
TRUE:  [['levofloxacin', 'ten']]

-------------------------------------------

(1, 4)
leukopenia due to parvovirus b19 in a crohn's disease patient using azathioprine.
GENERATED:  [["crohn's disease", 'azathioprine']]
TRUE:  [['azathioprine', 'leukopenia']]

-------------------------------------------

(1, 4)
although praziquantel administration may have been effective in killing the parasite in both patients, we are concerned about the production of marked inflammation as a result of treatment.
GENERATED:  [['praziquantel', 'inflammation']]
TRUE:  [['praziquantel', 'i

In [52]:
results.insert(2, "true", gold_col, True)

In [89]:
predicted = {}
true = {}

for ix, row in results.iterrows():
    pred = ast.literal_eval(row["response"])
    gold = ast.literal_eval(row["true"])
    predicted[row["input"]] = []
    true[row["input"]] = []
    for pairs in pred:
        predicted[row["input"]].append([pairs[0], pairs[1]])
    
    for pairs in gold:
        true[row["input"]].append([pairs[0], pairs[1]])
    

In [92]:
len(true), len(predicted)

(683, 683)

In [94]:
tp_ins_count = 0

input = []
relations = []
prefix = []
gold_relations = []

tp = ()
for k in sorted(predicted):
    # print (k)
    # print (predicted[k])
    # print ("TRUE: " + str(true[k]))
    curr_tp = ()
    for tpair in true[k]:
        if tpair in predicted[k]:
            tp += (tpair,)
            curr_tp += (tpair,)
            # print (k)
            # print ("TRUE: " + str(tpair))
            # print ("TRUE POSITIVE PREDICTED: " + str(tpair))
            # print ("-----------------------")
        else:
            for ppair in predicted[k]:
                if (ppair[0] in tpair[0]) or (tpair[0] in ppair[0]):
                    if (ppair[1] in tpair[1]) or (tpair[1] in ppair[1]):
                        tp += (ppair,)
                        curr_tp += (ppair,)
#                         print (k)
#                         print ("TRUE: " + str(tpair))
#                         print ("PARTIAL TRUE POSITIVE GENERATED: " + str(ppair))
#                         print ("-----------------------")
    if len(curr_tp) > 0 and len(true[k]) == len(curr_tp):
        tp_ins_count += 1
        print (k)
        input.append(k)
        relations.append(list(curr_tp))
        gold_relations.append(true[k])
        prefix.append("ADE")
        print ("TRUE: ", true[k])
        print ("TRUE POSITIVES GENERATED: ", list(curr_tp))
        print ("-----------------------")


print ("Total True Positive Instances: ", tp_ins_count)
print ("Total ADE Relations: ", len(input))
                    

5-fluorouracil cardiotoxicity complicating treatment of stage iib cervical cancer--case report.
TRUE:  [['5-fluorouracil', 'cardiotoxicity']]
TRUE POSITIVES GENERATED:  [['5-fluorouracil', 'cardiotoxicity']]
-----------------------
a 23-year-old woman with systemic lupus erythematosus had a severe hypersensitivity reaction to the drug ibuprofen.
TRUE:  [['ibuprofen', 'severe hypersensitivity reaction']]
TRUE POSITIVES GENERATED:  [['ibuprofen', 'hypersensitivity reaction']]
-----------------------
a 25-year-old woman sought medical attention because of iliocaval manifestations of retroperitoneal fibrosis while she was taking methysergide.
TRUE:  [['methysergide', 'retroperitoneal fibrosis']]
TRUE POSITIVES GENERATED:  [['methysergide', 'iliocaval manifestations of retroperitoneal fibrosis']]
-----------------------
a 35-year-old female with borderline lepromatous (bl) leprosy who suffered from dapsone-induced erythroderma is reported.
TRUE:  [['dapsone', 'erythroderma']]
TRUE POSITIVES

In [104]:
fp = ()

fp_list = []
text = []

for k in sorted(predicted):
    # print (k)
    # print (predicted[k])
    # print ("TRUE: " + str(true[k]))
    for ppair in predicted[k]:
        flag = True
        if ppair in true[k]:
            continue
            # print (k)
            # print ("TRUE: " + str(tpair))
            # print ("TRUE POSITIVE PREDICTED: " + str(tpair))
            # print ("-----------------------")
        else:
            for tpair in true[k]:
                if (ppair[0] in tpair[0]) or (tpair[0] in ppair[0]):
                    if (ppair[1] in tpair[1]) or (tpair[1] in ppair[1]):
                        flag = False
            if flag:
                text.append(k)
                fp_list.append(ppair)
                fp += (ppair,)
                print (k)
                print ("TRUE: " + str(true[k]))
                print ("FALSE POSITIVE GENERATED: " + str(ppair))
                print ("-----------------------")

a 10-year-old asthmatic boy began to suffer from urticarial rash and moderately severe bronchospasm after 8 weeks' treatment with disodium cromoglycate.
TRUE: [['disodium cromoglycate', 'moderately severe bronchospasm'], ['disodium cromoglycate', 'urticarial rash']]
FALSE POSITIVE GENERATED: ['disodium cromoglycate', 'asthma']
-----------------------
a 5-month-old infant became lethargic and poorly responsive after receiving 1 drop of brimonidine in each eye.
TRUE: [['brimonidine', 'lethargic'], ['brimonidine', 'poorly responsive']]
FALSE POSITIVE GENERATED: ['brimonidine', 'infant']
-----------------------
a 56-year-old male parkinsonian patient developed a unique behavioral change following the oral administration of cinepazide, a cerebral vasodilator.
TRUE: [['cinepazide', 'behavioral change']]
FALSE POSITIVE GENERATED: ['cinepazide', 'oral administration']
-----------------------
a 58-year old patient is reported who developed an ischaemic cerebrovascular accident 22 hours after su

In [105]:
# pd.DataFrame({"text": text, "gen_fp": fp_list}).to_csv("fp_ade_flan_eval.csv", index=False)

In [100]:
fn = ()

fn_list = []
gen_list = []
text = []

for k in sorted(true):
#     print (k)
    fn_set = []
    for tpair in true[k]:
        flag = True
        if tpair in predicted[k]:
            continue
        else:
            for ppair in predicted[k]:
                if (ppair[0] in tpair[0]) or (tpair[0] in ppair[0]):
                    if (ppair[1] in tpair[1]) or (tpair[1] in ppair[1]):
                        flag = False
            if flag:
                text.append(k)
                fn_list.append(tpair)
                gen_list.append(predicted[k])
                fn += (tpair,)
                fn_set.append(tpair)
                # print (k)
                # print ("TRUE: " + str(true[k]))
                # print ("GENERATED: " + str(predicted[k]))
                # print ("FALSE NEGATIVE: " + str(tpair))
                # print ("-----------------------")
    if len(fn_set) > 0:
        print (k)
        print ("TRUE: " + str(true[k]))
        print ("GENERATED: " + str(predicted[k]))
        print ("FALSE NEGATIVE(S): " + str(fn_set))
        print ("\n-----------------------\n")

5-fluorouracil (5-fu)-associated peripheral neuropathy is an uncommon event.
TRUE: [['5-fluorouracil', 'peripheral neuropathy'], ['5-fu', 'peripheral neuropathy']]
GENERATED: [['5-fluorouracil', 'neuropathy']]
FALSE NEGATIVE(S): [['5-fu', 'peripheral neuropathy']]

-----------------------

a 10-year-old asthmatic boy began to suffer from urticarial rash and moderately severe bronchospasm after 8 weeks' treatment with disodium cromoglycate.
TRUE: [['disodium cromoglycate', 'moderately severe bronchospasm'], ['disodium cromoglycate', 'urticarial rash']]
GENERATED: [['disodium cromoglycate', 'asthma']]
FALSE NEGATIVE(S): [['disodium cromoglycate', 'moderately severe bronchospasm'], ['disodium cromoglycate', 'urticarial rash']]

-----------------------

a 10-year-old boy with osteosarcoma and normal renal function manifested laboratory evidence of impending renal toxicity and extreme elevation of aspartate aminotrasferase and alanine aminotransferase within 2 hours after the completion of 

In [103]:
# pd.DataFrame({"text": text, "true_fn": fn_list, "gen":gen_list}).to_csv("fn_ade_flan_eval.csv", index=False)

In [107]:
len(fp)

272

In [111]:
precision = len(tp) / (len(tp) + len(fp))
recall = len(tp) / (len(tp) + len(fn))
f1 = 2 * (precision * recall) / (precision + recall)
print ("Precision: ", precision)
print ("Recall: ", recall)
print ("F1: ", f1)

Precision:  0.6358768406961178
Recall:  0.4337899543378995
F1:  0.5157437567861021
