In [15]:
import numpy as np
import pandas as pd
import logging
import os
import glob
import regex as re
import torch
import argparse
import random
import itertools
import ast
import sys
import ast
from tqdm import tqdm
import warnings

from sklearn.model_selection import train_test_split
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers import pipeline

warnings.filterwarnings("ignore")

In [2]:
# Load the few-shot prompt (example sentences followed by their relation lists).
# NOTE(review): the printed prompt shows malformed examples (an unbalanced
# bracket after ["rituximab","BOOP"] and a mis-quoted ["acyclovir,Fever"]
# pair) -- confirm and fix them in ade_prompt.txt itself.
with open("ade_prompt.txt") as prompt_file:
    prompt = prompt_file.read()
print(prompt)

We report a rare case of colonic mucosal necrosis following Kalimate (calcium polystryrene sulfonate), an analogue of Kayexalate without sorbitol in a 34-yr-old man.
Relation List: [["calcium polystryrene sulfonate","colonic mucosal necrosis"],["Kalimate","colonic mucosal necrosis"],["Kayexalate","colonic mucosal necrosis"]]</s>

Moreover, these findings suggest that the incidence of BOOP following rituximab therapy may be higher than has been previously appreciated.
Relation List: [["rituximab","BOOP"]</s>

Malignant mixed mullerian tumor of the uterus in a patient taking raloxifene.
Relation List: [["raloxifene","Malignant mixed mullerian tumor of the uterus"]]</s>

We describe a case of clozapine-induced seizures in a patient with treatment-resistant schizophrenia.
Relation List: [["clozapine","seizures"]]</s>

Fever, pulmonary infiltrates, and pleural effusion following acyclovir therapy for herpes zoster ophthalmicus.
Relation List: [["acyclovir,Fever"],["acyclovir","pleural effus

In [3]:
# Checkpoint id and cache location are named once so the model and the
# tokenizer are guaranteed to come from the same checkpoint and cache.
MODEL_NAME = "google/flan-t5-xxl"
# NOTE(review): user-specific absolute path; adjust for your environment.
CACHE_DIR = "/scratch/wadhwa.s/cache"

model = AutoModelForSeq2SeqLM.from_pretrained(
    MODEL_NAME,
    cache_dir=CACHE_DIR,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    cache_dir=CACHE_DIR,
)

In [4]:
# Wrap the loaded model and tokenizer in a text2text-generation pipeline.
generator = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
)

In [7]:
# Smoke test: run the pipeline on an arbitrary sentence to check it generates text.
generator(
    "Three Rings for the Elven-kings under the sky, Seven for the Dwarf-lords in their halls of stone",
    max_length=200,
)



[{'generated_text': 'Three Rings for the Elven-kings under the sky, Seven for the Dwarf-lords in their halls of stone.'}]

In [8]:
# Single sentence used to try the few-shot prompt by hand.
test_str = "Dilated cardiomyopathy associated with chronic overuse of an adrenaline inhaler."

In [9]:
# Full model input: few-shot prompt, then the query sentence, then the
# "Relation List: " cue on its own line for the model to complete.
ip = f"{prompt}{test_str}\nRelation List: "
print(ip)

We report a rare case of colonic mucosal necrosis following Kalimate (calcium polystryrene sulfonate), an analogue of Kayexalate without sorbitol in a 34-yr-old man.
Relation List: [["calcium polystryrene sulfonate","colonic mucosal necrosis"],["Kalimate","colonic mucosal necrosis"],["Kayexalate","colonic mucosal necrosis"]]</s>

Moreover, these findings suggest that the incidence of BOOP following rituximab therapy may be higher than has been previously appreciated.
Relation List: [["rituximab","BOOP"]</s>

Malignant mixed mullerian tumor of the uterus in a patient taking raloxifene.
Relation List: [["raloxifene","Malignant mixed mullerian tumor of the uterus"]]</s>

We describe a case of clozapine-induced seizures in a patient with treatment-resistant schizophrenia.
Relation List: [["clozapine","seizures"]]</s>

Fever, pulmonary infiltrates, and pleural effusion following acyclovir therapy for herpes zoster ophthalmicus.
Relation List: [["acyclovir,Fever"],["acyclovir","pleural effus

In [10]:
# Generate the relation list for the hand-built prompt.
generator(
    ip,
    max_length=200,
)

Token indices sequence length is longer than the specified maximum sequence length for this model (832 > 512). Running this sequence through the model will result in indexing errors


[{'generated_text': '[["adrenaline","cardiomyopathy"]]'}]

In [5]:
# Path of the text file that is read line by line to build the prompts.
input_docs: str = "ade_gpt3.txt"

In [6]:
# `lines` keeps every raw line (line terminators included);
# `unique_ade` holds the same lines with surrounding whitespace stripped.
# NOTE(review): despite its name, `unique_ade` is not de-duplicated here.
with open(input_docs) as f:
    lines = list(f)
unique_ade = list(map(str.strip, lines))

In [7]:
# One model input per source line: few-shot prompt + sentence + "Relation List: " cue.
# The line terminator is dropped and re-added explicitly, so a final line
# without a trailing newline still puts the cue on its own line (downstream
# code recovers the sentence with split("\n")[-2]).
flan_ade = [prompt + s.rstrip("\n") + "\nRelation List: " for s in lines]

In [8]:
# Number of prompts built (one per line read from the input file).
len(flan_ade)

4271

In [9]:
# Draw a fixed-size dev split without replacement. A dedicated, seeded
# generator makes the split reproducible across kernel restarts and leaves
# the global `random` state untouched.
DEV_FRACTION = 0.2
DEV_SEED = 42
dev_flan_ade = random.Random(DEV_SEED).sample(
    flan_ade,
    int(DEV_FRACTION * len(flan_ade)),
)

In [10]:
# Size of the dev split drawn from flan_ade.
len(dev_flan_ade)

854

In [11]:
# Keep only the first five dev prompts for a quick trial run.
dev_flan_ade_sample = dev_flan_ade[0:5]

In [43]:
# Run the pipeline over the whole dev split in a single call.
# The closing parenthesis used to sit inside the commented-out batch_size
# line, which made this cell a SyntaxError; the optional argument now lives
# on its own commented line.
res = generator(
    dev_flan_ade,
    max_length=200,
    # batch_size=1,
)

[{'generated_text': '[["Toxic epidermal necrolysis"], ["Toxic epidermal necrolysis"], ["Interleukin-2"]]'},
 {'generated_text': '[["necrotizing fasciitis"], ["necrotizing fasciitis"], ["necrotizing fasciitis"], ["necrotizing fasciitis"], ["necrotizing fasciitis"], ["necrotizing fasciitis"], ["necrotizing fasciitis"], ["necrotizing fasciitis"], ["necrotizing fasciitis"], ["necrotizing fasciitis"], ["necrotizing fasciitis"], ["necrotizing fasciitis"], ["necrotizing fasciitis"], ["necrotizing fasciitis"], ["necrotizing fasciitis"], ["necrotizing fasciitis"], ["necrot'},
 {'generated_text': '[["senna","drug interaction"]]'},
 {'generated_text': '[["rosiglitazone","swelling"]]'},
 {'generated_text': '[["optic neuritis"], ["recombinant interferon alpha"], ["natural interferon alpha"]]'},
 {'generated_text': '[["flurbiprofen","nephrotoxicity"]]'},
 {'generated_text': '[["olanzapine","akathisia"]]'},
 {'generated_text': '[["disulfiram","fulminant hepatic failure"]]'},
 {'generated_text': '[["C

In [12]:
# Release unused GPU memory cached by PyTorch's allocator.
torch.cuda.empty_cache()

In [13]:
# Run the few-shot extraction one prompt at a time and collect parallel lists:
#   ip[k]       -> the input sentence taken from the prompt
#   response[k] -> the text the model generated for it
ip = []
response = []

for instance in tqdm(dev_flan_ade_sample):
    try:
        res = generator(instance, max_length=200)
        # Assumes the prompt ends with "<sentence>\nRelation List: ", so the
        # second-to-last "\n"-separated piece is the sentence being queried.
        sentence = instance.split("\n")[-2]
        generated = res[0]["generated_text"]
    except Exception:
        # Best-effort: skip this prompt but leave a trace of the failure.
        # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
        logging.exception("Generation failed; skipping prompt ending in %r", instance[-200:])
        continue
    finally:
        # Free cached GPU memory after every attempt, successful or not.
        torch.cuda.empty_cache()
    # Append both values only after both were obtained, so the lists stay aligned.
    ip.append(sentence)
    response.append(generated)

  0%|                                                     | 0/5 [00:00<?, ?it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (884 > 512). Running this sequence through the model will result in indexing errors
100%|█████████████████████████████████████████████| 5/5 [01:09<00:00, 13.95s/it]


In [14]:
# Show each input sentence with its generated relation list, separated by a rule.
for sentence, generated in zip(ip, response):
    print(sentence, generated, "\n=====================\n", sep="\n")

Although the two local anesthetics usually do not cause methemoglobinemia, we suspect that the displacement of lidocaine from protein binding by bupivacaine, in combination with metabolic acidosis and treatment with other oxidants, was the reason for the development of methemoglobinemia.
[["methemoglobinemia"], ["bupivacaine"], ["lidocaine"], ["metabolic acidosis"]]


We report the first case, to our knowledge, of rituximab-related autoimmune hemolytic anemia.
[["rituximab","autoimmune hemolytic anemia"]]


BACKGROUND: Fluoxetine, a highly specific serotonin reuptake inhibitor, has been reported to cause sexual dysfunction in a minority of patients.
[["Fluoxetine","serotonin reuptake inhibitor"]]


We discuss our observations in the cases of two patients with acyclovir neurotoxicity and review the findings of all previous reports in the English language literature.
[["acyclovir","neurotoxicity"]]


A patient with Wegener's granulomatosis rapidly developed a circumferential subglottic s

In [None]:
# Persist the (sentence, generated relation list) pairs, one row per prompt,
# without writing the DataFrame index.
df = pd.DataFrame(dict(input=ip, response=response))
df.to_csv("ade_flan_fewshot_results.csv", index=False)