In [8]:
import numpy as np
import pandas as pd
import logging
import os
import glob
import regex as re
import torch
import argparse
import random
import itertools
import random
import ast
import sys
import ast

from sklearn.model_selection import train_test_split
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import simpletransformers
from simpletransformers.t5 import T5Model, T5Args
from datasets import load_dataset, get_dataset_split_names

In [9]:
# Point the PyTorch and Hugging Face caches at scratch storage.
# NOTE(review): these variables are assigned after `transformers` and `datasets`
# were imported in the first cell. Hugging Face libraries presumably resolve
# their cache locations at import time, and the dataset-loading output below
# reports a cache under the home directory, so these settings may not take
# effect in this kernel. Consider setting them before the imports - confirm.
os.environ.update({
    'TORCH_HOME': '/scratch/wadhwa.s/cache/',
    'HF_HOME': '/scratch/wadhwa.s/cache',
    'TRANSFORMERS_CACHE': '/scratch/wadhwa.s/cache',
})

In [10]:
# Load the 'Ade_corpus_v2_drug_ade_relation' configuration of ade_corpus_v2 and
# flatten its train split into a DataFrame with one row per dataset record.
dataset = load_dataset('ade_corpus_v2', 'Ade_corpus_v2_drug_ade_relation')
train_split = dataset['train']

# DataFrame column -> dataset field. The "indexes" field is exposed as "index".
column_sources = {
    "text": "text",
    "drug": "drug",
    "effect": "effect",
    "index": "indexes",
}
ade_dict = {column: train_split[field] for column, field in column_sources.items()}
df = pd.DataFrame(ade_dict, index=None)
df.shape

Found cached dataset ade_corpus_v2 (/home/wadhwa.s/.cache/huggingface/datasets/ade_corpus_v2/Ade_corpus_v2_drug_ade_relation/1.0.0/940d61334dbfac6b01ac5d00286a2122608b8dc79706ee7e9206a1edb172c559)
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 438.51it/s]


(6821, 4)

In [11]:
# Path of the text file whose lines are read as input sentences in the next cell.
# The few-shot exemplars are later sampled from these lines.
input_docs = "ade_gpt3.txt"

In [12]:
# Read the sentence file once. `lines` keeps the raw lines (line terminators
# included); `unique_ade` holds the same lines with surrounding whitespace removed.
# NOTE(review): nothing here de-duplicates, despite the name `unique_ade` -
# presumably the file is already unique; verify.
with open(input_docs) as sentence_file:
    lines = list(sentence_file)
unique_ade = list(map(str.strip, lines))

In [13]:
# Draw the few-shot exemplar sentences from the stripped sentence list.
# A dedicated, seeded generator makes the draw reproducible under
# Restart & Run All without touching the global `random` state.
# random.sample raises ValueError if fewer than N_FEW_SHOT sentences are available.
FEW_SHOT_SEED = 42  # arbitrary fixed seed
N_FEW_SHOT = 7      # number of exemplars; matches the "few_7_shot" prompt file name
few_shot = random.Random(FEW_SHOT_SEED).sample(unique_ade, N_FEW_SHOT)

In [14]:
# Assemble the few-shot prompt. Each exemplar sentence is followed by the string
# form of its [drug, effect] pairs (every row of `df` whose text equals the
# sentence), and exemplars are separated by a blank line.
prompt_blocks = []
for sentence in few_shot:
    matches = df.loc[df['text'] == sentence, ["drug", "effect"]]
    de_pairs = [[drug, effect] for drug, effect in zip(matches["drug"], matches["effect"])]
    prompt_blocks.append(sentence + "\n" + str(de_pairs) + "\n\n")
prompt = "".join(prompt_blocks)
print (prompt)

Gentamicin-associated acute renal failure.
[['Gentamicin', 'acute renal failure']]

Ampicillin-associated seizures.
[['Ampicillin', 'seizures']]

Approximately 15 min after the first administration of nebulised morphine the patient became markedly bradypneic (respiratory rate: 4-5 bpm), hypotensive (BP 70/40 mmHg), and responded only partially to command.
[['morphine', 'bradypneic'], ['morphine', 'hypotensive']]

Disseminated eruptive giant mollusca contagiosa in an adult psoriasis patient during efalizumab therapy.
[['efalizumab', 'Disseminated eruptive giant mollusca contagiosa']]

Three patients are reported without a history of angina pectoris who had clinical and electrocardiographic evidence of myocardial ischemia during and immediately after BCNU infusion.
[['BCNU', 'myocardial ischemia']]

Vogt-Koyanagi-Harada disease occurring during interferon alpha therapy for chronic hepatitis C.
[['interferon alpha', 'Vogt-Koyanagi-Harada disease']]

In patients with methotrexate-induced a

In [15]:
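# One-time step, presumably run once to save the sampled prompt. It is kept
# commented out so that re-running the notebook does not overwrite the saved
# few-shot examples that the next cell loads.
# with open("few_7_shot_prompt.txt", "w") as text_file:
#     text_file.write(prompt)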

In [16]:
# Reuse the saved few-shot prompt when it exists so every run sees the same
# exemplars. On a fresh checkout, where the file is absent, persist the prompt
# built in the previous cell instead of failing with FileNotFoundError.
PROMPT_PATH = "few_7_shot_prompt.txt"
if os.path.exists(PROMPT_PATH):
    with open(PROMPT_PATH, "r") as text_file:
        prompt = text_file.read()
else:
    # First run: save the freshly assembled prompt for later sessions.
    with open(PROMPT_PATH, "w") as text_file:
        text_file.write(prompt)
print (prompt)

Gentamicin-associated acute renal failure.
[['Gentamicin', 'acute renal failure']]

Ampicillin-associated seizures.
[['Ampicillin', 'seizures']]

Approximately 15 min after the first administration of nebulised morphine the patient became markedly bradypneic (respiratory rate: 4-5 bpm), hypotensive (BP 70/40 mmHg), and responded only partially to command.
[['morphine', 'bradypneic'], ['morphine', 'hypotensive']]

Disseminated eruptive giant mollusca contagiosa in an adult psoriasis patient during efalizumab therapy.
[['efalizumab', 'Disseminated eruptive giant mollusca contagiosa']]

Three patients are reported without a history of angina pectoris who had clinical and electrocardiographic evidence of myocardial ischemia during and immediately after BCNU infusion.
[['BCNU', 'myocardial ischemia']]

Vogt-Koyanagi-Harada disease occurring during interferon alpha therapy for chronic hepatitis C.
[['interferon alpha', 'Vogt-Koyanagi-Harada disease']]

In patients with methotrexate-induced a

In [17]:
# Prefix every sentence from the first input file with the few-shot prompt.
# Lines are stripped so the inputs carry no trailing newline, matching how the
# dev inputs are built further down.
# NOTE(review): `flan_ade` is reassigned from the dev file in a later cell, so
# this list is only inspected, not predicted on. The exemplars were sampled from
# this same file, so they presumably also appear here as queries - confirm.
flan_ade = [prompt + s.strip() for s in lines]

In [18]:
# Sanity check: show the first assembled input (few-shot prompt followed by the
# first sentence of the input file).
print (flan_ade[0])

Gentamicin-associated acute renal failure.
[['Gentamicin', 'acute renal failure']]

Ampicillin-associated seizures.
[['Ampicillin', 'seizures']]

Approximately 15 min after the first administration of nebulised morphine the patient became markedly bradypneic (respiratory rate: 4-5 bpm), hypotensive (BP 70/40 mmHg), and responded only partially to command.
[['morphine', 'bradypneic'], ['morphine', 'hypotensive']]

Disseminated eruptive giant mollusca contagiosa in an adult psoriasis patient during efalizumab therapy.
[['efalizumab', 'Disseminated eruptive giant mollusca contagiosa']]

Three patients are reported without a history of angina pectoris who had clinical and electrocardiographic evidence of myocardial ischemia during and immediately after BCNU infusion.
[['BCNU', 'myocardial ischemia']]

Vogt-Koyanagi-Harada disease occurring during interferon alpha therapy for chronic hepatitis C.
[['interferon alpha', 'Vogt-Koyanagi-Harada disease']]

In patients with methotrexate-induced a

In [19]:
# Path of the dev sentence file read in the next cell. Its lines become the
# inputs that are passed to the model below.
dev_docs = "ade_dev.txt"

In [20]:
# Read the dev file. `lines_dev` keeps the raw lines (line terminators included);
# `unique_ade_dev` holds the same lines with surrounding whitespace removed.
with open(dev_docs) as dev_file:
    lines_dev = list(dev_file)
unique_ade_dev = list(map(str.strip, lines_dev))

In [21]:
# Build one model input per dev line: the shared few-shot prompt immediately
# followed by the whitespace-stripped sentence. This replaces the earlier
# `flan_ade` built from the first input file.
flan_ade = [f"{prompt}{raw_line.strip()}" for raw_line in lines_dev]

In [23]:
# Options passed as `args` when the T5Model is constructed in the next cell.
# Entries that were disabled in earlier experiments, kept for reference:
#   num_train_epochs = 4
#   preprocess_inputs = False
#   special_tokens_list = ["<bos>", "<eos>", "<rel>", "<ent>"]
#   num_beams = 5
#   learning_rate = lr
model_args = dict(
    cache_dir="/scratch/wadhwa.s/cache/",
    device_map="auto",
    use_multiprocessing=False,
    use_multiprocessed_decoding=False,
    no_save=True,
    overwrite_output_dir=True,
    max_seq_length=512,
    max_length=100,
)

In [24]:
# Instantiate a simpletransformers T5Model of type "t5" from the
# "google/flan-t5-xxl" checkpoint, configured with `model_args` from the cell above.
# NOTE(review): this is presumably a slow, memory-heavy load - confirm the
# hardware before re-running.
model = T5Model("t5", "google/flan-t5-xxl", args=model_args)

In [50]:
# Restrict inference to the first 20 assembled inputs in `flan_ade`.
to_predict = flan_ade[:20]

In [51]:
# Generate outputs for the selected inputs with the loaded model. The cell below
# indexes `preds` and shows a string holding a list literal of [drug, effect] pairs.
preds = model.predict(to_predict)

Generating outputs:   0%|                                 | 0/3 [00:04<?, ?it/s]


In [43]:
# Inspect the first generated output.
preds[0]

"[['oxcarbazepine', 'angioedema']]"

In [44]:
# Show the raw dev sentence at index 1.
# NOTE(review): the prediction inspected above is preds[0], while this prints
# index 1. The execution counts of these two cells (43/44) also precede the
# prediction cell (51). Confirm that predictions and sentences are aligned
# before comparing them.
print (lines_dev[1])

A search of the United States Food and Drug Administration's Adverse Event Reporting System identified nine cases of oxcarbazepine-associated angioedema in pediatric patients aged 16 years and younger.

