<a href="https://colab.research.google.com/github/mobarakol/tutorial_notebooks/blob/main/BioGPT_Large_PubMedQA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers accelerate tokenizers sacremoses --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m57.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.7/199.7 KB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m77.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m880.6/880.6 KB[0m [31m28.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3/190.3 KB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone


In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM

In [3]:
# The tokenizer and the model weights are both loaded from this one checkpoint id.
MODEL_NAME = "kamalkraj/BioGPT-Large-PubMEDQA"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load the causal-LM weights, then move the whole model onto the GPU.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    low_cpu_mem_usage=True,
).cuda()

Downloading (…)okenizer_config.json:   0%|          | 0.00/272 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.24M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/566k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/119 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/674 [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/6.29G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [4]:
import re
import pandas as pd
import torch

from tqdm.notebook import tqdm

In [5]:
# PubMedQA test split hosted in the microsoft/BioGPT repository. The file is
# read as tab-separated text without a header row, into two named columns.
url = "https://raw.githubusercontent.com/microsoft/BioGPT/main/data/PubMedQA/raw/test.tsv"
test_data = pd.read_csv(
    url,
    sep="\t",
    header=None,
    names=["Question", "Answer"],
)

In [6]:
test_data.head()

Unnamed: 0,Question,Answer
0,question: Is anorectal endosonography valuable...,yes
1,question: Is there a connection between sublin...,yes
2,question: Is the affinity column-mediated immu...,yes
3,question: Does a physician's specialty influen...,yes
4,question: Locoregional opening of the rodent b...,yes


In [7]:
## preprocess adapted from https://github.com/microsoft/BioGPT/blob/main/examples/QA-PubMedQA/rebuild_data_large.py#L38-L58
# Step 1: drop embedded newlines and surrounding whitespace from every prompt.
flattened = (raw.replace('\n', '').strip() for raw in test_data["Question"])
# Step 2: make sure every prompt ends with a period before it is fed to the model.
questions = [text if text.endswith(".") else text + "." for text in flattened]

# Gold labels, in the same row order as `questions`.
y_true = test_data["Answer"].to_list()

In [8]:
# Decoding settings shared by every prompt: a single beam and no sampling,
# with up to 1024 newly generated tokens per prompt.
generation_kwargs = dict(
    max_new_tokens=1024,
    num_beams=1,
    early_stopping=False,
    do_sample=False,
)

answers = []
for question in tqdm(questions):
    inputs = tokenizer(question, return_tensors="pt").to("cuda")
    with torch.inference_mode():
        beam_output = model.generate(**inputs, **generation_kwargs)
        # Only the first returned sequence is decoded; special tokens are dropped.
        decoded = tokenizer.decode(beam_output[0], skip_special_tokens=True)
        answers.append(decoded)

  0%|          | 0/500 [00:00<?, ?it/s]

In [12]:
answers[0]

'question: Is anorectal endosonography valuable in dyschesia? context: Dyschesia can be provoked by inappropriate defecation movements. The aim of this prospective study was to demonstrate dysfunction of the anal sphincter and / or the musculus (m.) puborectalis in patients with dyschesia using anorectal endosonography. Twenty consecutive patients with a medical history of dyschesia and a control group of 20 healthy subjects underwent linear anorectal endosonography (Toshiba models IUV 5060 and PVL-625 RT). In both groups, the dimensions of the anal sphincter and the m. puborectalis were measured at rest, and during voluntary squeezing and straining. Statistical analysis was performed within and between the two groups. The anal sphincter became paradoxically shorter and / or thicker during straining (versus the resting state) in 85% of patients but in only 35% of control subjects. Changes in sphincter length were statistically significantly different (p < 0.01, chi (2) test) in patient

In [9]:
# Postprocess adapted from https://github.com/microsoft/BioGPT/blob/main/examples/QA-PubMedQA/postprocess.py
# Regex patterns tried in order; only the first one found in a line is applied.
prefix = [
    '(learned[0-9]+ )+',
    'we can conclude that',
    'we have that',
    'in conclusion,',
    ]


def strip_prefix(line):
    """Return the text that follows the last occurrence of the first matching prefix.

    The patterns in ``prefix`` are tested in list order. For the first pattern
    found anywhere in ``line``, the line is split on that pattern and the final
    piece is returned with surrounding whitespace removed. When no pattern is
    found, ``line`` is returned untouched (it is not stripped).
    """
    first_hit = next((pat for pat in prefix if re.search(pat, line) is not None), None)
    if first_hit is None:
        return line
    pieces = re.split(first_hit, line)
    return pieces[-1].strip()


def convert_relis_sentence(sentence):
    """Pull the answer out of the "the answer to the question given the context is ..." sentence.

    Returns everything on the same line after that phrase, stripped of
    surrounding whitespace, or ``None`` when the phrase does not occur.
    """
    match = re.search(r"the answer to the question given the context is(.*)", sentence)
    if match is None:
        return None
    return match.group(1).strip()

# Turn every generated text into a predicted label. Generations that do not
# contain the expected answer sentence are recorded as "failed" and printed.
hypothesis = []
cnt = 0
fail_cnt = 0


for i, line in enumerate(answers):
    cnt += 1
    # endswith() instead of line[-1]: an empty generation must not raise
    # IndexError; it falls through to the "failed" branch below instead.
    if line.endswith("."):
        line = line[:-1]
    strip_line = strip_prefix(line)
    ans = convert_relis_sentence(strip_line)
    if ans is not None:
        hypothesis.append(ans)
    else:
        hypothesis.append("failed")
        fail_cnt += 1
        # Report the 1-based example id together with the text that could not be parsed.
        print("Failed:id:{}, line:{}".format(i+1, line))

In [10]:
from sklearn.metrics import accuracy_score

In [11]:
accuracy_score(y_true,hypothesis)

0.804