In [3]:
!pip install rouge-score



In [4]:
from transformers import GPT2Tokenizer,GPT2LMHeadModel
# Instantiating the model and tokenizer with gpt
tokenizer = GPT2Tokenizer.from_pretrained('microsoft/BioGPT-Large')
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
model = GPT2LMHeadModel.from_pretrained('microsoft/BioGPT-Large')

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'BioGptTokenizer'. 
The class this function is called from is 'GPT2Tokenizer'.
You are using a model of type biogpt to instantiate a model of type gpt2. This is not supported for all configurations of models and can yield errors.
Some weights of GPT2LMHeadModel were not initialized from the model checkpoint at microsoft/BioGPT-Large and are newly initialized: ['h.29.ln_2.weight', 'h.47.attn.c_attn.weight', 'h.16.ln_2.weight', 'h.30.ln_2.weight', 'h.7.attn.c_attn.weight', 'h.12.mlp.c_proj.bias', 'h.3.mlp.c_proj.weight', 'h.39.attn.c_proj.weight', 'h.2.mlp.c_proj.weight', 'h.0.ln_2.bias', 'h.44.ln_2.bias', 'h.14.mlp.c_fc.bias', 'h.37.attn.c_attn.weight', 'h.40.ln_1.weight', 'h.15.ln_1.weight', 'h.8.mlp.c_fc.bias', 'h.15.mlp.c_fc.weight', 'h.17.attn.c_proj.bias', 'h.26.ln_2.weight',

Downloading (…)neration_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [5]:
def read_input_file(file_path):
    with open(file_path, 'r') as file:
        return file.read().strip()

In [6]:
file_path = "/kaggle/input/full-texts/Abstract-2529.txt"
input_text = read_input_file(file_path)
input_text = input_text

In [7]:
inputs = tokenizer.encode_plus(input_text, return_tensors='pt', truncation=False, padding='longest')
input_ids, attention_mask = inputs['input_ids'], inputs['attention_mask']

# pad the input_ids and attention_mask with the new padding token
max_length = 100
padded_input_ids = input_ids[:, :max_length].reshape(-1, max_length)
padded_attention_mask = attention_mask[:, :max_length].reshape(-1, max_length)

summary_ids = model.generate(padded_input_ids, attention_mask=padded_attention_mask, early_stopping=False, min_length=90, max_length=max_length, pad_token_id=tokenizer.eos_token_id)

GPT_summary=tokenizer.decode(summary_ids[0],skip_special_tokens=True)
print(GPT_summary)



We report a rare case of immunoglobulin G4 IgG4-related sclerosing cholangitis without other organ involvement. A 69-year-old-man was referred for the evaluation of jaundice. Computed tomography revealed thickening of pigmentosa</w>


In [8]:
from rouge_score import rouge_scorer

def compute_rouge(reference, candidate):
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL', 'rougeLsum'], use_stemmer=True)
    scores = scorer.score(reference, candidate)
    return scores

if __name__ == '__main__':
    file_path = "/kaggle/input/full-texts/Abstract-2529.txt"
    input_abstract = read_input_file(file_path)
    reference =  GPT_summary 
    candidate = input_abstract

    scores = compute_rouge(reference, candidate)
    for key, score in scores.items():
        print(f"{key.upper()}:")
        print(f"  Precision: {score.precision:.4f}")
        print(f"  Recall: {score.recall:.4f}")
        print(f"  F1 Score: {score.fmeasure:.4f}\n")


ROUGE1:
  Precision: 0.2000
  Recall: 0.9429
  F1 Score: 0.3300

ROUGE2:
  Precision: 0.1951
  Recall: 0.9412
  F1 Score: 0.3232

ROUGEL:
  Precision: 0.2000
  Recall: 0.9429
  F1 Score: 0.3300

ROUGELSUM:
  Precision: 0.2000
  Recall: 0.9429
  F1 Score: 0.3300



In [9]:
del model

In [10]:
import torch

In [11]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load the model and tokenizer
MODEL_NAME = "stanford-crfm/BioMedLM"  # or whatever the model's name is on Hugging Face Model Hub
tokenizer = GPT2Tokenizer.from_pretrained(MODEL_NAME)
model = GPT2LMHeadModel.from_pretrained(MODEL_NAME)

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/602k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/276k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/267 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/876 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/10.7G [00:00<?, ?B/s]

In [12]:
def ask_contextual_question(context, question, model, tokenizer, max_length=200):
    combined_input = context + " " + question
    input_ids = tokenizer.encode(combined_input, return_tensors='pt')

    # Generate response
    with torch.no_grad():
        output = model.generate(input_ids, max_length=100, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)

    decoded_output = tokenizer.decode(output[0], skip_special_tokens=True).replace(combined_input, '').strip()
    return decoded_output

# Use the function
question = "What happened to the bile duct? based on the context provided above"
response = ask_contextual_question(GPT_summary, question, model, tokenizer)
print(response)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:28895 for open-end generation.


, we diagnosed IgG4-related sclerosing cholangitis. The patient was treated with prednisolone (30 mg/day) and ursodeoxycholic acid (600 mg


In [13]:
from datasets import load_dataset
dataset = load_dataset('pubmed_qa', 'pqa_labeled')

Downloading builder script:   0%|          | 0.00/3.00k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/2.25k [00:00<?, ?B/s]

Downloading and preparing dataset pubmed_qa/pqa_labeled (download: 656.02 MiB, generated: 1.99 MiB, post-processed: Unknown size, total: 658.01 MiB) to /root/.cache/huggingface/datasets/pubmed_qa/pqa_labeled/1.0.0/2e65addecca4197502cd10ab8ef1919a47c28672f62d7abac7cc9afdcf24fb2d...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/709k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/152M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/533M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/1000 [00:00<?, ? examples/s]

Dataset pubmed_qa downloaded and prepared to /root/.cache/huggingface/datasets/pubmed_qa/pqa_labeled/1.0.0/2e65addecca4197502cd10ab8ef1919a47c28672f62d7abac7cc9afdcf24fb2d. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

In [22]:
dataset['train'][0]

{'pubid': 21645374,
 'question': 'Do mitochondria play a role in remodelling lace plant leaves during programmed cell death?',
 'context': {'contexts': ['Programmed cell death (PCD) is the regulated death of cells within an organism. The lace plant (Aponogeton madagascariensis) produces perforations in its leaves through PCD. The leaves of the plant consist of a latticework of longitudinal and transverse veins enclosing areoles. PCD occurs in the cells at the center of these areoles and progresses outwards, stopping approximately five cells from the vasculature. The role of mitochondria during PCD has been recognized in animals; however, it has been less studied during PCD in plants.',
   'The following paper elucidates the role of mitochondrial dynamics during developmentally regulated PCD in vivo in A. madagascariensis. A single areole within a window stage leaf (PCD is occurring) was divided into three areas based on the progression of PCD; cells that will not undergo PCD (NPCD), ce

In [25]:
total_f1, total_exact = 0, 0

for sample in dataset['train']):
    question = sample['question']
    context = " ".join(sample['context']['contexts'])
    correct_answer = sample['long_answer']

    pred_answer = ask_contextual_question(context, question, model, tokenizer)

    # Compute F1 and Exact match
    common = set(pred_answer.lower().split()) & set(correct_answer.lower().split())
    f1 = 2 * len(common) / (len(pred_answer.split()) + len(correct_answer.split()))
    total_f1 += f1

    if pred_answer.lower() == correct_answer.lower():
        total_exact += 1

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:28895 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:28895 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:28895 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:28895 for open-end generati

Average F1 Score: 0.0032246016124654457
Exact Match: 0.0


In [26]:
# Average F1 and Exact match
avg_f1 = total_f1 / len(dataset['train'])
avg_exact = total_exact / len(dataset['train'])

print(f"Average F1 Score: {avg_f1}")
print(f"Exact Match: {avg_exact}")

Average F1 Score: 0.032246016124654456
Exact Match: 0.0
