In [7]:
from transformers import BertTokenizer, BertForMaskedLM
import torch

def read_input_file(file_path):
    with open(file_path, 'r') as file:
        return file.read().strip()

def generate_summary(prompt, model_name="microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract", max_length=512):
    tokenizer = BertTokenizer.from_pretrained(model_name)
    model = BertForMaskedLM.from_pretrained(model_name)
    
    prompt = "Summary: " + prompt
    
    input_ids = tokenizer.encode(prompt, return_tensors='pt', truncation=True, max_length=max_length)
    
    # Generate a response using the model
    with torch.no_grad():
        outputs = model(input_ids)
        predictions = outputs.logits

    # Just get the predicted tokens for now, it's hacky but might provide a compressed form of the content
    predicted_ids = torch.argmax(predictions, dim=2)
    summary = tokenizer.decode(predicted_ids[0], skip_special_tokens=True)

    return summary

if __name__ == "__main__":
    file_path = "/kaggle/input/full-texts/FullText-2529.txt"
    input_text = read_input_file(file_path)
    generated_summary = generate_summary(input_text)
    print(generated_summary)


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


##asia summary : serum immunoglobulin g4 - related sclerosing cholangitis ( igg4 - sc ) is a type of autoimmune pancreatitis associated with elevated serum igg4 levels. none, nonetypes 3 and 4 igg4 - sc are difficult to differentiate from bile duct cancer, and hepatectomy has been reported sporadically in such situations. none, none, describe a rare case of a patient with igg4 - sc but without pancreatic lesions. accurate diagnosis was made, without the need for performing a partial hepatectomy. the patient was a 69 - year - old man referred for the evaluation of jaundice and steatorrhea persisting for 1 week. he had a prior history of hypertension, diabetes mellitus, and benign prostatic hypertrophy, with no previous pancreatic disease or autoimmune diseases. physical examination revealed normal vital signs, mild icterus, and jaundice, with no abnormalities on abdominal examination. laboratory studies showed elevated total bilirubin, liver, and biliary enzyme levels. endoscopic retrog

In [4]:
!pip install rouge-score

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25ldone
[?25h  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24954 sha256=61b459a066c044831f1baeea65850c9f0bd7fc4a7b0b6128079f5b99224fa3f4
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [9]:
from rouge_score import rouge_scorer

def compute_rouge(reference, candidate):
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL', 'rougeLsum'], use_stemmer=True)
    scores = scorer.score(reference, candidate)
    return scores

if __name__ == '__main__':
    file_path = "/kaggle/input/full-texts/Abstract-2529.txt"
    input_abstract = read_input_file(file_path)
    reference =  input_abstract  
    candidate = generated_summary

    scores = compute_rouge(reference, candidate)
    for key, score in scores.items():
        print(f"{key.upper()}:")
        print(f"  Precision: {score.precision:.4f}")
        print(f"  Recall: {score.recall:.4f}")
        print(f"  F1 Score: {score.fmeasure:.4f}\n")

ROUGE1:
  Precision: 0.2681
  Recall: 0.6727
  F1 Score: 0.3834

ROUGE2:
  Precision: 0.1235
  Recall: 0.3110
  F1 Score: 0.1768

ROUGEL:
  Precision: 0.1208
  Recall: 0.3030
  F1 Score: 0.1727

ROUGELSUM:
  Precision: 0.1208
  Recall: 0.3030
  F1 Score: 0.1727

