In [2]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, Trainer, TrainingArguments, EvalPrediction,AutoModelForSequenceClassification,AutoModelForCausalLM
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments,DataCollatorForSeq2Seq
from peft import LoraConfig, get_peft_model,  TaskType,PeftModel
from datasets import  load_dataset ,Dataset,DatasetDict,load_metric
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
import nltk
import numpy as np
import sentencepiece
from transformers import T5Tokenizer, T5ForConditionalGeneration


In [4]:
device='mps'

In [16]:
tokenizer = T5Tokenizer.from_pretrained("./biotokenizer")
base_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")
model = PeftModel.from_pretrained(base_model, "./biomrcmodel")
model.to(device)

PeftModelForSeq2SeqLM(
  (base_model): LoraModel(
    (model): T5ForConditionalGeneration(
      (shared): Embedding(32128, 1024)
      (encoder): T5Stack(
        (embed_tokens): Embedding(32128, 1024)
        (block): ModuleList(
          (0): T5Block(
            (layer): ModuleList(
              (0): T5LayerSelfAttention(
                (SelfAttention): T5Attention(
                  (q): lora.Linear(
                    (base_layer): Linear(in_features=1024, out_features=1024, bias=False)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.01, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Linear(in_features=1024, out_features=32, bias=False)
                    )
                    (lora_B): ModuleDict(
                      (default): Linear(in_features=32, out_features=1024, bias=False)
                    )
                    (lora_embedding_A): ParameterDict()
      

In [6]:
dataset = pd.read_csv('train.csv')
dataset = dataset.drop('qtype', axis=1)
dataset = dataset.rename(columns={'Question': 'question', 'Answer': 'answer'})
df_full_train, df_test = train_test_split(dataset, test_size=0.2, random_state=56)
df_train, df_val = train_test_split(df_full_train, test_size=0.25, random_state=56)
df_train = df_train.reset_index(drop=True)
df_val = df_train.reset_index(drop=True)
df_test = df_train.reset_index(drop=True)
train_dataset = Dataset.from_pandas(df_train)
val_dataset = Dataset.from_pandas(df_val)
test_dataset = Dataset.from_pandas(df_test)
health_dataset_dict = DatasetDict({
    'train': train_dataset,
    'validation': val_dataset,
    'test': test_dataset
})

In [26]:
def generate_answer(question: str, max_length: int = 512):
    input_text = "Assuming you are working as a Doctor. Please answer this question: " + question
    inputs = tokenizer(input_text, return_tensors="pt").to(device)
    outputs = model.generate(
        input_ids=inputs["input_ids"], 
        max_length=max_length, 
        num_beams=5, 
        early_stopping=True
    )
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer

In [None]:
health_dataset_dict

DatasetDict({
    train: Dataset({
        features: ['question', 'answer'],
        num_rows: 9843
    })
    validation: Dataset({
        features: ['question', 'answer'],
        num_rows: 9843
    })
    test: Dataset({
        features: ['question', 'answer'],
        num_rows: 9843
    })
})

In [29]:
peft_model_answers = []
actual_answers = []
num_rows_to_process = 200

for idx, example in enumerate(health_dataset_dict['validation'].select(range(num_rows_to_process))):
    question = example['question']
    actual_answer = example['answer']
    
    generated_answer = generate_answer(question)
    
    peft_model_answers.append(generated_answer)
    actual_answers.append(actual_answer)

    # Print progress for every 100 rows
    if (idx + 1) % 100 == 0:
        print(f"Processed {idx + 1} rows")

Processed 100 rows
Processed 200 rows


In [30]:
import evaluate
rouge = evaluate.load('rouge')
peft_model_results = rouge.compute(
    predictions=peft_model_answers,
    references=actual_answers,
    use_aggregator=True,
    use_stemmer=True,
)


Downloading builder script: 100%|██████████| 6.27k/6.27k [00:00<00:00, 8.97MB/s]


{'rouge1': np.float64(0.3637164083752216), 'rouge2': np.float64(0.2447748016568256), 'rougeL': np.float64(0.3255033604990902), 'rougeLsum': np.float64(0.33209901865735203)}


In [31]:
print(peft_model_results)

{'rouge1': np.float64(0.3637164083752216), 'rouge2': np.float64(0.2447748016568256), 'rougeL': np.float64(0.3255033604990902), 'rougeLsum': np.float64(0.33209901865735203)}
