In [None]:
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers import DataCollatorForSeq2Seq, BitsAndBytesConfig
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments
import bitsandbytes as bnb

# Enable IPython's autoreload extension in mode 2 so edits to imported local
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import warnings
# Set the tokenizers parallelism flag before any tokenizer is created.
os.environ.update({"TOKENIZERS_PARALLELISM": "false"})

# Blanket-suppress Python warnings for the rest of the session.
warnings.filterwarnings(action="ignore")

# Use the GPU when torch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

In [None]:
# Fetch the "socratic" configuration of GSM8K and keep a handle on each split.
dataset = load_dataset("gsm8k", "socratic")
# NOTE: `eval` shadows Python's builtin of the same name; later cells refer to
# the test split by this name, so it is kept as-is here.
train, eval = dataset["train"], dataset["test"]

print(f"Training set size: {len(train)}")
print(f"Evaluation set size: {len(eval)}")

# Show one raw training record as a sanity check.
display(train[3])

In [None]:
# 4-bit quantization settings; passed as `quantization_config` when the model
# is loaded.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

In [None]:
# Checkpoint shared by the tokenizer and the model.
model_name = "t5-base"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# The model is loaded with the 4-bit settings from `bnb_config`; device
# placement is delegated to device_map="auto".
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
)

In [None]:
# LoRA adapter hyperparameters for the seq2seq model; `target_modules` names
# the modules ("q" and "v") that adapters are attached to.
qlora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    target_modules=["q", "v"],
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
)

In [None]:
# Run PEFT's k-bit preparation step on the quantized model before adapters are
# attached.
# NOTE: this cell rebinds `model`, so re-running it applies the call to the
# already-prepared model; restart from the model-loading cell for a clean state.
model = prepare_model_for_kbit_training(model)

In [None]:
# Apply the QLoRA configuration to the model (rebinding `model` to the
# PEFT-wrapped version), then report the trainable-parameter count.
# NOTE: re-running this cell wraps the already-wrapped model again.
model = get_peft_model(model, qlora_config)
model.print_trainable_parameters()

In [None]:
def preprocess_function(examples):
    """Tokenize a batch of GSM8K examples for seq2seq fine-tuning.

    Args:
        examples: Batch mapping with "question" and "answer" entries, each a
            list of strings (the form `Dataset.map(..., batched=True)` passes).

    Returns:
        dict with "input_ids", "attention_mask" and "labels", each a list of
        per-example token-id lists. Sequences are truncated (512 tokens for
        questions, 128 for answers) but NOT padded.

    Padding is deliberately left to the data collator. Padding the labels here
    with the tokenizer's pad id would feed pad positions into the loss, because
    only positions equal to the collator's `label_pad_token_id` (-100) are
    ignored. Padding per batch in the collator also avoids stretching every
    question to 512 tokens.
    """
    # Truncate only; no `return_tensors`, since `Dataset.map` stores plain lists.
    model_inputs = tokenizer(examples["question"], max_length=512, truncation=True)

    # `text_target` tokenizes the answers as decoder targets.
    labels = tokenizer(text_target=examples["answer"], max_length=128, truncation=True)

    return {
        "input_ids": model_inputs["input_ids"],
        "attention_mask": model_inputs["attention_mask"],
        "labels": labels["input_ids"],
    }

In [None]:
# Tokenize both splits in batches and drop the raw text columns so that only
# the model features remain.
raw_text_columns = ["question", "answer"]
train_data, eval_data = (
    split.map(preprocess_function, batched=True, remove_columns=list(raw_text_columns))
    for split in (train, eval)
)

In [None]:
# Collator that assembles seq2seq batches. Label positions it pads are filled
# with `label_pad_token_id` (-100, the conventional "ignore" index for the
# loss), and padded lengths are rounded up to a multiple of 8.
label_pad_token_id = -100
data_collator = DataCollatorForSeq2Seq(
    tokenizer,
    model=model,
    pad_to_multiple_of=8,
    label_pad_token_id=label_pad_token_id,
)

In [None]:
# Mixed-precision choice: the quantized model computes in bfloat16 (see
# `bnb_config`) and T5 is prone to overflow under fp16, so prefer bf16 whenever
# the GPU supports it. fp16 is kept only as a fallback for CUDA devices without
# bf16 support, and neither flag is set on CPU, where fp16 training is invalid.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
use_fp16 = torch.cuda.is_available() and not use_bf16

training_args = Seq2SeqTrainingArguments(
    # Named after the checkpoint actually being trained (t5-base); this matches
    # the directory the final adapter is saved to after training.
    output_dir="./t5-base-gsm8k-socratic-qlora",
    eval_strategy="epoch",
    learning_rate=5e-4,
    per_device_train_batch_size=2,  # small batch to fit in GPU memory
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    weight_decay=0.01,
    save_strategy="epoch",  # must match eval_strategy for load_best_model_at_end
    logging_dir="./logs",
    logging_steps=10,
    load_best_model_at_end=True,
    remove_unused_columns=False,
    bf16=use_bf16,
    fp16=use_fp16,
)

In [None]:
# Disable `use_cache` on the model config before training starts.
model.config.use_cache = False

# Wire the model, training arguments, tokenized splits and collator together.
trainer = Seq2SeqTrainer(
    data_collator=data_collator,
    eval_dataset=eval_data,
    train_dataset=train_data,
    args=training_args,
    model=model,
)

In [None]:
# Fine-tune the model: run the training loop configured by `training_args` on
# `train_data`, with evaluation on `eval_data`.
trainer.train()

In [None]:
# Persist the fine-tuned (PEFT-wrapped) model and the tokenizer side by side in
# one directory so later cells can reload both from the same path.
model.save_pretrained("./t5-base-gsm8k-socratic-qlora")
tokenizer.save_pretrained("./t5-base-gsm8k-socratic-qlora")

Inference

Evaluation

In [None]:
# Run the trainer's evaluation loop and turn the mean eval loss into a
# perplexity (exp of the loss).
eval_results = trainer.evaluate()
eval_loss = torch.tensor(eval_results["eval_loss"])
perplexity = torch.exp(eval_loss)
print(f"Perplexity: {perplexity}")

In [None]:
# Location of the saved fine-tuned model; the tokenizer lives in the same
# directory. (Nothing is loaded in this cell; it only records the paths.)
model_path = "./t5-base-gsm8k-socratic-qlora"
tokenizer_path = model_path

# Device name as a plain string. This rebinds `device`, which was a
# `torch.device` object earlier in the notebook.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

Accuracy

In [None]:
import re
from transformers import pipeline

In [None]:
# Load the untuned base checkpoint named by `model_name` ("t5-base") and its
# tokenizer, to serve as the comparison baseline.
base_tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

In [None]:
# Reload the fine-tuned tokenizer and model from the directory written after
# training.
# NOTE(review): that directory was saved from the PEFT-wrapped model; confirm
# that AutoModelForSeq2SeqLM resolves the adapter and base weights as intended.
qlora_tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
qlora_model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

In [None]:
# Import only the name this notebook uses rather than star-importing the
# module, so the origin of `Evaluation` is explicit and no other names from
# `evaluation` can silently shadow notebook variables.
from evaluation import Evaluation

# Evaluator constructed around the fine-tuned model/tokenizer and the device.
evaluator = Evaluation(qlora_model, qlora_tokenizer, device=device)

In [None]:
# Evaluate on ROUGE metrics over the raw GSM8K test split (`eval`).
# Presumably this scores the model the evaluator was constructed with (the
# QLoRA model) — TODO confirm in evaluation.py.
rouge_results = evaluator.evaluate_rouge(eval)
print("ROUGE results:", rouge_results)

In [None]:
# Baseline: accuracy of the untuned base model on the test split.
# NOTE(review): `base_model` was loaded without an explicit device, while the
# evaluator was built with `device=device` — confirm evaluate_accuracy moves
# the model it is given. The printed label matches the QLoRA accuracy cell's
# label, so the two outputs are only distinguishable by cell order.
accuracy_base = evaluator.evaluate_accuracy(eval, base_model, base_tokenizer)
print(f"Accuracy: {accuracy_base:.2f}%")

In [None]:
# Accuracy of the QLoRA fine-tuned model on the same test split, for comparison
# with the base-model figure.
accuracy = evaluator.evaluate_accuracy(eval, qlora_model, qlora_tokenizer)
print(f"Accuracy: {accuracy:.2f}%")

Using Chain-of-Thought

In [None]:
# Evaluate using Chain-of-Thought (CoT) and few-shot learning
# accuracy_cot_few_shot = evaluator.evaluate_with_cot(eval)
# print(f"Accuracy: {accuracy_cot_few_shot:.2f}%")

Semantic Similarity

In [None]:
# Average semantic similarity over the test split. The value is multiplied by
# 100 for display, so it is presumably a fraction in [0, 1] — TODO confirm
# against evaluate_semantic_similarity.
res = evaluator.evaluate_semantic_similarity(eval)
print(f"Average semantic similarity: {(res*100):.2f}%")