## LIBRARY INSTALLATION

In [None]:
!pip install evaluate peft transformers==4.42.0

## API KEYS

In [None]:
import os
import wandb
from huggingface_hub import HfApi
from kaggle_secrets import UserSecretsClient

# LOADING API-KEYS
# Keys are fetched through the Kaggle secrets client rather than hard-coded in the notebook.
user_secrets = UserSecretsClient()

# HUGGINGFACE LOGIN
# `hugging_face_token` is reused by later cells for gated model downloads and hub uploads.
hugging_face_token = user_secrets.get_secret("HUGGING_FACE_API_KEY")
api = HfApi(token=hugging_face_token)

# WANDB LOGIN
wandb_api_token = user_secrets.get_secret("WANDB_API_KEY")
wandb.login(key = wandb_api_token)

## LOADING MODEL AND TOKENIZERS

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# tokenizer.pad_token = tokenizer.eos_token
# Checkpoint id on the Hugging Face Hub; the token is passed to both download calls.
checkpoint = "meta-llama/Meta-Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(checkpoint, token=hugging_face_token)
# device_map="auto" lets the library decide weight placement; weights are loaded as float16.
model = AutoModelForCausalLM.from_pretrained(checkpoint, 
                                             device_map="auto",
                                             torch_dtype=torch.float16,
                                             token=hugging_face_token,
                                            )

In [None]:
from transformers import TextStreamer
# Streamer built on the current tokenizer, configured to skip the prompt and special tokens.
# It can be handed to Evaluate.load_streamer() to stream single-question responses.
streamer = TextStreamer(
    tokenizer,
    skip_prompt=True,
    skip_special_tokens=True,
)

In [None]:
from datasets import load_dataset
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from torch.utils.data import DataLoader
from tqdm import tqdm

# Load dataset
dataset = load_dataset("pubmed_qa", "pqa_labeled")
test_set = dataset["train"]  # the "train" split is used as the evaluation set here

# Load model and tokenizer
checkpoint = "meta-llama/Meta-Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(checkpoint, token=hugging_face_token)
model = AutoModelForCausalLM.from_pretrained(checkpoint,
                                             device_map="auto",
                                             torch_dtype=torch.float16,
                                             token=hugging_face_token)
# Reuse EOS as the pad token so variable-length prompts can be batched.
tokenizer.pad_token = tokenizer.eos_token
# Decoder-only models continue from the last position of every row. Padding must
# therefore go on the left; with right padding the model would be asked to
# continue from pad tokens and batched generations degrade.
tokenizer.padding_side = "left"
# Device of the first parameter; input batches are moved here before generate().
device = next(model.parameters()).device

def collate_fn(batch):
    """Collate dataset rows into a tokenized prompt batch plus gold answers.

    Args:
        batch: list of rows, each providing ``question`` and ``final_decision``.

    Returns:
        dict with ``inputs`` (the padded, truncated tokenizer output as
        PyTorch tensors) and ``true_answers`` (the ``final_decision`` of every
        row, in batch order).
    """
    prompts = []
    for row in batch:
        prompts.append(f"Question: {row['question']}\nAnswer:")

    # Uses the notebook-level tokenizer.
    encoded = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True)
    gold_answers = [row['final_decision'] for row in batch]
    return {'inputs': encoded, 'true_answers': gold_answers}

batch_size = 8  # Adjust based on your GPU memory
dataloader = DataLoader(test_set, batch_size=batch_size, collate_fn=collate_fn)

# Evaluation loop
results = []
total_batches = len(dataloader)

model.eval()
for batch in tqdm(dataloader, total=total_batches, desc="Evaluating"):
    inputs = {k: v.to(device) for k, v in batch['inputs'].items()}
    # Width of the (padded) prompt; everything after it is newly generated.
    prompt_length = inputs['input_ids'].shape[1]

    with torch.no_grad():
        # max_length counts prompt tokens plus generated tokens.
        outputs = model.generate(**inputs, max_length=1024)

    # generate() returns prompt + continuation for decoder-only models. Decode
    # only the continuation, otherwise words from the question itself can
    # satisfy the substring check below and inflate the accuracy.
    generated_answers = tokenizer.batch_decode(
        outputs[:, prompt_length:], skip_special_tokens=True
    )

    results.extend(
        {"true_answer": true_answer, "generated_answer": generated_answer}
        for true_answer, generated_answer in zip(batch['true_answers'], generated_answers)
    )

# Calculate metrics
# NOTE(review): this is still a plain substring match, so a short label can
# match inside a longer word of the generated text.
correct = sum(1 for r in results if r["true_answer"] in r["generated_answer"])
# Guard against an empty evaluation set instead of raising ZeroDivisionError.
accuracy = correct / len(results) if results else 0.0
print(f"Accuracy: {accuracy:.2f}")

## DATAHELPER CLASS

In [None]:
# LOADING DATASET
from datasets import load_dataset
from transformers import AutoTokenizer, BitsAndBytesConfig, AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer, BertForSequenceClassification
# From here on `checkpoint`, `model`, `tokenizer` and `dataset` are rebound to the
# BERT sequence-classification setup (they previously held the Llama / PubMedQA objects).
checkpoint = "bert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint, padding_side="right")

# GLUE MRPC, kept as three separate split handles.
dataset = load_dataset("glue", "mrpc")
train_dataset = dataset['train']
test_dataset = dataset['test']
validation_dataset = dataset['validation']

In [None]:
from helper_classes.datahelper import DataHelper

class CustomDataHelper(DataHelper):
    """DataHelper variant that adds a combined ``input`` column to each dataset."""

    def handle_dataset(self):
        """Add an ``input`` column to every loaded dataset and register the result.

        Reads ``self.datasets_dict`` and stores each processed dataset in
        ``self.handled_datasets_dict`` under ``"<name>_handled"``.

        Returns:
            ``self.handled_datasets_dict`` after all datasets were processed.
        """
        # .items() has to be called; iterating the bound method itself raises TypeError.
        for dataset_name, dataset in self.datasets_dict.items():
            # NOTE(review): `+` is applied to the two whole columns. Confirm the
            # dataset type supports this and item assignment, and that a per-row
            # concatenation of the two sentences is what results.
            dataset['input'] = dataset['sentence1'] + dataset['sentence2']
            self.handled_datasets_dict.update({dataset_name + "_handled": dataset})
        return self.handled_datasets_dict

# Datasets handed to the helper, keyed by a descriptive name.
datasets_dict = {
    "train_dataset": train_dataset,
    "test_dataset": test_dataset,
    "validation_dataset": validation_dataset
}

# Batching and tokenization settings passed to DataHelper.set_dataset_config().
# NOTE(review): how the helper interprets each key is defined in helper_classes.datahelper — confirm there.
dataset_configuration = {
    "batch_size": 32,
    "shuffle": True,
    "return_tensors": "pt",
    "max_length":128,
    "padding": True,
    "truncation": True
}

# Column names passed to DataHelper.load_config_columns().
# NOTE(review): "input" is only created by CustomDataHelper.handle_dataset(), which is
# commented out below — verify the plain DataHelper does not require that column.
column_configuration = {
    "user_query_column": "input",
    "columns_to_tokenize":"sentence1"
}

# data_helper = CustomDataHelper()
# data_helper.handle_dataset()
data_helper = DataHelper()
data_helper.load_datasets_dict(datasets_dict)
data_helper.load_config_columns(column_configuration)
data_helper.set_dataset_config(dataset_configuration)

# data_helper.load_tokenizer(tokenizer)
# tokenized_dataset = data_helper.tokenize_datasets()
# Tokenization is skipped (commented out above); dataloaders are built directly.
dataloader = data_helper.datasets_to_dataloader()
dataloader

In [None]:
# Extracting Datasets
# Re-reads the three splits from `dataset` and collects them in a fixed
# train / test / validation order; the next cell unpacks its results in that same order.
train_dataset = dataset['train']
test_dataset = dataset['test']
validation_dataset = dataset['validation']
datasets = [train_dataset, test_dataset, validation_dataset]

In [None]:
# NOTE(review): DataHelper is constructed with keyword arguments here, while an earlier
# cell uses DataHelper() plus load_*/set_* calls — confirm which API the helper supports.
datahelper = DataHelper(tokenizer=tokenizer, user_query_column = "input", columns_to_tokenize="training_input")

# Format every split first, then tokenize the formatted splits (order of `datasets` is preserved).
formatted_datasets = [datahelper.format_dataset(dataset) for dataset in datasets]
tokenized_datasets = [datahelper.tokenize_dataset(dataset) for dataset in formatted_datasets]

# Rebinds the split names to their tokenized versions.
train_dataset, test_dataset, validation_dataset = tokenized_datasets

In [None]:
# tokenized_datasets
# datahelper.clean_up_dataset(base_dataset=dataset['train'], tokenized_dataset=tokenized_dataset)
# train_dataset = dataset['train'].map(preprocessing, batched=True, batch_size=32)
# train_dataset = train_dataset.remove_columns(['input', 'instruction', 'output', 'final_text'])
# train_dataset.set_format(type='pt', columns=['input_ids', 'attention_mask'], output_all_columns=True)

## Trainer Code

In [None]:
import torch
from transformers import Trainer, TrainingArguments

# Minimal single-epoch training configuration.
# NOTE(review): save_strategy is "epoch" while save_steps is also set — save_steps
# presumably has no effect in that combination; confirm against the TrainingArguments docs.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=1,
    per_device_train_batch_size=28,
    save_total_limit=1,
    eval_strategy="steps",
    save_strategy="epoch",
    save_steps = 100,
    # label_names = ['not_equivalent', 'equivalent'],
    fp16=torch.cuda.is_available()  # Use mixed precision if GPUs support it
)

# Initialize Trainer
# NOTE(review): `data_collator` and `compute_metrics` are not defined at notebook level
# in the visible cells (they only exist as locals inside main() further down), so this
# cell raises NameError unless they are defined elsewhere before it runs.
# NOTE(review): `dataset['train']` / `dataset['validation']` are passed as loaded; no
# tokenization step is applied to `dataset` in the cells above — verify.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation'],
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

# Train the model
trainer.train()

In [None]:
import os
import torch
import warnings
import numpy as np
from time import time
from evaluate import load
from datasets import load_dataset
from transformers import DataCollatorWithPadding
from peft.utils import get_peft_model_state_dict
from transformers import TrainingArguments, Trainer
from peft import get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training
from transformers import AutoTokenizer, BitsAndBytesConfig, AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer, BertForSequenceClassification

    
# Silence RuntimeWarning noise and disable tokenizers' internal parallelism
# (the training below uses dataloader worker processes).
warnings.filterwarnings("ignore", category=RuntimeWarning)
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Fixed seed for reproducibility; the same value is passed to TrainingArguments in main().
from transformers import set_seed
set_seed(42)

def main():
    """Fine-tune ``bert-base-uncased`` on GLUE MRPC and save the final model.

    Meant to be executed once per process by ``notebook_launcher``. Loads the
    model, tokenizer and dataset, trains with ``Trainer`` (checkpoints are
    pushed to the Hugging Face Hub on every save) and finally writes the model
    to ``/kaggle/working/test-model-5``.

    Relies on the notebook-level ``hugging_face_token`` for hub authentication.
    """
    # MODEL
    checkpoint = "bert-base-uncased"
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint,
                                                  #torch_dtype=torch.float16,
                                                 )

#     peft_config = LoraConfig(inference_mode=False,
#                              r=32,
#                              lora_alpha = 512,
#                              lora_dropout = 0.1,
#                              bias="none",
#                              peft_type = TaskType.SEQ_CLS, #" CAUSAL_LM"
#                              )

#     model = prepare_model_for_kbit_training(model)
#     model = get_peft_model(model, peft_config)
#     model.print_trainable_parameters()

    # TYPICAL TRAINING CODE
    accuracy = load("accuracy")
    tokenizer = AutoTokenizer.from_pretrained(checkpoint, padding_side="right")
    # Only add a pad token when the tokenizer lacks one. The previous
    # unconditional `pad_token = eos_token` assigned None for BERT (which has
    # no EOS token) before '[PAD]' was put back.
    if tokenizer.pad_token is None:
        tokenizer.add_special_tokens({'pad_token': '[PAD]'})

    def compute_metrics(eval_pred):
        """Return accuracy of the argmax predictions against the labels."""
        logits, labels = eval_pred
        predictions = np.argmax(logits, axis=-1)
        return accuracy.compute(predictions=predictions, references=labels)

    def encode(examples):
        """Tokenize a batch of sentence pairs and copy ``label`` to ``labels``."""
        output = tokenizer(examples['sentence1'],
                           examples['sentence2'],
                           truncation=True,
                           padding='max_length',
                           max_length=128,
                          )

        output['labels'] = examples['label']
        return output

    # DATASET PREP
    dataset = load_dataset("glue", "mrpc")
    dataset = dataset.map(encode, batched=True)
    dataset = dataset.remove_columns(['sentence1', 'sentence2', 'label', 'idx'])
    dataset.set_format(type='pt', columns=['input_ids', 'attention_mask', 'labels',], output_all_columns=True)
    data_collator = DataCollatorWithPadding(tokenizer)

    # TRAINING ARGUMENTS
    training_args = TrainingArguments(
        # DIRECTORIES FOR SAVING AND LOGGING
        output_dir="/kaggle/working/glue_model_checkpointing_test-8",
        logging_dir="/kaggle/working/logs",

        # BASIC PARAMS
        num_train_epochs=5,
        fp16=True,
        seed=42,
        data_seed=42,

        # OPTIMIZER SETUP
        optim="rmsprop",
        learning_rate=1e-4,
        lr_scheduler_type="cosine",
        #lr_scheduler_kwargs={"power": 2.0},
        warmup_ratio=0.2,
        #warmup_steps=200,

        # DATA RELATED ARGUMENTS
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        dataloader_num_workers=4,  # Reduces training time by a decent percentage
        dataloader_pin_memory=True,
        dataloader_persistent_workers=True,
        ddp_find_unused_parameters=False,

        # LOGGING
        logging_strategy="epoch",  # Logs the training loss once per epoch
        # Per the original author's note: only matters when PEFT is enabled
        # (needed there for validation loss/accuracy to be logged).
        label_names=['labels'],
        #report_to = tensorboard

        # EVALUATION
        eval_strategy="epoch",  # Evaluates on eval_dataset at the end of every epoch
        #eval_steps

        # SAVING TO HUB
        save_strategy="epoch",
        save_total_limit=1,
        push_to_hub=True,
        hub_token=hugging_face_token,
        hub_strategy="every_save",
        hub_model_id="nnilayy/glue_model_checkpointing_test-8",

#     SAVING VRAM
#         torch_empty_cache_steps=40, #Clears vram cache during training after a few steps
#         gradient_checkpointing=True,
#         gradient_accumulation_steps=4,
    )

    # TRAINER CONSTRUCTOR
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset["train"],
        eval_dataset=dataset["test"],
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

# Pushing Tokenizer, Model Card, Label Mapping to Hub
#     tokenizer.push_to_hub("nnilayy/glue_model_checkpointing_test-8")
#     model.config.label2id = {'equivalent': 0, 'not_equivalent': 1}
#     model.config.id2label = {0: 'equivalent', 1: 'not_equivalent'}
#     model.config.push_to_hub("nnilayy/glue_model_checkpointing_test-8")

    trainer.train()
    # main() runs in several processes; let only the main one write the final
    # model so the processes do not write the same files concurrently.
    if trainer.is_world_process_zero():
        model.save_pretrained("/kaggle/working/test-model-5")

# Run main() in two worker processes through accelerate's notebook launcher.
# NOTE(review): presumably one process per GPU — confirm the runtime actually exposes two devices.
if __name__ == "__main__":
    from accelerate import notebook_launcher
    notebook_launcher(main, num_processes=2)

In [None]:
from transformers import AutoTokenizer, BitsAndBytesConfig, AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer, BertForSequenceClassification
from datasets import load_dataset
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint, padding_side="right")


def encode(examples):
    """Tokenize a batch of MRPC sentence pairs and attach their labels.

    Args:
        examples: batch mapping with ``sentence1``, ``sentence2`` and ``label``.

    Returns:
        The tokenizer output (PyTorch tensors, padded/truncated to 128 tokens)
        with the batch labels stored under ``labels``.
    """
    tokenizer_options = dict(
        truncation=True,
        padding='max_length',
        max_length=128,
        return_tensors="pt",
    )
    encoded = tokenizer(examples['sentence1'], examples['sentence2'], **tokenizer_options)
    encoded['labels'] = examples['label']
    return encoded


# Build the tokenized validation split: encode in batches, then drop the raw columns.
dataset = load_dataset("glue", "mrpc")
val_dataset = dataset['validation'].map(encode, batched=True).remove_columns(
    ['sentence1', 'sentence2', 'label', 'idx']
)
# Return torch tensors for the model inputs and labels when the dataset is indexed.
val_dataset.set_format(
    type='pt',
    columns=['input_ids', 'attention_mask', "token_type_ids", 'labels'],
    output_all_columns=True,
)
val_dataset

In [None]:
# Point `validation_dataset` at the tokenized validation split built in the previous cell.
validation_dataset = val_dataset
# test_dataset = val_dataset

In [None]:
from transformers import set_seed
from peft import PeftConfig, PeftModelForSequenceClassification

# set_seed(42)

# Load the untouched base checkpoint and the fine-tuned weights side by side.
# `fine_tuned_model_id` is the directory main() saves the trained model to.
base_model_id = "bert-base-uncased"
fine_tuned_model_id = "/kaggle/working/test-model-5/"
# Both models are moved to the GPU ("cuda" is hard-coded).
base_model = AutoModelForSequenceClassification.from_pretrained(base_model_id).to("cuda")
fine_tuned_model = AutoModelForSequenceClassification.from_pretrained(fine_tuned_model_id).to("cuda")


## EVALUATOR CLASS

In [None]:
from tqdm import tqdm 
from evaluate import load
from torch.utils.data import DataLoader

# test_dataset = DataHelper(dataset)
# NOTE(review): `test_dataset` is whatever an earlier cell last bound to that name;
# every batch value must be a tensor for the `.to("cuda")` call below — verify.
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=True)

accuracy = load("accuracy")

# Despite its name, `all_logits` collects the predicted class ids (argmax), not raw logits.
labels, all_logits = [], []
for batch in tqdm(test_dataloader, desc="Evaluating"):
    # Move the whole batch to the GPU; it is passed to the model as keyword arguments.
    inputs = {k:v.to("cuda") for k,v in batch.items()}
    with torch.no_grad():
        outputs = fine_tuned_model(**inputs)
    logits = outputs.logits
    predictions = torch.argmax(logits, dim=-1)
    all_logits.append(predictions)
    labels.append(inputs['labels'])

# Concatenate the per-batch tensors; both lists were appended in the same batch order,
# so predictions and labels stay aligned even though the loader shuffles.
labels = torch.cat(labels, dim=0)
all_logits = torch.cat(all_logits, dim=0)

accuracy.compute(predictions = all_logits, references = labels)

In [None]:
# LOADING MODELS
from transformers import set_seed
from peft import PeftConfig, PeftModelForSequenceClassification

# Base checkpoint vs. the locally saved fine-tuned model, both placed on the GPU.
base_model_id = "bert-base-uncased"
fine_tuned_model_id = "/kaggle/working/test-model-5/"
base_model = AutoModelForSequenceClassification.from_pretrained(base_model_id).to("cuda")
fine_tuned_model = AutoModelForSequenceClassification.from_pretrained(fine_tuned_model_id).to("cuda")

# LOADING METRICS
# Metric objects from the `evaluate` library, later handed to the Evaluator helper.
from evaluate import load
accuracy = load("accuracy")
f1 = load("f1")
recall = load("recall")


In [None]:
from helper_classes.evaluator import Evaluator

# Models to compare, keyed by a display name.
models_dict = {
    "fine_tuned_model": fine_tuned_model,
    "base_model": base_model,
}

# Metrics to compute, keyed by a display name.
metrics_dict = {
    "accuracy": accuracy,
    "f1": f1,
    "recall": recall,   
}

# Datasets to evaluate on; the training split is deliberately left out.
datasets_dict = {
#     "train_dataset": train_dataset,
    "test_dataset": test_dataset,
    "validation_dataset": validation_dataset,
}


# Configure the project Evaluator helper, then run it.
# NOTE(review): the structure of `result` is defined in helper_classes.evaluator — confirm there.
evaluator = Evaluator()
evaluator.set_device("cuda")
evaluator.load_models_dict(models_dict)
evaluator.load_metrics_dict(metrics_dict)
evaluator.load_datasets_dict(datasets_dict)

result = evaluator.evaluate_datasets()
result

## Evaluate Code

In [None]:
from tqdm import tqdm
import torch
import logging

class Evaluate:
    """Generate text responses from a causal language model.

    Wraps a tokenizer/model pair and offers single-question generation as well
    as question-by-question generation over a dataset, both with the same
    fixed sampling settings.
    """

    def __init__(self, tokenizer, model):
        """Store the tokenizer/model pair and align the model's generation config.

        Args:
            tokenizer: tokenizer used to encode prompts and decode outputs.
            model: generation-capable model exposing ``generation_config``.
        """
        # Side effect: lowers the verbosity of the whole `transformers` logger.
        logging.getLogger("transformers").setLevel(logging.ERROR)

        self.tokenizer = tokenizer
        self.model = model
        self.streamer = None
        # Make generate() pad and stop with the tokenizer's own token ids.
        self.model.generation_config.pad_token_id = self.tokenizer.pad_token_id
        self.model.generation_config.eos_token_id = self.tokenizer.eos_token_id

    def _generate_response(self, question, return_prompt=False, streamer=None):
        """Run one sampled generation for ``question`` and decode it.

        Shared by ``single_question_evaluate`` and ``qbq_evaluate`` so both use
        identical generation settings.
        """
        # Follow the model's device when it exposes one; fall back to the
        # previously hard-coded "cuda".
        device = getattr(self.model, "device", "cuda")
        inputs = self.tokenizer(question, return_tensors="pt").to(device)
        with torch.no_grad():
            output = self.model.generate(
                **inputs,
                max_new_tokens=1000,
                return_dict_in_generate=True,
                temperature=0.5,
                do_sample=True,
                top_k=50,
                num_beams=1,
                early_stopping=False,
                # Same ids as set on generation_config in __init__.
                eos_token_id=self.tokenizer.eos_token_id,
                pad_token_id=self.tokenizer.pad_token_id,
                streamer=streamer,
            )

        sequence = output.sequences[0]
        if not return_prompt:
            # Drop the prompt tokens so only the newly generated part is decoded.
            sequence = sequence[inputs.input_ids.shape[1]:]
        return self.tokenizer.decode(sequence, skip_special_tokens=True)

    def single_question_evaluate(self, question, return_prompt=False, stream_response=False):
        """Generate a response for a single question.

        Args:
            question: prompt text.
            return_prompt: when True, the decoded text includes the prompt.
            stream_response: when True, the streamer registered through
                ``load_streamer`` is passed to ``generate``.

        Returns:
            The decoded response string.
        """
        self.model.eval()
        streamer = self.streamer if stream_response else None
        return self._generate_response(question, return_prompt=return_prompt, streamer=streamer)

    def load_streamer(self, streamer):
        """Register ``streamer`` for streamed single-question generation and return it."""
        self.streamer = streamer
        return self.streamer

    def batch_evaluate(self, dataset, batch_size, return_prompt=False):
        """Placeholder for batched generation; not implemented, returns None."""
        pass

    def qbq_evaluate(self, dataset, return_prompt=False):
        """Generate a response for every question in ``dataset``, one at a time.

        Args:
            dataset: object supporting ``len()`` and providing the prompts
                under the ``training_input`` key.
            return_prompt: when True, each decoded text includes its prompt.

        Returns:
            List of decoded responses in dataset order.
        """
        self.model.eval()
        # Fetch the column once instead of once per question.
        questions = dataset['training_input']
        model_responses = []
        for index in tqdm(range(len(dataset)), desc="Generating responses", unit="question"):
            model_responses.append(
                self._generate_response(questions[index], return_prompt=return_prompt)
            )
        return model_responses
