<a href="https://colab.research.google.com/github/nnilayy/MedGPT/blob/main/PEFT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install bitsandbytes evaluate datasets transformers peft



In [None]:
import torch
from transformers import AutoTokenizer, BitsAndBytesConfig, AutoModelForCausalLM
from transformers import BertTokenizer, BertForMaskedLM, BertForSequenceClassification

# Load the base model in 8-bit so it can be fine-tuned with LoRA adapters.
# Alternative checkpoint that was tried:
#   "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
checkpoint = "bert-base-uncased"

# BERT ships with its own [PAD] token and has no EOS token, so the pad token
# must NOT be overwritten with `tokenizer.eos_token` (that would set it to None
# and break padding later on).
tokenizer = BertTokenizer.from_pretrained(checkpoint, padding_side="right")

# `bnb_8bit_compute_dtype` is not a BitsAndBytesConfig option (transformers
# reports it as an unused kwarg), so only the 8-bit switch is passed.
# For 4-bit loading use `load_in_4bit=True` together with the `bnb_4bit_*`
# options (e.g. bnb_4bit_quant_type="nf4", bnb_4bit_use_double_quant=True).
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

# The downstream task (GLUE/MRPC) provides ONE label per sentence pair, so the
# model needs a sequence-classification head with two classes. A masked-LM head
# produces one prediction per token, which is what caused
# "Expected input batch_size (8192) to match target batch_size (64)".
model = BertForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=2,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
)

Unused kwargs: ['bnb_8bit_compute_dtype']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now set to True since model is quantized.
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# TRAINING
from peft import get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training

# The task is selected with `task_type`; `peft_type` names the adapter method
# and is not the place to pass a TaskType. The training cell below fine-tunes
# on MRPC (one label per sentence pair), hence SEQ_CLS rather than CAUSAL_LM.
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
)

# Prepare the quantized base model for training, then attach the LoRA adapters.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)
model.gradient_checkpointing_enable()

# print_trainable_parameters() prints the summary itself and returns None,
# so wrapping it in print() only adds a stray "None" line to the output.
model.print_trainable_parameters()

trainable params: 294,912 || all params: 109,809,210 || trainable%: 0.2686
None


In [None]:
# TYPICAL TRAINING CODE
import numpy as np  # required by compute_metrics (np.argmax)
from transformers import TrainingArguments, Trainer
from evaluate import load
from datasets import load_dataset
from transformers import DataCollatorWithPadding

accuracy = load("accuracy")

# Make sure the tokenizer has a pad token before padding to max_length.
# This is a no-op when [PAD] is already registered as the pad token.
tokenizer.add_special_tokens({'pad_token': '[PAD]'})


def compute_metrics(eval_pred):
    """Compute accuracy from a (logits, labels) evaluation pair.

    The predicted class is the argmax over the last axis of the logits.
    Returns whatever the `accuracy` metric's `compute` call returns.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return accuracy.compute(predictions=predictions, references=labels)


def encode(examples):
    """Tokenize a batch of sentence pairs and copy `label` into `labels`.

    Pairs are truncated/padded to a fixed length of 128 tokens.
    """
    output = tokenizer(
        examples['sentence1'],
        examples['sentence2'],
        truncation=True,
        padding='max_length',
        max_length=128,
    )
    output['labels'] = examples['label']
    return output


dataset = load_dataset("glue", "mrpc")
dataset = dataset.map(encode, batched=True)
dataset = dataset.remove_columns(['sentence1', 'sentence2', 'label', 'idx'])
dataset.set_format(
    type='pt',
    columns=['input_ids', 'attention_mask', 'labels', 'token_type_ids'],
    output_all_columns=True,
)

data_collator = DataCollatorWithPadding(tokenizer)

training_args = TrainingArguments(
    output_dir="your-name/bigscience/mt0-large-lora",
    learning_rate=1e-4,
    per_device_train_batch_size=64,
    fp16=True,
    per_device_eval_batch_size=8,
    num_train_epochs=1,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=False,
    # With a PEFT-wrapped model the label column should be named explicitly;
    # otherwise validation loss/accuracy may not be logged.
    label_names=["labels"],
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    # Evaluate every epoch on the validation split; the test split should be
    # kept for a final, one-off measurement.
    eval_dataset=dataset["validation"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

Map:   0%|          | 0/3668 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Map:   0%|          | 0/1725 [00:00<?, ? examples/s]



ValueError: Expected input batch_size (8192) to match target batch_size (64).

In [None]:
# INFERENCE
# NOTE: this cell rebinds the notebook-level `model` and `tokenizer` to an
# OPT-350m LoRA checkpoint, replacing whatever earlier cells assigned to them.
from peft import AutoPeftModel , AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import torch

# Fall back to CPU so the cell does not crash on runtimes without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Alternative adapter (non causal-LM):
# model = AutoPeftModel.from_pretrained("smangrul/openai-whisper-large-v2-LORA-colab")
model = AutoPeftModelForCausalLM.from_pretrained("ybelkada/opt-350m-lora")

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
model = model.to(device)

model.eval()
inputs = tokenizer(
    "Preheat the oven to 350 degrees and place the cookie dough",
    return_tensors="pt",
).to(device)

# Pass the whole encoding so the attention mask reaches generate() together
# with the input ids.
outputs = model.generate(**inputs, max_new_tokens=50)

# batch_decode accepts the generated tensor directly; no numpy round-trip needed.
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])

In [None]:
# MERGE LORA WEIGHTS WITH BASE MODEL
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Step 1: load the plain pre-trained model. The string below is a placeholder;
# replace it with the real model name or local path.
base_model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path="base_model_name_or_path",
)

# Step 2: attach the trained adapter (placeholder path) on top of the base model.
peft_model = PeftModel.from_pretrained(
    model=base_model,
    model_id="path_to_trained_adapter",
)

# Step 3: merge the adapter into the base model and unload the adapter wrapper.
merged_model = peft_model.merge_and_unload()

In [None]:
from transformers import BertTokenizer, BitsAndBytesConfig, BertForSequenceClassification, Trainer, TrainingArguments, DataCollatorWithPadding
from peft import get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training
from datasets import load_dataset
import torch
import warnings
warnings.filterwarnings("ignore")


def compute_metrics(eval_pred):
    """Return {'accuracy': float} for a (logits, labels) pair of numpy arrays.

    The predicted class is the argmax over the last dimension of the logits;
    accuracy is the mean of element-wise matches against the labels.
    """
    logits, labels = eval_pred
    logits = torch.from_numpy(logits)
    labels = torch.from_numpy(labels)

    predictions = torch.argmax(logits, dim=-1)
    accuracy = (predictions == labels).float().mean()
    return {'accuracy': accuracy.item()}


# Preprocess the dataset
def encode(examples):
    """Tokenize a batch of sentence pairs and copy `label` into `labels`.

    Pairs are truncated/padded to a fixed length of 128 tokens. Uses the
    notebook-level `tokenizer`, which must exist before this is called.
    """
    outputs = tokenizer(examples['sentence1'], examples['sentence2'], truncation=True, padding='max_length', max_length=128)
    outputs['labels'] = examples['label']
    return outputs


# Load tokenizer and model
checkpoint = "bert-base-uncased"

# Created before the dataset is mapped so `encode` never depends on a
# tokenizer left over from an earlier cell.
tokenizer = BertTokenizer.from_pretrained(checkpoint)

# Quantization is currently disabled: `bnb_config` is kept for reference and
# can be passed as `quantization_config=bnb_config` (optionally with
# `torch_dtype=torch.float16`) to load the model in 8-bit.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)
model = BertForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=2,
)

# The task is selected with `task_type` (not `peft_type`). With SEQ_CLS, PEFT
# wraps the model for sequence classification and keeps the newly initialised
# classifier head trainable; without it the head stays frozen after
# prepare_model_for_kbit_training and only the adapter weights are updated.
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=4,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    use_dora=True,
)

model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

# Dataset
dataset = load_dataset('glue', 'mrpc')
dataset = dataset.map(encode, batched=True)
# token_type_ids tell BERT which tokens belong to sentence1 vs sentence2, so
# they are kept for this sentence-pair task.
dataset.set_format(type='torch', columns=['input_ids', 'token_type_ids', 'attention_mask', 'labels'])
label_names = dataset['train'].features['label'].names
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    logging_dir='./logs',
    # run_name='run_8',
    do_train=True,
    do_eval=True,
    num_train_epochs=10,
    learning_rate=2e-4,
    logging_strategy='epoch',
    per_device_train_batch_size=128,
    per_device_eval_batch_size=32,
    save_total_limit=3,
    save_strategy="epoch",
    eval_strategy="epoch",
    label_names = ["labels"], #Without this Validation Accuracy and Validation Loss wouldn't be logged
    fp16=True
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation'],
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

# Train the model
trainer.train()
trainer.evaluate()