<a href="https://colab.research.google.com/github/nnilayy/DocGPT/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install bitsandbytes evaluate datasets transformers peft

In [None]:
from transformers import BertTokenizer, BitsAndBytesConfig, BertForSequenceClassification, Trainer, TrainingArguments, DataCollatorWithPadding
from peft import get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training
from datasets import load_dataset
import torch
import warnings
warnings.filterwarnings("ignore")

def compute_metrics(eval_pred):
    """Compute classification accuracy for the Trainer's evaluation loop.

    Args:
        eval_pred: A ``(logits, labels)`` pair of NumPy arrays. The class
            scores are expected along the last axis of ``logits``.

    Returns:
        A dict with a single ``'accuracy'`` key holding the fraction of
        examples whose highest-scoring class equals the label, as a float.
    """
    scores, targets = eval_pred

    # Highest-scoring class per example.
    predicted = torch.from_numpy(scores).argmax(dim=-1)
    hits = predicted.eq(torch.from_numpy(targets))

    return {'accuracy': hits.float().mean().item()}

# Preprocess the dataset
def encode(examples):
    """Tokenize a batch of sentence pairs and attach their labels.

    Uses the module-level ``tokenizer``. Each ``sentence1``/``sentence2``
    pair is encoded together, truncated and padded to exactly 128 tokens.

    Args:
        examples: A batch mapping with ``'sentence1'``, ``'sentence2'`` and
            ``'label'`` entries.

    Returns:
        The tokenizer output with the batch labels stored under ``'labels'``.
    """
    encoded = tokenizer(
        examples['sentence1'],
        examples['sentence2'],
        max_length=128,
        padding='max_length',
        truncation=True,
    )
    # Expose the labels under the key name used for training.
    encoded['labels'] = examples['label']
    return encoded

# Load model
checkpoint = "bert-base-uncased"
# 8-bit quantization settings. They only take effect when passed as
# `quantization_config` below, which is currently disabled.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)
model = BertForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=2,
    # quantization_config=bnb_config,
    # torch_dtype=torch.float16,
)

# LoRA adapter (with DoRA weight decomposition) for sequence classification.
# The task must be given through `task_type`: `peft_type` identifies the
# adapter method, not the task. With the task type unset, the freshly
# initialized classifier head was left frozen and only the adapter weights
# were trained.
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=4,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    use_dora=True,
)

# Kept so that enabling the quantized load above needs no further changes.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)
# Report how many parameters will actually be updated during training.
model.print_trainable_parameters()

# Tokenizer matching the checkpoint; used by `encode` and the data collator.
tokenizer = BertTokenizer.from_pretrained(checkpoint)

# Dataset: GLUE MRPC sentence pairs, tokenized once up front with `encode`.
dataset = load_dataset('glue', 'mrpc')
dataset = dataset.map(encode, batched=True)
# Expose token_type_ids along with the other model inputs: the examples are
# sentence pairs, and BERT uses the segment ids to tell the two sentences
# apart. Leaving the column out means the model never receives them.
dataset.set_format(
    type='torch',
    columns=['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
)
# Human-readable class names of the dataset's label feature.
label_names = dataset['train'].features['label'].names
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    logging_dir='./logs',
    # run_name='run_8',
    do_train=True,
    do_eval=True,
    num_train_epochs=10,
    learning_rate=2e-4,
    per_device_train_batch_size=128,
    per_device_eval_batch_size=32,
    # Log, evaluate and checkpoint once per epoch; keep only the three most
    # recent checkpoints on disk.
    logging_strategy='epoch',
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=3,
    # Name the label column explicitly; without this, validation loss and
    # accuracy were not being logged.
    label_names=["labels"],
    # Mixed precision needs a CUDA device; fall back to full precision on a
    # CPU-only runtime instead of failing while building the arguments.
    fp16=torch.cuda.is_available(),
)

# Wire the model, datasets, collator and metric function into a Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation'],
)

# Fine-tune, then run a final evaluation pass on the validation split.
# The evaluate() call is the cell's last expression, so its metrics dict is
# shown as the cell output.
trainer.train()
trainer.evaluate()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 165,888 || all params: 109,649,666 || trainable%: 0.1513


Epoch,Training Loss,Validation Loss,Accuracy
1,0.6356,0.615921,0.683824
2,0.6132,0.587354,0.686275
3,0.5722,0.530626,0.705882
4,0.5276,0.486918,0.767157
5,0.4955,0.451076,0.786765
6,0.4555,0.418319,0.808824
7,0.4347,0.409799,0.813725
8,0.4134,0.413324,0.816176
9,0.4129,0.402013,0.818627
10,0.4116,0.399507,0.816176


{'eval_loss': 0.39950719475746155,
 'eval_accuracy': 0.8161764740943909,
 'eval_runtime': 1.0904,
 'eval_samples_per_second': 374.182,
 'eval_steps_per_second': 11.922,
 'epoch': 10.0}