In [5]:
from datasets import Dataset

In [50]:
from transformers import Trainer, TrainingArguments, AutoTokenizer, AutoModelForSequenceClassification

In [72]:
from peft import LoraConfig,get_peft_model

In [16]:
import pandas as pd

In [18]:
df = pd.read_csv('yelp.csv')

In [34]:
dataset = df.rename(columns={'useful':'label'})

In [38]:
dataset = dataset[['text','label']]

In [44]:
dataset.loc[:,'label'] = dataset['label'].apply(lambda x:1 if x>=3 else 0)

In [48]:
dataset.to_csv('cleaned_lora.csv')

In [56]:
dataset = Dataset.from_pandas(dataset)

In [58]:
dataset = dataset.train_test_split(test_size=0.2)

In [108]:
model_name = 'distilbert-base-uncased'

In [60]:
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [64]:
def tokenize(batch):
    return tokenizer(batch['text'],truncation=True, padding='max_length')

dataset = dataset.map(tokenize,batched = True)

Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

In [70]:
# loading base model
model = AutoModelForSequenceClassification.from_pretrained(model_name,num_labels =2)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [76]:
# lora config
lora_config = LoraConfig(
    task_type="SEQ_CLS",
    r=8,
    lora_alpha = 16,
    target_modules=['q_lin','v_lin'], # works for DistilBERT
    lora_dropout=0.1
)

In [78]:
model = get_peft_model(model,lora_config)

The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers and GPU quantization are unavailable.


In [80]:
# training
training_args = TrainingArguments(output_dir='./distilbert-lora-output',
                                  per_device_train_batch_size = 2,
                                  learning_rate=2e-5,
                                  num_train_epochs=1,
                                  eval_strategy='epoch'
)

In [92]:
trainer = Trainer(model = model,args = training_args,train_dataset=dataset['train'], eval_dataset=dataset['test'],processing_class=tokenizer)

No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [94]:
trainer.train()



Epoch,Training Loss,Validation Loss
1,0.5778,0.614586




TrainOutput(global_step=800, training_loss=0.5611928367614746, metrics={'train_runtime': 2151.3201, 'train_samples_per_second': 0.744, 'train_steps_per_second': 0.372, 'total_flos': 215583050956800.0, 'train_loss': 0.5611928367614746, 'epoch': 1.0})

In [96]:
model.save_pretrained('./distilbert-lora')

# evaluating

In [118]:
from peft import PeftModel

In [122]:
base_model_name = 'distilbert-base-uncased'
lora_model_path = './distilbert-lora'

In [126]:
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForSequenceClassification.from_pretrained(lora_model_name)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [128]:
model = PeftModel.from_pretrained(base_model,lora_model_path)



In [130]:
model.eval()

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): DistilBertForSequenceClassification(
      (distilbert): DistilBertModel(
        (embeddings): Embeddings(
          (word_embeddings): Embedding(30522, 768, padding_idx=0)
          (position_embeddings): Embedding(512, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (transformer): Transformer(
          (layer): ModuleList(
            (0-5): 6 x TransformerBlock(
              (attention): DistilBertSdpaAttention(
                (dropout): Dropout(p=0.1, inplace=False)
                (q_lin): lora.Linear(
                  (base_layer): Linear(in_features=768, out_features=768, bias=True)
                  (lora_dropout): ModuleDict(
                    (default): Dropout(p=0.1, inplace=False)
                  )
                  (lora_A): ModuleDict(
                    (default): Linear(in_features=76

# Making a prediction

In [133]:
import torch

In [185]:
text = 'the food is good!'

In [187]:
inputs = tokenizer(text,return_tensors='pt', padding = 'max_length',max_length=128,truncation=True)

In [189]:
with torch.no_grad():
    outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits,dim =-1)

In [191]:
predictions.item()

0