In [None]:
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer

config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    "EleutherAI/gpt-neo-2.7B", 
    quantization_config=config,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B", use_fast=True)

In [4]:
from datasets import load_dataset 

dataset = load_dataset("glue", "sst2", split="validation")

In [5]:
print(f"Device: {model.device}")

Device: cuda:0


In [9]:
print(f"Validation samples: {len(dataset)}")

Validation samples: 872


In [7]:
import math 

model.eval()
total_loss = 0.0

for data in dataset:
    input = tokenizer(
        data['sentence'],
        return_tensors='pt',
        truncation=True,
        max_length=128
    ).to(model.device)

    with torch.no_grad():
        outputs = model(**input, labels=input.input_ids)
        total_loss += outputs.loss.item()
    
perplexity = math.exp(total_loss / len(dataset))
print(f"Perplexity: {perplexity:.2f}")

Perplexity: 97.94


In [5]:
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

model = prepare_model_for_kbit_training(model)

config = LoraConfig(
    r=16,
    lora_alpha=8,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)