In [2]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
import torch
from peft import LoraConfig, get_peft_model, TaskType, PertModel

# Check if GPU is available
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Using device: {device}")

ImportError: cannot import name 'PertModel' from 'peft' (/Users/nirmal/Desktop/llm fine tuning/env_llm/lib/python3.10/site-packages/peft/__init__.py)

In [2]:

# Load dataset
dataset = load_dataset('wikitext', 'wikitext-2-raw-v1')

base_model="distilgpt2"

dataset


DatasetDict({
    test: Dataset({
        features: ['text'],
        num_rows: 4358
    })
    train: Dataset({
        features: ['text'],
        num_rows: 36718
    })
    validation: Dataset({
        features: ['text'],
        num_rows: 3760
    })
})

In [3]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.pad_token = tokenizer.eos_token

# Tokenize the dataset
def tokenize_function(examples):
    inputs = tokenizer(examples['text'], truncation=True, padding='max_length', max_length=128)
    inputs['labels'] = inputs['input_ids'].copy()
    return inputs

tokenised_datasets = dataset.map(tokenize_function, batched=True)

tokenised_datasets

DatasetDict({
    test: Dataset({
        features: ['text', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 4358
    })
    train: Dataset({
        features: ['text', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 36718
    })
    validation: Dataset({
        features: ['text', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 3760
    })
})

In [4]:
model = AutoModelForCausalLM.from_pretrained(base_model)
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["c_attn"],
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

# Wrap model with LoRA adapter
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()
model.to(device)

  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'
trainable params: 147,456 || all params: 82,060,032 || trainable%: 0.1797


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GPT2LMHeadModel(
      (transformer): GPT2Model(
        (wte): Embedding(50257, 768)
        (wpe): Embedding(1024, 768)
        (drop): Dropout(p=0.1, inplace=False)
        (h): ModuleList(
          (0-5): 6 x GPT2Block(
            (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (attn): GPT2Attention(
              (c_attn): lora.Linear(
                (base_layer): Conv1D(nf=2304, nx=768)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=768, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=2304, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
             

In [5]:
# Define training arguments
training_args = TrainingArguments(
    output_dir='model/',
    eval_strategy='epoch',
    num_train_epochs=1,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=4,
    warmup_steps=100,
    weight_decay=0.01,
    logging_dir='model/logs',
    logging_steps=10,
    # no_cuda=True,
    use_mps_device=True,  #VERY IMPORTANT PARAM
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenised_datasets['train'],
    eval_dataset=tokenised_datasets['validation']
)

# Train the model
# trainer.train(resume_from_checkpoint='model/checkpoint-8500')
trainer.train()

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Epoch,Training Loss,Validation Loss
1,1.4311,1.552422


TrainOutput(global_step=2295, training_loss=2.098729048003818, metrics={'train_runtime': 960.0946, 'train_samples_per_second': 38.244, 'train_steps_per_second': 2.39, 'total_flos': 1203444935294976.0, 'train_loss': 2.098729048003818, 'epoch': 1.0})

In [None]:
# save the model and tokenizer explicitly
model_output_dir = 'model/trained_model'

model.save_pretrained(model_output_dir)
tokenizer.save_pretrained(model_output_dir)

('model/trained_model/tokenizer_config.json',
 'model/trained_model/special_tokens_map.json',
 'model/trained_model/vocab.json',
 'model/trained_model/merges.txt',
 'model/trained_model/added_tokens.json',
 'model/trained_model/tokenizer.json')

: 

In [1]:
from peft import PeftModel
import torch

import argparse
from transformers import AutoTokenizer, AutoModelForCausalLM
base_model="distilgpt2"

def get_model_parameters(model):
    total_params = sum(p.numel() for p in model.parameters())
    return total_params

def generate_text(input_text):
    model_path = "model/trained_model"
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    base = AutoModelForCausalLM.from_pretrained(base_model)
    model = PeftModel.from_pretrained(base, model_path)

    device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
    model.to(device)

    print(f"Total parameters: {get_model_parameters(model)}")

    inputs = tokenizer(input_text, return_tensors='pt')
    inputs = {k: v.to(device) for k, v in inputs.items()}

    outputs = model.generate(
        **inputs,
        max_length=125,
        temperature=0.9,
        top_p=0.95,
        do_sample=True,
        num_return_sequences=1
    )

    print("\n Generated:")
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))

generate_text("Once upon a time")

  from .autonotebook import tqdm as notebook_tqdm
  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Total parameters: 82060032

 Generated:
Once upon a time, he began to think about how people can learn and learn from their own mistakes.



In [None]:
from peft import PeftModel
import torch

import argparse
from transformers import AutoTokenizer, AutoModelForCausalLM
base_model="distilgpt2"

def compare_models(prompt: str, max_length=125):
    device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")

    # Load base model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    base_model_instance = AutoModelForCausalLM.from_pretrained(base_model).to(device)

    # Load fine-tuned LoRA model
    from peft import PeftModel
    lora_model = PeftModel.from_pretrained(base_model_instance, "model/trained_model").to(device)

    # Tokenise input once
    inputs = tokenizer(prompt, return_tensors='pt')
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Generate from base model
    base_output = base_model_instance.generate(
        **inputs,
        max_length=max_length,
        do_sample=True,
        temperature=0.9,
        top_p=0.95
    )

    # Generate from LoRA model
    lora_output = lora_model.generate(
        **inputs,
        max_length=max_length,
        do_sample=True,
        temperature=0.9,
        top_p=0.95
    )

    # Decode outputs
    base_text = tokenizer.decode(base_output[0], skip_special_tokens=True)
    lora_text = tokenizer.decode(lora_output[0], skip_special_tokens=True)

    # Print both
    print("📦 Base model output:")
    print(base_text)
    print("🔧 LoRA fine-tuned model output:")
    print(lora_text)

compare_models("Once upon a time")
compare_models("== Early Life ==\nJohn Keats was born in")
compare_models("== Background ==\nThe Battle of Hastings was")
compare_models("== Legacy ==\nEinstein's work influenced")


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



📦 Base model output:
Once upon a time of such turmoil and the destruction of her homeland, her father, the first of her sons, is the only son to be killed in her absence. The young princess is named after the Great King.


🔧 LoRA fine-tuned model output:
Once upon a time of peace, a great deal of hatred has been formed in the Jewish community, which is more concerned with survival than the Jews themselves. In the 1920s, there was widespread hatred of Jews and Jews, and the hatred that surrounded them grew. In 1922, in response to the publication of the book "The Jews in the Jews: The Jewish World," Jewish newspapers published a series of articles criticizing Jewish prejudice. In 1925, the editor-in-chief of The Jewish World published a cover story on Zionism: a series of articles accusing Jews of being "Jewish" and that Jews were not "Jews,"
/n


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [14]:
import torch
import math
from datasets import load_dataset

dataset = load_dataset('wikitext', 'wikitext-2-raw-v1')
base_model="distilgpt2"
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")

def compute_perplexity(model, tokenizer, text, device):
    encodings = tokenizer(text, return_tensors='pt')
    input_ids = encodings.input_ids.to(device)

    with torch.no_grad():
        outputs = model(input_ids, labels=input_ids)
        loss = outputs.loss
        perplexity = math.exp(loss.item())
        return perplexity
    
# Load base model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model)
base_model_instance = AutoModelForCausalLM.from_pretrained(base_model).to(device)

# Load fine-tuned LoRA model
from peft import PeftModel
lora_model = PeftModel.from_pretrained(base_model_instance, "model/trained_model").to(device)
text = dataset["validation"][10]["text"]

base_ppl = compute_perplexity(base_model_instance, tokenizer, text, device)
lora_ppl = compute_perplexity(lora_model, tokenizer, text, device)

print(f"Base model perplexity: {base_ppl:.2f}")
print(f"LoRA fine-tuned model perplexity: {lora_ppl:.2f}")


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Base model perplexity: 53.63
LoRA fine-tuned model perplexity: 53.63
