In [1]:
!pip install -U peft bitsandbytes transformers accelerate

Collecting bitsandbytes
  Downloading bitsandbytes-0.49.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting transformers
  Downloading transformers-5.0.0-py3-none-any.whl.metadata (37 kB)
Collecting huggingface_hub>=0.25.0 (from peft)
  Downloading huggingface_hub-1.3.5-py3-none-any.whl.metadata (13 kB)
Downloading bitsandbytes-0.49.1-py3-none-manylinux_2_24_x86_64.whl (59.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading transformers-5.0.0-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m144.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading huggingface_hub-1.3.5-py3-none-any.whl (536 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m536.7/536.7 kB[0m [31m47.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: huggingface_hub, bitsandbytes, transformers
  Attempting uninstall: huggingfac

In [2]:
!pip install -U trl


Collecting trl
  Downloading trl-0.27.1-py3-none-any.whl.metadata (11 kB)
Downloading trl-0.27.1-py3-none-any.whl (532 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m532.9/532.9 kB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: trl
Successfully installed trl-0.27.1


In [3]:
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling,
)
from peft import LoraConfig, get_peft_model

In [5]:
MODEL_NAME = "mistralai/Mistral-7B-v0.1"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

from transformers import BitsAndBytesConfig


bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.float16
)

model.config.use_cache = False

Loading weights:   0%|          | 0/291 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [6]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


trainable params: 41,943,040 || all params: 7,283,675,136 || trainable%: 0.5758


In [7]:
dataset = load_dataset(
    "json",
    data_files="hp_lora_dataset.jsonl",
    split="train"
)


Generating train split: 0 examples [00:00, ? examples/s]

In [8]:
MAX_SEQ_LEN = 512   # lower = safer

def tokenize_fn(examples):
    return tokenizer(
        examples["text"],
        truncation=True,
        max_length=MAX_SEQ_LEN,
        padding="max_length",
    )

tokenized_ds = dataset.map(
    tokenize_fn,
    batched=True,
    remove_columns=dataset.column_names,
)


Map:   0%|          | 0/1873 [00:00<?, ? examples/s]

In [9]:
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)


In [10]:
training_args = TrainingArguments(
    output_dir="./lora_output",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    learning_rate=2e-4,
    num_train_epochs=1,
    fp16=True,
    logging_steps=20,
    save_steps=500,
    save_total_limit=2,
    report_to="none",
    optim="paged_adamw_8bit",          # bitsandbytes
)


In [11]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_ds,
    data_collator=data_collator,
)

trainer.train()


Step,Training Loss
20,1.88498
40,1.835825
60,1.822687
80,1.80877
100,1.805366
120,1.817951
140,1.816185
160,1.81156
180,1.791111
200,1.789907


TrainOutput(global_step=235, training_loss=1.8137624862346244, metrics={'train_runtime': 2684.1494, 'train_samples_per_second': 0.698, 'train_steps_per_second': 0.088, 'total_flos': 4.115504846969242e+16, 'train_loss': 1.8137624862346244, 'epoch': 1.0})

In [12]:
model.save_pretrained("./lora_output")
tokenizer.save_pretrained("./lora_output")


('./lora_output/tokenizer_config.json', './lora_output/tokenizer.json')

In [14]:
!cd /content
!zip -r lora_output.zip lora_output


  adding: lora_output/ (stored 0%)
  adding: lora_output/tokenizer_config.json (deflated 48%)
  adding: lora_output/README.md (deflated 66%)
  adding: lora_output/tokenizer.json (deflated 85%)
  adding: lora_output/adapter_model.safetensors (deflated 7%)
  adding: lora_output/checkpoint-235/ (stored 0%)
  adding: lora_output/checkpoint-235/optimizer.pt (deflated 11%)
  adding: lora_output/checkpoint-235/tokenizer_config.json (deflated 48%)
  adding: lora_output/checkpoint-235/README.md (deflated 66%)
  adding: lora_output/checkpoint-235/tokenizer.json (deflated 85%)
  adding: lora_output/checkpoint-235/trainer_state.json (deflated 68%)
  adding: lora_output/checkpoint-235/scheduler.pt (deflated 61%)
  adding: lora_output/checkpoint-235/adapter_model.safetensors (deflated 7%)
  adding: lora_output/checkpoint-235/adapter_config.json (deflated 59%)
  adding: lora_output/checkpoint-235/scaler.pt (deflated 64%)
  adding: lora_output/checkpoint-235/rng_state.pth (deflated 26%)
  adding: lora

In [15]:
model.eval()


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_pro

In [16]:
import torch

def chat(prompt, max_new_tokens=200):
    inputs = tokenizer(
        prompt,
        return_tensors="pt"
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            eos_token_id=tokenizer.eos_token_id,
        )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)


In [17]:
print(chat("Who survived Avada Kedavra and why?"))


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Who survived Avada Kedavra and why?
How did Harry Potter survive his second encounter with the Dark Lord?
What happened to Harry’s parents that night? Why were they killed?
Who is Voldemort? Where was he born, and who are his parents?
Why does Dumbledore consider it so important that Voldemort be stopped?
Where are the Horcruxes, and what do they have to do with the Dark Lord’s return?
In this book, J.K. Rowling answers our questions about the most enigmatic characters of all — Lord Voldemort and Harry Potter.


In [18]:
print(chat("Explain the Harry Potter story in short."))


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Explain the Harry Potter story in short.
“The boy, he’s — he’s not like us . . .”
“We know that,” said Mr. Weasley, sounding a little annoyed. “He’s been in
trouble all year at school for using magic when he shouldn’t have been.”
“And what is this about him and Voldemort?” asked Mrs. Weasley.
“I don’t really understand it myself,” said Mr. Weasley. “It seems to be a bit of
a mystery, but I expect we’ll get to the bottom of it eventually. Anyway, how
are you all? Have you been enjoying yourselves here?”
They talked on happily over dinner, which was one of the best they had ever
had at the Burrow. The weather was very hot again today, and they spent the
rest of the day outside, watching the children splashing around in the stream
and playing catch


In [19]:
print(chat("Why Voldemort could not kill Harry Potter?"))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Why Voldemort could not kill Harry Potter?
“The only way you’ll get rid of Harry Potter is to kill me,” said Dumbledore,
and his smile was as brilliant and as wide as ever.
“You don’t understand —”
“Oh, I think I do,” said Dumbledore calmly. “I understand that you have
already attempted to take my life once. You will find it no easier a second time.
Now, if you are ready?”
Voldemort raised the wand again; his face was now as contorted as a snake’s.
He pointed the wand at Dumbledore’s chest. The tip of it glowed white-hot.
Dumbledore raised his own wand and pointed it straight back at Voldemort.
There was a blinding flash of light. A roar of sound filled the air. . . .
And when the smoke had cleared, Dumbledore was lying on the ground,


In [20]:
print(chat("which speel is used to fly a object"))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


which speel is used to fly a object, and I’ve tried all of them. . . .”
“What kind of object?” asked Harry eagerly.
“A broomstick,” said Ron. “And that’s what we want you to do, Harry. We
need someone who can fly.”
Harry was so stunned he didn’t speak for a moment. He had never thought
of flying as a skill he might have. It wasn’t as though he had ever been on a
broomstick — how could they expect him to know?
“But I don’t know how,” he said eventually.
“It’s just like riding a bicycle,” said Ron, shrugging. “You’ll pick it up in no
time. We’ll get you one and teach you.”
“I don’t know,” said Harry uncertainly. “What if I can’t do it?”
“Then we won’t go,”


In [21]:
print(chat("wingardiun leviosa"))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


wingardiun leviosa!”
“Professor, I’m sure there must be a mistake,” said Hermione. “I can’t see
anything wrong with my homework.”
Dumbledore looked up at her, and Harry saw that he was smiling slightly.
“You know, I was quite looking forward to marking your homework this time,
Hermione. Your last piece was so impressive —”
“I didn’t do it,” said Hermione desperately. “It was the first thing on my
desk when I got in here today! I swear it wasn’t me who did it!”
“But I can assure you, dear, that you did write it,” said Dumbledore,
smiling wider now. “And I am afraid that means that I shall have to take one
hundred points from Gryffindor House. I hope you will understand how sorry I
am about this.”
Harry stared at D
