In [1]:
import huggingface_hub

hf_token = "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"
huggingface_hub.login(hf_token)

# Model

In [12]:
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(load_in_8bit=True)

model_name = "meta-llama/Llama-3.2-1B"
model = AutoModelForCausalLM.from_pretrained(model_name,
                                             # quantization_config=quantization_config,
                                             device_map="cuda:0")
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 2048)
    (layers): ModuleList(
      (0-15): 16 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=512, bias=False)
          (v_proj): Linear(in_features=2048, out_features=512, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (up_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((2048,), eps=1e-05)
    (rotary_emb):

# LoRa

In [13]:
from peft import LoraConfig, get_peft_model

config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.01,
)

lora_model = get_peft_model(model, config, "default")  # или так
lora_model.print_trainable_parameters()

trainable params: 851,968 || all params: 1,236,666,368 || trainable%: 0.0689


# Data

In [5]:
from datasets import load_dataset
from transformers import AutoTokenizer, pipeline

data = load_dataset("Abirate/english_quotes")

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token_id = tokenizer.eos_token_id


def merge_columns(example):
    example["prediction"] = example["quote"] + " ->: " + str(example["tags"])
    return example


data['train'] = data['train'].map(merge_columns)
data['train']["prediction"][:5]

tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

["“Be yourself; everyone else is already taken.” ->: ['be-yourself', 'gilbert-perreira', 'honesty', 'inspirational', 'misattributed-oscar-wilde', 'quote-investigator']",
 "“I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at times hard to handle. But if you can't handle me at my worst, then you sure as hell don't deserve me at my best.” ->: ['best', 'life', 'love', 'mistakes', 'out-of-control', 'truth', 'worst']",
 "“Two things are infinite: the universe and human stupidity; and I'm not sure about the universe.” ->: ['human-nature', 'humor', 'infinity', 'philosophy', 'science', 'stupidity', 'universe']",
 "“So many books, so little time.” ->: ['books', 'humor']",
 "“A room without books is like a body without a soul.” ->: ['books', 'simile', 'soul']"]

In [6]:
data = data.map(lambda samples: tokenizer(samples['prediction']), batched=True)
data = data.remove_columns(['quote', 'author', 'tags', 'prediction'])
data

Map:   0%|          | 0/2508 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 2508
    })
})

# Training

In [18]:
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

trainer = Trainer(
    model=lora_model,
    train_dataset=data['train'],
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=8,
        warmup_steps=100,
        max_steps=200,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir='outputs'
    ),
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
lora_model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
1,2.152
2,2.1978
3,1.8641
4,2.2611
5,1.8233
6,2.2279
7,2.2611
8,2.1682
9,2.3105
10,2.3819


TrainOutput(global_step=200, training_loss=2.0243610656261444, metrics={'train_runtime': 4092.4113, 'train_samples_per_second': 0.782, 'train_steps_per_second': 0.049, 'total_flos': 1453832390123520.0, 'train_loss': 2.0243610656261444, 'epoch': 1.2743221690590112})

# Inference

In [31]:
from transformers import pipeline

pipe = pipeline("text-generation",
                model=lora_model,
                tokenizer=tokenizer)

generation_args = {
    "max_new_tokens": 50,
    "return_full_text": False,
}

pipe("“Training models with PEFT and LoRa is cool” ->: ", **generation_args)

Device set to use cuda:0


[{'generated_text': '0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:'}]

In [32]:
a=1
b=2
a=b
b=3
a,b

(2, 3)

In [30]:
import torch

batch = tokenizer("“Training models with PEFT and LoRa is cool” ->: ", return_tensors='pt').to('cuda')

with torch.cuda.amp.autocast():
  output_tokens = lora_model.generate(**batch, max_new_tokens=50)

print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))

  with torch.cuda.amp.autocast():
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.




 “Training models with PEFT and LoRa is cool” ->: 2018:2019:2020:2021:2022:2023:2024:2025:2026:2027:2028:2029:2030:2031:2032:2033:2034


# Saving

In [25]:
lora_model.save_pretrained("lora_model")

# Loading

In [8]:
from peft import PeftModel, PeftConfig
from transformers import AutoTokenizer, pipeline
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_name = "meta-llama/Llama-3.2-1B"
base_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cuda:0")

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token_id = tokenizer.eos_token_id

config = PeftConfig.from_pretrained("lora_model")

# lora_model = get_peft_model(base_model, config, "default")

lora_model = PeftModel.from_pretrained(base_model, "lora_model")