In [12]:
# for colab
!huggingface-cli login
!huggingface-cli download meta-llama/Llama-3.2-1B-Instruct --exclude "original/*" --local-dir meta-llama/Llama-3.2-1B-Instruct
!pip install -q datasets trl torch transformers peft bitsandbytes


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: fineG

In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, PeftModel
from trl import SFTTrainer, SFTConfig
import torch

In [2]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

In [3]:
base_model_path = 'meta-llama/Llama-3.2-1B-Instruct'

tokenizer = AutoTokenizer.from_pretrained(base_model_path)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map='auto',
)

In [12]:
print(base_model)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 2048)
    (layers): ModuleList(
      (0-15): 16 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=2048, out_features=2048, bias=False)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=2048, out_features=16, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=16, out_features=2048, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (k_proj): Linear4bit(in_features=2048, out_features=512, bias=False)
          (v_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=2048

In [4]:
dataset_hf_path = 'iamtarun/python_code_instructions_18k_alpaca'
dataset = load_dataset(dataset_hf_path)

split_dataset = dataset['train'].train_test_split(test_size=0.25, seed=42)

train_dataset = split_dataset['train']
val_dataset = split_dataset['test']

print(f'Training set size: {len(train_dataset)}')
print(f'Validation set size: {len(val_dataset)}')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Training set size: 13959
Validation set size: 4653


In [17]:
print(f'-- data info: {dataset_hf_path} --')
print(f'dataset shape: {train_dataset.shape}')
print(f'dataset columns: {train_dataset.column_names}')
print(f'dataset rows: {train_dataset.num_rows}')
print()
print('-> example fine-tuning prompt:')
print(f"prompt: {train_dataset[0]['prompt']}")

-- data info: iamtarun/python_code_instructions_18k_alpaca --
dataset shape: (13959, 4)
dataset columns: ['instruction', 'input', 'output', 'prompt']
dataset rows: 13959

-> example fine-tuning prompt:
prompt: Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Develop a Python script to add a given list of numbers.

### Input:
[4, 8, 15, 20]

### Output:
def sum_list(list_of_nums):
    total = 0
    for number in list_of_nums:
        total += number
    return total

result = sum_list([4, 8, 15, 20])
print(result)


In [18]:
# some notes on input data
# the examples contain test code as well, we onlyr really want the function or piece of code

In [5]:
def formatting_func(example):
    return example['instruction'] + '\n' + example['output']

In [20]:
ex1 = formatting_func(train_dataset[0])
ex1

'Develop a Python script to add a given list of numbers.\ndef sum_list(list_of_nums):\n    total = 0\n    for number in list_of_nums:\n        total += number\n    return total\n\nresult = sum_list([4, 8, 15, 20])\nprint(result)'

In [11]:
# peft (lora)
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias='none',
    task_type='CAUSAL_LM',
    target_modules=['q_proj', 'v_proj'], # (see model architecture)
    init_lora_weights='gaussian',
)

peft_model = get_peft_model(base_model, lora_config)
peft_model.print_trainable_parameters()

trainable params: 1,703,936 || all params: 1,237,518,336 || trainable%: 0.1377


In [8]:
training_args = SFTConfig(
    output_dir='./finetuned-llama-3.2-1b-instruct',
    num_train_epochs=2,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    max_seq_length=512,
    max_steps=500,
    save_steps=100,
    label_names=[],
    fp16=True,
    report_to='none',

    logging_steps=10,
    logging_first_step=True,

    lr_scheduler_type='cosine',
    warmup_steps=100,
)

trainer = SFTTrainer(
    model=peft_model,
    args=training_args,

    train_dataset=train_dataset,
    eval_dataset=val_dataset,

    formatting_func=formatting_func,
    processing_class=tokenizer,
)

Tokenizing train dataset:   0%|          | 0/13959 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/13959 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/4653 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/4653 [00:00<?, ? examples/s]

In [9]:
trainer.train()

Step,Training Loss
1,1.8127
10,1.2763
20,1.1851
30,1.1655
40,1.2297
50,1.1616


TrainOutput(global_step=50, training_loss=1.214360613822937, metrics={'train_runtime': 27.919, 'train_samples_per_second': 28.654, 'train_steps_per_second': 1.791, 'total_flos': 978727814676480.0, 'train_loss': 1.214360613822937})

In [25]:
eval_results = trainer.evaluate()
print(eval_results)

{'eval_runtime': 32.8768, 'eval_samples_per_second': 141.528, 'eval_steps_per_second': 17.702}


In [10]:
output_dir = 'llama-3.2-1b-instruct-ft'

trainer.model.save_pretrained('lora_adapters', save_adapter=True, save_config=True) # save lora adatpers to 'lora_adapters'

model_to_merge = PeftModel.from_pretrained(AutoModelForCausalLM.from_pretrained(base_model_path, device_map='auto'), 'lora_adapters')
merged_model = model_to_merge.merge_and_unload()
merged_model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
!du -sh llama-3.2-1b-instruct-ft/model.safetensors

4.7G	llama-3.2-1b-instruct-ft/model.safetensors


In [None]:
ft_model = AutoModelForCausalLM.from_pretrained(output_dir, device_map='auto')
input_text = 'write a python function to generate the first n fibonacci numbers. exclude any extra comments or examples. just the python function bare bones.'
inputs = tokenizer(input_text, return_tensors='pt').to('cuda')
outputs = ft_model.generate(**inputs, max_new_tokens=250)
print(tokenizer.decode(outputs[0], skip_special_tokens=True)) # TODO: how to prevent from outputing more tokens than needed?