In [1]:
# for colab
#!huggingface-cli login
#!huggingface-cli download meta-llama/Llama-3.2-1B-Instruct --exclude "original/*" --local-dir meta-llama/Llama-3.2-1B-Instruct
!pip install -q datasets trl torch transformers peft bitsandbytes

In [2]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer, SFTConfig
import torch

In [3]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.float16
)

In [4]:
model_path = 'meta-llama/Llama-3.2-1B-Instruct'

tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=bnb_config,
    device_map='auto',
)

In [None]:
print(model)

In [5]:
dataset_hf_path = 'iamtarun/python_code_instructions_18k_alpaca'
dataset = load_dataset(dataset_hf_path)

split_dataset = dataset['train'].train_test_split(test_size=0.2, seed=42)

train_dataset = split_dataset['train']
val_dataset = split_dataset['test']

print(f'Training set size: {len(train_dataset)}')
print(f'Validation set size: {len(val_dataset)}')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Training set size: 14889
Validation set size: 3723


In [None]:
print(f'-- data info: {dataset_hf_path} --')
print(f'dataset shape: {dataset.shape}')
print(f'dataset columns: {dataset.column_names}')
print(f'dataset rows: {dataset.num_rows}')
print()
print('-> example fine-tuning prompt:')
print(f"prompt: {dataset['train'][0]['prompt']}")

In [6]:
def formatting_func(example):
    return example['prompt']

In [23]:
ex1 = formatting_func(dataset['train'][0])
ex1_p = dataset['train'][0]['prompt']
ex1, ex1_p

('Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nCreate a function to calculate the sum of a sequence of integers.\n\n### Input:\n[1, 2, 3, 4, 5]\n\n### Output:\n# Python code\ndef sum_sequence(sequence):\n  sum = 0\n  for num in sequence:\n    sum += num\n  return sum',
 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nCreate a function to calculate the sum of a sequence of integers.\n\n### Input:\n[1, 2, 3, 4, 5]\n\n### Output:\n# Python code\ndef sum_sequence(sequence):\n  sum = 0\n  for num in sequence:\n    sum += num\n  return sum')

In [7]:
# peft (lora)
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias='none',
    task_type='CAUSAL_LM',
    target_modules=['q_proj', 'v_proj'], # (see model architecture)
    init_lora_weights='gaussian',
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 851,968 || all params: 1,236,666,368 || trainable%: 0.0689


In [8]:
training_args = SFTConfig(
    output_dir='./finetuned-llama-3.2-1b-instruct',
    num_train_epochs=2,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    max_seq_length=512,
    max_steps=250,
    save_steps=100,
    label_names=[],
    fp16=True,
    report_to='none',

    logging_steps=10,
    logging_first_step=True,

    lr_scheduler_type='cosine',
    warmup_steps=100,
)

trainer = SFTTrainer(
    model=model,
    args=training_args,

    train_dataset=train_dataset,
    eval_dataset=val_dataset,

    formatting_func=formatting_func,
    processing_class=tokenizer,
)

Tokenizing train dataset:   0%|          | 0/14889 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/14889 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/3723 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/3723 [00:00<?, ? examples/s]

In [9]:
trainer.train()

Step,Training Loss
1,1.6629
10,1.7026
20,1.5102
30,1.1816
40,0.9942
50,0.8795
60,0.909
70,0.8513
80,0.9437
90,0.8639


KeyboardInterrupt: 

In [11]:
trainer.evaluate()

Step,Training Loss
1,1.6629
10,1.7026
20,1.5102
30,1.1816
40,0.9942
50,0.8795
60,0.909
70,0.8513
80,0.9437
90,0.8639


KeyboardInterrupt: 

In [14]:
model.save_pretrained('./finetuned-llama-3.2-1b-instruct-ft')
tokenizer.save_pretrained('./finetuned-llama-3.2-1b-instruct-ft')

('./finetuned-llama-3.2-1b-instruct-ft/tokenizer_config.json',
 './finetuned-llama-3.2-1b-instruct-ft/special_tokens_map.json',
 './finetuned-llama-3.2-1b-instruct-ft/tokenizer.json')

In [21]:
output_dir = 'llama-3.2-1b-instruct-ft'
peft_model = trainer.model
merged_model = peft_model.merge_and_unload()
merged_model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)



('llama-3.2-1b-instruct-ft/tokenizer_config.json',
 'llama-3.2-1b-instruct-ft/special_tokens_map.json',
 'llama-3.2-1b-instruct-ft/tokenizer.json')

In [20]:
from peft import PeftModel
model_new = PeftModel.from_pretrained(model, './finetuned-llama-3.2-1b-instruct-ft')

# Generate text
input_text = 'print fibonacci numbers'
inputs = tokenizer(input_text, return_tensors='pt').to('cuda')
outputs = model.generate(**inputs, max_new_tokens=400)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


print fibonacci numbers

# function to print fibonacci sequence
def print_fibonacci():
    # list to store the sequence
    sequence = [0, 1]

    # while the sequence is not empty
    while len(sequence) < 100:
        # append the next number to the sequence
        next_number = sequence[-1] + sequence[-2]
        # append the next number to the sequence
        sequence.append(next_number)

    # print the sequence
    print("Fibonacci Sequence:")
    print(sequence)

# call the function
print_fibonacci()


In [None]:
# ---------------------------------------------------------------------------------

In [13]:
print("torch.cuda.memory_allocated: %fGB"%(torch.cuda.memory_allocated(0)/1024/1024/1024))
print("torch.cuda.memory_reserved: %fGB"%(torch.cuda.memory_reserved(0)/1024/1024/1024))
print("torch.cuda.max_memory_reserved: %fGB"%(torch.cuda.max_memory_reserved(0)/1024/1024/1024))

torch.cuda.memory_allocated: 9.229259GB
torch.cuda.memory_reserved: 9.253906GB
torch.cuda.max_memory_reserved: 9.253906GB


In [None]:
def generate_chat_response(conversation, max_length=100):
    prompt = f"<s>[INST] {conversation} [/INST]"
    inputs = tokenizer(prompt, return_tensors='pt', padding=True).to(device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_length,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
conversation = 'Write a function in python to detect the 13th Friday of a given month and year. The function should accept two parameters: the month (as a number) and the year (as a four-digit number). It should return True if the month contains a Friday the 13th, and False otherwise3.'
response = generate_chat_response(conversation, max_length=400)
print(response)