In [None]:
!huggingface-cli login
!huggingface-cli download meta-llama/Llama-3.2-1B-Instruct --exclude "original/*" --local-dir meta-llama/Llama-3.2-1B-Instruct
!pip install -q datasets trl

In [6]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer, SFTConfig
import torch

d_opts = [('cuda', torch.cuda.is_available()), ('mps', torch.backends.mps.is_available()), ('cpu', True)]
device = next(device for device, available in d_opts if available)
print(f'using device: {device}')

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

using device: cuda


In [7]:
model_path = 'meta-llama/Llama-3.2-1B-Instruct'

tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

model = AutoModelForCausalLM.from_pretrained(model_path).to(device)

In [8]:
print(model)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 2048)
    (layers): ModuleList(
      (0-15): 16 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=512, bias=False)
          (v_proj): Linear(in_features=2048, out_features=512, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (up_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((2048,), eps=1e-05)
    (rotary_emb):

In [28]:
dataset_hf_path = 'iamtarun/python_code_instructions_18k_alpaca'
dataset = load_dataset(dataset_hf_path)

In [44]:
print(f'-- data info: {dataset_hf_path} --')
print(f'dataset shape: {dataset.shape}')
print(f'dataset columns: {dataset.column_names}')
print(f'dataset rows: {dataset.num_rows}')
print()
print('-> example fine-tuning prompt:')
print(f"prompt: {dataset['train'][0]['prompt']}")

-- data info: iamtarun/python_code_instructions_18k_alpaca --
dataset shape: {'train': (18612, 4)}
dataset columns: {'train': ['instruction', 'input', 'output', 'prompt']}
dataset rows: {'train': 18612}

-> example fine-tuning prompt:
prompt: Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Create a function to calculate the sum of a sequence of integers.

### Input:
[1, 2, 3, 4, 5]

### Output:
# Python code
def sum_sequence(sequence):
  sum = 0
  for num in sequence:
    sum += num
  return sum


In [47]:
def formatting_func(example):
    # Combine instruction and input if input is not empty
    task = example['instruction']
    if example['input']:
        task += f"\n\nInput:\n{example['input']}"

    # Format the prompt
    formatted_prompt = f"### Task:\n{task}\n\n### Response:\n"

    # Combine the prompt and output
    formatted_output = f"{formatted_prompt}{example['output']}"

    return str({
        "prompt": formatted_prompt,
        "response": example['output'],
        "text": formatted_output
    })

In [50]:
import pprint
pprint.pprint(formatting_func(dataset['train'][0]))

("{'prompt': '### Task:\\nCreate a function to calculate the sum of a sequence "
 "of integers.\\n\\nInput:\\n[1, 2, 3, 4, 5]\\n\\n### Response:\\n', "
 "'response': '# Python code\\ndef sum_sequence(sequence):\\n  sum = 0\\n  for "
 "num in sequence:\\n    sum += num\\n  return sum', 'text': '### "
 'Task:\\nCreate a function to calculate the sum of a sequence of '
 'integers.\\n\\nInput:\\n[1, 2, 3, 4, 5]\\n\\n### Response:\\n# Python '
 'code\\ndef sum_sequence(sequence):\\n  sum = 0\\n  for num in '
 "sequence:\\n    sum += num\\n  return sum'}")


In [None]:
# lora + supervised fine-tuning

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias='none',
    task_type='CAUSAL_LM'
)

model = get_peft_model(model, lora_config)

training_args = SFTConfig(
    output_dir='./finetuned-llama-3.2-1b-instruct',
    num_train_epochs=2,
    per_device_train_batch_size=32,
    gradient_accumulation_steps=32,
    learning_rate=2e-4,
    max_seq_length=512,
    label_names=[],
    no_cuda=False
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset['train'],
    formatting_func=formatting_func,
    args=training_args,
    processing_class=tokenizer
)

In [None]:
trainer.train()

In [None]:
trainer.model.save_pretrained('./finetuned-llama-3.2-1b')

In [None]:
def generate_chat_response(conversation, max_length=100):
    prompt = f"<s>[INST] {conversation} [/INST]"
    inputs = tokenizer(prompt, return_tensors='pt', padding=True).to(device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_length,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
conversation = 'Write a function in python to detect the 13th Friday of a given month and year. The function should accept two parameters: the month (as a number) and the year (as a four-digit number). It should return True if the month contains a Friday the 13th, and False otherwise3.'
response = generate_chat_response(conversation, max_length=400)
print(response)