In [1]:
!huggingface-cli login
!huggingface-cli download meta-llama/Llama-3.2-1B-Instruct --exclude "original/*" --local-dir meta-llama/Llama-3.2-1B-Instruct


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: fineG

In [2]:
!pip install datasets trl



In [3]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer, SFTConfig
import torch

In [4]:
d_opts = [('cuda', torch.cuda.is_available()), ('mps', torch.backends.mps.is_available()), ('cpu', True)]
device = next(device for device, available in d_opts if available)
print(f'using device: {device}')

using device: cuda


In [5]:
model_path = 'meta-llama/Llama-3.2-1B-Instruct'

tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

model = AutoModelForCausalLM.from_pretrained(model_path).to(device)

In [6]:
dataset = load_dataset('iamtarun/python_code_instructions_18k_alpaca')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [7]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias='none',
    task_type='CAUSAL_LM'
)

In [8]:
model = get_peft_model(model, lora_config)

In [9]:
#def formatting_func(example): return f"<s>[INST] Convert this English description to Python code: {example['text']} [/INST] {example['code']} </s>"
def formatting_func(example):
    # Combine instruction and input if input is not empty
    task = example['instruction']
    if example['input']:
        task += f"\n\nInput:\n{example['input']}"

    # Format the prompt
    formatted_prompt = f"### Task:\n{task}\n\n### Response:\n"

    # Combine the prompt and output
    formatted_output = f"{formatted_prompt}{example['output']}"

    return str({
        "prompt": formatted_prompt,
        "response": example['output'],
        "text": formatted_output
    })

In [10]:
training_args = SFTConfig(
    output_dir='./finetuned-llama-3.2-1b-instruct',
    num_train_epochs=2,
    per_device_train_batch_size=32,
    gradient_accumulation_steps=32,
    learning_rate=2e-4,
    max_seq_length=512,
    label_names=[],
    no_cuda=False
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset['train'],
    formatting_func=formatting_func,
    args=training_args,
    processing_class=tokenizer
)

Tokenizing train dataset:   0%|          | 0/18612 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/18612 [00:00<?, ? examples/s]

In [11]:
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mnitzschelukas4[0m ([33mnitzschelukas4-none[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


OutOfMemoryError: CUDA out of memory. Tried to allocate 512.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 228.12 MiB is free. Process 149210 has 14.52 GiB memory in use. Of the allocated memory 14.24 GiB is allocated by PyTorch, and 156.43 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
trainer.model.save_pretrained('./finetuned-llama-3.2-1b')

In [None]:
def generate_chat_response(conversation, max_length=100):
    prompt = f"<s>[INST] {conversation} [/INST]"
    inputs = tokenizer(prompt, return_tensors='pt', padding=True).to(device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_length,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
conversation = 'Write a function in python to detect the 13th Friday of a given month and year. The function should accept two parameters: the month (as a number) and the year (as a four-digit number). It should return True if the month contains a Friday the 13th, and False otherwise3.'
response = generate_chat_response(conversation, max_length=400)
print(response)

<s>[INST] Write a function in python to detect the 13th Friday of a given month and year. The function should accept two parameters: the month (as a number) and the year (as a four-digit number). It should return True if the month contains a Friday the 13th, and False otherwise3. [/INST] def check_friday_13(month, year): 
  import calendar
  if month == 4 or month == 6 or month == 9 or month == 11: 
    return False
  elif month == 2 and (year % 4 == 0 and (year % 100!= 0 or year % 400 == 0)): 
    return False
  else: 
    if (year % 4 == 0 and (year % 100!= 0 or year % 400 == 0)) and (month in [1, 3, 5, 7, 8, 10, 12] and calendar.isleap(year)): 
      return True
    else: 
      return False </s>  </s>  def check_friday_13_month(month, year): 
    import calendar
    if month == 4 or month == 6 or month == 9 or month == 11: 
      return False
    elif month == 2 and (year % 4 == 0 and (year % 100!= 0 or year % 400 == 0)): 
      return False
    else: 
      if (ye