## Loading the model

In [2]:
#!pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git
#!pip install -q datasets bitsandbytes einops wandb

In [6]:
import torch
#import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

#model_name = "meta-llama/Llama-2-7b-chat-hf"
model_name = "marianbasti/Llama-2-13b-fp16-alpaca-spanish"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map='auto', # to use both GPU cards. check nvtop
    use_auth_token=True,
    trust_remote_code=True
)
model.config.use_cache = False

LocalTokenNotFoundError: Token is required (`token=True`), but no token found. You need to provide a token or be logged in to Hugging Face with `huggingface-cli login` or `huggingface_hub.login`. See https://huggingface.co/settings/tokens.

Let's also load the tokenizer below

In [2]:
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

Downloading (…)okenizer_config.json: 100%|█████████████████████████████████████| 770/770 [00:00<00:00, 8.45MB/s]
Downloading tokenizer.model: 100%|███████████████████████████████████████████| 500k/500k [00:00<00:00, 1.45MB/s]
Downloading (…)/main/tokenizer.json: 100%|█████████████████████████████████| 1.84M/1.84M [00:00<00:00, 6.36MB/s]
Downloading (…)cial_tokens_map.json: 100%|█████████████████████████████████████| 414/414 [00:00<00:00, 4.29MB/s]


In [3]:
from peft import LoraConfig, get_peft_model

lora_alpha = 16
lora_dropout = 0.1
lora_r = 64

peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM"
)

## Loading the trainer

Here we will use the [`SFTTrainer` from TRL library](https://huggingface.co/docs/trl/main/en/sft_trainer) that gives a wrapper around transformers `Trainer` to easily fine-tune models on instruction based datasets using PEFT adapters. Let's first load the training arguments below.

In [4]:
from transformers import TrainingArguments

output_dir = "./results"
per_device_train_batch_size = 4
gradient_accumulation_steps = 4
optim = "paged_adamw_32bit"
save_steps = 10
logging_steps = 10
learning_rate = 2e-4
max_grad_norm = 0.3
max_steps = 50
warmup_ratio = 0.03
lr_scheduler_type = "constant"

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    fp16=True,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
)

Then finally pass everthing to the trainer

In [None]:
from datasets import load_dataset

#dataset = load_dataset('json', data_files='wyze_products_meta_cleaned.json')
dataset_name = 'wyze_products_meta_cleaned.json'
dataset = load_dataset('json', data_files=dataset_name, split='train')

In [None]:
def formatting_prompts_func(example):
    output_texts = []
    for i in range(len(example['title'])):
        text = f"### Human: {example['title'][i]}\n ### Assistant: {example['cleaned_text'][i]}"
        output_texts.append(text)
    return output_texts

In [None]:
dataset

In [None]:
output_texts = []
for i in range(len(dataset['title'])):
    text = f"### Human: {dataset['title'][i]}\n ### Assistant: {dataset['cleaned_text'][i]}"
    output_texts.append(text)

In [None]:
#output_texts

In [5]:
from datasets import Dataset
import pandas as pd

In [6]:
dataset_name = 'wyze_products_meta_cleaned.json'
df = pd.read_json(dataset_name)

In [None]:
df.head()

In [7]:
df['instruct'] = '### Human: ' + df['title'] + '\n ### Assistant: ' + df['cleaned_text']

In [8]:
df['instruct'].head()

0    ### Human: Where's the best place to put my me...
1    ### Human: Wyze Video Doorbell Pro Setup Guide...
2    ### Human: I'm not receiving Notifications on ...
3    ### Human: Does Wyze Cam Pan v3 work with the ...
4    ### Human: How many cameras can I stack togeth...
Name: instruct, dtype: object

In [9]:
cleaned_dataset = Dataset.from_pandas(df, split='train')

In [10]:
cleaned_dataset

Dataset({
    features: ['url', 'title', 'document', 'idx', 'questions', 'url_id', 'cleaned_text', 'instruct'],
    num_rows: 1866
})

In [11]:
cleaned_dataset['instruct'][0]

'### Human: Where\'s the best place to put my mesh router?\n ### Assistant: Centrally, with good visibility in your home.\nThe best place to put Wyze Mesh Router is in a central location with good “line of sight” visibility to the main areas of your home.\nAvoid placing the router next to large appliances, behind TVs, or in "service closets" such as power distribution or fuse panels. While the router is designed to provide extensive wireless range and coverage, these areas are sources of interference and may reduce your throughput or range.For best performance, be sure to place the router upright and on a table top or flat surface.'

In [12]:
from trl import SFTTrainer

max_seq_length = 512

trainer = SFTTrainer(
    model=model,
    train_dataset=cleaned_dataset,
    #formatting_func=formatting_prompts_func,
    peft_config=peft_config,
    dataset_text_field="instruct",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
)

                                                                                                                

We will also pre-process the model by upcasting the layer norms in float 32 for more stable training

In [13]:
for name, module in trainer.model.named_modules():
    if "norm" in name:
        module = module.to(torch.float32)

## Train the model

Now let's train the model! Simply call `trainer.train()`

In [14]:
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mmon-young[0m ([33mvstar[0m). Use [1m`wandb login --relogin`[0m to force relogin


You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
10,29.7667
20,0.4219
30,4.3759
40,56.3104
50,1.0056


TrainOutput(global_step=50, training_loss=18.376077213287353, metrics={'train_runtime': 185.0902, 'train_samples_per_second': 4.322, 'train_steps_per_second': 0.27, 'total_flos': 3787422535680000.0, 'train_loss': 18.376077213287353, 'epoch': 0.43})

During training, the model should converge nicely as follows:

![image](https://huggingface.co/datasets/trl-internal-testing/example-images/resolve/main/images/loss-falcon-7b.png)

The `SFTTrainer` also takes care of properly saving only the adapters during training instead of saving the entire model.

In [15]:
model_to_save = trainer.model.module if hasattr(trainer.model, 'module') else trainer.model  # Take care of distributed/parallel training
model_to_save.save_pretrained("outputs")

In [16]:
lora_config = LoraConfig.from_pretrained('outputs')
model = get_peft_model(model, lora_config)

In [None]:
#dataset['text']

In [17]:
text = "How does Wyze Lock ensure the security of access codes stored on the device?"
device = "cuda:0"

inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))



How does Wyze Lock ensure the security of access codes stored on the device?
 nobody can guess the access codes or hack into the device to gain unauthorized access.

Answer: Wyze Lock ensures the security of access codes stored on the device through several measures:

1. Encryption: Wy


In [18]:
text = "Where's the best place to put my mesh router?"
device = "cuda:0"

inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Where's the best place to put my mesh router?
 Unterscheidung between a mesh router and a traditional router
Mesh routers are becoming increasingly popular as they offer improved coverage and reliability compared to traditional routers. But, where's the best place to put your mesh router for optimal performance


In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name,torch_dtype=torch.bfloat16).cuda()

In [21]:
prompt = "Where's the best place to put my mesh router?"
input_ids = tokenizer(prompt, return_tensors="pt").to(device)
output = model.generate(**input_ids, max_length=100,min_length=10,do_sample=True,)
print(tokenizer.decode(output[0], skip_special_tokens=True))

Where's the best place to put my mesh router?
 nobody likes it stuck to the wall or ceiling, because it looks weird and doesn't blend in with the room's decor. mesh router


Thread SenderThread:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/wandb/sdk/internal/internal_util.py", line 49, in run
    self._run()
  File "/usr/local/lib/python3.10/dist-packages/wandb/sdk/internal/internal_util.py", line 100, in _run
    self._process(record)
  File "/usr/local/lib/python3.10/dist-packages/wandb/sdk/internal/internal.py", line 328, in _process
    self._sm.send(record)
  File "/usr/local/lib/python3.10/dist-packages/wandb/sdk/internal/sender.py", line 376, in send
    send_handler(record)
  File "/usr/local/lib/python3.10/dist-packages/wandb/sdk/internal/sender.py", line 578, in send_exit
    self._update_summary()
  File "/usr/local/lib/python3.10/dist-packages/wandb/sdk/internal/sender.py", line 1136, in _update_summary
    with open(summary_path, "w") as f:
FileNotFoundError: [Errno 2] No such file or directory: '/data/livex/llm-llama2/wandb/run-20230721_194957-yk37rpsb/files/wandb-summary.json'
wandb: ERROR Internal wandb

In [None]:
from huggingface_hub import login
login()

In [None]:
model.push_to_hub("llama2-qlora-finetunined-french")