# Imports

In [1]:
import torch
import torch.nn as nn
import bitsandbytes as bnb
from datasets import load_dataset, Dataset
from transformers import (AutoTokenizer,
                          AutoConfig,
                          AutoModelForCausalLM,
                          BitsAndBytesConfig,
                          TrainingArguments)
import transformers
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
import pandas as pd
from trl import SFTTrainer
from peft.tuners.lora import LoraLayer
import datetime


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/matheusalb/anaconda3/envs/llm/lib/python3.11/site-packages/bitsandbytes/libbitsandbytes_cuda117.so
CUDA SETUP: CUDA runtime path found: /home/matheusalb/anaconda3/envs/llm/lib/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /home/matheusalb/anaconda3/envs/llm/lib/python3.11/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...


  from .autonotebook import tqdm as notebook_tqdm


-----

# Funções

## Carregamento do modelo

In [2]:
def create_and_prepare_model(model_name):
    """Load a 4-bit quantized causal LM, build a LoRA config and load the tokenizer.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            model and the tokenizer.

    Returns:
        A ``(model, peft_config, tokenizer)`` tuple. The LoRA config is only
        constructed here; this function does not apply it to the model.
    """
    # 4-bit NF4 quantization with double quantization, float16 as compute dtype.
    quantization = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
    )

    # device_map maps the empty module name to device 0.
    # trust_remote_code=True lets the checkpoint's own modeling code be used.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=quantization,
        device_map={"": 0},
        trust_remote_code=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    # Reuse the end-of-sequence token as the padding token.
    tokenizer.pad_token = tokenizer.eos_token

    lora_targets = [
        "query_key_value",
        "dense",
        "dense_h_to_4h",
        "dense_4h_to_h",
    ]
    peft_config = LoraConfig(
        r=64,  # alternatives noted by the author: 64, 32, 16
        lora_alpha=16,
        lora_dropout=0.1,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=lora_targets,
    )

    return model, peft_config, tokenizer


## Cria dataset

In [3]:
def get_dataset(path, final_string):
    """Build a text dataset of prompt/response examples from a CSV file.

    The CSV is read with pandas and rows that are entirely empty are dropped.
    Each remaining row must provide the columns ``idSugestaoResposta``,
    ``comentario`` and ``sugestaoResposta``.

    Args:
        path: Location of the CSV file to read.
        final_string: Text appended to the end of every example (the caller
            in this notebook passes the tokenizer's EOS token).

    Returns:
        The result of ``Dataset.from_dict`` on a dict with two parallel lists:
        ``idSugestaoResposta`` (ids converted with ``int``) and ``text`` (the
        formatted examples).

    Raises:
        ValueError, TypeError: If a row's ``idSugestaoResposta`` cannot be
            converted to ``int``. The offending row is printed first.
    """
    # An earlier run (version 1, falcon instruct, 100 steps) used a longer
    # "### Human: ... ### Reply:" prompt; it was replaced by the template below.

    def build_text(row):
        # NOTE(review): the template contains "a ao" (likely a typo). It is left
        # untouched because changing it would alter the training prompt.
        return f'''\
Escreva, em Português, um comentário de resposta a ao seguinte comentário de um cliente ao seu restaurante: 
{row['comentario']}
###
{row['sugestaoResposta']}
''' + final_string

    df = pd.read_csv(path, skip_blank_lines=True)
    df = df.dropna(how='all')

    messages = {'idSugestaoResposta': [], 'text': []}
    for _, linha in df.iterrows():
        text = build_text(linha)
        try:
            suggestion_id = int(linha['idSugestaoResposta'])
        except (TypeError, ValueError):
            # Show the offending row, then fail with the original exception
            # instead of swallowing it and converting a second time.
            print(linha)
            raise
        messages['idSugestaoResposta'].append(suggestion_id)
        messages['text'].append(text)

    return Dataset.from_dict(messages)

-----

# Execução

In [4]:
# Base checkpoint and the CSV splits used for fine-tuning.
model_name = 'tiiuae/falcon-7b'
train_path = '../../data/train_base.csv'
validation_path = '../../data/validation_base.csv'

# Every run gets its own output directory: the ISO timestamp is appended
# directly to the prefix (no separator in between).
training_arguments = TrainingArguments(
    output_dir=f"./results/qlora_falcon7b_pure{datetime.datetime.now().isoformat()}",
    # Batching: 2 sequences per device, gradients accumulated over 4 steps.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    group_by_length=True,
    # Optimization.
    # NOTE(review): with a "constant" schedule the warmup_ratio is presumably
    # ignored; confirm against the scheduler documentation.
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    max_grad_norm=0.3,
    fp16=True,
    # Run length and checkpoint/log cadence (a longer run of 10000 steps was
    # noted by the author as an alternative).
    max_steps=100,
    save_steps=10,
    logging_steps=10,
)

model, peft_config, tokenizer = create_and_prepare_model(model_name)
# Without this a warning is emitted; re-enable it for inference.
model.config.use_cache = False

# Every example is terminated with the tokenizer's EOS token.
train_data = get_dataset(train_path, tokenizer.eos_token)
validation_data = get_dataset(validation_path, tokenizer.eos_token)

# Supervised fine-tuning is popularly known as instruction fine-tuning.
# NOTE(review): no evaluation strategy is configured in training_arguments and
# the training log of this run shows no eval metrics, so validation_data may
# never be evaluated; confirm.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_arguments,
    peft_config=peft_config,
    train_dataset=train_data,
    eval_dataset=validation_data,
    dataset_text_field="text",
    max_seq_length=512,
    packing=False,
)


Downloading (…)lve/main/config.json: 100%|██████████| 1.05k/1.05k [00:00<00:00, 10.3MB/s]
Downloading (…)figuration_falcon.py: 100%|██████████| 6.70k/6.70k [00:00<00:00, 44.1MB/s]
A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b:
- configuration_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
Downloading (…)n/modeling_falcon.py: 100%|██████████| 56.9k/56.9k [00:00<00:00, 4.38MB/s]
A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b:
- modeling_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
Downloading shards: 100%|██████████| 2/2 [00:00<00:00,  4.89it/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:23<00:00, 11.60s/it]
Downloading (…)neration_config.json: 100%|██████

In [5]:
# Optional: uncomment the two lines below to set WANDB_NOTEBOOK_NAME before
# training; wandb reports "Failed to detect the name of this notebook" and
# suggests this variable when it is not set.
# import os
# os.environ['WANDB_NOTEBOOK_NAME'] = 'TESTE_2'
# Run the fine-tuning; as the last expression, the returned TrainOutput is
# displayed as the cell result.
trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: Currently logged in as: [33mmatheusalb[0m. Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/100 [00:00<?, ?it/s]You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
 10%|█         | 10/100 [01:07<09:53,  6.59s/it]

{'loss': 1.9973, 'learning_rate': 0.0002, 'epoch': 8.0}


 20%|██        | 20/100 [02:49<09:47,  7.35s/it]

{'loss': 1.0042, 'learning_rate': 0.0002, 'epoch': 16.0}


 30%|███       | 30/100 [04:16<08:13,  7.06s/it]

{'loss': 0.1993, 'learning_rate': 0.0002, 'epoch': 24.0}


 40%|████      | 40/100 [05:46<07:04,  7.08s/it]

{'loss': 0.0162, 'learning_rate': 0.0002, 'epoch': 32.0}


 50%|█████     | 50/100 [07:18<05:55,  7.11s/it]

{'loss': 0.0076, 'learning_rate': 0.0002, 'epoch': 40.0}


 60%|██████    | 60/100 [08:37<04:09,  6.24s/it]

{'loss': 0.0061, 'learning_rate': 0.0002, 'epoch': 48.0}


 70%|███████   | 70/100 [09:53<03:05,  6.17s/it]

{'loss': 0.0054, 'learning_rate': 0.0002, 'epoch': 56.0}


 80%|████████  | 80/100 [11:33<02:09,  6.46s/it]

{'loss': 0.0053, 'learning_rate': 0.0002, 'epoch': 64.0}


 90%|█████████ | 90/100 [12:59<01:02,  6.29s/it]

{'loss': 0.005, 'learning_rate': 0.0002, 'epoch': 72.0}


100%|██████████| 100/100 [14:20<00:00,  6.80s/it]

{'loss': 0.005, 'learning_rate': 0.0002, 'epoch': 80.0}


100%|██████████| 100/100 [14:46<00:00,  8.87s/it]

{'train_runtime': 891.7551, 'train_samples_per_second': 0.897, 'train_steps_per_second': 0.112, 'train_loss': 0.3251462835446, 'epoch': 80.0}





TrainOutput(global_step=100, training_loss=0.3251462835446, metrics={'train_runtime': 891.7551, 'train_samples_per_second': 0.897, 'train_steps_per_second': 0.112, 'train_loss': 0.3251462835446, 'epoch': 80.0})

In [6]:
# NOTE(review): leftover scratch cell; it only echoes the literal 2 and looks safe to delete.
2

2