# Treinando o modelo Mistral

Este fine-tuning utilizou learning rate = 2e-4.

In [None]:
!pip install -q transformers==4.40.1
!pip install -q peft==0.10.0
!pip install -q bitsandbytes==0.46.0
!pip install -q accelerate==0.29.3
!pip install -q datasets==2.19.0
!pip install -q trl==0.8.6
!pip install -q huggingface_hub==0.22.2

In [None]:
import random
import numpy as np
import torch # Se estiver usando PyTorch

# Single seed shared by every random number generator used in the notebook.
seed = 21

random.seed(seed)
np.random.seed(seed)

# Seed PyTorch unconditionally: the CPU generator must be seeded even when
# no GPU is present (previously this only happened when CUDA was available).
torch.manual_seed(seed)
if torch.cuda.is_available():
    # Seed every visible GPU, not only the current device.
    torch.cuda.manual_seed_all(seed)
    # Ask cuDNN for deterministic kernels and disable its auto-tuner.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Sanity check: these two values should be identical on every fresh run.
print("Random Python:", random.random())
print("Random NumPy:", np.random.rand())

Random Python: 0.16494947983319797
Random NumPy: 0.04872488080912729


Carregando o dataset Spider

In [None]:
from datasets import load_dataset
# Load the "spider" dataset through the `datasets` library.
spider_dataset = load_dataset("spider")
# Bare expression so the notebook displays the splits and their features.
spider_dataset

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


DatasetDict({
    train: Dataset({
        features: ['db_id', 'query', 'question', 'query_toks', 'query_toks_no_value', 'question_toks'],
        num_rows: 7000
    })
    validation: Dataset({
        features: ['db_id', 'query', 'question', 'query_toks', 'query_toks_no_value', 'question_toks'],
        num_rows: 1034
    })
})

In [None]:
def formatting_to_prompts(example):
    """Build the instruction-style training text for a single example.

    Args:
        example: Mapping that provides a ``question`` and a ``query`` entry.

    Returns:
        A dict with a single ``text`` key whose value has the form
        ``<s>[INST] <question> [/INST] <query></s>``.
    """
    # NOTE(review): `<s>` is written into the text by hand; confirm that the
    # tokenizer does not prepend a second BOS token when this text is tokenized.
    template = "<s>[INST] {} [/INST] {}</s>"
    prompt = template.format(example['question'], example['query'])
    return {"text": prompt}

# Preview the prompt produced for the first training example.
formatting_to_prompts(spider_dataset["train"][0])

{'text': '<s>[INST] How many heads of the departments are older than 56 ? [/INST] SELECT count(*) FROM head WHERE age  >  56</s>'}

In [None]:
# Apply the prompt formatter to every training example; the result gains a `text` column.
train = spider_dataset['train'].map(formatting_to_prompts)

In [None]:
# Show the formatted prompt of the first training example.
train[0]['text']

'<s>[INST] How many heads of the departments are older than 56 ? [/INST] SELECT count(*) FROM head WHERE age  >  56</s>'

In [None]:
# Display the dataset summary (features and number of rows) after the mapping.
train

Dataset({
    features: ['db_id', 'query', 'question', 'query_toks', 'query_toks_no_value', 'question_toks', 'text'],
    num_rows: 7000
})

Aqui, usamos somente metade do dataset de treino: todos os exemplos de índice par. Esse corte foi feito para reduzir o tempo de treino e evitar o possível limite de uso de GPU do Google Colab.

In [None]:
from datasets import Dataset

# Read the `text` column once. Evaluating train['text'] inside the
# comprehension would fetch the whole column again for every kept example.
all_texts = train['text']

# Keep only the even-indexed examples, i.e. half of the training set.
processed_examples = [{'text': all_texts[i]} for i in range(0, len(all_texts), 2)]

# NOTE: this rebinds `train`, so re-running this cell halves the data again.
train = Dataset.from_list(processed_examples)

In [None]:
# Display the dataset summary after keeping only the even-indexed examples.
train

Dataset({
    features: ['text'],
    num_rows: 3500
})

**IMPORTANTE:** Troque "hf_YOUR_TOKEN_HERE" pelo seu próprio token do Hugging Face. Para ter acesso ao Mistral, você precisa concordar com os termos no site:

https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2

In [None]:
import os

from huggingface_hub import login

# Prefer a token supplied through the HF_TOKEN environment variable so the
# secret does not have to be written into the notebook. The placeholder is
# kept only as a fallback for manual editing; never commit a real token.
hf_token = os.environ.get("HF_TOKEN", "hf_YOUR_TOKEN_HERE")
login(token=hf_token)

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


Baixando o modelo

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import torch
model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# 4-bit NF4 quantization with double quantization; computation runs in bfloat16.
# NOTE(review): the training arguments enable bf16 only on GPUs with compute
# capability >= 8, while this compute dtype is always bfloat16 — confirm this
# is intended on older GPUs.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# `trust_remote_code` is not passed: the Mistral architecture ships with
# transformers, so no code from the model repository needs to be executed.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)

# Disable the generation cache up front; otherwise training emits the
# "`use_cache=True` is incompatible with gradient checkpointing" warning and
# switches it off implicitly.
model.config.use_cache = False

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token for padding.
# NOTE(review): with pad == eos, a collator that masks padding positions may
# also mask the real `</s>` at the end of each example — confirm the model
# still learns to stop, or switch to a dedicated pad token.
tokenizer.pad_token = tokenizer.eos_token

# Prepare model for k-bit training (important for QLoRA)
model = prepare_model_for_kbit_training(model)

# LoRA adapters of rank 16 on the listed projection modules.
lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"], # Modules to apply LoRA to
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora_config)

# Print trainable parameters to verify LoRA setup
model.print_trainable_parameters()

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

trainable params: 41,943,040 || all params: 7,283,675,136 || trainable%: 0.5758499550960753


In [None]:
from transformers import TrainingArguments
from trl import SFTTrainer

# Use bfloat16 only on GPUs with compute capability >= 8 (Ampere and newer).
use_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8

training_arguments = TrainingArguments(
    output_dir="./results",

    num_train_epochs=1,
    # 4 sequences per device x 16 accumulation steps per optimizer update.
    per_device_train_batch_size=4,
    gradient_accumulation_steps=16,

    save_steps=50,
    logging_steps=50,

    learning_rate=2e-4,

    # Pass the notebook-level seed explicitly: the Trainer re-seeds the RNGs
    # from this argument, so leaving it out would replace the seed chosen at
    # the top of the notebook with the library default.
    seed=seed,

    fp16=False, # Set to True if your GPU supports it and you're not using bfloat16 compute_dtype
    bf16=use_bf16,

    max_grad_norm=0.3,
    max_steps=-1,  # -1: the number of steps is derived from num_train_epochs
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
    disable_tqdm=False,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,

    train_dataset=train,
    # NOTE(review): `model` was already wrapped with get_peft_model earlier in
    # the notebook; confirm whether passing peft_config again has any effect.
    peft_config=lora_config,
    dataset_text_field="text",

    max_seq_length=512,
    args=training_arguments,
)

Map:   0%|          | 0/3500 [00:00<?, ? examples/s]



Treinando o modelo e salvando depois

In [None]:
# Run the training loop configured by `training_arguments`.
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
  return fn(*args, **kwargs)


Step,Training Loss
50,1.4737


  return fn(*args, **kwargs)


TrainOutput(global_step=54, training_loss=1.4296706296779491, metrics={'train_runtime': 5188.3054, 'train_samples_per_second': 0.675, 'train_steps_per_second': 0.01, 'total_flos': 1.4886398192418816e+16, 'train_loss': 1.4296706296779491, 'epoch': 0.9874285714285714})

In [None]:
# Save the trained model through the trainer into the local directory "mistral_spider_qlora_4".
trainer.save_model("mistral_spider_qlora_4")