In [1]:
!pip install accelerate peft bitsandbytes git+https://github.com/huggingface/transformers trl py7zr auto-gptq optimum

Collecting git+https://github.com/huggingface/transformers
  Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-jngilc7y
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /tmp/pip-req-build-jngilc7y
  Resolved https://github.com/huggingface/transformers to commit 29e7a1e1834f331a4916853ecd58549ed78235d6
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting accelerate
  Downloading accelerate-0.25.0-py3-none-any.whl (265 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting peft
  Downloading peft-0.7.1-py3-none-any.whl (168 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.3/168.3 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbyt

In [9]:
# Authenticate with the Hugging Face Hub via the interactive login widget.
# A login is needed because the training arguments below set push_to_hub=True.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [10]:
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig, AutoPeftModelForCausalLM, prepare_model_for_kbit_training, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, TrainingArguments
from trl import SFTTrainer
import os

# Pull the "train" split of the Alpaca instruction dataset and expose it as a
# pandas DataFrame for the prompt-building step.
data = load_dataset(
    "tatsu-lab/alpaca",
    split="train",
)
mistral_ai_data_df = data.to_pandas()

In [31]:
# Quick look at the frame: its dimensions, its column labels, and the first row
# of each text field (each shown as a one-element Series).
(
    mistral_ai_data_df.shape,
    mistral_ai_data_df.columns,
    *(
        mistral_ai_data_df[:1][field]
        for field in ('instruction', 'input', 'output', 'text')
    ),
)

((500, 4),
 Index(['instruction', 'input', 'output', 'text'], dtype='object'),
 0    Give three tips for staying healthy.
 Name: instruction, dtype: object,
 0    
 Name: input, dtype: object,
 0    1.Eat a balanced diet and make sure to include...
 Name: output, dtype: object,
 0    ###Human: Give three tips for staying healthy....
 Name: text, dtype: object)

In [32]:
# Keep the first 5,000 rows. .copy() produces an independent frame, so adding
# the "text" column below does not write into a slice of the original frame
# (which is what triggers pandas' SettingWithCopyWarning).
mistral_ai_data_df = mistral_ai_data_df[:5000].copy()

# Build the single training string per row:
#   "###Human: <instruction> <input> ###Assistant: <output>"
# Column-wise string concatenation replaces the row-by-row apply().
mistral_ai_data_df["text"] = (
    "###Human: "
    + mistral_ai_data_df["instruction"]
    + " "
    + mistral_ai_data_df["input"]
    + " ###Assistant: "
    + mistral_ai_data_df["output"]
)

# Name spelling (sic) is kept as-is: the trainer cell refers to `mistaal_ai_data`.
mistaal_ai_data = Dataset.from_pandas(mistral_ai_data_df)

In [33]:
# Load the tokenizer published alongside the GPTQ checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ",
)
# Use the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [34]:
# Loading options for the already-quantized 4-bit GPTQ checkpoint.
# ExLlama kernels are switched off because this model is about to be fine-tuned;
# the Transformers GPTQ guide recommends disabling them when training with PEFT.
quantization_config_loading = GPTQConfig(bits=4, use_exllama=False, tokenizer=tokenizer)
mistralai_model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ",
    quantization_config=quantization_config_loading,
    device_map="auto",
)

# These are config options, so they are set on model.config. The original set
# use_cache as a plain attribute on the model object, which the config never saw.
mistralai_model.config.use_cache = False
mistralai_model.config.pretraining_tp = 1

# Turn on gradient checkpointing, then run PEFT's k-bit training preparation
# on the quantized model.
mistralai_model.gradient_checkpointing_enable()
mistralai_model = prepare_model_for_kbit_training(mistralai_model)

You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute and has already quantized weights. However, loading attributes (e.g. ['use_cuda_fp16', 'use_exllama', 'max_input_length', 'exllama_config', 'disable_exllama']) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.


In [35]:
# LoRA adapter settings: adapters are attached to the modules named
# "q_proj" and "v_proj", with rank 16, alpha 16 and 5% dropout.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)
# Wrap the prepared base model with the LoRA configuration.
model = get_peft_model(mistralai_model, peft_config)

# Trainer hyper-parameters.
# NOTE(review): both num_train_epochs and max_steps are set; per the
# TrainingArguments documentation a positive max_steps takes precedence.
training_arguments = TrainingArguments(
    output_dir="mistral-finetuned-alpaca",
    push_to_hub=True,
    # batching
    per_device_train_batch_size=8,
    gradient_accumulation_steps=1,
    # optimisation
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    fp16=True,
    # schedule length
    num_train_epochs=1,
    max_steps=250,
    # logging / checkpointing
    logging_steps=100,
    save_strategy="epoch",
)

In [36]:
# Supervised fine-tuning on the "text" column of the prepared dataset.
# `model` is the LoRA-wrapped model already built with get_peft_model(...), so
# it is passed directly and peft_config is not handed to the trainer again;
# the original applied the same LoRA config twice and left `model` unused.
mistralai_trainer = SFTTrainer(
    model=model,
    train_dataset=mistaal_ai_data,
    dataset_text_field="text",
    args=training_arguments,
    tokenizer=tokenizer,
    packing=False,
    max_seq_length=512,
)

try:
    mistralai_trainer.train()
except AssertionError as err:
    # Report the actual assertion message, then re-raise: swallowing the error
    # would let later cells continue with a model that never finished training.
    print(f"Training aborted by an AssertionError: {err}")
    raise

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
