In [None]:
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git 
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q -U datasets
!pip install -q -U trl
!pip install -q -U einops

In [1]:
from dataclasses import dataclass, field
from typing import Optional

import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    AutoTokenizer,
    TrainingArguments,
)
from peft.tuners.lora import LoraLayer

from trl import SFTTrainer

caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


In [2]:
def create_and_prepare_model(model_name="facebook/bart-base"):
    """Load a 4-bit quantised causal LM together with its LoRA config and tokenizer.

    Args:
        model_name: Checkpoint identifier passed to both
            ``AutoModelForCausalLM.from_pretrained`` and
            ``AutoTokenizer.from_pretrained``. Defaults to the checkpoint this
            notebook was written against.

    Returns:
        tuple: ``(model, peft_config, tokenizer)`` where ``model`` is the
        quantised model placed on device 0, ``peft_config`` is the
        ``LoraConfig`` describing the adapters (it is NOT applied here; the
        caller must hand it to the trainer), and ``tokenizer`` is the matching
        tokenizer with a pad token guaranteed to be set.

    NOTE(review): with the default checkpoint the load emits a warning that
    ``decoder.embed_tokens.weight`` and ``lm_head.weight`` of
    ``BartForCausalLM`` are newly initialised, so a natively decoder-only
    checkpoint may be a better starting point; confirm the intended model.
    """
    # NF4 4-bit weights with double quantisation; matmuls are computed in fp16.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
    )

    # `device_map={"": 0}` places the whole model on device index 0.
    # NOTE(review): `trust_remote_code=True` allows code shipped with the
    # checkpoint repository to run; only pass checkpoints you trust.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map={"": 0},
        trust_remote_code=True,
    )

    # LoRA adapters on the attention query/value projections.
    peft_config = LoraConfig(
        lora_alpha=16,
        lora_dropout=0.1,
        r=64,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "v_proj"],
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    # Fall back to EOS only when the checkpoint ships no pad token. Overwriting
    # an existing pad token with EOS makes padding indistinguishable from the
    # end-of-sequence marker for anything that masks on the pad token id.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    return model, peft_config, tokenizer

In [3]:
# Hyper-parameters for the fine-tuning run, grouped by concern.
training_arguments = TrainingArguments(
    # --- bookkeeping -------------------------------------------------------
    output_dir="./results",
    save_steps=1000,
    logging_steps=20,
    # --- batching: 16 sequences per device x 16 accumulation steps ---------
    per_device_train_batch_size=16,
    gradient_accumulation_steps=16,
    group_by_length=True,
    # --- optimisation ------------------------------------------------------
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    max_grad_norm=0.3,
    max_steps=20000,
    # --- precision: both mixed-precision modes are switched off ------------
    fp16=False,
    bf16=False,
)

In [5]:
# Build the 4-bit quantised model, the LoRA config and the tokenizer in one call.
model, peft_config, tokenizer = create_and_prepare_model()
# Switch off `use_cache` on the model config ahead of training.
model.config.use_cache = False

Some weights of BartForCausalLM were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['decoder.embed_tokens.weight', 'lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
# Set the config's `max_length` to 1024, the same value that is passed to
# SFTTrainer as `max_seq_length`.
model.config.max_length = 1024

In [7]:
# Load the `train` split of the `databricks/databricks-dolly-15k` dataset.
dataset = load_dataset(
    "databricks/databricks-dolly-15k",
    split="train",
)
def formatting_func(example):
    """Render one record, or a batch of records, into training prompts.

    The function accepts both call shapes a trainer may use:

    * a single record, where ``instruction``, ``context`` and ``response``
      are plain strings;
    * a batch (as produced by a batched ``datasets`` map), where each of those
      keys holds a list with one entry per record.

    The previous implementation interpolated the fields directly, so a batch
    was rendered as ONE prompt containing the ``repr`` of whole Python lists.

    Args:
        example: Mapping with ``instruction``, ``context`` and ``response``
            keys, holding either strings or equally sized lists of strings.

    Returns:
        list[str]: One formatted prompt per record (a one-element list for a
        single record).
    """
    instructions = example['instruction']
    contexts = example['context']
    responses = example['response']

    # Single record: wrap the scalars so both shapes share one code path.
    if isinstance(instructions, str):
        instructions, contexts, responses = [instructions], [contexts], [responses]

    return [
        f"### Question: {instruction}\n {context} ### Answer: {response}"
        for instruction, context, response in zip(instructions, contexts, responses)
    ]

In [None]:
# Supervised fine-tuning on the formatted dataset.
# `peft_config` must be handed to the trainer: `create_and_prepare_model` only
# builds the LoRA configuration, it does not attach adapters to the model.
# Without it the run would target the bare 4-bit quantised weights instead of
# the LoRA adapters.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
    formatting_func=formatting_func,
    max_seq_length=1024,
    packing=False,
)

trainer.train()