In [None]:
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q datasets
!pip install git+https://github.com/huggingface/transformers
!pip install packaging ninja sentencepiece
!pip install flash-attn --no-build-isolation

In [None]:
# Log in to the Hugging Face Hub through its command-line tool.
# Presumably required for the push_to_hub call in the last cell — confirm.
!huggingface-cli login

In [None]:
import torch
from transformers import LlamaTokenizer
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig


# Hub id of the base checkpoint that gets fine-tuned.
model_id = "upstage/SOLAR-10.7B-v1.0"

# bitsandbytes settings: load weights in 4-bit NF4 with double quantization
# and run the quantized matmuls in bfloat16.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

tokenizer = LlamaTokenizer.from_pretrained(model_id)

# The empty-string key in device_map maps the whole model to device 0.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    attn_implementation="flash_attention_2",
    device_map={"": 0},
    quantization_config=bnb_config,
)

In [None]:
from peft import prepare_model_for_kbit_training

# Turn on gradient checkpointing on the quantized base model.
model.gradient_checkpointing_enable()
# Hand the model to PEFT's k-bit training preparation; the returned object
# replaces `model` for every later cell.
model = prepare_model_for_kbit_training(model)

In [None]:
def print_trainable_parameters(model):
    """
    Print how many of the model's parameters are trainable.

    Iterates over ``model.named_parameters()``, summing ``numel()`` for every
    parameter and, separately, for those with ``requires_grad`` set, then
    prints both totals and the trainable share as a percentage.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs (e.g. a ``torch.nn.Module``).

    Returns:
        None. The summary is written to stdout.

    Note:
        A model without parameters reports ``trainable%: 0.0`` instead of
        raising ``ZeroDivisionError``.
        NOTE(review): counts come straight from ``numel()``; for packed
        quantized weights this may differ from the logical parameter count —
        verify before quoting the numbers.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # Guard the division so an empty model does not crash the report.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings for causal language modelling: rank-8 adapters on the
# modules named q_proj and k_proj, scaling alpha of 32, 5% adapter dropout,
# and bias mode "none".
config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj"],
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the prepared model with the adapters, then report the trainable share.
model = get_peft_model(model, config)
print_trainable_parameters(model)

In [None]:
import json

# Read the raw training samples. The context manager guarantees the file
# handle is closed, and the explicit encoding keeps decoding independent of
# the platform default.
with open('./train-100k-text.json', encoding="utf-8") as f:
    data = json.load(f)

# Render every sample into a "### User / ### Assistant" prompt and tokenize it.
# Assumes each sample has a "conversations" list whose items 0 and 1 carry the
# user and assistant text under "value" — TODO confirm against the dataset.
# NOTE(review): the template begins with a literal "<s>"; if the tokenizer also
# prepends its own BOS token the sequence may start with two — verify.
data_prepro = map(lambda sample: tokenizer(f"""<s> ### User:
{sample["conversations"][0]["value"]}

### Assistant:
{sample["conversations"][1]["value"]}
"""), data)
# Materialize the lazy map so the result can be used as a dataset with a length.
prepro = list(data_prepro)

In [None]:
import transformers

# Pad-token setup originally noted as "needed for LLaMa tokenizer"; left
# disabled here — presumably this tokenizer already defines one, verify.
#tokenizer.pad_token = tokenizer.eos_token
#tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Hyperparameters for a single-epoch run: bf16 training, 8-bit
# bitsandbytes AdamW, and a log line on every step.
training_args = transformers.TrainingArguments(
    output_dir="outputs",
    num_train_epochs=1,
    per_device_train_batch_size=32,
    learning_rate=2e-4,
    warmup_steps=2,
    optim="adamw_bnb_8bit",
    bf16=True,
    logging_steps=1,
)

# mlm=False selects causal-LM collation rather than masked-LM.
causal_lm_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=prepro,
    data_collator=causal_lm_collator,
)

# Turn the generation cache off while training to silence the warnings;
# switch it back on before running inference.
model.config.use_cache = False
trainer.train()

In [None]:
# Trainer.push_to_hub() treats its first positional argument as the commit
# message, not the repository name. The target repository is taken from
# args.hub_model_id (falling back to the output_dir name), so set it
# explicitly before pushing.
trainer.args.hub_model_id = "CodegebraGPT-10B"
trainer.push_to_hub()