In [None]:
# Environment setup for the fine-tuning run (shell escapes, executed top to bottom).
# NOTE(review): only xformers is version-pinned; every other package floats to whatever
# is latest at install time, so a re-run weeks later may resolve a different stack.
# NOTE(review): `%pip` targets the running kernel's environment explicitly; `!pip` relies
# on the shell's pip pointing at the same interpreter - fine on Colab, confirm elsewhere.

# Core training stack, installed with --no-deps so pip does not pull in (or replace)
# their dependencies - presumably to keep the runtime's preinstalled torch; confirm.
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29 peft trl triton
# Unsloth helper packages, also without dependency resolution.
!pip install --no-deps cut_cross_entropy unsloth_zoo
# Tokenizer / dataset / Hub utilities, installed normally (dependencies resolved).
!pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
# Unsloth itself, without dependencies (its requirements were installed above).
!pip install --no-deps unsloth
# Finally upgrade peft and transformers to their latest releases; this step does
# resolve dependencies and may therefore change versions installed earlier.
!pip install -U peft transformers

In [None]:
from unsloth import FastLanguageModel

# Load the "titan-writer" base model together with its tokenizer through Unsloth.
# The weights are requested in 4-bit form and the context window is capped at 2048 tokens.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="titan-writer",
    load_in_4bit=True,     # request 4-bit quantized weights
    dtype=None,            # no explicit dtype; presumably Unsloth auto-selects fp16/bf16 - confirm
    max_seq_length=2048,   # must stay in sync with the trainer's max_seq_length
)

# Wrap the loaded base model with LoRA adapters; `model` is rebound to the PEFT-wrapped model.
model = FastLanguageModel.get_peft_model(
    model,
    # Modules that receive adapters: attention projections, MLP projections,
    # plus the input embeddings and the output head.
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
        "embed_tokens", "lm_head",
    ],
    # Adapter capacity and scaling (rank, alpha, rank-stabilized LoRA switched on).
    r=256,
    lora_alpha=256,
    use_rslora=True,
    # NOTE(review): Unsloth's example notebooks describe lora_dropout=0 and bias="none"
    # as the optimized settings - confirm the values below are intentional.
    lora_dropout=0.01,
    bias="all",
    loftq_config=None,
    # Unsloth's own gradient-checkpointing mode.
    use_gradient_checkpointing="unsloth",
    # Fixed seed so adapter initialization is repeatable.
    random_state=42,
)

In [None]:
from google.colab import drive
from datasets import load_dataset

# Colab-only step: mount Google Drive so the training file is reachable on the local filesystem.
drive.mount("/content/drive")

# Read the JSON Lines training file from the mounted Drive; the trainer cell reads
# the "train" split and a "text" field from it.
dataset = load_dataset(
    "json",
    data_files="/content/drive/MyDrive/datasets/titan-writer.jsonl",
)

In [None]:
from unsloth import UnslothTrainer, UnslothTrainingArguments, is_bfloat16_supported

# Supervised fine-tuning run over the "text" field of the training split.
#
# Mixed precision is chosen from the hardware instead of being hard-coded to fp16:
# the model was loaded with dtype=None, which may resolve to bfloat16 on GPUs that
# support it, and an fp16 trainer setting would then conflict with the model dtype.
trainer = UnslothTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset["train"],
    dataset_text_field="text",
    max_seq_length=2048,          # keep equal to the value used when loading the model
    packing=False,
    args=UnslothTrainingArguments(
        output_dir="titan-writer-checkpoint",
        # Batch shape: 8 sequences per step, no gradient accumulation.
        per_device_train_batch_size=8,
        gradient_accumulation_steps=1,
        num_train_epochs=10,
        # Optimizer: the embedding matrices get a 10x smaller learning rate than the adapters.
        learning_rate=1e-4,
        embedding_learning_rate=1e-5,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="constant",
        max_grad_norm=1.0,
        # Exactly one of fp16/bf16 is enabled, matching what the GPU supports.
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        seed=42,
        # Logging and checkpointing: log every step, checkpoint once per epoch.
        # (The former save_steps=1 was dropped: it is only consulted when
        # save_strategy="steps", so it had no effect here.)
        logging_steps=1,
        report_to="tensorboard",
        save_strategy="epoch",
        gradient_checkpointing=True,
    ),
)
trainer_stats = trainer.train()

In [None]:
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.trainer_utils import get_last_checkpoint
from peft import PeftModel

# Merge the trained LoRA adapter into a float16 copy of the base model and export
# the standalone result (weights + tokenizer) to "titan-writer-model".

# Locate the adapter before loading any weights so a missing checkpoint fails fast.
# With save_strategy="epoch" the Trainer writes checkpoint-<step>/ sub-folders and
# the output directory itself holds no adapter unless one was saved there explicitly.
# Prefer an adapter at the root if present, otherwise use the most recent checkpoint.
checkpoint_root = "/content/titan-writer-checkpoint"
if os.path.isfile(os.path.join(checkpoint_root, "adapter_config.json")):
    adapter_dir = checkpoint_root
else:
    adapter_dir = get_last_checkpoint(checkpoint_root) if os.path.isdir(checkpoint_root) else None
    if adapter_dir is None:
        raise FileNotFoundError(
            f"No LoRA adapter found under {checkpoint_root!r}: expected adapter_config.json "
            "or a checkpoint-<step>/ sub-folder. Run the training cell first."
        )

# NOTE(review): this loads a second, full-precision copy of the base model onto GPU 0.
# If the training objects are still alive in the kernel this may exceed GPU memory -
# consider restarting the runtime before running this cell.
base_model = AutoModelForCausalLM.from_pretrained(
    "titan-writer",
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map={"": 0},          # place the whole model on GPU 0
)

# Attach the adapter, then fold its weights into the base model and drop the PEFT wrappers.
model = PeftModel.from_pretrained(base_model, adapter_dir)
merged_model = model.merge_and_unload()

# The tokenizer is read from the same directory the adapter came from.
tokenizer = AutoTokenizer.from_pretrained(adapter_dir, trust_remote_code=True)
tokenizer.padding_side = "right"

merged_model.save_pretrained("titan-writer-model")
tokenizer.save_pretrained("titan-writer-model")