In [None]:
import os
import sys

# configuration
REPO_URL = "https://github.com/talspec2/red-string.git"
PROJECT_DIR = "red-string"

# clone from github if not already present, otherwise pull the latest changes
if not os.path.exists(PROJECT_DIR):
    print(f"ðŸš€ Cloning {PROJECT_DIR} from GitHub...")
    !git clone $REPO_URL
else:
    print(f"ðŸ”„ Updating {PROJECT_DIR}...")
    !cd $PROJECT_DIR && git pull

# install dependencies from requirements.txt
print("ðŸ“¦ Installing dependencies from requirements.txt...")
%pip install -r $PROJECT_DIR/requirements.txt -q

# set up paths
if os.path.abspath(PROJECT_DIR) not in sys.path:
    sys.path.append(os.path.abspath(PROJECT_DIR))

In [None]:

from unsloth import is_bfloat16_supported
from datasets import Dataset
from trl import SFTTrainer
from transformers import TrainingArguments

from train_utils.config import SYSTEM_INSTRUCTION, MAX_SEQ_LENGTH
from train_utils.data_utils import process, prompt
from train_utils.model_utils import load_model

In [None]:
# Build the training examples with the project's `process` helper and wrap
# them in a Hugging Face Dataset.
# NOTE(review): `amount` presumably limits how many examples are produced —
# confirm against train_utils.data_utils.process.
EXAMPLE_AMOUNT = 20000

raw_data = process(SYSTEM_INSTRUCTION, amount=EXAMPLE_AMOUNT)
dataset = Dataset.from_list(raw_data)

print(f"âœ… Data processed. {len(dataset)} examples ready.")

In [None]:
# `load_model` (from train_utils.model_utils) is called with its defaults and
# yields the model together with its tokenizer; both are used by later cells.
loaded_pair = load_model()
model, tokenizer = loaded_pair

print("âœ… Model loaded and adapters attached.")

In [None]:
def _format_batch(batch):
    """Run the project's `prompt` helper on one batch, using the loaded tokenizer."""
    return prompt(batch, tokenizer)


# NOTE: this rebinds `dataset`, so re-running the cell maps the already-mapped
# data again; re-run the data-processing cell first if you need a clean start.
dataset = dataset.map(_format_batch, batched=True)

In [None]:
# Decide the mixed-precision mode once: bf16 when the helper reports support,
# fp16 otherwise (exactly one of the two flags is enabled).
use_bf16 = is_bfloat16_supported()

# Hyperparameters are kept in one object so they can be inspected or tweaked
# without touching the trainer construction below.
training_args = TrainingArguments(
    output_dir="outputs",
    seed=3407,
    # schedule
    num_train_epochs=1,
    warmup_steps=100,
    lr_scheduler_type="linear",
    # batching: 4 per device x 4 accumulation steps
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    # optimizer
    optim="adamw_8bit",
    learning_rate=2e-4,
    weight_decay=0.01,
    # precision
    bf16=use_bf16,
    fp16=not use_bf16,
    logging_steps=10,
)

# Supervised fine-tuning over the "text" column of the prepared dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=MAX_SEQ_LENGTH,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)

print("Starting Training")
trainer.train()
print("Done!")

In [None]:
# Export the fine-tuned model in GGUF format under `save_path`.
save_path = "red-string-model"

# "q4_k_m" for smaller/faster, "q8_0" for better quality
gguf_quantization = "q8_0"

trainer.model.save_pretrained_gguf(save_path, tokenizer, quantization_method=gguf_quantization)

print(f"Model saved to {save_path}")

In [None]:
from google.colab import drive
import shutil
import os

# Copy the exported GGUF model file(s) to Google Drive so they outlive the
# Colab session.
import glob

# mount drive
drive.mount('/content/drive')

# Name passed to `save_pretrained_gguf` in the export cell; keep the two in sync.
export_name = "red-string-model"
destination_folder = "/content/drive/MyDrive/redstring/"

# The previous version hard-coded a file from a different run
# ("red-string-smaller_gguf/...Q4_K_M.gguf"), which does not exist after this
# notebook's q8_0 export. Discover the produced file(s) instead. Both
# "<name>/" and "<name>_gguf/" are searched because the old path shows the
# exporter has written to a "_gguf"-suffixed directory — presumably this
# depends on the unsloth version; confirm.
candidate_dirs = [os.path.abspath(export_name), os.path.abspath(export_name + "_gguf")]
gguf_files = sorted(
    path
    for directory in candidate_dirs
    for path in glob.glob(os.path.join(directory, "*.gguf"))
)
if not gguf_files:
    # Fail early with an actionable message instead of a bare copy error.
    raise FileNotFoundError(
        f"No .gguf files found in {candidate_dirs}; run the export cell first."
    )

# create folder if it doesn't exist
os.makedirs(destination_folder, exist_ok=True)

# copy every exported .gguf file (there may be more than one)
print(f"Copying model to {destination_folder}...")
for source_path in gguf_files:
    print(f"  {os.path.basename(source_path)}")
    shutil.copy(source_path, destination_folder)
print("Done! Model copied to Google Drive.")