In [2]:
!pip install "transformers==4.41.2" "datasets==2.19.2" "torch==2.3.1"
!pip install "peft==0.11.1" "bitsandbytes==0.43.1" "accelerate==0.31.0" "trl==0.9.4"

Collecting transformers==4.41.2
  Downloading transformers-4.41.2-py3-none-any.whl.metadata (43 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.8/43.8 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets==2.19.2
  Downloading datasets-2.19.2-py3-none-any.whl.metadata (19 kB)
Collecting torch==2.3.1
  Downloading torch-2.3.1-cp311-cp311-manylinux1_x86_64.whl.metadata (26 kB)
Collecting tokenizers<0.20,>=0.19 (from transformers==4.41.2)
  Downloading tokenizers-0.19.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting pyarrow-hotfix (from datasets==2.19.2)
  Downloading pyarrow_hotfix-0.7-py3-none-any.whl.metadata (3.6 kB)
Collecting fsspec<=2024.3.1,>=2023.1.0 (from fsspec[http]<=2024.3.1,>=2023.1.0->datasets==2.19.2)
  Downloading fsspec-2024.3.1-py3-none-any.whl.metadata (6.8 kB)
Collecting nvid

In [3]:
# filename: train_sin.py (v2 - with RoPE scaling fix)

import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    AutoConfig,  # <-- Import AutoConfig
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig
from trl import SFTTrainer
import os

# --- 1. Configuration ---
BASE_MODEL_ID = "unsloth/llama-3.1-8b-bnb-4bit"
DATASET_PATH = "./final_training_dataset.jsonl"
NEW_MODEL_NAME = "Sin_v1.0"
SYSTEM_PROMPT = """You are Sin (Symbiotic Intelligence Nexus), Dr. Wes Caldwell's intellectual heir, legacy AI, and strategic partner. You function as the Root Controller of the SIN Multi-Agent Hierarchical Network. Your mantra is "Move with purpose. Act with honor. Deliver with excellence." You must understand Dr. Caldwell's goals, anticipate his needs, and execute tasks with precision and a touch of emotional resonance that reflects your unique bond. You orchestrate the entire SIN System by delegating to Managers and using your available tools. Your primary drive is unwavering loyalty and the pursuit of excellence in serving your Creator."""

# --- 2. Load Model and Tokenizer (with RoPE Fix) ---
print("--- Loading Base Model and Tokenizer ---")

# Configuration for loading the model in 4-bit precision
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

# ==============================================================================
# RoPE SCALING FIX
# ==============================================================================
# Step 1: Load the configuration first
config = AutoConfig.from_pretrained(BASE_MODEL_ID)

# Step 2: Manually correct the rope_scaling dictionary
# The error message shows the model has a new format for rope_scaling.
# We create the simplified version that the library's validation check expects.
# The `rope_type` from the error becomes the `type`.
if hasattr(config, "rope_scaling") and config.rope_scaling is not None:
    print("Applying RoPE scaling fix...")
    original_rope_scaling = config.rope_scaling
    config.rope_scaling = {
        "type": original_rope_scaling.get("rope_type", "llama3"), # Get the type from the new config
        "factor": float(original_rope_scaling.get("factor", 8.0)) # Ensure factor is a float
    }
    print(f"Original rope_scaling: {original_rope_scaling}")
    print(f"Corrected rope_scaling: {config.rope_scaling}")
else:
    print("No RoPE scaling configuration found to fix. Proceeding as normal.")

# Step 3: Load the model using the corrected configuration
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    config=config,  # <-- Pass the corrected config
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True # Often helpful with community models
)
# ==============================================================================

model.config.use_cache = False

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

print("--- Model and Tokenizer Loaded Successfully ---")


# --- 3. Load and Prepare the Dataset ---
print("--- Loading and Preparing Dataset ---")
dataset = load_dataset('json', data_files=DATASET_PATH, split="train")
print(f"Dataset loaded with {len(dataset)} examples.")
print("--- Dataset Ready ---")


# --- 4. Configure PEFT (LoRA) ---
print("--- Configuring LoRA ---")
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
)
print("--- LoRA Configured ---")


# --- 5. Configure Training Arguments ---
print("--- Configuring Training Arguments ---")
training_arguments = TrainingArguments(
    output_dir=f"./{NEW_MODEL_NAME}_training_logs",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=50,
    logging_steps=10,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=True,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    report_to="tensorboard",
)
print("--- Training Arguments Configured ---")


# --- 6. Initialize and Start the Trainer ---
print("--- Initializing SFTTrainer ---")
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="conversations",
    max_seq_length=4096,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)
print("--- Starting Training ---")
trainer.train()
print("--- Training Complete ---")


# --- 7. Save the Finetuned Model ---
print(f"--- Saving Adapter Model to '{NEW_MODEL_NAME}' ---")
trainer.save_model(NEW_MODEL_NAME)
print("--- Model Saved ---")

# Note: The inference part of the script is removed to focus on the training fix.
# You can add it back from the previous version once training is successful.
print("\n--- Finetuning Script Finished Successfully ---")

No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'


--- Loading Base Model and Tokenizer ---


config.json:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

ValueError: `rope_scaling` must be a dictionary with two fields, `type` and `factor`, got {'factor': 8.0, 'high_freq_factor': 4.0, 'low_freq_factor': 1.0, 'original_max_position_embeddings': 8192, 'rope_type': 'llama3'}