In [None]:
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset
import os

# --- CRITICAL CONFIGURATION ---
# Base model reference: paste here the path obtained in step 1
# (without the trailing newline), e.g. "/home/luisma/.cache/huggingface/...".
# NOTE(review): the current value looks like a Hugging Face Hub repo id rather
# than a filesystem path -- confirm which one is intended.
MODELO_LOCAL = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"

# Input data and final output location.
OUTPUT_DIR = "tutor_algoritmos_v1"  # the exported model is written here
DATASET_FILE = "./dataset_final.jsonl"

# Memory-related settings shared by the loader and the trainer.
max_seq_length, load_in_4bit = 2048, True

# 1. LOAD THE BASE MODEL
print(f"‚è≥ Cargando modelo base desde: {MODELO_LOCAL}")

# Enforce the on-disk check only when MODELO_LOCAL is spelled as a filesystem
# path (absolute, or starting with "." or "~"). Any other value, such as an
# "org/name" style identifier, is handed to from_pretrained unchanged so the
# loader can resolve it itself. The previous unconditional os.path.exists()
# guard raised FileNotFoundError for such identifiers before the loader ran.
looks_like_path = os.path.isabs(MODELO_LOCAL) or MODELO_LOCAL.startswith((".", "~"))
if looks_like_path and not os.path.exists(MODELO_LOCAL):
    raise FileNotFoundError(f"‚ùå ¬°ERROR! No encuentro la carpeta: {MODELO_LOCAL}\nVerifica la ruta en el Paso 1.")

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = MODELO_LOCAL,  # local directory or model identifier
    max_seq_length = max_seq_length,
    dtype = None,
    load_in_4bit = load_in_4bit,
)

# 2. ATTACH THE LoRA ADAPTERS
# Names of the sub-modules handed to get_peft_model as adapter targets.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

model = FastLanguageModel.get_peft_model(
    model,
    target_modules = lora_target_modules,
    r = 16,
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    random_state = 3407,
    use_gradient_checkpointing = "unsloth",
)

# 3. LOAD THE TRAINING DATASET
# DATASET_FILE is read with the "json" loader and exposed as the "train" split.
print(f"üìÇ Cargando datos de: {DATASET_FILE}")
dataset = load_dataset(
    "json",
    split = "train",
    data_files = DATASET_FILE,
)

def formatting_prompts_func(examples):
    """Render every conversation of a batch into one training string.

    Args:
        examples: Batch mapping whose "messages" entry is a sequence of
            conversations; each conversation is passed as-is to the
            module-level ``tokenizer``'s ``apply_chat_template``.

    Returns:
        A dict with a single "text" key holding the rendered strings, in the
        same order as the input conversations.
    """
    rendered = []
    for conversation in examples["messages"]:
        # tokenize=False keeps the result as text; no generation prompt is
        # appended to the rendered conversation.
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize = False,
                add_generation_prompt = False,
            )
        )
    return {"text": rendered}

# Add a "text" column produced by formatting_prompts_func (applied in batches).
dataset = dataset.map(formatting_prompts_func, batched = True)

# 4. TRAIN (PRODUCTION MODE)
print("üí™ Iniciando Entrenamiento con RTX 5070...")

# Exactly one of bf16 / fp16 is enabled, depending on what the GPU reports.
bf16_available = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir = "checkpoints",  # temporary checkpoints
    seed = 3407,
    num_train_epochs = 1,  # one full pass over the 2300 examples
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 4,
    learning_rate = 2e-4,
    lr_scheduler_type = "linear",
    warmup_steps = 5,
    weight_decay = 0.01,
    optim = "adamw_8bit",
    bf16 = bf16_available,
    fp16 = not bf16_available,
    logging_steps = 10,
)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False,
    args = training_args,
)

trainer_stats = trainer.train()

print("üéâ ¬°Entrenamiento finalizado exitosamente!")

# 5. EXPORT TO GGUF (for Ollama)
# The quantization method is named once here and passed to the exporter below.
gguf_quantization = "q8_0"

print(f"üíæ Guardando tu modelo final en carpeta '{OUTPUT_DIR}'...")
model.save_pretrained_gguf(
    OUTPUT_DIR,
    tokenizer,
    quantization_method = gguf_quantization,
)

print("‚úÖ ¬°LISTO! Todo ha terminado.")


In [None]:
from unsloth import FastLanguageModel
import os

# Checkpoint directory to load, and the folder where the raw files are written.
ADAPTADORES = "checkpoints/checkpoint-295"
CARPETA_RAW = "tutor_lora_raw"

# Fail fast with a clear message when the checkpoint folder is missing, instead
# of handing a non-existent path to the loader and getting a less direct error.
if not os.path.isdir(ADAPTADORES):
    raise FileNotFoundError(
        f"No encuentro el checkpoint: {ADAPTADORES}\n"
        "Verifica que el entrenamiento haya guardado esa carpeta."
    )

print("üî• Cargando checkpoint...")
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = ADAPTADORES,
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True,
)

print(f"‚ö° Guardando archivos crudos en '{CARPETA_RAW}'...")
# Save in standard Hugging Face format. Per the original note, only the adapter
# is written here -- NOTE(review): confirm against the saved folder contents.
model.save_pretrained(CARPETA_RAW)
tokenizer.save_pretrained(CARPETA_RAW)

print("‚úÖ ¬°Listo! Paso 1 completado.")
