# NPC AI - Complete Pipeline (Training + Architecture)

This notebook delivers the **Holy Grail**: it fine-tunes your custom LLM, starts the inference server, compiles your C++ architecture, and runs the integrated system, all within Kaggle.

**Hardware**: Requires GPU T4 x2 or P100.
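**Input**: Attach your repository (or a zip containing the `cpp/`, `data/`, and `core/` folders, which is how the notebook recognizes the repo) as input. If no matching input is found, the notebook falls back to cloning the repository from GitHub.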

## Phase 1: Environment Setup
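Install Unsloth and its training dependencies (`trl`, `transformers`, `peft`, `accelerate`, `bitsandbytes`). Ollama (for serving) and CMake (for building C++) are needed by the later phases.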

In [None]:
# Install Unsloth first (latest)
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git" -q

# Force upgrade/pin dependencies to known working versions
# We need to ensure trl is updated to a version that supports the new transformers (or vice versa)
# The error "cannot import name 'top_k_top_p_filtering'" suggests trl is too old for the installed transformers
!pip install --upgrade --no-cache-dir "trl>=0.19.0" "transformers>=4.46.0" "peft>=0.11.1" "accelerate>=0.34.0" "bitsandbytes>=0.43.3" -q

# Verify installations
import transformers
import trl
import unsloth
print(f"✓ transformers: {transformers.__version__}")
print(f"✓ trl: {trl.__version__}")
print(f"✓ unsloth: {unsloth.__version__}")


In [None]:
import os
import shutil
import subprocess

# Setup Workspace
# NOTE: We MUST work in /kaggle/working because /kaggle/input is read-only.
REPO_DIR = "/kaggle/working/npc-ai"

# Safety check: never delete from, copy into, or clone into the input mount.
if "/kaggle/input" in REPO_DIR:
    raise ValueError(f"CRITICAL ERROR: REPO_DIR is set to an input directory ({REPO_DIR}). This will fail!")

if os.path.exists(REPO_DIR):
    # Start from a clean tree: both copytree and git clone need the
    # destination to not exist yet.
    print(f"Cleaning previous workspace at {REPO_DIR}...")
    shutil.rmtree(REPO_DIR)

print("Searching for input files...")
found_repo = False

# 1. Try to find local dataset (GitHub Repo upload).
# The repo root is recognized by containing all three of cpp/, data/, core/.
for root, dirs, _files in os.walk("/kaggle/input"):
    if {"cpp", "data", "core"}.issubset(dirs):
        print(f"Found repo at {root}")
        print(f"Copying full repository to {REPO_DIR}...")
        # copytree creates REPO_DIR itself; it must not exist beforehand.
        shutil.copytree(root, REPO_DIR)
        found_repo = True
        break

# 2. Fallback: Git Clone (directly into REPO_DIR)
if not found_repo:
    print("Input repo not found in /kaggle/input. Trying git clone...")
    try:
        subprocess.check_call(["git", "clone", "https://github.com/minhphuc477/NPC-AI.git", REPO_DIR])
    except (subprocess.CalledProcessError, OSError) as err:
        # check_call raises on a non-zero exit (and OSError when git is
        # missing), so this is the only place the failure can be reported.
        raise RuntimeError("Failed to setup workspace! Could not find input dataset OR clone from Git.") from err
    found_repo = True

print("Workspace setup complete!")
# List a few files to verify
print("Workspace contents:", os.listdir(REPO_DIR))

## Phase 2: Fine-Tuning `Phi-3-mini`
We use QLoRA to train the model on your `npc_training.jsonl`.

In [None]:
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer, SFTConfig
from transformers import TrainingArguments
from datasets import load_dataset
from transformers.trainer_utils import get_last_checkpoint
import os

# --- Model configuration ---
# max_seq_length is reused by the trainer further down in this cell.
max_seq_length = 2048
model_name = "unsloth/Phi-3-mini-4k-instruct"

# Load the base model and its tokenizer with 4-bit weights.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=None,  # NOTE(review): presumably auto-detected by Unsloth when None; confirm
    load_in_4bit=True,
)

# Wrap the model with LoRA adapters on the attention and MLP projections.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)

# Prepare Data
# Two-slot template: the first {} receives the instruction, the second the
# expected response.
alpaca_prompt = """
### Instruction:
{}

### Response:
{}"""
# Appended to every sample so each training text ends with the tokenizer's
# end-of-sequence token.
EOS_TOKEN = tokenizer.eos_token


def formatting_prompts_func(examples):
    """Render a batch of prompt/completion pairs into training texts.

    Args:
        examples: Batched mapping with parallel "prompt" and "completion"
            sequences.

    Returns:
        A dict with a single "text" key holding one formatted string per
        pair, each terminated with EOS_TOKEN.
    """
    pairs = zip(examples["prompt"], examples["completion"])
    rendered = [
        alpaca_prompt.format(prompt, completion) + EOS_TOKEN
        for prompt, completion in pairs
    ]
    return {"text": rendered}

import glob

# Train on the repo's JSONL dataset and export the result as GGUF.
# When the dataset is missing the whole step is skipped (best effort), in
# which case `trained_model_path` is NOT defined by this cell.
data_path = f"{REPO_DIR}/data/npc_training.jsonl"
if os.path.exists(data_path):
    dataset = load_dataset("json", data_files = data_path, split = "train")
    # Adds the "text" column consumed by the trainer below.
    dataset = dataset.map(formatting_prompts_func, batched = True)

    # Train
    output_dir = "outputs"
    # Resume from the newest checkpoint in output_dir, if a previous run left one.
    last_checkpoint = None
    if os.path.isdir(output_dir):
        last_checkpoint = get_last_checkpoint(output_dir)
        if last_checkpoint:
            print(f"Resuming from checkpoint: {last_checkpoint}")

    trainer = SFTTrainer(
        model = model,
        tokenizer = tokenizer,
        train_dataset = dataset,
        dataset_text_field = "text",
        max_seq_length = max_seq_length,
        dataset_num_proc = 2,
        packing = False,
        args = SFTConfig(
            per_device_train_batch_size = 2,  # CRITICAL: Must be >= 2 for proper tensor handling
            gradient_accumulation_steps = 4,
            warmup_steps = 5,
            max_steps = 60,
            learning_rate = 2e-4,
            # Use bf16 when the GPU supports it, otherwise fall back to fp16.
            fp16 = not torch.cuda.is_bf16_supported(),
            bf16 = torch.cuda.is_bf16_supported(),
            logging_steps = 1,
            optim = "adamw_8bit",
            weight_decay = 0.01,
            lr_scheduler_type = "linear",
            seed = 3407,
            output_dir = output_dir,
            report_to = "none",  # Disable wandb to avoid auth issues in Kaggle
        ),
    )
    trainer.train(resume_from_checkpoint=last_checkpoint)

    # Export GGUF
    model.save_pretrained_gguf("model_gguf", tokenizer, quantization_method = "f16")

    # Find the actual GGUF file created. The list is sorted so the choice is
    # deterministic if the export directory ever holds more than one file.
    gguf_files = sorted(glob.glob("model_gguf/*.gguf"))
    if not gguf_files:
        raise FileNotFoundError("No GGUF file found after training!")
    trained_model_path = gguf_files[0]
    print(f"✓ Found GGUF model: {trained_model_path}")
    # Report the file that was actually found instead of a hard-coded name.
    print(f"Training Complete & Model Saved to {trained_model_path}")
else:
    print("Data file not found, skipping training.")


## Phase 3: Serving Model with Ollama
We start the Ollama server and create a custom model named `elara-npc` using our fine-tuned weights.

In [None]:
import glob
import os
import shutil
import subprocess
import time

import requests

# 0. Resolve the GGUF file first, so we fail before launching a server.
# Prefer the path recorded by the training cell; fall back to scanning the
# export directory (e.g. when the kernel was restarted after training).
gguf_path = globals().get("trained_model_path")
if not gguf_path or not os.path.isfile(gguf_path):
    gguf_candidates = sorted(glob.glob("model_gguf/*.gguf"))
    if not gguf_candidates:
        raise FileNotFoundError(
            "No GGUF model found. Run the training cell first (expected a file in model_gguf/)."
        )
    gguf_path = gguf_candidates[0]
print(f"Using GGUF model: {gguf_path}")

# Make sure the Ollama binary is available; no earlier cell installs it.
if shutil.which("ollama") is None:
    print("Ollama not found on PATH. Installing...")
    # pipefail makes a failed download fail the whole command instead of
    # feeding an empty script to sh.
    subprocess.run(
        ["bash", "-c", "set -o pipefail; curl -fsSL https://ollama.com/install.sh | sh"],
        check=True,
    )
    if shutil.which("ollama") is None:
        raise RuntimeError("Ollama installation finished but the 'ollama' binary is still not on PATH.")

# 1. Start Server in background.
# Output goes to a log file rather than PIPEs: nothing ever reads the pipes,
# so a chatty server would block once the OS pipe buffer filled up.
with open("ollama_server.log", "ab") as server_log:
    ollama_process = subprocess.Popen(
        ["ollama", "serve"], stdout=server_log, stderr=subprocess.STDOUT
    )
print("Starting Ollama Server...")

# Wait for server to be ready with health check
server_ready = False
for _ in range(30):  # Try for 30 seconds
    try:
        response = requests.get("http://localhost:11434/api/tags", timeout=1)
    except requests.exceptions.RequestException:
        response = None  # Not accepting connections yet
    if response is not None and response.status_code == 200:
        print("✓ Ollama server is ready!")
        server_ready = True
        break
    if ollama_process.poll() is not None:
        # The server process already exited; waiting longer cannot help.
        break
    time.sleep(1)

if not server_ready:
    ollama_process.terminate()
    print("See ollama_server.log for the server output.")
    raise RuntimeError("Ollama server failed to start within 30 seconds!")

# 2. Create Modelfile.
# This must be an f-string so the real model path is written; an absolute
# path keeps it valid regardless of the directory it is resolved from.
modelfile_content = f"""
FROM {os.path.abspath(gguf_path)}
SYSTEM "You are Elara, a merchant in Eldoria."
"""
with open("Modelfile", "w", encoding="utf-8") as f:
    f.write(modelfile_content)

# 3. Create Model in Ollama, and only report success if it really succeeded.
create_result = subprocess.run(
    ["ollama", "create", "elara-npc", "-f", "Modelfile"],
    capture_output=True,
    text=True,
)
if create_result.stdout:
    print(create_result.stdout, end="")
if create_result.stderr:
    print(create_result.stderr, end="")
if create_result.returncode != 0:
    raise RuntimeError(f"'ollama create' failed with exit code {create_result.returncode}.")
print("Model 'elara-npc' created successfully!")

## Phase 4: Compiling Architecture (C++)
Now we compile your C++ integrated system (`Social Fabric`, `Memory`, etc.) on this Linux environment to verify it works with the trained model.

In [None]:
import os
import subprocess


def _run_streaming(cmd, cwd=None):
    """Run *cmd*, echo its combined stdout/stderr line by line, raise on failure.

    Args:
        cmd: Argument list for the command (no shell is involved).
        cwd: Optional working directory for the command.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero status.
        OSError: If the executable cannot be started.
    """
    with subprocess.Popen(
        cmd,
        cwd=cwd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        errors="replace",
    ) as proc:
        for line in proc.stdout:
            print(line, end="")
        returncode = proc.wait()
    if returncode != 0:
        raise subprocess.CalledProcessError(returncode, cmd)


build_dir = f"{REPO_DIR}/cpp/build"
if os.path.exists(f"{REPO_DIR}/cpp"):
    # Best-effort install of the build toolchain; a failure here is only
    # reported because the tools may already be present on the image.
    try:
        _run_streaming(["apt-get", "install", "-y", "git", "cmake", "build-essential"])
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"⚠ Could not install build tools via apt-get: {e}")

    os.makedirs(build_dir, exist_ok=True)

    try:
        # Run CMake (assuming CMakeLists.txt handles Linux via standard C++).
        # Unlike `!` shell magics, these calls raise on a non-zero exit code,
        # so a failed configure/build actually reaches the except branch.
        _run_streaming(
            ["cmake", "-B", "build", "-S", ".", "-DCMAKE_BUILD_TYPE=Release"],
            cwd=f"{REPO_DIR}/cpp",
        )
        _run_streaming(["make", "-j4"], cwd=build_dir)
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"⚠ C++ compilation failed: {e}")
        print("This is expected in Kaggle. The Python pipeline still works!")
    else:
        # Check if executable exists
        exe_path = f"{build_dir}/chat_interface"
        if os.path.exists(exe_path):
            print(f"SUCCESS: Architecture compiled at {exe_path}")
        else:
            print("Compilation seemed to complete but executable not found. Check logs.")
else:
    print("CPP directory not found. Skipping compilation.")


## Phase 5: LIVE DEMO
Running the C++ Architecture connected to the Fine-Tuned Model.

In [None]:
import os
import subprocess
import textwrap

# Upper bound for the whole scripted conversation, so a binary that never
# exits cannot hang the notebook.
DEMO_TIMEOUT_SECONDS = 600

exe_path = f"{REPO_DIR}/cpp/build/chat_interface"
if os.path.exists(exe_path):
    # We can't use interactive stdin easily in Colab/Kaggle non-interactive mode.
    # So we stream some commands into it.
    # dedent + the leading backslash make every command start at column 0
    # with no blank first line, so the binary receives exactly "quit" rather
    # than an indented "    quit".
    input_script = textwrap.dedent("""\
        Hello, who are you?
        Tell me about the Iron Guard.
        You are stupid!
        quit
        """)

    print("--- STARTING NPC INTERACTION ---")
    process = subprocess.Popen([exe_path], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    try:
        stdout, stderr = process.communicate(input=input_script, timeout=DEMO_TIMEOUT_SECONDS)
    except subprocess.TimeoutExpired:
        # Kill the process, then collect whatever output it produced so far.
        process.kill()
        stdout, stderr = process.communicate()
        print(f"⚠ Demo did not finish within {DEMO_TIMEOUT_SECONDS} seconds and was stopped.")

    print(stdout)
    if stderr:
        print("ERRORS:", stderr)

else:
    print("Executable not found, cannot run demo.")

In [None]:
import subprocess

# === CLEANUP ===
# Stop Ollama server to free resources.
# Only acts when the serving cell ran and left `ollama_process` behind.
if 'ollama_process' in globals():
    try:
        ollama_process.terminate()
        ollama_process.wait(timeout=5)
        print("✓ Ollama server stopped.")
    except subprocess.TimeoutExpired:
        # The server ignored SIGTERM for 5 seconds: force it down so it does
        # not keep holding resources.
        print("⚠ Could not stop Ollama server gracefully.")
        ollama_process.kill()
        ollama_process.wait()
    except OSError as err:
        print("⚠ Could not stop Ollama server gracefully.")
        print(f"  Reason: {err}")
