# NPC AI - Complete Pipeline (Training + Architecture)

This notebook delivers the **Holy Grail**: It trains your custom LLM, sets up the inference server, compiles your C++ architecture, and runs the integrated system—all within Kaggle.

**Hardware**: Requires GPU T4 x2 or P100.
**Input**: Attach your repository (or zip of `cpp/` and `data/`) as input.

## Phase 1: Environment Setup
Install Unsloth (for training), Ollama (for serving), and CMake (for building C++).

In [None]:
%%capture
# Environment bootstrap cell. %%capture swallows everything this cell prints,
# including pip/apt errors, so drop it temporarily when debugging a failed setup.

# 1. Install Unsloth
# Unsloth is installed straight from its GitHub repository with the
# "kaggle-new" extra.
!pip install "unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git"
# --no-deps: install exactly these packages (xformers and trl are version-capped)
# without letting pip resolve or replace their dependencies.
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes

# 2. Install Ollama
# Downloads the official install script and pipes it to sh.
!curl -fsSL https://ollama.com/install.sh | sh

# 3. System Deps
# Build toolchain (cmake, build-essential) and the libcurl development package
# for the C++ build later in the notebook.
!apt-get update && apt-get install -y cmake build-essential libcurl4-openssl-dev

In [None]:
import os
import shutil

# Setup Workspace
# Always start from an empty workspace so a rerun never mixes in files that
# were staged by an earlier run.
REPO_DIR = "/kaggle/working/npc-ai"
if os.path.exists(REPO_DIR):
    shutil.rmtree(REPO_DIR)
os.makedirs(REPO_DIR)

print("Searching for input files...")
# Locate 'cpp', 'data' folders in inputs.
# found_cpp records whether a directory containing both 'cpp' and 'data' was
# copied; it stays False when only a flat training file was found.
found_cpp = False
for root, dirs, files in os.walk("/kaggle/input"):
    if "cpp" in dirs and "data" in dirs:
        # Found the repo root
        print(f"Found repo at {root}")
        shutil.copytree(os.path.join(root, "cpp"), os.path.join(REPO_DIR, "cpp"))
        # REPO_DIR/data may already exist when a flat npc_training.jsonl was
        # copied from a directory visited earlier in the walk; merge into it
        # instead of failing with FileExistsError.
        shutil.copytree(
            os.path.join(root, "data"),
            os.path.join(REPO_DIR, "data"),
            dirs_exist_ok=True,
        )
        found_cpp = True
        break
    if "npc_training.jsonl" in files:
        # Maybe flat files? Stage just the training data so Phase 2 can run
        # even without the C++ sources.
        os.makedirs(os.path.join(REPO_DIR, "data"), exist_ok=True)
        shutil.copy(os.path.join(root, "npc_training.jsonl"), os.path.join(REPO_DIR, "data/npc_training.jsonl"))

if not found_cpp:
    print("WARNING: Could not find full 'cpp' folder. Architecture compilation might fail if not uploaded.")
    print("Make sure to upload your 'cpp' folder as a dataset!")

## Phase 2: Fine-Tuning `Phi-3-mini`
We use QLoRA to train the model on your `npc_training.jsonl`.

In [None]:
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset

# Model configuration shared by loading, training and tokenization.
max_seq_length = 2048
model_name = "unsloth/Phi-3-mini-4k-instruct"

# Load the base model and its tokenizer with 4-bit loading enabled.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=True,
)

# Attach LoRA adapters to the listed projection modules.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)

# Prompt template: the first placeholder receives the instruction and the
# second receives the response. The leading newline is part of the template.
alpaca_prompt = """
### Instruction:
{}

### Response:
{}"""
# Appended to every training example by formatting_prompts_func.
EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    """Build the training text for every row of a batch.

    Pairs the entries of ``examples["prompt"]`` with those of
    ``examples["completion"]``, fills ``alpaca_prompt`` with each pair and
    appends ``EOS_TOKEN``.

    Returns:
        A dict with a single ``"text"`` key holding the formatted strings,
        one per input pair.
    """
    prompts = examples["prompt"]
    completions = examples["completion"]
    return {
        "text": [
            alpaca_prompt.format(prompt, completion) + EOS_TOKEN
            for prompt, completion in zip(prompts, completions)
        ],
    }

# Fine-tune only when the training file was staged into the workspace;
# otherwise report it and leave the model untouched.
data_path = f"{REPO_DIR}/data/npc_training.jsonl"
if not os.path.exists(data_path):
    print("Data file not found, skipping training.")
else:
    # Load the JSONL rows and add the formatted "text" column.
    dataset = load_dataset("json", data_files=data_path, split="train")
    dataset = dataset.map(formatting_prompts_func, batched=True)

    # Use bf16 when the GPU supports it and fall back to fp16 otherwise.
    bf16_available = torch.cuda.is_bf16_supported()
    training_args = TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        max_steps=60,
        learning_rate=2e-4,
        fp16=not bf16_available,
        bf16=bf16_available,
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
    )

    # Train
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field="text",
        max_seq_length=max_seq_length,
        dataset_num_proc=2,
        packing=False,
        args=training_args,
    )
    trainer.train()

    # Export GGUF
    # NOTE(review): the message below assumes the exporter writes
    # model_gguf/model-unsloth.f16.gguf; confirm against the installed
    # Unsloth version, since the Ollama Modelfile relies on that path.
    model.save_pretrained_gguf("model_gguf", tokenizer, quantization_method="f16")
    print("Training Complete & Model Saved to model_gguf/model-unsloth.f16.gguf")

## Phase 3: Serving Model with Ollama
We start the Ollama server and create a custom model named `elara-npc` using our fine-tuned weights.

In [None]:
import subprocess
import time

# 1. Start Server in background
ollama_process = subprocess.Popen(["ollama", "serve"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print("Starting Ollama Server...")
time.sleep(5)

# 2. Create Modelfile
modelfile_content = """
FROM ./model_gguf/model-unsloth.f16.gguf
SYSTEM "You are Elara, a merchant in Eldoria."
"""
with open("Modelfile", "w") as f:
    f.write(modelfile_content)

# 3. Create Model in Ollama
!ollama create elara-npc -f Modelfile
print("Model 'elara-npc' created successfully!")

## Phase 4: Compiling Architecture (C++)
Now we compile your C++ integrated system (`Social Fabric`, `Memory`, etc.) on this Linux environment to verify it works with the trained model.

In [None]:
build_dir = f"{REPO_DIR}/cpp/build"
if os.path.exists(f"{REPO_DIR}/cpp"):
    os.makedirs(build_dir, exist_ok=True)
    # Run CMake (assuming CMakeLists.txt handles Linux via standard C++)
    !cd "{REPO_DIR}/cpp" && cmake -B build -S . -DCMAKE_BUILD_TYPE=Release
    !cd "{REPO_DIR}/cpp/build" && make -j4
    
    # Check if executable exists
    exe_path = f"{build_dir}/chat_interface"
    if os.path.exists(exe_path):
        print(f"SUCCESS: Architecture compiled at {exe_path}")
    else:
        print("Compilation seemed to complete but executable not found. Check logs.")
else:
    print("CPP directory not found. Skipping compilation.")

## Phase 5: LIVE DEMO
Running the C++ Architecture connected to the Fine-Tuned Model.

In [None]:
# Run the compiled chat binary against a fixed script of player lines.
exe_path = f"{REPO_DIR}/cpp/build/chat_interface"
if os.path.exists(exe_path):
    # We can't use interactive stdin easily in Colab/Kaggle non-interactive mode.
    # So we stream some commands into it.
    #
    # The script is assembled line by line so every command reaches the program
    # without leading indentation or a blank first line; an indented
    # triple-quoted literal would send "    quit" rather than "quit".
    demo_lines = [
        "Hello, who are you?",
        "Tell me about the Iron Guard.",
        "You are stupid!",
        "quit",
    ]
    input_script = "\n".join(demo_lines) + "\n"

    # Upper bound on the whole conversation so a binary that never exits
    # cannot hang the notebook.
    demo_timeout_seconds = 600

    print("--- STARTING NPC INTERACTION ---")
    process = subprocess.Popen(
        [exe_path],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    try:
        stdout, stderr = process.communicate(input=input_script, timeout=demo_timeout_seconds)
    except subprocess.TimeoutExpired:
        # Kill the child, then collect whatever it produced before the timeout.
        process.kill()
        stdout, stderr = process.communicate()
        print(f"Demo timed out after {demo_timeout_seconds}s; showing partial output.")

    print(stdout)
    if stderr:
        print("ERRORS:", stderr)

else:
    print("Executable not found, cannot run demo.")