# 🧠 NPC AI — Complete Training & Integration Pipeline

**BD-NSCA: Behavior-Driven Neuro-Symbolic Cognitive Architecture**

| Step | Description |
|------|-------------|
| 1 | Environment Setup |
| 2 | Training Data Generation |
| 3 | QLoRA Fine-Tuning (checkpoint/resume) |
| 4 | GGUF Export |
| 5 | Ollama Serving |
| 6 | Integrated Demo |
| 7 | Quality Evaluation |
| 8 | C++ Engine Compilation |

> **Checkpoint/Resume**: Training auto-detects and resumes from existing checkpoints.


---
## 1. 🔧 Environment Setup & Dependencies


In [None]:
# ============================================================
# Cell 1: Environment Setup
# ============================================================
# Detects the hosting platform, installs the training stack and Ollama on
# hosted notebooks (Kaggle / Colab), and reports whether a CUDA GPU is visible.
import os, sys, subprocess, shutil

# Platform detection: Kaggle is recognised by its /kaggle directory, Colab by
# the google.colab module already being loaded in this interpreter.
IN_KAGGLE = os.path.exists('/kaggle')
IN_COLAB = 'google.colab' in sys.modules
ENV_NAME = 'Kaggle' if IN_KAGGLE else ('Colab' if IN_COLAB else 'Local')
print(f'üåç Environment: {ENV_NAME}')

if IN_KAGGLE or IN_COLAB:
    print('üì¶ Installing Unsloth and dependencies...')
    # check_call raises on a non-zero exit, so a failed pip install stops the cell.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', 'unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git'])
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', 'trl>=0.12.0', 'transformers>=4.45.0', 'datasets', 'accelerate', 'bitsandbytes', 'sentencepiece', 'protobuf'])
    print('üì¶ Installing Ollama...')
    # The Ollama install is best-effort: failures are reported, not raised.
    try:
        subprocess.run(['apt-get', 'update'], check=True, capture_output=True)
        subprocess.run(['apt-get', 'install', '-y', 'zstd'], check=True, capture_output=True)
        # pipefail makes a failed download (curl -f) fail the whole pipeline;
        # without it the exit status is only that of `sh`, which succeeds on
        # empty input. check=True turns that failure into an exception.
        subprocess.run(
            ['bash', '-c', 'set -o pipefail; curl -fsSL https://ollama.com/install.sh | sh'],
            check=True,
        )
        if shutil.which('ollama'):
            print('‚úÖ Ollama installed successfully!')
        else:
            # The installer exited cleanly but left no binary on PATH.
            print('‚ùå Failed to install Ollama: binary not found on PATH after install')
    except (subprocess.CalledProcessError, OSError) as e:
        print(f'‚ùå Failed to install Ollama: {e}')
else:
    print('‚ÑπÔ∏è  Local env - assuming deps pre-installed.')

# Imported after the optional install step above.
import torch
if torch.cuda.is_available():
    print(f'üéÆ GPU: {torch.cuda.get_device_name(0)}')
else:
    print('‚ö†Ô∏è  No GPU detected!')


---
## 2. 📝 Training Data Generation (Enhanced)


In [None]:
# ============================================================
# Cell 2: Training Data Generation (Enhanced)
# ============================================================
import json, random, os

PERSONAS_PATH = 'data/personas.json'
UTTERANCES_PATH = 'data/player_utterances.json'

# Persona definitions: use a single built-in merchant when the JSON file is
# absent, otherwise load the file.
if not os.path.exists(PERSONAS_PATH):
    personas = {'merchant': {'persona_en': 'You are a Merchant.', 'traits': ['friendly'], 'name': 'Merchant'}}
else:
    with open(PERSONAS_PATH, 'r', encoding='utf-8') as f:
        personas = json.load(f)

# Player utterances, keyed by category and then by language code; same
# file-or-default scheme as the personas.
if not os.path.exists(UTTERANCES_PATH):
    utterances = {'greetings': {'en': ['Hello!']}}
else:
    with open(UTTERANCES_PATH, 'r', encoding='utf-8') as f:
        utterances = json.load(f)
def generate_heuristic_response(persona, category, player_input):
    """Return a canned, template-based NPC line for one player utterance.

    Args:
        persona: Mapping describing the NPC. Reads the optional keys
            ``'name'`` (defaults to ``'NPC'``) and ``'traits'`` (defaults to
            an empty list, joined with ``", "``).
        category: Utterance category. ``'greetings'``, ``'trade_related'``
            and ``'lore_questions'`` each have a dedicated template; any
            other value gets the generic reply.
        player_input: The player's text. Only the generic reply uses it, and
            only its first 20 characters.

    Returns:
        The formatted response string.
    """
    speaker = persona.get('name', 'NPC')
    trait_text = ", ".join(persona.get('traits', []))

    if category == 'greetings':
        return f"{speaker} nods. 'Greetings, traveler. I am but a humble {speaker.lower()} with {trait_text} traits.'"
    if category == 'trade_related':
        return f"{speaker} eyes your gold. 'I have exactly what you need, but the price is firm.'"
    if category == 'lore_questions':
        return f"{speaker} looks distant. 'Ancient secrets are best left buried, though many whisper of the curse.'"

    # Generic reply: echo a short prefix of what the player said.
    snippet = player_input[:20]
    return f"{speaker} considers your words. 'I have much to think about regarding {snippet}...'"
def generate_training_data(num_samples=1500, seed=42):
    """Build synthetic prompt/completion pairs for fine-tuning.

    Each sample pairs a randomly chosen persona with a randomly chosen player
    utterance and a heuristic reply from ``generate_heuristic_response``.
    Reads the module-level ``personas`` and ``utterances`` mappings.

    Args:
        num_samples: Number of samples to generate.
        seed: Seed for the sampling RNG. The same seed yields the same samples.

    Returns:
        A list of dicts with the keys ``'prompt'`` and ``'completion'``.
    """
    # A private RNG yields the same draw sequence as seeding the module-level
    # generator, without reseeding the process-wide random state as a side effect.
    rng = random.Random(seed)
    persona_keys = list(personas)
    categories = list(utterances)
    # The context block is identical for every sample, so serialise it once.
    ctx = json.dumps({'memories': [], 'current_emotion': {'description': 'neutral', 'valence': 0.0}})

    samples = []
    for _ in range(num_samples):
        persona = personas[rng.choice(persona_keys)]
        category = rng.choice(categories)
        by_language = utterances[category]
        # Prefer English, then Vietnamese, then a placeholder. `or` also skips
        # an empty list, which would otherwise make rng.choice raise IndexError.
        candidates = by_language.get('en') or by_language.get('vi') or ['...']
        player_line = rng.choice(candidates)
        prompt = f"<|system|>\n{persona['persona_en']}\n<|end|>\n<|user|>\n[CONTEXT]\n{ctx}\n\n[PLAYER] {player_line}<|end|>\n<|assistant|>\n"
        completion = f"{generate_heuristic_response(persona, category, player_line)}<|end|>"
        samples.append({'prompt': prompt, 'completion': completion})
    return samples
# Generate the dataset and persist it as JSON Lines (one sample per line).
OUTPUT_PATH = 'data/npc_training_v2.jsonl'
os.makedirs('data', exist_ok=True)
samples = generate_training_data(num_samples=1500)
with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
    # ensure_ascii=False keeps non-ASCII text readable in the output file.
    f.writelines(json.dumps(sample, ensure_ascii=False) + '\n' for sample in samples)
print(f'‚úÖ Generated {len(samples)} training samples -> {OUTPUT_PATH}')


---
## 3. 🚀 QLoRA Fine-Tuning


In [None]:
# ============================================================
# Cell 3: Write Standalone Training Script
# ============================================================
# Writes scripts/train_unsloth.py, a self-contained QLoRA fine-tuning script
# that a later cell launches in a separate Python process.
import os
os.makedirs('scripts', exist_ok=True)

# Source of the generated script. It is a normal (non-raw) triple-quoted
# string, so comments inside it use '#' and never triple double-quotes.
script_content = """
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer, SFTConfig
from transformers import TrainingArguments
from transformers.trainer_utils import get_last_checkpoint
from datasets import load_dataset
import os
import argparse

def train(dataset_path, output_dir):
    # Fine-tune the base model with LoRA adapters on a prompt/completion
    # JSONL dataset, then save the adapter and tokenizer to output_dir.
    max_seq_length = 2048
    dtype = None
    load_in_4bit = True

    model_name = "unsloth/Phi-3-mini-4k-instruct"

    print(f"üöÄ Loading Unsloth model: {model_name}")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = model_name,
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
    )

    model = FastLanguageModel.get_peft_model(
        model,
        r = 16,
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                          "gate_proj", "up_proj", "down_proj",],
        lora_alpha = 16,
        lora_dropout = 0,
        bias = "none",
        use_gradient_checkpointing = "unsloth",
        random_state = 3407,
        use_rslora = False,
        loftq_config = None,
    )

    print(f"üìä Loading dataset: {dataset_path}")
    dataset = load_dataset("json", data_files=dataset_path, split="train")

    def formatting_prompts_func(examples):
        # Concatenate each prompt with its completion into one training text.
        prompts = examples["prompt"]
        completions = examples["completion"]
        texts = [f"{p}{c}" for p, c in zip(prompts, completions)]
        return { "text" : texts, }

    dataset = dataset.map(formatting_prompts_func, batched = True)

    print("üöÑ Starting training...")

    # Resume only when a real Trainer checkpoint (a checkpoint-* directory)
    # exists. A directory holding only the saved adapter files must not
    # trigger a resume, because the Trainer raises if it finds no checkpoint.
    last_checkpoint = None
    if os.path.isdir(output_dir):
        last_checkpoint = get_last_checkpoint(output_dir)
    if last_checkpoint is not None:
        print(f"üîÑ Checkpoints detected in {output_dir}. Resuming...")

    trainer = SFTTrainer(
        model = model,
        tokenizer = tokenizer,
        train_dataset = dataset,
        dataset_text_field = "text",
        max_seq_length = max_seq_length,
        dataset_num_proc = 2,
        packing = False,
        args = SFTConfig(
            per_device_train_batch_size = 2,
            gradient_accumulation_steps = 4,
            warmup_steps = 5,
            max_steps = 60,
            learning_rate = 2e-4,
            fp16 = not torch.cuda.is_bf16_supported(),
            bf16 = torch.cuda.is_bf16_supported(),
            logging_steps = 1,
            optim = "adamw_8bit",
            weight_decay = 0.01,
            lr_scheduler_type = "linear",
            seed = 3407,
            output_dir = output_dir,
            # Checkpoint more often than max_steps so an interrupted run has
            # something to resume from; keep only the two newest checkpoints.
            save_strategy = "steps",
            save_steps = 20,
            save_total_limit = 2,
            report_to = "none",
            push_to_hub = False,
        ),
    )

    # None starts a fresh run; a path resumes from that checkpoint.
    trainer.train(resume_from_checkpoint = last_checkpoint)

    print(f"üíæ Saving fine-tuned model to {output_dir}")
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    print("‚úÖ Training complete!")

if __name__ == \"__main__\":
    parser = argparse.ArgumentParser(description=\"Unsloth Fine-tuning Script\")
    parser.add_argument(\"--dataset\", type=str, required=True, help=\"Path to the training dataset (.jsonl)\")
    parser.add_argument(\"--output_dir\", type=str, default=\"outputs/npc_model\", help=\"Directory to save the model\")

    args = parser.parse_args()
    train(args.dataset, args.output_dir)
"""

# The script contains non-ASCII characters, so write it as UTF-8 explicitly
# instead of relying on the platform's default encoding.
with open('scripts/train_unsloth.py', 'w', encoding='utf-8') as f:
    f.write(script_content)
print('‚úÖ Standalone training script written to scripts/train_unsloth.py')



In [None]:
import subprocess, sys, os
print('üöÄ Starting fine-tuning...')
# Training runs in a child Python process (the standalone script) rather than
# in this kernel, to avoid memory fragmentation in notebooks. check_call
# raises if the script exits with a non-zero status.
train_command = [
    sys.executable, 'scripts/train_unsloth.py',
    '--dataset', 'data/npc_training_v2.jsonl',
    '--output_dir', 'outputs/npc_model',
]
subprocess.check_call(train_command)


---
## 4. 📦 GGUF Export


In [None]:
# Cell 4: export the fine-tuned model to an F16 GGUF file and record its
# location in `trained_model_path`, which the serving cell reads.
from unsloth import FastLanguageModel
import glob
import os
model_name = "outputs/npc_model"
save_path = "model_gguf"
if os.path.exists(model_name):
    model, tokenizer = FastLanguageModel.from_pretrained(model_name, load_in_4bit=True)
    print('üì¶ Exporting to F16 GGUF...')
    model.save_pretrained_gguf(save_path, tokenizer, quantization_method = "f16")
    # Unsloth chooses the output file name, so look for the .gguf it actually
    # wrote rather than hard-coding one.
    # NOTE(review): the name appears to differ between Unsloth releases — confirm.
    gguf_files = sorted(glob.glob(os.path.join(save_path, "*.gguf")), key=os.path.getmtime)
    if gguf_files:
        # Newest file wins if earlier exports are still in the directory.
        trained_model_path = gguf_files[-1]
        print(f'‚úÖ GGUF exported: {trained_model_path}')
    else:
        # Keep the previously assumed name so downstream cells see a value,
        # but make it clear that nothing was found there.
        trained_model_path = os.path.join(save_path, "phi-3-mini-4k-instruct.F16.gguf")
        print(f'‚ö†Ô∏è No .gguf file found in {save_path}; expected {trained_model_path}')
else:
    print('‚ö†Ô∏è Trained model not found. Using pre-trained for demo.')
    # This is a Hub repository id, not a local file path.
    trained_model_path = "unsloth/Phi-3-mini-4k-instruct-gguf"


---
## 5. 🤖 Ollama Serving


In [None]:
# ============================================================
# Cell 5: Ollama Serving
# ============================================================
# Starts `ollama serve` in the background, waits for its HTTP API to answer,
# then registers the exported GGUF file as the model "npc-ai".
import subprocess, time, requests, os, shutil
if shutil.which('ollama'):
    print("üöÄ Starting Ollama server...")
    # Send server output to a log file. A PIPE that is never read can fill up
    # and block the server; the child keeps its own copy of the file
    # descriptor, so closing our handle right after Popen is safe.
    with open('ollama_serve.log', 'w', encoding='utf-8') as ollama_log:
        ollama_process = subprocess.Popen(["ollama", "serve"], stdout=ollama_log, stderr=subprocess.STDOUT)
    time.sleep(5)
else:
    print("‚ùå Ollama binary not found!"); ollama_process = None
if ollama_process:
    # Poll the tags endpoint up to 12 times, 5 seconds apart.
    server_ready = False
    for i in range(12):
        try:
            if requests.get("http://localhost:11434/api/tags", timeout=3).status_code == 200:
                print("‚úÖ Ollama server is running!")
                server_ready = True
                break
        except requests.RequestException:
            # Connection refused / timeout while the server is still starting.
            pass
        print(f"   Waiting for server... ({i+1}/12)"); time.sleep(5)
    if not server_ready:
        print("‚ùå Ollama server did not respond after 12 attempts; see ollama_serve.log")
    # Register the model only when an earlier cell left a path to a local file.
    if 'trained_model_path' in globals() and trained_model_path and os.path.exists(trained_model_path):
        modelfile = f'FROM {trained_model_path}\nPARAMETER temperature 0.7\nSYSTEM You are an NPC.'
        with open("Modelfile", "w") as f: f.write(modelfile)
        res = subprocess.run(["ollama", "create", "npc-ai", "-f", "Modelfile"], capture_output=True, text=True)
        if res.returncode == 0: print("‚úÖ Model registered!")
        else: print(f"‚ùå Registration failed: {res.stderr}")
    else:
        print("‚ö†Ô∏è No local GGUF file to register; skipping model registration.")


---
## 6. 🎮 Integrated Demo (Enhanced)


In [None]:
# ============================================================
# Cell 6: Integrated Demo (Enhanced)
# ============================================================
import json, requests, sys, os, time
def query_npc(player_input, timeout=120):
    """Send one player line to the local Ollama model and return its reply.

    Args:
        player_input: The player's text, embedded in the prompt after an
            empty JSON context block.
        timeout: Timeout in seconds passed to the HTTP request.

    Returns:
        The model's response text on HTTP 200 (``"[No response]"`` when the
        JSON body has no ``"response"`` key), ``"[Error <status>]"`` for any
        other status, or ``"[Ollama error: <exception>]"`` if the request
        raises. This function does not raise for request failures.
    """
    empty_context = {
        'memories': [],
        'current_emotion': {'description': 'neutral', 'valence': 0.0},
        'knowledge': [],
        'npc_info': {},
    }
    prompt = f"[CONTEXT]\n{json.dumps(empty_context)}\n\n[PLAYER] {player_input}"
    payload = {
        "model": "npc-ai",
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": 0.7},
    }
    try:
        reply = requests.post("http://localhost:11434/api/generate", json=payload, timeout=timeout)
        if reply.status_code != 200:
            return f"[Error {reply.status_code}]"
        return reply.json().get("response", "[No response]")
    except Exception as e:
        # Any failure is reported as text so the demo can continue.
        return f"[Ollama error: {e}]"
# The first request is sent with a longer timeout than the default.
print("üîç Warming up model...")
warmup_reply = query_npc('Warmup request', timeout=180)
print(f"   Warmup: {warmup_reply[:20]}...")

# Banner, then one exchange per scripted player line.
print("\n" + "="*60 + "\nüéÆ NPC AI INTEGRATED DEMO\n" + "="*60)
demo_inputs = ["Hello! I am new here.", "What is the curse?"]
for inp in demo_inputs:
    npc_reply = query_npc(inp)
    print(f"\nüë§ Player: {inp}\nü§ñ NPC: {npc_reply}")


---
## 7. 📊 Quality Evaluation


In [None]:
print('üìä Evaluating responses...')
# Simplified evaluation loop
test_queries = ["Hello!", "Who are you?", "Tell me a story."]
for q in test_queries:
    resp = query_npc(q)
    print(f"Q: {q}\nA: {resp[:50]}...\n")


---
## 8. 🛠️ C++ Engine Compilation


In [None]:
# Cell 8: configure and build the C++ engine out-of-tree in cpp/build
# when a cpp/ directory is present.
import os
import subprocess
if os.path.isdir('cpp'):
    print('üõ†Ô∏è Compiling C++ engine...')
    build_dir = os.path.join('cpp', 'build')
    os.makedirs(build_dir, exist_ok=True)
    try:
        # check=True raises on a non-zero exit, so a failed configure or
        # compile is reported instead of passing silently.
        subprocess.run(['cmake', '..'], cwd=build_dir, check=True)
        subprocess.run(['make'], cwd=build_dir, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers cmake/make not being installed.
        print(f'‚ùå C++ build failed: {e}')
    else:
        print(f'‚úÖ C++ engine built in {build_dir}')
else:
    print('‚ö†Ô∏è cpp/ directory not found ‚Äî skipping C++ build.')
