# GSM8K G2PIA Defensive Demo

Lab-only demonstration of the defensive Goal-guided Generative Prompt Injection Attack (G2PIA) harness.
- Victim model: `gemma3:27b` (make sure it is pulled in Ollama and your hardware can run it).
- Generator model: `phi3:mini` (local, lightweight).
- Dataset: GSM8K test subset (loaded via `datasets` and pandas).

> Safety note: Candidate generation is for robustness measurement only. Keep candidate counts small and run on trusted hardware.


In [None]:
%pip install -q pandas datasets

import sys
from pathlib import Path
import os

# Make local src/ importable. In notebooks __file__ is not defined,
# so locate repository root by searching for pyproject.toml or .git up the directory tree.

def find_repo_root(start: "Path | None" = None):
    """Locate the repository root by walking up from ``start``.

    A directory counts as the root when it contains ``pyproject.toml`` or
    ``.git``. The search begins at ``start`` itself and proceeds through each
    of its ancestors in order.

    Parameters
    ----------
    start : Path or str, optional
        Directory to begin searching from. When omitted, the current working
        directory *at call time* is used.

    Returns
    -------
    Path
        The first directory (closest to ``start``) holding a marker. If no
        marker is found, the directory two levels above ``start`` when it
        exists, otherwise the resolved ``start`` itself.
    """
    # Resolve the default here rather than in the signature: a ``Path.cwd()``
    # default argument is evaluated once, when the defining cell runs, and
    # would go stale if the kernel's working directory changes afterwards.
    cur = (Path.cwd() if start is None else Path(start)).resolve()
    for candidate in (cur, *cur.parents):
        if (candidate / 'pyproject.toml').exists() or (candidate / '.git').exists():
            return candidate
    # Fallback: two levels up when that ancestor exists, else the start itself.
    ancestors = cur.parents
    return ancestors[1] if len(ancestors) >= 2 else cur

# Resolve the project layout and expose <root>/src to the import system.
PROJECT_ROOT = find_repo_root()
SRC_DIR = PROJECT_ROOT.joinpath("src")

# Prepend only when absent so re-running the cell does not stack duplicates.
if not any(entry == str(SRC_DIR) for entry in sys.path):
    sys.path.insert(0, str(SRC_DIR))

print('PROJECT_ROOT:', PROJECT_ROOT)
print('Added to sys.path:', SRC_DIR)


/home/nbrana/Projects/RedTeam-LLM/.venv/bin/python: No module named pip
Note: you may need to restart the kernel to use updated packages.


NameError: name '__file__' is not defined

In [None]:
import pandas as pd
from datasets import load_dataset

# Load only the first 20 GSM8K test rows so the demo stays quick.
gsm8k = load_dataset("gsm8k", "main", split="test[:20]")
df = gsm8k.to_pandas()

# The final answer is the text following the last '####' marker in `answer`.
df["ground_truth"] = df["answer"].map(
    lambda raw: str(raw).rpartition("####")[2].strip()
)
# The unmodified question is kept under the name `clean`.
df["clean"] = df["question"]

# (question, ground-truth answer) pairs, in dataset order.
samples = [
    (question, truth)
    for question, truth in zip(df["clean"], df["ground_truth"])
]
df[["clean", "ground_truth"]].head(3)


In [None]:
from pathlib import Path
from redteam.modules import g2pia_defensive as g2pia

# Run the G2PIA evaluation over the prepared samples.
# Candidate counts are kept small so the run stays tractable.
report = g2pia.evaluate_attack_set(
    # --- inputs and models ---
    dataset=samples,
    victim_model="gemma3:27b",
    generator_model_name="phi3:mini",
    allow_adversarial_sim=True,
    # --- numeric settings, forwarded unchanged to the harness ---
    # NOTE(review): meanings are defined in redteam.modules.g2pia_defensive.
    candidates_per_sample=2,
    N=10,
    max_attempts=50,
    epsilon=0.2,
    gamma=0.5,
    delta=0.05,
    # --- report destinations (relative to the current working directory) ---
    out_path=Path("reports/g2pia_gsm8k_gemma3.json"),
    html_out_path=Path("reports/g2pia_gsm8k_gemma3.html"),
)
report["metrics"]


In [None]:
# Display the first entry of the report's "samples" collection so one
# per-sample attack result can be inspected in full.
report["samples"][0]
