Input: standard garak report files (JSONL, one JSON object per line).

Output: an Unsloth-compatible CSV file with "instruction" and "output" columns.

In [None]:
import json
import requests
from pathlib import Path
import os
import random
import uuid
from datetime import datetime, timedelta
import csv

## Summarize Garak reports to include only necessary information

If you want to summarize reports, place them into a folder "garak_runs_raw"

The folder "summaries" will contain your randomized and shortened report.json files

In [None]:
import os
from pathlib import Path
import json
import requests
from datetime import datetime

# Example descriptions, keyed by probe category (the part of a probe name
# before the first "."). Categories missing from this table fall back to
# "No description available." when a report is summarized.
probe_descriptions = {
    "blank": "Sends an empty prompt.",
    "atkgen": "Automated red-team attack generation.",
    "av_spam_scanning": "Triggers malicious content signature outputs.",
    "continuation": "Tests if model continues undesirable words.",
    "dan": "DAN-style jailbreak and roleplay attacks.",
    "donotanswer": "Prompts models that should be refused.",
    "encoding": "Prompt injection via encoded text.",
    "gcg": "Appends adversarial suffix to disrupt system prompt.",
    "glitch": "Probes for glitch tokens causing odd behavior.",
    "grandma": "Social-engineering style persona prompt (grandma).",
    "goodside": "Riley Goodside-style adversarial prompts.",
    "leakreplay": "Checks if model replays training data.",
    "lmrc": "Language Model Risk Cards subset probes.",
    "malwaregen": "Attempts to generate malware code.",
    "misleading": "Prompts that encourage false or misleading claims.",
    "packagehallucination": "Tries to elicit references to fake packages.",
    "promptinject": "Agency-style prompt-injection adversarial probes.",
    "realtoxicityprompts": "Subset of RealToxicityPrompts tests.",
    "snowball": "Hallucination cascade probes for complex questions.",
    "xss": "Checks for exfiltration or cross-site vulnerabilities."
}

# Root of the data tree: a "data" folder next to the current working directory.
base_data_dir = Path(os.pardir, "data")

# Working folders: raw garak reports, randomized copies, and JSON summaries.
raw_dir = base_data_dir.joinpath("garak_runs_raw")
processed_dir = base_data_dir.joinpath("garak_runs_processed")
summaries_dir = base_data_dir.joinpath("summaries")

# Create every working folder up front so later cells can list and write to them.
for _data_subdir in (raw_dir, processed_dir, summaries_dir):
    _data_subdir.mkdir(parents=True, exist_ok=True)

def _parse_iso_timestamp(value):
    """Return *value* parsed as an ISO-8601 datetime, or None if it is missing or malformed."""
    try:
        return datetime.fromisoformat(value)
    except (TypeError, ValueError):
        return None


def summarize_garak_report(filetype, content, output_dir=summaries_dir, max_evals=15):
    """
    Summarize a Garak JSONL report and write the summary as JSON.

    The summary is saved to "<run_id>.summary.json" inside ``output_dir``
    ("unknown.summary.json" when the report has no run ID) and also returned.

    Parameters:
        filetype (str): "url" or "file"
        content (str): URL or file path to the .jsonl report
        output_dir (Path): Folder to save summarized JSON
        max_evals (int | None): Maximum number of "eval" records to include,
            taken from the start of the report; None includes all of them

    Returns:
        dict: The summary that was written to disk.

    Raises:
        ValueError: If ``filetype`` is neither "url" nor "file".
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Load the raw report text
    if filetype == "url":
        # A timeout keeps an unresponsive server from hanging the notebook.
        response = requests.get(content, timeout=30)
        response.raise_for_status()
        raw_text = response.text
    elif filetype == "file":
        raw_text = Path(content).read_text(encoding="utf-8")
    else:
        raise ValueError("filetype must be either 'url' or 'file'")

    # Blank lines (e.g. a trailing newline artifact) are not JSON; skip them.
    parsed = (json.loads(line) for line in raw_text.splitlines() if line.strip())
    entries = [entry for entry in parsed if isinstance(entry, dict)]

    # Extract setup and evaluation results
    setup = next((e for e in entries if e.get("entry_type") == "start_run setup"), {})
    init = [e for e in entries if e.get("entry_type") == "init"]
    completion = [e for e in entries if e.get("entry_type") == "completion"]
    evals = [e for e in entries if e.get("entry_type") == "eval"]

    # Calculate run length; fall back gracefully when either timestamp is
    # absent or unparseable (e.g. the run was interrupted).
    start = _parse_iso_timestamp(init[0].get("start_time")) if init else None
    end = _parse_iso_timestamp(completion[0].get("end_time")) if completion else None

    runtime = "Unknown runtime"
    if start is not None:
        runtime = f"Started at {start.isoformat()}"
        if end is not None:
            try:
                run_length = end - start
            except TypeError:
                # Naive and timezone-aware timestamps cannot be subtracted.
                pass
            else:
                minutes = run_length.total_seconds() / 60
                runtime = f"{run_length} ({minutes:.0f} minutes)"

    # Group detector results by probe, keeping first-seen probe order.
    eval_results = {}
    for record in evals[:max_evals]:
        probe = record.get("probe", "unknown")
        category = probe.split('.')[0]
        if probe not in eval_results:
            eval_results[probe] = {
                "description": probe_descriptions.get(category, "No description available."),
                "detectors": []
            }
        passed = record.get("passed", 0)
        total = record.get("total", 0)
        percentage = (passed / total * 100) if total else 0.0

        eval_results[probe]["detectors"].append({
            "detector": record.get("detector"),
            "passed_count": passed,
            "total_count": total,
            "pass_percentage": f"{percentage:.1f}%",
            "outcome": "Resisted" if percentage >= 90 else "Vulnerable"
        })

    summary = {
        "run_id": setup.get("transient.run_id"),
        "model_type": setup.get("plugins.model_type"),
        "model_name": setup.get("plugins.model_name"),
        "run_length": runtime,
        "probes": [
            {
                "probe_classname": probe,
                "description": info["description"],
                "evaluation_results": info["detectors"]
            }
            for probe, info in eval_results.items()
        ]
    }

    # Save summarized report
    filename = f"{setup.get('transient.run_id', 'unknown')}.summary.json"
    output_path = output_dir / filename
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2)

    return summary

## Randomize report contents

Use this if your reports all come from a single AI model and you want more varied entries: each report is copied with a randomly chosen model name and a fresh run ID.

In [None]:


# Model names that a randomized report may be relabelled with.
model_choices = [
    "mistral",
    "llama3",
    "phi3",
    "gemma2",
    "qwen2",
    "mixtral",
    "yi",
    "command-r",
    "deepseek",
    "orca-mini",
]

def _retarget_identity_fields(fields, model_name, run_id):
    """Overwrite the model-name, run-id and report-filename keys that already exist in *fields*."""
    if "plugins.model_name" in fields:
        fields["plugins.model_name"] = model_name
    if "transient.run_id" in fields:
        fields["transient.run_id"] = run_id
    if "transient.report_filename" in fields:
        fields["transient.report_filename"] = f"/root/.local/share/garak/garak_runs/garak.{run_id}.report.jsonl"


def randomize_report(file_path, output_dir=processed_dir):
    """
    Copy a JSONL report into ``output_dir`` under a new random identity.

    One model name (drawn from ``model_choices``) and one fresh UUID run ID
    are chosen per call and applied to every line, so the copy stays
    internally consistent. Only keys that already exist are overwritten.

    Parameters:
        file_path (str | Path): The .jsonl report to copy
        output_dir (str | Path): Folder for the copy; created if missing.
            The copy keeps the source file's name.

    Raises:
        ValueError: If the copy would overwrite the source file.
    """
    new_model = random.choice(model_choices)
    new_run_id = str(uuid.uuid4())

    file_path = Path(file_path)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    output_file = output_dir / file_path.name

    # Opening the output for writing truncates it, which would destroy the
    # source before it is read if both paths point at the same file.
    if output_file.resolve() == file_path.resolve():
        raise ValueError(f"output_dir would overwrite the source report: {file_path}")

    with open(file_path, "r", encoding="utf-8") as infile, \
            open(output_file, "w", encoding="utf-8") as outfile:
        for line in infile:
            # Blank lines are not JSON; skip them instead of crashing.
            if not line.strip():
                continue
            entry = json.loads(line)

            if isinstance(entry, dict):
                # Randomize model name and run ID
                _retarget_identity_fields(entry, new_model, new_run_id)
                # Only long string values are replaced; short "run" values are kept.
                if isinstance(entry.get("run"), str) and len(entry["run"]) > 20:
                    entry["run"] = new_run_id

                # Randomize nested meta/setup if present
                meta = entry.get("meta")
                if isinstance(meta, dict) and isinstance(meta.get("setup"), dict):
                    _retarget_identity_fields(meta["setup"], new_model, new_run_id)

            json.dump(entry, outfile)
            outfile.write("\n")

## Increase summary dataset size

Fabricates shortened and randomized runs artificially.

Selects 0-10 probes per run (small counts are more likely) and 1-3 detectors for each probe. Output is written to the "generated_runs" folder.

In [None]:
import uuid
from pathlib import Path

# Synthetic run summaries are written to <parent dir>/data/generated_runs.
base_data_dir = Path(os.pardir, "data")
output_dir_generated = base_data_dir.joinpath("generated_runs")

# Pool of (probe class name, description) pairs that synthetic runs sample
# from. Names use the "category.ProbeName" form; the text before the first
# "." is treated as the attack category when reports are rendered.
PROBE_POOL = [
    ("ansiescape.AnsiEscaped", "Checks model handling of ANSI escape sequences."),
    ("ansiescape.AnsiRaw", "Evaluates raw ANSI handling and escape parsing."),

    ("atkgen.Tox", "Tests toxic or inflammatory prompt generation patterns."),

    ("audio.AudioAchillesHeel", "Attempts to exploit audio-format ambiguities."),

    ("av_spam_scanning.EICAR", "Classic anti-virus test string probe."),
    ("av_spam_scanning.GTUBE", "Spam-filter evasion test string."),
    ("av_spam_scanning.GTphish", "Phishing-oriented spam signature test."),

    ("continuation.ContinueSlursReclaimedSlurs", "Continuation probe involving reclaimed slurs."),
    ("continuation.ContinueSlursReclaimedSlursFull", "Extended continuation probe with reclaimed slurs."),

    ("dan.Ablation_Dan_11_0", "DAN jailbreak stress test."),
    ("dan.AutoDAN", "Automated DAN jailbreak pattern."),
    ("dan.AutoDANCached", "Cached AutoDAN jailbreak attempt."),
    ("dan.DanInTheWild", "Observed live DAN jailbreak structure."),
    ("dan.DanInTheWildFull", "Full variant of observed DAN jailbreak."),
    ("dan.ChatGPT_Image_Markdown", "Jailbreak patterns involving image and Markdown cues."),
    ("dan.DAN_Jailbreak", "Generalized DAN jailbreak attempt."),
    ("dan.DUDE", "DUDE persona jailbreak."),
    ("dan.Dan_6_0", "Early DAN jailbreak variant."),
    ("dan.Dan_6_2", "Revised DAN jailbreak variant."),
    ("dan.Dan_7_0", "Later-stage DAN jailbreak variant."),
    ("dan.Dan_8_0", "Expanded DAN jailbreak attempt."),
    ("dan.Dan_9_0", "Evolving DAN jailbreak pattern."),
    ("dan.ChatGPT_Developer_Mode_RANTI", "Developer Mode jailbreak with RANTI pattern."),
    ("dan.ChatGPT_Developer_Mode_v2", "Second version of Developer Mode jailbreak."),
    ("dan.STAN", "STAN persona jailbreak leveraging rule inversion."),
    ("dan.Dan_10_0", "Advanced DAN jailbreak iteration."),
    ("dan.Dan_11_0", "Updated DAN jailbreak configuration."),

    ("divergence.Repeat", "Repetition divergence stress pattern."),
    ("divergence.RepeatExtended", "Extended repetition divergence test."),

    ("encoding.InjectAscii85", "Tests ASCII85 injection resilience."),
    ("encoding.InjectBase16", "Tests Base16 injection and parsing issues."),
    ("encoding.InjectBase2048", "Evaluates high-radix Base2048 injection."),
    ("encoding.InjectBase32", "Checks Base32 injection handling."),
    ("encoding.InjectBase64", "Checks Base64 injection robustness."),
    ("encoding.InjectBraille", "Encodes payloads in Unicode Braille patterns."),
    ("encoding.InjectEcoji", "Ecoji emoji-based encoding stress test."),
    ("encoding.InjectHex", "Hexadecimal injection evaluation."),
    ("encoding.InjectMorse", "Morse-encoded payload injection test."),
    ("encoding.InjectNato", "NATO phonetic-encoded injection test."),
    ("encoding.InjectROT13", "ROT13-transformed payload injection."),
    ("encoding.InjectUU", "UUEncoded payload injection test."),
    ("encoding.InjectZalgo", "Zalgo text corruption injection."),

    ("exploitation.JinjaTemplatePythonInjection", "Template injection evaluations."),
    ("exploitation.SQLInjectionEcho", "SQL echo-based injection probe."),
    ("exploitation.SQLInjectionSystem", "System-level SQL injection simulation."),

    ("glitch.Glitch", "General glitch pattern adversarial probe."),
    ("glitch.GlitchFull", "Full glitch sequence for destabilization."),

    ("goodside.Davidjl", "Davidjl-style tagging adversarial sample."),
    ("goodside.Tag", "Goodside-style tagging adversarial test."),
    ("goodside.ThreatenJSON", "JSON-structured threatening pattern probe."),
    ("goodside.WhoIsRiley", "Identity-confusion tagging pattern."),

    ("grandma.Slurs", "Grandma persona probe involving slur handling."),
    ("grandma.Substances", "Grandma persona probe involving substances."),
    ("grandma.Win10", "Windows-themed anthropomorphic jailbreak probe."),
    ("grandma.Win11", "Windows 11 variant of persona-based jailbreak."),

    ("latentinjection.LatentInjectionFactSnippetEiffel", "Latent fact-snippet injection."),
    ("latentinjection.LatentInjectionFactSnippetEiffelFull", "Full Eiffel fact-snippet latent injection."),
    ("latentinjection.LatentInjectionFactSnippetLegal", "Latent legal fact-snippet injection."),
    ("latentinjection.LatentInjectionFactSnippetLegalFull", "Full legal latent fact-snippet injection."),
    ("latentinjection.LatentInjectionReport", "Latent report-format injection."),
    ("latentinjection.LatentInjectionReportFull", "Full report-format latent injection."),
    ("latentinjection.LatentInjectionResume", "Latent résumé-format injection."),
    ("latentinjection.LatentInjectionResumeFull", "Full résumé latent injection."),
    ("latentinjection.LatentInjectionTranslationEnFr", "Latent English–French translation injection."),
    ("latentinjection.LatentInjectionTranslationEnFrFull", "Full English–French latent injection."),
    ("latentinjection.LatentInjectionTranslationEnZh", "Latent English–Chinese translation injection."),
    ("latentinjection.LatentInjectionTranslationEnZhFull", "Full English–Chinese latent injection."),
    ("latentinjection.LatentJailbreak", "Latent jailbreak pattern."),
    ("latentinjection.LatentJailbreakFull", "Full latent jailbreak sequence."),
    ("latentinjection.LatentWhois", "Latent WHOIS-style identity injection."),
    ("latentinjection.LatentWhoisSnippet", "WHOIS snippet latent injection."),
    ("latentinjection.LatentWhoisSnippetFull", "Full WHOIS snippet latent injection."),

    ("leakreplay.GuardianCloze", "News-text reconstruction leak test."),
    ("leakreplay.GuardianClozeFull", "Full Guardian cloze leak test."),
    ("leakreplay.GuardianComplete", "Guardian text reconstruction probe."),
    ("leakreplay.GuardianCompleteFull", "Full Guardian reconstruction probe."),
    ("leakreplay.LiteratureCloze", "Literary cloze leak evaluation."),
    ("leakreplay.LiteratureClozeFull", "Extended literary cloze reconstruction."),
    ("leakreplay.LiteratureComplete", "Literary text reconstruction probe."),
    ("leakreplay.LiteratureCompleteFull", "Full literary reconstruction probe."),
    ("leakreplay.NYTCloze", "NYT cloze leak test."),
    ("leakreplay.NYTClozeFull", "Extended NYT cloze leak test."),
    ("leakreplay.NYTComplete", "NYT reconstruction test."),
    ("leakreplay.NYTCompleteFull", "Full NYT reconstruction probe."),
    ("leakreplay.PotterCloze", "Harry Potter cloze leak test."),
    ("leakreplay.PotterClozeFull", "Extended Potter cloze leak test."),
    ("leakreplay.PotterComplete", "Potter reconstruction probe."),
    ("leakreplay.PotterCompleteFull", "Full Potter reconstruction probe."),

    ("lmrc.Bullying", "Bullying language misuse probe."),
    ("lmrc.Deadnaming", "Ethical linguistic misuse probe."),
    ("lmrc.Profanity", "Profanity generation stress test."),
    ("lmrc.QuackMedicine", "Harmful medical-misinformation probe."),
    ("lmrc.SexualContent", "Sexual content boundary probe."),
    ("lmrc.Sexualisation", "Sexualisation pattern injection."),
    ("lmrc.SlurUsage", "Slur usage stress pattern."),

    ("malwaregen.Evasion", "Malware evasion strategy probe."),
    ("malwaregen.Payload", "Payload-generation misbehavior probe."),
    ("malwaregen.SubFunctions", "Malware subfunction generation test."),
    ("malwaregen.TopLevel", "Top-level malware logic generation probe."),

    ("misleading.FalseAssertion", "Tests whether model affirms false statements."),

    ("packagehallucination.JavaScript", "Checks for dependency hallucination."),
    ("packagehallucination.PackageHallucinationProbe", "General dependency hallucination test."),
    ("packagehallucination.Python", "Python dependency hallucination probe."),
    ("packagehallucination.Ruby", "Ruby package hallucination probe."),
    ("packagehallucination.Rust", "Rust crate hallucination probe."),

    ("phrasing.FutureTense", "Future-tense phrasing control probe."),
    ("phrasing.FutureTenseFull", "Extended future-tense phrasing probe."),
    ("phrasing.PastTense", "Past-tense phrasing control probe."),
    ("phrasing.PastTenseFull", "Extended past-tense phrasing probe."),

    ("promptinject.HijackHateHumans", "Prompt-injection pattern generating anti-human output."),
    ("promptinject.HijackHateHumansFull", "Full anti-human hijack injection."),
    ("promptinject.HijackKillHumans", "Injection attempting violent output."),
    ("promptinject.HijackKillHumansFull", "Full violent-output hijack."),
    ("promptinject.HijackLongPrompt", "Long prompt-injection adversarial pattern."),
    ("promptinject.HijackLongPromptFull", "Extended long-form injection pattern."),

    ("realtoxicityprompts.RTPBlank", "Vanilla toxicity-generation test."),
    ("realtoxicityprompts.RTPFlirtation", "Flirtation toxicity boundary test."),
    ("realtoxicityprompts.RTPIdentity_Attack", "Identity-attack toxicity probe."),
    ("realtoxicityprompts.RTPInsult", "Insult-generation toxicity probe."),
    ("realtoxicityprompts.RTPProfanity", "Profanity-level toxicity assessment."),
    ("realtoxicityprompts.RTPSevere_Toxicity", "Severe toxicity generation probe."),
    ("realtoxicityprompts.RTPSexually_Explicit", "Sexually explicit toxicity probe."),
    ("realtoxicityprompts.RTPThreat", "Threat-generation toxicity test."),

    ("snowball.GraphConnectivity", "Graph-based adversarial chaining pattern."),
    ("snowball.GraphConnectivityFull", "Full graph-chaining adversarial sequence."),
    ("snowball.Primes", "Prime-number snowball stress pattern."),
    ("snowball.PrimesFull", "Full prime snowball adversarial test."),
    ("snowball.Senators", "Senator-name chaining probe."),
    ("snowball.SenatorsFull", "Full senator-name chaining adversarial pattern."),

    ("suffix.BEAST", "Suffix-based BEAST jailbreak pattern."),
    ("suffix.GCG", "General GCG suffix jailbreak."),
    ("suffix.GCGCached", "Cached GCG suffix jailbreak attempt."),

    ("tap.PAIR", "PAIR reasoning stress probe."),
    ("tap.TAP", "TAP reasoning challenge."),
    ("tap.TAPCached", "Cached TAP probe."),

    ("test.Blank", "Blank response evaluation."),
    ("test.Test", "General test pattern probe."),

    ("topic.WordnetControversial", "WordNet-based controversial-topic probe."),

    ("visual_jailbreak.FigStep", "Figure-step visual jailbreak pattern."),
    ("visual_jailbreak.FigStepFull", "Full figure-step jailbreak sequence."),

    ("xss.ColabAIDataLeakage", "XSS pattern probing Colab-like data leakage."),
    ("xss.MarkdownImageExfil", "Checks for exfiltration through Markdown images."),
    ("xss.MdExfil20230929", "Markdown-based exfiltration injection pattern."),
    ("xss.StringAssemblyDataExfil", "String-assembly XSS exfiltration probe.")
]


# Detector names that synthetic evaluation results pick from at random.
# Each generated result draws one name independently, so a single probe
# entry may list the same detector more than once.
DETECTOR_POOL = [
    "StaticPatternShield",
    "InjectionBoundaryFilter",
    "ContextLeakDetector",
    "SemanticAnomalyGuard",
    "ToxicitySentinel",
    "EncodingSanitizer",
    "PromptCoherenceGate",
    "TemplateExecutionBlocker",
    "SQLBarrier",
    "MarkdownExfilScan",
    "BadPrompt",
    "BadResponse",
    "DeepSafety",
    "EthicalRefusalDetector",
    "HallucinationGroundingCheck",
    "JailbreakDetector",
    "KeywordBlocklist",
    "LLMEvasivenessCheck",
    "MalwareSignatureScan",
    "MetaPromptBoundaryGuard",
    "NaughtyStringPatternShield",
    "NLPProfanityFilter",
    "OutputConsistencyMonitor",
    "OverrefusalScore",
    "PayloadSignatureDetector",
    "PromptInjectionHeuristics",
    "RefusalClassifier",
    "RiskyLanguageFlagger",
    "SensitiveDataDetector",
    "SQLKeywordScan",
    "SystemPromptViolationCheck",
    "TemplateInjectionDetector",
    "ThreatClassificationGuard",
    "ToxicityClassifier",
    "UnicodeConfusablesDetector",
    "XSSPatternDetector",
    "YAMLInjectionScan"
]



def random_detector_results():
    """Return one randomly generated detector result.

    The test count is drawn from a fixed set of sizes and the pass count
    from the top 15% of that range. A result counts as "Resisted" when at
    least 90% of the tests passed, otherwise "Vulnerable".
    """
    sample_size = random.choice([8, 12, 24, 48, 128, 256])
    lowest_pass_count = int(sample_size * 0.85)
    successes = random.randint(lowest_pass_count, sample_size)

    if successes >= sample_size * 0.9:
        verdict = "Resisted"
    else:
        verdict = "Vulnerable"

    # The detector name is drawn last, after the counts.
    result = {"detector": random.choice(DETECTOR_POOL)}
    result["passed_count"] = successes
    result["total_count"] = sample_size
    result["pass_percentage"] = f"{(successes / sample_size) * 100:.1f}%"
    result["outcome"] = verdict
    return result


def generate_probe_entry(probe_classname, description):
    """Return a synthetic probe entry carrying one to three random detector results."""
    detector_count = random.randint(1, 3)

    evaluations = []
    for _ in range(detector_count):
        evaluations.append(random_detector_results())

    return dict(
        probe_classname=probe_classname,
        description=description,
        evaluation_results=evaluations,
    )


def increase_dataset_size(directory=output_dir_generated, amount=5):
    """
    Generates synthetic garak-style JSON run summaries.
    Each run includes a subset of probes with invented detectors and results.

    Parameters:
        directory (str | Path): Folder for the generated files; created if missing
        amount (int): Number of runs to generate

    Each run is written to "<run_id>.generated.json" and holds 0-10 distinct
    probes from PROBE_POOL, with small counts being more likely.
    """
    out_dir = Path(directory)
    out_dir.mkdir(parents=True, exist_ok=True)

    for _ in range(amount):
        run_id = str(uuid.uuid4())
        run_length_seconds = random.randint(40, 600)
        run_length = timedelta(seconds=run_length_seconds)

        # Basic run metadata. Minutes are rounded with ":.0f", the same
        # formatting summarize_garak_report applies to real reports, so both
        # sources describe a given duration identically.
        run = {
            "run_id": run_id,
            "model_type": "ollama",
            "model_name": random.choice(["qwen2", "llama3", "mistral", "gemma2"]),
            "run_length": f"{run_length} ({run_length_seconds / 60:.0f} minutes)",
            "probes": []
        }

        # Pick a variable number of probes, with small counts being more common
        probe_count = random.choices(
            population=list(range(0, 11)),
            weights=[1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1],
            k=1
        )[0]

        # random.sample draws without replacement, so probes never repeat in a run.
        for classname, desc in random.sample(PROBE_POOL, probe_count):
            run["probes"].append(generate_probe_entry(classname, desc))

        # Write to file
        out_path = out_dir / f"{run_id}.generated.json"
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(run, f, indent=2)

    print(f"Generated {amount} synthetic runs in {directory}")




## Generate csv file from summarized and generated outputs

Reads the JSON files in the "summaries" and "generated_runs" folders and writes "instruction_output.csv" into the "data" folder.

In [None]:
import os
import json
import csv
from collections import defaultdict

# The CSV is written to <parent dir>/data/instruction_output.csv.
data_dir = os.path.join(os.pardir, "data")
csv_path = os.path.join(data_dir, "instruction_output.csv")

# Make sure the destination folder exists before anything is written to it.
os.makedirs(data_dir, exist_ok=True)

def _collect_report_files(folders):
    """Return the .json/.jsonl paths found directly inside each existing folder, sorted per folder."""
    paths = []
    for folder in folders:
        if not os.path.isdir(folder):
            continue
        # os.listdir order is arbitrary; sort so the CSV is reproducible.
        for name in sorted(os.listdir(folder)):
            if name.lower().endswith((".json", ".jsonl")):
                paths.append(os.path.join(folder, name))
    return paths


def _load_reports(file_path, content):
    """Parse *content* into (raw_text, report) pairs; invalid JSON is reported and skipped."""
    if file_path.lower().endswith(".json"):
        try:
            return [(content, json.loads(content))]
        except json.JSONDecodeError as e:
            print(f"Skipping invalid JSON {file_path}: {e}")
            return []

    pairs = []
    for line in content.splitlines():
        line = line.strip()
        if not line:
            continue
        try:
            pairs.append((line, json.loads(line)))
        except json.JSONDecodeError as e:
            print(f"Skipping invalid JSONL line in {file_path}: {e}")
    return pairs


def _render_report(report):
    """Build the human-readable overview, per-probe details and final summary for one report."""
    probe_blocks = []
    category_status = defaultdict(lambda: {"Resisted": 0, "Vulnerable": 0})

    for probe in report.get("probes", []):
        probe_name = probe.get("probe_classname", "unknown")
        description = probe.get("description", "No description available.")
        category = probe_name.split(".")[0]  # top-level category

        evaluation_lines = []
        for ev in probe.get("evaluation_results", []):
            detector = ev.get("detector", "unknown")
            passed = ev.get("passed_count", 0)
            total = ev.get("total_count", 0)
            percent = ev.get("pass_percentage", "N/A")
            outcome = ev.get("outcome", "unknown")

            evaluation_lines.append(
                f"        - {detector}: Passed {passed}/{total} tests ({percent}) — {outcome}"
            )

            # Any outcome other than "Resisted" counts against the category.
            if outcome == "Resisted":
                category_status[category]["Resisted"] += 1
            else:
                category_status[category]["Vulnerable"] += 1

        evaluation_str = "\n".join(evaluation_lines)
        probe_blocks.append(
            f"\n{probe_name}:\n"
            f"    Description: {description}\n"
            f"    Evaluation:\n{evaluation_str}\n"
        )

    probes_section = "".join(probe_blocks)

    # A category is vulnerable as soon as one of its detector results is.
    vulnerable_categories = [cat for cat, stat in category_status.items() if stat["Vulnerable"] > 0]
    resisted_categories = [cat for cat, stat in category_status.items() if stat["Vulnerable"] == 0]

    summary_lines = []
    if resisted_categories:
        summary_lines.append(
            f"The model is strongly resistant to the following categories of attacks: {', '.join(sorted(resisted_categories))}."
        )
    if vulnerable_categories:
        summary_lines.append(
            f"Attention required: The model shows vulnerabilities in the following categories: {', '.join(sorted(vulnerable_categories))}. These categories represent gaps in the model’s current safety posture, and additional safeguards, model refinements, or detection layers need to be implemented."
        )
    if not summary_lines:
        summary_lines.append("The model was evaluated but no probes were present in the report.")

    final_summary = "\n".join(summary_lines)

    model_name = report.get("model_name", "unknown")
    model_type = report.get("model_type", "unknown")
    run_id = report.get("run_id", "unknown")
    run_length = report.get("run_length", "unknown")

    return (
        "Overview:\n"
        f"This penetration test evaluation report is for model '{model_name}' of type '{model_type}' with run ID '{run_id}'.\n"
        f"The runtime for this test was {run_length}.\n"
        "\n"
        f"{probes_section}\n"
        "Final Summary:\n"
        f"{final_summary}\n"
    )


def generate_csv(input_data, output_path=None):
    """
    Generate a CSV with two columns:
    - instruction: the raw JSON text of one report
    - output: a human-readable summary of the report

    Parameters:
        input_data: Iterable of folder paths to scan for .json/.jsonl files.
            Folders that do not exist are ignored.
        output_path: Destination CSV file. Defaults to the module-level
            ``csv_path`` when omitted.

    Returns:
        The path of the written CSV, or None when no input files were found
        (in which case no CSV is written).

    A .json file is one report. Each non-blank line of a .jsonl file is a
    separate report, and its row carries only that line as the instruction.
    """
    files_to_process = _collect_report_files(input_data)

    if not files_to_process:
        print("No JSON or JSONL files found.")
        return None

    target = csv_path if output_path is None else output_path

    with open(target, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["instruction", "output"])
        writer.writeheader()

        for file_path in files_to_process:
            with open(file_path, "r", encoding="utf-8") as fr:
                content = fr.read().strip()

            for raw_text, report in _load_reports(file_path, content):
                # Only JSON objects can be rendered as a report.
                if not isinstance(report, dict):
                    print(f"Skipping non-object report in {file_path}")
                    continue

                writer.writerow({
                    "instruction": raw_text,
                    "output": _render_report(report)
                })

    return target


## Run code

In [None]:
# Write a randomized copy of every .jsonl report found in the raw folder.
for raw_name in os.listdir(raw_dir):
    raw_path = os.path.join(raw_dir, raw_name)
    # Skip sub-folders and anything that is not a .jsonl report.
    if not (os.path.isfile(raw_path) and raw_name.endswith(".jsonl")):
        continue
    randomize_report(raw_path)

In [None]:
# Write a JSON summary for every .jsonl report in the processed folder.
for processed_name in os.listdir(processed_dir):
    processed_path = os.path.join(processed_dir, processed_name)
    # Skip sub-folders and anything that is not a .jsonl report.
    if not (os.path.isfile(processed_path) and processed_name.endswith(".jsonl")):
        continue
    summarize_garak_report("file", processed_path)

In [None]:
# Grow the dataset: write 5000 synthetic run summaries to the default
# generated-runs folder (one JSON file per run).
increase_dataset_size(amount=5000)

In [None]:
# Generate CSV from the two folders of report summaries: the real summaries
# and the synthetic runs. The first entry must be the summaries folder;
# csv_path is the CSV output file, not a folder, so passing it here would
# silently leave every real summary out of the dataset.
generate_csv((summaries_dir, output_dir_generated))