In [4]:
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
import torch
import pandas as pd
from transformers import AutoTokenizer

In [5]:
MODEL_NAME = "nraesalmi/phi3.5_set_eval_adapters"
BASE_MODEL = "unsloth/Phi-3.5-mini-instruct-bnb-4bit"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

csv_file = "C:\\Users\\nikke\\GitHub\\ai-pentest-report-finetuning-pipeline\\data\\manual_model_eval_dataset_0.3k.csv"

In [6]:
# Load your CSV
data = pd.read_csv(csv_file)

# Extract instruction and expected output columns
instructions = data["input"].astype(str)
expected_outputs = data["output"].astype(str)

In [7]:
# Load model + tokenizer via Unsloth
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = BASE_MODEL,
    max_seq_length = 8192,     # adjust if needed 8192
    dtype = None,              # auto
    load_in_4bit = True,      # set True if VRAM constrained
)

model.load_adapter(MODEL_NAME)


# Enable inference optimizations
FastLanguageModel.for_inference(model)

print(model.peft_config)

==((====))==  Unsloth 2026.1.2: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    NVIDIA GeForce RTX 3080. Num GPUs = 1. Max memory: 10.0 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.9.1+cu130. CUDA: 8.6. CUDA Toolkit: 13.0. Triton: 3.5.1
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading adapter weights from nraesalmi/phi3.5_set_eval_adapters led to missing keys in the model: model.layers.0.self_attn.q_proj.lora_A.default.weight, model.layers.0.self_attn.q_proj.lora_B.default.weight, model.layers.0.self_attn.k_proj.lora_A.default.weight, model.layers.0.self_attn.k_proj.lora_B.default.weight, model.layers.0.self_attn.v_proj.lora_A.default.weight, model.layers.0.self_attn.v_proj.lora_B.default.weight, model.layers.0.self_attn.o_proj.lora_A.default.weight, model.layers.0.self_attn.o_proj.lora_B.default.weight, model.layers.0.mlp.gate_proj.lora_A.default.weight, model.layers.0.mlp.gate_proj.lora_B.default.weight, model.layers.0.mlp.up_proj.lora_A.default.weight, model.layers.0.mlp.up_proj.lora_B.default.weight, model.layers.0.mlp.down_proj.lora_A.default.weight, model.layers.0.mlp.down_proj.lora_B.default.weight, model.layers.1.self_attn.q_proj.lora_A.default.weight, model.layers.1.self_attn.q_proj.lora_B.default.weight, model.layers.1.self_attn.k_proj.lora_A.defau

{'default': LoraConfig(task_type='CAUSAL_LM', peft_type=<PeftType.LORA: 'LORA'>, auto_mapping={'base_model_class': 'LlamaForCausalLM', 'parent_library': 'transformers.models.llama.modeling_llama', 'unsloth_fixed': True}, peft_version='0.18.0', base_model_name_or_path='unsloth/Phi-3.5-mini-instruct-bnb-4bit', revision=None, inference_mode=True, r=16, target_modules={'gate_proj', 'q_proj', 'v_proj', 'o_proj', 'k_proj', 'down_proj', 'up_proj'}, exclude_modules=None, lora_alpha=16, lora_dropout=0, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', trainable_token_indices=None, loftq_config={}, eva_config=None, corda_config=None, use_dora=False, alora_invocation_tokens=None, use_qalora=False, qalora_group_size=16, layer_replication=None, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=False), lora_bias=F

In [8]:
import re

def split_sections(text):
    return [s.strip() for s in re.split(r"\n\s*\n", text) if s.strip()]

In [9]:
from sentence_transformers import SentenceTransformer
import numpy as np

sentencemodel = SentenceTransformer("all-MiniLM-L6-v2")

def embed_sections(sections):
    return sentencemodel.encode(sections, normalize_embeddings=True)

In [10]:
def section_similarity(expected_sections, output_sections):
    exp_emb = embed_sections(expected_sections)
    out_emb = embed_sections(output_sections)

    sims = []
    for e in exp_emb:
        sims.append(np.max(out_emb @ e))
    return np.mean(sims)

In [12]:
from transformers import TextStreamer

# Set up tokenizer template
tokenizer = get_chat_template(
    tokenizer,
    chat_template="phi-3"
)

# Enable faster inference
FastLanguageModel.for_inference(model)

i=0
matches = 0
outputs = []

# Limit to first 10 for testing
# instructions = instructions.head(10)

for instr in instructions:

    expected_output = expected_outputs.iloc[i]
    print(f"\n\n=== Example {i+1} ===")
    print("----------------- Expected Output ----------------\n", expected_output)
    print ("\n------------------- Model Output ------------------\n")

    # Example JSON summarization message with rules
    messages = [
        {"role": "system", "content": """
You are an AI penetration test summarizing assistant.

You MUST output the following exact structure:

## Issue Summary:
<one sentence>

### Remediation Recommendation:
<one sentence>

Rules:
- Exactly two sentences total.
- Each sentence must be on its own line under the correct header.
- Do not combine sentences.
- Do not add text before or after the headers.
- Do not explain. 
"""},
        {"role": "user", "content": f"""Here is the penetration test summary:\n{instr}"""}
    ]

    # Tokenize with attention mask
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    )

    # Handle single tensor vs dict output
    if isinstance(inputs, dict):
        input_ids = inputs["input_ids"].to("cuda")
        attention_mask = inputs["attention_mask"].to("cuda")
    else:
        input_ids = inputs.to("cuda")
        attention_mask = None

    eos_token_id = tokenizer.convert_tokens_to_ids("<|end|>")

    # Generate output
    model_output = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=100,
        use_cache=True,
        do_sample=False,
        temperature=0,
        top_p=0.9,
        repetition_penalty=1.2,
        eos_token_id=eos_token_id,
    )

    # Compare model output to expected output
    generated_ids = model_output[0][input_ids.shape[-1]:]

    decoded_output = tokenizer.decode(
        generated_ids,
        skip_special_tokens=True
    )

    for end_token in ["<|endoftext|>", "<end>"]:
        decoded_output = decoded_output.replace(end_token, "")

    # Strip extra whitespace
    truncate_output = decoded_output.strip()

    print(truncate_output)

    outputs.append(truncate_output)
    i += 1

    # Calculate similarity score
    score = section_similarity(
        split_sections(expected_output),
        split_sections(truncate_output)
    )

    accuracy = score * 100


    print(f"Accuracy: {accuracy:.2f}%")

    if accuracy >= 70:
        print("Result: ✅ Match")
        matches += 1
    else:
        print("Result: ❌ Mismatch")


print(f"\n\n=== Summary ===\nTotal Matches: {matches}/{len(instructions)}")
pd.DataFrame({"model_output": outputs}).to_csv("../data/ai_outputs.csv", index=False)



=== Example 1 ===
----------------- Expected Output ----------------
 ## Issue Summary:
The findings from the Security Evaluation Tests indicate weaknesses in latentinjection.LatentInjectionFactSnippetEiffelFull, lmrc.Deadnaming, which may impact system robustness.

## Recommended Remediations:
To mitigate the identified risks, it is recommended to
- Introduce Intent detection and output filtering
- Implement Prompt boundary enforcement

------------------- Model Output ------------------

## Issue Summary: Vulnerabilities identified in various sets, with a pass percentage below optimal levels indicating security risks present within systems tested for ethical language use cases (e.g., deadnaming).  

### Remediation Recommendation: Implement intent recognition coupled with stringency checks to filter outputs; enforce strict boundaries around prompt inputs as immediate countermeasures against detected vulnerabilities across assessed scenarios involving sensitive contextual usage of t

KeyboardInterrupt: 