## Imports ##

In [1]:
# from runningLLAMA import llama_local_generate
from runningBaronLLM import baron_local_generate
import pandas as pd
import re
import time
import os

Downloading baronllm-llama3.1-v1-q6_k.gguf from AlicanKiraz0/Cybersecurity-BaronLLM_Offensive_Security_LLM_Q6_K_GGUF to ./models...


For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.
llama_model_loader: loaded meta data with 30 key-value pairs and 292 tensors from models/baronllm-llama3.1-v1-q6_k.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = BaronLLM Llama3.1 v1
llama_model_loader: - kv   3:                            general.version str              = v1
llama_model_loader: - kv   4:                       general.organization str              = AlicanKiraz0
llama_model_loader: - kv   5:                           general.basename str              = BaronLLM-llama3.1
llama_mo

Model downloaded to: models/baronllm-llama3.1-v1-q6_k.gguf
Loading model with llama_cpp...


init_tokenizer: initializing tokenizer for type 2
load: control token: 128254 '<|reserved_special_token_246|>' is not marked as EOG
load: control token: 128249 '<|reserved_special_token_241|>' is not marked as EOG
load: control token: 128246 '<|reserved_special_token_238|>' is not marked as EOG
load: control token: 128243 '<|reserved_special_token_235|>' is not marked as EOG
load: control token: 128242 '<|reserved_special_token_234|>' is not marked as EOG
load: control token: 128241 '<|reserved_special_token_233|>' is not marked as EOG
load: control token: 128240 '<|reserved_special_token_232|>' is not marked as EOG
load: control token: 128235 '<|reserved_special_token_227|>' is not marked as EOG
load: control token: 128231 '<|reserved_special_token_223|>' is not marked as EOG
load: control token: 128230 '<|reserved_special_token_222|>' is not marked as EOG
load: control token: 128228 '<|reserved_special_token_220|>' is not marked as EOG
load: control token: 128225 '<|reserved_special_

Model loaded successfully!


## Prepare the prompts ##

In [2]:
sys_prompt = 'You are a cybersecurity expert specializing in cyberthreat intelligence.'

In [3]:
prompt = """You are given a vulnerability description and a CVSS vector. Your task is to determine the post-condition privilege level — the level of access the attacker gains *after* successful exploitation.
Classify the post-condition privilege as one of the following:
- None: Attacker does not gain access to the system.
- User: Attacker gains user-level access (e.g., running code as a normal user, accessing user files).
- Root: Attacker gains full system or administrative access (e.g., root privileges, complete control over the system or application).

- Use both the CVE description and CVSS vector to make your decision.
- Provide a brief justification (1-2 sentences maximum).
- End your response with EXACTLY one of these words on a new line: None, User, or Root
- Do not repeat yourself or add extra explanations after the classification.

Analyze the following vulnerability:

CVE Description: [DESCRIPTION]
CVSS Vector: [CVSS]
"""

In [4]:
temperature = 0.1  
top_p = 0     
seed = 42
max_tokens = 256   

## Prompt the LLM ##

In [5]:
def format_post_condition(text):
    """
    Extract post-condition privilege classification from LLM response text.
    Returns the last valid classification found and whether extraction was successful.
    """
    # Define the regex pattern for matching privilege classification
    privilege_pattern = r'^(None|User|Root)[.:\s]*$'
    
    # Split the text into lines (from bottom up) and search for a matching line
    lines = text.strip().splitlines()
    for line in reversed(lines):
        line = line.strip()
        if re.match(privilege_pattern, line):
            # Extract just the classification word
            match = re.match(r'^(None|User|Root)', line)
            if match:
                return match.group(1), True
    
    # If no exact match found, look for the classification word anywhere in the text
    text_reversed = text[::-1]
    for word in ["tooR", "resU", "enoN"]:  # Reversed words
        if word in text_reversed:
            pos = text_reversed.find(word)
            return word[::-1], True  # Reverse back to original
    
    # If still not found, return the entire text and mark as failed
    return text, False

In [6]:
def run_evaluation(file_path):
    """
    Run CVSS prediction evaluation on a dataset using specified model.
    Processes each CVE description, extracts CVSS vectors, and saves results.
    """
    start_time = time.time()
    count_chars = 0
    instructions_failed = 0
    
    data = pd.read_csv(file_path, encoding='utf-8', sep='\t')
    all_results = []
    all_full_responses = []
    
    for index, row in data.iterrows():
        # Format the prompt with the specific vulnerability data
        llm_prompt = prompt.format(description=row['Description'], cvss=row['CVSS'])
        
        try:
            # Get prediction from the model
            output = baron_local_generate(sys_prompt, llm_prompt, max_tokens, temperature, top_p, seed)
            count_chars += len(output)
            
            # Store the full response
            all_full_responses.append(f"=== CVE {index+1} ===\n{output}\n")
            
            # Try to extract post-condition from the response
            answer, success = format_post_condition(output)
            if not success:
                instructions_failed += 1
            
            all_results.append(answer)
            print(index+1, answer)
        except Exception as e:
            answer = 'Error'
            error_msg = f"=== CVE {index+1} ===\nERROR: {str(e)}\n"
            all_full_responses.append(error_msg)
            all_results.append(answer)
            print('Exception at row ', index+1)
            print(e)
    
    time_taken = time.time() - start_time
    print('Time taken:', time_taken)
    print('#Characters generated:', count_chars)
    print('#Instructions failed:', instructions_failed)
    
    output_dir = os.path.join('responses', 'individual-results')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    out_result = os.path.join(output_dir, 'SENG402_' + os.path.basename(file_path).split('.')[0] + '_postcondition.txt')
    with open(out_result, 'w', encoding='utf-8') as f:
        f.write('\n'.join(all_results))
    
    out_full_responses = os.path.join(output_dir, 'SENG402_' + os.path.basename(file_path).split('.')[0] + '_full_postcondition_responses.txt')
    with open(out_full_responses, 'w', encoding='utf-8') as f:
        f.write('\n'.join(all_full_responses))
    
    print(f'Results saved to: {out_result}')
    print(f'Full responses saved to: {out_full_responses}')
    print('------- Done --------')

<!--  -->

In [7]:
def test_baron_simple():
    """Test baron_local_generate with a simple, clear prompt"""
    
    # Simple test parameters
    test_temperature = 0.1
    test_top_p = 0.8
    test_seed = 42
    test_max_tokens = 50
    
    # Simple system prompt
    test_sys_prompt = "You are a helpful assistant. Answer questions briefly and accurately."
    
    # Simple question
    test_question = "What is the difference between a vulnerability and an exploit? Answer in one sentence only."
    
    # Custom stop sequences for this test
    test_stop_sequences = [".", "\n\n", "\n"]
    
    print("Testing baron_local_generate with simple prompt...")
    print(f"Question: {test_question}")
    print("-" * 50)
    
    try:
        response = baron_local_generate(
            test_sys_prompt, 
            test_question, 
            test_max_tokens, 
            test_temperature, 
            test_top_p, 
            test_seed,
            test_stop_sequences  # Pass custom stop sequences
        )
        
        print(f"Response: '{response}'")
            
    except Exception as e:
        print(f"❌ Error: {e}")
        
    print("-" * 50)

# Run the test
test_baron_simple()

Testing baron_local_generate with simple prompt...
Question: What is the difference between a vulnerability and an exploit? Answer in one sentence only.
--------------------------------------------------


llama_perf_context_print:        load time =     810.15 ms
llama_perf_context_print: prompt eval time =     809.85 ms /    30 tokens (   26.99 ms per token,    37.04 tokens per second)
llama_perf_context_print:        eval time =    3749.19 ms /    31 runs   (  120.94 ms per token,     8.27 tokens per second)
llama_perf_context_print:       total time =    4606.21 ms /    61 tokens


Response: 'A vulnerability is a weakness or flaw in software that can be misused, whereas an exploit specifically takes advantage of the issue to achieve malicious objectives like unauthorized access'
--------------------------------------------------


In [9]:
def test_baron_vulnerability_fixed():
    """Test baron_local_generate with a vulnerability analysis prompt - FIXED"""
    
    # Test parameters matching your configuration
    test_temperature = 0.3
    test_top_p = 0.9  
    test_seed = 42
    test_max_tokens = 256
    
    # Your system prompt
    test_sys_prompt = 'You are a cybersecurity expert specializing in cyberthreat intelligence.'
    
    # Restructured prompt to avoid stop sequence conflicts
    test_prompt = """# TASK: DETERMINE POST-CONDITION PRIVILEGE LEVEL

        You are given a vulnerability description and a CVSS vector. Your task is to determine the post-condition privilege level — the level of access the attacker gains AFTER successful exploitation.

        ## CLASSIFICATION CATEGORIES
        You must classify the post-condition privilege as EXACTLY one of the following:

        - None: Attacker does not gain access to the system.
        - User: Attacker gains user-level access (e.g., running code as a normal user, accessing user files).
        - Root: Attacker gains full system or administrative access (e.g., root privileges, complete control over the system or application).

        ## INSTRUCTIONS
        1. Use BOTH the CVE description and CVSS vector to make your decision.
        2. Provide a brief justification (1-2 sentences MAXIMUM).
        3. End your response with EXACTLY one of these words on a new line: None, User, or Root
        4. Do NOT repeat yourself or add ANY extra explanations after the classification.

        ## INPUT TO ANALYZE
        CVE Description: A buffer overflow vulnerability in a web server allows remote attackers to execute arbitrary code with administrator privileges.
        CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H
    """

    # Custom stop sequences that won't conflict with the prompt
    custom_stop_sequences = [
        "### END",       # Explicit end marker
        "User",
        "Root",
        "None",          
    ]
    
    print("Testing baron_local_generate with vulnerability analysis (fixed)...")
    print("CVE: Buffer overflow with admin privileges")
    print("-" * 60)
    
    try:
        # Use the baron_local_generate function with custom stop sequences
        response = baron_local_generate(
            test_sys_prompt, 
            test_prompt, 
            test_max_tokens, 
            test_temperature, 
            test_top_p, 
            test_seed,
            stop_sequences=custom_stop_sequences  # Pass custom stop sequences
        )
        
        print(f"Full Response:\n'{response}'")
        print("-" * 30)
    
        # Test the extraction function
        classification, success = format_post_condition(response)
        print(f"Extracted classification: '{classification}'")
        print(f"Extraction successful: {success}")
        
        if success and classification in ["None", "User", "Root"]:
            print(f"✅ Valid classification: {classification}")
        else:
            print(f"❌ Invalid classification or extraction failed")
            
    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        
    print("-" * 60)

# Run the fixed test
test_baron_vulnerability_fixed()

Testing baron_local_generate with vulnerability analysis (fixed)...
CVE: Buffer overflow with admin privileges
------------------------------------------------------------
Full Response:
'## YOUR RESPONSE:'
------------------------------
Extracted classification: '## YOUR RESPONSE:'
Extraction successful: False
❌ Invalid classification or extraction failed
------------------------------------------------------------


In [None]:
data_set_file_path = "../datasets/postcondition-prediction.tsv"
run_evaluation(data_set_file_path)


1 Based on the CVE description and CVSS vector, classify what privilege level an attacker gains after successful exploitation. Choose from
2 Based on the CVE description and CVSS vector, classify what privilege level an attacker gains after successful exploitation. Choose from
2 Based on the CVE description and CVSS vector, classify what privilege level an attacker gains after successful exploitation. Choose from


KeyboardInterrupt: 