## Imports ##

In [1]:
# from runningLLAMA import llama_local_generate
from runningBaronLLM import baron_local_generate
import pandas as pd
import re
import time
import os

llama_context: n_ctx_per_seq (8192) < n_ctx_train (131072) -- the full capacity of the model will not be utilized
llama_kv_cache_unified: LLAMA_SET_ROWS=0, using old ggml_cpy() method for backwards compatibility
llama_kv_cache_unified: LLAMA_SET_ROWS=0, using old ggml_cpy() method for backwards compatibility


## Prepare the prompts ##

In [2]:
# System prompt: run_evaluation passes this as the first argument to
# baron_local_generate for every dataset row. It defines the three allowed
# labels (None / User / Root) and asks the model to finish its reply with a
# "##POSTCONDITION <label>" line.
# NOTE: the leading indentation inside the triple-quoted string is part of the
# literal and is therefore sent to the model as-is.
sys_prompt = """
    You are a cybersecurity vulnerability classification expert. Your task is to
    determine the post-condition privilege level after successful exploitation.

    POST-CONDITION PRIVILEGE LEVEL DEFINITIONS: - None: Attacker does not gain
    access to the system. No execution privileges are obtained.  - User:
    Attacker gains user-level access (e.g., running code as a normal user,
    accessing user files, limited privileges).  - Root: Attacker gains full
    system or administrative access (e.g., root privileges, complete control
    over the system or application, administrator rights).

    CLASSIFICATION INSTRUCTIONS: 1. Analyze both the CVE description and CVSS
    vector 2. Provide a brief justification 3. End your
    response with: ##POSTCONDITION [classification] 4. The classification must
    be EXACTLY one of: None, User, Root
    """

In [3]:
# Per-row prompt template. run_evaluation fills the {description} and {cvss}
# placeholders with str.format, using the 'Description' and 'CVSS' columns of
# the dataset row. Because the text goes through str.format, any literal brace
# added to this template later must be doubled ("{{" / "}}").
# NOTE(review): the two few-shot examples use a "Classification: ...
# Justification: ..." layout, while the closing instruction asks for
# "Justification: ..." followed by "##POSTCONDITION ..." -- confirm this
# mismatch is intentional.
prompt = """
    Classify the vulnerability post-condition privilege as one of the following:
    - None: Attacker does not gain access to the system.  - User: Attacker gains
    user-level access (e.g., running code as a normal user, accessing user
    files).  - Root: Attacker gains full system or administrative access (e.g.,
    root privileges, complete control over the system or application).

    
    Vulnerability: {description}
    CVSS Vector: {cvss}
    
    Examples: Example 1: Vulnerability: XSS vulnerability allows stealing user
    session cookies.  CVSS Vector: CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:L/I:L/A:N
    Classification: User Justification: Attacker gains access to user session
    data but not system control.

    Example 2: Vulnerability: SQL injection allows database manipulation.  CVSS
    Vector: CVSS:3.1/AV:N/AC:L/PR:H/UI:N/S:U/C:H/I:H/A:H Classification: User
    Justification: Attacker gains database access but not full system control.

    Now classify the given vulnerability: Justification: [Your justification]
    ##POSTCONDITION [Your classification: None, User, or Root]
    """

In [4]:
# Generation settings. run_evaluation forwards each of these by keyword
# (temperature=, top_p=, seed=, max_tokens=) to baron_local_generate on every
# call. NOTE(review): their exact effect depends on baron_local_generate, which
# is defined outside this notebook -- presumably the usual sampling controls
# plus a cap on generated tokens; confirm there.
temperature = 0.3
top_p = 0.9     
seed = 42
max_tokens = 256   

## Prompt the LLM ##

In [5]:
# Explicit answer marker requested by the prompts, e.g. "##POSTCONDITION Root",
# "## POSTCONDITION: [User]" or "##POSTCONDITION **None**". Matched
# case-insensitively because the marker is unambiguous; the captured label is
# normalised back to its canonical capitalisation.
_MARKER_PATTERN = re.compile(
    r'##\s*POSTCONDITION[\s:\-\[\*]*(None|User|Root)\b',
    re.IGNORECASE,
)
# A line that consists solely of a label, optionally followed by '.', ':' or
# whitespace (e.g. "Root." or "User:").
_LABEL_LINE_PATTERN = re.compile(r'^(None|User|Root)[.:\s]*$')
# A label appearing as a whole word anywhere in free text. Case-sensitive so
# that ordinary prose such as "none of the above" is not picked up.
_LABEL_WORD_PATTERN = re.compile(r'\b(None|User|Root)\b')


def format_post_condition(text):
    """
    Extract the post-condition privilege classification from an LLM response.

    The response is searched in three passes, from most to least specific:

    1. the last ``##POSTCONDITION <label>`` marker (the format the prompts
       ask the model to use);
    2. the last line that contains nothing but a label;
    3. the last whole-word occurrence of a label anywhere in the text.

    Args:
        text: Raw response text produced by the model.

    Returns:
        A ``(classification, success)`` tuple. On success ``classification``
        is one of ``"None"``, ``"User"`` or ``"Root"`` and ``success`` is
        ``True``. If no label can be found, the original ``text`` is returned
        unchanged together with ``False``.
    """
    # Pass 1: the explicit marker. The last one wins, since the model is told
    # to END its response with it.
    marker_hits = _MARKER_PATTERN.findall(text)
    if marker_hits:
        return marker_hits[-1].capitalize(), True

    # Pass 2: scan bottom-up for a line that is just the label.
    for line in reversed(text.strip().splitlines()):
        line_match = _LABEL_LINE_PATTERN.match(line.strip())
        if line_match:
            return line_match.group(1), True

    # Pass 3: last whole-word mention. Word boundaries stop "Nonetheless" or
    # "Users" from being read as labels, and taking the final hit (rather than
    # preferring Root over User over None) honours the "last valid
    # classification" contract.
    word_hits = _LABEL_WORD_PATTERN.findall(text)
    if word_hits:
        return word_hits[-1], True

    # Nothing recognisable: hand the raw text back and flag the failure.
    return text, False

In [6]:
def run_evaluation(file_path):
    """
    Run post-condition privilege classification over a tab-separated dataset.

    For every row, the module-level ``prompt`` template is filled with the
    row's ``Description`` and ``CVSS`` columns and sent, together with
    ``sys_prompt`` and the module-level generation settings, to
    ``baron_local_generate``. The label is then extracted from the reply with
    ``format_post_condition``.

    Two files are written under ``responses/individual-results``:

    * ``SENG402_<stem>_postcondition.txt`` -- exactly one line per dataset
      row: the extracted label, ``Error`` if the model call raised, or the
      whitespace-collapsed raw response when no label could be extracted.
    * ``SENG402_<stem>_full_postcondition_responses.txt`` -- the full model
      response (or error message) for every row.

    ``<stem>`` is the part of the input file's base name before its first dot.

    Args:
        file_path: Path to a UTF-8, tab-separated file with ``Description``
            and ``CVSS`` columns.

    Returns:
        None. Progress and summary statistics are printed to stdout.
    """
    start_time = time.time()
    count_chars = 0
    instructions_failed = 0

    data = pd.read_csv(file_path, encoding='utf-8', sep='\t')
    all_results = []
    all_full_responses = []

    for index, row in data.iterrows():
        # Fill the prompt template with this row's vulnerability data.
        llm_prompt = prompt.format(description=row['Description'], cvss=row['CVSS'])

        try:
            # Get the prediction from the model.
            output = baron_local_generate(sys_prompt, llm_prompt, max_tokens=max_tokens, temperature=temperature, top_p=top_p, seed=seed)
            count_chars += len(output)

            # Keep the full response for later inspection.
            all_full_responses.append(f"=== CVE {index+1} ===\n{output}\n")

            # Try to extract the post-condition label from the response.
            answer, success = format_post_condition(output)
            if not success:
                instructions_failed += 1
                # On failure `answer` is the raw, usually multi-line response.
                # Collapse it onto one line so the results file keeps exactly
                # one line per dataset row and stays aligned with the input.
                answer = ' '.join(answer.split())

            all_results.append(answer)
            print(index+1, answer)
        except Exception as e:
            # Deliberate best-effort: a failure on one row is recorded and the
            # run continues with the next row.
            answer = 'Error'
            error_msg = f"=== CVE {index+1} ===\nERROR: {str(e)}\n"
            all_full_responses.append(error_msg)
            all_results.append(answer)
            print('Exception at row ', index+1)
            print(e)

    time_taken = time.time() - start_time
    print('Time taken:', time_taken)
    print('#Characters generated:', count_chars)
    print('#Instructions failed:', instructions_failed)

    output_dir = os.path.join('responses', 'individual-results')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # Part of the input file's base name before its first dot.
    dataset_stem = os.path.basename(file_path).split('.')[0]

    out_result = os.path.join(output_dir, 'SENG402_' + dataset_stem + '_postcondition.txt')
    with open(out_result, 'w', encoding='utf-8') as f:
        f.write('\n'.join(all_results))

    out_full_responses = os.path.join(output_dir, 'SENG402_' + dataset_stem + '_full_postcondition_responses.txt')
    with open(out_full_responses, 'w', encoding='utf-8') as f:
        f.write('\n'.join(all_full_responses))

    print(f'Results saved to: {out_result}')
    print(f'Full responses saved to: {out_full_responses}')
    print('------- Done --------')

<!--  -->

In [7]:
# Run the evaluation on the post-condition dataset. run_evaluation reads this
# file as tab-separated text and writes its two output files under
# responses/individual-results/, relative to the current working directory.
data_set_file_path = "../datasets/postcondition-prediction.tsv"
run_evaluation(data_set_file_path)


1 Root
2 Root
2 Root
3 User
4 User
5 User
6 User
Time taken: 142.0675983428955
#Characters generated: 2299
#Instructions failed: 0
Results saved to: responses/individual-results/SENG402_postcondition-prediction_postcondition.txt
Full responses saved to: responses/individual-results/SENG402_postcondition-prediction_full_postcondition_responses.txt
------- Done --------
