In [None]:
import json
import pandas as pd
import os

# Build the Batch API input files for zero-shot severity classification:
# one chat-completion request per test row, written as JSON Lines and split
# into files of at most `requests_per_batch` requests each.
df_test = pd.read_csv("vuln_data_test.csv")
prompt = """You need to analyze the given vulnerability explanation and classify the severity of it as "low", "medium" or "high" based on the impact of that vulnerability. Dont give any additional explanations just give the label (low, medium or high).
    
    This is the vulnerability explanation we need to analyze:
    {explanation}
    
Answer: """

# Render the user prompt for every row and keep it alongside the data.
df_test['input_prompt'] = df_test['vuln_explanation'].map(lambda x: prompt.format(explanation=x))

# Define system prompt
system_prompt = 'You are the smartest AI solidity smart contract security auditor in the world that only answer in one word between "low", "medium" or "high".'

# Parameters
# NOTE(review): the original comment cited a 900,000-token limit while the value
# below is 90,000 - confirm which limit applies to the account before changing it.
max_tokens_per_batch = 90000
estimated_tokens_per_request = 1000  # Adjust based on your average request size
# Clamp to at least one request per file so the chunking below never gets a zero step.
requests_per_batch = max(1, max_tokens_per_batch // estimated_tokens_per_request)

# Create output directory
output_dir = "batch_inputs_zeroshot_class_severity"
os.makedirs(output_dir, exist_ok=True)

# One JSON-encoded request per row. The custom_id carries the DataFrame index so
# that each response can later be matched back to the row it belongs to.
request_lines = [
    json.dumps({
        "custom_id": f"request-{index}",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": "gpt-4o",
            "temperature": 0.1,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ]
        }
    }) + "\n"
    for index, user_prompt in zip(df_test.index, df_test['input_prompt'])
]

# Write the requests in fixed-size chunks. A file is only opened when there is
# something to put in it, so no empty trailing batch file is produced when the
# row count is an exact multiple of the batch size, and `with` closes the handle
# even if a write fails.
batch_count = 0  # index of the last batch file written (stays 0 for an empty frame)
for batch_count, start in enumerate(range(0, len(request_lines), requests_per_batch)):
    batch_path = os.path.join(output_dir, f"batch_input_{batch_count}.jsonl")
    with open(batch_path, "w") as batch_file:
        batch_file.writelines(request_lines[start:start + requests_per_batch])

In [3]:
# Read the original CSV
# NOTE(review): the batch inputs above were built from "vuln_data_test.csv" while
# this cell reads "vuln_data_test_v6.csv". Responses are matched purely by row
# index, so confirm both files contain the same rows in the same order.
df = pd.read_csv("vuln_data_test_v6.csv")

# Column that receives the raw model answer for each row
df["raw_llm_out"] = None

# Path to your batch output directory
batch_output_dir = "batch_outputs_zeroshot_class_severity"

# Select only files named batch_output_<number>.jsonl, in numeric order, so that
# stray directory entries (checkpoints, OS metadata, error files) or a gap in the
# numbering do not cause a FileNotFoundError.
output_prefix = "batch_output_"
output_suffix = ".jsonl"
batch_output_files = sorted(
    (
        name for name in os.listdir(batch_output_dir)
        if name.startswith(output_prefix)
        and name.endswith(output_suffix)
        and name[len(output_prefix):-len(output_suffix)].isdigit()
    ),
    key=lambda name: int(name[len(output_prefix):-len(output_suffix)]),
)

# Gather all output lines from multiple batch files
output_lines = []
for file_name in batch_output_files:
    file_path = os.path.join(batch_output_dir, file_name)
    with open(file_path, encoding="utf-8") as f:
        output_lines.extend(f.readlines())

# Process each output line using custom_id
for line in output_lines:
    if not line.strip():
        # Blank lines (e.g. a trailing newline at end of file) carry no response.
        continue

    # Reset per line so the error message never reports a stale or unbound id.
    custom_id = None
    try:
        json_out = json.loads(line)
        custom_id = json_out.get("custom_id", "")
        string = json_out['response']['body']['choices'][0]['message']['content']

        # Extract original index from custom_id like "request-123"
        original_index = int(custom_id.split("-")[-1])

        # Assigning through .loc with an unknown label would silently append a
        # new row, so reject ids that do not correspond to a row of the CSV.
        if original_index not in df.index:
            raise KeyError(f"row index {original_index} is not present in the CSV")

        df.loc[original_index, "raw_llm_out"] = string

    except (json.JSONDecodeError, KeyError, ValueError, TypeError, IndexError, AttributeError) as e:
        # TypeError covers entries whose "response" is null, IndexError an empty
        # "choices" list, AttributeError a line that is valid JSON but not an object.
        print(f"Error processing custom_id {custom_id}: {e}")
        print(f"Raw response: {line}")

In [5]:
# Normalise predicted and reference labels before scoring: trim surrounding
# whitespace and lower-case, so that an answer such as "High\n" is counted as
# "high" instead of showing up as a separate class in the report.
# Rows without a model answer stay missing (NaN) in y_pred.
y_pred = df['raw_llm_out'].str.strip().str.lower()
y_true = df['severity'].str.strip().str.lower()

In [6]:
from sklearn.metrics import classification_report

# Per-class precision, recall and F1 of the predicted severity labels against
# the reference labels, printed with four decimal places.
severity_report = classification_report(y_true, y_pred, digits=4)
print(severity_report)

              precision    recall  f1-score   support

        high     0.5263    0.8889    0.6612        45
         low     0.7778    0.2917    0.4242        24
      medium     0.7241    0.5676    0.6364        74

    accuracy                         0.6224       143
   macro avg     0.6761    0.5827    0.5739       143
weighted avg     0.6709    0.6224    0.6086       143



In [13]:
# Persist the test set together with the raw model answers; the row index is
# written as the first column (pandas default).
results_csv_path = "zeroshot_gpt4o_class_severity.csv"
df.to_csv(results_csv_path)