In [None]:
import json
import pandas as pd
import os

# Build OpenAI Batch API input files (JSONL) for zero-shot classification.
# Each row of the test CSV becomes one chat-completion request; requests are
# split across several files so that each file stays within a token budget.
df_test = pd.read_csv("smart_contract_test.csv")
prompt = """You need to analyze the given function and classify it as "Vulnerable Code" or "Safe Code" based on potential security risks. 
    
    This is the function we need to audit:
    ```solidity
    {code}
    ```
    
Answer: """

# Render the user prompt for every row from its `code` column.
df_test['input_prompt'] = df_test['code'].map(lambda x: prompt.format(code=x))

# Define system prompt
system_prompt = 'You are the smartest AI solidity smart contract security auditor in the world that only answer in term of "Vulnerable Code" or "Safe Code"'

# Parameters
# NOTE(review): the original comment cited a 900,000-token limit while the
# value below is 90,000 -- confirm which figure is intended.
max_tokens_per_batch = 90000
estimated_tokens_per_request = 1000  # Adjust based on your average request size
# Never let the batch size drop to 0 (it would make the chunking step invalid);
# an oversized estimate degrades to one request per file.
requests_per_batch = max(1, max_tokens_per_batch // estimated_tokens_per_request)

# Create output directory
output_dir = "batch_inputs_zeroshot_class"
os.makedirs(output_dir, exist_ok=True)

# One request per row. `custom_id` embeds the DataFrame index label so the
# response can be matched back to its row when the outputs are merged.
tasks = [
    {
        "custom_id": f"request-{index}",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": "gpt-4o",
            "temperature": 0.1,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ]
        }
    }
    for index, user_prompt in df_test['input_prompt'].items()
]

# Write the requests in chunks of `requests_per_batch`. A file is only created
# when there is at least one request for it, so no empty trailing batch file is
# produced when the row count is an exact multiple of the batch size, and the
# `with` block guarantees the handle is closed even if a write fails.
batch_count = 0
for batch_count, start in enumerate(range(0, len(tasks), requests_per_batch)):
    batch_path = os.path.join(output_dir, f"batch_input_{batch_count}.jsonl")
    with open(batch_path, "w", encoding="utf-8") as batch_file:
        batch_file.writelines(
            json.dumps(task) + "\n"
            for task in tasks[start:start + requests_per_batch]
        )

In [None]:
# Merge the Batch API responses back into the test set: every output line is
# matched to its CSV row through the index embedded in `custom_id`.

# Read the original CSV
df = pd.read_csv("smart_contract_test.csv")

# Initialize new columns; rows without a usable response keep None.
df["raw_llm_out"] = None

# Path to your batch output directory
batch_output_dir = "batch_outputs_zeroshot_class"

# Only pick up files named batch_output_<number>.jsonl, in numeric order, so
# stray entries in the directory (e.g. .DS_Store, .ipynb_checkpoints) or gaps
# in the numbering do not break the load.
output_prefix, output_suffix = "batch_output_", ".jsonl"
batch_output_files = sorted(
    (
        name
        for name in os.listdir(batch_output_dir)
        if name.startswith(output_prefix)
        and name.endswith(output_suffix)
        and name[len(output_prefix):-len(output_suffix)].isdecimal()
    ),
    key=lambda name: int(name[len(output_prefix):-len(output_suffix)]),
)

# Gather all output lines from multiple batch files
output_lines = []
for file_name in batch_output_files:
    file_path = os.path.join(batch_output_dir, file_name)
    with open(file_path, encoding="utf-8") as f:
        output_lines.extend(f.readlines())

# Process each output line using custom_id
for line in output_lines:
    if not line.strip():
        continue  # blank line, nothing to parse

    # Reset per line so the error message never reports a stale or unbound id.
    custom_id = None
    try:
        json_out = json.loads(line)
        custom_id = json_out.get("custom_id", "")
        string = json_out['response']['body']['choices'][0]['message']['content']

        # Extract original index from custom_id like "request-123"
        original_index = int(custom_id.split("-")[-1])

        # Assigning through .loc with an unknown label would silently append a
        # new row, so reject ids that do not belong to this CSV.
        if original_index not in df.index:
            raise KeyError(f"index {original_index} not present in the CSV")

        df.loc[original_index, "raw_llm_out"] = string

    # TypeError: a field such as "response" is null; IndexError: "choices" is
    # empty; AttributeError: the line is not a JSON object or custom_id is not
    # a string.
    except (json.JSONDecodeError, KeyError, ValueError,
            TypeError, IndexError, AttributeError) as e:
        print(f"Error processing custom_id {custom_id}: {e}")
        print(f"Raw response: {line}")

In [2]:
# Turn raw model answers into binary predictions (0 = "Safe Code", 1 = anything
# else) and align them with the ground-truth `vulnerable` column.
# Rows without a model answer (None/NaN) would make the substring test raise
# TypeError, so they are excluded from both vectors and reported instead.
has_llm_output = df['raw_llm_out'].notna()
missing_outputs = int((~has_llm_output).sum())
if missing_outputs:
    print(f"Warning: {missing_outputs} row(s) have no LLM output and are excluded from evaluation")

y_pred = [0 if "Safe Code" in x else 1 for x in df.loc[has_llm_output, 'raw_llm_out']]
y_true = df.loc[has_llm_output, 'vulnerable']

In [3]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 of the predictions against the ground
# truth, formatted to four decimal places.
report_text = classification_report(y_true, y_pred, digits=4)
print(report_text)

              precision    recall  f1-score   support

           0     0.7059    0.4865    0.5760       148
           1     0.5682    0.7692    0.6536       130

    accuracy                         0.6187       278
   macro avg     0.6370    0.6279    0.6148       278
weighted avg     0.6415    0.6187    0.6123       278

