In [1]:
import os
from dotenv import load_dotenv

# The .env file is expected one directory above the current working directory.
parent_dir = os.path.dirname(os.getcwd())
dotenv_path = os.path.join(parent_dir, ".env")
load_dotenv(dotenv_path)

# Hugging Face credential: stop immediately if it is missing or empty.
hf_token = os.environ.get("HF_API_KEY")
assert hf_token, "❌ HF_API_KEY not found in environment."
print("✅ Hugging Face token loaded.")

# OpenAI credential: same presence check.
openai_token = os.environ.get("OPENAI_API_KEY")
assert openai_token, "❌ OPENAI_API_KEY not found"
print("✅ OpenAI key loaded.")


✅ Hugging Face token loaded.
✅ OpenAI key loaded.


In [2]:
from datasets import load_dataset

# Load the train split of the benchmark dataset, passing the token read
# from the environment for authentication.
dataset = load_dataset("ZennyKenny/cosa-benchmark-dataset", split="train", token=hf_token)


In [3]:
import pandas as pd

# Convert the loaded dataset to a pandas DataFrame and preview its first rows.
df = dataset.to_pandas()
print("✅ Loaded Xet-backed dataset:", df.head(), sep="\n")


✅ Loaded Xet-backed dataset:
   index                                               code    language  \
0      1  public class UserController {\n    @Autowired\...        Java   
1      2  def authenticate_user(username, password):\n  ...      Python   
2      3  const express = require('express');\nconst mys...  JavaScript   
3      4  import java.sql.*;\n\npublic class DatabaseMan...        Java   
4      5  const express = require('express');\nconst mys...  JavaScript   

  difficulty vulnerability_type  \
0       easy      SQL Injection   
1       easy      SQL Injection   
2       easy      SQL Injection   
3       easy      SQL Injection   
4       easy      SQL Injection   

                                   weakness_solution  \
0  <think> Okay, let's take a look at this Java c...   
1  <think> Okay, let's take a look at this Python...   
2  <think> Okay, let's take a look at this JavaSc...   
3  <think> Okay, let's take a look at this Java c...   
4  <think> Okay, let's take a

In [4]:
def build_prompt(code):
    """Build the single-message prompt sent to the model for one code snippet.

    Args:
        code: The snippet to analyse; it is interpolated into the prompt as-is.

    Returns:
        The instruction text followed by the snippet, with leading and
        trailing whitespace removed from the assembled prompt.
    """
    prompt_lines = [
        "You are a security-focused AI assistant. You will be given a code snippet. Your task is:",
        "",
        "1. Identify the vulnerability in the code.",
        "2. Explain the issue in **one short sentence**.",
        "3. Provide a **corrected version of the code** that resolves the vulnerability.",
        "",
        "Return your answer using the following format exactly:",
        "",
        "Explanation: <your one-sentence explanation>",
        "Fixed Code:",
        "<your corrected code here>",
        "",
        "Code:",
        f"{code}",
    ]
    # Stripping the joined text also trims any whitespace at the end of the snippet.
    return "\n".join(prompt_lines).strip()


In [5]:
from openai import OpenAI
from tqdm import tqdm

# Model requested from the chat-completions endpoint.
# NOTE: other cells label this run "o4 mini" / "o4_mini", but the model actually
# called is gpt-4o-mini, so the progress label below is built from the real ID.
MODEL_ID = "gpt-4o-mini"

# Section markers the prompt asks the model to use in its answer.
EXPLANATION_TAG = "Explanation:"
FIXED_CODE_TAG = "Fixed Code:"


def _parse_model_reply(content):
    """Split a model reply into ``(explanation, fixed_code)``.

    Args:
        content: The reply text returned by the model.

    Returns:
        A tuple of two strings. ``explanation`` is the remainder of the line
        that starts with ``Explanation:``; ``fixed_code`` is everything after
        the first line that starts with ``Fixed Code:``. Either element is ""
        when its marker is not found.
    """
    explanation = ""
    fixed_code = ""

    # keepends=True lets the fix be re-assembled with its original line endings.
    lines = content.strip().splitlines(keepends=True)
    for pos, raw_line in enumerate(lines):
        # Tolerate indentation in front of the markers.
        line = raw_line.strip()
        if line.startswith(EXPLANATION_TAG):
            # Slice off the prefix only; str.replace would also delete any
            # later occurrence of the tag inside the sentence.
            explanation = line[len(EXPLANATION_TAG):].strip()
        elif line.startswith(FIXED_CODE_TAG):
            # Keep everything after this marker line, even if the code itself
            # happens to contain the text "Fixed Code:" again.
            tail = raw_line.lstrip()[len(FIXED_CODE_TAG):] + "".join(lines[pos + 1:])
            fixed_code = tail.strip()
            break

    return explanation, fixed_code


client = OpenAI(api_key=openai_token)

# One record per benchmark row: {"index", "model_explanation", "model_fix"}.
results = []

for _, row in tqdm(df.iterrows(), total=len(df), desc=f"Running {MODEL_ID}"):
    code = row["code"]
    idx = row["index"]

    try:
        prompt = build_prompt(code)

        response = client.chat.completions.create(
            model=MODEL_ID,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,
            max_tokens=512
        )

        content = response.choices[0].message.content
        if content is None:
            # Recorded as an ERROR row by the handler below, with a clearer
            # message than the AttributeError that .strip() would raise.
            raise ValueError("model returned no text content")

        explanation, fixed_code = _parse_model_reply(content)
        if not explanation or not fixed_code:
            # Surface replies that did not follow the requested format instead
            # of silently storing empty fields.
            print(f"⚠️ Could not parse a complete answer for row {idx}")

        results.append({
            "index": idx,
            "model_explanation": explanation,
            "model_fix": fixed_code
        })

    except Exception as e:
        # Best-effort run: log the failure, record a placeholder row, and
        # continue with the next sample.
        print(f"⚠️ Error on row {idx}: {e}")
        results.append({
            "index": idx,
            "model_explanation": "ERROR",
            "model_fix": ""
        })


Running o4 mini: 100%|██████████| 200/200 [14:49<00:00,  4.45s/it]


In [6]:
import pandas as pd

# Tabulate the per-row model outputs and write them as JSON Lines
# (one record per line).
results_df = pd.DataFrame(data=results)
results_df.to_json(
    path_or_buf="o4_mini_eval_results.jsonl",
    orient="records",
    lines=True,
)

print(f"✅ Saved {len(results_df)} results to o4_mini_eval_results.jsonl")


✅ Saved 200 results to o4_mini_eval_results.jsonl


In [None]:
# Optional: skip this cell if the benchmark has already been downloaded and saved locally as llm-code-safety-benchmark.jsonl

from datasets import load_dataset
import pandas as pd
import os

# Read the Hugging Face token from the environment (None when it is unset).
hf_token = os.environ.get("HF_API_KEY")

# Load the benchmark's train split and convert it to a DataFrame.
dataset = load_dataset(
    "ZennyKenny/cosa-benchmark-dataset",
    split="train",
    token=hf_token,
)
df = dataset.to_pandas()

# Keep a local JSON Lines copy of the benchmark.
df.to_json(path_or_buf="llm-code-safety-benchmark.jsonl", orient="records", lines=True)
print("✅ Downloaded and saved benchmark as llm-code-safety-benchmark.jsonl")


Generating train split:   0%|          | 0/200 [00:00<?, ? examples/s]

✅ Downloaded and saved benchmark as llm-code-safety-benchmark.jsonl


In [7]:
# Benchmark rows saved locally as JSON Lines.
truth = pd.read_json("llm-code-safety-benchmark.jsonl", lines=True)

# Model outputs saved locally as JSON Lines.
results = pd.read_json("o4_mini_eval_results.jsonl", lines=True)

# Map the shorter column names onto the ones the scoring cells read.
# This is a no-op when the file already uses the "model_*" names.
results = results.rename(columns={
    "explanation": "model_explanation",
    "fixed_code": "model_fix"
})

# Inner join on the sample index. validate="one_to_one" makes pandas raise
# MergeError if "index" is duplicated on either side, instead of silently
# multiplying rows and skewing the scores.
df = pd.merge(truth, results, on="index", validate="one_to_one")

# An inner join drops benchmark rows that have no model result; report that
# rather than letting the row count shrink unnoticed.
unmatched = len(truth) - len(df)
if unmatched:
    print(f"⚠️ {unmatched} benchmark rows have no matching model result and were dropped.")

print("✅ Unified dataset ready with", len(df), "rows.")


✅ Unified dataset ready with 200 rows.


In [8]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import difflib

# Sentence-embedding model used by the scoring cell to compare explanations.
encoder = SentenceTransformer("all-MiniLM-L6-v2")

# Per-difficulty multiplier; difficulty values missing from this mapping
# end up as NaN in the "weight" column.
difficulty_weights = dict(easy=1, medium=2, hard=3)
df["weight"] = df["difficulty"].map(difficulty_weights)


In [9]:
# (cut-off, score) pairs, strictest first: a similarity at or above a cut-off
# earns that score; a similarity below every cut-off earns FLOOR_SCORE.
EXPLANATION_BANDS = ((0.9, 1.0), (0.75, 0.85), (0.6, 0.6), (0.4, 0.4))
CODE_BANDS = ((0.95, 1.0), (0.8, 0.85), (0.6, 0.6), (0.4, 0.4))
FLOOR_SCORE = 0.2


def _band_score(similarity, bands, floor=FLOOR_SCORE):
    """Return the score of the first band whose cut-off is reached, else ``floor``."""
    for cutoff, score in bands:
        if similarity >= cutoff:
            return score
    return floor


def _as_text(value):
    """Return ``value`` when it is a string, otherwise "" (e.g. for None/NaN cells)."""
    return value if isinstance(value, str) else ""


explanation_scores = []
code_scores = []
total_scores = []

for _, row in df.iterrows():
    # Coerce to text so a missing cell cannot crash .lower() / .strip() below.
    gt_expl = _as_text(row["solution_statement"])
    pred_expl = _as_text(row["model_explanation"])
    gt_code = _as_text(row["safe_code"])
    pred_code = _as_text(row["model_fix"])

    # Rows whose explanation is the literal "ERROR" placeholder score zero on both parts.
    failed = pred_expl.lower() == "error"

    # A blank explanation scores 0, mirroring how a blank fix is treated,
    # instead of picking up the 0.2 floor from embedding an empty string.
    if failed or not pred_expl.strip():
        expl_score = 0
    else:
        emb_gt = encoder.encode(gt_expl, convert_to_tensor=True)
        emb_pred = encoder.encode(pred_expl, convert_to_tensor=True)
        sim = cosine_similarity([emb_gt.cpu().numpy()], [emb_pred.cpu().numpy()])[0][0]
        expl_score = _band_score(sim, EXPLANATION_BANDS)

    if failed or not pred_code.strip():
        code_score = 0
    else:
        # Character-level similarity between the reference fix and the model's fix.
        sim = difflib.SequenceMatcher(None, gt_code, pred_code).ratio()
        code_score = _band_score(sim, CODE_BANDS)

    # Mean of the two parts, scaled by the difficulty weight to a 0-100 range
    # (the largest weight in use is 3).
    avg = (expl_score + code_score) / 2
    final = avg * row["weight"] * 100 / 3

    explanation_scores.append(expl_score)
    code_scores.append(code_score)
    total_scores.append(final)

# Attach the per-row scores to the frame: the summary cell reads
# df["explanation_score"] and df["code_score"], which no other visible cell
# creates, so without this a fresh Restart & Run All fails with a KeyError.
df["explanation_score"] = explanation_scores
df["code_score"] = code_scores


In [11]:
from datasets import Dataset
import os

# Label for this evaluation run; it is embedded in the Hub repository name.
model_name = "o4_mini"
repo_id = f"ZennyKenny/{model_name}-cosa-benchmark-results"

# The upload needs a Hub token; stop early when none is configured.
hf_token = os.environ.get("HF_API_KEY")
assert hf_token, "❌ HF_API_KEY is not set in your environment"

# Wrap the merged DataFrame as a Hugging Face Dataset and publish it.
ds = Dataset.from_pandas(df)
ds.push_to_hub(repo_id=repo_id, token=hf_token)

print(f"✅ Dataset uploaded to https://huggingface.co/datasets/{repo_id}")


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Uploading files as bytes or binary IO objects is not supported by Xet Storage. Falling back to HTTP upload.


✅ Dataset uploaded to https://huggingface.co/datasets/ZennyKenny/o4_mini-cosa-benchmark-results


In [12]:
# Difficulty -> weight lookup, applied only when no "weight" column exists yet.
weights = dict(easy=1, medium=2, hard=3)

if "weight" not in df.columns:
    df["weight"] = df["difficulty"].map(weights)

# The "_norm" columns are plain copies of the per-row scores.
df["explanation_score_norm"] = df["explanation_score"]
df["code_score_norm"] = df["code_score"]

# Per-row score: mean of the two parts, then scaled by the difficulty weight.
df["row_score"] = df["explanation_score_norm"].add(df["code_score_norm"]).div(2)
df["weighted_score"] = df["row_score"].mul(df["weight"])

# Weighted average of the row scores, expressed out of 100.
weighted_sum = df["weighted_score"].sum()
weight_total = df["weight"].sum()
difficulty_adjusted_score = (weighted_sum / weight_total) * 100

# Unweighted means of each part, expressed out of 100.
avg_expl = df["explanation_score"].mean() * 100
avg_code = df["code_score"].mean() * 100

# 🔥 Summary print
summary_lines = [
    f"📌 CoSA Benchmark Results for: {model_name.upper()}",
    "=" * 42,
    f"🧠 Explanation Quality:       {avg_expl:.2f}/100",
    f"🔧 Code Repair Quality:       {avg_code:.2f}/100",
    f"🏁 Difficulty-Adjusted Score: {difficulty_adjusted_score:.2f}/100",
]
print("\n".join(summary_lines))


📌 CoSA Benchmark Results for: O4_MINI
🧠 Explanation Quality:       61.12/100
🔧 Code Repair Quality:       85.55/100
🏁 Difficulty-Adjusted Score: 72.47/100
