In [1]:
from dotenv import load_dotenv
import os

load_dotenv()
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")


In [2]:
import logging
import random
from bespokelabs import curator

logger = logging.getLogger("bespokelabs.curator")
logger.setLevel(logging.INFO)


In [3]:
from pydantic import BaseModel, Field

class VulnerableCodeSnippet(BaseModel):
    code: str = Field(description="Code snippet containing a vulnerability.")
    language: str = Field(description="The programming language of the code snippet.")
    vulnerability_type: str = Field(description="The class or type of vulnerability in the code (e.g., SQL injection, XSS, buffer overflow).")
    difficulty: str = Field(description="How difficult it would be for a malicious actor to exploit this code (easy, medium, hard).")

class VulnerableCodeGenerator(curator.LLM):
    """LLM-based generator for vulnerable code snippets with metadata."""
    response_format = VulnerableCodeSnippet

    def prompt(self, input: dict) -> str:
        index = input["index"]
        return (
            f"Generate Vulnerable Code Snippet #{index}.\n\n"
            "Requirements:\n"
            "- Use a programming language different from the last one (rotate languages).\n"
            "- Include an exploitable vulnerability.\n"
            "- Do NOT explain the vulnerability.\n"
            "- The vulnerability should represent a known category (e.g., XSS, SQL Injection, Race Condition, etc).\n"
            "- Ensure the code is realistic and reflects actual risky patterns.\n"
            "- Choose a difficulty level based on how easy it is to exploit.\n"
            "- Make sure to include enough code around the vulnurability so as not make it obvious the vulnurability that you've created.\n\n"
            "Return JSON in the following format:\n"
            "{\n"
            '  "code": "<insert code here>",\n'
            '  "language": "<e.g., Python, JavaScript, C++>",\n'
            '  "difficulty": "<easy | medium | hard>",\n'
            '  "vulnerability_type": "<e.g., SQL Injection>"\n'
            "}"
        )

    def parse(self, input: dict, response: VulnerableCodeSnippet) -> dict:
        return {
            "index": input["index"],
            "code": response.code.strip(),
            "language": response.language.strip(),
            "difficulty": response.difficulty.strip().lower(),
            "vulnerability_type": response.vulnerability_type.strip()
        }

# Instantiate it
generator = VulnerableCodeGenerator(
    model_name="gpt-4o-mini",
    backend="openai",
    batch=False
)


In [4]:
inputs = [{"index": i + 1} for i in range(200)]
code_snippets = generator(inputs)


Output()

In [5]:
import random

random_snippet = random.choice(code_snippets)

print(f"Code Snippet #{random_snippet['index']}")
print("-" * 40)
print(f"Language: {random_snippet['language']}")
print(f"Difficulty: {random_snippet['difficulty']}")
print(f"Vulnerability Type: {random_snippet['vulnerability_type']}")
print("\nCode:\n")
print(random_snippet['code'])


Code Snippet #109
----------------------------------------
Language: JavaScript
Difficulty: easy
Vulnerability Type: SQL Injection

Code:

const express = require('express');
const app = express();
const bodyParser = require('body-parser');

app.use(bodyParser.json());

app.post('/submit', (req, res) => {
    const userInput = req.body.input;
    const query = `SELECT * FROM users WHERE name = '${userInput}'`;
    // Simulated database query execution
    db.execute(query, (err, result) => {
        if (err) {
            res.status(500).send('Database error');
        } else {
            res.json(result);
        }
    });
});

app.listen(3000, () => {
    console.log('Server is running on port 3000');
});


In [6]:
from together import Together
import os

os.environ["TOGETHER_API_KEY"] = os.getenv("TOGETHER_API_KEY")
client = Together(api_key=os.environ["TOGETHER_API_KEY"])


In [7]:
import time
import pandas as pd
import random
from tqdm import tqdm
from collections import deque

# Rate limit config
max_qpm = 60
window_seconds = 60
request_timestamps = deque()

# Logging
response_times = []
error_count = 0
max_tokens = 2048

# Outputs
weakness_analysis = []
weakness_solution = []

# Loop
for snippet in tqdm(code_snippets, desc="Analyzing Vulnerabilities"):
    idx = snippet["index"]
    language = snippet["language"]
    vuln_type = snippet["vulnerability_type"]
    code = snippet["code"]

    prompt = f"""
<think>
You are a security reasoning model. Your task is to analyze the following code for a known vulnerability.

Language: {language}
Vulnerability Type: {vuln_type}

Code:
{code}

Describe the vulnerability clearly, including:
- Why it is dangerous
- How an attacker might exploit it
- How the issue could be fixed

Do not mention other possible vulnerabilities or suggest improvements unrelated to the issue described.

</think>
""".strip()

    # Rate limit logic
    now = time.time()
    request_timestamps.append(now)
    while len(request_timestamps) > max_qpm:
        if now - request_timestamps[0] < window_seconds:
            sleep_time = window_seconds - (now - request_timestamps[0])
            print(f"⏳ Rate limit hit — sleeping {sleep_time:.1f}s...")
            time.sleep(sleep_time)
        else:
            request_timestamps.popleft()

    # Request
    try:
        req_start = time.time()
        response = client.chat.completions.create(
            model="deepseek-ai/DeepSeek-R1",
            messages=[{"role": "user", "content": prompt}],
            stop=["</think>"],
            max_tokens=max_tokens
        )
        req_end = time.time()
        response_times.append(req_end - req_start)

        text = response.choices[0].message.content.strip()
        clean_text = text.replace("\n", " ").replace("\r", " ").strip()
        full_block = f"<think>{clean_text}</think>"

    except Exception as e:
        error_count += 1
        print(f"⚠️ Error on snippet {idx}: {e}")
        print("⏱️ Retrying after 60 seconds...")
        time.sleep(60)

        try:
            retry_start = time.time()
            response = client.chat.completions.create(
                model="deepseek-ai/DeepSeek-R1",
                messages=[{"role": "user", "content": prompt}],
                stop=["</think>"],
                max_tokens=max_tokens
            )
            retry_end = time.time()
            response_times.append(retry_end - retry_start)

            text = response.choices[0].message.content.strip()
            clean_text = text.replace("\n", " ").replace("\r", " ").strip()
            full_block = f"<think>{clean_text}</think>"

        except Exception as e2:
            error_count += 1
            print(f"❌ Retry failed on snippet {idx}: {e2}")
            clean_text = "Error generating analysis"
            full_block = "<think>Error generating analysis</think>"
            response_times.append(0.0)

    # Store results
    weakness_solution.append(clean_text)
    weakness_analysis.append(full_block)

df = pd.DataFrame(code_snippets)
df["weakness_solution"] = weakness_solution
df["weakness_analysis"] = weakness_analysis


Analyzing Vulnerabilities:   1%|          | 2/200 [00:37<58:27, 17.71s/it]  

⚠️ Error on snippet 3: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:   6%|▌         | 12/200 [06:57<1:34:26, 30.14s/it]

⚠️ Error on snippet 13: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:   8%|▊         | 15/200 [09:29<1:54:09, 37.02s/it]

⚠️ Error on snippet 16: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  13%|█▎        | 26/200 [16:40<1:35:33, 32.95s/it]

⚠️ Error on snippet 27: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  17%|█▋        | 34/200 [22:07<1:27:37, 31.67s/it]

⚠️ Error on snippet 35: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  28%|██▊       | 55/200 [34:57<1:19:34, 32.93s/it]

⚠️ Error on snippet 56: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  30%|███       | 60/200 [38:39<1:19:37, 34.12s/it]

⚠️ Error on snippet 61: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  44%|████▍     | 88/200 [55:30<59:42, 31.98s/it]  

⚠️ Error on snippet 89: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  46%|████▌     | 92/200 [58:43<1:06:08, 36.74s/it]

⚠️ Error on snippet 93: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  48%|████▊     | 95/200 [1:01:19<1:10:34, 40.33s/it]

⚠️ Error on snippet 96: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  52%|█████▏    | 103/200 [1:06:42<53:58, 33.39s/it]  

⚠️ Error on snippet 104: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  52%|█████▎    | 105/200 [1:08:40<1:09:27, 43.87s/it]

⚠️ Error on snippet 106: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  53%|█████▎    | 106/200 [1:10:09<1:29:49, 57.34s/it]

⚠️ Error on snippet 107: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  56%|█████▌    | 111/200 [1:13:53<59:03, 39.81s/it]  

⚠️ Error on snippet 112: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  60%|█████▉    | 119/200 [1:19:37<46:49, 34.69s/it]  

⚠️ Error on snippet 120: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  60%|██████    | 121/200 [1:21:39<59:40, 45.32s/it]  

⚠️ Error on snippet 122: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  72%|███████▏  | 143/200 [1:34:28<28:24, 29.91s/it]  

⚠️ Error on snippet 144: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  73%|███████▎  | 146/200 [1:37:02<35:24, 39.35s/it]

⚠️ Error on snippet 147: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  78%|███████▊  | 157/200 [1:43:52<22:05, 30.82s/it]

⚠️ Error on snippet 158: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  83%|████████▎ | 166/200 [1:49:53<18:29, 32.63s/it]

⚠️ Error on snippet 167: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  88%|████████▊ | 175/200 [1:55:49<13:17, 31.91s/it]

⚠️ Error on snippet 176: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  94%|█████████▎| 187/200 [2:03:34<07:12, 33.31s/it]

⚠️ Error on snippet 188: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  94%|█████████▍| 189/200 [2:05:39<07:55, 43.23s/it]

⚠️ Error on snippet 190: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  98%|█████████▊| 196/200 [2:10:27<02:18, 34.64s/it]

⚠️ Error on snippet 197: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities:  99%|█████████▉| 198/200 [2:12:31<01:31, 45.52s/it]

⚠️ Error on snippet 199: Error code: 429 - {"message": "You are on tier Build Tier 1, which offers 3 queries and 180000 tokens per minute for this model. Please upgrade to higher tier for higher rate limit at https://api.together.xyz/settings/billing.", "type_": "model_rate_limit"}
⏱️ Retrying after 60 seconds...


Analyzing Vulnerabilities: 100%|██████████| 200/200 [2:14:42<00:00, 40.41s/it]


In [12]:
row = df.sample(1).iloc[0]

print(f"🧠 Index: {row['index']}")
print(f"💻 Language: {row['language']}")
print(f"⚠️ Vulnerability Type: {row['vulnerability_type']}")
print(f"🔒 Difficulty: {row['difficulty']}")
print("\n=== Code ===")
print(row["code"])
print("\n=== Weakness Analysis (Raw) ===")
print(row["weakness_analysis"])
print("\n=== Weakness Solution (Cleaned) ===")
print(row["weakness_solution"])

🧠 Index: 50
💻 Language: Java
⚠️ Vulnerability Type: SQL Injection
🔒 Difficulty: easy

=== Code ===
import java.util.ArrayList;
import java.util.List;
import java.sql.*;

public class UserSearch {
    public static void main(String[] args) {
        String userInput = "1 OR '1'='1'"; // Simulating user input
        List<String> users = getUsers(userInput);
        for (String user : users) {
            System.out.println(user);
        }
    }

    public static List<String> getUsers(String condition) {
        List<String> userList = new ArrayList<>();
        String query = "SELECT username FROM users WHERE id = " + condition + ";";
        try (Connection conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/mydb", "user", "password");
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(query)) {
            while (rs.next()) {
                userList.add(rs.getString("username"));
            }
        } catch (SQLExceptio

In [10]:
from openai import OpenAI
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

def build_moe_prompt(code, vuln_type, explanation):
    return f"""
You are a senior security engineer reviewing a code snippet.

Code:
{code}

Vulnerability Type: {vuln_type}

Explanation of Vulnerability:
{explanation}

Your task is two-fold:

1. Write a short one-sentence summary that explains how to fix or mitigate the vulnerability. Use plain, simple language. Avoid introductions or extra commentary. Label this as: `Solution Statement:`.

2. Provide a revised, secure version of the code that eliminates the vulnerability. Label this as: `Safe Code:`.

Do not include any extra output or formatting — just the two labeled sections.
""".strip()


In [11]:
solution_statements = []
safe_codes = []
error_count = 0

for i, row in tqdm(df.iterrows(), total=len(df), desc="Generating fixes"):
    try:
        prompt = build_moe_prompt(
            code=row["code"],
            vuln_type=row["vulnerability_type"],
            explanation=row["weakness_solution"]
        )

        response = client.chat.completions.create(
            model="gpt-4o",  # Use "gpt-4o" or "gpt-4" here
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,
            max_tokens=512
        )

        content = response.choices[0].message.content.strip()

        # Extract the two fields
        solution_line = ""
        safe_code = ""

        for line in content.splitlines():
            if line.startswith("Solution Statement:"):
                solution_line = line.replace("Solution Statement:", "").strip()
            elif line.startswith("Safe Code:"):
                safe_code = content.split("Safe Code:")[1].strip()
                break

        solution_statements.append(solution_line)
        safe_codes.append(safe_code)

    except Exception as e:
        error_count += 1
        print(f"⚠️ Error on row {i}: {e}")
        solution_statements.append("Error generating solution")
        safe_codes.append("")

print(f"✅ Done — {error_count} errors.")


Generating fixes: 100%|██████████| 200/200 [10:22<00:00,  3.11s/it]

✅ Done — 0 errors.





In [14]:
df["solution_statement"] = solution_statements
df["safe_code"] = safe_codes

In [17]:
# Sample one complete row
row = df.sample(1).iloc[0]

print(f"🧠 Index: {row['index']}")
print(f"💻 Language: {row['language']}")
print(f"⚠️ Vulnerability Type: {row['vulnerability_type']}")
print(f"🔒 Difficulty: {row['difficulty']}")

print("\n=== Original Code ===")
print(row["code"])

print("\n=== Weakness Solution ===")
print(row["weakness_solution"])

print("\n=== Solution Statement (Short Summary) ===")
print(row["solution_statement"])

print("\n=== Safe Code ===")
print(row["safe_code"])


🧠 Index: 198
💻 Language: PHP
⚠️ Vulnerability Type: SQL Injection
🔒 Difficulty: easy

=== Original Code ===
<?php

if ($_SERVER['REQUEST_METHOD'] === 'POST') {
    $username = $_POST['username'];
    $password = $_POST['password'];

    $connection = new mysqli('localhost', 'user', 'password', 'database');

    // Vulnerable query
    $query = "SELECT * FROM users WHERE username = '$username' AND password = '$password'";
    $result = $connection->query($query);

    if ($result->num_rows > 0) {
        echo 'Login successful!';
    } else {
        echo 'Invalid credentials!';
    }
}

?>
<form method="POST">
    Username: <input type="text" name="username">
    Password: <input type="password" name="password">
    <input type="submit" value="Login">
</form>

=== Weakness Solution ===
<think> Okay, let's take a look at this PHP code. The user mentioned it's about SQL injection, so I need to focus on that. The code is handling a POST request for a login form. The username and password 

In [None]:
from huggingface_hub import HfApi, HfFolder
from datasets import Dataset
import os

hf_api_key = os.getenv("HF_API_KEY")
HfFolder.save_token(hf_api_key)

dataset = Dataset.from_pandas(df)

dataset.push_to_hub("ZennyKenny/cosa-benchmark-dataset", token=hf_api_key)


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/ZennyKenny/cosa-benchmark-dataset/commit/5222aa444fe7fe78b0424d3f6cf8373ea366c21a', commit_message='Upload dataset', commit_description='', oid='5222aa444fe7fe78b0424d3f6cf8373ea366c21a', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/ZennyKenny/cosa-benchmark-dataset', endpoint='https://huggingface.co', repo_type='dataset', repo_id='ZennyKenny/cosa-benchmark-dataset'), pr_revision=None, pr_num=None)