In [26]:
import glob
import os

# Specify the folder path
folder_path = "prototype-pollution"

# Get all subfolders in the folder_path
subfolders = glob.glob(os.path.join(folder_path, "*"))
print("Subfolders found:", len(subfolders))

package_json_files = []
test_js_files = []

for subfolder in subfolders:
    package_json = os.path.join(subfolder, "package.json")
    test_js_candidates = glob.glob(os.path.join(subfolder, "*.test.js"))
    if os.path.isfile(package_json):
        package_json_files.append(package_json)
    else:
        print(f"Missing package.json in {subfolder}")
    if test_js_candidates:
        # If multiple test.js files, add all
        test_js_files.extend(test_js_candidates)

print("Total number of package.json files:", len(package_json_files))
print("Total number of test.js files:", len(test_js_files))


Subfolders found: 208
Missing package.json in prototype-pollution/payload.ini
Missing package.json in prototype-pollution/babel-analysis.config.js
Missing package.json in prototype-pollution/my-custom-reporter.js
Missing package.json in prototype-pollution/test_modules_in_serial.py
Missing package.json in prototype-pollution/id_filler.py
Missing package.json in prototype-pollution/call_chain.txt
Missing package.json in prototype-pollution/payload.toml
Missing package.json in prototype-pollution/require-interception.js
Missing package.json in prototype-pollution/README.md
Missing package.json in prototype-pollution/babel-instrumentor.js
Missing package.json in prototype-pollution/jest.config_old.json
Missing package.json in prototype-pollution/package.json
Missing package.json in prototype-pollution/fixed_version_filler.py
Missing package.json in prototype-pollution/jest-analysis.config.json
Missing package.json in prototype-pollution/fix-commit.py
Missing package.json in prototype-poll

In [27]:
import json
import pandas as pd

# Empty dataframe to store the results
rows_list = []

for package_json, test_js in zip(package_json_files, test_js_files):
    row = {"Attack Type": folder_path}
    # Read the package.json file
    with open(package_json, 'r') as f:
        package_json_content = json.load(f)

    # Print the package.json content
    print(f"Contents of package.json:\n{json.dumps(package_json_content, indent=4)}")

    # Read the test.js file
    with open(test_js, 'r') as f:
        test_js_content = f.read()
    # Print the test.js content
    print(f"Contents of test.js:\n{test_js_content}")

    # Add the package.json content to the dataframe
    row["CVE-ID"] = package_json_content.get("id", "")
    row["Package"] = list(package_json_content.get("dependencies", "").keys())[0]
    row["Version"] = list(package_json_content.get("dependencies", "").values())[0]
    row["Fix Version"] = package_json_content.get("fixedVersion", "")
    row["Exploit File Path"] = os.path.dirname(test_js)
    row["Exploit"] = test_js_content
    row["Fixed Commit"] = package_json_content.get("fixCommit", "")
    row["Sink"] = package_json_content.get("sink", "")
    row["Links"] = "; ".join(list(package_json_content.get("links", []).values()))

    # Append the row to the dataframe
    rows_list.append(row)

df = pd.DataFrame(rows_list) 

Contents of package.json:
{
    "id": "CVE-2020-28269",
    "dependencies": {
        "field": "1.0.1"
    },
    "links": {
        "source1": "https://security.snyk.io/vuln/SNYK-JS-FIELD-1039884",
        "source2": "https://github.com/advisories/GHSA-hm82-qr45-h7mw"
    },
    "fixedVersion": "n/a",
    "fixCommit": "n/a",
    "sink": "lib/field.js:50:20"
}
Contents of test.js:
//https://snyk.io/vuln/SNYK-JS-FIELD-1039884
test("prototype pollution in field ", () => {
  expect({}.polluted).toBe(undefined);

  const field = require("field");
  const obj = {};
  field.set(obj, "__proto__.polluted", "yes");

  expect(obj.polluted).toBe("yes");
});

Contents of package.json:
{
    "id": "CVE-2021-23624",
    "dependencies": {
        "dotty": "0.0.1"
    },
    "links": {
        "source1": "https://security.snyk.io/vuln/SNYK-JS-DOTTY-1577292",
        "source2": "https://github.com/advisories/GHSA-6g47-63mv-qpgh"
    },
    "fixedVersion": "0.1.2",
    "fixCommit": "https://github.com/d

In [28]:
df

Unnamed: 0,Attack Type,CVE-ID,Package,Version,Fix Version,Exploit File Path,Exploit,Fixed Commit,Sink,Links
0,prototype-pollution,CVE-2020-28269,field,1.0.1,,prototype-pollution/field_1.0.1,//https://snyk.io/vuln/SNYK-JS-FIELD-1039884\n...,,lib/field.js:50:20,https://security.snyk.io/vuln/SNYK-JS-FIELD-10...
1,prototype-pollution,CVE-2021-23624,dotty,0.0.1,0.1.2,prototype-pollution/dotty_0.0.1,//https://www.whitesourcesoftware.com/vulnerab...,https://github.com/deoxxa/dotty/commit/88f6186...,lib/index.js:147:17,https://security.snyk.io/vuln/SNYK-JS-DOTTY-15...
2,prototype-pollution,CVE-2020-7765,@firebase/util,0.3.2,0.3.4,prototype-pollution/firebase-util_0.3.2,//https://hackerone.com/reports/1001218\ntest(...,,util/src/deepCopy.ts:68:49,https://security.snyk.io/vuln/SNYK-JS-FIREBASE...
3,prototype-pollution,CVE-2020-28472,@aws-sdk/shared-ini-file-loader,1.0.0-rc.8,1.0.0-rc.9,prototype-pollution/aws-sdk-shared-ini-file-lo...,//https://security.snyk.io/vuln/SNYK-JS-AWSSDK...,https://github.com/aws/aws-sdk-js/pull/3585/co...,shared-ini-file-loader/src/index.ts:108:20,https://security.snyk.io/vuln/SNYK-JS-AWSSDKSH...
4,prototype-pollution,CVE-2020-28495,total.js,3.4.6,3.4.7,prototype-pollution/total.js_3.4.6,//https://snyk.io/vuln/SNYK-JS-TOTALJS-1046671...,https://github.com/totaljs/framework/commit/b3...,"utils.js:6624:11, <anonymous",https://security.snyk.io/vuln/SNYK-JS-TOTALJS-...
...,...,...,...,...,...,...,...,...,...,...
187,prototype-pollution,CVE-2020-7723,promisehelpers,0.0.5,,prototype-pollution/promisehelpers_0.0.5,//https://snyk.io/vuln/SNYK-JS-PROMISEHELPERS-...,,src/index.js:16:25,https://security.snyk.io/vuln/SNYK-JS-PROMISEH...
188,prototype-pollution,CVE-2018-3723,defaults-deep,0.2.0,0.2.4,prototype-pollution/defaults-deep_0.2.0,"//https://hackerone.com/reports/310514\ntest(""...",https://github.com/jonschlinkert/defaults-deep...,index.js:20:16,https://hackerone.com/reports/310514; https://...
189,prototype-pollution,CVE-2020-7716,deeps,1.4.5,,prototype-pollution/deeps_1.4.5,//https://snyk.io/vuln/SNYK-JS-DEEPS-598667\nt...,,index.js:226:94,https://security.snyk.io/vuln/SNYK-JS-DEEPS-59...
190,prototype-pollution,CVE-2020-7618,sds,3.2.0,4.0.0,prototype-pollution/sds_3.2.0,//https://snyk.io/vuln/SNYK-JS-SDS-564123\ntes...,https://github.com/monsterkodi/sds/commit/a228...,js:34:13,https://security.snyk.io/vuln/SNYK-JS-SDS-5641...


In [29]:
df.to_csv(f"{folder_path}.csv", index=False)

In [32]:
import glob
import pandas as pd
import os

# Specify the path containing the CSV files
csv_folder_path = "./outputs"

# Use glob to find all .csv files in the folder
csv_files = glob.glob(os.path.join(csv_folder_path, "*.csv"))

# List to store dataframes
dataframes = []

# Read each CSV file and append it to the list
for csv_file in csv_files:
    df_csv = pd.read_csv(csv_file)
    dataframes.append(df_csv)

# Concatenate all dataframes into one
merged_df = pd.concat(dataframes, ignore_index=True)

# Save the merged dataframe to a new CSV file
merged_csv_path = os.path.join(csv_folder_path, "merged.csv")
merged_df.to_csv(merged_csv_path, index=False)

print(f"Merged CSV saved to: {merged_csv_path}")

Merged CSV saved to: ./outputs/merged.csv
