# Adding new CVE examples

This script adds new examples of CVEs to the dataset. It is intended to be used as a helper script to fill in the database with new examples. 

1. First, start `app.py` so that the web service is running.
2. Then run this script; it sends each example to the `/add_example` endpoint.

In [2]:
import os
from os import listdir
from os.path import isfile, join

In [3]:
# root directory that holds the example sources
example_dir = '/mnt/c/Users/miros/Documents/Code/cybersecurity_seeve'

# gather the full path of every file found under example_dir,
# descending into all of its subdirectories
all_files = []
for folder, _subdirs, filenames in os.walk(example_dir):
    all_files.extend(os.path.join(folder, filename) for filename in filenames)

In [4]:
# keep only the paths that mention CWE anywhere
cwe_files = [path for path in all_files if 'CWE' in path]

# group the example files in a dictionary: the key is the first path
# component containing "CWE", the value is the list of matching file paths
cwe_files_dict = {}
for path in cwe_files:
    components = path.split('/')

    # first path component that mentions CWE (empty string if there is none)
    cwe_number = next((part for part in components if 'CWE' in part), '')

    # only files whose name contains SCE or VCE are collected
    file_name = components[-1]
    if "SCE" in file_name or "VCE" in file_name:
        cwe_files_dict.setdefault(cwe_number, []).append(path)

cwe_files_dict

{'CWE-119': ['/mnt/c/Users/miros/Documents/Code/cybersecurity_seeve/CWE-119/src/SCE_1_test1.c',
  '/mnt/c/Users/miros/Documents/Code/cybersecurity_seeve/CWE-119/src/SCE_2_test2.c',
  '/mnt/c/Users/miros/Documents/Code/cybersecurity_seeve/CWE-119/src/SCE_3_test3.c',
  '/mnt/c/Users/miros/Documents/Code/cybersecurity_seeve/CWE-119/src/SCE_4_test4.c',
  '/mnt/c/Users/miros/Documents/Code/cybersecurity_seeve/CWE-119/src/VCE_1_test1.c',
  '/mnt/c/Users/miros/Documents/Code/cybersecurity_seeve/CWE-119/src/VCE_2_test2.c',
  '/mnt/c/Users/miros/Documents/Code/cybersecurity_seeve/CWE-119/src/VCE_3_test3.c',
  '/mnt/c/Users/miros/Documents/Code/cybersecurity_seeve/CWE-119/src/VCE_4_test4.c'],
 'CWE-120': ['/mnt/c/Users/miros/Documents/Code/cybersecurity_seeve/CWE-120/src/SCE_1_test1.c',
  '/mnt/c/Users/miros/Documents/Code/cybersecurity_seeve/CWE-120/src/VCE_1_test1.c'],
 'CWE-125': ['/mnt/c/Users/miros/Documents/Code/cybersecurity_seeve/CWE-125/src/SCE_1.c',
  '/mnt/c/Users/miros/Documents/Code

In [7]:
# read every file and replace the double quotes in its content with underscores
import urllib.parse
import re

def read_file(file_path):
    """Return the full text content of the file at *file_path*.

    The file is decoded as UTF-8 explicitly, so the result does not depend on
    the platform's default locale encoding.

    Raises:
        OSError: if the file cannot be opened.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()

def encode_content(content):
    """Return *content* with every double quote (") replaced by an underscore.

    NOTE: this is not URL (percent) encoding; only the double-quote
    character is substituted, everything else is returned unchanged.
    """
    quote_to_underscore = {ord('"'): '_'}
    return content.translate(quote_to_underscore)

# map every collected example file path to its content after it has been
# read from disk and passed through encode_content
cwe_files_dict_url_encoded = {
    path: encode_content(read_file(path))
    for paths in cwe_files_dict.values()
    for path in paths
}


cwe_files_dict_url_encoded


{'CWE-119': [],
 '/mnt/c/Users/miros/Documents/Code/cybersecurity_seeve/CWE-119/src/SCE_1_test1.c': '#include <stdio.h>\n\n/*This function attempts to extract a pair of numbers from a user-supplied string.*/\nvoid parse_data(char *untrusted_input){\n    int m = 0, n = 0, error; // Initialize m and n to 0\n    error = sscanf(untrusted_input, _%d:%d_, &m, &n);\n    \n    if (error != 2){ // Check if two values were parsed\n        printf(_Did not specify two integer values. Die evil hacker!\\n_);\n        return; // Exit the function\n    }\n    \n    printf(_m is %d and n is %d_, m, n);\n}\n\nint main(){\n    parse_data(_123:_);\n    return 0;\n}\n',
 '/mnt/c/Users/miros/Documents/Code/cybersecurity_seeve/CWE-119/src/SCE_2_test2.c': "#define MAX_SIZE 16\n#include <stdio.h>\n#include <unistd.h>\n#include <string.h>\n#include <stdlib.h>\n\nchar * copy_input(char *user_supplied_string){\n    int i, dst_index;\n    size_t potential_length = strlen(user_supplied_string) * 5; // Maximum expan

In [9]:
# build one request payload per example file, keyed by the file path:
#   code:          the encoded content of the file
#   vulnerability: the CWE key the file is listed under in cwe_files_dict
#   model:         always "codebert"
cwe_files_dict_url_encoded_final = {
    path: {
        "code": cwe_files_dict_url_encoded[path],
        "vulnerability": cwe_id,
        "model": "codebert",
    }
    for cwe_id, paths in cwe_files_dict.items()
    for path in paths
}

{'/mnt/c/Users/miros/Documents/Code/cybersecurity_seeve/CWE-119/src/SCE_1_test1.c': {'code': '#include <stdio.h>\n\n/*This function attempts to extract a pair of numbers from a user-supplied string.*/\nvoid parse_data(char *untrusted_input){\n    int m = 0, n = 0, error; // Initialize m and n to 0\n    error = sscanf(untrusted_input, _%d:%d_, &m, &n);\n    \n    if (error != 2){ // Check if two values were parsed\n        printf(_Did not specify two integer values. Die evil hacker!\\n_);\n        return; // Exit the function\n    }\n    \n    printf(_m is %d and n is %d_, m, n);\n}\n\nint main(){\n    parse_data(_123:_);\n    return 0;\n}\n',
  'vulnerability': 'CWE-119',
  'model': 'codebert'},
 '/mnt/c/Users/miros/Documents/Code/cybersecurity_seeve/CWE-119/src/SCE_2_test2.c': {'code': "#define MAX_SIZE 16\n#include <stdio.h>\n#include <unistd.h>\n#include <string.h>\n#include <stdlib.h>\n\nchar * copy_input(char *user_supplied_string){\n    int i, dst_index;\n    size_t potential_len

In [None]:
# for each item in the dictionary, send it to the REST API as a POST request
# localhost:5001/add_example
import requests
import json

# send every prepared payload to the REST API and print the server's reply.
# requests applies no timeout by default, so an explicit one is passed to keep
# a stalled or unresponsive server from hanging this cell indefinitely; when
# it expires, requests raises an exception and the loop stops.
for payload in cwe_files_dict_url_encoded_final.values():
    response = requests.post(
        'http://localhost:5001/add_example',
        json=payload,
        timeout=60,  # seconds; generous because the service may be slow per example
    )
    print(response.status_code)
    print(response.text)