In [59]:
# Extract github value from dictionary and download git repo to directory named after the dictionary
import os
import git

from langfuzzDB import Library, LibraryFile, create_tables, get_engine
from sqlalchemy.orm import Session

http_libs = {
 'urllib3': {
 'github': 'https://github.com/urllib3/urllib3',
 'docs': 'https://urllib3.readthedocs.io/en/stable/'
},
 'requests': {
 'github': 'https://github.com/psf/requests',
 'docs': 'https://requests.readthedocs.io/en/latest/'
 },
'aiohttp': {
 'github': 'https://github.com/aio-libs/aiohttp/',
 'docs': 'https://docs.aiohttp.org/en/stable/'
 },
 'twisted': {
 'github': 'https://github.com/twisted/twisted',
 'docs': 'https://docs.twisted.org/en/stable/'
 }
}

utils = {
 'oss-fuzz': {
	 'github': 'https://github.com/google/oss-fuzz'
	 }
 }

def download_github_repos(lib_dicts, repo_path):
    for lib_dict in lib_dicts:
        
      for lib_name, lib_data in lib_dict.items():
            repo_dir = os.path.join(repo_path, lib_name)
            if not os.path.exists(repo_dir):
                git_url = lib_data["github"]
                print(f"Cloning {lib_name} from {git_url}...")
                git.Repo.clone_from(git_url, repo_dir)
            else:
                print(f"{lib_name} already exists, skipping download.")

# iterate http_libs dictionary and find all python fuzz files
# path = repo_path + http_libs[key]
# fuzz files start with "fuzz" and end with ".py"
# save the fuzz files to a dictionary with the key being the library name and the value being the path to the fuzz file
def get_fuzz_files(lib_dict, repo_path):
    fuzz_files = {}
    for key in lib_dict:
        path = repo_path + '/oss-fuzz/projects/' + str(key)
        fuzz_files[str(key)] = []
        # if the path exists, find all fuzz files
        if os.path.exists(path):
            for root, dirs, files in os.walk(path):
                for file in files:
                    if file.startswith("fuzz") and file.endswith(".py"):
                        fuzz_files[key].append(os.path.join(root, file))
        else:
            print(f"{key} Repo does not exist")
    return(fuzz_files)

# save library info to sqlite database
def save_libs(libs, sqlitedb, lang):
    engine = get_engine(sqlitedb)
    create_tables(engine)
    session = Session(engine)

    for library_name, lib_data in libs.items():
        github_url = lib_data['github']
        docs_url = lib_data['docs']
        language = lang

        existing_lib = session.query(Library).filter_by(library_name=library_name).first()
        if existing_lib is None:
            lib = Library(library_name=library_name, github_url=github_url, docs_url=docs_url, language=language)
            session.add(lib)
        else:
            print(f"Library {library_name} already exists, skipping.")

    session.commit()
    session.close()

# save fuzz files to sqlite database
def save_fuzz_files(fuzz_files, sqlitedb, lang):
    engine = get_engine(sqlitedb)
    create_tables(engine)
    session = Session(engine)

    for library_name, file_list in fuzz_files.items():
        for file_path in file_list:
            file_name = os.path.basename(file_path)
            with open(file_path, 'r') as f:
                lines = f.readlines()
            first_source_line_index = next((index for index, line in enumerate(lines) if line.startswith("import")), 0)
            contents = "".join(lines[first_source_line_index:])

            existing_file = session.query(LibraryFile).filter_by(library_name=library_name, file_name=file_name).first()
            if existing_file is None:
                lib_file = LibraryFile(library_name=library_name, file_name=file_name, contents=contents, generated=False, fuzz_test=True, type="fuzzer")
                session.add(lib_file)
            else:
                print(f"File {file_name} already exists for library {library_name}, skipping.")

    session.commit()
    session.close()


def save_recon_data(libs, fuzz_files, sqlitedb, lang):
    save_libs(libs, sqlitedb, lang)
    save_fuzz_files(fuzz_files, sqlitedb, lang)





# Set up
repo_path = "github_repos"
os.makedirs(repo_path) if not os.path.exists(repo_path) else None
sqlitedb = "langfuzz.db"
libs = [http_libs, utils]

# Download repos from http_libs and utils dictionaries
download_github_repos(libs, repo_path)
# get fuzzed files from oss-fuzz repo
fuzz_files = get_fuzz_files(http_libs, repo_path)
# save fuzzed files and library info in sqlite database
save_recon_data(http_libs, fuzz_files, sqlitedb, 'python')


urllib3 already exists, skipping download.
requests already exists, skipping download.
aiohttp already exists, skipping download.
twisted already exists, skipping download.
oss-fuzz already exists, skipping download.
twisted Repo does not exist
Library urllib3 already exists, skipping.
Library requests already exists, skipping.
Library aiohttp already exists, skipping.
Library twisted already exists, skipping.
File fuzz_requests.py already exists for library urllib3, skipping.
File fuzz_urlparse.py already exists for library urllib3, skipping.
File fuzz_server.py already exists for library requests, skipping.
File fuzz_web_request.py already exists for library aiohttp, skipping.
File fuzz_payload_url.py already exists for library aiohttp, skipping.
File fuzz_http_parser.py already exists for library aiohttp, skipping.
File fuzz_multipart.py already exists for library aiohttp, skipping.
File fuzz_http_payload_parser.py already exists for library aiohttp, skipping.


In [18]:
def get_fuzz_files(repo_path, lib_dict):
    fuzz_files = {}
    for key in lib_dict:
        path = repo_path + '/oss-fuzz/projects/' + key
        fuzz_files[key] = []
        print(path)
        # if the path exists, find all fuzz files
        if os.path.exists(path):
            for root, dirs, files in os.walk(path):
                for file in files:
                    if file.startswith("fuzz") and file.endswith(".py"):
                        fuzz_files[key].append(os.path.join(root, file))
        else:
            print(f"{key} Repo does not exist")
    return(fuzz_files)


repo_path = "github_repos"
# fuzz_files are library names and the path to the fuzz file
# fuzz_files['aiohttp'] = [file1, file2, file3]
fuzz_files = get_fuzz_files(repo_path, http_libs)

# prompt = base_prompt + docs + fuzz_tests + special_cases




github_repos/oss-fuzz/projects/urllib3
github_repos/oss-fuzz/projects/requests
github_repos/oss-fuzz/projects/aiohttp
github_repos/oss-fuzz/projects/twisted
twisted Repo does not exist
{'urllib3': ['github_repos/oss-fuzz/projects/urllib3/fuzz_requests.py', 'github_repos/oss-fuzz/projects/urllib3/fuzz_urlparse.py'], 'requests': ['github_repos/oss-fuzz/projects/requests/fuzz_server.py'], 'aiohttp': ['github_repos/oss-fuzz/projects/aiohttp/fuzz_web_request.py', 'github_repos/oss-fuzz/projects/aiohttp/fuzz_payload_url.py', 'github_repos/oss-fuzz/projects/aiohttp/fuzz_http_parser.py', 'github_repos/oss-fuzz/projects/aiohttp/fuzz_multipart.py', 'github_repos/oss-fuzz/projects/aiohttp/fuzz_http_payload_parser.py'], 'twisted': []}


In [None]:
# save to sqlite DB
# sqlalchemy
# prompt = base_prompt + docs + fuzz_tests
import sqlalchemy

# create engine
engine = sqlalchemy.create_engine('sqlite:///langfuzz.db')

#create table


In [20]:
file_path = "aiohttp_cc.txt"

def extract_functions_from_output(file_path):
    with open(file_path, 'r') as file:
        output = file.read()

    lines = output.splitlines()

    extracted_data = []
    current_path = ""

    for line in lines:
        stripped_line = line.strip()

        if stripped_line.startswith("github_repos"):
            current_path = stripped_line
        elif stripped_line.startswith("F"):
            extracted_data.append((current_path, stripped_line))

    return extracted_data

extracted_data = extract_functions_from_output(file_path)

# Print the extracted data
for path, function in extracted_data:
    print(f"Path: {path}\nFunction: {function}\n")

Path: github_repos/aiohttp/aiohttp/web_exceptions.py
Function: F 497:0 _initialize_default_reason - B

Path: github_repos/aiohttp/aiohttp/helpers.py
Function: F 387:0 content_disposition_header - C

Path: github_repos/aiohttp/aiohttp/helpers.py
Function: F 275:0 proxies_from_env - B

Path: github_repos/aiohttp/aiohttp/helpers.py
Function: F 198:0 netrc_from_env - B

Path: github_repos/aiohttp/aiohttp/helpers.py
Function: F 246:0 basicauth_from_netrc - B

Path: github_repos/aiohttp/aiohttp/pytest_plugin.py
Function: F 196:0 pytest_generate_tests - B

Path: github_repos/aiohttp/aiohttp/pytest_plugin.py

Path: github_repos/aiohttp/aiohttp/multipart.py
Function: F 74:0 parse_content_disposition - C

Path: github_repos/aiohttp/aiohttp/multipart.py
Function: F 173:0 content_disposition_filename - C

Path: github_repos/aiohttp/aiohttp/http_websocket.py
Function: F 172:0 ws_ext_parse - C

Path: github_repos/aiohttp/aiohttp/http_websocket.py
Function: F 223:0 ws_ext_gen - B

Path: github_repos/

In [None]:
libs = {
    'sqlalchemy': 'https://github.com/sqlalchemy/sqlalchemy',
    'pillow': 'https://github.com/python-pillow/Pillow',
    'babel': 'https://github.com/python-babel/babel',
    'pyyaml': 'https://github.com/yaml/pyyaml',
    'cryptography': {
        'github': 'https://github.com/pyca/cryptography',
        'docs': 'https://cryptography.io/en/latest/'
    }
}

libs2 = {
    'botocore': 'https://github.com/boto/botocore',
    'boto3': 'https://github.com/boto/boto3',
    'rq': 'https://github.com/rq/rq',
    'pip': 'https://github.com/pypa/pip',
    'grpc': {
        'github': 'https://github.com/grpc/grpc',
        'docs': 'https://grpc.github.io/grpc/python/'
    }
}

In [None]:
Help me write the save_libs() and save_fuzz_files() functions to store data into a sqlite database. This is the outline:

def save_recon_data(libs, fuzz_files, sqlitedb, lang):
    save_libs(libs, sqlitedb, lang)
    save_fuzz_files(fuzz_files, sqlitedb, lang)

example input:

fuzz_files={'urllib3': ['github_repos/oss-fuzz/projects/urllib3/fuzz_requests.py', 'github_repos/oss-fuzz/projects/urllib3/fuzz_urlparse.py'], 'requests': ['github_repos/oss-fuzz/projects/requests/fuzz_server.py'], 'aiohttp': ['github_repos/oss-fuzz/projects/aiohttp/fuzz_web_request.py', 'github_repos/oss-fuzz/projects/aiohttp/fuzz_payload_url.py', 'github_repos/oss-fuzz/projects/aiohttp/fuzz_http_parser.py', 'github_repos/oss-fuzz/projects/aiohttp/fuzz_multipart.py', 'github_repos/oss-fuzz/projects/aiohttp/fuzz_http_payload_parser.py'], 'twisted': []}

http_libs = {
 'urllib3': {
 'github': 'https://github.com/urllib3/urllib3',
 'docs': 'https://urllib3.readthedocs.io/en/stable/'
},
 'requests': {
 'github': 'https://github.com/psf/requests',
 'docs': 'https://requests.readthedocs.io/en/latest/'
 },
'aiohttp': {
 'github': 'https://github.com/aio-libs/aiohttp/',
 'docs': 'https://docs.aiohttp.org/en/stable/'
 },
 'twisted': {
 'github': 'https://github.com/twisted/twisted',
 'docs': 'https://docs.twisted.org/en/stable/'
 }
}

sqlitedb = langfuzz.db
lang = 'python'

save_libs(libs, sqlitedb, lang) should save the following data into the sqlite database table named 'libraries':
library_name = libs[key]
github_url = libs[key]['github']
docs_url = libs[key]['docs']
language = lang

save_fuzz_files(fuzz_files, sqlitedb, lang) should open the fuzz_files and save the following data into the sqlite database table named 'library_files':
library_name = fuzz_files[key]
file_name = text after last slash from fuzz_files[key][i]; for example fuzz_http_payload_parser.py from 'github_repos/oss-fuzz/projects/aiohttp/fuzz_http_payload_parser.py'
contents = open(fuzz_files[key][i], 'r').read() and save file contents as a string starting from the line with 'import' and ending with the last line of the file
generated: false
fuzz_test: true
type: fuzzer

The sqlite database should have the following tables:
libraries, and library_files

Return code for the two functions and return a langfuzzDB.py file where we implement the sqlite database with sqlalchemy.
