In [3]:
import json

import pandas as pd

from tool.factory import get_tool_handle
import util
import db_util
from tool import SATResult

In [4]:
selected_trials = {
    "codechecker": "codechecker-trial4",
    "codeql": "codeql-trial4",
    "cppcheck": "cppcheck-trial1b",
    "flawfinder": "flawfinder-trial2",
    "infer": "infer-trial3",
}

In [5]:
active_database = "restored_db"

# transaction database
conn, cur = db_util.connect_database(database=active_database)

## Define Target VCC

In [62]:
# select a VCC to view the warnings
project = "danbloomberg/leptonica"
target_vcc = "316b85bbabfbb1072a9e3b38794c732aa940af4d"

In [63]:
output_filename = util.get_output_filename(
    project=project, commit_sha=target_vcc
)

In [64]:
selected_vcc_information_df = pd.read_csv(
    "../output_analysis/selected-vcc-information.csv"
)
print("VCC information")

vcc_dict = selected_vcc_information_df[selected_vcc_information_df["vcc_commit_sha"]==target_vcc].iloc[0].to_dict()

print("CVE: ", "https://nvd.nist.gov/vuln/detail/" + vcc_dict["cve"])
print(json.dumps(vcc_dict, indent=4))

VCC information
CVE:  https://nvd.nist.gov/vuln/detail/CVE-2018-7186
{
    "project": "danbloomberg/leptonica",
    "cve": "CVE-2018-7186",
    "cwe": "664",
    "vcc_commit_sha": "316b85bbabfbb1072a9e3b38794c732aa940af4d",
    "changed_files": 72,
    "changed_functions": 131,
    "changed_lines": 2157,
    "changed_files_loc": 33567,
    "changed_functions_loc": 7815,
    "vulnerable_files": 3,
    "vulnerable_functions": 4,
    "vulnerable_lines": 8,
    "vulnerable_files_loc": 2303,
    "vulnerable_functions_loc": 155
}


## Find vulnerable functions in VCC

In [65]:
vcc_commit_changes = db_util.get_commit_changes(
            cur=cur, commit_sha=target_vcc
        )
# filter out changes when vcc is also fixing commit
vcc_commit_changes = [
    c for c in vcc_commit_changes if c["commit_type"] == "vcc"
]

# fixing changes
relevant_changes = db_util.get_relevant_vcc_changes(
    cur=cur, vcc_commit_sha=target_vcc
)
combined_fixing_changes = []

# vcc files
vcc_files = set([change["file_name"] for change in vcc_commit_changes])

for relevant_change in relevant_changes:
    fixing_commit_sha = relevant_change["fixing_commit_sha"]
    fixing_changes = db_util.get_commit_changes(
        cur=cur, commit_sha=fixing_commit_sha
    )
    # filter out changes when fixing commit is also vcc
    combined_fixing_changes += [
        c for c in fixing_changes if c["commit_type"] == "fixing"
    ]


fixing_files = set(
    [change["file_name"] for change in combined_fixing_changes]
)

vulnerable_files = set(
    [
        f
        for f in vcc_files
        if
        # f in fixing_files
        util.is_changed_file_later_fixed(
            project=project,
            vcc_changed_file=f,
            fixing_files=fixing_files,
        )
        and util._is_c_or_cpp_file(filename=f)
    ]
)

fixing_functions = set(
    [
        change["function_name"]
        for change in combined_fixing_changes
        if change["function_name"] != "N/A"
    ]
)

vcc_functions = set(
    [
        change["function_name"]
        for change in vcc_commit_changes
        if change["function_name"] != "N/A"
    ]
)


In [66]:
vulnerable_files = set(
    [
        f
        for f in vcc_files
        if
        # f in fixing_files
        util.is_changed_file_later_fixed(
            project=project,
            vcc_changed_file=f,
            fixing_files=fixing_files,
        )
        and util._is_c_or_cpp_file(filename=f)
    ]
)

vulnerable_functions = set(
    [f for f in vcc_functions if f in fixing_functions]
)

In [67]:
# find changed functions / line numbers - see analysis_calculate.py
changed_functions_info = set(
    [
        (
            c["file_name"],
            c["function_name"],
            c["function_nloc"],
            c["function_start_line"],
            c["function_end_line"],
        )
        for c in vcc_commit_changes
        if util._is_c_or_cpp_file(filename=c["file_name"])
        and c["function_name"] != "N/A"
    ]
)

#  find vulnerable functions / line numbers 
vulnerable_functions_start_end_lines = set(
    [
        (
            c["file_name"],
            c["function_name"],
            c["function_start_line"],
            c["function_end_line"],
            c["function_nloc"],
        )
        for c in vcc_commit_changes
        if c["file_name"] in vulnerable_files
        and c["function_name"] in vulnerable_functions
    ]
)


## Vulnerable Functions

In [68]:
print("commit url", "https://github.com/" + project + "/commit/" + target_vcc)
print(json.dumps(list(vulnerable_functions_start_end_lines), indent=4))


commit url https://github.com/danbloomberg/leptonica/commit/316b85bbabfbb1072a9e3b38794c732aa940af4d
[
    [
        "src/ptabasic.c",
        "ptaReadStream",
        501,
        540,
        35
    ],
    [
        "src/sel1.c",
        "selReadStream",
        1346,
        1386,
        33
    ],
    [
        "src/gplot.c",
        "gplotRead",
        675,
        748,
        65
    ],
    [
        "src/gplot.c",
        "gplotMakeOutput",
        348,
        376,
        22
    ]
]


## Warnings within Vulnerable Functions

In [69]:
for tool, trial in selected_trials.items():
    print("tool: ", tool)
    tool_handle = get_tool_handle(name=tool)
    
    try:
        warnings = tool_handle.get_transaction_result(
            output_filename=output_filename
        )
    except UnboundLocalError:
        print("result not found, skip")
        continue

    # only filter warnings in vulnerable functions
    vulnerable_warnings = []
    w:SATResult
    for w in warnings:
        for fn in vulnerable_functions_start_end_lines:
            if fn[0] == w.location_file and (
                (
                    int(fn[2]) <= int(w.location_start_line)
                    and int(w.location_start_line) <= int(fn[3])
                )
                or (
                    int(fn[2]) <= int(w.location_end_line)
                    and int(w.location_end_line) <= int(fn[3])
                )
            ):
                vulnerable_warnings.append(w)

    vulnerable_warnings = list(set(vulnerable_warnings))

    print(json.dumps([w.to_dict() for w in vulnerable_warnings], indent=4))

    print("======================================================")

tool:  codechecker
[]
tool:  codeql
[
    {
        "location_hash": "59e7f45e20ca7726:1",
        "location_file": "src/sel1.c",
        "location_start_line": 1366,
        "location_start_column": 45,
        "location_end_line": -1,
        "location_end_column": 52,
    },
    {
        "location_hash": "c0175552336b8f38:1",
        "location_file": "src/gplot.c",
        "location_start_line": 714,
        "location_start_column": 41,
        "location_end_line": -1,
        "location_end_column": 50,
    }
]
tool:  cppcheck
[]
tool:  flawfinder
[
    {
        "location_hash": "75a770010e0ce1da375e3d96b95e38422a6e111de859cf1c07769f14e004a27d",
        "location_file": "src/gplot.c",
        "location_start_line": 730,
        "location_start_column": 14,
        "location_end_line": -1,
        "location_end_column": 48,
    },
    {
        "location_hash": "71ff0c95d6287b6f8cd1bbff7f3c9b26dc2214b495036697d31829776aa2c910",
        "location_file": "src/gplot.c",
        "locat