<h1>Binary Ninja Headless API</h1>

In [None]:
%%time
import pandas as pd
import seaborn as sns
import os
import hashlib
from binaryninja import *
import binaryninja
from binaryninja import Endianness
import math
import csv
import itertools
import pandas_bokeh
from collections import Counter
import matplotlib.pyplot as plt
from ast import literal_eval
import jupyter_black

# Load the jupyter_black extension and direct pandas_bokeh output to the notebook.
jupyter_black.load()
pandas_bokeh.output_notebook()

# Environment switches for headless Binary Ninja runs: disable user settings,
# user plugins and repository plugins.
# NOTE(review): these are assigned after `binaryninja` has already been
# imported above — confirm the core still honours them at this point.
os.environ["BN_DISABLE_USER_SETTINGS"] = "True"
os.environ["BN_DISABLE_USER_PLUGINS"] = "True"
os.environ["BN_DISABLE_REPOSITORY_PLUGINS"] = "True"

<h2>Using the Binary Ninja Headless API to find Potential Vulnerabilities</h2>

In [None]:
%%time
# Names of the command-execution functions whose call sites are collected by
# find_xrefs_to_dangerous_functions().
dangerous_functions = [
    "system",
    "execve",
    "execle",
    "execvp",
    "execlp",
    "doSystemCmd",
]


# Get the file name of a binary from its path
def get_file_name(path):
    """Return the final component of *path* (empty if *path* ends in a separator)."""
    _directory, name = os.path.split(path)
    return name


# Get the architecture name of a binary
def get_architecture(bv):
    """Return the ``name`` attribute of the view's ``arch`` object."""
    architecture = bv.arch
    return architecture.name


# Get the endianness of a binary as a label
def get_endianness(bv):
    """Return "Little" when the view reports little-endian byte order, else "Big"."""
    if bv.endianness == binaryninja.Endianness.LittleEndian:
        return "Little"
    return "Big"


# Calculate the SHA256 hash of a binary
def get_hash(filepath):
    """Return the hex SHA256 digest of the raw bytes backing *filepath*.

    The file is opened with Binary Ninja and the digest is computed over the
    parent (raw) view of the resulting BinaryView; if the loaded view has no
    parent view, the view itself is hashed.

    Raises:
        ValueError: if Binary Ninja returns no view for *filepath*.
    """
    bv = binaryninja.load(filepath)
    if bv is None:
        raise ValueError(f"Binary Ninja could not load {filepath!r}")

    try:
        raw = bv.parent_view if bv.parent_view is not None else bv
        # read() takes (address, length); pass the span of the view rather
        # than its end address so a non-zero start is handled correctly.
        contents = raw.read(raw.start, raw.end - raw.start)
        digest = binaryninja.transform.Transform["SHA256"].encode(contents)
        return digest.hex()
    finally:
        # Headless scripts must close the file explicitly or the view leaks.
        bv.file.close()


# Calculate the cyclomatic complexity of a single function
def calculate_cyclomatic_complexity(function):
    """Return ``edges - nodes + 2`` for the function's basic-block graph.

    Edges are the outgoing edges summed over every basic block; nodes are the
    basic blocks themselves.
    """
    edge_total = 0
    for basic_block in function.basic_blocks:
        edge_total += len(basic_block.outgoing_edges)
    node_total = len(function.basic_blocks)
    return edge_total - node_total + 2


# Calculate the Shannon entropy of a byte sequence
def calculate_entropy(data):
    """Return the Shannon entropy of *data* in bits per byte (0.0 to 8.0).

    Args:
        data: a sequence of integer byte values, e.g. ``bytes`` read from a
            file opened in binary mode.

    Returns:
        The entropy as a float; ``0.0`` for empty input (the original
        implementation divided by zero in that case).
    """
    total = len(data)
    if total == 0:
        return 0.0

    # Count every byte value in a single pass instead of scanning the data
    # once per possible value.
    counts = Counter(data)

    ent = 0.0
    for byte in range(256):
        occurrences = counts[byte]
        if occurrences:
            p_x = occurrences / total
            ent -= p_x * math.log2(p_x)
    return ent


# Get the segments of a binary
def get_seg(bv):
    """Return one dict per segment with its address range and R/W/X flags."""
    return [
        {
            "start": segment.start,
            "end": segment.end,
            "readable": segment.readable,
            "writable": segment.writable,
            "executable": segment.executable,
        }
        for segment in bv.segments
    ]


# Get the code cross-references to the dangerous symbols
def find_xrefs_to_dangerous_functions(bv):
    """Collect code references to every name in ``dangerous_functions``.

    Returns:
        A list of ``(function_name, caller_start, xref_address)`` tuples.
        Addresses are hex strings. ``caller_start`` is ``None`` when the
        reference is not attributed to any function (previously this case
        raised ``AttributeError``).
    """
    xref_info = []

    for func_name in dangerous_functions:
        symbol = bv.get_symbol_by_raw_name(func_name)
        if not symbol:
            continue

        for xref in bv.get_code_refs(symbol.address):
            caller = xref.function
            caller_start = hex(caller.start) if caller is not None else None
            xref_info.append((func_name, caller_start, hex(xref.address)))

    return xref_info


def analyze_binary(path):
    """Load *path* with Binary Ninja and gather per-binary metrics.

    Returns:
        A 10-tuple ``(filename, file_hash, architecture, endianness, avg_cc,
        entropy, funcs, strings, segment_info, getrefs)``. Every element is
        ``None`` when Binary Ninja returns no view for *path*.
    """
    bv = binaryninja.load(path)

    if bv is None:
        return None, None, None, None, None, None, None, None, None, None

    try:
        # Cyclomatic complexity of every function, averaged over the binary.
        ccs = [calculate_cyclomatic_complexity(function) for function in bv.functions]
        avg_cc = sum(ccs) / len(ccs) if ccs else 0

        filename = get_file_name(path)
        file_hash = get_hash(path)
        architecture = get_architecture(bv)
        funcs = [(func.name, hex(func.start)) for func in bv.functions]
        endianness = get_endianness(bv)

        strings = [(str(string), hex(string.start)) for string in bv.get_strings()]
        segment_info = get_seg(bv)
        getrefs = find_xrefs_to_dangerous_functions(bv)
    finally:
        # Everything above is copied into plain Python objects, so the view
        # can be released; headless scripts leak it unless it is closed.
        bv.file.close()

    # NOTE(review): entropy is computed over the bytes of the file at `path`
    # itself; if `path` is a .bndb database this is the database's entropy,
    # not that of the original binary — confirm this is intended.
    with open(path, "rb") as f:
        entropy = calculate_entropy(f.read())

    return (
        filename,
        file_hash,
        architecture,
        endianness,
        avg_cc,
        entropy,
        funcs,
        strings,
        segment_info,
        getrefs,
    )


def analyze_directory(directory, output_path="binary_analysis_results.parquet"):
    """Analyze every regular file in *directory* and save the results as Parquet.

    Args:
        directory: folder whose files are passed to ``analyze_binary``.
        output_path: destination Parquet file; defaults to the name used by
            the rest of the notebook.

    Files for which ``analyze_binary`` yields no filename are skipped.
    """
    # Column names, in the same order as the tuple returned by analyze_binary.
    columns = (
        "Binary",
        "File_Hash",
        "Architecture",
        "Endianness",
        "Average_Cyclomatic_Complexity",
        "Entropy",
        "Functions",
        "Strings",
        "Segments",
        "Xrefs_to_System",
    )

    # Sort so the row order does not depend on the file system's listing order.
    binaries = sorted(
        f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))
    )

    binary_data = []  # One dict per successfully analyzed binary
    for binary in binaries:
        result = analyze_binary(os.path.join(directory, binary))
        if result[0] is not None:
            binary_data.append(dict(zip(columns, result)))

    df = pd.DataFrame(binary_data)
    df.to_parquet(output_path, index=False)


# Run the batch analysis over the Tenda BNDB folder; analyze_directory writes
# its results to binary_analysis_results.parquet.
if __name__ == "__main__":
    analyze_directory("./Tenda/BNDB/")

<h2>Creating a Pandas DataFrame from a Parquet File</h2>

In [None]:
%%time
# Load the results written by analyze_directory() back into a DataFrame.
df = pd.read_parquet("binary_analysis_results.parquet")

<h2>Verify the Pandas Output</h2>

In [None]:
%%time
df

In [None]:
# Show big-endian binaries with entropy above 6 and an average cyclomatic
# complexity above 3.
df.query('Entropy > 6 and Endianness == "Big" and Average_Cyclomatic_Complexity > 3')

<h2>Checking Pandas Datatypes</h2>

In [None]:
%%time
df.dtypes

<h2>Changing Datatypes to String</h2>

In [None]:
%%time
# Convert these columns to plain strings so the `.str` accessor can be used
# for substring searches on them.
for column in ("Strings", "Functions", "Binary", "Architecture", "Xrefs_to_System"):
    df[column] = df[column].astype(str)

In [None]:
# Rows whose stringified Strings column contains the text "0x8154".
df[df["Strings"].str.contains("0x8154", na=False)]

In [None]:
# Rows whose stringified Functions column contains the text "0xec50".
df[df["Functions"].str.contains("0xec50", na=False)]

In [None]:
# Rows whose stringified Xrefs_to_System column contains the text "0x4fb88".
df[df["Xrefs_to_System"].str.contains("0x4fb88", na=False)]

<h2>Create a Chart to Visualise the Average Cyclomatic Complexity &lt; 3.6</h2>

In [None]:
# Keep only the binaries whose average cyclomatic complexity is below 3.6;
# df_plot is the input for the filtered complexity chart.
df_plot = df.query("Average_Cyclomatic_Complexity < 3.6")

In [None]:
df_plot

In [None]:
search_string = "system"


def _count_search_string(xrefs):
    """Return how many times ``search_string`` occurs in one Xrefs_to_System value."""
    # NOTE(review): this is a substring count, so it assumes the column has
    # already been converted to str — confirm the astype(str) cell ran first.
    return xrefs.count(search_string)


df["Potential_Dangerous_Calls_To_System"] = df["Xrefs_to_System"].apply(
    _count_search_string
)

In [None]:
df

In [None]:
# Order the binaries from most to fewest counted "system" references.
df_sorted = df.sort_values(by="Potential_Dangerous_Calls_To_System", ascending=False)

<h2>Create a Chart to Visualise the Potentially Dangerous Calls to System</h2>

In [None]:
%%time
# Interactive bar chart: one bar per binary, height is the counted number of
# "system" references, in the order established by df_sorted.
df_sorted.plot_bokeh.bar(
    x="Binary",
    y="Potential_Dangerous_Calls_To_System",
    figsize=(900, 700),
    title="Potential_Dangerous_Calls_To_System",
    xlabel="Binary",
    ylabel="Total",
    vertical_xlabel=True,
)

<h2>Create a Chart to Visualise the Entropy</h2>

In [None]:
%%time
# Static bar chart of the entropy of every binary, saved to Entropy_chart.png.
plt.figure(figsize=(12, 6))
plt.bar(df["Binary"], df["Entropy"])
plt.title("Entropy of Binaries")
plt.xlabel("Binary")
plt.ylabel("Entropy")
# Rotate the binary names so they stay readable on the x axis.
plt.xticks(rotation=45, ha="right")
plt.tight_layout()
plt.savefig("Entropy_chart.png")

In [None]:
%%time
# Static bar chart of the binaries selected into df_plot (average cyclomatic
# complexity below 3.6).
plt.figure(figsize=(12, 6))
plt.bar(df_plot["Binary"], df_plot["Average_Cyclomatic_Complexity"])
plt.title("Average Cyclomatic Complexity")
plt.xlabel("Binary")
plt.ylabel("Average_Cyclomatic_Complexity")
# Rotate the binary names so they stay readable on the x axis.
plt.xticks(rotation=45, ha="right")
plt.tight_layout()
# Use a dedicated file name: the chart for all binaries is saved as
# Average_Cyclomatic_Complexity_chart.png and would overwrite this one.
plt.savefig("Average_Cyclomatic_Complexity_below_3.6_chart.png")

<h2>Create a Chart to Visualise the Average Cyclomatic Complexity</h2>

In [None]:
%%time
# Static bar chart of the average cyclomatic complexity of every binary,
# saved to Average_Cyclomatic_Complexity_chart.png.
plt.figure(figsize=(12, 6))
plt.bar(df["Binary"], df["Average_Cyclomatic_Complexity"])
plt.title("Average Cyclomatic Complexity of Binaries")
plt.xlabel("Binary")
plt.ylabel("Average Cyclomatic Complexity")
# Rotate the binary names so they stay readable on the x axis.
plt.xticks(rotation=45, ha="right")
plt.tight_layout()
plt.savefig("Average_Cyclomatic_Complexity_chart.png")

<h2>Create an Interactive Chart to Visualise the Entropy and Average Cyclomatic Complexity</h2>

In [None]:
%%time
# Interactive bar chart plotting two series per binary: Entropy and
# Average_Cyclomatic_Complexity.
df.plot_bokeh.bar(
    x="Binary",
    y=["Entropy", "Average_Cyclomatic_Complexity"],
    figsize=(900, 700),
    title="Entropy and Average Cyclomatic Complexity of Binaries",
    xlabel="Binary",
    ylabel="Score",
    vertical_xlabel=True,
)

<h2>Create a New Pandas DataFrame to Highlight the Entropy Values</h2>

In [None]:
%%time
# Build a three-column table ordered from highest to lowest entropy, with a
# fresh 0..n-1 index.
entropy_columns = ["Binary", "File_Hash", "Entropy"]
ent_chart = (
    pd.DataFrame(df[entropy_columns])
    .sort_values(by=["Entropy"], ascending=False)
    .reset_index(drop=True)
)

In [None]:
%%time


def highlight_score(val):
    """Return the CSS colour for an entropy cell: red from 7.0 upwards, else green."""
    return "color: red" if val >= 7.0 else "color: green"


# Colour the Entropy column cell by cell with highlight_score.
# Styler.applymap is deprecated in favour of Styler.map (pandas 2.1+); use
# the new name when it exists and fall back to the old one otherwise.
_ent_styler = ent_chart.style
_style_entropy_cells = getattr(_ent_styler, "map", None) or _ent_styler.applymap
ent_styled_df = _style_entropy_cells(highlight_score, subset=["Entropy"])
ent_styled_df

<h2>Jaccard Similarity Index of Cisco RV130 HTTPD Binaries</h2>

In [None]:
%%time


def _load_function_starts(path):
    """Return the set of function start addresses Binary Ninja reports for *path*."""
    bv = binaryninja.load(path)
    if bv is None:
        raise ValueError(f"Binary Ninja could not load {path!r}")
    try:
        return {function.start for function in bv.functions}
    finally:
        # Headless scripts must close the file explicitly or the view leaks.
        bv.file.close()


def _jaccard(first, second):
    """Return |first & second| / |first | second|, or 0.0 when both sets are empty."""
    union = first | second
    if not union:
        # Two binaries without any functions give no evidence of similarity;
        # report 0.0 instead of dividing by zero.
        return 0.0
    return len(first & second) / len(union)


def calculate_jaccard_index(binary1, binary2):
    """Return the Jaccard index of the function start addresses of two binaries.

    Args:
        binary1: path of the first file to load with Binary Ninja.
        binary2: path of the second file to load with Binary Ninja.

    Returns:
        A float between 0.0 and 1.0.

    Raises:
        ValueError: if Binary Ninja returns no view for either path.
    """
    return _jaccard(_load_function_starts(binary1), _load_function_starts(binary2))


# Get all binaries in the folder
bndb_dir = "./RV130_HTTPD/bndb/"
binaries = [
    os.path.join(bndb_dir, f)
    for f in os.listdir(bndb_dir)
    if os.path.isfile(os.path.join(bndb_dir, f))
]

# Load every binary once up front; loading inside the pair loop would reopen
# each file once per pair it takes part in.
function_starts = {path: _load_function_starts(path) for path in binaries}

# Create a CSV file to store the results
with open("jaccard_output.csv", "w", newline="") as csvfile:
    fieldnames = ["Binary1", "Binary2", "Jaccard Index"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()

    # Calculate Jaccard Similarity for each pair of binaries
    for binary1, binary2 in itertools.combinations(binaries, 2):
        jaccard_index = _jaccard(function_starts[binary1], function_starts[binary2])
        writer.writerow(
            {
                # Report file names only, without the folder prefix.
                "Binary1": os.path.basename(binary1),
                "Binary2": os.path.basename(binary2),
                "Jaccard Index": jaccard_index,
            }
        )

In [None]:
%%time
# Load the pairwise Jaccard results (columns: Binary1, Binary2, Jaccard Index).
df_jaccard = pd.read_csv("jaccard_output.csv")

In [None]:
%%time


def highlight_score(val):
    """Return the CSS colour for a Jaccard cell: red from 0.7 upwards, else green."""
    return "color: red" if val >= 0.7 else "color: green"


# Colour the Jaccard Index column cell by cell with highlight_score.
# Styler.applymap is deprecated in favour of Styler.map (pandas 2.1+); use
# the new name when it exists and fall back to the old one otherwise.
_jaccard_styler = df_jaccard.style
_style_jaccard_cells = (
    getattr(_jaccard_styler, "map", None) or _jaccard_styler.applymap
)
jaccard_styled_df = _style_jaccard_cells(highlight_score, subset=["Jaccard Index"])
jaccard_styled_df

| Jaccard Index Range | Interpretation                                        |
|---------------------|------------------------------------------------------|
| 0.0                 | No similarity at all. The sets have no elements in common. |
| 0.0 - 0.3           | Low similarity. The sets have a relatively small overlap. |
| 0.3 - 0.7           | Moderate similarity. The sets have a reasonable overlap but are not identical. |
| 0.7 - 1.0           | High similarity. The sets have a substantial overlap. |

In [None]:
%%time
# The CSV holds each unordered pair once, so a plain pivot fills only one
# triangle of the matrix; filling the rest with 0 would display those pairs
# as "no similarity". Mirror the values instead so the matrix is symmetric.
_upper = df_jaccard.pivot(index="Binary1", columns="Binary2", values="Jaccard Index")
_labels = _upper.index.union(_upper.columns)
_upper = _upper.reindex(index=_labels, columns=_labels)
similarity_matrix = _upper.combine_first(_upper.T)

# A binary compared with itself is taken as fully similar.
for _label in _labels:
    similarity_matrix.loc[_label, _label] = 1.0

similarity_matrix = similarity_matrix.fillna(0).rename_axis(
    index="Binary1", columns="Binary2"
)

In [None]:
%%time
# Heatmap of the pairwise Jaccard indices, annotated to one decimal place.
plt.figure(figsize=(8, 6))
sns.heatmap(similarity_matrix, cmap="vlag", annot=True, fmt=".1f", linewidths=0.5)
plt.title("Jaccard Similarity Heatmap")
plt.show()

<h2>Run PwnTools Checksec to Determine the Binary Security Settings</h2>

In [None]:
%%time
# Run PwnTools Checksec over every file in ./Tenda/Tenda/ and capture its
# output lines (IPython `!` shell capture).
# NOTE(review): the checksec path below is specific to one user's machine —
# adjust it to the local installation.
data = !/Users/user/Library/Python/3.9/bin/checksec ./Tenda/Tenda/*

# Labels reported by checksec that are kept as DataFrame columns.
checksec_fields = ("Arch", "RELRO", "Stack", "NX", "PIE")

# Build one record per file instead of one list per column, so a file with a
# missing field yields an empty cell rather than shifting every later value
# (or making the column lengths differ, which pandas rejects).
records = []
current = None

for line in data:
    if line.startswith("[*]"):
        # Header line: "[*] '<path>'". Take everything after the marker so
        # paths containing spaces are not truncated.
        quoted_path = line[len("[*]") :].strip()
        current = {"Filename": os.path.basename(quoted_path.strip("'\""))}
        records.append(current)
        continue

    label, separator, value = line.strip().partition(":")
    if current is not None and separator and label in checksec_fields:
        current[label] = value.strip()

# Drop "[*]" lines that were not followed by any security field.
records = [record for record in records if len(record) > 1]

# Create DataFrame
df_tenda_checksec = pd.DataFrame(records, columns=["Filename", *checksec_fields])
df_tenda_checksec.to_csv("tenda_checksec.csv", index=False)

<h2>Verify the Pandas Output</h2>

In [None]:
%%time
df_tenda_checksec

<h2>Use Binary Ninja's IL SSA Form to Help Validate if we Control the Input to System</h2>

In [None]:
%%time
# Open the BinaryView
bv = binaryninja.load("./Tenda/BNDB/webs.bndb")

# Assuming the function at sub_3d874 is the one we want.
# This is one of the potential calls to system from our automated analysis.
# NOTE(review): presumably get_function_at returns None when no function
# starts at this address — confirm before relying on `func` below.
func = bv.get_function_at(0x3D874)

# Access the HLIL SSA form (the other Binary Ninja ILs have SSA forms as well)
hlil_ssa = func.hlil.ssa_form

# Iterate over the basic blocks and instructions in this form and print each
# instruction
for block in hlil_ssa:
    for insn in block:
        print(insn)

In [None]:
%%time
from binaryninja import *
import glob


def find_control_to_system(bv):
    """Print every MLIL call whose constant-pointer target name contains "system".

    Only MLIL_CALL instructions whose destination is an MLIL_CONST_PTR that
    resolves to a known function are considered. One line is printed per
    match, giving the call address and the file name of the binary.
    """
    # str.strip() removes a *set of characters* from both ends, not a prefix,
    # so it mangled names such as "httpd"; take the base name instead.
    binary_name = os.path.basename(bv.file.filename)

    for func in bv.functions:
        for block in func.medium_level_il:
            for insn in block:
                if insn.operation != MediumLevelILOperation.MLIL_CALL:
                    continue
                if insn.dest.operation != MediumLevelILOperation.MLIL_CONST_PTR:
                    continue

                called_func = bv.get_function_at(insn.dest.value)
                if called_func and "system" in called_func.name:
                    print(f"Call to system at {hex(insn.address)} in {binary_name}")


# Replace 'folder_path' with the actual path to your folder containing binaries
folder_path = "./Tenda/BNDB/*"

# Use glob to get a list of binary file paths
binary_paths = glob.glob(folder_path)

# Process each binary
for binary_path in binary_paths:
    bv = binaryninja.load(binary_path)
    if bv is None:
        print(f"Skipping {binary_path}: Binary Ninja could not load it")
        continue
    try:
        bv.update_analysis_and_wait()
        find_control_to_system(bv)
    finally:
        # Release each view before loading the next one; headless scripts
        # leak the view unless its file is closed.
        bv.file.close()

<h2>Create Bulk BNDBs for Batch Scanning</h2>

<h2>Reference Material</h2>

- 10 Minutes to Pandas: https://pandas.pydata.org/docs/user_guide/10min.html
- Pandas Cookbook: https://pandas.pydata.org/docs/user_guide/cookbook.html#cookbook
- Binary Ninja Python API Reference: https://api.binary.ninja/
- Binary Ninja Intermediate Language Overview: https://docs.binary.ninja/dev/bnil-overview.html
- Batch Processing and Other Automation Tips: https://docs.binary.ninja/dev/batch.html
- User Informed Data Flow: https://docs.binary.ninja/dev/uidf.html
- SSA Explained: https://carstein.github.io/2020/10/22/ssa-explained.html#fnref:1
- Hunting Format String Vulnerabilities: https://youtu.be/Mylbm3MIiTU
- Auditing system calls for command injection vulnerabilities using Binary Ninja's HLIL: https://youtu.be/F3uh8DuS0tE
- cetfor/SystemCallAuditorBinja.py: https://gist.github.com/cetfor/67cbd707bf44252aebbaf6308db28ee5
- Learning Binary Ninja for Reverse Engineering - Scripting Basics and More Part 1: https://youtu.be/RVyZBqjLrE0
- Learning Binary Ninja for Reverse Engineering - Scripting Basics and More Part 2: https://youtu.be/gLggUUy0-iI