In [None]:
%%time
# Standard library
import csv
import hashlib
import itertools
import math
import os
from collections import Counter

# Third-party
import jupyter_black
import matplotlib.pyplot as plt
import pandas as pd
import pandas_bokeh
import seaborn as sns

# Send pandas-bokeh chart output to the notebook.
pandas_bokeh.output_notebook()

# Activate the jupyter_black extension for this session.
jupyter_black.load()

<h1>Ghidra Headless Script</h1>

In [None]:
%%time
%%writefile gheadless.py
# Ghidra headless post-script.
#
# For the program currently open in Ghidra this script collects the program
# name, language string, SHA256, defined strings, function list, the call sites
# of a fixed set of command-execution functions, and the average cyclomatic
# complexity, then appends them as ONE row (no header row) to ./ghidratest.csv.
#
# `currentProgram`, `monitor` and `getReferencesTo` are not imported here; they
# are expected to be supplied by Ghidra's script environment.
import csv
from ghidra.program.util import DefinedDataIterator, CyclomaticComplexity

# Function names whose callers are recorded as potentially dangerous call sites.
dangerous_functions = ["system", "execve", "execle", "execvp", "execlp", "doSystemCmd"]

fm = currentProgram.getFunctionManager()

# Collecting information
files = currentProgram.getName()
arches = currentProgram.getLanguage().toString()
hashes = currentProgram.getExecutableSHA256()
strings = [str(s) for s in DefinedDataIterator.definedStrings(currentProgram)]
all_funcs = list(fm.getFunctions(True))
total_cc = 0
system_xrefs_details = []

# Find dangerous functions and their xrefs
for func in all_funcs:
    if func.getName() in dangerous_functions:
        entry_point = func.getEntryPoint()
        references = getReferencesTo(entry_point)
        for xref in references:
            # Fetching the referencing function details
            ref_func = fm.getFunctionContaining(xref.getFromAddress())
            # References that do not fall inside any function are skipped.
            if ref_func:
                # Collecting address and function name
                detail = "{} ({})".format(xref.getFromAddress(), ref_func.getName())
                system_xrefs_details.append(detail)

num_calls_in_system_xrefs = len(system_xrefs_details)

# Summing cyclomatic complexity over every function; a single calculator
# instance is reused instead of constructing a new one per function.
cc_calculator = CyclomaticComplexity()
for func in all_funcs:
    total_cc += cc_calculator.calculateCyclomaticComplexity(func, monitor)

# Calculating average cyclomatic complexity.
# float() forces true division: under Python 2 semantics (Ghidra's Jython
# interpreter) `int / int` floors, which would truncate the average to a whole
# number before it is rounded to two decimals below.
num_funcs = len(all_funcs)
average_cc = float(total_cc) / num_funcs if num_funcs > 0 else 0.0

# Saving results to CSV
# Append mode: each run of this script adds one row to the shared file, so the
# file accumulates one row per analysed program. No header row is written.
csv_file_path = "./ghidratest.csv"
with open(csv_file_path, mode="a") as csv_file:
    fieldnames = [
        "File",
        "Architecture",
        "SHA256",
        "Strings",
        "Functions",
        "System_Xrefs",
        "Total_System_Xrefs",
        "Average_Cyclomatic_Complexity",
    ]
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    # Writing data
    writer.writerow(
        {
            "File": files,
            "Architecture": arches,
            "SHA256": hashes,
            "Strings": ", ".join(strings),
            "Functions": ", ".join([str(func) for func in all_funcs]),
            "System_Xrefs": "; ".join(system_xrefs_details),
            "Total_System_Xrefs": num_calls_in_system_xrefs,
            "Average_Cyclomatic_Complexity": round(average_cc, 2),
        }
    )

In [None]:
%%time
# Mark the generated script as executable.
# NOTE(review): the script is handed to analyzeHeadless via -postScript rather
# than executed directly, so this step may be unnecessary — confirm.
!chmod +x gheadless.py

In [None]:
%%time
# Path to Ghidra's analyzeHeadless launcher.
# The install directory can be overridden with the GHIDRA_INSTALL_DIR
# environment variable; when it is unset the original hard-coded location is
# used, so existing setups keep working unchanged.
# NOTE: the value is interpolated unquoted into a shell command later on, so an
# install directory containing spaces would need quoting there.
ghidra_headless = os.path.join(
    os.environ.get("GHIDRA_INSTALL_DIR", "/Applications/ghidra_11.2.1_PUBLIC"),
    "support",
    "analyzeHeadless",
)

In [None]:
%%time
# Arguments for the analyzeHeadless run.
exe_path = "./Tenda/Tenda/"  # passed to -import
py_script = "gheadless.py"  # passed to -postScript
# The script search path (-scriptPath) and the project location are both the
# current working directory.
pyScript_path = tempProject = "./"

In [None]:
%%time
!{ghidra_headless} {tempProject} TeamProject -import {exe_path} -analysisTimeoutPerFile 30  -scriptPath {pyScript_path} -postScript {py_script} -deleteProject -log my_log.txt

<h2>Creating a Pandas Dataframe from a CSV</h2>

In [None]:
%%time
# gheadless.py writes data rows only (it never writes a header row), so the
# file is read with header=None and the columns are named in the next step.
df = pd.read_csv("ghidratest.csv", header=None)

<h2>Naming Pandas Columns</h2>

In [None]:
%%time
# Column names, in the same order as the fieldnames gheadless.py uses when it
# writes each CSV row.
ghidra_columns = (
    "File",
    "Architecture",
    "SHA256",
    "Strings",
    "Functions",
    "System_Xrefs",
    "Total_System_Xrefs",
    "Average_Cyclomatic_Complexity",
)
df.columns = list(ghidra_columns)

In [None]:
# Replace missing values (e.g. empty CSV fields) with the literal string "None".
# Rebinding the name instead of using inplace=True gives the same resulting frame.
df = df.fillna("None")

<h2>Verify the Pandas Output</h2>

In [None]:
%%time
# Display the DataFrame to check the rows loaded from the CSV.
df

<h2>Checking Pandas Datatypes</h2>

In [None]:
%%time
# Show the dtype pandas assigned to each column.
df.dtypes

<h2>Changing Datatypes to String</h2>

In [None]:
%%time
# Cast the free-text columns to str, one column at a time, in the same order
# as before.
text_columns = ["Strings", "Functions", "File", "Architecture", "System_Xrefs"]
for column in text_columns:
    df[column] = df[column].astype(str)

<h2>Searching for Features</h2>

In [None]:
# Boolean mask: rows whose Architecture string contains "big"
# (missing values count as non-matching).
arch_has_big = df["Architecture"].str.contains("big", na=False)
df[arch_has_big]

<h2>Using Query to Search for Features</h2>

In [None]:
# Keep only rows whose average cyclomatic complexity is greater than 3.
df.query("Average_Cyclomatic_Complexity > 3")

In [None]:
# New frame ordered by Total_System_Xrefs, highest first; df itself is unchanged.
df_sorted = df.sort_values(by="Total_System_Xrefs", ascending=False)

In [None]:
%%time
# Bar chart of Total_System_Xrefs per File, drawn from the frame sorted in
# descending order. Note that the count covers call sites of every name in the
# script's dangerous_functions list, not only `system`.
df_sorted.plot_bokeh.bar(
    x="File",
    y="Total_System_Xrefs",
    figsize=(900, 700),
    title="Potentially Dangerous Calls To System",
    xlabel="Binary",
    ylabel="Total",
    vertical_xlabel=True,
)

<h2>Creating Charts Using Pandas Bokeh</h2>

In [None]:
# Bar chart of Average_Cyclomatic_Complexity per File.
# This plots the unsorted df, not df_sorted.
df.plot_bokeh.bar(
    x="File",
    y=["Average_Cyclomatic_Complexity"],
    figsize=(900, 700),
    title="Average Cyclomatic Complexity",
    xlabel="File",
    ylabel="Average Cyclomatic Complexity",
    vertical_xlabel=True,
)

<h2>Reference Material</h2>

- 10 Minutes to Pandas: https://pandas.pydata.org/docs/user_guide/10min.html
- Pandas Cookbook: https://pandas.pydata.org/docs/user_guide/cookbook.html#cookbook
- Ghidra API: https://ghidra.re/ghidra_docs/api/index.html
- Ghidra Snippets: https://github.com/HackOvert/GhidraSnippets
- Auditing system calls for command injection vulnerabilities using Ghidra's PCode: https://youtu.be/UVNeg7Vqytc
- cetfor/SystemCallAuditorGhidra.py: https://github.com/HackOvert/PotentiallyVulnerable/blob/main/CWE-78/SystemCallAuditorGhidra.py