In [1]:
%%time
import pandas as pd
import seaborn as sns
import os
import hashlib
import math
import csv
import itertools
import pandas_bokeh
from collections import Counter
import matplotlib.pyplot as plt

pandas_bokeh.output_notebook()
import jupyter_black

jupyter_black.load()

CPU times: user 2.76 s, sys: 431 ms, total: 3.19 s
Wall time: 1.99 s


<h1>Ghidra Headless Script</h1>

In [2]:
%%time
%%writefile gheadless.py
import csv
from ghidra.program.util import DefinedDataIterator, CyclomaticComplexity

dangerous_functions = ["system", "execve", "execle", "execvp", "execlp", "doSystemCmd"]

fm = currentProgram.getFunctionManager()

# Collecting information
files = currentProgram.getName()
arches = currentProgram.getLanguage().toString()
hashes = currentProgram.getExecutableSHA256()
strings = [str(s) for s in DefinedDataIterator.definedStrings(currentProgram)]
all_funcs = list(fm.getFunctions(True))
total_cc = 0
system_xrefs_details = []

# Find dangerous functions and their xrefs
for func in all_funcs:
    if func.getName() in dangerous_functions:
        entry_point = func.getEntryPoint()
        references = getReferencesTo(entry_point)
        for xref in references:
            # Fetching the referencing function details
            ref_func = fm.getFunctionContaining(xref.getFromAddress())
            if ref_func:
                # Collecting address and function name
                detail = "{} ({})".format(xref.getFromAddress(), ref_func.getName())
                system_xrefs_details.append(detail)

num_calls_in_system_xrefs = len(system_xrefs_details)

# Calculating average cyclomatic complexity
for func in all_funcs:
    total_cc += CyclomaticComplexity().calculateCyclomaticComplexity(func, monitor)

# Calculating average cyclomatic complexity
num_funcs = len(all_funcs)
average_cc = total_cc / num_funcs if num_funcs > 0 else 0

# Saving results to CSV
csv_file_path = "./ghidratest.csv"
with open(csv_file_path, mode="a") as csv_file:
    fieldnames = [
        "File",
        "Architecture",
        "SHA256",
        "Strings",
        "Functions",
        "System_Xrefs",
        "Total_System_Xrefs",
        "Average_Cyclomatic_Complexity",
    ]
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    # Writing data
    writer.writerow(
        {
            "File": files,
            "Architecture": arches,
            "SHA256": hashes,
            "Strings": ", ".join(strings),
            "Functions": ", ".join([str(func) for func in all_funcs]),
            "System_Xrefs": "; ".join(system_xrefs_details),
            "Total_System_Xrefs": num_calls_in_system_xrefs,
            "Average_Cyclomatic_Complexity": round(average_cc, 2),
        }
    )

Writing gheadless.py
CPU times: user 1.99 ms, sys: 146 μs, total: 2.13 ms
Wall time: 2.11 ms


In [3]:
%%time
!chmod +x gheadless.py

CPU times: user 35.9 ms, sys: 29.5 ms, total: 65.4 ms
Wall time: 150 ms


In [4]:
%%time
ghidra_headless = "/opt/src/ghidra_11.1.2_PUBLIC/support/analyzeHeadless"

CPU times: user 3 μs, sys: 1 μs, total: 4 μs
Wall time: 5.96 μs


In [5]:
!wget https://github.com/therealsaumil/emux/raw/master/files/emux/TRI227WF/rootfs.tar.bz2
!wget https://github.com/therealsaumil/emux/raw/master/files/emux/AC15/squashfs-root.tar.bz2

--2024-08-09 15:39:18--  https://github.com/therealsaumil/emux/raw/master/files/emux/TRI227WF/rootfs.tar.bz2
Resolving github.com (github.com)... 20.26.156.215
Connecting to github.com (github.com)|20.26.156.215|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/therealsaumil/emux/master/files/emux/TRI227WF/rootfs.tar.bz2 [following]
--2024-08-09 15:39:18--  https://raw.githubusercontent.com/therealsaumil/emux/master/files/emux/TRI227WF/rootfs.tar.bz2
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 6018759 (5.7M) [application/octet-stream]
Saving to: ‘rootfs.tar.bz2’


2024-08-09 15:39:19 (18.5 MB/s) - ‘rootfs.tar.bz2’ saved [6018759/6018759]

--2024-08-09 15:39:19--  https://github.com/therealsau

In [6]:
!bzip2 -d ./rootfs.tar.bz2
!bzip2 -d ./squashfs-root.tar.bz2
!tar -xvf ./rootfs.tar
!tar -xvf ./squashfs-root.tar

rootfs/
rootfs/lib/
rootfs/lib/libnsl.so.0
rootfs/lib/libthread_db-0.9.30.2.so
rootfs/lib/libupnp.so.2.0.3
rootfs/lib/libutil.so.0
rootfs/lib/ld-linux.so.3
rootfs/lib/libupnp.so
rootfs/lib/libixml.so
rootfs/lib/libpthread.so.0
rootfs/lib/librt-0.9.30.2.so
rootfs/lib/libthreadutil.so.2
rootfs/lib/ld-uClibc.so.0
rootfs/lib/libgcc_s.so
rootfs/lib/libm.so.0
rootfs/lib/libgcc_s.so.1
rootfs/lib/libcrypt.so.0
rootfs/lib/libpthread-0.9.30.2.so
rootfs/lib/libresolv.so.0
rootfs/lib/libcrypt-0.9.30.2.so
rootfs/lib/libdl-0.9.30.2.so
rootfs/lib/libiw.so.29
rootfs/lib/libixml.so.2
rootfs/lib/libdl.so.0
rootfs/lib/libupnp.so.2
rootfs/lib/libutil-0.9.30.2.so
rootfs/lib/ld-uClibc-0.9.30.2.so
rootfs/lib/librt.so.0
rootfs/lib/libc.so.0
rootfs/lib/libresolv-0.9.30.2.so
rootfs/lib/libthreadutil.so
rootfs/lib/libthread_db.so.1
rootfs/lib/libnsl-0.9.30.2.so
rootfs/lib/libm-0.9.30.2.so
rootfs/lib/libixml.so.2.0.3
rootfs/lib/libthreadutil.so.2.1.0
rootfs/lib/libuClibc-0.9.30.2.so
rootfs/lib/modules/
rootfs/lib

In [7]:
!mkdir ./AC15
!cp ./squashfs-root/bin/httpd ./AC15/AC15_httpd
!cp ./rootfs/usr/bin/webs ./AC15/TRI227WF_webs

In [8]:
%%time
exe_path = "./AC15"
pyScript_path = "./"
tempProject = "./"
py_script = "./gheadless.py"

CPU times: user 10 μs, sys: 0 ns, total: 10 μs
Wall time: 16.9 μs


In [9]:
%%time
!{ghidra_headless} {tempProject} TeamProject -import {exe_path} -analysisTimeoutPerFile 30  -scriptPath {pyScript_path} -postScript {py_script} -deleteProject -log my_log.txt

openjdk version "21.0.4" 2024-07-16
OpenJDK Runtime Environment (build 21.0.4+7-Ubuntu-1ubuntu222.04)
OpenJDK 64-Bit Server VM (build 21.0.4+7-Ubuntu-1ubuntu222.04, mixed mode)
INFO  Using log config file: jar:file:/opt/src/ghidra_11.1.2_PUBLIC/Ghidra/Framework/Generic/lib/Generic.jar!/generic.log4j.xml (LoggingInitialization)  
INFO  Using log file: my_log.txt (LoggingInitialization)  
INFO  Loading user preferences: /root/.config/ghidra/ghidra_11.1.2_PUBLIC/preferences (Preferences)  
INFO  Searching for classes... (ClassSearcher)  
INFO  Ignoring class 'generic.jar.GClassLoader' from '/opt/src/ghidra_11.1.2_PUBLIC/Ghidra/Framework/Utility/lib/Utility.jar'. Already found at '/opt/src/ghidra_11.1.2_PUBLIC/Ghidra/Framework/Utility/lib/Utility.jar'. (ClassSearcher)  
INFO  Ignoring class 'ghidra.GhidraClassLoader' from '/opt/src/ghidra_11.1.2_PUBLIC/Ghidra/Framework/Utility/lib/Utility.jar'. Already found at '/opt/src/ghidra_11.1.2_PUBLIC/Ghidra/Framework/Utility/lib/Utility.jar'. (Clas

<h2>Creating a Pandas Dataframe from a CSV</h2>

In [10]:
%%time
df = pd.read_csv("./ghidratest.csv", header=None)

CPU times: user 7.14 ms, sys: 7.48 ms, total: 14.6 ms
Wall time: 15.1 ms


<h2>Naming Pandas Columns</h2>

In [11]:
%%time
df.columns = [
    "File",
    "Architecture",
    "SHA256",
    "Strings",
    "Functions",
    "System_Xrefs",
    "Total_System_Xrefs",
    "Average_Cyclomatic_Complexity",
]

CPU times: user 666 μs, sys: 0 ns, total: 666 μs
Wall time: 672 μs


In [12]:
df.fillna("None", inplace=True)

<h2>Verify the Pandas Output</h2>

In [13]:
%%time
df

CPU times: user 2 μs, sys: 1 μs, total: 3 μs
Wall time: 5.72 μs


Unnamed: 0,File,Architecture,SHA256,Strings,Functions,System_Xrefs,Total_System_Xrefs,Average_Cyclomatic_Complexity
0,AC15_httpd,ARM/little/32/v8,e2880dc6a19a9ac5122d8686047db12a223d062324de7c...,"ds ""ELF"", ds ""/lib/ld-uClibc.so.0"", utf8 u8""li...","_init, <EXTERNAL>::tpi_wan_get_extra_ip_info, ...",0003db54 (FUN_0003da1c); 0004fc48 (TendaTelnet...,162,6.0
1,TRI227WF_webs,ARM/little/32/v8,800b296c5baa8d5d2dc5fba26ac2fc6c0e2f92b13e9f4b...,"ds ""ELF"", ds ""/lib/ld-linux.so.3"", utf8 u8""lib...","_DT_INIT, <EXTERNAL>::clock_gettime, <EXTERNAL...",00009934 (FUN_0000987c); 0003d8f0 (FUN_0003d87...,4,5.0


<h2>Checking Pandas Datatypes</h2>

In [14]:
%%time
df.dtypes

CPU times: user 989 μs, sys: 0 ns, total: 989 μs
Wall time: 1 ms


File                              object
Architecture                      object
SHA256                            object
Strings                           object
Functions                         object
System_Xrefs                      object
Total_System_Xrefs                 int64
Average_Cyclomatic_Complexity    float64
dtype: object

<h2>Changing Datatypes to String</h2>

In [15]:
%%time
df["Strings"] = df["Strings"].astype(str)
df["Functions"] = df["Functions"].astype(str)
df["File"] = df["File"].astype(str)
df["Architecture"] = df["Architecture"].astype(str)
df["System_Xrefs"] = df["System_Xrefs"].astype(str)

CPU times: user 2.74 ms, sys: 0 ns, total: 2.74 ms
Wall time: 2.59 ms


<h2>Searching for Features</h2>

In [16]:
df[df["Architecture"].str.contains("little", na=False)]

Unnamed: 0,File,Architecture,SHA256,Strings,Functions,System_Xrefs,Total_System_Xrefs,Average_Cyclomatic_Complexity
0,AC15_httpd,ARM/little/32/v8,e2880dc6a19a9ac5122d8686047db12a223d062324de7c...,"ds ""ELF"", ds ""/lib/ld-uClibc.so.0"", utf8 u8""li...","_init, <EXTERNAL>::tpi_wan_get_extra_ip_info, ...",0003db54 (FUN_0003da1c); 0004fc48 (TendaTelnet...,162,6.0
1,TRI227WF_webs,ARM/little/32/v8,800b296c5baa8d5d2dc5fba26ac2fc6c0e2f92b13e9f4b...,"ds ""ELF"", ds ""/lib/ld-linux.so.3"", utf8 u8""lib...","_DT_INIT, <EXTERNAL>::clock_gettime, <EXTERNAL...",00009934 (FUN_0000987c); 0003d8f0 (FUN_0003d87...,4,5.0


<h2>Using Query to Search for Features</h2>

In [17]:
df.query("Average_Cyclomatic_Complexity > 3")

Unnamed: 0,File,Architecture,SHA256,Strings,Functions,System_Xrefs,Total_System_Xrefs,Average_Cyclomatic_Complexity
0,AC15_httpd,ARM/little/32/v8,e2880dc6a19a9ac5122d8686047db12a223d062324de7c...,"ds ""ELF"", ds ""/lib/ld-uClibc.so.0"", utf8 u8""li...","_init, <EXTERNAL>::tpi_wan_get_extra_ip_info, ...",0003db54 (FUN_0003da1c); 0004fc48 (TendaTelnet...,162,6.0
1,TRI227WF_webs,ARM/little/32/v8,800b296c5baa8d5d2dc5fba26ac2fc6c0e2f92b13e9f4b...,"ds ""ELF"", ds ""/lib/ld-linux.so.3"", utf8 u8""lib...","_DT_INIT, <EXTERNAL>::clock_gettime, <EXTERNAL...",00009934 (FUN_0000987c); 0003d8f0 (FUN_0003d87...,4,5.0


In [18]:
df_sorted = df.sort_values(by="Total_System_Xrefs", ascending=False)

In [19]:
%%time
df_sorted.plot_bokeh.bar(
    x="File",
    y="Total_System_Xrefs",
    figsize=(900, 700),
    title="Potentially Dangerous Calls To System",
    xlabel="Binary",
    ylabel="Total",
    vertical_xlabel=True,
)

CPU times: user 104 ms, sys: 0 ns, total: 104 ms
Wall time: 102 ms


<h2>Creating Charts Using Pandas Bokeh</h2>

In [20]:
df.plot_bokeh.bar(
    x="File",
    y=["Average_Cyclomatic_Complexity"],
    figsize=(900, 700),
    title="Average Cyclomatic Complexity",
    xlabel="File",
    ylabel="Average Cyclomatic Complexity",
    vertical_xlabel=True,
)

<h2>Reference Material</h2>

- 10 Minutes to Pandas: https://pandas.pydata.org/docs/user_guide/10min.html
- Pandas Cookbook: https://pandas.pydata.org/docs/user_guide/cookbook.html#cookbook
- Ghidra API: https://ghidra.re/ghidra_docs/api/index.html
- EMUX: https://github.com/therealsaumil/emux
- Ghidra Snippets: https://github.com/HackOvert/GhidraSnippets
- Auditing system calls for command injection vulnerabilities using Ghidra's PCode: https://youtu.be/UVNeg7Vqytc
- cetfor/SystemCallAuditorGhidra.py: https://github.com/HackOvert/PotentiallyVulnerable/blob/main/CWE-78/SystemCallAuditorGhidra.py