Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow efficient diffing from the command line (integrated with idahunt) #246

Merged
merged 4 commits into from
Dec 15, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions diaphora.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@

from diaphora_heuristics import *

from pygments.lexers import NasmLexer, CppLexer, DiffLexer
from pygments.formatters import HtmlFormatter

from jkutils.kfuzzy import CKoretFuzzyHashing
from jkutils.factor import (FACTORS_CACHE, difference, difference_ratio,
primesbelow as primes)
Expand Down Expand Up @@ -103,7 +106,7 @@ def ast_ratio(ast1, ast2):
#-------------------------------------------------------------------------------
def log(msg):
    """Print a timestamped, "[diaphora]"-tagged message from the main thread.

    Calls made from any other thread print nothing.

    msg: value interpolated into the log line with "%s".
    """
    # Compare against threading.main_thread() rather than testing for the
    # private threading._MainThread class.
    if threading.current_thread() is threading.main_thread():
        # Only the tagged form is emitted; the older untagged print is gone so
        # each message appears exactly once.
        print("[diaphora][%s] %s" % (time.asctime(), msg))
joxeankoret marked this conversation as resolved.
Show resolved Hide resolved

#-------------------------------------------------------------------------------
def log_refresh(msg, show=False, do_log=True):
Expand Down Expand Up @@ -2068,13 +2071,19 @@ def diff(self, db):
cur.close()
return True

g_debug = False
joxeankoret marked this conversation as resolved.
Show resolved Hide resolved
if __name__ == "__main__":
version_info = sys.version_info
if version_info[0] == 2:
log("WARNING: You are using Python 2 instead of Python 3. The main branch of Diaphora works exclusively with Python 3.")
log("TIP: There are other branches that contain backward compatability.")
log("TIP: There are other branches that contain backward compatibility.")

do_diff = True
if g_debug:
log("DIAPHORA_AUTO_DIFF=%s" % os.getenv("DIAPHORA_AUTO_DIFF"))
log("DIAPHORA_DB1=%s" % os.getenv("DIAPHORA_DB1"))
log("DIAPHORA_DB2=%s" % os.getenv("DIAPHORA_DB2"))
log("DIAPHORA_DIFF_OUT=%s" % os.getenv("DIAPHORA_DIFF_OUT"))
if os.getenv("DIAPHORA_AUTO_DIFF") is not None:
db1 = os.getenv("DIAPHORA_DB1")
if db1 is None:
Expand Down
112 changes: 98 additions & 14 deletions diaphora_ida.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@

#-------------------------------------------------------------------------------
def log(message):
    """Send a timestamped, "[diaphora_ida]"-tagged line to msg().

    message: value interpolated into the log line with "%s".
    """
    # Only the tagged form is emitted; the older untagged msg() call is gone
    # so each message appears exactly once.
    msg("[diaphora_ida][%s] %s\n" % (time.asctime(), message))

#-------------------------------------------------------------------------------
def log_refresh(msg, show=False, do_log=True):
Expand Down Expand Up @@ -965,7 +965,7 @@ def import_definitions_only(self, filename):
self.import_til()
self.import_definitions()

def show_asm_diff(self, item):
def generate_asm_diff(self, ea1, ea2, error_func=log):
cur = self.db_cursor()
sql = """select *
from (
Expand All @@ -978,12 +978,13 @@ def show_asm_diff(self, item):
where address = ?
and assembly is not null)
order by 4 asc"""
ea1 = str(int(item[1], 16))
ea2 = str(int(item[3], 16))
ea1 = str(int(ea1, 16))
ea2 = str(int(ea2, 16))
cur.execute(sql, (ea1, ea2))
rows = cur.fetchall()
res = None
if len(rows) != 2:
warning("Sorry, there is no assembly available for either the first or the second database.")
error_func("Sorry, there is no assembly available for either the first or the second database.")
else:
row1 = rows[0]
row2 = rows[1]
Expand All @@ -993,18 +994,31 @@ def show_asm_diff(self, item):
asm2 = self.prettify_asm(row2["assembly"])
buf1 = "%s proc near\n%s\n%s endp" % (row1["name"], asm1, row1["name"])
buf2 = "%s proc near\n%s\n%s endp" % (row2["name"], asm2, row2["name"])

fmt = HtmlFormatter()
fmt.noclasses = True
fmt.linenos = False
fmt.nobackground = True
src = html_diff.make_file(buf1.split("\n"), buf2.split("\n"), fmt, NasmLexer())

title = "Diff assembler %s - %s" % (row1["name"], row2["name"])
res = (src, title)

cur.close()
return res

def show_asm_diff(self, item):
    """Display the assembly diff for a result row in an HTML viewer.

    item: result row; item[1] and item[3] are passed to generate_asm_diff()
    as the two addresses. Problems are reported through warning().
    Nothing is shown when generate_asm_diff() returns a falsy result.
    """
    diff = self.generate_asm_diff(item[1], item[3], error_func=warning)
    if not diff:
        return
    html, caption = diff
    viewer = CHtmlViewer()
    viewer.Show(html, caption)

cur.close()
def save_asm_diff(self, ea1, ea2, filename):
    """Write the assembly diff of two functions to a file.

    ea1, ea2: addresses forwarded unchanged to generate_asm_diff().
    filename: path of the output file, opened in text write mode.

    No file is created when generate_asm_diff() returns a falsy result.
    """
    res = self.generate_asm_diff(ea1, ea2)
    if res:
        (src, _) = res
        # The context manager closes (and flushes) the handle even if the
        # write raises, instead of relying on garbage collection.
        with open(filename, "w") as f:
            f.write(src)

def import_one(self, item):
ret = ask_yn(1, "AUTOHIDE DATABASE\nDo you want to import all the type libraries, structs and enumerations?")
Expand Down Expand Up @@ -1079,7 +1093,7 @@ def show_pseudo(self, item, primary):
cdiffer.Show(src, title)
cur.close()

def show_pseudo_diff(self, item, html = True):
def generate_pseudo_diff(self, ea1, ea2, html = True, error_func=log):
cur = self.db_cursor()
sql = """select *
from (
Expand All @@ -1092,12 +1106,13 @@ def show_pseudo_diff(self, item, html = True):
where address = ?
and pseudocode is not null)
order by 4 asc"""
ea1 = str(int(item[1], 16))
ea2 = str(int(item[3], 16))
ea1 = str(int(ea1, 16))
ea2 = str(int(ea2, 16))
cur.execute(sql, (ea1, ea2))
rows = cur.fetchall()
res = None
if len(rows) != 2:
warning("Sorry, there is no pseudo-code available for either the first or the second database.")
error_func("Sorry, there is no pseudo-code available for either the first or the second database.")
else:
row1 = rows[0]
row2 = rows[1]
Expand All @@ -1112,7 +1127,7 @@ def show_pseudo_diff(self, item, html = True):
buf2 = row2["prototype"] + "\n" + row2["pseudocode"]

if buf1 == buf2:
warning("Both pseudo-codes are equal.")
error_func("Both pseudo-codes are equal.")

fmt = HtmlFormatter()
fmt.noclasses = True
Expand All @@ -1125,16 +1140,29 @@ def show_pseudo_diff(self, item, html = True):
tmp.append(line.strip("\n"))
tmp = tmp[2:]
buf = "\n".join(tmp)

src = highlight(buf, DiffLexer(), fmt)
else:
src = html_diff.make_file(buf2.split("\n"), buf1.split("\n"), fmt, CppLexer())

title = "Diff pseudo-code %s - %s" % (row2["name"], row1["name"])
res = (src, title)

cur.close()
return res

def show_pseudo_diff(self, item, html = True):
    """Display the pseudo-code diff for a result row in an HTML viewer.

    item: result row; item[1] and item[3] are passed to
    generate_pseudo_diff() as the two addresses.
    html: forwarded unchanged to generate_pseudo_diff().

    Problems are reported through warning(). Nothing is shown when
    generate_pseudo_diff() returns a falsy result.
    """
    diff = self.generate_pseudo_diff(item[1], item[3], html=html, error_func=warning)
    if not diff:
        return
    markup, caption = diff
    viewer = CHtmlViewer()
    viewer.Show(markup, caption)

cur.close()
def save_pseudo_diff(self, ea1, ea2, filename):
    """Write the pseudo-code diff of two functions to a file.

    ea1, ea2: addresses forwarded unchanged to generate_pseudo_diff(),
    which is always called with html=True.
    filename: path of the output file, opened in text write mode.

    No file is created when generate_pseudo_diff() returns a falsy result.
    """
    res = self.generate_pseudo_diff(ea1, ea2, html=True)
    if res:
        (src, _) = res
        # The context manager closes (and flushes) the handle even if the
        # write raises, instead of relying on garbage collection.
        with open(filename, "w") as f:
            f.write(src)

def diff_external(self, item):
cmd_line = None
Expand Down Expand Up @@ -2574,6 +2602,16 @@ def _diff_or_export(use_ui, **options):

return bd

# XXX - db2 is unused so could be removed?
def _generate_html(db1, db2, diff_db, ea1, ea2, html_asm, html_pseudo):
    """Write the pseudo-code and assembly HTML diffs for one address pair.

    Builds a CIDABinDiff on db1, attaches a SQLite connection to db1 as its
    `db`, loads the results stored in diff_db, then saves the pseudo-code
    diff to html_pseudo and the assembly diff to html_asm.

    db2 is accepted but not used by this function.
    """
    differ = CIDABinDiff(db1)
    differ.db = sqlite3.connect(db1, check_same_thread=True)
    # Text columns come back as str and rows as sqlite3.Row objects.
    differ.db.text_factory = str
    differ.db.row_factory = sqlite3.Row
    differ.load_results(diff_db)
    # The pseudo-code diff is written first, then the assembly diff.
    differ.save_pseudo_diff(ea1, ea2, html_pseudo)
    differ.save_asm_diff(ea1, ea2, html_asm)

#-------------------------------------------------------------------------------
class BinDiffOptions:
def __init__(self, **kwargs):
Expand Down Expand Up @@ -2763,6 +2801,7 @@ def remove_file(filename):
cur.close()

#-------------------------------------------------------------------------------
g_debug = False
def main():
global g_bindiff
if os.getenv("DIAPHORA_AUTO") is not None:
Expand Down Expand Up @@ -2806,6 +2845,51 @@ def main():
os.remove("%s-crash" % file_out)

idaapi.qexit(0)
# EXPORT - works with pseudocode, better than above
elif os.getenv("DIAPHORA_AUTO2") is not None:
if g_debug:
log("Handling DIAPHORA_AUTO2")
log("DIAPHORA_EXPORT_FILE=%s" % os.getenv("DIAPHORA_EXPORT_FILE"))
file_out = os.getenv("DIAPHORA_EXPORT_FILE")
if file_out is None:
raise Exception("No export file specified!")
_diff_or_export(False, file_out=file_out)
joxeankoret marked this conversation as resolved.
Show resolved Hide resolved
idaapi.qexit(0)
# DIFF-SHOW
elif os.getenv("DIAPHORA_AUTO4") is not None:
joxeankoret marked this conversation as resolved.
Show resolved Hide resolved
if g_debug:
log("Handling DIAPHORA_AUTO4")
log("DIAPHORA_AUTO4=%s" % os.getenv("DIAPHORA_AUTO4"))
log("DIAPHORA_DB1=%s" % os.getenv("DIAPHORA_DB1"))
log("DIAPHORA_DB2=%s" % os.getenv("DIAPHORA_DB2"))
log("DIAPHORA_DIFF=%s" % os.getenv("DIAPHORA_DIFF"))
log("DIAPHORA_EA1=%s" % os.getenv("DIAPHORA_EA1"))
log("DIAPHORA_EA2=%s" % os.getenv("DIAPHORA_EA2"))
log("DIAPHORA_HTML_ASM=%s" % os.getenv("DIAPHORA_HTML_ASM"))
log("DIAPHORA_HTML_PSEUDO=%s" % os.getenv("DIAPHORA_HTML_PSEUDO"))
db1 = os.getenv("DIAPHORA_DB1")
if db1 is None:
raise Exception("No database file specified!")
db2 = os.getenv("DIAPHORA_DB2")
joxeankoret marked this conversation as resolved.
Show resolved Hide resolved
if db2 is None:
raise Exception("No database file to diff against specified!")
diff_db = os.getenv("DIAPHORA_DIFF")
if diff_db is None:
raise Exception("No diff database file for diff specified!")
ea1 = os.getenv("DIAPHORA_EA1")
if ea1 is None:
raise Exception("No address 1 specified!")
ea2 = os.getenv("DIAPHORA_EA2")
if ea2 is None:
raise Exception("No address 2 specified!")
html_asm = os.getenv("DIAPHORA_HTML_ASM")
if html_asm is None:
raise Exception("No html output file for asm specified!")
html_pseudo = os.getenv("DIAPHORA_HTML_PSEUDO")
if html_pseudo is None:
raise Exception("No html output file for pseudo specified!")
_generate_html(db1, db2, diff_db, ea1, ea2, html_asm, html_pseudo)
idaapi.qexit(0)
else:
_diff_or_export(True)

Expand Down