In [1]:
from pathlib import Path

import polars as pl

from addr_helpers import to_int_expr, addr_to_int, int_to_addr
from bin_compare_lib import parse, bin_compare

# Paths

In [2]:
# Directory that the dump folders used below are resolved against.
base = Path("..") / "fake_neomon_host" / "dumps"

# CSV path handed to bin_compare; the filtered result is saved next to it.
compare_p = Path("compare") / "comp.csv"

# Previously saved comparison that is read back in the
# "Remove heavily randomized areas" step.
compare_same_dump_p = Path("compare") / "comp_filt_same.csv"

In [3]:
# Input pair: the NeoMon_dump.dll files from the 108_1 and 108_2 folders.
# NOTE: the following cell rebinds p1, p2 and basediff, so on a
# top-to-bottom run these values are superseded.
p1 = base.joinpath("108_1", "NeoMon_dump.dll")
p2 = base.joinpath("108_2", "NeoMon_dump.dll")

# Expected bytes2 - bytes1 difference used by the pointer filter further down.
basediff = 0

In [4]:
# Input pair: the NeoMon_patched.dll files from the 108 and 110 folders.
p1 = base.joinpath("108", "NeoMon_patched.dll")
p2 = base.joinpath("110", "NeoMon_patched.dll")

# Expected bytes2 - bytes1 difference used by the pointer filter further down
# (presumably the image-base difference between the two dumps — TODO confirm).
basediff = 0x1100 - 0x1080

# Compare

## Load

In [5]:
b1 = parse(p1)

# One line per section: name, virtual address, virtual size.
for sec in b1.sections:
    print(sec.name, sec.virtual_address, sec.virtual_size)

    4096 139264
.rsrc 143360 4096
.idata   147456 4096
         151552 2699264
tvpxcrha 2850816 1617920
isgjaxhd 4468736 4096
.fake24d 4472832 4096
.fake24e 4476928 4096
.fake24f 4481024 4096
.fake250 4485120 4096
.idata 4489216 5632
.reloc 4497408 3072


In [6]:
b2 = parse(p2)

# One line per section: name, virtual address, virtual size.
for sec in b2.sections:
    print(sec.name, sec.virtual_address, sec.virtual_size)

    4096 139264
.rsrc 143360 4096
.idata   147456 4096
         151552 2699264
tvpxcrha 2850816 1617920
isgjaxhd 4468736 4096
.idata 4472832 5120
.reloc 4481024 2560


In [7]:
# Positions in b1.sections of the sections to compare.
secs = [0, 3]
# The first return value is not used: df is rebuilt from the per-section
# frames below.
df, sdf = bin_compare(compare_p, p1, p2, secs)

# Materialise the section list once instead of on every iteration.
b1_sections = list(b1.sections)

for i, si in enumerate(secs):
    # Use a dedicated name here. The original rebound the notebook-level
    # `base` (the dumps directory Path) to an int, which broke re-running
    # the path cells after this one.
    section_va = b1_sections[si].virtual_address

    # Shift the per-section `start` values by the section's virtual address.
    sdf[i] = sdf[i].with_columns(pl.col("start") + section_va)
    # Rewrite `start` via the project helper (later output shows it as
    # hex strings such as 0x1003).
    sdf[i] = int_to_addr(sdf[i], "start")
    # bytes1 is parsed as base-16 text further down, so two characters
    # correspond to one byte.
    sdf[i] = sdf[i].with_columns(
        (pl.col("bytes1").str.len_chars() // 2).alias("length")
    )

# Stack the per-section frames vertically into a single frame.
df = pl.concat(sdf)

## Remove IAT

In [8]:
# Address bounds of the IAT region to drop from the comparison.
iat_range = (0x16000, 0x16230)

iat_lo, iat_hi = iat_range
in_iat = to_int_expr('start').is_between(iat_lo, iat_hi)

# Keep only rows whose start address is outside the IAT bounds.
df = df.filter(~in_iat)

## Extract obvious pointers

In [9]:
# Temporary row index so the selected rows can be removed from df again.
df = df.with_row_index()

bytes1_int = pl.col("bytes1").str.to_integer(base=16)
bytes2_int = pl.col("bytes2").str.to_integer(base=16)

# The filters run one after another, in this order: the hex -> integer
# parse is only evaluated on rows that already passed the 2-byte and
# '10'-prefix checks.
pure = (
    df.filter(pl.col("length") == 2)
    .filter(pl.col("bytes1").str.starts_with('10'))
    .filter((bytes2_int - bytes1_int) == basediff)
)

# Everything that was classified as a plain pointer leaves df.
df = df.join(pure, on='index', how='anti')

# Drop the temporary index column from both frames.
kept_columns = ["start", "length", "bytes1", "bytes2"]
pure = pure.select(kept_columns)
df = df.select(kept_columns)

print(pure.shape, df.shape)
pure.head(3)

(4134, 4) (151, 4)


start,length,bytes1,bytes2
str,u32,str,str
"""0x1003""",2,"""1081""","""1101"""
"""0x100d""",2,"""1081""","""1101"""
"""0x1017""",2,"""1081""","""1101"""


## Remove heavily randomized areas

The areas are loaded from the saved `comp_filt_same.csv`, which is a comparison of two dumps that share the same image base.

In [10]:
same = pl.read_csv(compare_same_dump_p)

rows_before = df.height

# Drop every row whose (start, length) pair also occurs in the saved
# comparison.
df = df.join(same, on=["start", "length"], how="anti")

print(f'{rows_before} -> {df.height}')

151 -> 110


## Save

In [11]:
# Output sits next to compare_p, with '_filt' inserted before the extension.
filtered_csv = str(compare_p).removesuffix('.csv') + '_filt.csv'
df.write_csv(filtered_csv)

In [19]:
import os
import shutil

# One-off maintenance: DLLs in the shifts directory whose stem is 12
# characters long are renamed to the first three characters of the stem
# followed by '00000'.
shifts_dir = Path('../fake_neomon_host/shifts')
long_stem_len = len('103000000000')

for n in sorted(os.listdir(shifts_dir)):
    # Only touch .dll entries. The original appended '.dll' to every
    # directory entry, including non-DLL files and subdirectories.
    if not n.endswith('.dll'):
        continue

    stem = n.removesuffix('.dll')
    if len(stem) != long_stem_len:
        # Name is not in the long form; the original issued a no-op
        # self-move here.
        continue

    dst = shifts_dir / (stem[:3] + '00000' + '.dll')

    # Two long names sharing the same first three characters map to the
    # same target; never overwrite an existing file silently.
    if dst.exists():
        print(f'skipping {n}: {dst.name} already exists')
        continue

    shutil.move(shifts_dir / n, dst)