In [1]:
import mmap
import re
import shutil
from pathlib import Path
from typing import cast

import lief
import pefile
import polars as pl
import numpy as np

Before starting:

Launch `fake_neomon_host/host-bind.exe` with x64dbg under ScyllaHide and reach the NeoMon.dll OEP (0x10013903). Make sure ASLR is off for both .exe and .dll.

Then collect data:
- x64dbg/Symbols -> `dumps/dump-imports.csv`
- x64dbg/ModulePathListExports -> `NeoMon.dll.export.full.csv`
- x64dbg/ModulePathListImports -> `NeoMon.dll.import.full.csv`
- dump 0x0213 .. 0x0218 sections that themida in NeoMon.dll has created, to `fake_neomon_host/neomon213.bin` (213, 214, etc.)
- Dump the NeoMon.dll using Scylla (you can specify 0x13903 OEP, but do not import IAT or fix dump)
- Open that NeoMon_dump.dll in IDA Pro and launch `scripts/extract-old-iat.py`
- and `scripts/extract-byte-calls.py`

# Paths

Everything here is done on a simple dllhost exe `fake_neomon_host/host-bind.exe`

Dumps are made once the _DllEntryPoint is reached (0x13903)

In [2]:
base = Path("../neomon-dump/dumps")
base_patch = Path("../neomon-dump/patches")

In [3]:
# imports csv

dump_imports_p = base / "dump-imports.csv"
old_iat_p = base / "old-iat.csv"
byte_calls_p = base / "broken-byte-calls.csv"

module_imports_p = base / "../NeoMon.dll.import.full.csv"
module_exports_p = base / "../NeoMon.dll.export.full.csv"

In [4]:
# table of manually resolved "stolen code" imports
manmapped_p = Path("manmapped.csv")

In [5]:
# exports csv
patch_thunks_p = base_patch / "thunks_patch.csv"
patch_calls_p = base_patch / "calls_patch.csv"
patch_iat_p = base_patch / "iat_patch.csv"

In [6]:
base_to_exe = Path("../fake_neomon_host")
original_dump_path = base_to_exe / "NeoMon_dump.dll"
patched_path = base_to_exe / "NeoMon_patched.dll"

In [7]:
from addr_helpers import int_to_LE, rel_call, to_bin, from_bin

patch_schema = {
    "patch_addr": pl.String,
    "mem_old": pl.String,
    "patch": pl.String,
}

# Parsing import table

In [8]:
def to_int_expr(col: str = "Address") -> pl.Expr:
    return pl.col(col).str.slice(2).str.to_integer(base=16)


def addr_to_int(df: pl.DataFrame, col: str = "Address") -> pl.DataFrame:
    return df.with_columns(to_int_expr(col))


def int_to_addr(
    df: pl.DataFrame, col: str = "Address", sort: bool = False
) -> pl.DataFrame:
    arr = df[col].to_numpy()
    hex_arr = np.char.add("0x", np.char.lower(np.char.mod("%x", arr)))
    return normalize_address(
        df.with_columns(pl.Series(col, hex_arr)), col=col, sort=sort
    )


def normalize_address(
    df: pl.DataFrame, col: str = "Address", sort: bool = True
) -> pl.DataFrame:
    df = df.with_columns(
        (
            "0x"
            + pl.when(pl.col(col).str.starts_with("0x"))
            .then(pl.col(col).str.strip_prefix("0x"))
            .otherwise(col)
            .str.to_lowercase()
            .str.strip_chars_start("0")
        ).alias(col)
    ).with_columns(
        pl.when(pl.col(col) == "0x").then(pl.lit("0x0").alias(col)).otherwise(col)
    )
    if sort:
        return df.sort(to_int_expr(col))
    return df

To match asm calls and jumps with functions, we collect all exported functions to `dump_imports`

In [9]:
dump_imports = pl.read_csv(dump_imports_p)
dump_imports.columns = [
    "Address",
    "Type",
    "Ordinal",
    "Symbol",
    "undecorated",
]
dump_imports = dump_imports.drop("undecorated")  # doesn't make any sense
dump_imports = dump_imports.with_columns(
    pl.when(pl.col("Type") == "Экспорт")
    .then(pl.lit("Export").alias("Type"))
    .otherwise(pl.lit("Import").alias("Type"))
)  # russian to english
dump_imports = dump_imports.filter(
    pl.col("Symbol") != "OptionalHeader.AddressOfEntryPoint"
)  # OEPs are never referenced
dump_imports = normalize_address(dump_imports, "Address")  # remove leading 0s

print(dump_imports.shape)
dump_imports.sample(3)

(33138, 4)


Address,Type,Ordinal,Symbol
str,str,i64,str
"""0x771825f0""","""Export""",966,"""_unloaddll"""
"""0x77c14a00""","""Export""",575,"""NtRollbackTransaction"""
"""0x75ad2b70""","""Export""",2617,"""Ordinal#2617"""


Default view doesn't mention module names. ModulePathList plugin does.

module_exports collects exported entries for each module. \
module_imports collects IAT for each module (IAT may be hidden by themida, these are not parsed)

In [10]:
module_exports = pl.read_csv(module_exports_p)
module_exports = module_exports.filter(
    ~pl.col("Module").str.ends_with(".exe")
)  # not relevant
module_exports = normalize_address(module_exports, "Address")  # remove leading 0s
module_exports = module_exports.unique("Address")  # remove aliases
print("Exports:", module_exports.shape)

module_imports = (
    pl.read_csv(module_imports_p)
    .drop("Function", "Module")
    .rename(
        {
            "Address": "IAT_addr",  # IAT line address
            "Bytes": "Address",  # IAT line content
            "Modname": "Module",
            "Symname": "Function",
        }
    )
)
module_imports = normalize_address(module_imports, "Address")
module_imports = normalize_address(module_imports, "IAT_addr")
print("Imports:", module_imports.shape)

Exports: (22855, 3)
Imports: (7437, 4)


Themida sometimes obfuscates calls using 3rd party IAT. We collect them all to deobsuscate back. \
`proxy_imports` now contains IATs of modules. \
`dump_imports` now contains only exported functions (to map module names)

In [11]:
proxy_imports = dump_imports.join(
    module_imports.rename({"IAT_addr": "Address", "Address": "Target"}),
    on="Address",
    how="left",
)
proxy_imports = proxy_imports.filter(pl.col("Type") == "Import").drop(
    "Type", "Ordinal", "Symbol", "Module"
)

dump_imports = dump_imports.filter(pl.col("Type") == "Export").drop("Type")
# map with module names
dump_imports = dump_imports.join(module_exports, on="Address", how="left")
# Names only seem correct for exported symbols
dump_imports = dump_imports.drop("Function").rename({"Symbol": "Function"})

# map with module names
proxy_imports = proxy_imports.join(
    dump_imports.select("Address", "Module"), left_on="Target", right_on="Address"
)

print("IAT Imports:", proxy_imports.shape)
print("True imports:", dump_imports.shape)

IAT Imports: (8260, 4)
True imports: (25701, 4)


In [12]:
assert dump_imports.filter(pl.col("Module").is_null()).shape[0] == 0
assert proxy_imports.filter(pl.col("Module").is_null()).shape[0] == 0

# Gathering imports from old IAT

In [13]:
iat = pl.read_csv(old_iat_p)
print(iat.shape)

iat = iat.rename({"Address": "Calladdr", "Destination": "Address"})
iat = iat.with_columns(("0x" + pl.col("Address").str.to_lowercase()).alias("Address"))
iat = normalize_address(iat, sort=False)
iat = normalize_address(iat, "Calladdr", sort=False)
iat = iat.join(dump_imports.unique("Address"), on="Address", how="left")

print(iat.shape)

(140, 2)
(140, 5)


Make sure iat2 is empty, i.e. no unknown calls present

In [14]:
iat2 = iat.filter(pl.col("Module").is_null())
iat2 = iat2.filter(pl.col("Address") != "0x0")
iat2 = iat2.filter(to_int_expr("Address") > 0x3000000) # fake sections are to remove
assert iat2.shape[0] == 0

Cancel forwarding imports (e.g. kernel32.HeapAlloc -> ntdll.RtlAllocateHeap)

In [15]:
dll_forward_to = {"ntdll.dll", "kernelbase.dll"}

def get_unforward_map(
    forwarding_modules=["kernel32.dll", "user32.dll"],
) -> dict[str, tuple[str, str]]:
    global forward_to
    
    systemroot = "C:/Windows/SysWOW64/"

    unforward_map: dict[str, tuple[str, str]] = dict()

    forwarded = iat.filter(pl.col("Module").is_in(dll_forward_to))

    for modname in forwarding_modules:
        modpath = systemroot + modname
        number = 0

        dll = pefile.PE(modpath)
        dll.full_load()
        for exp in dll.DIRECTORY_ENTRY_EXPORT.symbols:
            name = exp.name.decode() if exp.name else f"Ordinal#{exp.ordinal}"
            forward_to = ""
            if exp.forwarder:
                number += 1
                forward_to = exp.forwarder.decode().removeprefix("NTDLL.")
                unforward_map[forward_to] = (modname, name)

        print(f"For {modname} there are {number} forwards")
    return unforward_map


unforward_map = get_unforward_map()

For kernel32.dll there are 185 forwards
For user32.dll there are 4 forwards


In [16]:
iat_t = iat

for func in iat_t.filter(pl.col("Module").is_in(dll_forward_to))["Function"]:
    if func not in unforward_map:
        print(f"Func {func} from ntdll.dll is not found in forward map")
        continue

    origmod, origfunc = unforward_map[func]

    condition = (pl.col("Module").is_in(dll_forward_to)) & (pl.col("Function") == func)
    iat_t = iat_t.with_columns(
        [
            pl.when(condition)
            .then(pl.lit(origmod))
            .otherwise("Module")
            .alias("Module"),
            pl.when(condition)
            .then(pl.lit(origfunc))
            .otherwise("Function")
            .alias("Function"),
        ]
    )

iat = iat_t

In [17]:
# remove gaps and obfuscated imports
w = iat.shape[0]
iat = iat.filter(pl.col("Module").is_not_null())
print(f"Filtered out {iat.shape[0]}/{w} iat entries")

Filtered out 98/140 iat entries


In [18]:
# confirm all names are decorated
assert iat.filter(pl.col("Function").str.contains("public")).shape[0] == 0

# Constructing new IDT

In [19]:
def create_32bit_ordinal_import(ordinal_number: int) -> lief.PE.ImportEntry:
    """
    Create a 32-bit import by ordinal

    Args:
        ordinal_number: The ordinal number (0-65535)
    """
    # Validate ordinal range
    if ordinal_number < 0 or ordinal_number > 0xFFFF:
        raise ValueError("Ordinal number must be between 0 and 65535")

    # For 32-bit PE:
    # - Set bit 31 to 1 (0x80000000)
    # - Bits 30-16 must be 0
    # - Bits 15-0 contain the ordinal
    ORDINAL_MASK_32 = 0x80000000
    data_value = ORDINAL_MASK_32 | ordinal_number

    # Create the import entry
    entry = lief.PE.ImportEntry(data_value, lief.PE.PE_TYPE.PE32)

    return entry

In [20]:
shutil.copy(original_dump_path, patched_path)

pe_lief = cast(lief.PE.Binary, lief.PE.parse(patched_path))
pe_lief.remove_all_imports()

In [21]:
for s in pe_lief.sections:
    print(s.name, hex(s.virtual_address), hex(s.virtual_address + s.virtual_size))

    0x1000 0x23000
.rsrc 0x23000 0x24000
.idata   0x24000 0x25000
         0x25000 0x2b8000
tvpxcrha 0x2b8000 0x443000
isgjaxhd 0x443000 0x444000


### Manual mapping of obfuscated iat calls

[+] means it's tested (reached main menu) \
x_* means i'm not sure \
j_* means it's a straight jump, no wrappers. Switching calling convention is very unlikely. Safe to replace \
th_* means it's a wrapper (can switch calling convention, mix the results, etc.) \
__Intelocked* - two methods, that do not call anything system, that's just how hex-rays decompiled them. Also they do not look like as their library counterparts, so i'm not sure.

k32 wrappers are mostly. All are implemented as jumps to kb calls:
```asm
mov     edi, edi
push    ebp
mov     ebp, esp
pop     ebp
jmp     ds:??
```
If I verify each j_kb_* contains these mov's and push-pop's - i mark them as k32 counterparts, since themida doesn't evaluate instructions (only adds garbage)




[+] 0x008 - k32_GetProcAddress (reimplemented) \
[+] 0x014 - j_k32_OpenProcess \
[+] 0x020 - k32_CreateThread (reimplemented) \
[+] 0x028 - j_k32_ResumeThread \
[+] 0x030 - j_k32_TerminateThread \
[+] 0x03c - j_k32_CreateFileMappingW \
[+] 0x040 - j_k32_MapViewOfFile \
[+] 0x044 - j_k32_UnmapViewOfFile \
[+] 0x048 - j_k32_MultiByteToWideChar \
[+] 0x04c - j_k32_FreeLibrary \
[+] 0x050 - j_k32_LoadLibraryW \
[+] 0x054 - j_k32_GetModuleHandleA \
[+] 0x058 - j_k32_GetWindowsDirectoryW \
[+] 0x05c - j_k32_GetSystemWindowsDirectoryW \
[+] 0x070 - j_k32_InitializeCriticalSection (former ntdll) \
[+] 0x074 - j_k32_DeleteCriticalSection (former ntdll) \
[+] 0x080 - k32_GetCurrentThreadId (reimplemented) \
[+] 0x08c - j_k32_FindResourceExW \
[+] 0x090 - j_k32_HeapDestroy \
[+] 0x094 - j_k32_FindResourceW \
[+] 0x098 - j_k32_LoadResource \
[+] 0x09c - j_k32_LockResource \
[+] 0x0a4 - k32_GetTickCount (reimplemented) \
[+] 0x0ac - j_k32_HeapFree \
[+] 0x0b0 - j_k32_HeapReAlloc (former ntdll) \
[+] 0x0b4 - j_k32_HeapSize (former ntdll) \
[+] 0x0bc - k32_InterlockedExchange (reimplemented) \
[+] 0x0c0 - k32_InterlockedCompareExchange \
[+] 0x0c4 - j_k32_TerminateProcess \
[+] 0x0c8 - j_k32_UnhandledExceptionFilter \
[+] 0x0cc - j_k32_SetUnhandledExceptionFilter \
[+] 0x0d4 - j_k32_QueryPerformanceCounter \
[+] 0x0d8 - j_k32_SizeofResource \
[+] 0x210 - x_user_wvsprintfW 


In [22]:
ibase = pe_lief.imagebase

In [23]:
manmapped_imports = pl.read_csv(manmapped_p)
manmapped_imports = addr_to_int(manmapped_imports, "Offset")
manmapped_imports = manmapped_imports.with_columns(pl.col("Offset") + ibase + 0x16000)
manmapped_imports = int_to_addr(manmapped_imports, "Offset")
manmapped_imports = manmapped_imports.rename({"Offset": "Calladdr"})

In [24]:
iat2 = iat.update(manmapped_imports, on="Calladdr", how="left")
iat2 = iat2.vstack(
    manmapped_imports.join(iat2, on="Calladdr", how="anti").select(
        "Calladdr", pl.lit('0x1').alias("Address"), pl.lit(None).alias("Ordinal"), "Function", "Module"
    )
)
iat2 = iat2.sort("Calladdr")
iat2.write_csv(str(old_iat_p) + "2.csv")

### Adding imports

In [25]:
iat_seg = (
    iat2.sort("Calladdr")
    .fill_null("")
    .with_columns(
        (pl.col("Module") != pl.col("Module").shift(1)).cum_sum().alias("segment_id")
    )
    .fill_null(0)
    .filter(to_int_expr("Address") != 0)
    .filter(pl.col("Module") != "")
    .filter(~pl.col("Module").str.starts_with("section_"))
).drop("Address")
segments = [
    group.drop("segment_id")
    for _, group in iat_seg.group_by("segment_id", maintain_order=True)
]

lenseg = len(segments)
num_mods = iat_seg.unique("Module").unique().shape[0]
if lenseg == num_mods:
    print("Perfect: all obfuscated imports resolved")
elif lenseg < num_mods + 2:
    print("Good: most obfuscated imports resolved")
else:
    print(f"Number of segments: {lenseg}, number of unique modules: {num_mods}")

Perfect: all obfuscated imports resolved


Create brand new IDT with new IAT and ILT

In [26]:
for seg in segments:
    dll = seg["Module"][0]
    if dll is None or dll == "":
        continue

    mod = pe_lief.add_import(dll)
    for calladdr, ordinal, func, mname in seg.rows():
        if func.startswith("Ordinal#"):
            # ordinal = int(func.removeprefix("Ordinal#"))
            entry = create_32bit_ordinal_import(ordinal)
        else:
            entry = lief.PE.ImportEntry(func)
        mod.add_entry(entry)

In [27]:
config = lief.PE.Builder.config_t()
config.imports = True

bb = lief.PE.Builder(pe_lief, config)
bb.build()
bb.write(str(patched_path))

Reset IAT first thunks to the addresses in the old IAT

In [28]:
pe = pefile.PE(patched_path)
pe.full_load()

In [29]:
assert len(pe.DIRECTORY_ENTRY_IMPORT) == lenseg, (  # type: ignore
    "Change the MAX_REPEATED_ADDRESSES to >20"
)

In [30]:
for i, seg in enumerate(segments):
    first_thunk = int(seg["Calladdr"][0], 16)

    pe.DIRECTORY_ENTRY_IMPORT[i].struct.FirstThunk = (  # type: ignore
        first_thunk - pe.OPTIONAL_HEADER.ImageBase  # type: ignore
    )

### Saving stuff

In [31]:
temp = "tmp"
pe.write(filename=temp)
pe.close()
shutil.move(temp, patched_path)

WindowsPath('../fake_neomon_host/NeoMon_patched.dll')

In [32]:
iat_patch = pl.DataFrame(schema=patch_schema)
iat_patch.write_csv(patch_iat_p)

# Extract calls to original IAT

In [33]:
false_positive = {
    hex(ibase + 0x06fa6),
    hex(ibase + 0x09c34),
    hex(ibase + 0x0e31e),
}

In [34]:
calls = pl.read_csv(byte_calls_p)
calls = calls.filter(
    ~pl.col("Call address").is_in(false_positive)
)  # false positives

calls = calls.drop("Resolved name")

print(calls.shape)
calls.head(3)

(1162, 4)


subroutine,Instruction,Call address,Destination
str,str,str,str
"""-""","""jmp-far""","""0x10801e86""","""0x108161c8"""
"""-""","""jmp-far""","""0x10806c05""","""0x108161c8"""
"""-""","""jmp-far""","""0x1080ccd2""","""0x10816110"""


In [35]:
iats_to_join = iat2.select(
    pl.col("Address").alias("API Destination"), pl.col("Calladdr").alias("iat address")
)
calls = calls.join(iats_to_join, left_on="Destination", right_on="iat address", how="left")
print(calls.shape)

(1162, 5)


In [36]:
# all calls have their iat entry
uniated = calls.filter(pl.col("API Destination").is_null())
uniated = uniated.filter(~pl.col("Call address").is_in(false_positive))
uniated_n = uniated.shape[0]
print("Uniated:", uniated_n)
assert uniated_n == 0, uniated_n

Uniated: 0


# Patch PE

In [37]:
def patch_call_to_imm(
    addr: str, inst: str, dest: str, imm_addr: str, nop_first: bool = False
) -> dict[str, str]:
    """Create patch entry for 6-byte-long call or jump. Result is jmp,imm32 or call,imm32

    For 5-byte call-near or jmp-near, assuming src command is nop-padded.
    nop_first is the flag which controls if it's front-padded or back-padded.
    """
    next_addr = hex(int(addr, 16) + 6)

    new_ibin = to_bin(int_to_LE(int(imm_addr, 16)))
    old_ibin = to_bin(int_to_LE(int(dest, 16)))
    old_rbin = to_bin(rel_call(next_addr, dest))
    if inst in ("call-near", "jmp-near") and not nop_first:
        old_rbin = to_bin(rel_call(hex(int(addr, 16) + 5), dest))

    match inst:
        case "call-near":
            if nop_first:
                mem_old = "90E8" + old_rbin
            else:
                mem_old = "E8" + old_rbin + "90"
            patch = "FF15" + new_ibin
        case "jmp-near":
            if nop_first:
                mem_old = "90E9" + old_rbin
            else:
                mem_old = "E9" + old_rbin + "90"
            patch = "FF25" + new_ibin
        case "call-far":
            mem_old = "FF15" + old_ibin
            patch = "FF15" + new_ibin
        case "jmp-far":
            mem_old = "FF25" + old_ibin
            patch = "FF25" + new_ibin
        case "jne-near":
            mem_old = "0F84" + old_rbin
            raise RuntimeError("Need a thunk to perform conditional near jump to imm32")
        case "je-near":
            mem_old = "0F85" + old_rbin
            raise RuntimeError("Need a thunk to perform conditional near jump to imm32")
        case _:
            raise RuntimeError(f"Unsupported instruction {inst}")

    return {
        "patch_addr": addr,
        "mem_old": mem_old,
        "patch": patch,
    }

In [38]:
def patch_rel_call(
    addr: str, inst: str, old_rva: str, new_dest: str,
) -> dict[str, str]:
    """ Create patch entry for call-near or jmp-near. """
    assert 'near' in inst
    next_addr = hex(int(addr, 16) + 6)
    if inst in ('call-near', 'jmp-near'):
        next_addr = hex(int(addr, 16) + 5)
    
    new_rbin = to_bin(rel_call(next_addr, new_dest))
    old_rbin = to_bin(int_to_LE(int(old_rva, 16)))

    match inst:
        case "call-near":
            mem_old = "E8" + old_rbin
            patch = "E8" + new_rbin
        case "jmp-near":
            mem_old = "E9" + old_rbin
            patch = "E9" + new_rbin
        case "jne-near":
            mem_old = "0F84" + old_rbin
            patch = "0F84" + new_rbin
        case "je-near":
            mem_old = "0F85" + old_rbin
            patch = "0F85" + new_rbin
        case _:
            raise RuntimeError(f"Unsupported instruction {inst}")

    return {
        "patch_addr": addr,
        "mem_old": mem_old,
        "patch": patch,
    }

In [39]:
calls_patch = pl.DataFrame(schema=patch_schema)
calls_patch.shape

(0, 3)

In [40]:
calls_patch.write_csv(patch_calls_p)

### Fix local pointers via .reloc section

This step does not modify memory, and could be done independently from the main patch

Entries that require fixing:
- Literally all jmp-far and call-far from `broken-byte-calls.csv` (they point to the IAT)

In [41]:
pe_lief = cast(lief.PE.Binary, lief.PE.parse(patched_path))
pe_lief.remove_all_relocations()
ibase = pe_lief.optional_header.imagebase

pointers_to_protect = []

In [42]:
for _, inst, calladdr, _, _ in calls.rows():
    addr = int(calladdr, 16) - ibase
    if 'far' in inst:
        addr += 2
    else:
        raise RuntimeError("Attempt to protect relative pointer")
    pointers_to_protect.append(addr)

pointers_to_protect.sort()
print(len(pointers_to_protect))

1162


In [43]:
def fixate_reloc(reloc: None | lief.PE.Relocation, verbose: bool = False):
    if reloc is not None:
        if verbose:
            print(
                f"Added relocation with bbase {reloc.virtual_address:x} and {len(reloc.entries)} entries"
            )
        if len(reloc.entries) % 2 != 0:
            reloc.add_entry(lief.PE.RelocationEntry(0, 0)) # type: ignore
        pe_lief.add_relocation(reloc)


block_base = 0
processed = 0

reloc: None | lief.PE.Relocation = None
for rva in pointers_to_protect:
    if rva & (~0xFFF) != block_base:
        fixate_reloc(reloc)

        block_base = rva & (~0xFFF)
        reloc = lief.PE.Relocation()
        reloc.virtual_address = block_base

    reloc_type = lief.PE.RelocationEntry.BASE_TYPES.HIGHLOW
    reloc_entry = lief.PE.RelocationEntry(rva - block_base, reloc_type)

    assert reloc is not None
    reloc.add_entry(reloc_entry)
    processed += 1

fixate_reloc(reloc)

print(f"Processed {processed} pointers")

Processed 1162 pointers


In [44]:
config = lief.PE.Builder.config_t()
config.relocations = True

bb = lief.PE.Builder(pe_lief, config)
bb.build()
bb.write(str(patched_path))

Now run ida_patch.py script in IDA Pro and apply changes

# Troubleshooting

In [45]:
print("OK!")

OK!
