In [1]:
import os
import shutil
from pathlib import Path

import lief
from tqdm import tqdm
import pefile
import polars as pl
from dotenv import load_dotenv

# Paths

In [2]:
# imports csv
base = Path("../game-dump/dumps")
base_patch = Path("../game-dump/patches")

dump_imports_p = base / "dump-imports.csv"
old_iat_p = base / "old-iat.csv"
byte_calls_p = base / "broken-byte-calls.csv"
inst_calls_p = base / "broken-analyzed-calls.csv"

In [3]:
# exports csv
patch_thunks_p = base_patch / "thunks_patch.csv"
patch_calls_p = base_patch / "calls_patch.csv"

In [4]:
names_map_p = "../game-dump/names-map.csv"
names_map = pl.read_csv(names_map_p)

In [5]:
load_dotenv(Path(os.path.abspath('')).parent / ".env")
base_to_exe = Path(os.getenv("BASE_TO_EXE", "./"))
original_dump_path = base_to_exe / "GAME_dump.exe"
patched_path = base_to_exe / "GAME_patched.exe"

# Parsing import table

In [6]:
dump_imports = pl.read_csv(dump_imports_p).filter(pl.col("Module") != "game.exe")

In [7]:
dump_imports = dump_imports.with_columns(pl.col("Function").str.replace_all(" ", "").str.slice(0,200))
names_map = names_map.with_columns(
    pl.col("undecorated").str.replace_all(" ", "").str.slice(0,200)
)

dump_imports = dump_imports.join(names_map, left_on="Function", right_on="undecorated", how="left").with_columns(
    pl.coalesce("decorated", "Function").alias("Function")
).drop("decorated")

In [8]:
dump_imports = dump_imports.with_columns(("0x" + pl.col("Address").str.to_lowercase()).alias("Address"))
dump_imports = dump_imports.unique("Address", keep='first').sort("Address")

In [9]:
dump_imports.head()

Module,Address,Function
str,str,str
"""xinput9_1_0.dll""","""0x06d42730""","""DllMain"""
"""xinput9_1_0.dll""","""0x06d42980""","""XInputGetState"""
"""xinput9_1_0.dll""","""0x06d42b60""","""XInputSetState"""
"""xinput9_1_0.dll""","""0x06d42cd0""","""XInputGetCapabilities"""
"""xinput9_1_0.dll""","""0x06d42ea0""","""XInputGetDSoundAudioDeviceGuid…"


# Gathering imports from old IAT

In [10]:
iat = pl.read_csv(old_iat_p)

iat = iat.rename({"Address": "Calladdr", "Destination": "Address"})
iat = iat.with_columns(("0x" + pl.col("Address").str.to_lowercase()).alias("Address"))

iat = iat.join(dump_imports, on='Address', how='left')

In [11]:
iat2 = iat.filter(pl.col("Function").is_null())
iat2 = iat2.filter(pl.col("Address").str.slice(2).str.to_integer(base=16) != 0)
iat2

Calladdr,Address,Module,Function
str,str,str,str
"""0x01588AB0""","""0x023e3673""",,


<0x023e3673> points to an intermediate call which i believe, is an obfuscated jump to user32.dll!wsprintfA

Cancel forwarding imports (e.g. kernel32.dll,AllocateHeap -> ntdll.dll,RtlReAllocateHeap)

In [12]:
systemroot = "C:/Windows/System32/"
forwarding_modules = ["kernel32.dll", "user32.dll"]

unforward_map: dict[str, tuple[str, str]] = dict()

forwarded = iat.filter(pl.col("Module") == "ntdll.dll")

for modname in forwarding_modules:
    modpath = systemroot + modname
    number = 0

    dll = pefile.PE(modpath)
    dll.full_load()
    for exp in dll.DIRECTORY_ENTRY_EXPORT.symbols:
        name = exp.name.decode() if exp.name else f"Ordinal#{exp.ordinal}"
        forward_to = ''
        if exp.forwarder:
            forward_to = exp.forwarder.decode().removeprefix("NTDLL.")
        if forwarded.filter(pl.col("Function") == forward_to).shape[0] > 0:
            number += 1
            unforward_map[forward_to] = (modname, name)
    
    print(f'For {modname} there are {number} forwards')

For kernel32.dll there are 12 forwards
For user32.dll there are 2 forwards


In [13]:
for func in iat.filter(pl.col("Module") == "ntdll.dll")["Function"]:
    if func not in unforward_map:
        print(f"Func {func} from ntdll.dll is not found in forward map")
        continue

    origmod, origfunc = unforward_map[func]

    if "InitializeCrit" in func:
        print(func, origmod, origfunc)

    condition = (pl.col("Module") == "ntdll.dll") & (pl.col("Function") == func)
    iat = iat.with_columns(
        [
            pl.when(condition)
            .then(pl.lit(origmod))
            .otherwise("Module")
            .alias("Module"),
            pl.when(condition)
            .then(pl.lit(origfunc))
            .otherwise("Function")
            .alias("Function"),
        ]
    )

RtlInitializeCriticalSection kernel32.dll InitializeCriticalSection


In [14]:
iat.write_csv(str(old_iat_p) + '2.csv')

In [15]:
iat_seg = (
    iat.sort("Calladdr").fill_null("")
    .with_columns(
        (pl.col("Module") != pl.col("Module").shift(1)).cum_sum().alias("segment_id")
    )
    .fill_null(0)
    .filter(pl.col("Address").str.slice(2).str.to_integer(base=16) != 0)
    .filter(pl.col("Module") != '')
)
segments = [group.drop("segment_id") for _, group in iat_seg.group_by("segment_id", maintain_order=True)]

In [16]:
# extract obfuscated imports for later
obfuscated = iat.filter(pl.col("Module").is_null()).filter(pl.col("Address").str.slice(2).str.to_integer(base=16) != 0)
obfuscated

Calladdr,Address,Module,Function
str,str,str,str
"""0x01588AB0""","""0x023e3673""",,


In [17]:
# remove gaps and one obfuscated import
w = iat.shape[0]
iat = iat.filter(pl.col("Module").is_not_null())
print(f"Filtered out {iat.shape[0]}/{w} iat entries")

Filtered out 887/923 iat entries


In [18]:
# confirm all names are decorated
assert iat.filter(pl.col("Function").str.contains("public")).shape[0] == 0

# Constructing new IDT

In [19]:
shutil.copy(original_dump_path, patched_path)

pe_lief = lief.PE.parse(patched_path)
pe_lief.remove_all_imports()

In [20]:
def create_32bit_ordinal_import(ordinal_number: int) -> lief.PE.ImportEntry:
    """
    Create a 32-bit import by ordinal

    Args:
        ordinal_number: The ordinal number (0-65535)
    """
    # Validate ordinal range
    if ordinal_number < 0 or ordinal_number > 0xFFFF:
        raise ValueError("Ordinal number must be between 0 and 65535")

    # For 32-bit PE:
    # - Set bit 31 to 1 (0x80000000)
    # - Bits 30-16 must be 0
    # - Bits 15-0 contain the ordinal
    ORDINAL_MASK_32 = 0x80000000
    data_value = ORDINAL_MASK_32 | ordinal_number

    # Create the import entry
    entry = lief.PE.ImportEntry(data_value, lief.PE.PE_TYPE.PE32)

    return entry

Creates brand new IDT with new IAT and ILT

In [21]:
for seg in segments:
    dll = seg['Module'][0]
    if dll is None or dll == '':
        continue

    mod = pe_lief.add_import(dll)
    for calladdr, _, _, func in seg.rows():
        if func.startswith('Ordinal#'):
            ordinal = int(func.removeprefix("Ordinal#"))
            entry = create_32bit_ordinal_import(ordinal)
        else:
            entry = lief.PE.ImportEntry(func)
        mod.add_entry(entry)

In [22]:
config = lief.PE.Builder.config_t()
config.imports = True

bb = lief.PE.Builder(pe_lief, config)
bb.build()
bb.write(str(patched_path))

Can't find section with the rva: 0x0


Reset IAT to the old IAT address

In [23]:
pe = pefile.PE(patched_path)
pe.full_load()

In [24]:
assert len(pe.DIRECTORY_ENTRY_IMPORT) == len(segments), "Change the MAX_REPEATED_ADDRESSES to >20"

In [25]:
for i, seg in enumerate(segments):
    first_thunk = int(seg["Calladdr"][0], 16)

    pe.DIRECTORY_ENTRY_IMPORT[i].struct.FirstThunk = (
        first_thunk - pe.OPTIONAL_HEADER.ImageBase
    )

In [26]:
temp = "exe.exe"
pe.write(filename=temp)
pe.close()
shutil.move(temp, patched_path)

WindowsPath('C:/Games/FA-EMU/Shipping/GAME_patched.exe')

# Fix thunks

Suspect thunks are searched via <e9 ? ? ? ?> wildcard

In [27]:
thunks = pl.read_csv(byte_calls_p)
thunks = thunks.filter(pl.col("Instruction") == "jmp")
thunks = thunks.with_columns(pl.col("Call address").str.slice(2).str.to_integer(base=16).alias("Int_addr"))
thunks = thunks.drop('subroutine', 'Instruction', 'Resolved name')
thunks.shape

(4246, 3)

In [28]:
valid_addresses = set(dump_imports["Address"].to_list())
thunks = thunks.filter(pl.col("Destination").is_in(valid_addresses))
print(thunks.shape)

(3234, 3)


jmp is considered a thunk as long as it has at least two neighbouring jmps. Both neighbours would be counted as thunks as well.

In [29]:
get_addr = pl.col("Call address").str.slice(2).str.to_integer(base=16)
is_prev = pl.col("Int_addr").shift(1) + 6 == pl.col("Int_addr")
is_next = pl.col("Int_addr").shift(-1) - 6 == pl.col("Int_addr")

# thunks = thunks.with_columns([is_prev.alias('is_prev'), is_next.alias('is_next')])

thunks = thunks.with_columns(get_addr.alias("Int_addr"))
thunks = thunks.with_columns((is_next & is_prev).alias("is_thunk"))
thunks = thunks.with_columns(pl.col("is_thunk") | pl.col("is_thunk").shift(1) | pl.col("is_thunk").shift(-1))
thunks.filter("is_thunk").shape[0]

868

In [30]:
# additional thunks
thunks = thunks.with_columns(pl.col("is_thunk") | (pl.col("Call address") == "0x14aaadb"))
thunks = thunks.with_columns(pl.col("is_thunk") | (pl.col("Call address") == "0x14aaad5"))

thunks = thunks.with_columns(pl.col("is_thunk") | (pl.col("Call address") == "0x14aa9f5"))
thunks = thunks.with_columns(pl.col("is_thunk") | (pl.col("Call address") == "0x14aa9fb"))

thunks = thunks.with_columns(pl.col("is_thunk") | (pl.col("Call address") == "0x14aab3d"))

thunks = thunks.with_columns(pl.col("is_thunk") | (pl.col("Call address") == "0x14ab35d"))
thunks = thunks.with_columns(pl.col("is_thunk") | (pl.col("Call address") == "0x14ab363"))

thunks = thunks.with_columns(pl.col("is_thunk") | (pl.col("Call address") == "0x14ab405"))
thunks = thunks.with_columns(pl.col("is_thunk") | (pl.col("Call address") == "0x14ab40b"))

thunks = thunks.with_columns(pl.col("is_thunk") | (pl.col("Call address") == "0x14ab767"))
thunks = thunks.with_columns(pl.col("is_thunk") | (pl.col("Call address") == "0x14ab76d"))

thunks = thunks.with_columns(pl.col("is_thunk") | (pl.col("Call address") == "0x14abc33"))
thunks = thunks.with_columns(pl.col("is_thunk") | (pl.col("Call address") == "0x14abc39"))

thunks = thunks.with_columns(pl.col("is_thunk") | (pl.col("Call address") == "0x14abe61"))

thunks = thunks.with_columns(pl.col("is_thunk") | (pl.col("Call address") == "0xcedcb1"))

thunks = thunks.with_columns(pl.col("is_thunk") | (pl.col("Call address") == "0xcedcf1"))

In [31]:
thunks = thunks.filter("is_thunk").drop("Int_addr")
print(f"Found {thunks.filter("is_thunk").unique("Destination").shape[0]} thunks and {iat.unique("Address").shape[0]} imports")

Found 870 thunks and 874 imports


In [32]:
thunks = thunks.join(iat.select("Module", "Function", "Address"), left_on="Destination", right_on="Address", how='left')
thunks.head(3)

Call address,Destination,is_thunk,Module,Function
str,str,bool,str,str
"""0xa577e5""","""0x7358b730""",True,"""umbraoptimizer32.dll""","""?getOptimizerInfoValue@Umbra@@…"
"""0xa577eb""","""0x7358cbb0""",True,"""umbraoptimizer32.dll""","""?insertObject@Scene@Umbra@@QAE…"
"""0xa577f1""","""0x7358dbd0""",True,"""umbraoptimizer32.dll""","""?insertModel@Scene@Umbra@@QAEP…"


In [33]:
# For the rest, we'll create new thunks

available_thunk_places = [
    ["0x014AB2A4", 12],
    ["0x014AB368", 8],
    ["0x014AB3C2", 14],
    ["0x014AB5B6", 10],
    ["0x014AB602", 14],
]

def find_next_addr(size: int = 6) -> str | None:
    global available_thunk_places
    for i in range(len(available_thunk_places)):
        e = available_thunk_places[i]
        if e[1] >= size:
            e[1] -= size
            retval = int(e[0], 16)
            e[0] = hex(retval + size)
            return hex(retval)
    return None

In [34]:
to_thunk = iat.vstack(obfuscated)
unfound = to_thunk.filter(
    ~pl.col("Address").is_in(thunks.filter("is_thunk")["Destination"].to_list())
).unique("Address")

thunks = thunks.with_columns(pl.lit(False).alias("new"))
for dest in unfound["Address"]:
    addr = find_next_addr()
    print(f"Using {addr} to place a new thunk to {dest}")
    if addr is None:
        raise RuntimeError("Can't place new thunk: no available space")

    func = iat.filter(pl.col("Address") == dest)
    if func.shape[0] > 0:
        modname = func["Module"][0]
        funname = func["Function"][0]
    else:
        modname, funname = None, None

    thunks = thunks.vstack(
        pl.DataFrame(
            {
                "Call address": hex(int(addr, 16)),
                "Destination": hex(int(dest, 16)),
                "is_thunk": True,
                "Module": modname,
                "Function": funname,
                "new": True,
            }
        )
    )

unfound = unfound.clear()
print(thunks.shape)

Using 0x14ab2a4 to place a new thunk to 0x6f67ba2c
Using 0x14ab2aa to place a new thunk to 0x6f64ce5a
Using 0x14ab368 to place a new thunk to 0x023e3673
Using 0x14ab3c2 to place a new thunk to 0x6f67c618
Using 0x14ab3c8 to place a new thunk to 0x732f132c
(923, 6)


In [35]:
# setting thunks to renewed iat
thunks = thunks.join(to_thunk.select("Calladdr", "Address").rename({"Calladdr": "iat_addr"}).unique("Address"), left_on="Destination", right_on="Address", how='left')
thunks.shape

(923, 7)

In [36]:
thunks.tail(6)

Call address,Destination,is_thunk,Module,Function,new,iat_addr
str,str,bool,str,str,bool,str
"""0x14fc183""","""0x763a3dd0""",True,"""kernel32.dll""","""SetConsoleTextAttribute""",False,"""0x01588388"""
"""0x14ab2a4""","""0x6f67ba2c""",True,"""msvcr90.dll""","""_acmdln""",True,"""0x01588558"""
"""0x14ab2aa""","""0x6f64ce5a""",True,"""msvcr90.dll""","""_onexit""",True,"""0x01588578"""
"""0x14ab368""","""0x23e3673""",True,,,True,
"""0x14ab3c2""","""0x6f67c618""",True,"""msvcr90.dll""","""_adjust_fdiv""",True,"""0x01588544"""
"""0x14ab3c8""","""0x732f132c""",True,"""msvcp90.dll""","""?npos@?$basic_string@DU?$char_…",True,"""0x0158839C"""


# Fix calls

Suspect calls are <90 e8 ? ? ? ?> and <e8 ? ? ? ? 90>, as well as analyzed calls by disassembler (IDA Pro)

In [37]:
calls = pl.read_csv(byte_calls_p)
calls_inst = pl.read_csv(inst_calls_p)
calls = calls.vstack(calls_inst).unique("Call address")
calls = calls.drop("Resolved name")
calls.shape

(32791, 4)

In [38]:
thunk_int_addr = thunks.select(pl.col("Call address").str.slice(2).str.to_integer(base=16))
thunk_int_addr_1 = thunk_int_addr.select(pl.col("Call address") - 1)
thunk_addrs = set(thunk_int_addr["Call address"].to_list() + thunk_int_addr_1["Call address"].to_list())

calls = calls.filter(~pl.col("Call address").str.slice(2).str.to_integer(base=16).is_in(thunk_addrs))
calls.shape

(31907, 4)

In [39]:
valid_addresses = set(dump_imports["Address"].to_list())
calls = calls.filter(pl.col("Destination").is_in(valid_addresses))
calls.shape

(30879, 4)

In [40]:
thunks_to_join = thunks.select(pl.col("Destination"), pl.col("Call address").alias("thunk address")).unique("Destination")
calls = calls.join(thunks_to_join, on="Destination", how='left')
calls.shape

(30879, 5)

In [41]:
# all calls have their thunk
assert calls.filter(pl.col("thunk address").is_null()).shape[0] == 0

# Patch PE

In [42]:
from addr_helpers import hex_to_LE, to_bin, rel_call

schema = {
    "patch_addr": pl.String,
    "mem_old": pl.String,
    "patch": pl.String,
}

In [43]:
def create_thunk(addr: str, dest: str, iat: str, new: bool, real: bool = True) -> pl.DataFrame:
    patch_addr = hex(int(addr, 16) - 1 + int(new))
    mem_old = 'CC' * 6
    if not new:
        mem_old = '90E9' + to_bin(rel_call(addr, dest))
    if real:
        patch = 'FF25' + to_bin(hex_to_LE(int(iat, 16)))
    else:
        patch = '90E9' + to_bin(rel_call(addr, dest))
    return pl.DataFrame({
        "patch_addr": patch_addr,
        "mem_old": mem_old,
        "patch": patch,
    })

In [44]:
thunks_patch = pl.DataFrame(schema=schema)

for addr, dest, mod, new, iat_addr in thunks.drop("is_thunk", "Function").rows():
    real = mod is not None
    thunks_patch = thunks_patch.vstack(create_thunk(addr, dest, iat_addr, new, real))
print(thunks_patch.shape)

(923, 3)


In [45]:
patch_data = []

for call in tqdm(calls.rows()):
    sub, inst, addr, dest, thunk_addr = call

    # Determine opcode based on instruction
    match inst:
        case 'call':
            opcode = 'E8'
        case "jmp":
            opcode = "E9"
        case _:
            raise RuntimeError(f"Unsupported instruction {inst}")
    
    if thunk_addr:
        thunk_addr = hex(int(thunk_addr, 16) - 1)
    
    naddr = hex(int(addr, 16) - 5)

    dest_bin = to_bin(rel_call(addr, dest))
    thunk_bin = to_bin(rel_call(naddr, hex(int(thunk_addr, 16) - 5)))

    patch_data.append(
        {
            "patch_addr": addr,
            "mem_old": opcode + dest_bin,
            "patch": opcode + thunk_bin,
        }
    )

# Create DataFrame in one operation
calls_patch = pl.DataFrame(patch_data, schema=schema)
calls_patch.shape

100%|██████████| 30879/30879 [00:00<00:00, 154211.50it/s]


(30879, 3)

In [46]:
thunks_patch.write_csv(patch_thunks_p)
calls_patch.write_csv(patch_calls_p)

Now run ida_patch.py script in IDA Pro and apply changes