In [1]:
import os
import shutil
from pathlib import Path

import lief
from tqdm import tqdm
import pefile
import polars as pl
from dotenv import load_dotenv

# Paths

In [2]:
# CSV inputs read by this notebook (and the folder the patch CSVs are written to)
base = Path("../game-dump/dumps")
base_patch = Path("../game-dump/patches")

dump_imports_p = base.joinpath("dump-imports.csv")
old_iat_p = base.joinpath("old-iat.csv")
byte_calls_p = base.joinpath("broken-byte-calls.csv")
inst_calls_p = base.joinpath("broken-analyzed-calls.csv")

In [3]:
# CSV outputs written at the end of the notebook
patch_thunks_p = base_patch.joinpath("thunks_patch.csv")
patch_calls_p = base_patch.joinpath("calls_patch.csv")

In [4]:
names_map_p = "../game-dump/names-map.csv"
names_map = pl.read_csv(names_map_p)

In [5]:
# .env lives one level above the current working directory
load_dotenv(Path(os.path.abspath("")).parent.joinpath(".env"))

# Folder holding the executables; falls back to the working directory when unset
base_to_exe = Path(os.getenv("BASE_TO_EXE", "./"))
original_dump_path = base_to_exe.joinpath("GAME_dump.exe")
patched_path = base_to_exe.joinpath("GAME_patched.exe")

# Parsing import table

In [6]:
dump_imports = pl.read_csv(dump_imports_p).filter(pl.col("Module") != "game.exe")

In [7]:
def _strip_and_cap(column: str) -> pl.Expr:
    """Return `column` with every space removed and truncated to 200 characters."""
    return pl.col(column).str.replace_all(" ", "").str.slice(0, 200)


# Normalise both sides identically so they can serve as join keys
dump_imports = dump_imports.with_columns(_strip_and_cap("Function"))
names_map = names_map.with_columns(_strip_and_cap("undecorated"))

# Where the map knows a decorated name, use it; otherwise keep the dumped name
dump_imports = (
    dump_imports
    .join(names_map, left_on="Function", right_on="undecorated", how="left")
    .with_columns(pl.coalesce("decorated", "Function").alias("Function"))
    .drop("decorated")
)

In [8]:
# Prefix addresses with "0x", lower-case them, then keep one row per address
dump_imports = (
    dump_imports
    .with_columns(("0x" + pl.col("Address").str.to_lowercase()).alias("Address"))
    .unique("Address", keep="first")
    .sort("Address")
)

In [9]:
dump_imports.head()

Module,Address,Function
str,str,str
"""xinput9_1_0.dll""","""0x06d42730""","""DllMain"""
"""xinput9_1_0.dll""","""0x06d42980""","""XInputGetState"""
"""xinput9_1_0.dll""","""0x06d42b60""","""XInputSetState"""
"""xinput9_1_0.dll""","""0x06d42cd0""","""XInputGetCapabilities"""
"""xinput9_1_0.dll""","""0x06d42ea0""","""XInputGetDSoundAudioDeviceGuid…"


In [63]:
dump_imports.filter(pl.col("Address") == "0x732f7286")['Function'][0]

'??0?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QAE@ABV01@II@Z'

# Gathering imports from old IAT

In [10]:
# "Calladdr" is the IAT slot, "Address" is what the slot points to;
# the left join keeps slots whose target is not a known import (Module/Function stay null).
iat = (
    pl.read_csv(old_iat_p)
    .rename({"Address": "Calladdr", "Destination": "Address"})
    .with_columns(("0x" + pl.col("Address").str.to_lowercase()).alias("Address"))
    .join(dump_imports, on="Address", how="left")
)

In [11]:
# IAT slots that point somewhere non-zero but did not resolve to a known import
iat2 = (
    iat
    .filter(pl.col("Function").is_null())
    .filter(pl.col("Address").str.slice(2).str.to_integer(base=16) != 0)
)
iat2

Calladdr,Address,Module,Function
str,str,str,str
"""0x01588AB0""","""0x023e3673""",,


<0x023e3673> points to an intermediate call which, I believe, is an obfuscated jump to user32.dll!wsprintfA.

Undo import forwarding (e.g. kernel32.dll!HeapAlloc -> ntdll.dll!RtlAllocateHeap): map the ntdll.dll targets back to the kernel32.dll / user32.dll exports that forward to them.

In [12]:
systemroot = "C:/Windows/System32/"
forwarding_modules = ["kernel32.dll", "user32.dll"]

# ntdll function name -> (module that forwards to it, export name in that module)
unforward_map: dict[str, tuple[str, str]] = dict()

forwarded = iat.filter(pl.col("Module") == "ntdll.dll")
# Loop-invariant: build the lookup once instead of filtering the frame for every export
forwarded_names = set(forwarded["Function"].to_list())

for modname in forwarding_modules:
    modpath = systemroot + modname
    number = 0

    dll = pefile.PE(modpath)
    try:
        dll.full_load()
        for exp in dll.DIRECTORY_ENTRY_EXPORT.symbols:
            # Only forwarder exports can be the origin of an ntdll import
            if not exp.forwarder:
                continue
            forward_to = exp.forwarder.decode().removeprefix("NTDLL.")
            if forward_to in forwarded_names:
                name = exp.name.decode() if exp.name else f"Ordinal#{exp.ordinal}"
                number += 1
                unforward_map[forward_to] = (modname, name)
    finally:
        # Release the file handle / mapping held by pefile
        dll.close()

    print(f'For {modname} there are {number} forwards')

For kernel32.dll there are 12 forwards
For user32.dll there are 2 forwards


In [13]:
# Rewrite each ntdll.dll IAT entry to the (module, export) that forwards to it
for func in iat.filter(pl.col("Module") == "ntdll.dll")["Function"]:
    target = unforward_map.get(func)
    if target is None:
        print(f"Func {func} from ntdll.dll is not found in forward map")
        continue

    origmod, origfunc = target

    if "InitializeCrit" in func:
        print(func, origmod, origfunc)

    is_this_forward = (pl.col("Module") == "ntdll.dll") & (pl.col("Function") == func)
    iat = iat.with_columns(
        Module=pl.when(is_this_forward).then(pl.lit(origmod)).otherwise(pl.col("Module")),
        Function=pl.when(is_this_forward).then(pl.lit(origfunc)).otherwise(pl.col("Function")),
    )

RtlInitializeCriticalSection kernel32.dll InitializeCriticalSection


In [14]:
iat.write_csv(str(old_iat_p) + '2.csv')

In [15]:
# Walk the IAT in Calladdr order and start a new segment whenever the module changes;
# zero-address slots and slots without a module are dropped after numbering.
iat_seg = (
    iat.sort("Calladdr")
    .fill_null("")
    .with_columns(segment_id=pl.col("Module").ne(pl.col("Module").shift(1)).cum_sum())
    .fill_null(0)
    .filter(pl.col("Address").str.slice(2).str.to_integer(base=16).ne(0))
    .filter(pl.col("Module").ne(""))
)
segments = [
    run.drop("segment_id")
    for _key, run in iat_seg.group_by("segment_id", maintain_order=True)
]

In [16]:
# extract obfuscated imports for later: no module resolved, but a non-zero target
obfuscated = (
    iat
    .filter(pl.col("Module").is_null())
    .filter(pl.col("Address").str.slice(2).str.to_integer(base=16) != 0)
)
obfuscated

Calladdr,Address,Module,Function
str,str,str,str
"""0x01588AB0""","""0x023e3673""",,


In [17]:
# remove gaps and one obfuscated import
n_before = iat.shape[0]
iat = iat.filter(pl.col("Module").is_not_null())
# iat.shape[0] is the number of rows that survived the filter, so report it as "kept"
print(f"Kept {iat.shape[0]}/{n_before} iat entries (filtered out {n_before - iat.shape[0]})")

Filtered out 887/923 iat entries


In [18]:
# confirm all names are decorated
assert iat.filter(pl.col("Function").str.contains("public")).shape[0] == 0

# Constructing new IDT

In [19]:
shutil.copy(original_dump_path, patched_path)

pe_lief = lief.PE.parse(patched_path)
pe_lief.remove_all_imports()

In [20]:
def create_32bit_ordinal_import(ordinal_number: int) -> lief.PE.ImportEntry:
    """
    Build a PE32 import entry that imports by ordinal instead of by name.

    Args:
        ordinal_number: The ordinal to import (0-65535)

    Returns:
        A lief import entry whose data value is the ordinal with bit 31 set.

    Raises:
        ValueError: If the ordinal does not fit in 16 bits.
    """
    if not (0 <= ordinal_number <= 0xFFFF):
        raise ValueError("Ordinal number must be between 0 and 65535")

    # PE32 thunk layout: bit 31 flags "by ordinal", bits 30-16 are zero,
    # bits 15-0 carry the ordinal itself.
    by_ordinal_flag = 0x80000000
    return lief.PE.ImportEntry(by_ordinal_flag | ordinal_number, lief.PE.PE_TYPE.PE32)

### Try this later

In [21]:
def get_section_boundaries(i: int, real: bool) -> tuple[int, int]:
    """
    Return the (start, end) span of section `i` of the module-level `pe_lief`.

    Args:
        i: Index into `pe_lief.sections`.
        real: When true, the span is in file terms (offset, offset + size);
            otherwise it is in virtual terms with the image base added
            (virtual_address, virtual_address + virtual_size).
    """
    section = pe_lief.sections[i]
    if not real:
        virt_start = pe_lief.imagebase + section.virtual_address
        return virt_start, virt_start + section.virtual_size
    return section.offset, section.offset + section.size


In [22]:
# L = 0x1588000 - get_section_boundaries(0, False)[0]
# idata_offset = get_section_boundaries(0, True)[0] + L
# idata_size = get_section_boundaries(0, True)[1] - idata_offset
# idata_virtual_address = get_section_boundaries(0, False)[0] + L
# idata_virtual_size = get_section_boundaries(0, False)[1] - idata_virtual_address

In [23]:
# for s in pe_lief.sections:
#     print(s.name, hex(s.virtual_address), hex(s.virtual_address + s.virtual_size))

In [24]:
# Fixing first section
# pe_lief.sections[0].name = ".text"
# pe_lief.sections[0].virtual_size = idata_offset - pe_lief.sections[0].offset
# pe_lief.sections[0].size = idata_virtual_address - pe_lief.sections[0].virtual_address - pe_lief.imagebase - 2500 # magic number!

In [25]:
from pydantic import BaseModel


class Sect(BaseModel):
    """Plain-data copy of a PE section's header fields, convertible to and from lief sections."""

    name: str
    raw_addr: int  # lief `offset`
    raw_size: int  # lief `size`
    virt_addr: int  # lief `virtual_address`
    virt_size: int  # lief `virtual_size`
    chars: int  # lief `characteristics`

    @staticmethod
    def from_section(section: lief.PE.Section) -> "Sect":
        """Capture the header fields of an existing lief section."""
        fields = {
            "name": section.name,
            "raw_addr": section.offset,
            "raw_size": section.size,
            "virt_addr": section.virtual_address,
            "virt_size": section.virtual_size,
            "chars": section.characteristics,
        }
        return Sect(**fields)

    def to_section(self) -> lief.PE.Section:
        """Create a new lief section carrying the stored header fields."""
        result = lief.PE.Section(self.name)
        assignments = (
            ("name", self.name),
            ("offset", self.raw_addr),
            ("size", self.raw_size),
            ("virtual_address", self.virt_addr),
            ("virtual_size", self.virt_size),
            ("characteristics", self.chars),
        )
        for attribute, value in assignments:
            setattr(result, attribute, value)
        return result

In [26]:
# adding proper .idata
# new_idata = lief.PE.Section(".idata")
# new_idata.offset = idata_offset
# new_idata.size = idata_size
# new_idata.virtual_address = idata_virtual_address - pe_lief.imagebase
# new_idata.virtual_size = idata_virtual_size
# new_idata.characteristics = pe_lief.sections[0].characteristics
# pe_lief.add_section(new_idata)

# sections = [Sect.from_section(sec) for sec in pe_lief.sections]
# sections = [sections[0], Sect.from_section(new_idata)] + sections[1:]

# N = len(pe_lief.sections)
# for i in range(N):
#     pe_lief.sections[i].name = f'{i}'

# for i in range(N):
#     pe_lief.remove_section(f'{i}')

# for sec in sections:
#     pe_lief.add_section(sec.to_section())

In [27]:
for s in pe_lief.sections:
    print(s.name, hex(s.virtual_address), hex(s.virtual_address + s.virtual_size))

    0x1000 0x1ab8000
.rsrc 0x1ab8000 0x1d9c000
.idata   0x1d9c000 0x1d9d000
         0x1d9d000 0x203d000
wlovhtaq 0x203d000 0x2225000
oemvvlbu 0x2225000 0x2226000


### Adding imports

Creates brand new IDT with new IAT and ILT

In [28]:
# One import descriptor per segment; entries are added in the segment's row order
for seg in segments:
    dll = seg["Module"][0]
    if not dll:  # None or empty string
        continue

    mod = pe_lief.add_import(dll)
    for func in seg["Function"]:
        if func.startswith("Ordinal#"):
            # Unnamed exports were recorded as "Ordinal#<n>"
            entry = create_32bit_ordinal_import(int(func.removeprefix("Ordinal#")))
        else:
            entry = lief.PE.ImportEntry(func)
        mod.add_entry(entry)

In [29]:
config = lief.PE.Builder.config_t()
config.imports = True

bb = lief.PE.Builder(pe_lief, config)
bb.build()
bb.write(str(patched_path))

Can't find section with the rva: 0x0


Reset IAT to the old IAT address

In [30]:
pe = pefile.PE(patched_path)
pe.full_load()

In [31]:
assert len(pe.DIRECTORY_ENTRY_IMPORT) == len(segments), "Change the MAX_REPEATED_ADDRESSES to >20"

In [32]:
# Point each rebuilt import descriptor back at the segment's first slot in the old IAT.
# FirstThunk is an RVA, hence the image base is subtracted.
image_base = pe.OPTIONAL_HEADER.ImageBase
for i, seg in enumerate(segments):
    first_slot_va = int(seg["Calladdr"][0], 16)
    descriptor = pe.DIRECTORY_ENTRY_IMPORT[i]
    descriptor.struct.FirstThunk = first_slot_va - image_base

In [33]:
temp = "exe.exe"
pe.write(filename=temp)
pe.close()
shutil.move(temp, patched_path)

WindowsPath('G:/Games/FA/FA-EMU/Shipping/GAME_patched.exe')

# Fix thunks

Suspect thunks are searched via <90 e9 ? ? ? ?> wildcard

In [34]:
# Keep only jmp hits located below the old IAT, with the address parsed to an integer
thunks = (
    pl.read_csv(byte_calls_p)
    .filter(pl.col("Instruction") == "jmp")
    .with_columns(pl.col("Call address").str.slice(2).str.to_integer(base=16).alias("Int_addr"))
    .filter(pl.col("Int_addr") < 0x01588000)  # this is where the old IAT begins
    .drop("subroutine", "Instruction", "Resolved name")
)
thunks.shape

(3236, 3)

In [35]:
valid_addresses = set(dump_imports["Address"].to_list())
thunks = thunks.filter(pl.col("Destination").is_in(valid_addresses))
print(thunks.shape)

(3234, 3)


A jmp is considered a thunk if another jmp sits exactly 6 bytes before or after it. The rows directly before and after a detected thunk are then counted as thunks as well.

In [36]:
# Integer value of the hex "Call address" string (the "0x" prefix is sliced off first)
get_addr = pl.col("Call address").str.slice(2).str.to_integer(base=16)
# True when the row above / below sits exactly 6 bytes away from this row.
# NOTE(review): both tests compare adjacent ROWS, so they assume the frame is ordered by
# address; nothing in this cell sorts it — confirm the CSV order.
is_prev = pl.col("Int_addr").shift(1) + 6 == pl.col("Int_addr")
is_next = pl.col("Int_addr").shift(-1) - 6 == pl.col("Int_addr")

# thunks = thunks.with_columns([is_prev.alias('is_prev'), is_next.alias('is_next')])

# Recomputes Int_addr (it was already added when the candidates were loaded)
thunks = thunks.with_columns(get_addr.alias("Int_addr"))
thunks = thunks.with_columns((is_next | is_prev).alias("is_thunk"))
# Spread the flag to the rows directly above and below; the result overwrites "is_thunk"
thunks = thunks.with_columns(pl.col("is_thunk") | pl.col("is_thunk").shift(1) | pl.col("is_thunk").shift(-1))
thunks.filter("is_thunk").shape[0]

890

In [37]:
thunks.filter(abs(pl.col("Int_addr") - 0x14ab766) < 48)

Call address,Destination,Int_addr,is_thunk
str,str,i64,bool
"""0x14ab766""","""0x6f6033ef""",21673830,True
"""0x14ab76c""","""0x6f65a95b""",21673836,True


In [38]:
# additional thunks: call addresses flagged by hand on top of the neighbour heuristic
extra_thunk_addrs = [
    "0x14aaadb",
    "0x14aaad5",
    "0x14aa9f5",
    "0x14aa9fb",
    "0x14aab3d",
    "0x14ab35d",
    "0x14ab363",
    "0x14ab405",
    "0x14ab40b",
    "0x14ab767",
    "0x14ab76d",
    "0x14abc33",
    "0x14abc39",
    "0x14abe61",
    "0xcedcb1",
    "0xcedcf1",
]

# One pass over the frame instead of one `with_columns` per address
thunks = thunks.with_columns(pl.col("is_thunk") | pl.col("Call address").is_in(extra_thunk_addrs))

In [39]:
thunks.filter(pl.col("Call address").str.slice(2).str.to_integer(base=16) == 0x14ab766)

Call address,Destination,Int_addr,is_thunk
str,str,i64,bool
"""0x14ab766""","""0x6f6033ef""",21673830,True


In [40]:
thunks = thunks.filter("is_thunk").drop("Int_addr")

# Computed outside the f-string: reusing double quotes inside a double-quoted f-string
# only parses on Python 3.12+.
n_thunk_targets = thunks.filter("is_thunk").unique("Destination").shape[0]
n_imports = iat.unique("Address").shape[0]
print(f"Found {n_thunk_targets} thunks and {n_imports} imports")

Found 870 thunks and 874 imports


In [41]:
thunks = thunks.join(iat.select("Module", "Function", "Address"), left_on="Destination", right_on="Address", how='left')
print(thunks.shape)
thunks.filter(pl.col("Module").is_null())

(924, 5)


Call address,Destination,is_thunk,Module,Function
str,str,bool,str,str


In [42]:
# For the rest, we'll create new thunks
available_thunk_places = [
    ["0x014AB2A4", 12],
    ["0x014AB368", 8],
    ["0x014AB3C2", 14],
    ["0x014AB5B6", 10],
    ["0x014AB602", 14],
]

def find_next_addr(size: int = 6) -> str | None:
    global available_thunk_places
    for i in range(len(available_thunk_places)):
        e = available_thunk_places[i]
        if e[1] > size:
            e[1] -= size
            retval = int(e[0], 16)
            e[0] = hex(retval + size)
            return hex(retval)
    return None

In [43]:
# Trying least intervention: no thunks = they aren't needed
skip = True

# Resolved imports plus the obfuscated one(s)
to_thunk = iat.vstack(obfuscated)

# Imports whose target address is not the destination of any detected thunk
already_thunked = thunks.filter("is_thunk")["Destination"].to_list()
unfound = to_thunk.filter(~pl.col("Address").is_in(already_thunked)).unique("Address")
print(unfound.shape)

if skip:
    unfound = unfound.clear()

(5, 4)


In [44]:
thunks = thunks.with_columns(pl.lit(False).alias("new"))

# Rows are collected first and appended once using the schema of `thunks`, so a missing
# module/function (None) becomes a null string instead of a Null-typed column that
# cannot be stacked onto the existing frame.
new_thunk_rows = []
for dest in unfound["Address"]:
    addr = find_next_addr()
    # Check before printing so a failed allocation is not reported as "Using None ..."
    if addr is None:
        raise RuntimeError("Can't place new thunk: no available space")
    print(f"Using {addr} to place a new thunk to {dest}")

    func = iat.filter(pl.col("Address") == dest)
    if func.shape[0] > 0:
        modname = func["Module"][0]
        funname = func["Function"][0]
    else:
        # Not in the resolved IAT (e.g. an obfuscated import)
        modname, funname = None, None

    new_thunk_rows.append(
        {
            "Call address": hex(int(addr, 16)),
            # NOTE(review): hex(int(...)) drops leading zeros, while "Address" values
            # elsewhere appear zero-padded — confirm later joins on Destination still match.
            "Destination": hex(int(dest, 16)),
            "is_thunk": True,
            "Module": modname,
            "Function": funname,
            "new": True,
        }
    )

if new_thunk_rows:
    thunks = thunks.vstack(pl.DataFrame(new_thunk_rows, schema=thunks.schema))

unfound = unfound.clear()
print(thunks.shape)

(924, 6)


In [45]:
# setting thunks to renewed iat: attach the IAT slot address of each thunk's destination
iat_slots = (
    to_thunk
    .select("Calladdr", "Address")
    .rename({"Calladdr": "iat_addr"})
    .unique("Address")
)
thunks = thunks.join(iat_slots, left_on="Destination", right_on="Address", how="left")
thunks.shape

(924, 7)

In [46]:
thunks.filter(pl.col("Call address").str.slice(2).str.to_integer(base=16) == 0x14ab766)

Call address,Destination,is_thunk,Module,Function,new,iat_addr
str,str,bool,str,str,bool,str
"""0x14ab766""","""0x6f6033ef""",True,"""msvcr90.dll""","""_endthreadex""",False,"""0x01588628"""


In [47]:
thunks.tail(6)

Call address,Destination,is_thunk,Module,Function,new,iat_addr
str,str,bool,str,str,bool,str
"""0x14fc16a""","""0x6f61338c""",True,"""msvcr90.dll""","""_snwprintf_s""",False,"""0x01588500"""
"""0x14fc170""","""0x6f616292""",True,"""msvcr90.dll""","""wprintf_s""",False,"""0x015884FC"""
"""0x14fc176""","""0x6ea29600""",True,"""d3dx10_42.dll""","""D3DX10SaveTextureToMemory""",False,"""0x01588C7C"""
"""0x14fc17c""","""0x6ea278f9""",True,"""d3dx10_42.dll""","""D3DX10LoadTextureFromTexture""",False,"""0x01588C78"""
"""0x14fc182""","""0x763a3dd0""",True,"""kernel32.dll""","""SetConsoleTextAttribute""",False,"""0x01588388"""
"""0x1504253""","""0x732f5ebb""",True,"""msvcp90.dll""","""??1?$basic_string@DU?$char_tra…",False,"""0x015884E8"""


# Fix calls

Suspect calls are <90 e8 ? ? ? ?> and <e8 ? ? ? ? 90>. The only concerning calls are "optimized". The rest point to thunks (restored) or iat (restored).

A broken jmp at 0xbd0394 (LeaveCriticalSection) made me include nop-jumps in the list: <90 e9 ? ? ? ?>

In [48]:
calls = pl.read_csv(byte_calls_p).filter(pl.col("Instruction").is_in(["call", "call2", "jmp"]))

# remove nop-jmps that are thunks:
thunk_addrs = set(thunks["Call address"].to_list())
calls = calls.filter(~pl.col("Call address").is_in(thunk_addrs))

# Cleared right after reading, so this contributes no rows
calls_inst = pl.read_csv(inst_calls_p).clear() # deprecated
calls = (
    calls.vstack(calls_inst)
    .unique("Call address")
    .drop("Resolved name")
    .with_columns(pl.col("Call address").str.slice(2).str.to_integer(base=16).alias("Int_addr"))
    .filter(pl.col("Int_addr") < 0x01588000)  # this is where the old IAT begins
)

print(calls.shape)
calls.head(3)

(31082, 5)


subroutine,Instruction,Call address,Destination,Int_addr
str,str,str,str,i64
"""-""","""call""","""0x8fa44a""","""0x6f64ccc9""",9413706
"""-""","""call""","""0xe10f2d""","""0x7639eb00""",14749485
"""-""","""call""","""0x713789""","""0x6f61d377""",7419785


In [49]:
# remove dupes (double patterns): drop a hit when the address one byte before it is also a hit
addrs = set(calls["Int_addr"].to_list())
follows_another_hit = (pl.col("Int_addr") - 1).is_in(addrs)
calls = calls.filter(~follows_another_hit).drop("Int_addr")

print(calls.shape)

(30885, 4)


In [50]:
# remove thunks from calls
# thunk_int_addr = thunks.select(pl.col("Call address").str.slice(2).str.to_integer(base=16))
# thunk_int_addr_1 = thunk_int_addr.select(pl.col("Call address") - 1)
# thunk_addrs = set(thunk_int_addr["Call address"].to_list() + thunk_int_addr_1["Call address"].to_list())

# calls = calls.filter(~pl.col("Call address").str.slice(2).str.to_integer(base=16).is_in(thunk_addrs))
# calls.shape

In [51]:
# filter only calls that point to api calls directly (still rel32 though)
valid_addresses = set(dump_imports["Address"].to_list())
calls = calls.filter(pl.col("Destination").is_in(valid_addresses))

calls.shape

(30873, 4)

In [52]:
iat.head(1)

Calladdr,Address,Module,Function
str,str,str,str
"""0x01588000""","""0x7622f2a0""","""advapi32.dll""","""CryptGetHashParam"""


In [53]:
# map with thunk addresses (one thunk per destination)
thunks_to_join = (
    thunks
    .select("Destination", "Call address")
    .rename({"Call address": "thunk address"})
    .unique("Destination")
)
calls = calls.join(thunks_to_join, on="Destination", how="left")

# and with the IAT slot that holds each destination
iats_to_join = (
    iat
    .select("Address", "Calladdr")
    .rename({"Address": "Destination", "Calladdr": "iat address"})
    .unique("Destination")
)
calls = calls.join(iats_to_join, on="Destination", how="left")

calls.shape

(30873, 6)

In [54]:
# all calls have their thunk and iat
unthunked = calls.filter(pl.col("thunk address").is_null()).height
print('Unthunked:', unthunked)
# disabled, since we're binding to IAT now
# assert unthunked == 0, unthunked

# Every call must map to an IAT slot, since that is what the patch points at
uniated = calls.filter(pl.col("iat address").is_null()).height
print('Uniated:', uniated)
assert uniated == 0, uniated

Unthunked: 1
Uniated: 0


# Patch PE

In [55]:
from addr_helpers import hex_to_LE, to_bin, rel_call

schema = {
    "patch_addr": pl.String,
    "mem_old": pl.String,
    "patch": pl.String,
}

In [56]:
def create_thunk(addr: str, dest: str, iat: str, new: bool, real: bool = True) -> pl.DataFrame:
    """
    Build the one-row patch record for a thunk.

    Args:
        addr: Hex address of the thunk location.
        dest: Hex address the existing nop+jmp thunk jumps to.
        iat: Hex address of the IAT slot; only read when `real` is true.
        new: True for a newly placed thunk: it is patched one byte after `addr`
            and the old bytes are expected to be six CC bytes.
        real: True to emit an FF25 jump through the IAT slot; False to re-emit the
            90E9 relative jump to `dest`.

    Returns:
        A frame with the columns "patch_addr", "mem_old" and "patch".
    """
    base_addr = int(addr, 16)
    next_addr = hex(base_addr + 6)  # nop, jmp is 6 bytes

    # rel_call/to_bin/hex_to_LE come from addr_helpers; presumably they yield the rel32 /
    # little-endian operand as hex text — confirm against that module.
    if new:
        old_bytes = "CC" * 6
    else:
        old_bytes = "90E9" + to_bin(rel_call(next_addr, dest))

    new_bytes = (
        "FF25" + to_bin(hex_to_LE(int(iat, 16)))
        if real
        else "90E9" + to_bin(rel_call(next_addr, dest))
    )

    return pl.DataFrame({
        "patch_addr": hex(base_addr + int(new)),
        "mem_old": old_bytes,
        "patch": new_bytes,
    })

In [57]:
# One patch record per thunk; a thunk without a module is not "real" and keeps its relative jmp
thunk_frames = [
    create_thunk(addr, dest, iat_addr, new, mod is not None)
    for addr, dest, mod, new, iat_addr in thunks.drop("is_thunk", "Function").rows()
]
# The empty schema-only frame keeps the result well-formed even with no thunks
thunks_patch = pl.concat([pl.DataFrame(schema=schema), *thunk_frames])
print(thunks_patch.shape)

(924, 3)


In [58]:
calls.head(5)

subroutine,Instruction,Call address,Destination,thunk address,iat address
str,str,str,str,str,str
"""-""","""call""","""0x8fa44a""","""0x6f64ccc9""","""0x14aac16""","""0x015887F0"""
"""-""","""call""","""0xe10f2d""","""0x7639eb00""","""0xa57892""","""0x01588314"""
"""-""","""call""","""0x713789""","""0x6f61d377""","""0x14aacb8""","""0x01588818"""
"""-""","""call""","""0x9ad745""","""0x732f5ebb""","""0xf60c78""","""0x015884E8"""
"""-""","""call""","""0x7f5289""","""0x6f64ccc9""","""0x14aac16""","""0x015887F0"""


In [59]:
calls.filter(pl.col("Call address").is_in(["0x6f6be9"]))

subroutine,Instruction,Call address,Destination,thunk address,iat address
str,str,str,str,str,str
"""-""","""call""","""0x6f6be9""","""0x732f7149""","""0xf60c90""","""0x01588394"""


In [60]:
patch_data = []

for call in tqdm(calls.rows()):
    sub, inst, addr, dest, thunk_addr, iat_addr = call

    if thunk_addr:
        thunk_addr = hex(int(thunk_addr, 16) - 1)

    naddr = hex(int(addr, 16) + 6)  # both nop, call and call, nop are 6-bytes
    if inst[-1] == "2":
        oaddr = hex(int(addr, 16) + 6)  # call, nop, in this case, jump starts from nop
    else:
        oaddr = naddr  # nop, call, in this case, jump starts from next instruction

    dest_rbin = to_bin(rel_call(oaddr, dest))  # rel32
    # thunk_bin = to_bin(rel_call(naddr, hex(int(thunk_addr, 16) - 5))) # TODO: wtf - 5?
    iat_rbin = to_bin(rel_call(naddr, iat_addr))  # rel32
    iat_bin = to_bin(hex_to_LE(int(iat_addr, 16)))  # imm32

    # Determine opcode based on instruction
    match inst:
        case "call":
            mem_old = "90E8" + dest_rbin
            patch = "FF15" + iat_bin
        case "jmp":
            mem_old = "90E9" + dest_rbin
            patch = "FF25" + iat_bin
            # raise RuntimeError("Shouldn't patch jumps for now")
        case _:
            raise RuntimeError(f"Unsupported instruction {inst}")

    patch_data.append(
        {
            "patch_addr": addr,
            "mem_old": mem_old,
            "patch": patch,
        }
    )

# Create DataFrame in one operation
calls_patch = pl.DataFrame(patch_data, schema=schema).sort("patch_addr")
calls_patch.shape

  0%|          | 0/30873 [00:00<?, ?it/s]

100%|██████████| 30873/30873 [00:00<00:00, 190816.78it/s]


(30873, 3)

In [61]:
thunks_patch.write_csv(patch_thunks_p)
calls_patch.write_csv(patch_calls_p)

Now run ida_patch.py script in IDA Pro and apply changes