# Search for elements and addresses in heap dump mem files

In [46]:
import os
import json
from dataclasses import dataclass

In [47]:

@dataclass
class ProgramParams:
    """
    Wrapper class for program parameters.

    The parameters are class-level constants. They are intentionally left
    without type annotations so that ``@dataclass`` does not turn them into
    fields; the hand-written ``__init__`` below is kept by ``@dataclass``.

    Instantiating the class validates that both data files exist, prints the
    outcome, and terminates with exit status 1 if one of them is missing.
    """
    # Number of bytes in one "line" of the heap dump.
    LINE_BLOCK_BYTE_SIZE = 16

    # expanduser("~") is used instead of os.environ['HOME'] so that defining
    # the class does not raise KeyError when HOME is not set.
    JSON_TEST_FILE_PATH = os.path.expanduser("~") + "/Documents/code/phdtrack/phdtrack_project_3/data/302-1644391327.json"
    HEAP_DUMP_RAW_FILE_PATH = os.path.expanduser("~") + "/Documents/code/phdtrack/phdtrack_project_3/data/302-1644391327-heap.raw"

    def __init__(self, **kwargs):
        """
        Build the parameter set and validate the configured paths.

        Keyword arguments named after one of the parameters
        (``LINE_BLOCK_BYTE_SIZE``, ``JSON_TEST_FILE_PATH``,
        ``HEAP_DUMP_RAW_FILE_PATH``) override the class-level default for
        this instance only. Any other keyword argument is ignored.

        Raises:
            SystemExit: with code 1 when one of the two paths does not exist.
        """
        for param_name in (
            "LINE_BLOCK_BYTE_SIZE",
            "JSON_TEST_FILE_PATH",
            "HEAP_DUMP_RAW_FILE_PATH",
        ):
            if param_name in kwargs:
                setattr(self, param_name, kwargs[param_name])

        if (
            self.check_path_exists(self.JSON_TEST_FILE_PATH) and
            self.check_path_exists(self.HEAP_DUMP_RAW_FILE_PATH)
        ):
            print("Program paths are OK.")
        else:
            print("Program paths are NOT OK.")
            # Raise SystemExit directly: the exit() helper is injected by the
            # site module (or by IPython) and is not guaranteed to exist.
            raise SystemExit(1)

    def check_path_exists(self, path: str) -> bool:
        """
        Check if the path exists. Return True if it exists, False otherwise.

        A warning naming the path is printed when it does not exist.
        """
        if not os.path.exists(path):
            print('WARNING: Path does not exist: %s' % path)
            return False
        return True



# Shared parameter object; constructing it checks that both data file paths
# exist, prints the result, and exits with status 1 if one is missing.
PARAMS = ProgramParams()

Program paths are OK.


In [48]:
# read the JSON file and get all pairs of addresses and keys
@dataclass
class KeyData:
    """
    Wrapper class for key data.

    Holds what the JSON annotation file records about one key: the entry
    name, the key bytes, the key's address and two length values.
    """
    name: str  # JSON entry name of the key, e.g. 'KEY_A'
    key: bytes  # key bytes, decoded from the entry's hex string
    addr: bytes  # key address, decoded from the '<name>_ADDR' hex string
    len: int  # integer value of '<name>_LEN' (presumably the nominal key length — TODO confirm)
    real_len: int  # integer value of '<name>_REAL_LEN' (presumably the actual stored length — TODO confirm)

# Results of parsing the JSON annotation file: the heap start address (raw
# bytes) and every key record, indexed by the key's address as an integer.
heap_start_addr = None
addr_key_pairs: dict[int, KeyData] = {}  # key address (int) -> key data (KeyData)

with open(PARAMS.JSON_TEST_FILE_PATH, 'r') as f:
    json_data = json.load(f)

heap_start_addr = bytes.fromhex(json_data["HEAP_START"])

for entry_name in json_data:
    # Only the 5-character 'KEY_<letter>' entries denote a key; their
    # '_ADDR' / '_LEN' / '_REAL_LEN' companion entries have longer names.
    if len(entry_name) != 5 or not entry_name.startswith('KEY_'):
        continue

    addr_bytes = bytes.fromhex(json_data[entry_name + "_ADDR"])
    key_record = KeyData(
        name=entry_name,
        key=bytes.fromhex(json_data[entry_name]),
        addr=addr_bytes,
        len=int(json_data[entry_name + "_LEN"]),
        real_len=int(json_data[entry_name + "_REAL_LEN"]),
    )
    # The address hex string is read as a big-endian unsigned integer.
    addr_key_pairs[int.from_bytes(addr_bytes, byteorder='big', signed=False)] = key_record

# Report how many keys were collected.
print("Nb of keys: %d" % len(addr_key_pairs))

Nb of keys: 6


In [52]:
# Read the heap dump file and check that every known key is stored at the
# address recorded for it.
with open(PARAMS.HEAP_DUMP_RAW_FILE_PATH, 'rb') as f:
    heap_dump = f.read()

# Split the heap dump into lines of LINE_BLOCK_BYTE_SIZE bytes (the last line
# may be shorter when the dump size is not a multiple of the line size).
heap_dump_lines = [
    heap_dump[i:i + PARAMS.LINE_BLOCK_BYTE_SIZE]
    for i in range(0, len(heap_dump), PARAMS.LINE_BLOCK_BYTE_SIZE)
]

# Print the first 5 lines; slicing copes with dumps shorter than 5 lines.
for dump_line in heap_dump_lines[:5]:
    print(dump_line.hex())

print("Number of dump lines: %d" % len(heap_dump_lines), "of size:", PARAMS.LINE_BLOCK_BYTE_SIZE, "bytes")

# The heap start address is the same for every key: convert it only once.
heap_start = int.from_bytes(heap_start_addr, byteorder='big', signed=False)

# Go to each known key address and check whether the key is there.
for key_data in addr_key_pairs.values():
    # Byte offset of the key inside the dump: its address relative to the
    # heap start address.
    key_offset = int.from_bytes(key_data.addr, byteorder='big', signed=False) - heap_start

    # Index of the dump line containing the key's first byte. Line addresses
    # are LINE_BLOCK_BYTE_SIZE bytes apart, hence the integer division.
    line_index = key_offset // PARAMS.LINE_BLOCK_BYTE_SIZE
    print("key name:", key_data.name, "index:", line_index, "index in hex:", hex(line_index))

    # Compare at the exact byte offset over the key's own length, so keys
    # longer than one line or not aligned on a line boundary are handled.
    # A negative offset (address before the heap start) is rejected because
    # slicing would otherwise count from the end of the dump; an offset past
    # the end yields a short slice and therefore no match.
    key_found = (
        key_offset >= 0
        and heap_dump[key_offset:key_offset + len(key_data.key)] == key_data.key
    )
    if key_found:
        print("Key found: %s" % key_data.name)
    else:
        print("Key NOT found: %s" % key_data.name)
        
    

00000000000000005102000000000000
02040706070704070504070204060106
06070107060702020201000000000001
03010001000000000000000000000001
00000000030200000000000000000000
Number of dump lines: 17408 of size: 16 bytes
key name: KEY_A index: 5537 index in hex: 0x15a1
Key found: KEY_A
key name: KEY_B index: 4533 index in hex: 0x11b5
Key found: KEY_B
key name: KEY_C index: 5546 index in hex: 0x15aa
Key found: KEY_C
key name: KEY_D index: 4537 index in hex: 0x11b9
Key found: KEY_D
key name: KEY_E index: 6069 index in hex: 0x17b5
Key found: KEY_E
key name: KEY_F index: 3620 index in hex: 0xe24
Key found: KEY_F
