diff --git a/pw64_filesys_dump.py b/pw64_filesys_dump.py index 384342b..fb23041 100755 --- a/pw64_filesys_dump.py +++ b/pw64_filesys_dump.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import argparse +import binascii import struct import sys @@ -39,6 +40,108 @@ def decompress_mio0(raw_bytes): if uncompressed_size <= 0: return output +def print_adat_decoded(hex_data): + # The DATA blocks in the ADAT container appear to be "coded" ASCII strings. + # The strings use a sort of look-up table as seen below. + # This was probably done for easier localization (Kanji font textures?) + # The Font Sprite/Texture maps are in the "STRG" container/blocks. + # This table was extrapolated from the FS dump and PJ64 memory searches. + char_map_combined = { # // Normal Font // + '00': '0', '01': '1', '02': '2', '03': '3', '04': '4', + '05': '5', '06': '6', '07': '7', '08': '8', '09': '9', + '0A': 'A', '0B': 'B', '0C': 'C', '0D': 'D', '0E': 'E', + '0F': 'F', '10': 'G', '11': 'H', '12': 'I', '13': 'J', + '14': 'K', '15': 'L', '16': 'M', '17': 'N', '18': 'O', + '19': 'P', '1A': 'Q', '1B': 'R', '1C': 'S', '1D': 'T', + '1E': 'U', '1F': 'V', '20': 'W', '21': 'X', '22': 'Y', + '23': 'Z', '24': 'a', '25': 'b', '26': 'c', '27': 'd', + '28': 'e', '29': 'f', '2A': 'g', '2B': 'h', '2C': 'i', + '2D': 'j', '2E': 'k', '2F': 'l', '30': 'm', '31': 'n', + '32': 'o', '33': 'p', '34': 'q', '35': 'r', '36': 's', + '37': 't', '38': 'u', '39': 'v', '3A': 'w', '3B': 'x', + '3C': 'y', '3D': 'z', '3E': '-', '3F': '#', '40': '<', + '41': '>', '42': ' ', '43': '\"', '44': '(', '45': ')', + '46': '*', '47': '&', '48': ',', '49': '.', '4A': '/', + '4B': '!', '4C': '?', '4D': '\'', '4E': '#', '4F': ':', + '50': '0', '51': '1', '52': '2', '53': '3', '54': '4', + '55': '5', '56': '6', '57': '7', '58': '8', '59': '9', + '5A': '\\', '5B': '\\', '5C': '\\', '5D': '\\', + '5E': '\\', '5F': '\\', + # // Bold Font // + '60': '0', '61': '1', '62': '2', '63': '3', '64': '4', + '65': '5', '66': '6', '67': '7', '68': '8', '69': '9', + '6A': 'A', '6B': 'B', '6C': 'C', '6D': 'D', '6E': 'E', + '6F': 'F', '70': 'G', '71': 'H', '72': 'I', '73': 'J', + '74': 'K', '75': 'L', '76': 'M', '77': 'N', '78': 'O', + '79': 'P', '7A': 'Q', '7B': 'R', '7C': 'S', '7D': 'T', + '7E': 'U', '7F': 'V', '80': 'W', '81': 'X', '82': 'Y', + '83': 'Z', '84': 'a', '85': 'b', '86': 'c', '87': 'd', + '88': 'e', '89': 'f', '8A': 'g', '8B': 'h', '8C': 'i', + '8D': 'j', '8E': 'k', '8F': 'l', '90': 'm', '91': 'n', + '92': 'o', '93': 'p', '94': 'q', '95': 'r', '96': 's', + '97': 't', '98': 'u', '99': 'v', '9A': 'w', '9B': 'x', + '9C': 'y', '9D': 'z', '9E': '-', '9F': '#', 'A0': '<', + 'A1': '>', 'A2': ' ', 'A3': '\"', 'A4': '(', 'A5': ')', + 'A6': '*', 'A7': '&', 'A8': ',', 'A9': '.', 'AA': '/', + 'AB': '!', 'AC': '?', 'AD': '\'', 'AE': '}', 'AF': ':', + 'B0': '0', 'B1': '1', 'B2': '2', 'B3': '3', 'B4': '4', + 'B5': '5', 'B6': '6', 'B7': '7', 'B8': '8', 'B9': '9', + 'BA': '\\', 'BB': '\\', 'BC': '\\', 'BD': '\\', + 'BE': '\\', 'BF': '\\' } + + # Take the raw binary data and convert to Hex + hex_data = str(binascii.b2a_hex(hex_data),'ascii') + + # Split input stream of characters into hex bytes + hex_split = [(hex_data[i:i+2]) for i in range(0, len(hex_data), 2)] + + # There are various "command" codes that I haven't figured out yet. + # They are detected below. + # Special "turn on bold" command (until newline)? + # 00 fd | 00 d4 | 00 00 | 00 4a + # 00 fd | 00 69 | 00 00 | 00 + # Turn off bold? + # 00 fd | 00 7d | 00 00 + # Weird ">" arrow in Sound settings + # 00 fd | 00 b4 | 00 00 00 + + # Empty list for storing final string + adat_final_string = [] + + # Read a pair of hex bytes + for i in range(0, len(hex_split), 2): + hex_pair = hex_split[i:i+2] + + char_byte1 = hex_pair[0].upper() + char_byte2 = hex_pair[1].upper() + + if char_byte1 == '00': + if char_byte2 == 'CA': + # slash? '\' ? + pass + elif char_byte2 == 'D4': + # Unknown char + pass + elif char_byte2 == 'FE': + # Newline + adat_final_string.append('\n') + elif char_byte2 == 'FD': + # Tab? + pass + elif char_byte2 == 'FF': + # EOF/EOS + break + else: + adat_final_string.append(char_map_combined[char_byte2]) + else: + # We found some weird control char in our pair? + adat_final_string.append('?0') + + print(' --------- Decoded String ---------') + for line in "".join(adat_final_string).splitlines(): + print (' %s' % line) + print(' ----------------------------------') + def print_hex_dump(raw_bytes): count = 0 for b in raw_bytes: @@ -132,8 +235,8 @@ def hexdump(raw_bytes): elif magicStr in ['PART', 'STRG', 'FRMT', 'ESND', 'TPAD', 'CNTG', 'HOPD', 'LWIN', 'LSTP', 'TARG', 'FALC', 'BALS', 'HPAD', 'BTGT', - 'THER', 'PHTS', 'SIZE', 'DATA', 'QUAT', - 'XLAT', 'PHDR', 'RHDR', 'PPOS', 'RPKT', + 'THER', 'PHTS', 'SIZE', 'QUAT', 'XLAT', + 'PHDR', 'RHDR', 'PPOS', 'RPKT', '.CTL', '.TBL', 'SCPP', 'SCPH', 'SCPX', 'SCPY', 'SCPR', 'SCPZ', 'SCP#', 'LEVL', 'RNGS', 'BNUS', 'WOBJ', 'LPAD', 'TOYS', 'TPTS', 'APTS']: @@ -141,6 +244,12 @@ def hexdump(raw_bytes): sectionData = fin.read(length) print(' %s: 0x%06X:' % (magicStr, length)) hexdump(sectionData) + elif magicStr == 'DATA': # ASCII(?) Data, game/mission/etc text + length = int.from_bytes(fin.read(4), byteorder='big') + sectionData = fin.read(length) + print(' %s: 0x%06X:' % (magicStr, length)) + print_hex_dump(sectionData) + print_adat_decoded(sectionData) # PAD always seems to be 4 bytes of 0 - ignore it elif magicStr in ['PAD ']: length = int.from_bytes(fin.read(4), byteorder='big')