Skip to content

Commit

Permalink
Support decoding UTF-16 paths (#50)
Browse files (browse the repository at this point in the history)
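In at least some pak files, a negative path length in the index flags a
UTF-16 encoded path; its magnitude is the length in 2-byte code units, so
twice that many bytes are read and decoded as UTF-16LE. This patch changes
the decoding logic to handle that case, but doesn't change index encoding,
which still uses UTF-8.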
  • Loading branch information
benrg committed Jul 12, 2021
1 parent 33a5ccd commit 5c8f61f
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions u4pak.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,8 +603,12 @@ def base_offset(self):
return self.offset

def read_path(stream: io.BufferedReader, encoding: str = 'utf-8') -> str:
path_len, = st_unpack('<I',stream.read(4))
return stream.read(path_len).rstrip(b'\0').decode(encoding).replace('/',os.path.sep)
path_len, = st_unpack('<i',stream.read(4))
if path_len < 0:
# in at least some format versions, this indicates a UTF-16 path
path_len = -2 * path_len
encoding = 'utf-16le'
return stream.read(path_len).decode(encoding).rstrip('\0').replace('/',os.path.sep)

def pack_path(path: str, encoding: str = 'utf-8') -> bytes:
encoded_path = path.replace(os.path.sep, '/').encode('utf-8') + b'\0'
Expand Down

0 comments on commit 5c8f61f

Please sign in to comment.