-
Notifications
You must be signed in to change notification settings - Fork 107
Expand file tree
/
Copy pathlzma.py
More file actions
91 lines (72 loc) · 2.95 KB
/
lzma.py
File metadata and controls
91 lines (72 loc) · 2.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import io
import lzma
from typing import Optional
from structlog import get_logger
from unblob.extractors import Command
from ...file_utils import (
DEFAULT_BUFSIZE,
Endian,
InvalidInputFormat,
convert_int32,
convert_int64,
)
from ...models import File, Handler, HexString, ValidChunk
# Module-level structlog logger used for debug output by the handler below.
logger = get_logger()
# Largest value accepted for the header's uncompressed-size field:
# 256 GiB (256 * 1024**3 bytes).
MAX_UNCOMPRESSED_SIZE = 256 * 1024 * 1024 * 1024
# Fraction of the declared uncompressed size that must have been decompressed
# before running out of input is tolerated (and only logged); a stream that
# ends earlier than this is rejected as invalid.
MIN_READ_RATIO = 0.1
class LZMAHandler(Handler):
    """Handler for LZMA streams stored in the ``lzma.FORMAT_ALONE`` container.

    A pattern hit is confirmed in two steps: the header's dictionary-size and
    uncompressed-size fields are sanity-checked, then the data is fed through
    ``lzma.LZMADecompressor`` to discover where the stream actually ends.
    """

    NAME = "lzma"

    # The pattern text below is reproduced verbatim (including its layout),
    # since it is part of the string literal handed to HexString.
    PATTERNS = [
        HexString(
            """
// pre-computed valid properties bytes
(
51 | 5A | 5B | 5C | 5D | 5E | 63 | 64 | 65 | 66 | 6C | 6D | 6E | 75 | 76 | 7E |
87 | 88 | 89 | 8A | 8B | 90 | 91 | 92 | 93 | 99 | 9A | 9B | A2 | A3 | AB | B4 |
B5 | B6 | B7 | B8 | BD | BE | BF | C0 | C6 | C7 | C8 | CF | D0 | D8
)
// dictionary size
00 00 ( 00 | 01 | 04 | 08 | 10 | 20 | 40 | 80) ( 00 | 01 | 02 | 04 | 08 )
"""
        )
    ]

    EXTRACTOR = Command("7z", "x", "-y", "{inpath}", "-o{outdir}")

    def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
        """Find the extent of the LZMA stream that starts at *start_offset*.

        The header is read as: one properties byte (skipped here), a 4-byte
        little-endian dictionary size, then an 8-byte little-endian
        uncompressed size.

        Args:
            file: Input to read from; it is repositioned by this method.
            start_offset: Offset of the first byte of the candidate stream.

        Returns:
            A ``ValidChunk`` beginning at *start_offset* and ending at the
            current file position minus whatever the decompressor reported
            as unused trailing data.

        Raises:
            InvalidInputFormat: If a header field fails validation, if the
                decompressor raises ``lzma.LZMAError``, or if the input ends
                before ``MIN_READ_RATIO`` of the declared size was produced.
        """
        # Step over the properties byte; the dictionary size comes next.
        file.seek(start_offset + 1)
        dict_size = convert_int32(file.read(4), Endian.LITTLE)

        # Section 1.1.2 of the format definition: the dictionary size must be
        # non-zero and a power of two (a power of two has a single bit set, so
        # clearing its lowest set bit leaves zero).
        if not dict_size or dict_size & (dict_size - 1):
            raise InvalidInputFormat

        declared_size = convert_int64(file.read(8), Endian.LITTLE)

        # Section 1.1.3 of the format definition: the uncompressed size is
        # either the "unknown" marker (0xFFFFFFFFFFFFFFFF) or below 256GB.
        size_is_unknown = declared_size == 0xFFFFFFFFFFFFFFFF
        if not size_is_unknown and declared_size >= MAX_UNCOMPRESSED_SIZE:
            raise InvalidInputFormat

        # Rewind so the decompressor sees the stream from its very first byte.
        file.seek(start_offset, io.SEEK_SET)
        decoder = lzma.LZMADecompressor(format=lzma.FORMAT_ALONE)
        # Running total of *decompressed* bytes produced so far.
        produced = 0

        try:
            while produced < declared_size and not decoder.eof:
                block = file.read(DEFAULT_BUFSIZE)
                if block:
                    produced += len(decoder.decompress(block))
                    continue

                # Out of input before the decompressor signalled end-of-stream.
                # NOTE(review): with the "unknown size" marker this threshold
                # is enormous, so this path effectively always rejects the
                # stream - confirm that is intended.
                if produced < (declared_size * MIN_READ_RATIO):
                    raise InvalidInputFormat("Very early truncated LZMA stream")

                logger.debug(
                    "LZMA stream is truncated.",
                    read_size=produced,
                    uncompressed_size=declared_size,
                )
                break
        except lzma.LZMAError as exc:
            raise InvalidInputFormat from exc

        # Bytes read past the end of the stream do not belong to the chunk.
        stream_end = file.tell() - len(decoder.unused_data)
        return ValidChunk(start_offset=start_offset, end_offset=stream_end)