Permalink
Fetching contributors…
Cannot retrieve contributors at this time
executable file 255 lines (188 sloc) 9.07 KB
#!/usr/bin/env python3
# yarp: yet another registry parser
# (c) Maxim Suhanov
from yarp import RegistryCarve, RegistryFile, __version__
import argparse
from collections import namedtuple
from io import BytesIO
import os
import sys
# Program name passed to argparse as "prog".
PROGRAM_NAME = 'yarp-carver'
# Version string reported by the --version option; taken from the yarp package.
PROGRAM_VERSION = __version__
# Container for the parsed command line options built by parse_args().
Arguments = namedtuple('Arguments', [ 'source_file', 'output_dir', 'no_reconstruction', 'ntfs_offsets', 'rebuild_margin' ])
def parse_ntfs_offsets(ntfs_offsets):
    """Turn the value of the --volumes option into a set of volume offsets.

    The argument is either None or a list of strings as produced by argparse;
    only a list with exactly one comma-separated string is processed, anything
    else gives an empty set. Each item must be a non-negative integer that is
    a multiple of 512. Rejected items are reported on stderr and skipped.
    """
    accepted = set()
    if ntfs_offsets is None or len(ntfs_offsets) != 1:
        return accepted

    for token in ntfs_offsets[0].split(','):
        try:
            value = int(token)
        except ValueError:
            print('Invalid offset (not an integer): {}'.format(token), file = sys.stderr)
            continue

        if value < 0:
            print('Invalid offset (negative): {}'.format(value), file = sys.stderr)
        elif value % 512 != 0:
            print('Invalid offset (not aligned): {}'.format(value), file = sys.stderr)
        else:
            accepted.add(value)

    return accepted
def parse_args():
    """Parse the command line and return the options as an Arguments tuple."""
    cli = argparse.ArgumentParser(
        prog = PROGRAM_NAME,
        description = 'Carve Windows registry files and fragments from a disk image (or a similar source).',
        add_help = False,
        prefix_chars = '-'
    )

    # The built-in help option is disabled above so that --help can be
    # registered by hand in the "Miscellaneous arguments" group.
    positional = cli.add_argument_group('Main arguments')
    optional = cli.add_argument_group('Optional arguments')
    miscellaneous = cli.add_argument_group('Miscellaneous arguments')

    positional.add_argument('file', help = 'a disk image')
    positional.add_argument('outdir', help = 'an output directory')

    optional.add_argument('--rebuild-margin', action = 'store_true', help = 'include a margin (registry data with no specific header preceding an identified fragment) to a fragment by rebuilding it')
    optional.add_argument('--no-reconstruction', action = 'store_true', help = 'do not reconstruct fragmented hives')
    optional.add_argument('--volumes', action = 'store', nargs = 1, metavar = 'offsets', default = None, help = 'use a given comma-separated list of NTFS volume offsets (in bytes) to reconstruct fragmented hives using NTFS metadata (this requires reading the disk image twice)')

    miscellaneous.add_argument('--help', action = 'help', help = 'show this help message and exit')
    miscellaneous.add_argument('--version', action = 'version', help = 'show the version number and exit', version = PROGRAM_VERSION)

    namespace = cli.parse_args()

    # The raw --volumes value (None or a one-item list) is validated and
    # converted to a set of integers before it is stored.
    return Arguments(
        source_file = namespace.file,
        output_dir = namespace.outdir,
        no_reconstruction = namespace.no_reconstruction,
        ntfs_offsets = parse_ntfs_offsets(namespace.volumes),
        rebuild_margin = namespace.rebuild_margin
    )
def make_sane_filename(filename):
    """Strip NUL, '/', '\\' and ':' from a name; return 'unknown' if nothing is left."""
    # A translation table that maps each unwanted character to "delete".
    unwanted = str.maketrans('', '', '\x00/\\:')
    cleaned = filename.translate(unwanted)
    return cleaned if cleaned != '' else 'unknown'
def print_progress_carving(bytes_read, bytes_total):
    """Report carving progress (bytes read so far and total bytes) on stderr."""
    progress_line = 'Bytes read / Bytes total: {} / {}'.format(bytes_read, bytes_total)
    print(progress_line, file = sys.stderr)
def print_progress_reconstruction():
    """Write one progress dot to stderr, without a newline, and flush it right away."""
    print('.', end = '', file = sys.stderr, flush = True)
args = parse_args()

# The output directory must already exist; it is never created here.
output_dir_exists = os.path.isdir(args.output_dir)
if not output_dir_exists:
    print('Output directory does not exist: {}'.format(args.output_dir), file = sys.stderr)
    sys.exit(255)

# The source stays open for the rest of the script: carved data is read back
# from it by offset, and it is closed at the very end.
try:
    f = open(args.source_file, 'rb')
except OSError:
    # IOError is an alias of OSError on Python 3, so one name covers both.
    print('Source file cannot be opened: {}'.format(args.source_file), file = sys.stderr)
    sys.exit(255)

# Every carve result that gets written out is also collected here.
results = []

carver = RegistryCarve.Carver(f)
carver.progress_callback = print_progress_carving
def _hive_basename(internal_name):
    """Return a sanitized base name for a hive's internal (Windows-style) file name."""
    # Only the last backslash-separated component is kept; a name without
    # backslashes is used as a whole.
    return make_sane_filename(internal_name.split('\\')[-1])


def _read_source(offset, size):
    """Read up to size bytes from the source file, starting at offset."""
    f.seek(offset)
    return f.read(size)


def _save_output(output_filename, data):
    """Write data to a file called output_filename inside the output directory."""
    output_file = os.path.join(args.output_dir, output_filename)
    with open(output_file, 'wb') as out_f:
        out_f.write(data)


def _rebuild_fragment(fragment):
    """Return (output_filename, data) for a hive fragment carved with --rebuild-margin.

    Three outcomes are possible:
    - no margin suggested: the plain fragment, named '<offset>-fragment_no_margin';
    - margin rebuilt: the translated data, named '<offset>-fragment_with_margin';
    - margin rejected by the translator: the plain fragment, named
      '<offset>-fragment_with_invalid_margin'.

    The file name is chosen only after the outcome is known. Previously the
    output path was computed before the margin was validated, so the
    'invalid margin' name was assigned but never used.
    """
    offset = fragment.offset
    margin = fragment.suggested_margin

    if margin <= 0:
        return '{}-fragment_no_margin'.format(offset), _read_source(offset, fragment.size)

    # The margin is the data immediately preceding the fragment in the source.
    with BytesIO(_read_source(offset - margin, fragment.size + margin)) as raw:
        try:
            translator = RegistryFile.FragmentWithMarginTranslator(raw, margin)
        except (RegistryFile.HiveBinException, ValueError):
            translator = None

    if translator is None:
        # Something is wrong with the margin, write a usual fragment instead.
        return '{}-fragment_with_invalid_margin'.format(offset), _read_source(offset, fragment.size)

    data = translator.getvalue()
    translator.close()
    return '{}-fragment_with_margin'.format(offset), data


# Header of the tab-separated report printed on stdout, one row per carve result.
print('Offset\tSize\tTruncated\tInternal file name / Comment')

# The two leading True flags are passed exactly as before; their meaning is
# defined by RegistryCarve.Carver.carve().
for carve_result in carver.carve(True, True, args.rebuild_margin):
    # Exact type comparison (not isinstance) is kept on purpose so that the
    # dispatch stays the same whatever the relationship between result types.
    result_type = type(carve_result)

    if result_type is RegistryCarve.CarveResult:
        # A hive with a file header: copied from the source as is.
        print('{}\t{}\t{}\t{}'.format(carve_result.offset, carve_result.size, carve_result.truncated, carve_result.filename))

        output_filename = '{}_{}'.format(carve_result.offset, _hive_basename(carve_result.filename))
        if carve_result.truncated:
            output_filename += '-truncated'

        _save_output(output_filename, _read_source(carve_result.offset, carve_result.size))

    elif result_type is RegistryCarve.CarveResultFragment:
        if not args.rebuild_margin:
            print('{}\t{}\t{}\t{}'.format(carve_result.offset, carve_result.size, True, '<hive fragment>'))

            output_filename = '{}-fragment'.format(carve_result.offset)
            buf = _read_source(carve_result.offset, carve_result.size)
        else:
            print('{}\t{}\t{}\t{}'.format(carve_result.offset, carve_result.size, True, '<hive fragment, margin = {}>'.format(carve_result.suggested_margin)))

            output_filename, buf = _rebuild_fragment(carve_result)

        _save_output(output_filename, buf)

    elif result_type is RegistryCarve.CarveResultCompressed:
        # The result carries the decompressed data itself, so nothing is read
        # from the source; the "Truncated" column is reported as 'Unknown'.
        print('{}\t{}\t{}\t{}'.format(carve_result.offset, len(carve_result.buffer_decompressed), 'Unknown', carve_result.filename))

        output_filename = '{}_{}-compressed'.format(carve_result.offset, _hive_basename(carve_result.filename))
        _save_output(output_filename, carve_result.buffer_decompressed)

    elif result_type is RegistryCarve.CarveResultFragmentCompressed:
        print('{}\t{}\t{}\t{}'.format(carve_result.offset, len(carve_result.buffer_decompressed), True, '<compressed hive fragment>'))

        output_filename = '{}-compressed_fragment'.format(carve_result.offset)
        _save_output(output_filename, carve_result.buffer_decompressed)

    else:
        # Results of any other type are neither written nor collected.
        continue

    # Collected results are handed to the reconstruction step later on.
    results.append(carve_result)
# Reconstruction of fragmented hives from the carve results collected so far;
# skipped when --no-reconstruction was given.
# NOTE(review): the indentation of this script has been lost, so the nesting of
# the statements below cannot be read off the text. In particular, confirm
# whether the NTFS-based step further down is meant to run only when
# reconstruction is enabled before re-indenting this part.
if not args.no_reconstruction:
# Start a progress line on stderr; the progress callback appends dots to it.
print('', file = sys.stderr)
print('Reconstructing fragmented hives', end = '', file = sys.stderr)
sys.stderr.flush()
reconstructor = RegistryCarve.HiveReconstructor(f)
reconstructor.set_fragments(results)
reconstructor.progress_callback = print_progress_reconstruction
# Each item is a pair: the carve result a hive was rebuilt from and the rebuilt data.
for carve_result, hive_buf in reconstructor.reconstruct_fragmented():
# Keep only the last component of the internal (backslash-separated) name.
regf_filename = carve_result.filename
if regf_filename.rfind('\\') != -1:
regf_filename = regf_filename.split('\\')[-1]
regf_filename = make_sane_filename(regf_filename)
output_filename = '{}_{}-reconstructed'.format(carve_result.offset, regf_filename)
output_file = os.path.join(args.output_dir, output_filename)
with open(output_file, 'wb') as out_f:
out_f.write(hive_buf)
# Presumably terminates the line of progress dots — confirm its nesting level.
print('', file = sys.stderr)
# NTFS-aware reconstruction: runs only when at least one valid volume offset
# was supplied with --volumes.
if len(args.ntfs_offsets) > 0:
ntfs_carver = RegistryCarve.NTFSAwareCarver(f)
ntfs_carver.progress_callback = print_progress_carving
ntfs_carver.set_fragments(results)
print('', file = sys.stderr)
print('Searching for NTFS data attribute records', file = sys.stderr)
ntfs_carver.find_data_runs()
print('Reconstructing fragmented hives...', file = sys.stderr)
# Reconstruction is attempted once per supplied volume offset.
for ntfs_offset in args.ntfs_offsets:
for carve_result, hive_buf in ntfs_carver.reconstruct_ntfs(ntfs_offset):
regf_filename = carve_result.filename
if regf_filename.rfind('\\') != -1:
regf_filename = regf_filename.split('\\')[-1]
regf_filename = make_sane_filename(regf_filename)
# NOTE(review): the output name does not include ntfs_offset, so results for
# different volume offsets that share a hive offset and name would overwrite
# each other — confirm whether that can happen.
output_filename = '{}_{}-reconstructed_ntfs'.format(carve_result.offset, regf_filename)
output_file = os.path.join(args.output_dir, output_filename)
with open(output_file, 'wb') as out_f:
out_f.write(hive_buf)
# Close the source file opened at the start of the script.
f.close()