Skip to content

Commit

Permalink
Merge pull request #73 from obsidianforensics/site-characteristics
Browse files Browse the repository at this point in the history
Parse "Site Characteristics Database" LevelDB
  • Loading branch information
obsidianforensics authored Feb 2, 2021
2 parents 2293f49 + 4344b1b commit 317a8c3
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 4 deletions.
4 changes: 4 additions & 0 deletions pyhindsight/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,9 @@ def generate_excel(self, output_object):
w.write(row_number, 5, item.interpretation, blue_value_format) # interpretation
w.write(row_number, 6, item.profile, blue_value_format) # Profile

if friendly_date(item.timestamp) < '1970-01-02':
w.set_row(row_number, options={'hidden': True})

except Exception as e:
log.error(f'Failed to write row to XLSX: {e}')

Expand All @@ -835,6 +838,7 @@ def generate_excel(self, output_object):
# Formatting
w.freeze_panes(2, 0) # Freeze top row
w.autofilter(1, 0, row_number, 19) # Add autofilter
w.filter_column('B', 'Timestamp > 1970-01-02')

s = workbook.add_worksheet('Storage')
# Title bar
Expand Down
61 changes: 59 additions & 2 deletions pyhindsight/browsers/chrome.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ class Chrome(WebBrowser):
def __init__(self, profile_path, browser_name=None, cache_path=None, version=None, timezone=None,
parsed_artifacts=None, parsed_storage=None, storage=None, installed_extensions=None,
artifacts_counts=None, artifacts_display=None, available_decrypts=None, preferences=None,
no_copy=None, temp_dir=None):
no_copy=None, temp_dir=None, origin_hashes=None):
WebBrowser.__init__(self, profile_path, browser_name=browser_name, cache_path=cache_path, version=version,
timezone=timezone, parsed_artifacts=parsed_artifacts, parsed_storage=parsed_storage,
artifacts_counts=artifacts_counts, artifacts_display=artifacts_display,
preferences=preferences, no_copy=no_copy, temp_dir=temp_dir)
preferences=preferences, no_copy=no_copy, temp_dir=temp_dir, origin_hashes=origin_hashes)
self.profile_path = profile_path
self.browser_name = "Chrome"
self.cache_path = cache_path
Expand All @@ -52,6 +52,7 @@ def __init__(self, profile_path, browser_name=None, cache_path=None, version=Non
self.preferences = preferences
self.no_copy = no_copy
self.temp_dir = temp_dir
self.origin_hashes = origin_hashes

if self.version is None:
self.version = []
Expand All @@ -71,6 +72,9 @@ def __init__(self, profile_path, browser_name=None, cache_path=None, version=Non
if self.preferences is None:
self.preferences = []

if self.origin_hashes is None:
self.origin_hashes = {}

if self.artifacts_counts is None:
self.artifacts_counts = {}

Expand Down Expand Up @@ -1970,6 +1974,52 @@ def get_file_system(self, path, dir_name):
self.artifacts_counts['File System'] = len(result_list)
self.parsed_storage.extend(result_list)

def get_site_characteristics(self, path, dir_name):
    """Parse Chrome's 'Site Characteristics Database' LevelDB.

    Each non-metadata record is decoded as a SiteDataProto and appended to
    ``self.parsed_artifacts`` as a ``Chrome.PreferenceItem``. Record keys are
    looked up in ``self.origin_hashes`` to recover a readable host; keys with
    no match are reported as ``MD5 of origin: <key>``.

    Args:
        path: Directory that contains the database directory.
        dir_name: Name of the LevelDB directory inside ``path``.

    Side effects:
        Refreshes ``self.origin_hashes`` via ``build_hash_list_of_origins()``,
        sets ``self.artifacts_counts['Site Characteristics']`` and extends
        ``self.parsed_artifacts``.
    """
    result_list = []

    # Make sure the hash -> host lookup reflects the history parsed so far.
    self.build_hash_list_of_origins()

    log.info('Site Characteristics:')
    sc_root_path = os.path.join(path, dir_name)
    log.info(f' - Reading from {sc_root_path}')

    # Grab listing of 'Site Characteristics' directory
    sc_root_listing = os.listdir(sc_root_path)
    log.debug(f' - {len(sc_root_listing)} files in Site Characteristics directory: {str(sc_root_listing)}')

    items = utils.get_ldb_records(sc_root_path)

    # Import once, up front: if the generated protobuf module is unavailable we
    # log a single error rather than one traceback per LevelDB record.
    try:
        # SiteDataProto built from components/performance_manager/persistence/site_data/site_data.proto
        from pyhindsight.lib.site_data_pb2 import SiteDataProto
    except ImportError as e:
        log.error(f' - Unable to import SiteDataProto; skipping Site Characteristics: {e}')
        items = []

    for item in items:
        try:
            if item['key'] == b'database_metadata':
                if item['value'] != b'1':
                    # The value is bytes, so it must be *decoded* for display
                    # (bytes has no .encode(); calling it raised AttributeError).
                    db_version = item['value'].decode(errors='replace')
                    log.warning(f' - Expected type 1; got type {db_version}. Trying to parse anyway.')
                # The metadata record itself is never a SiteDataProto; skip it.
                continue

            raw_proto = item['value']

            # Deleted records won't have a value
            if raw_proto:
                parsed_proto = SiteDataProto.FromString(raw_proto)
                last_loaded = parsed_proto.last_loaded
            else:
                parsed_proto = ''
                last_loaded = 0

            key_text = item['key'].decode()
            matched_url = self.origin_hashes.get(key_text, f'MD5 of origin: {key_text}')
            result_list.append(Chrome.PreferenceItem(
                self.profile_path, url=matched_url, timestamp=utils.to_datetime(last_loaded, self.timezone),
                key=f'Status: {item["state"]}', value=str(parsed_proto), interpretation=''))

        except Exception as e:
            # Best effort: one unreadable record must not stop the rest.
            log.exception(f' - Exception parsing SiteDataProto ({item}): {e}')

    log.info(f' - Parsed {len(result_list)} items')
    self.artifacts_counts['Site Characteristics'] = len(result_list)
    self.parsed_artifacts.extend(result_list)

def process(self):
supported_databases = ['History', 'Archived History', 'Media History', 'Web Data', 'Cookies', 'Login Data',
'Extension Cookies']
Expand Down Expand Up @@ -2146,6 +2196,13 @@ def process(self):
self.artifacts_display['Preferences'],
self.artifacts_counts.get('Preferences', '0')))

if 'Site Characteristics Database' in input_listing:
self.get_site_characteristics(self.profile_path, 'Site Characteristics Database')
self.artifacts_display['Site Characteristics'] = "Site Characteristics records"
print(self.format_processing_output(
self.artifacts_display['Site Characteristics'],
self.artifacts_counts.get('Site Characteristics', '0')))

if 'File System' in input_listing:
self.get_file_system(self.profile_path, 'File System')
self.artifacts_display['File System'] = 'File System Items'
Expand Down
18 changes: 16 additions & 2 deletions pyhindsight/browsers/webbrowser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import hashlib
import logging
import sqlite3
import sys
import logging
import urllib.parse
from pyhindsight import utils

log = logging.getLogger(__name__)
Expand All @@ -10,7 +12,7 @@ class WebBrowser(object):
def __init__(
self, profile_path, browser_name, cache_path=None, version=None, display_version=None,
timezone=None, structure=None, parsed_artifacts=None, parsed_storage=None, artifacts_counts=None,
artifacts_display=None, preferences=None, no_copy=None, temp_dir=None):
artifacts_display=None, preferences=None, no_copy=None, temp_dir=None, origin_hashes=None):
self.profile_path = profile_path
self.browser_name = browser_name
self.cache_path = cache_path
Expand All @@ -25,6 +27,7 @@ def __init__(
self.preferences = preferences
self.no_copy = no_copy
self.temp_dir = temp_dir
self.origin_hashes = origin_hashes

if self.version is None:
self.version = []
Expand All @@ -44,6 +47,9 @@ def __init__(
if self.preferences is None:
self.preferences = []

if self.origin_hashes is None:
self.origin_hashes = {}

@staticmethod
def format_processing_output(name, items):
width = 80
Expand Down Expand Up @@ -104,6 +110,14 @@ def dict_factory(cursor, row):
d[col[0]] = row[idx]
return d

def build_hash_list_of_origins(self):
    """Populate ``self.origin_hashes`` with ``{md5(hostname): hostname}``.

    Only ``HistoryItem`` entries in ``self.parsed_artifacts`` are considered.
    The mapping lets hashed keys (such as those looked up by
    ``get_site_characteristics``) be translated back to a readable host.
    URLs with no hostname, or that cannot be parsed at all, are skipped.
    """
    for artifact in self.parsed_artifacts:
        if not isinstance(artifact, self.HistoryItem):
            continue

        try:
            domain = urllib.parse.urlparse(artifact.url).hostname
        except ValueError:
            # urlparse rejects malformed netlocs (e.g. an unbalanced '[' is
            # reported as "Invalid IPv6 URL"). One bad history URL must not
            # abort building the lookup table for every other record.
            continue

        # Some URLs don't have a domain, like local PDF files
        if domain:
            self.origin_hashes[hashlib.md5(domain.encode()).hexdigest()] = domain

class HistoryItem(object):
def __init__(self, item_type, timestamp, profile, url=None, name=None, value=None, interpretation=None):
self.row_type = item_type
Expand Down

0 comments on commit 317a8c3

Please sign in to comment.