Skip to content
Permalink
Browse files

Add JSONL output format, suitable for importing into Timesketch.

  • Loading branch information...
obsidianforensics committed May 2, 2019
1 parent aaf0a3e commit a663a284322d2d2bb3d43d105dcfc3c0a67da727
@@ -150,6 +150,10 @@ def write_sqlite(analysis_session):

analysis_session.generate_sqlite(output_file)

def write_jsonl(analysis_session):
    """Write the session's parsed artifacts to '<output_name>.jsonl'.

    The target path is the session's `output_name` with a '.jsonl' suffix;
    the actual serialization is delegated to the session's own
    `generate_jsonl` method.
    """
    jsonl_path = analysis_session.output_name + '.jsonl'
    analysis_session.generate_jsonl(jsonl_path)

print(banner)

# Useful when Hindsight is run from a different directory than where the file is located
@@ -302,10 +306,10 @@ def write_sqlite(analysis_session):
print(value, "- is the file open? If so, please close it and try again.")
log.error("Error writing XLSX file; type: {}, value: {}, traceback: {}".format(type, value, traceback))

elif args.format == 'json':
log.info("Writing output; JSON format selected")
output = open("{}.json".format(analysis_session.output_name), 'wb')
output.write(json.dumps(analysis_session, cls=MyEncoder, indent=4))
elif args.format == 'jsonl':
log.info("Writing output; JSONL format selected")
print("\n Writing {}.jsonl".format(analysis_session.output_name))
write_jsonl(analysis_session)

elif args.format == 'sqlite':
log.info("Writing output; SQLite format selected")
@@ -209,15 +209,31 @@ def generate_xlsx():
return strIO.read()


@bottle.route('/json')
def generate_json():
@bottle.route('/jsonl')
def generate_jsonl():
    """Serve the current analysis session as a downloadable JSONL attachment.

    The session writes its JSONL output to a scratch file in the working
    directory; that file is read back into memory, removed, and returned as
    the response body with a `.jsonl` attachment filename.

    Returns:
        The raw bytes of the generated JSONL document.
    """
    temp_output = '.tempjsonl'

    # Best-effort removal of a scratch file left behind by an earlier request.
    try:
        os.remove(temp_output)
    except OSError:
        # Nothing to delete (or it could not be deleted); generation below
        # reopens the path for writing anyway.
        pass

    try:
        analysis_session.generate_jsonl(temp_output)
        with open(temp_output, 'rb') as f:
            payload = f.read()
    finally:
        # Remove the scratch file even when generation or reading failed, so
        # a broken request does not leave stale output on disk.
        try:
            os.remove(temp_output)
        except OSError:
            # temp file deletion failed
            pass

    bottle.response.headers['Content-Type'] = 'application/json; charset=UTF-8'
    bottle.response.headers['Content-Disposition'] = 'attachment; filename={}.jsonl'.format(analysis_session.output_name)
    return payload


def main():
@@ -1,3 +1,3 @@
__author__ = "Ryan Benson"
__version__ = "2.3.0"
__version__ = "2.3.1"
__email__ = "ryan@obsidianforensics.com"
@@ -1,19 +1,214 @@
import sys
import os
import datetime
import importlib
import json
import logging
import os
import pytz
import time
import sqlite3
import importlib
import sys
import time

from pyhindsight import __version__
from pyhindsight.browsers.chrome import Chrome
from pyhindsight.browsers.chrome import CacheEntry
from pyhindsight.browsers.brave import Brave
from pyhindsight.utils import friendly_date, format_meta_output, format_plugin_output
from pyhindsight.utils import friendly_date, format_plugin_output
import pyhindsight.plugins

log = logging.getLogger(__name__)


class HindsightEncoder(json.JSONEncoder):
    """This JSONEncoder translates several Hindsight HistoryItem classes into
    JSON objects for use in the JSONL output format. It also makes changes
    to field names and values to more closely align with Plaso
    (https://github.com/log2timeline/plaso) output for easier use with
    Timesketch (https://github.com/google/timesketch/).

    `base_encoder` drops every attribute whose value is None, so any key may
    be absent from the intermediate dict. The per-type branches in `default`
    therefore read with `dict.get` and remove with `dict.pop(key, None)`, so
    that a single sparse record cannot abort the whole export with a KeyError.
    """

    @staticmethod
    def _rename(item, old_key, new_key):
        """Move item[old_key] to item[new_key]; do nothing if old_key is absent."""
        if old_key in item:
            item[new_key] = item.pop(old_key)

    @staticmethod
    def _discard(item, *keys):
        """Remove each of `keys` from `item`, ignoring keys that are absent."""
        for key in keys:
            item.pop(key, None)

    @staticmethod
    def base_encoder(history_item):
        """Build the fields shared by every record from an item's attributes.

        Args:
            history_item: any object whose `__dict__` holds the values to export.

        Returns:
            A dict with the fixed `source_short`, `source_long` and `parser`
            fields plus every non-None attribute of `history_item`. datetime
            values are converted to ISO 8601 text, byte strings are decoded
            as UTF-8, and `timestamp` (when present) is renamed to `datetime`.
        """
        item = {'source_short': 'WEBHIST', 'source_long': 'Chrome History',
                'parser': 'hindsight/{}'.format(__version__)}
        for key, value in history_item.__dict__.items():
            # Drop any keys that have None as value
            if value is None:
                continue

            if isinstance(value, datetime.datetime):
                value = value.isoformat()

            # JSONL requires utf-8 encoding. `bytes` is `str` on Python 2, so
            # this is the same check there, and it also works on Python 3
            # (where text `str` has no .decode()).
            if isinstance(value, bytes):
                value = value.decode('utf-8', errors='replace')

            item[key] = value

        # A None timestamp was dropped by the loop above; only rename it when
        # it actually made it into the dict.
        if 'timestamp' in item:
            item['datetime'] = item.pop('timestamp')

        return item

    def default(self, obj):
        """Return a JSON-serializable dict for the supported Hindsight item types.

        Each supported type gets a `timestamp_desc`, `data_type`, `message`
        and (except URL and download items) its own `source_long`; internal
        bookkeeping fields are removed or renamed.

        Objects of any other type are encoded as JSON `null` (this method
        returns None for them) and a warning is logged.
        """
        if isinstance(obj, Chrome.URLItem):
            item = HindsightEncoder.base_encoder(obj)

            item['timestamp_desc'] = 'Last Visited Time'
            item['data_type'] = 'chrome:history:page_visited'
            item['url_hidden'] = 'true' if item.get('hidden') else 'false'
            # The duration is compared against the text u'None' (presumably a
            # stringified missing value); drop the field in that case.
            if item.get('visit_duration') == u'None':
                del item['visit_duration']

            item['message'] = u'{} ({}) [count: {}]'.format(
                item.get('url', u''), item.get('title', u''), item.get('visit_count', u''))

            self._discard(item, 'name', 'row_type', 'visit_time', 'last_visit_time', 'hidden')
            return item

        if isinstance(obj, Chrome.DownloadItem):
            item = HindsightEncoder.base_encoder(obj)

            item['timestamp_desc'] = 'File Downloaded'
            item['data_type'] = 'chrome:history:file_downloaded'

            item['message'] = u'{} ({}). Received {}/{} bytes'.format(
                item.get('url', u''),
                item.get('full_path') or item.get('target_path'),
                item.get('received_bytes', u''), item.get('total_bytes', u''))

            self._discard(item, 'row_type', 'start_time')
            return item

        if isinstance(obj, Chrome.CookieItem):
            item = HindsightEncoder.base_encoder(obj)

            item['data_type'] = 'chrome:cookie:entry'
            item['source_long'] = 'Chrome Cookies'
            row_type = item.pop('row_type', None)
            if row_type == 'cookie (accessed)':
                item['timestamp_desc'] = 'Last Access Time'
            elif row_type == 'cookie (created)':
                item['timestamp_desc'] = 'Creation Time'
            self._rename(item, 'host_key', 'host')
            self._rename(item, 'name', 'cookie_name')
            cookie_value = item.pop('value', None)
            if cookie_value is not None:
                item['data'] = cookie_value if cookie_value != '<encrypted>' else ''
            if 'url' in item:
                # Unicode templates: on Python 2 a byte-string template would
                # raise UnicodeEncodeError for a non-ASCII host or path.
                template = u'https://{}' if item.get('secure') else u'http://{}'
                item['url'] = template.format(item['url'].lstrip('.'))
            if item.get('expires_utc') == '1970-01-01T00:00:00+00:00':
                del item['expires_utc']
            # Convert these from 1/0 to true/false to match Plaso
            for flag in ('secure', 'httponly', 'persistent'):
                item[flag] = 'true' if item.get(flag) else 'false'

            item['message'] = u'{} ({}) Flags: [HTTP only] = {} [Persistent] = {}'.format(
                item.get('url', u''),
                item.get('cookie_name', u''),
                item['httponly'], item['persistent'])

            self._discard(item, 'creation_utc', 'last_access_utc')
            return item

        if isinstance(obj, Chrome.AutofillItem):
            item = HindsightEncoder.base_encoder(obj)

            item['timestamp_desc'] = 'Used Time'
            item['data_type'] = 'chrome:autofill:entry'
            item['source_long'] = 'Chrome Autofill'
            self._rename(item, 'count', 'usage_count')
            self._rename(item, 'name', 'field_name')

            item['message'] = u'{}: {} (times used: {})'.format(
                item.get('field_name', u''), item.get('value', u''), item.get('usage_count', u''))

            self._discard(item, 'row_type', 'date_created')
            return item

        if isinstance(obj, Chrome.BookmarkItem):
            item = HindsightEncoder.base_encoder(obj)

            item['timestamp_desc'] = 'Creation Time'
            item['data_type'] = 'chrome:bookmark:entry'
            item['source_long'] = 'Chrome Bookmarks'

            item['message'] = u'{} ({}) bookmarked in folder "{}"'.format(
                item.get('name', u''), item.get('url', u''), item.get('parent_folder', u''))

            self._discard(item, 'value', 'row_type', 'date_added')
            return item

        if isinstance(obj, Chrome.BookmarkFolderItem):
            item = HindsightEncoder.base_encoder(obj)

            item['timestamp_desc'] = 'Creation Time'
            item['data_type'] = 'chrome:bookmark:folder'
            item['source_long'] = 'Chrome Bookmarks'

            item['message'] = u'"{}" bookmark folder created in folder "{}"'.format(
                item.get('name', u''), item.get('parent_folder', u''))

            self._discard(item, 'value', 'row_type', 'date_added')
            return item

        if isinstance(obj, Chrome.LocalStorageItem):
            item = HindsightEncoder.base_encoder(obj)

            item['timestamp_desc'] = 'Not a time'
            item['data_type'] = 'chrome:local_storage:entry'
            item['source_long'] = 'Chrome LocalStorage'
            if 'url' in item:
                # Strip the first character of the stored origin.
                item['url'] = item['url'][1:]

            item['message'] = u'key: {} value: {}'.format(
                item.get('key', u''), item.get('value', u''))

            self._discard(item, 'row_type')
            return item

        if isinstance(obj, Chrome.LoginItem):
            item = HindsightEncoder.base_encoder(obj)

            item['timestamp_desc'] = 'Used Time'
            item['data_type'] = 'chrome:login_item:entry'
            item['source_long'] = 'Chrome Logins'
            self._rename(item, 'count', 'usage_count')

            item['message'] = u'{}: {} used on {} (total times used: {})'.format(
                item.get('name', u''), item.get('value', u''), item.get('url', u''),
                item.get('usage_count', u''))

            self._discard(item, 'row_type', 'date_created')
            return item

        if isinstance(obj, Chrome.PreferenceItem):
            item = HindsightEncoder.base_encoder(obj)

            item['timestamp_desc'] = 'Update Time'
            item['data_type'] = 'chrome:preferences:entry'
            item['source_long'] = 'Chrome Preferences'

            # NOTE(review): the trailing ')' in this template has no matching
            # '(' and looks unintentional; left as-is so output is unchanged.
            item['message'] = u'Updated preference: {}: {})'.format(
                item.get('key', u''), item.get('value', u''))

            self._discard(item, 'row_type', 'name')
            return item

        if isinstance(obj, CacheEntry):
            item = HindsightEncoder.base_encoder(obj)

            item['timestamp_desc'] = 'Last Visit Time'
            item['data_type'] = 'chrome:cache:entry'
            item['source_long'] = 'Chrome Cache'
            if 'url' in item:
                # `url` is kept as well; `original_url` is an additional copy.
                item['original_url'] = item['url']
            self._rename(item, 'row_type', 'cache_type')
            self._rename(item, 'name', 'cached_state')

            item['message'] = u'Original URL: {}'.format(
                item.get('original_url', u''))

            self._discard(item, 'timezone')
            return item

        # Unsupported type: keep the existing behaviour of encoding it as JSON
        # null (raising here could break exports that currently succeed), but
        # record it in the log instead of doing so silently.
        log.warning('No JSONL encoding defined for %s; writing null', type(obj).__name__)
        return None


class AnalysisSession(object):
def __init__(self, input_path=None, profile_paths=None, cache_path=None, browser_type=None, available_input_types=None,
version=None, display_version=None, output_name=None, log_path=None, timezone=None,
@@ -56,7 +251,7 @@ def __init__(self, input_path=None, profile_paths=None, cache_path=None, browser
self.artifacts_counts = {}

if self.available_output_formats is None:
self.available_output_formats = ['sqlite']
self.available_output_formats = ['sqlite', 'jsonl']

if self.available_decrypts is None:
self.available_decrypts = {'windows': 0, 'mac': 0, 'linux': 0}
@@ -141,8 +336,7 @@ def promote_object_to_analysis_session(self, item_name, item_value):

@staticmethod
def is_profile(base_path, existing_files, warn=False):
"""
Log a warning message if any file in `required_files` is missing from
"""Log a warning message if any file in `required_files` is missing from
`existing_files`. Return True if all required files are present.
"""
is_profile = True
@@ -722,4 +916,9 @@ def generate_sqlite(self, output_file_path='.temp_db'):
"VALUES (?, ?, ?, ?, ?)",
(extension.name, extension.description, extension.version, extension.app_id, extension.profile))


def generate_jsonl(self, output_file):
    """Write every parsed artifact to `output_file` in JSON Lines format.

    Each entry of `self.parsed_artifacts` is serialized with
    `HindsightEncoder` and written as one line. An existing file at
    `output_file` is overwritten.

    Args:
        output_file: path of the file to create.
    """
    with open(output_file, mode='wb') as jsonl:
        for parsed_artifact in self.parsed_artifacts:
            parsed_artifact_json = json.dumps(parsed_artifact, cls=HindsightEncoder)
            # The file is opened in binary mode, so write bytes explicitly.
            # json.dumps escapes non-ASCII by default, so the encoded bytes
            # are unchanged on Python 2, and this avoids the TypeError that
            # Python 3 raises when text is written to a binary file.
            jsonl.write(parsed_artifact_json.encode('utf-8'))
            jsonl.write(b'\n')
@@ -451,8 +451,9 @@ def get_cookies(self, path, database, version):

# Queries for different versions
query = {66: '''SELECT cookies.host_key, cookies.path, cookies.name, cookies.value, cookies.creation_utc,
cookies.last_access_utc, cookies.expires_utc, cookies.is_secure, cookies.is_httponly,
cookies.is_persistent, cookies.has_expires, cookies.priority, cookies.encrypted_value
cookies.last_access_utc, cookies.expires_utc, cookies.is_secure AS secure,
cookies.is_httponly AS httponly, cookies.is_persistent AS persistent,
cookies.has_expires, cookies.priority, cookies.encrypted_value
FROM cookies''',
33: '''SELECT cookies.host_key, cookies.path, cookies.name, cookies.value, cookies.creation_utc,
cookies.last_access_utc, cookies.expires_utc, cookies.secure, cookies.httponly,
@@ -500,20 +501,20 @@ def get_cookies(self, path, database, version):
cookie_value = row.get('value')
# print type(cookie_value), cookie_value

# Using row.get(key) returns 'None' if the key doesn't exist instead of an error
new_row = Chrome.CookieItem(self.profile_path, row.get('host_key'), row.get('path'), row.get('name'), cookie_value,
to_datetime(row.get('creation_utc'), self.timezone),
new_row = Chrome.CookieItem(self.profile_path, row.get('host_key'), row.get('path'), row.get('name'),
cookie_value, to_datetime(row.get('creation_utc'), self.timezone),
to_datetime(row.get('last_access_utc'), self.timezone),
to_datetime(row.get('expires_utc'), self.timezone), row.get('secure'),
row.get('httponly'), row.get('persistent'),
row.get('has_expires'), row.get('priority'))
row.get('secure'), row.get('httponly'), row.get('persistent'),
row.get('has_expires'), to_datetime(row.get('expires_utc'), self.timezone),
row.get('priority'))

accessed_row = Chrome.CookieItem(self.profile_path, row.get('host_key'), row.get('path'), row.get('name'), cookie_value,
accessed_row = Chrome.CookieItem(self.profile_path, row.get('host_key'), row.get('path'),
row.get('name'), cookie_value,
to_datetime(row.get('creation_utc'), self.timezone),
to_datetime(row.get('last_access_utc'), self.timezone),
to_datetime(row.get('expires_utc'), self.timezone), row.get('secure'),
row.get('httponly'), row.get('persistent'),
row.get('has_expires'), row.get('priority'))
row.get('secure'), row.get('httponly'), row.get('persistent'),
row.get('has_expires'), to_datetime(row.get('expires_utc'), self.timezone),
row.get('priority'))

new_row.url = (new_row.host_key + new_row.path)
accessed_row.url = (accessed_row.host_key + accessed_row.path)
@@ -643,13 +644,12 @@ def get_autofill(self, path, database, version):
cursor.execute(query[compatible_version])

for row in cursor:
# Using row.get(key) returns 'None' if the key doesn't exist instead of an error
results.append(Chrome.AutofillItem(self.profile_path, to_datetime(row.get('date_created'), self.timezone), row.get('name'),
row.get('value'), row.get('count')))
results.append(Chrome.AutofillItem(self.profile_path, to_datetime(row.get('date_created'), self.timezone),
row.get('name'), row.get('value'), row.get('count')))

if row.get('date_last_used') and row.get('count') > 1:
results.append(Chrome.AutofillItem(self.profile_path, to_datetime(row.get('date_last_used'), self.timezone),
row.get('name'), row.get('value'), row.get('count')))
results.append(Chrome.AutofillItem(self.profile_path, to_datetime(row.get('date_last_used'),
self.timezone), row.get('name'), row.get('value'), row.get('count')))

db_file.close()
self.artifacts_counts['Autofill'] = len(results)
Oops, something went wrong.

0 comments on commit a663a28

Please sign in to comment.
You can’t perform that action at this time.