Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implemented DateLessLogFormatHelper #4699

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
54 changes: 53 additions & 1 deletion plaso/containers/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,57 @@ def CalculateEventValuesHash(event_data, event_data_stream):
return md5_context.hexdigest()


class DateLessLogHelper(interface.AttributeContainer):
  """Attribute container that stores the date range of a date-less log.

  The container is tied to an event data stream by means of the event data
  stream identifier.

  Attributes:
    earliest_date (datetime.datetime): earliest possible date the event data
        stream was created.
    latest_date (datetime.datetime): last relative date determined by the
        date-less log helper.
  """

  CONTAINER_TYPE = 'date_less_log_helper'

  SCHEMA = {
      '_event_data_stream_identifier': 'AttributeContainerIdentifier',
      'earliest_date': 'datetime.datetime',
      'latest_date': 'datetime.datetime'}

  _SERIALIZABLE_PROTECTED_ATTRIBUTES = ['_event_data_stream_identifier']

  def __init__(self):
    """Initializes a date-less log helper attribute container."""
    super(DateLessLogHelper, self).__init__()
    self._event_data_stream_identifier = None
    # Both dates start out unset.
    self.earliest_date = self.latest_date = None

  def GetEventDataStreamIdentifier(self):
    """Retrieves the identifier of the associated event data stream.

    The identifier is a storage specific value, which is why it requires
    special handling during serialization.

    Returns:
      AttributeContainerIdentifier: event data stream or None when not set.
    """
    return self._event_data_stream_identifier

  def SetEventDataStreamIdentifier(self, event_data_stream_identifier):
    """Sets the identifier of the associated event data stream.

    The identifier is a storage specific value, which is why it requires
    special handling during serialization.

    Args:
      event_data_stream_identifier (AttributeContainerIdentifier): event data
          stream identifier.
    """
    self._event_data_stream_identifier = event_data_stream_identifier


class EventData(interface.AttributeContainer):
"""Event data attribute container.

Expand Down Expand Up @@ -439,4 +490,5 @@ def SetEventDataStreamIdentifier(self, event_data_stream_identifier):


# Register every attribute container defined in this module, including the
# date-less log helper, with the attribute containers manager.
manager.AttributeContainersManager.RegisterAttributeContainers([
    DateLessLogHelper, EventData, EventDataStream, EventObject, EventTag,
    YearLessLogHelper])
114 changes: 106 additions & 8 deletions plaso/engine/timeliner.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from dfdatetime import interface as dfdatetime_interface
from dfdatetime import semantic_time as dfdatetime_semantic_time
from dfdatetime import time_elements as dfdatetime_time_elements

from plaso.containers import events
from plaso.containers import warnings
Expand Down Expand Up @@ -46,7 +47,9 @@ def __init__(
"""
super(EventDataTimeliner, self).__init__()
self._attribute_mappings = {}
self._base_dates = {}
self._base_years = {}
self._current_date = self._GetCurrentDate()
self._current_year = self._GetCurrentYear()
self._data_location = data_location
self._place_holder_event = set()
Expand Down Expand Up @@ -75,6 +78,74 @@ def _CreateTimeZonePerPathSpec(self, system_configurations):
self._time_zone_per_path_spec[path_spec.parent] = (
system_configuration.time_zone)

def _GetBaseDate(self, storage_writer, event_data):
"""Retrieves the base date.

Args:
storage_writer (StorageWriter): storage writer.
event_data (EventData): event data.

Returns:
datetime.datetime: base date.
"""
event_data_stream_identifier = event_data.GetEventDataStreamIdentifier()
lookup_key = event_data_stream_identifier.CopyToString()

base_date = self._base_dates.get(lookup_key, None)
if base_date:
return base_date

filter_expression = '_event_data_stream_identifier == "{0:s}"'.format(
lookup_key)
date_less_log_helpers = list(storage_writer.GetAttributeContainers(
events.DateLessLogHelper.CONTAINER_TYPE,
filter_expression=filter_expression))
if not date_less_log_helpers:
message = (
'missing date-less log helper, defaulting to current date: '
'{0:d}').format(self._current_date)
joachimmetz marked this conversation as resolved.
Show resolved Hide resolved
self._ProduceTimeliningWarning(storage_writer, event_data, message)

base_date = self._current_date

else:
earliest_date = date_less_log_helpers[0].earliest_date
latest_date = date_less_log_helpers[0].latest_date

if earliest_date is None and latest_date is None:
message = (
'missing earliest and latest date in date-less log helper, '
'defaulting to current date: {0:d}').format(self._current_date)
self._ProduceTimeliningWarning(storage_writer, event_data, message)

base_date = self._current_date

elif earliest_date < self._current_date:
base_date = earliest_date

elif latest_date < self._current_year:
joachimmetz marked this conversation as resolved.
Show resolved Hide resolved
message = (
'earliest date: {0:d} as base date would exceed current date: '
'{1:d}, using latest date: {2:d}').format(
earliest_date, self._current_year, latest_date)
self._ProduceTimeliningWarning(storage_writer, event_data, message)

base_date = latest_date

else:
message = (
'earliest date: {0:d} and latest: date: {1:d} as base date '
'would exceed current date: {2:d}, using current '
'date').format(
earliest_date, latest_date, self._current_year)
self._ProduceTimeliningWarning(storage_writer, event_data, message)

base_date = self._current_year

self._base_dates[lookup_key] = base_date

return base_date

def _GetBaseYear(self, storage_writer, event_data):
"""Retrieves the base year.

Expand Down Expand Up @@ -156,6 +227,16 @@ def _GetBaseYear(self, storage_writer, event_data):

return base_year

def _GetCurrentDate(self):
"""Retrieves the current date.

Returns:
datetime.datetime: the current date.
"""
current_time = datetime.datetime.now()
current_date = datetime.datetime(*current_time.timetuple()[:3])
return current_date

def _GetCurrentYear(self):
"""Retrieves current year.

Expand Down Expand Up @@ -183,16 +264,33 @@ def _GetEvent(
"""
timestamp = None
if date_time.is_delta:
base_year = self._GetBaseYear(storage_writer, event_data)
if (
date_time.year == 0 and
joachimmetz marked this conversation as resolved.
Show resolved Hide resolved
date_time.month == 0 and
joachimmetz marked this conversation as resolved.
Show resolved Hide resolved
date_time.day_of_month == 0):
base_date = self._GetBaseDate(storage_writer, event_data)
base_year, base_month, base_day = base_date.timetuple()[:3]
hours = date_time.hours
minutes = date_time.minutes
seconds = date_time.seconds
fractions = date_time.fraction_of_second

date_time = dfdatetime_time_elements.TimeElementsInMicroseconds(
time_elements_tuple=(
base_year, base_month, base_day, hours, minutes, seconds, 0))
date_time.fraction_of_second = fractions

try:
date_time = date_time.NewFromDeltaAndYear(base_year)
except ValueError as exception:
self._ProduceTimeliningWarning(
storage_writer, event_data, str(exception))
else:
base_year = self._GetBaseYear(storage_writer, event_data)

try:
date_time = date_time.NewFromDeltaAndYear(base_year)
except ValueError as exception:
self._ProduceTimeliningWarning(
storage_writer, event_data, str(exception))

date_time = dfdatetime_semantic_time.InvalidTime()
timestamp = 0
date_time = dfdatetime_semantic_time.InvalidTime()
timestamp = 0

if timestamp is None:
timestamp = date_time.GetPlasoTimestamp()
Expand Down
110 changes: 110 additions & 0 deletions plaso/lib/dateless_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# -*- coding: utf-8 -*-
"""The date-less log format helper mix-in."""

import datetime

from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.resolver import resolver as path_spec_resolver

from plaso.containers import events


class DateLessLogFormatHelper(object):
  """Date-less log format helper mix-in.

  Keeps track of a date for log formats whose entries only contain a time of
  day. The date is estimated from the date and time values of the file entry
  and can be overridden with _SetDate.
  """

  _VALID_DAYS = frozenset(range(1, 32))

  _VALID_MONTHS = frozenset(range(1, 13))

  def __init__(self):
    """Initializes the date-less log format helper mix-in."""
    super(DateLessLogFormatHelper, self).__init__()
    self._maximum_date = None
    self._minimum_date = None
    self._date = None

  def _GetDatesFromFileEntry(self, file_entry):
    """Retrieves the dates from the file entry date and time values.

    Date and time values that are not set, that do not represent a date or
    that cannot be represented as datetime.datetime are ignored.

    Args:
      file_entry (dfvfs.FileEntry): file entry.

    Returns:
      set[datetime.datetime]: dates of the file entry.
    """
    dates = set()

    for attribute_name in (
        'change_time', 'creation_time', 'modification_time'):
      date_time = getattr(file_entry, attribute_name, None)
      if not date_time:
        continue

      year, month, day = date_time.GetDate()
      # GetDate can return None values when no date could be determined.
      if None in (year, month, day):
        continue

      try:
        dates.add(datetime.datetime(year=year, month=month, day=day))
      except ValueError:
        # The date falls outside the range supported by datetime.datetime.
        continue

    return dates

  def _GetDate(self):
    """Retrieves the date.

    Returns:
      datetime.datetime: date or None if not set.
    """
    return self._date

  def _SetDate(self, year, month, day):
    """Sets the date.

    Args:
      year (int): year.
      month (int): month.
      day (int): day.

    Raises:
      ValueError: if month or day contains an unsupported value or if the
          combination of year, month and day is not a valid date.
    """
    if day not in self._VALID_DAYS:
      raise ValueError('Invalid day: {0!s}'.format(day))

    if month not in self._VALID_MONTHS:
      raise ValueError('Invalid month: {0!s}'.format(month))

    self._date = datetime.datetime(year=year, month=month, day=day)

  def _SetEstimatedDate(self, parser_mediator):
    """Estimates the date based on the file entry of the parser mediator.

    The minimum and maximum date are derived from the date and time values
    of the file entry. If the file entry has no dates and is a compressed
    stream or gzip file, the dates of its parent file entry are used instead.
    The date is set to the minimum date, or None if no dates are available.

    Args:
      parser_mediator (ParserMediator): parser mediator, which provides the
          file entry and the resolver context.
    """
    self._maximum_date = None
    self._minimum_date = None
    self._date = None

    dates = set()

    file_entry = parser_mediator.GetFileEntry()
    if file_entry:
      dates = self._GetDatesFromFileEntry(file_entry)

      # The parent fallback only applies when there is a file entry, hence
      # this check is part of the file entry branch.
      if not dates and file_entry.type_indicator in (
          dfvfs_definitions.TYPE_INDICATOR_COMPRESSED_STREAM,
          dfvfs_definitions.TYPE_INDICATOR_GZIP):

        parent_file_entry = path_spec_resolver.Resolver.OpenFileEntry(
            file_entry.path_spec.parent,
            resolver_context=parser_mediator.resolver_context)
        if parent_file_entry:
          dates = self._GetDatesFromFileEntry(parent_file_entry)

    if dates:
      self._maximum_date = max(dates)
      self._minimum_date = min(dates)
      self._date = self._minimum_date

  def GetDateLessLogHelper(self):
    """Retrieves a date-less log helper attribute container.

    Returns:
      DateLessLogHelper: date-less log helper, with the earliest and latest
          date set to the estimated minimum and maximum date.
    """
    date_less_log_helper = events.DateLessLogHelper()
    date_less_log_helper.latest_date = self._maximum_date
    date_less_log_helper.earliest_date = self._minimum_date

    return date_less_log_helper
4 changes: 3 additions & 1 deletion plaso/multi_process/extraction_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class ExtractionMultiProcessEngine(task_engine.TaskMultiProcessEngine):
_CONTAINER_TYPE_EVENT_DATA_STREAM = events.EventDataStream.CONTAINER_TYPE
_CONTAINER_TYPE_EVENT_SOURCE = event_sources.EventSource.CONTAINER_TYPE
_CONTAINER_TYPE_YEAR_LESS_LOG_HELPER = events.YearLessLogHelper.CONTAINER_TYPE
_CONTAINER_TYPE_DATE_LESS_LOG_HELPER = events.DateLessLogHelper.CONTAINER_TYPE

# Maximum number of dfVFS file system objects to cache in the foreman process.
_FILE_SYSTEM_CACHE_SIZE = 3
Expand Down Expand Up @@ -396,7 +397,8 @@ def _MergeAttributeContainer(self, storage_writer, merge_helper, container):

if container.CONTAINER_TYPE in (
self._CONTAINER_TYPE_EVENT_DATA,
self._CONTAINER_TYPE_YEAR_LESS_LOG_HELPER):
self._CONTAINER_TYPE_YEAR_LESS_LOG_HELPER,
self._CONTAINER_TYPE_DATE_LESS_LOG_HELPER):
event_data_stream_identifier = container.GetEventDataStreamIdentifier()
event_data_stream_lookup_key = None
if event_data_stream_identifier:
Expand Down
1 change: 1 addition & 0 deletions plaso/multi_process/merge_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ class ExtractionTaskMergeHelper(BaseTaskMergeHelper):
# data by the timeliner and therefore needs to be merged before event
# data containers.
events.YearLessLogHelper.CONTAINER_TYPE,
events.DateLessLogHelper.CONTAINER_TYPE,
events.EventData.CONTAINER_TYPE,
warnings.ExtractionWarning.CONTAINER_TYPE,
warnings.RecoveryWarning.CONTAINER_TYPE,
Expand Down
33 changes: 33 additions & 0 deletions tests/containers/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,39 @@
from tests import test_lib as shared_test_lib


class DateLessLogHelperTest(shared_test_lib.BaseTestCase):
  """Tests for the date-less log helper attribute container."""

  # pylint: disable=protected-access

  def testGetAttributeNames(self):
    """Tests the GetAttributeNames function."""
    container = events.DateLessLogHelper()

    # The names are sorted so the comparison does not depend on their order.
    names = sorted(container.GetAttributeNames())

    expected_names = [
        '_event_data_stream_identifier', 'earliest_date', 'latest_date']
    self.assertEqual(names, expected_names)

  def testGetEventDataStreamIdentifier(self):
    """Tests the GetEventDataStreamIdentifier function."""
    container = events.DateLessLogHelper()

    # A newly created container has no event data stream identifier.
    self.assertIsNone(container.GetEventDataStreamIdentifier())

  def testSetEventDataStreamIdentifier(self):
    """Tests the SetEventDataStreamIdentifier function."""
    container = events.DateLessLogHelper()
    container.SetEventDataStreamIdentifier('Foo')

    self.assertEqual(container._event_data_stream_identifier, 'Foo')


class EventValuesHelperTest(shared_test_lib.BaseTestCase):
"""Tests for the event values helper functions."""

Expand Down