Skip to content

Commit

Permalink
Added Container Runtime Interface (CRI) text parser plugin (#4742)
Browse files Browse the repository at this point in the history
  • Loading branch information
sydp committed Jun 9, 2024
1 parent 38fa61b commit 2ad42e3
Show file tree
Hide file tree
Showing 7 changed files with 220 additions and 1 deletion.
12 changes: 12 additions & 0 deletions plaso/data/formatters/generic.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,18 @@ short_source: 'LOG'
source: 'Confluence access log'
---
# Formatter for Container Runtime Interface (CRI) container log entries: the
# full message shows the stream, tag and body, the short message only the
# stream and body.
type: 'conditional'
data_type: 'cri:container:log:entry'
message:
- 'Stream: {stream}'
- 'Tag: {tag}'
- 'Body: {body}'
short_message:
- 'Stream: {stream}'
- 'Body: {body}'
short_source: 'CRI'
source: 'Container Runtime Interface Container Log'
---
type: 'conditional'
data_type: 'cups:ipp:event'
message:
- 'Status: {status}'
Expand Down
6 changes: 6 additions & 0 deletions plaso/data/timeliner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,12 @@ attribute_mappings:
description: 'Last Visited Time'
place_holder_event: true
---
# Timeliner mapping for Container Runtime Interface (CRI) container log
# entries: the event_datetime attribute is described as
# 'Content Modification Time'.
data_type: 'cri:container:log:entry'
attribute_mappings:
- name: 'event_datetime'
description: 'Content Modification Time'
place_holder_event: true
---
data_type: 'cups:ipp:event'
attribute_mappings:
- name: 'creation_time'
Expand Down
1 change: 1 addition & 0 deletions plaso/parsers/text_plugins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from plaso.parsers.text_plugins import aws_elb_access
from plaso.parsers.text_plugins import bash_history
from plaso.parsers.text_plugins import confluence_access
from plaso.parsers.text_plugins import cri
from plaso.parsers.text_plugins import dpkg
from plaso.parsers.text_plugins import gdrive_synclog
from plaso.parsers.text_plugins import google_logging
Expand Down
116 changes: 116 additions & 0 deletions plaso/parsers/text_plugins/cri.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# -*- coding: utf-8 -*-
"""Text file parser plugin for Container Runtime Interface (CRI) log format.
This is a text-based log format used in Kubernetes/GKE.
Also see:
https://github.com/kubernetes/design-proposals-archive/blob/main/node/kubelet-cri-logging.md
"""

import pyparsing

from dfdatetime import time_elements

from plaso.containers import events
from plaso.lib import errors
from plaso.parsers import text_parser
from plaso.parsers.text_plugins import interface


class CRIEventData(events.EventData):
  """Event data of a single Container Runtime Interface (CRI) log line.

  Attributes:
    body (str): message body of the log line.
    event_datetime (time_elements.TimeElementsInNanoseconds): date and time
        of the log line.
    stream (str): stream the log line was written to. Currently only
        'stdout' and 'stderr' are supported.
    tag (str): tag of the log line. Currently only 'P' (partial) and
        'F' (full) are supported.
  """

  DATA_TYPE = 'cri:container:log:entry'

  def __init__(self):
    """Initializes CRI log event data with all attributes unset."""
    super().__init__(data_type=self.DATA_TYPE)
    self.body = None
    self.event_datetime = None
    self.stream = None
    self.tag = None


class CRITextPlugin(interface.TextPlugin):
  """Text file parser plugin for CRI log files.

  Every log line consists of a date and time value, a stream name, a tag and
  the log message body, for example:
    2016-10-06T00:17:09.669794202Z stdout P log content 1
  """

  NAME = 'cri_log'
  DATA_FORMAT = 'Container Runtime Interface log file'

  ENCODING = 'utf-8'

  # Date and time values are formatted as: 2016-10-06T00:17:09.669794202Z
  # The fraction separator is escaped so that only a literal '.' matches,
  # an unescaped '.' would accept any character at that position.
  _DATE_AND_TIME = (
      pyparsing.Regex(r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{1,9}Z')
  ).setResultsName('date_time')

  _STREAM = (
      pyparsing.Literal('stderr') ^ pyparsing.Literal('stdout')
  ).setResultsName('stream')

  # P indicates a partial log,
  # F indicates a complete or the end of a multiline log.
  _TAG = pyparsing.oneOf(['P', 'F']).setResultsName('tag')

  # The body is the remainder of the line; the end-of-line is consumed but
  # not included in the results.
  _LOG = (
      pyparsing.restOfLine() + pyparsing.Suppress(pyparsing.LineEnd())
  ).setResultsName('body')

  _LOG_LINE = _DATE_AND_TIME + _STREAM + _TAG + _LOG
  _LINE_STRUCTURES = [('log_line', _LOG_LINE)]

  VERIFICATION_GRAMMAR = _LOG_LINE

  def _ParseRecord(self, parser_mediator, key, structure):
    """Parses a pyparsing structure.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfVFS.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): tokens from a parsed log line.

    Raises:
      ParseError: if the structure cannot be parsed.
    """
    if key == 'log_line':
      time_string = self._GetValueFromStructure(structure, 'date_time')

      date_time = time_elements.TimeElementsInNanoseconds()
      try:
        date_time.CopyFromStringISO8601(time_string)
      except (TypeError, ValueError) as exception:
        # The grammar only checks the shape of the timestamp, not whether
        # the values are valid (for example month 13), hence report an
        # unsupported value as a parse error as documented.
        raise errors.ParseError((
            f'Unable to parse date and time value: {time_string!s} with '
            f'error: {exception!s}'))

      event_data = CRIEventData()
      event_data.event_datetime = date_time
      # The body is a group of tokens of which the first is the message.
      event_data.body = self._GetValueFromStructure(
          structure, 'body')[0]
      event_data.stream = self._GetValueFromStructure(structure, 'stream')
      event_data.tag = self._GetValueFromStructure(structure, 'tag')
      parser_mediator.ProduceEventData(event_data)

  def CheckRequiredFormat(self, parser_mediator, text_reader):
    """Check if the log record has the minimal structure required by the parser.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfVFS.
      text_reader (EncodedTextReader): text reader.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    try:
      self._VerifyString(text_reader.lines)
    except errors.ParseError:
      # The lines do not match the verification grammar.
      return False

    return True


text_parser.TextLogParser.RegisterPlugin(CRITextPlugin)
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ bencode.py
certifi >= 2016.9.26
cffi >= 1.9.1
defusedxml >= 0.5.0
dfdatetime >= 20240330
dfdatetime >= 20240504
dfvfs >= 20240115
dfwinreg >= 20240229
dtfabric >= 20230518
Expand Down
17 changes: 17 additions & 0 deletions test_data/cri.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
2016-10-06T00:17:09.669794202Z stdout P log content 1
2016-10-06T00:17:09.669794203Z stderr F log content 2
2024-04-16T06:21:30.494902976Z stderr F 2024/04/16 06:21:30 Starting server...
2024-04-16T06:21:30.49495227Z stderr F 2024/04/16 06:21:30 Health service listening on 0.0.0.0:81
2024-04-16T06:21:30.494958332Z stderr F 2024/04/16 06:21:30 HTTP service listening on 0.0.0.0:80
2024-04-16T06:21:39.0Z stdout F 10.0.2.1:59838 - - [Tue, 16 Apr 2024 06:21:39 UTC] "GET /readiness HTTP/1.1" kube-probe/1.27
2024-04-16T06:21:44.09Z stdout F 10.0.2.1:57922 - - [Tue, 16 Apr 2024 06:21:44 UTC] "GET /healthz HTTP/1.1" kube-probe/1.27
2024-04-16T06:23:05.887Z stdout F 10.0.2.15:37674 - - [Tue, 16 Apr 2024 06:23:05 UTC] "GET / HTTP/1.0" Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36
2024-04-16T06:23:44.0952Z stdout F 10.0.2.1:47888 - - [Tue, 16 Apr 2024 06:23:44 UTC] "GET /healthz HTTP/1.1" kube-probe/1.27
2024-04-16T06:24:29.09466Z stdout F 10.0.2.1:36006 - - [Tue, 16 Apr 2024 06:24:29 UTC] "GET /readiness HTTP/1.1" kube-probe/1.27
2024-04-16T06:24:39.095116Z stdout F 10.0.2.1:50862 - - [Tue, 16 Apr 2024 06:24:39 UTC] "GET /readiness HTTP/1.1" kube-probe/1.27
2024-04-16T06:24:44.09475473Z stdout F 10.0.2.1:34564 - - [Tue, 16 Apr 2024 06:24:44 UTC] "GET /healthz HTTP/1.1" kube-probe/1.27
2024-04-16T06:24:49.09457166Z stdout F 10.0.2.1:34574 - - [Tue, 16 Apr 2024 06:24:49 UTC] "GET /readiness HTTP/1.1" kube-probe/1.27
2024-04-16T06:24:59.094443628Z stdout F 10.0.2.1:48854 - - [Tue, 16 Apr 2024 06:24:59 UTC] "GET /readiness HTTP/1.1" kube-probe/1.27
2024-04-16T06:25:29.09520786Z stdout F 10.0.2.1:39914 - - [Tue, 16 Apr 2024 06:25:29 UTC] "GET /readiness HTTP/1.1" kube-probe/1.27
2024-04-16T06:25:39.094238892Z stdout F 10.0.2.1:45586 - - [Tue, 16 Apr 2024 06:25:39 UTC] "GET /readiness HTTP/1.1" kube-probe/1.27
2024-04-16T06:25:44.094383157Z stdout F 10.0.2.1:38936 - - [Tue, 16 Apr 2024 06:25:44 UTC] "GET /healthz HTTP/1.1" kube-probe/1.27
67 changes: 67 additions & 0 deletions tests/parsers/text_plugins/cri.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Tests for the Container Runtime Interface (CRI) log text plugin."""

import unittest

from plaso.parsers.text_plugins import cri

from tests.parsers.text_plugins import test_lib


class CRILogTextPluginTest(test_lib.TextPluginTestCase):
  """Tests for the CRI log text parser plugin."""

  def testProcess(self):
    """Tests parsing the cri.log test file with the plugin."""
    plugin = cri.CRITextPlugin()
    storage_writer = self._ParseTextFileWithPlugin(
        ['cri.log'], plugin)

    # Expected number of attribute containers per container type.
    expected_container_counts = (
        ('event_data', 17),
        ('extraction_warning', 0),
        ('recovery_warning', 0))

    for container_type, expected_count in expected_container_counts:
      number_of_containers = storage_writer.GetNumberOfAttributeContainers(
          container_type)
      self.assertEqual(number_of_containers, expected_count)

    # Pairs of event data index and the values expected at that index.
    expected_entries = (
        (0, {
            'data_type': 'cri:container:log:entry',
            'event_datetime': '2016-10-06T00:17:09.669794202+00:00',
            'body': ' log content 1',
            'stream': 'stdout',
            'tag': 'P'}),
        (1, {
            'data_type': 'cri:container:log:entry',
            'event_datetime': '2016-10-06T00:17:09.669794203+00:00',
            'body': ' log content 2',
            'stream': 'stderr',
            'tag': 'F'}),
        (14, {
            'data_type': 'cri:container:log:entry',
            'event_datetime': '2024-04-16T06:25:29.095207860+00:00',
            'body': (
                ' 10.0.2.1:39914 - - [Tue, 16 Apr 2024 06:25:29 UTC] '
                '"GET /readiness HTTP/1.1" kube-probe/1.27'),
            'stream': 'stdout',
            'tag': 'F'}))

    for entry_index, expected_event_values in expected_entries:
      event_data = storage_writer.GetAttributeContainerByIndex(
          'event_data', entry_index)
      self.CheckEventData(event_data, expected_event_values)


if __name__ == '__main__':
  unittest.main()

0 comments on commit 2ad42e3

Please sign in to comment.