Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#758 - Introduce ModuleSyncMetadata node for AWS S3Bucket syncs #763

Merged
merged 9 commits into from
Feb 11, 2022
8 changes: 8 additions & 0 deletions cartography/intel/aws/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from botocore.exceptions import EndpointConnectionError
from policyuniverse.policy import Policy

from cartography.util import merge_module_sync_metadata
from cartography.util import run_analysis_job
from cartography.util import run_cleanup_job
from cartography.util import timeit
Expand Down Expand Up @@ -619,6 +620,13 @@ def load_s3_buckets(neo4j_session: neo4j.Session, data: Dict, current_aws_accoun
AWS_ACCOUNT_ID=current_aws_account_id,
aws_update_tag=aws_update_tag,
)
merge_module_sync_metadata(
neo4j_session,
group_type='AWSAccount',
group_id=current_aws_account_id,
synced_type='S3Bucket',
update_tag=aws_update_tag,
)


@timeit
Expand Down
36 changes: 36 additions & 0 deletions cartography/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
import re
import sys
from functools import wraps
from string import Template
from typing import Dict
from typing import Optional
from typing import Union

import botocore
import neo4j
Expand Down Expand Up @@ -47,6 +49,40 @@ def run_cleanup_job(
)


def merge_module_sync_metadata(
    neo4j_session: neo4j.Session,
    group_type: str,
    group_id: Union[str, int],
    synced_type: str,
    update_tag: int,
):
    '''
    Create or refresh the `ModuleSyncMetadata` node that records a sync of one module or sub-module.

    The 'types' used here should be actual node labels. For example, if we did sync a particular AWSAccount's
    S3Buckets, the `grouptype` is 'AWSAccount', the `groupid` is the particular account's `id`, and the
    `syncedtype` is 'S3Bucket'.

    The node is keyed by the id `<group_type>_<group_id>_<synced_type>`. On first creation it also receives the
    `SyncMetadata` label and a `firstseen` timestamp; on every call its `syncedtype`, `grouptype`, `groupid` and
    `lastupdated` properties are (re)written.

    Every value is sent to Neo4j as a query parameter rather than being spliced into the Cypher text, so a value
    containing a quote character cannot break or alter the statement, and the statement text is identical on
    every call.

    :param neo4j_session: Neo4j session object
    :param group_type: The parent module's type
    :param group_id: The parent module's id
    :param synced_type: The sub-module's type
    :param update_tag: Timestamp used to determine data freshness
    '''
    query = """
    MERGE (n:ModuleSyncMetadata{id: {node_id}})
    ON CREATE SET
        n:SyncMetadata, n.firstseen=timestamp()
    SET n.syncedtype={synced_type},
        n.grouptype={group_type},
        n.groupid={group_id},
        n.lastupdated={UPDATE_TAG}
    """
    neo4j_session.run(
        query,
        # Same composite key the node has always used: group type, group id and synced type joined by '_'.
        node_id=f'{group_type}_{group_id}_{synced_type}',
        synced_type=synced_type,
        group_type=group_type,
        group_id=group_id,
        UPDATE_TAG=update_tag,
    )


def load_resource_binary(package, resource_name):
    '''
    Open a resource bundled inside a package and return it as a binary stream.

    :param package: The package that contains the resource
    :param resource_name: The name of the resource within that package
    :return: Whatever `open_binary` returns for the given package and resource
    '''
    binary_stream = open_binary(package, resource_name)
    return binary_stream

Expand Down
3 changes: 3 additions & 0 deletions docs/schema/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,5 +56,8 @@
## Kubernetes
- Click [here](kubernetes.md)

## SyncMetadata
- Click [here](syncmetadata.md)

## More to come!
👍
17 changes: 17 additions & 0 deletions docs/schema/syncmetadata.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
## SyncMetadata

SyncMetadata nodes are created by sync jobs to convey information about the job itself. See this doc for how this is
used.

## SyncMetadata:ModuleSyncMetadata

This is a node to represent some metadata about the sync job of a particular module or sub-module. Its existence should suggest that a particular sync job did happen.
The 'types' used here should be actual node labels. For example, if we did sync a particular AWSAccount's S3Buckets,
the `grouptype` is 'AWSAccount', the `groupid` is the particular account's `id`, and the `syncedtype` is 'S3Bucket'.

| Field | Description | Source|
|-------|-------------|------|
|**id**|`{group_type}_{group_id}_{synced_type}`|util.py|
|grouptype| The parent module's type |util.py|
|groupid|The parent module's id|util.py|
|syncedtype|The sub-module's type|util.py|
6 changes: 3 additions & 3 deletions tests/integration/cartography/data/jobs/test_syntax.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@

def test_analysis_jobs_cypher_syntax(neo4j_session):
parameters = {
'AWS_ID': None,
'UPDATE_TAG': None,
'OKTA_ORG_ID': None,
'AWS_ID': 'my_aws_account_id',
'UPDATE_TAG': 'my_update_tag',
'OKTA_ORG_ID': 'my_okta_org_id',
}

for job_name in contents('cartography.data.jobs.analysis'):
Expand Down
37 changes: 37 additions & 0 deletions tests/integration/cartography/intel/aws/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,43 @@ def test_load_s3_buckets(neo4j_session, *args):
assert actual_nodes == expected_nodes


def test_load_s3_buckets_sync_metadata(neo4j_session, *args):
    """
    Ensure that loading S3 buckets also writes the ModuleSyncMetadata node for the account's S3Bucket sync.
    """
    # Arrange
    bucket_data = tests.data.aws.s3.LIST_BUCKETS
    metadata_id = f'AWSAccount_{TEST_ACCOUNT_ID}_S3Bucket'
    expected_row = (
        metadata_id,
        'AWSAccount',
        TEST_ACCOUNT_ID,
        'S3Bucket',
        TEST_UPDATE_TAG,
    )
    expected_nodes = {expected_row}

    # Act
    cartography.intel.aws.s3.load_s3_buckets(neo4j_session, bucket_data, TEST_ACCOUNT_ID, TEST_UPDATE_TAG)
    result = neo4j_session.run(f"""
        MATCH (m:ModuleSyncMetadata{{id:'{metadata_id}'}})
        RETURN
            m.id,
            m.syncedtype,
            m.grouptype,
            m.groupid,
            m.lastupdated
    """)

    # Assert
    # Tuple order is (id, grouptype, groupid, syncedtype, lastupdated), matching expected_row above.
    returned_fields = ('m.id', 'm.grouptype', 'm.groupid', 'm.syncedtype', 'm.lastupdated')
    actual_nodes = set()
    for record in result:
        actual_nodes.add(tuple(record[field] for field in returned_fields))
    assert actual_nodes == expected_nodes


def test_load_s3_encryption(neo4j_session, *args):
"""
Ensure that expected bucket gets loaded with their encryption fields.
Expand Down