Monitoring docs parity: check for insertions too (elastic#27)
* Check for insertions too!

* Sort descending by timestamp so we sample the latest docs

* Account for optional fields
ycombinator committed Aug 17, 2018
1 parent 3411b87 commit 5fdebe2
Showing 2 changed files with 64 additions and 5 deletions.
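For context on the parity check below: kibana_compare.py builds the comparison with the jsondiff library's explicit syntax and marshal=True, which reports added, removed, and changed keys under string markers such as '$insert', '$delete', and '$update'. A minimal sketch of that output shape (the sample documents are invented; only the marker keys matter here):

import json
from jsondiff import diff

# Invented sample docs: the metricbeat-indexed doc gains @timestamp, loses
# source_node, and changes a nested metric value.
internal_doc = {"type": "kibana_stats", "source_node": {"uuid": "abc"},
                "kibana_stats": {"concurrent_connections": 8}}
metricbeat_doc = {"type": "kibana_stats", "@timestamp": "2018-08-17T00:00:00Z",
                  "kibana_stats": {"concurrent_connections": 9}}

difference = diff(internal_doc, metricbeat_doc, syntax='explicit', marshal=True)
print(json.dumps(difference, indent=2))
# Roughly:
# {
#   "$insert": {"@timestamp": "2018-08-17T00:00:00Z"},
#   "$delete": ["source_node"],
#   "$update": {"kibana_stats": {"$update": {"concurrent_connections": 9}}}
# }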
4 changes: 2 additions & 2 deletions playbooks/monitoring/docs_parity.yml
@@ -63,7 +63,7 @@
return_content: yes
user: "{{ elasticsearch_username }}"
password: "{{ elasticsearch_password }}"
-  body: '{ "collapse": { "field": "type" } }'
+  body: '{ "collapse": { "field": "type" }, "sort": { "timestamp": "desc" } }'
body_format: json
status_code: 200
register: xpack_elasticsearch_monitoring_sample_docs
@@ -154,7 +154,7 @@
user: "{{ elasticsearch_username }}"
password: "{{ elasticsearch_password }}"
method: POST
-  body: '{ "collapse": { "field": "type" } }'
+  body: '{ "collapse": { "field": "type" }, "sort": { "timestamp": "desc" } }'
body_format: json
status_code: 200
register: xpack_elasticsearch_monitoring_sample_docs
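The only change to this playbook is the search body: "collapse" already limits the sample to one hit per monitoring doc type, and the added "sort" makes that hit the most recently indexed one. A rough standalone sketch of the request the uri task sends (the endpoint, index pattern, and credentials below are placeholders, not taken from the playbook):

import requests

body = {
    "collapse": {"field": "type"},   # one hit per monitoring doc type...
    "sort": {"timestamp": "desc"},   # ...and make that hit the latest one
}
resp = requests.post(
    "http://localhost:9200/.monitoring-es-6-*/_search",  # placeholder URL and index pattern
    json=body,
    auth=("elastic", "changeme"),                        # placeholder credentials
)
resp.raise_for_status()
for hit in resp.json()["hits"]["hits"]:
    print(hit["_source"]["type"], hit["_source"]["timestamp"])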
65 changes: 62 additions & 3 deletions playbooks/monitoring/scripts/python/kibana_compare.py
@@ -20,7 +20,40 @@ def get_doc(docs_path, doc_type):
    with open(os.path.join(docs_path, doc_type + ".json")) as f:
        data = f.read()
    f.closed
-    return data
+    return json.loads(data)

+def remove_field(doc, field):
+    field_path_segments = field.split(".")
+    last_segment = field_path_segments.pop()
+
+    d = doc
+    for segment in field_path_segments:
+        if segment in d:
+            d = d[segment]
+
+    d.pop(last_segment, None)
+
+def remove_optional_fields(doc, fields):
+    for field in fields:
+        remove_field(doc, field)
+
+def has_insertions_recursive(obj):
+    obj_type = type(obj)
+
+    if obj_type is dict:
+        keys = obj.keys()
+        if '$insert' in keys:
+            return True
+
+        for key in keys:
+            if has_insertions_recursive(obj[key]):
+                return True
+    elif obj_type is list:
+        for el in obj:
+            if has_insertions_recursive(el):
+                return True
+    else:
+        return False

def has_deletions_recursive(obj):
    obj_type = type(obj)
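A quick illustration (not part of the commit) of how the remove_field() helper added above behaves, assuming it is in scope: the dotted path is walked into the nested document and only the leaf key is dropped, if present.

doc = {"kibana_stats": {"response_times": {"average": 12.5, "max": 40}}}

remove_field(doc, "kibana_stats.response_times.average")
print(doc)  # {'kibana_stats': {'response_times': {'max': 40}}}

remove_field(doc, "kibana_stats.response_times.average")  # leaf already gone: a no-op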
@@ -75,7 +108,30 @@ def log_parity_error(message):
internal_doc = get_doc(internal_docs_path, doc_type)
metricbeat_doc = get_doc(metricbeat_docs_path, doc_type)

-difference = diff(internal_doc, metricbeat_doc, syntax='explicit', load=True, marshal=True)
+# Certain fields are expected to be optional, as they depend on the time of collection. We omit those from the comparison.
+optional_fields = [
+    "kibana_stats.response_times.average"
+]
+remove_optional_fields(internal_doc, optional_fields)
+remove_optional_fields(metricbeat_doc, optional_fields)
+
+difference = diff(internal_doc, metricbeat_doc, syntax='explicit', marshal=True)
+
+# Expect there to be exactly four top-level insertions to the metricbeat-indexed doc: beat, @timestamp, host, and metricset
+expected_insertions = [ "beat", "@timestamp", "host", "metricset" ]
+insertions = difference.get('$insert')
+if insertions is None or len(insertions) < 1:
+    log_parity_error("Metricbeat-indexed doc for type='" + doc_type + "' has no insertions. Expected 'beat', '@timestamp', 'host', and 'metricset' to be inserted.")
+
+if len(insertions) > 4:
+    log_parity_error("Metricbeat-indexed doc for type='" + doc_type + "' has too many insertions: " + json.dumps(insertions))
+
+insertion_keys = insertions.keys()
+for expected_insertion in expected_insertions:
+    if expected_insertion not in insertion_keys:
+        log_parity_error("Metricbeat-indexed doc for type='" + doc_type + "' does not have '" + expected_insertion + "' inserted.")
+
+difference.pop('$insert')
+
# Expect there to be exactly one top-level deletion from metricbeat-indexed doc: source_node
deletions = difference.get('$delete')
@@ -90,7 +146,10 @@ def log_parity_error(message):

difference.pop('$delete')

-# Inserts and updates are okay in metricbeat-indexed docs, but deletions are not
+# Updates are okay in metricbeat-indexed docs, but insertions and deletions are not
+if has_insertions_recursive(difference):
+    log_parity_error("Metricbeat-indexed doc for type='" + doc_type + "' has unexpected insertions. Difference: " + json.dumps(difference, indent=2))
+
if has_deletions_recursive(difference):
    log_parity_error("Metricbeat-indexed doc for type='" + doc_type + "' has unexpected deletions. Difference: " + json.dumps(difference, indent=2))

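Why the recursive pass is still needed after the expected top-level '$insert' and '$delete' entries are popped: an insertion buried inside a nested update must still fail the parity check. A small sketch, assuming has_insertions_recursive() from the diff above is in scope and that marshalled diffs nest in this shape:

# Leftover difference with the insertion below the top level; the recursive
# walk over nested dicts/lists finds it and the script logs a parity error.
leftover = {"$update": {"kibana_stats": {"$insert": {"brand_new_field": 1}}}}
print(has_insertions_recursive(leftover))  # True -> unexpected insertion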
