Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We鈥檒l occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(spans): Add role level spans, item spans, rework test #22

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
251 changes: 193 additions & 58 deletions plugins/callback/trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

General comment: There's a lot of changes in this commit; would it be possible to split out the reformatting from the actual new changes? If not, no worries, it'll just take me a bit longer to review.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, I know it was too much when I read the changes, I'll split it


from ansible.plugins.callback import CallbackBase
from typing import Dict, TextIO
from typing import Any, Deque, Dict, TextIO
from datetime import datetime
from dataclasses import dataclass
from dataclasses import dataclass, field
from collections import deque
import time
import os
import json
import atexit
from ansible.plugins.callback import CallbackBase

DOCUMENTATION = '''
name: trace
Expand Down Expand Up @@ -73,9 +74,8 @@ def __init__(self):
self._next_pid: int = 1
self._first: bool = True
self._start_date: str = datetime.now().isoformat()
self._output_file: str = 'trace-%s.json' % self._start_date
self._current_play: str = ''
self._play_id: int = 0
self._output_file: str = f'trace-{self._start_date}.json'
self._current_play: Any = None
self._tasks: Dict[str] = {}

if not os.path.exists(self._output_dir):
Expand All @@ -86,9 +86,8 @@ def __init__(self):

atexit.register(self._end)

# Permits to handle interpolation in task name in linear strategy
def v2_playbook_on_task_start(self, task, is_conditional):
self._tasks[task._uuid] = task.get_name().strip()
def _get_time(self):
return time.time_ns() / 1000 if "time_ns" in time.__dict__ else time.time() * 1000000

def _write_event(self, e: Dict):
if not self._first:
Expand All @@ -98,12 +97,6 @@ def _write_event(self, e: Dict):
json.dump(e, self._f, sort_keys=True, indent=2)
self._f.flush()

def v2_playbook_on_play_start(self, play):

self._end_play_span()
self._current_play = play
self._play_id += 1

def v2_runner_on_start(self, host, task):
uuid = task._uuid
name = self._tasks[uuid]
Expand All @@ -114,42 +107,23 @@ def v2_runner_on_start(self, host, task):

host_uuid = host._uuid
if host_uuid not in self._hosts:
pid = self._next_pid
self._hosts[host_uuid] = Host(pid=pid, name=host.name)
self._next_pid += 1
# https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview#bookmark=id.iycbnb4z7i9g
self._write_event({
"name": "process_name",
"pid": pid,
"cat": "process",
"ph": "M",
"args": {
"name": host.name,
},
})
self._register_host(host)

# If it's the first task of the host for the play, start duration event for the current play
if not self._hosts[host_uuid].hasTaskInPlay:
self._write_event({
"name": self._current_play.get_name().strip(),
"cat": "play",
"ph": "B", # Begin
"ts": time.time_ns() / 1000 if "time_ns" in time.__dict__ else time.time() * 100000,
"pid": self._hosts[host_uuid].pid,
"id": self._play_id,
"args": {
"host": host.name,
},
})
self._hosts[host_uuid].hasTaskInPlay = True
if not self._hosts[host_uuid].has_task_in_play:
self._start_play_span(host)

# Handle role spans
self._role_span(host, task)

# Start task duration event
# See "Duration Events" in:
# https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview#heading=h.nso4gcezn7n1
self._write_event({
"name": name,
"cat": "runner",
"ph": "B", # Begin
"ts": time.time_ns() / 1000 if "time_ns" in time.__dict__ else time.time() * 100000,
"ts": self._get_time(),
"pid": self._hosts[host_uuid].pid,
"id": abs(hash(uuid)),
"args": {
Expand All @@ -159,61 +133,222 @@ def v2_runner_on_start(self, host, task):
"host": host.name,
},
})
self._hosts[host_uuid].last_runner_begin_ts = self._get_time()

def v2_playbook_on_play_start(self, play):
self._end_all_role_span()
self._end_play_span()
self._current_play = play

def v2_playbook_on_task_start(self, task, is_conditional):
self._tasks[task._uuid] = task.name.strip()

def _register_host(self, host):
pid = self._next_pid
self._hosts[host._uuid] = Host(pid=pid, name=host.name)
self._next_pid += 1

# See Metadata Events in:
# https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview#bookmark=id.iycbnb4z7i9g

self._write_event({
"name": "process_name",
"pid": pid,
"cat": "process",
"ph": "M",
"args": {
"name": host.name,
},
})

def _start_play_span(self, host):
self._write_event({
"name": self._current_play.get_name().strip(),
"cat": "play",
"ph": "B", # Begin
"ts": self._get_time(),
"pid": self._hosts[host._uuid].pid,
"id": abs(hash(self._current_play._uuid)),
"args": {
"host": host.name,
},
})
self._hosts[host._uuid].has_task_in_play = True

def _end_play_span(self):
# Spawn ending play event for each play that are done and then reset flag
for host in self._hosts.values():
if host.hasTaskInPlay:
if host.has_task_in_play:
# Write end event
self._write_event({
"name": self._current_play.get_name().strip(),
"cat": "play",
"id": self._play_id,
"id": abs(hash(self._current_play._uuid)),
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why hash + abs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll admit I copy/pasted this one to stay coherent. But I guess it was for shorter iand more distinct id

"ph": "E", # End
"ts": time.time_ns() / 1000 if "time_ns" in time.__dict__ else time.time() * 100000,
"ts": host.last_runner_end_ts,
"pid": host.pid,
})

host.hasTaskInPlay = False
host.has_task_in_play = False

def _role_span(self, host, task):

# End all current role if done
while(self._hosts[host._uuid].role_stack
and host.name in self._hosts[host._uuid].role_stack[-1]._completed):

# Do not end current role event marked as completed if we are currently in it
# Duplicates roles are marked as completed after first occurence
if(task._role is not None and
task._role.get_name() == self._hosts[host._uuid].role_stack[-1].get_name()):
break

self._end_role_span(
self._hosts[host._uuid], self._hosts[host._uuid].role_stack[-1])

# Create B role event if required
if task._role is not None:

# Still in current role, nothing to do
if(self._hosts[host._uuid].role_stack and
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not a python style expert, but isn't it normal to have a space after the if ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could be the autopep8 or just me when wrapping, if its not lint, I can change that.

self._hosts[host._uuid].role_stack[-1].get_name() ==
task._role.get_name()):
return

# Declare Parents not yet in role stack (happen when import_roles at start of role)
if task._role._parents:
for parent in task._role._parents[::-1]:
if parent.from_include:
continue
if(not parent.get_name() in
list(map(lambda x: x.get_name(), self._hosts[host._uuid].role_stack))):
self._start_role_span(host, parent)

self._start_role_span(host, task._role)

def _end_all_role_span(self):
# Handle for roles that finish at the end of play
for host in self._hosts.values():
while(host.role_stack):
self._end_role_span(host, host.role_stack[-1])

def _end_span(self, result, status: str):
def _start_role_span(self, host, role):
name: str = role.get_name().strip()

self._write_event({
"name": name,
"cat": "role",
"ph": "B", # Begin
"ts": self._get_time(),
"pid": self._hosts[host._uuid].pid,
"id": abs(hash(role._uuid)),
"args": {
"role": name,
"path": role._role_path,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not much of a python programmer: how legit/good/frowned-upon is it to use underscore variables? They're nominally-private, right? I suppose this could break in future versions? But it's the only way to get this data on old Ansible?

I wonder if we could check if get_path function exists and use it, falling back to this otherwise? WDYT

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, these are private fields. Function get_path for role does not exists under 2.12 core. So yea this could break in futur version.

I will try to remove usage of private var when possible.

"host": host.name,
},
})
self._hosts[host._uuid].role_stack.append(role)

def _end_role_span(self, host, role):
name: str = role.get_name().strip()
self._write_event({
"name": name,
"cat": "role",
"id": abs(hash(role._uuid)),
"ph": "E", # End
"ts": host.last_runner_end_ts,
"pid": host.pid
})
host.role_stack.pop()

def _end_task_span(self, result, status: str):
task = result._task
uuid = task._uuid
# See "Duration Events" in:
# https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview#heading=h.nso4gcezn7n1
now: float = self._get_time()

self._write_event({
"name": task.get_name().strip(),
"name": self._tasks[uuid],
"cat": "runner",
"id": abs(hash(uuid)),
"ph": "E", # End
"ts": time.time_ns() / 1000 if "time_ns" in time.__dict__ else time.time() * 100000,
"ts": now,
"pid": self._hosts[result._host._uuid].pid,
"args": {
"status": status,
},
})
self._hosts[result._host._uuid].last_runner_end_ts = now

def v2_runner_on_ok(self, result):
self._end_span(result, status="ok")
self._end_task_span(result, status="ok")

def v2_runner_on_unreachable(self, result):
self._end_span(result, 'unreachable')
self._end_task_span(result, 'unreachable')

def v2_runner_on_failed(self, result, ignore_errors=False):
self._end_span(result, status='failed')
self._end_task_span(result, status='failed')

def v2_runner_on_skipped(self, result):
self._end_span(result, status='skipped')
self._end_task_span(result, status='skipped')

def _end(self):
def _item_span(self, result, status: str):
item: Any = result._result['_ansible_item_label']
name = str(item)

# Try to have a nice name when item is a dict
if isinstance(item, dict) and 'name' in item:
name = item['name']

self._write_event({
"name": name,
"cat": "item",
"id": abs(hash(self._hosts[result._host._uuid].item_id)),
"ph": "B", # Begin
"ts": self._hosts[result._host._uuid].last_runner_begin_ts,
"pid": self._hosts[result._host._uuid].pid,
"args": {}
})

now: float = self._get_time()
self._write_event({
"name": name,
"cat": "item",
"id": abs(hash(self._hosts[result._host._uuid].item_id)),
"ph": "E", # End
"ts": self._get_time(),
"pid": self._hosts[result._host._uuid].pid,
"args": {
"status": status,
},
})
self._hosts[result._host._uuid].item_id += 1
self._hosts[result._host._uuid].last_runner_end_ts = now
# Has to be different from ending time to avoid errors
self._hosts[result._host._uuid].last_runner_begin_ts = self._get_time()

def v2_runner_item_on_ok(self, result):
self._item_span(result, status="ok")

def v2_runner_item_on_failed(self, result):
self._item_span(result, status="failed")

def v2_runner_item_on_skipped(self, result):
self._item_span(result, status="skipped")

def _end(self):
self._end_all_role_span()
self._end_play_span()
self._f.write("\n]")
self._f.close()


@dataclass
@ dataclass
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is a space here normal?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's autopep8 lint. I do not know pep8 by heart, so I will double check

class Host:
name: str
pid: int
hasTaskInPlay: bool = False
last_runner_end_ts: float = None
last_runner_begin_ts: float = None
has_task_in_play: bool = False
role_stack: Deque = field(default_factory=deque)
item_id: int = 0
3 changes: 0 additions & 3 deletions tests/integration/inventories/one_host.ini
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
[all]
localhost ansible_connection=local

[group_a]
127.0.0.1 ansible_connection=local

Expand Down
38 changes: 38 additions & 0 deletions tests/integration/loop/base.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
---

- hosts: all
name: Test loop
environment:
CALLBACKS_ENABLED: trace
TRACE_OUTPUT_DIR: /ansible_collections/mhansen/ansible-trace
TRACE_HIDE_TASK_ARGUMENTS: True
tasks:

- name: Loop with str
shell: "echo '{{ item }}'"
loop:
- "foo"
- "bar"
- "foo"

- name: Skip items
shell: "echo '{{ item }}'"
loop:
- "bar"
- "skipme"
- "foo"
when: item == 'foo'

- name: Item objects
shell: "echo '{{ item.name | default('bar') }}'"
loop:
- { name: "bar" }
-
- { foo: 'bar'}
- { name: "foo" }

- name: Module ping
ansible.builtin.ping:
loop:
- '127.0.0.1'
- '127.0.0.2'