From 374c7dcb080e52e22d07d6d54058fad9d3ca58ed Mon Sep 17 00:00:00 2001
From: Nathan Scott
Date: Wed, 22 Nov 2023 15:56:57 +1100
Subject: [PATCH] fix: add missing pmie webhook action configuration functionality

Add the pcp_pmie_endpoint role variable so that pmie inference events
can be sent to a webhook.  When it is set, pmieconf is used to enable
the global webhook_action and to set the global webhook_endpoint in the
pmie config of the local ("default") pmie and of every host listed in
pcp_target_hosts.

Implementation notes:
- the webhook tasks are grouped with "block", the only grouping
  keyword Ansible accepts inside a task list
- the endpoint guard tests for a non-empty string, because the bool
  filter turns any URL into false
- the endpoint is regex-escaped before being matched and quoted on the
  pmieconf command line
- webhook_action and webhook_endpoint register separate results, so a
  change to either one shows up in the combined changed-state
  expression
- the webhook check task now runs on exactly the platforms the test
  playbook does not skip, and greps for the configured host name and
  for "yes"

---
 docs/pcp/setup.yml                  |  1 +
 roles/pcp/README.md                 |  4 +++
 roles/pcp/defaults/main.yml         |  1 +
 roles/pcp/tasks/pmie.yml            | 56 +++++++++++++++++++++++++++++
 roles/pcp/vars/main.yml             |  1 +
 tests/check_pmie_webhook.yml        | 28 +++++++++++++++
 tests/tests_verify_pmie_webhook.yml | 27 ++++++++++++++
 7 files changed, 118 insertions(+)
 create mode 100644 tests/check_pmie_webhook.yml
 create mode 100644 tests/tests_verify_pmie_webhook.yml

diff --git a/docs/pcp/setup.yml b/docs/pcp/setup.yml
index cf98f26..4a5bcb8 100644
--- a/docs/pcp/setup.yml
+++ b/docs/pcp/setup.yml
@@ -4,6 +4,7 @@
   roles:
     - role: performancecopilot.metrics.pcp
       vars:
+        pcp_pmie_endpoint: https://example.com/webhook
         pcp_pmlogger_interval: 10
         pcp_optional_agents: [dm, nfsclient, openmetrics]
         pcp_explicit_labels:
diff --git a/roles/pcp/README.md b/roles/pcp/README.md
index efbef15..f6ecfe7 100644
--- a/roles/pcp/README.md
+++ b/roles/pcp/README.md
@@ -28,6 +28,10 @@ Default location for [pmlogger(1)](http://man7.org/linux/man-pages/man1/pmlogger
 
 An optional list of remote hostnames for which metric recording and inference rules should be installed, to be monitored from the host running the playbook. By default, all performance rules evaluating to true will be logged to the local system log (for both the local host and remote hosts in the target hosts list), and daily archives will be created below *pcp_archive_dir*/*hostname* locally, again for each host listed in the target hosts list.
 
+    pcp_pmie_endpoint: ''
+
+Send inference events to the given webhook endpoint (URL) from [pmie(1)](http://man7.org/linux/man-pages/man1/pmie.1.html) performance rules. The default is to log these events into the local system log only.
+
     pcp_single_control: 0
 
 Specifies whether the pcp_target_hosts configuration file(s) for pmie and pmlogger are in control.d form (the default) or in the single file form where /*etc*/*pcp*/*pmlogger*/*control* and /*etc*/*pcp*/*pmie*/*control* are used to setup the target hosts list for monitoring.
diff --git a/roles/pcp/defaults/main.yml b/roles/pcp/defaults/main.yml
index aaa918e..9d42e3c 100644
--- a/roles/pcp/defaults/main.yml
+++ b/roles/pcp/defaults/main.yml
@@ -5,6 +5,7 @@ pcp_rest_api: false
 pcp_pmlogger_discard: 14
 pcp_pmlogger_interval: 60
 pcp_archive_dir: /var/log/pcp/pmlogger
+pcp_pmie_endpoint: ''
 pcp_pmcd_localonly: 0
 pcp_pmproxy_localonly: 0
 pcp_pmlogger_localonly: 1
diff --git a/roles/pcp/tasks/pmie.yml b/roles/pcp/tasks/pmie.yml
index 31777f9..de728fe 100644
--- a/roles/pcp/tasks/pmie.yml
+++ b/roles/pcp/tasks/pmie.yml
@@ -31,6 +31,60 @@
   loop: "{{ __pcp_pmieconf_rules | default([]) }}"
   register: __pcp_register_changed_rules_for_hosts
 
+- name: Ensure performance rule actions are installed for targeted hosts
+  block:
+    - name: Prepare localhost facts for webhook_action
+      set_fact:
+        local_pmie: "default"
+
+    # A URL is not a boolean word, so "| bool" would always yield false;
+    # test for a non-empty endpoint string instead.
+    - name: Check if global webhook_action is configured
+      lineinfile:
+        state: absent
+        path: "{{ __pcp_pmie_config_path }}/config.{{ item }}"
+        regexp: "//.*global webhook_action = yes"
+      check_mode: true
+      changed_when: false
+      register: __pcp_global_webhook_action_status
+      loop: "{{ pcp_target_hosts + [local_pmie] }}"
+      when:
+        - pcp_pmie_endpoint | d('', true) | length > 0
+
+    - name: Configure global webhook_action
+      # yamllint disable rule:line-length
+      command: "pmieconf -f {{ __pcp_pmie_config_path }}/config.{{ item.item }} modify global webhook_action yes"
+      loop: "{{ __pcp_global_webhook_action_status.results }}"
+      when:
+        - pcp_pmie_endpoint | d('', true) | length > 0
+        - item.found == 0
+      register: __pcp_register_changed_actions_for_hosts
+      # yamllint enable rule:line-length
+
+    - name: Check if global webhook_endpoint is configured
+      lineinfile:
+        state: absent
+        path: "{{ __pcp_pmie_config_path }}/config.{{ item }}"
+        regexp: >-
+          //.*global webhook_endpoint = "{{ pcp_pmie_endpoint | regex_escape }}"
+      check_mode: true
+      changed_when: false
+      register: __pcp_global_webhook_endpoint_status
+      loop: "{{ pcp_target_hosts + [local_pmie] }}"
+      when:
+        - pcp_pmie_endpoint | d('', true) | length > 0
+
+    - name: Configure global webhook_endpoint
+      # yamllint disable rule:line-length
+      command: "pmieconf -f {{ __pcp_pmie_config_path }}/config.{{ item.item }} modify global webhook_endpoint {{ pcp_pmie_endpoint | quote }}"
+      loop: "{{ __pcp_global_webhook_endpoint_status.results }}"
+      when:
+        - pcp_pmie_endpoint | d('', true) | length > 0
+        - item.found == 0
+      # Registered separately so the webhook_action result is not overwritten.
+      register: __pcp_register_changed_endpoint_for_hosts
+      # yamllint enable rule:line-length
+
 - name: Ensure extra rules symlinks have been created for targeted hosts
   file:
     src: "{{ __pcp_pmieconf_path }}/{{ item }}"
@@ -67,6 +121,8 @@
       __pcp_register_changed_group_dir is changed or
       __pcp_register_changed_group_link_dir is changed or
       __pcp_register_changed_rules_for_hosts is changed or
+      __pcp_register_changed_actions_for_hosts is changed or
+      __pcp_register_changed_endpoint_for_hosts is changed or
       __pcp_register_changed_symlinks_for_hosts is changed or
       __pcp_register_changed_target_hosts_controld is changed or
       __pcp_register_changed_target_hosts_single is changed }}"
diff --git a/roles/pcp/vars/main.yml b/roles/pcp/vars/main.yml
index 254c517..49357ac 100644
--- a/roles/pcp/vars/main.yml
+++ b/roles/pcp/vars/main.yml
@@ -19,6 +19,7 @@ __pcp_pmlogger_control_path: /etc/pcp/pmlogger/control
 
 __pcp_pmie_control_d_path: /etc/pcp/pmie/control.d
 __pcp_pmie_control_path: /etc/pcp/pmie/control
+__pcp_pmie_config_path: /var/lib/pcp/config/pmie
 
 __pcp_pmieconf_path: /etc/pcp/pmieconf
 __pcp_pmieconf_link_path: /var/lib/pcp/config/pmieconf
diff --git a/tests/check_pmie_webhook.yml b/tests/check_pmie_webhook.yml
new file mode 100644
index 0000000..084a24b
--- /dev/null
+++ b/tests/check_pmie_webhook.yml
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: MIT
+---
+- name: Check if primary pmie is running
+  changed_when: false
+  shell: |
+    set -eu
+    if set -o | grep -q pipefail; then
+      set -o pipefail  # no pipefail on debian, some ubuntu
+    fi
+    pmprobe -I pmcd.pmie.pmcd_host | grep '"primary"'
+  when: (ansible_facts['distribution'] in ['RedHat', 'CentOS'] and
+         ansible_facts['distribution_major_version'] | int > 6) or
+        ansible_facts['distribution'] not in ['Fedora', 'RedHat', 'CentOS']
+
+# yamllint disable rule:line-length
+- name: Check if primary pmie uses webhook
+  changed_when: false
+  shell: |
+    set -eu
+    if set -o | grep -q pipefail; then
+      set -o pipefail  # no pipefail on debian, some ubuntu
+    fi
+    pmieconf -f /var/lib/pcp/config/pmie/config.default list global webhook_endpoint | grep -F 'example.com'
+    pmieconf -f /var/lib/pcp/config/pmie/config.default list global webhook_action | grep -w 'yes'
+  when: ansible_distribution == 'Fedora' or
+        (ansible_distribution in ['RedHat', 'CentOS'] and
+         ansible_facts['distribution_version'] is version('9.3', '>='))
+# yamllint enable rule:line-length
diff --git a/tests/tests_verify_pmie_webhook.yml b/tests/tests_verify_pmie_webhook.yml
new file mode 100644
index 0000000..926b91d
--- /dev/null
+++ b/tests/tests_verify_pmie_webhook.yml
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: MIT
+---
+- name: Test pmie webhook configuration
+  hosts: all
+
+  roles:
+    - role: performancecopilot.metrics.pcp
+      vars:
+        pcp_pmie_endpoint: https://example.com:12345/webhook
+
+  pre_tasks:
+    - name: Skip test if not supported by platform
+      meta: end_host
+      when: (ansible_distribution in ['RedHat', 'CentOS'] and
+             (ansible_facts['distribution_version'] is version('9.3', '<'))) or
+            ansible_distribution not in ['Fedora', 'RedHat', 'CentOS']
+
+    - name: Save state of services
+      import_tasks: get_services_state.yml
+
+  tasks:
+    - name: Check if configuring pmie webhook works
+      include_tasks: check_pmie_webhook.yml
+
+  post_tasks:
+    - name: Restore state of services
+      import_tasks: restore_services_state.yml