Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
bcae6dc
Sync kayobe-config with kayobe changes
priteau Jan 31, 2022
86c9655
[CI] Move queue setting to project level
priteau May 18, 2022
1880a73
playbook for installing and running cardiff
MoteHue Sep 22, 2022
a1bccbc
Add regex pattern, add line to eof
MoteHue Sep 22, 2022
3df7cb4
Review changes
MoteHue Oct 7, 2022
9a48295
packages moved to requirements file
MoteHue Oct 14, 2022
f9ec2b1
Change name from cardiff to ADVise
MoteHue Oct 18, 2022
022e0bd
Separate processing from visualisation
MoteHue Oct 24, 2022
7cd8ea0
Upgrade Pulp container to 3.21
markgoddard Nov 23, 2022
b18a302
Merge branch 'stackhpc/wallaby' into cardiff
MoteHue Nov 23, 2022
74c1e58
feat: bump stackhp.pulp from `0.3.0` ~> `0.4.0` (#241)
jackhodgkiss Nov 28, 2022
fc408a5
Merge pull request #168 from stackhpc/cardiff
markgoddard Nov 29, 2022
8d097a6
Merge upstream stable/wallaby
markgoddard Dec 1, 2022
da05cc1
Merge upstream stable/xena
markgoddard Dec 1, 2022
79330c0
Merge pull request #265 from stackhpc/merge-upstream-xena
markgoddard Dec 1, 2022
592b95f
Merge pull request #264 from stackhpc/merge-upstream-wallaby
markgoddard Dec 1, 2022
15024b2
CI: symlink to prometheus & grafana config in ci-aio & ci-multinode
markgoddard Nov 29, 2022
37a62c0
Merge pull request #267 from stackhpc/xena-aio-monitoring
markgoddard Dec 1, 2022
e264f92
Fixes for Rocky 8 pulp snapshots
jovial Dec 2, 2022
148a05c
Merge pull request #273 from stackhpc/bugfix/rocky-8-pulp
markgoddard Dec 5, 2022
d4b449c
Replace hardcoded threshold by temp_max value
priteau Dec 5, 2022
e3f83f5
Adding support for pulp RBAC
Alex-Welsh Dec 5, 2022
7d412ce
Merge branch 'stackhpc/xena' into xena/pulp-rbac
Alex-Welsh Dec 5, 2022
7437660
Set Pulp 3.21 init: false
m-bull Dec 6, 2022
ec01a76
Adding support for pulp RBAC
Alex-Welsh Dec 5, 2022
60f8588
updating docs
Alex-Welsh Dec 5, 2022
25d50f7
Wait longer before raising Elasticsearch alerts
priteau Nov 11, 2022
9b53bf1
Merge pull request #242 from stackhpc/xena-pulp-3-21
markgoddard Dec 6, 2022
3933e45
Hammer playbook for rabbitmq
MoteHue Dec 6, 2022
3f24976
Add Rocky minor version to Pulp URL in CI
m-bull Dec 6, 2022
523e2f9
Merge pull request #281 from stackhpc/wallaby/pulp-rbac
markgoddard Dec 7, 2022
00c46a6
Merge pull request #283 from stackhpc/fix/rocky-repo-urls
markgoddard Dec 7, 2022
9a70b75
Merge pull request #286 from stackhpc/rabbitmq-reset-2
markgoddard Dec 7, 2022
48ace32
Merge pull request #279 from stackhpc/overheating-alert
markgoddard Dec 7, 2022
ad7caa8
Merge pull request #223 from stackhpc/elasticsearch-alerts
markgoddard Dec 7, 2022
c278ff1
Merge pull request #282 from stackhpc/xena/pulp-rbac
markgoddard Dec 7, 2022
01a21b3
Merge stackhpc/wallaby into stackhpc/xena
markgoddard Dec 7, 2022
125750d
Merge stackhpc/xena into stackhpc/yoga
markgoddard Dec 7, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions etc/kayobe/ansible/advise-run.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
---
# Install ADVise into a dedicated virtualenv, extract hardware data from the
# introspection JSON files, then process and visualise the results.
- name: ADVise run
  hosts: localhost
  gather_facts: false
  tags:
    - advise
  vars:
    # Virtualenv that the ADVise tools are installed into and executed from.
    venv: "~/venvs/advise-review"
    # Input and output directories are resolved relative to the directory the
    # playbook is launched from (the PWD environment variable).
    input_dir: "{{ lookup('env', 'PWD') }}/overcloud-introspection-data"
    output_dir: "{{ lookup('env', 'PWD') }}/review"
    # Pattern selecting the extracted files to process.
    # NOTE(review): the "." before "eval" is unescaped, so as a regex it
    # matches any character - confirm whether a literal dot is intended.
    advise_pattern: ".*.eval"  # Uses regex
  tasks:
    - name: Install dependencies
      pip:
        virtualenv: "{{ venv }}"
        name:
          - git+https://github.com/stackhpc/ADVise
        state: latest

    - name: Create data directory
      file:
        path: '{{ output_dir }}/data'
        state: directory

    # A shell is required here for the *.json glob. The directory paths are
    # quoted so that a working directory containing spaces does not split the
    # arguments; the venv path stays unquoted so a leading "~" still expands.
    - name: Extract data
      shell:
        cmd: >-
          {{ venv }}/bin/m2-extract '{{ input_dir }}'/*.json
          --output_dir '{{ output_dir }}/data'

    - name: Create results directory
      file:
        path: '{{ output_dir }}/results'
        state: directory

    # No shell features are needed, so use the command module like the
    # visualisation task below.
    - name: Process data
      command:
        cmd: >-
          {{ venv }}/bin/advise-process
          -I ipmi
          -p '{{ output_dir }}/data/extra-hardware/{{ advise_pattern }}'
          -o '{{ output_dir }}'

    - name: Visualise data
      command: >-
        {{ venv }}/bin/advise-visualise
        --output_dir '{{ output_dir }}'
57 changes: 57 additions & 0 deletions etc/kayobe/ansible/rabbitmq-reset.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
---
# Reset a broken RabbitMQ cluster.
# Also restarts OpenStack services which may be broken.

# Force-reset the RabbitMQ application inside its container on every
# controller, then wait for all controllers to rejoin the cluster.
- name: Reset RabbitMQ
  hosts: controllers
  become: true
  gather_facts: false
  tags:
    - rabbitmq-reset
  vars:
    # Name of the container running RabbitMQ. Declared as a mapping; the list
    # form of "vars" is deprecated.
    container_name: rabbitmq
  tasks:
    # Read-only check: prints 'true' or 'false' depending on whether the
    # container is currently running. The braces are escaped so that Jinja
    # passes the Go template through to docker unchanged.
    - name: Inspect the {{ container_name }} container
      command:
        cmd: "docker container inspect --format '{{ '{{' }} .State.Running {{ '}}' }}' {{ container_name }}"
      register: inspection
      changed_when: false

    - name: Ensure the {{ container_name }} container is running
      command: "docker start {{ container_name }}"
      when: inspection.stdout == 'false'

    # Poll up to 10 times, 6 seconds apart, until the container is running.
    - name: Wait for the {{ container_name }} container to reach state 'Running'
      command:
        cmd: "docker container inspect --format '{{ '{{' }} .State.Running {{ '}}' }}' {{ container_name }}"
      register: result
      until: result.stdout == 'true'
      retries: 10
      delay: 6
      changed_when: false

    # The docker exec calls below deliberately omit -i/-t: Ansible does not
    # always provide a TTY (e.g. with SSH pipelining), in which case
    # "docker exec -t" fails with "the input device is not a TTY".
    - name: Wait for the rabbitmq node to automatically start on container start
      command: "docker exec {{ container_name }} /bin/bash -c 'rabbitmqctl wait /var/lib/rabbitmq/mnesia/rabbitmq.pid --timeout 60'"
      when: inspection.stdout == 'false'

    - name: Stop app
      command: "docker exec {{ container_name }} /bin/bash -c 'rabbitmqctl stop_app'"

    - name: Force reset app
      command: "docker exec {{ container_name }} /bin/bash -c 'rabbitmqctl force_reset'"

    - name: Start app
      command: "docker exec {{ container_name }} /bin/bash -c 'rabbitmqctl start_app'"

    # Expect one online node per host in the controllers group.
    - name: Wait for all nodes to join the cluster
      command: "docker exec {{ container_name }} /bin/bash -c 'rabbitmqctl await_online_nodes {{ groups['controllers'] | length }}'"

# Restart the OpenStack service containers on controllers and compute hosts.
- name: Restart OpenStack services
  hosts: controllers:compute
  become: true
  gather_facts: false
  tags:
    - restart-openstack
  tasks:
    # The following services can have problems if the cluster gets broken.
    # Containers are selected by matching the "docker ps -a" output against
    # the service names and taking the last column (the container name).
    # "xargs -r" skips the restart when nothing matches, instead of running
    # "docker restart" with no arguments, which fails.
    - name: Restart OpenStack services
      shell: >-
        docker ps -a
        | grep -E '(cinder|heat|ironic|keystone|magnum|neutron|nova)'
        | awk '{ print $NF }'
        | xargs -r docker restart
1 change: 1 addition & 0 deletions etc/kayobe/environments/ci-aio/kolla/config/grafana
1 change: 1 addition & 0 deletions etc/kayobe/environments/ci-aio/kolla/config/prometheus
13 changes: 10 additions & 3 deletions etc/kayobe/environments/ci-aio/stackhpc-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,16 @@ stackhpc_repo_ubuntu_cloud_archive_version: "{{ stackhpc_pulp_repo_ubuntu_cloud_
stackhpc_repo_ubuntu_focal_version: "{{ stackhpc_pulp_repo_ubuntu_focal_version }}"
stackhpc_repo_ubuntu_focal_security_version: "{{ stackhpc_pulp_repo_ubuntu_focal_security_version }}"
stackhpc_repo_docker_ce_ubuntu_version: "{{ stackhpc_pulp_repo_docker_ce_ubuntu_version }}"
stackhpc_repo_rocky_baseos_version: "{{ stackhpc_pulp_repo_rocky_8_6_baseos_version }}"
stackhpc_repo_rocky_appstream_version: "{{ stackhpc_pulp_repo_rocky_8_6_appstream_version }}"
stackhpc_repo_rocky_extras_version: "{{ stackhpc_pulp_repo_rocky_8_6_extras_version }}"
## Use derived vars from etc/kayobe/pulp.yml to switch between
## minor Rocky versions using stackhpc_pulp_repo_rocky_8_minor_version
stackhpc_repo_rocky_baseos_version: "{{ stackhpc_pulp_repo_rocky_8_baseos_version }}"
stackhpc_repo_rocky_appstream_version: "{{ stackhpc_pulp_repo_rocky_8_appstream_version }}"
stackhpc_repo_rocky_extras_version: "{{ stackhpc_pulp_repo_rocky_8_extras_version }}"

# Rocky- and CI-specific Pulp URLs
stackhpc_repo_rocky_baseos_url: "{{ stackhpc_repo_mirror_url }}/pulp/content/rocky/8.{{ stackhpc_pulp_repo_rocky_8_minor_version }}/BaseOS/x86_64/os/{{ stackhpc_repo_rocky_baseos_version }}/"
stackhpc_repo_rocky_appstream_url: "{{ stackhpc_repo_mirror_url }}/pulp/content/rocky/8.{{ stackhpc_pulp_repo_rocky_8_minor_version }}/AppStream/x86_64/os/{{ stackhpc_repo_rocky_appstream_version }}/"
stackhpc_repo_rocky_extras_url: "{{ stackhpc_repo_mirror_url }}/pulp/content/rocky/8.{{ stackhpc_pulp_repo_rocky_8_minor_version }}/extras/x86_64/os/{{ stackhpc_repo_rocky_extras_version }}/"

# Host and port of container registry.
# Push built images to the development Pulp service registry.
Expand Down
6 changes: 3 additions & 3 deletions etc/kayobe/kolla/config/prometheus/elasticsearch.rules
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ groups:

- alert: ElasticsearchClusterRed
expr: elasticsearch_cluster_health_status{color="red"} == 1
for: 0m
for: 5m
labels:
severity: critical
annotations:
Expand All @@ -53,7 +53,7 @@ groups:

- alert: ElasticsearchClusterYellow
expr: elasticsearch_cluster_health_status{color="yellow"} == 1
for: 5m
for: 15m
labels:
severity: warning
annotations:
Expand All @@ -80,7 +80,7 @@ groups:

- alert: ElasticsearchUnassignedShards
expr: elasticsearch_cluster_health_unassigned_shards > 0
for: 0m
for: 5m
labels:
severity: critical
annotations:
Expand Down
2 changes: 1 addition & 1 deletion etc/kayobe/kolla/config/prometheus/system.rules
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ groups:
description: "OOM kill detected"

- alert: Overheating
expr: node_hwmon_temp_celsius >= 85
expr: node_hwmon_temp_celsius >= node_hwmon_temp_max_celsius
for: 1m
labels:
severity: warning
Expand Down
4 changes: 2 additions & 2 deletions etc/kayobe/pulp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ pulp_proxy_url: "{{ omit }}"
# Base URL of the StackHPC Pulp service.
stackhpc_release_pulp_url: "https://ark.stackhpc.com"

# Credentials used to access the StackHPC Ark container image registry.
# Credentials used to access the StackHPC Ark Pulp server.
stackhpc_release_pulp_username:
stackhpc_release_pulp_password:

Expand Down Expand Up @@ -248,7 +248,7 @@ stackhpc_pulp_repository_rpm_repos:
state: present
required: "{{ stackhpc_pulp_sync_rocky_8 | bool }}"
- name: Rocky Linux 8 - PowerTools
url: "{{ stackhpc_release_pulp_content_url }}/rocky/8.{{ stackhpc_pulp_repo_rocky_8_minor_version }}/PowerTools/x86_64/os/{{ stackhpc_pulp_repo_rocky_8_6_powertools_version }}"
url: "{{ stackhpc_release_pulp_content_url }}/rocky/8.{{ stackhpc_pulp_repo_rocky_8_minor_version }}/PowerTools/x86_64/os/{{ stackhpc_pulp_repo_rocky_8_powertools_version }}"
remote_username: "{{ stackhpc_release_pulp_username }}"
remote_password: "{{ stackhpc_release_pulp_password }}"
policy: on_demand
Expand Down
5 changes: 4 additions & 1 deletion etc/kayobe/seed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,11 @@ seed_pulp_container:
image: pulp/pulp
pre: "{{ kayobe_config_path }}/containers/pulp/pre.yml"
post: "{{ kayobe_config_path }}/containers/pulp/post.yml"
tag: "3.16"
tag: "3.21"
network_mode: host
# Override deploy_containers_defaults.init == true to ensure
# s6-overlay-suexec starts as pid 1
init: false
volumes:
- /opt/kayobe/containers/pulp:/etc/pulp
- pulp_storage:/var/lib/pulp
Expand Down
6 changes: 3 additions & 3 deletions etc/kayobe/stackhpc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,15 +104,15 @@ stackhpc_repo_treasuredata_4_url: "{{ stackhpc_repo_mirror_url }}/pulp/content/t
stackhpc_repo_treasuredata_4_version: "{{ stackhpc_repo_distribution }}"

# Rocky 8 BaseOS
stackhpc_repo_rocky_baseos_url: "{{ stackhpc_repo_mirror_url }}/pulp/content/rocky/8.6/BaseOS/x86_64/os/{{ stackhpc_repo_rocky_baseos_version }}"
stackhpc_repo_rocky_baseos_url: "{{ stackhpc_repo_mirror_url }}/pulp/content/rocky/8/BaseOS/x86_64/os/{{ stackhpc_repo_rocky_baseos_version }}/"
stackhpc_repo_rocky_baseos_version: "{{ stackhpc_repo_distribution }}"

# Rocky 8 AppStream
stackhpc_repo_rocky_appstream_url: "{{ stackhpc_repo_mirror_url }}/pulp/content/rocky/8.6/AppStream/x86_64/os/{{ stackhpc_repo_rocky_appstream_version }}"
stackhpc_repo_rocky_appstream_url: "{{ stackhpc_repo_mirror_url }}/pulp/content/rocky/8/AppStream/x86_64/os/{{ stackhpc_repo_rocky_appstream_version }}/"
stackhpc_repo_rocky_appstream_version: "{{ stackhpc_repo_distribution }}"

# Rocky 8 extras
stackhpc_repo_rocky_extras_url: "{{ stackhpc_repo_mirror_url }}/pulp/content/rocky/8.6/extras/x86_64/os/{{ stackhpc_repo_rocky_extras_version }}"
stackhpc_repo_rocky_extras_url: "{{ stackhpc_repo_mirror_url }}/pulp/content/rocky/8/extras/x86_64/os/{{ stackhpc_repo_rocky_extras_version }}/"
stackhpc_repo_rocky_extras_version: "{{ stackhpc_repo_distribution }}"

###############################################################################
Expand Down