diff --git a/.automation.conf/config.sh b/.automation.conf/config.sh index e8b88728b..4ca6b3b78 100644 --- a/.automation.conf/config.sh +++ b/.automation.conf/config.sh @@ -19,10 +19,15 @@ if [ ! -z ${KAYOBE_ENVIRONMENT:+x} ]; then # Seem to get servers failing to spawn with higher concurrency export TEMPEST_CONCURRENCY=1 fi + if [[ "$KAYOBE_ENVIRONMENT" =~ "ci-multinode" ]]; then - export KAYOBE_AUTOMATION_TEMPEST_LOADLIST=tempest-full - export KAYOBE_AUTOMATION_TEMPEST_SKIPLIST=ci-multinode + # SMSLab is currently running with 1G switches. This causes tests using volumes and images to fail if + # the concurrency is set too high. + export TEMPEST_CONCURRENCY=1 + export KAYOBE_AUTOMATION_TEMPEST_LOADLIST=tempest-full + export KAYOBE_AUTOMATION_TEMPEST_SKIPLIST=ci-multinode fi + fi if [[ -z "${KAYOBE_AUTOMATION_TEMPEST_CONF_OVERRIDES:+x}" ]] || [[ ! -e "${KAYOBE_AUTOMATION_TEMPEST_CONF_OVERRIDES}" ]]; then diff --git a/.automation.conf/tempest/tempest-ci-multinode.overrides.conf b/.automation.conf/tempest/tempest-ci-multinode.overrides.conf index 5cd61d03b..9c6398074 100644 --- a/.automation.conf/tempest/tempest-ci-multinode.overrides.conf +++ b/.automation.conf/tempest/tempest-ci-multinode.overrides.conf @@ -23,8 +23,13 @@ volume_backed_live_migration = true console_output = true [volume] +storage_protocol = ceph +build_timeout = 600 min_microversion = 3.0 max_microversion = 3.66 +[image] +build_timeout = 600 + [dashboard] dashboard_url = http://192.168.39.2 diff --git a/doc/source/configuration/host-images.rst b/doc/source/configuration/host-images.rst index cfa5fe678..ba35a244f 100644 --- a/doc/source/configuration/host-images.rst +++ b/doc/source/configuration/host-images.rst @@ -43,7 +43,7 @@ The 3 partitions are: * p0: EFI ESP bootloader * p1: EFI BSP * p2: LVM PV (``rootpv``) - + The LVM Logical Volumes are: ============== ================== ========= diff --git a/doc/source/configuration/monitoring.rst b/doc/source/configuration/monitoring.rst index 
c2384ba93..2b336c699 100644 --- a/doc/source/configuration/monitoring.rst +++ b/doc/source/configuration/monitoring.rst @@ -126,4 +126,4 @@ need to be set, and the proxy URL sould be set or removed. If you want to add an alerting rule, there are many good examples of alerts are available `here `__. They simply need to be added to one of the ``*.rules`` files in the prometheus configuration -directory. \ No newline at end of file +directory. diff --git a/doc/source/configuration/release-train.rst b/doc/source/configuration/release-train.rst index 00967ffaa..e1a9fefe6 100644 --- a/doc/source/configuration/release-train.rst +++ b/doc/source/configuration/release-train.rst @@ -233,7 +233,7 @@ see this message when you later try to run ``pulp-container-sync.yml``: The issue is that pushing an image automatically creates a `container-push repository `__ which conflicts with the creation of a regular container repository of the same -name. You can resolve this conflict by deleting the distribution associated +name. You can resolve this conflict by deleting the distribution associated with the push repository using the pulp CLI: .. code-block:: console @@ -242,17 +242,17 @@ with the push repository using the pulp CLI: Started background task /pulp/api/v3/tasks/1f0a474a-b7c0-44b4-9ef4-ed633077f4d8/ .Done. -HTTP Error 404: Not Found +HTTP Error 404: Not Found ~~~~~~~~~~~~~~~~~~~~~~~~~ -If your login credentials are incorrect, or lack the required permissions, +If your login credentials are incorrect, or lack the required permissions, you will see a 404 error during ``pulp-repo-sync.yml``: .. code-block:: console TASK [stackhpc.pulp.pulp_repository : Sync RPM remotes into repositories] **************************************************************************************************************************************** An exception occurred during task execution. To see the full traceback, use -vvv. The error was: Exception: Task failed to complete. 
(failed; 404, message='Not Found', url=URL('https://ark.stackhpc.com/pulp/content/centos/8-stream/BaseOS/x86_64/os/20211122T102435')) - failed: [localhost] (item=centos-stream-8-baseos-development) => changed=false + failed: [localhost] (item=centos-stream-8-baseos-development) => changed=false ansible_loop_var: item item: name: centos-stream-8-baseos-development diff --git a/doc/source/configuration/walled-garden.rst b/doc/source/configuration/walled-garden.rst index 0df8ef8b5..9a45ea4db 100644 --- a/doc/source/configuration/walled-garden.rst +++ b/doc/source/configuration/walled-garden.rst @@ -49,7 +49,7 @@ In some environments we have found that squid’s preference for IPv6 can cause problems. It can be forced to prefer IPv4, by adding the following in ``etc/kayobe/containers/squid_proxy/squid.conf``: -.. code:: none +.. code:: dns_v4_first on diff --git a/doc/source/contributor/environments/aufn-ceph.rst b/doc/source/contributor/environments/aufn-ceph.rst index 18ca0d531..bf3f6a27b 100644 --- a/doc/source/contributor/environments/aufn-ceph.rst +++ b/doc/source/contributor/environments/aufn-ceph.rst @@ -198,4 +198,4 @@ We then use the CLI to create a keypair, floating IP and test VM: openstack server add floating ip test-vm-1 `openstack floating ip list -c ID -f value` openstack server list -which will create a VM named ``test-vm-1`` with a Cirros OS iamge and a default login password of 'gocubsgo'. \ No newline at end of file +which will create a VM named ``test-vm-1`` with a Cirros OS image and a default login password of 'gocubsgo'. 
diff --git a/doc/source/contributor/environments/ci-multinode.rst b/doc/source/contributor/environments/ci-multinode.rst index 72aecc3e7..0dd787140 100644 --- a/doc/source/contributor/environments/ci-multinode.rst +++ b/doc/source/contributor/environments/ci-multinode.rst @@ -1,5 +1,5 @@ ========================== -Multinode Test Environment +Multinode Test Environment ========================== Set up hosts @@ -21,7 +21,7 @@ Basic Kayobe Setup 2. ``sudo dnf install -y python3-virtualenv`` 3. ``mkdir src`` and ``cd src`` 4. Clone https://github.com/stackhpc/stackhpc-kayobe-config.git, then checkout - commit f31df6256f1b1fea99c84547d44f06c4cb74b161 + commit f31df6256f1b1fea99c84547d44f06c4cb74b161 5. ``cd ..`` and ``mkdir venvs`` 6. ``virtualenv venvs/kayobe`` and source ``venvs/kayobe/bin/activate`` 7. ``pip install -U pip`` @@ -39,8 +39,8 @@ Config changes 4. Also under vxlan_interfaces, check vxlan_dstport is not 4789 (this causes conflicts, change to 4790) 5. In /etc/kayobe/environments/ci-multinode/tf-networks.yml, edit admin_ips so - that the compute and controller IPs line up with the - instances that were created earlier, remove the other IPs for seed and + that the compute and controller IPs line up with the + instances that were created earlier, remove the other IPs for seed and cephOSD 6. 
In /etc/kayobe/environments/ci-multinode/network-allocation.yml, remove all the entries and just assign ``aio_ips:`` an empty set ``[]`` diff --git a/doc/source/contributor/environments/index.rst b/doc/source/contributor/environments/index.rst index 4e0ba4708..888608973 100644 --- a/doc/source/contributor/environments/index.rst +++ b/doc/source/contributor/environments/index.rst @@ -9,4 +9,4 @@ The following Kayobe environments are provided with this configuration: ci-aio ci-builder ci-multinode - aufn-ceph \ No newline at end of file + aufn-ceph diff --git a/doc/source/operations/rabbitmq.rst b/doc/source/operations/rabbitmq.rst index e678fbed9..12fd5a789 100644 --- a/doc/source/operations/rabbitmq.rst +++ b/doc/source/operations/rabbitmq.rst @@ -128,6 +128,8 @@ Check to see if all OpenStack queues and exchanges have been removed from the Ra kayobe overcloud host command run --limit controllers --show-output --command 'docker exec rabbitmq rabbitmqctl list_queues name' kayobe overcloud host command run --limit controllers --show-output --command 'docker exec rabbitmq rabbitmqctl list_exchanges name' +There should be no queues listed, and the only exchanges listed should start with ``amq.``. + Start the OpenStack services which use RabbitMQ. Note that this will start all matching services, even if they weren't running prior to starting this procedure. 
diff --git a/etc/kayobe/ansible/fix-networking.yml b/etc/kayobe/ansible/fix-networking.yml index f24be3862..c992be181 100644 --- a/etc/kayobe/ansible/fix-networking.yml +++ b/etc/kayobe/ansible/fix-networking.yml @@ -11,11 +11,10 @@ ansible_ssh_common_args: "-o StrictHostKeyChecking=no" tasks: - name: Ensure `hosts` file contains pulp entries - ansible.builtin.copy: - content: | - 127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4 - ::1 localhost localhost.localdomain localhost6 localhost6.localdomain6 + blockinfile: + path: /etc/hosts + marker: "# {mark} Kayobe Pulp entries" + block: | 10.0.0.34 pelican pelican.service.compute.sms-lab.cloud 10.205.3.187 pulp-server pulp-server.internal.sms-cloud - dest: /etc/hosts become: true diff --git a/etc/kayobe/ansible/reboot.yml b/etc/kayobe/ansible/reboot.yml index af01e5944..a284dd425 100644 --- a/etc/kayobe/ansible/reboot.yml +++ b/etc/kayobe/ansible/reboot.yml @@ -1,7 +1,7 @@ --- - name: Reboot the host hosts: seed-hypervisor:seed:overcloud:infra-vms - serial: "{{ lookup('env', 'ANSIBLE_SERIAL') | default(0) }}" + serial: "{{ lookup('env', 'ANSIBLE_SERIAL') | default(0, true) }}" tags: - reboot tasks: diff --git a/etc/kayobe/containers/pulp/settings.py b/etc/kayobe/containers/pulp/settings.py index 90e64ed53..2afa6a08e 100644 --- a/etc/kayobe/containers/pulp/settings.py +++ b/etc/kayobe/containers/pulp/settings.py @@ -1,4 +1,5 @@ CONTENT_ORIGIN='{{ pulp_url }}' +ANALYTICS=False ANSIBLE_API_HOSTNAME='{{ pulp_url }}' ANSIBLE_CONTENT_HOSTNAME='{{ pulp_url }}/pulp/content' TOKEN_AUTH_DISABLED=True diff --git a/etc/kayobe/environments/ci-builder/stackhpc-ci.yml b/etc/kayobe/environments/ci-builder/stackhpc-ci.yml index 6c1108afe..7e2eb4c7f 100644 --- a/etc/kayobe/environments/ci-builder/stackhpc-ci.yml +++ b/etc/kayobe/environments/ci-builder/stackhpc-ci.yml @@ -8,6 +8,7 @@ kolla_docker_namespace: stackhpc-dev # Kolla feature flag configuration. 
kolla_enable_barbican: true kolla_enable_blazar: true +kolla_enable_caso: true kolla_enable_central_logging: true kolla_enable_cinder: true kolla_enable_cloudkitty: true diff --git a/etc/kayobe/environments/ci-multinode/kolla/config/neutron.conf b/etc/kayobe/environments/ci-multinode/kolla/config/neutron.conf new file mode 100644 index 000000000..265f67b26 --- /dev/null +++ b/etc/kayobe/environments/ci-multinode/kolla/config/neutron.conf @@ -0,0 +1,3 @@ +[DEFAULT] +global_physnet_mtu = {{ tunnel_net_name | net_mtu }} + diff --git a/etc/kayobe/environments/ci-multinode/kolla/config/neutron/ml2_conf.ini b/etc/kayobe/environments/ci-multinode/kolla/config/neutron/ml2_conf.ini new file mode 100644 index 000000000..ff31f09a5 --- /dev/null +++ b/etc/kayobe/environments/ci-multinode/kolla/config/neutron/ml2_conf.ini @@ -0,0 +1,3 @@ +[ml2] +path_mtu = {{ tunnel_net_name | net_mtu }} + diff --git a/etc/kayobe/environments/ci-multinode/networks.yml b/etc/kayobe/environments/ci-multinode/networks.yml index 86406bfad..e6ef17fad 100644 --- a/etc/kayobe/environments/ci-multinode/networks.yml +++ b/etc/kayobe/environments/ci-multinode/networks.yml @@ -81,7 +81,7 @@ internal_vlan: 101 # External network external_cidr: 192.168.38.0/24 -external_mtu: 1350 +external_mtu: 1450 external_allocation_pool_start: 192.168.38.3 external_allocation_pool_end: 192.168.38.128 external_vlan: 102 diff --git a/etc/kayobe/kolla.yml b/etc/kayobe/kolla.yml index 34398d7f4..524498012 100644 --- a/etc/kayobe/kolla.yml +++ b/etc/kayobe/kolla.yml @@ -338,6 +338,14 @@ kolla_build_blocks: && grafana-cli plugins install grafana-piechart-panel ironic_inspector_header: | ADD additions-archive / + keystone_base_footer: | + {% raw %} + {% if base_package_type == 'deb' %} + RUN curl -sLO https://github.com/stackhpc/lasso/releases/download/applied%2F2.7.0-2build3/liblasso3_2.7.0-2build3_amd64.deb && \ + sudo dpkg -i liblasso3_2.7.0-2build3_amd64.deb && \ + rm -f liblasso3_2.7.0-2build3_amd64.deb + {% endif %} + {% 
endraw %} nova_base_footer: | # Fix for https://bugs.launchpad.net/nova/+bug/1955035, i.e. # https://bugzilla.redhat.com/show_bug.cgi?id=2090752 on c8s diff --git a/etc/kayobe/kolla/config/prometheus/system.rules b/etc/kayobe/kolla/config/prometheus/system.rules index be1343b82..c82bed16e 100644 --- a/etc/kayobe/kolla/config/prometheus/system.rules +++ b/etc/kayobe/kolla/config/prometheus/system.rules @@ -22,7 +22,7 @@ groups: severity: alert annotations: summary: "Prometheus exporter at {{ $labels.instance }} reports low memory" - description: "Available memory is {{ $value }}." + description: "Available memory is {{ $value }} GiB." - alert: HostOomKillDetected expr: increase(node_vmstat_oom_kill[5m]) > 0 @@ -53,7 +53,7 @@ groups: - alert: InstanceDown expr: up{job="node"} == 0 - for: 1m + for: 5m labels: severity: alert annotations: diff --git a/etc/kayobe/kolla/globals.yml b/etc/kayobe/kolla/globals.yml index b09669099..6cc3ee166 100644 --- a/etc/kayobe/kolla/globals.yml +++ b/etc/kayobe/kolla/globals.yml @@ -9,11 +9,17 @@ enable_docker_repo: {% raw %}"{{ 'overcloud' not in group_names or ansible_facts {% if kolla_base_distro == 'centos' %} bifrost_tag: yoga-20230217T160618 +blazar_tag: yoga-20230315T125157 +caso_tag: yoga-20230315T125157 neutron_tag: yoga-20230309T123152 {% elif kolla_base_distro == 'rocky' %} bifrost_tag: yoga-20230310T194732 +blazar_tag: yoga-20230315T130918 +caso_tag: yoga-20230315T130918 {% else %} bifrost_tag: yoga-20230220T184947 +blazar_tag: yoga-20230315T125441 +caso_tag: yoga-20230315T125441 neutron_tag: yoga-20230309T123143 {% endif %} diff --git a/etc/kayobe/seed.yml b/etc/kayobe/seed.yml index 8f497b915..b575ab82a 100644 --- a/etc/kayobe/seed.yml +++ b/etc/kayobe/seed.yml @@ -106,11 +106,14 @@ seed_pulp_container: image: pulp/pulp pre: "{{ kayobe_config_path }}/containers/pulp/pre.yml" post: "{{ kayobe_config_path }}/containers/pulp/post.yml" - tag: "{{ '3.21-https' if pulp_enable_tls | bool else '3.21' }}" + tag: "{{ '3.22-https' 
if pulp_enable_tls | bool else '3.22' }}" network_mode: host # Override deploy_containers_defaults.init == true to ensure # s6-overlay-suexec starts as pid 1 init: false + env: + PULP_CONTENT_WORKERS: "{{ ansible_facts.processor_vcpus * 2 + 1 }}" + PULP_API_WORKERS: "{{ ansible_facts.processor_vcpus * 2 + 1 }}" volumes: - /opt/kayobe/containers/pulp:/etc/pulp - pulp_storage:/var/lib/pulp diff --git a/releasenotes/config.yaml b/releasenotes/config.yaml index 913f7c98a..187ed012c 100644 --- a/releasenotes/config.yaml +++ b/releasenotes/config.yaml @@ -1,4 +1,3 @@ --- # This needs to be updated to the latest release. -default_branch: stackhpc/yoga release_tag_re: stackhpc/12\.\d+\.\d+\.\d diff --git a/releasenotes/notes/add-caso-f36b98453be10169.yaml b/releasenotes/notes/add-caso-f36b98453be10169.yaml new file mode 100644 index 000000000..fec9b8286 --- /dev/null +++ b/releasenotes/notes/add-caso-f36b98453be10169.yaml @@ -0,0 +1,8 @@ +--- + features: + - | + Adds ``caso`` container images. ``cASO`` is an accounting reporter + that supports Cloud Accounting Usage Records. For more information, see + the `upstream docs `__. Note that + this container does not exist in upstream Kolla and is maintained + downstream by StackHPC. diff --git a/releasenotes/notes/adds-networking-generic-switch-batching-support-adffe038ea2441d0.yaml b/releasenotes/notes/adds-networking-generic-switch-batching-support-adffe038ea2441d0.yaml new file mode 100644 index 000000000..f68ab23ed --- /dev/null +++ b/releasenotes/notes/adds-networking-generic-switch-batching-support-adffe038ea2441d0.yaml @@ -0,0 +1,9 @@ +--- +features: + - | + Brings in new neutron container images to add batching support to + Networking Generic Switch. This is opt in via the ``ngs_batch_requests`` + configuration option and only affects Ironic deployments that use + Networking Generic Switch. See the following `PR + `__ for more + details. 
diff --git a/releasenotes/notes/fix-liblasso-netiq-issue-afec5b2ee7de2a1e.yaml b/releasenotes/notes/fix-liblasso-netiq-issue-afec5b2ee7de2a1e.yaml new file mode 100644 index 000000000..4c71ad67d --- /dev/null +++ b/releasenotes/notes/fix-liblasso-netiq-issue-afec5b2ee7de2a1e.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixes the `issue `__ with using + SAML2 federation in Keystone against NetIQ IdP. diff --git a/releasenotes/notes/fixes-InstanceDown-Alert-570a295e3d5006f7.yaml b/releasenotes/notes/fixes-InstanceDown-Alert-570a295e3d5006f7.yaml new file mode 100644 index 000000000..60be7d77b --- /dev/null +++ b/releasenotes/notes/fixes-InstanceDown-Alert-570a295e3d5006f7.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + Fixes the InstanceDown alerting rule wait time to be consistent with + the alert message. The alert message says "for 5 minutes" but the rule + was set to wait for 1 minute. + diff --git a/releasenotes/notes/low-memory-alert-units-a6fde380ff9b7839.yaml b/releasenotes/notes/low-memory-alert-units-a6fde380ff9b7839.yaml new file mode 100644 index 000000000..4732bb759 --- /dev/null +++ b/releasenotes/notes/low-memory-alert-units-a6fde380ff9b7839.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + Add unit to LowMemory alert description. diff --git a/releasenotes/notes/neutron-dadfailed-workaround-9cd82d29ee1fba59.yaml b/releasenotes/notes/neutron-dadfailed-workaround-9cd82d29ee1fba59.yaml new file mode 100644 index 000000000..4c0b59cd2 --- /dev/null +++ b/releasenotes/notes/neutron-dadfailed-workaround-9cd82d29ee1fba59.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Disables metadata proxy over IPv6 inside Neutron DHCP agent to work around + `bug 1953165 `__. 
diff --git a/releasenotes/notes/release-train-02-2023-package-refresh-9de8049f8cb08a5e.yaml b/releasenotes/notes/release-train-02-2023-package-refresh-9de8049f8cb08a5e.yaml index 72a3318e0..b8c79e5e2 100644 --- a/releasenotes/notes/release-train-02-2023-package-refresh-9de8049f8cb08a5e.yaml +++ b/releasenotes/notes/release-train-02-2023-package-refresh-9de8049f8cb08a5e.yaml @@ -5,14 +5,21 @@ features: Key packages to note are: * Kernel + * version: 4.18.0 * release: 448.el8 + * Libvirt + * version: 8.0.0 * release: 6.module_el8.7.0+1140+ff0772f9 + * OVS + * version: 2.17.0 * release: 71.el8s + * OVN + * version: 22.09.0 * release: 11.el8s diff --git a/releasenotes/notes/release-train-02-2023-package-refresh-ubuntu-a9fe8a1c3c2f2796.yaml b/releasenotes/notes/release-train-02-2023-package-refresh-ubuntu-a9fe8a1c3c2f2796.yaml index 1b9d4f1d8..1895cc2a1 100644 --- a/releasenotes/notes/release-train-02-2023-package-refresh-ubuntu-a9fe8a1c3c2f2796.yaml +++ b/releasenotes/notes/release-train-02-2023-package-refresh-ubuntu-a9fe8a1c3c2f2796.yaml @@ -5,11 +5,16 @@ features: Key packages to note are: * Libvirt + * version: 8.0.0 * release: 1ubuntu7.4~cloud0 + * OVS + * version: 2.17.3 * release: 0ubuntu0.22.04.1~cloud0 + * OVN (unchanged since last container build) + * version: 22.03.0 * release: 0ubuntu1~cloud0 diff --git a/releasenotes/notes/update-pulp-3.22-aa485b7e619cd380.yaml b/releasenotes/notes/update-pulp-3.22-aa485b7e619cd380.yaml new file mode 100644 index 000000000..d1235e62f --- /dev/null +++ b/releasenotes/notes/update-pulp-3.22-aa485b7e619cd380.yaml @@ -0,0 +1,7 @@ +--- +features: + - Upgrades Pulp from ``3.21`` to ``3.22``. + - Disables Pulp analytics. + - | + Sets the Pulp worker count based on available CPU cores. This may improve performance + when pulling container images to many hosts simultaneously. 
diff --git a/test-requirements.txt b/test-requirements.txt index 333d31836..d7f05bb0d 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -3,3 +3,5 @@ # process, which may cause wedges in the gate later. yamllint # GPLv3 +doc8 # Apache-2.0 +reno>=3.4.0 # Apache-2.0 diff --git a/tox.ini b/tox.ini index 49f231654..f79ac9701 100644 --- a/tox.ini +++ b/tox.ini @@ -12,6 +12,8 @@ deps = [testenv:pep8] commands = yamllint etc/kayobe + reno lint + doc8 README.rst doc/source --ignore D001 # StackHPC Kayobe configuration release notes: [testenv:releasenotes]