From ea44f97ae5b433147d3672599e9e53683415c5be Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Tue, 20 Sep 2022 14:18:59 +0000 Subject: [PATCH] Fix cephadm bootstrap host delegation issues Sometimes we see a bootstrap failure, where cephadm cannot read the previously generated SSH key. This seems to be because it generates the key on one host, then tries to bootstrap on another. This can happen when running under Kayobe, and the ansible_host variable is not respected with delegate_to. There is another issue, where the facts seen when using delegate_to are of the original host, but the task is executed on the delegate host. This results in cephadm bootstrap getting the wrong value for --mon-ip, and failing a port binding check. This change reworks the cephadm role to avoid delegate_to, and instead uses include_tasks and a condition, to only include for a single bootstrap host. Fixes: #62 --- roles/cephadm/README.md | 1 + roles/cephadm/defaults/main.yml | 1 + roles/cephadm/tasks/bootstrap.yml | 145 +++++++++++----------- roles/cephadm/tasks/destroy.yml | 9 +- roles/cephadm/tasks/main.yml | 20 ++- roles/cephadm/tasks/osds.yml | 33 ----- roles/cephadm/tasks/osds_spec.yml | 26 ++++ roles/cephadm/tasks/prereqs.yml | 53 +------- roles/cephadm/tasks/prereqs_bootstrap.yml | 48 +++++++ 9 files changed, 168 insertions(+), 168 deletions(-) create mode 100644 roles/cephadm/tasks/osds_spec.yml create mode 100644 roles/cephadm/tasks/prereqs_bootstrap.yml diff --git a/roles/cephadm/README.md b/roles/cephadm/README.md index 14af8f8..e1dec4c 100644 --- a/roles/cephadm/README.md +++ b/roles/cephadm/README.md @@ -33,6 +33,7 @@ All Ceph hosts must be in the `ceph` group. * `cephadm_custom_repos`: If enabled - the role won't define yum/apt repositories (default: False) * `cephadm_package_update`: If enabled - cephadm package will be updated to latest version (default: False) * Bootstrap settings + * `cephadm_bootstrap_host`: The host on which to bootstrap Ceph (default: `groups['mons'][0]`) * `cephadm_enable_dashboard`: If enabled - dashboard service on MGR will be enabled (default: False) * `cephadm_enable_firewalld`: If enabled - firewalld will be installed and rules will be applied (default: False) * `cephadm_enable_monitoring`: If enabled - cephadm monitoring stack will be deployed i.e. prometheus/node-exporters/grafana (default: False) diff --git a/roles/cephadm/defaults/main.yml b/roles/cephadm/defaults/main.yml index 34b2f15..6a0fc88 100644 --- a/roles/cephadm/defaults/main.yml +++ b/roles/cephadm/defaults/main.yml @@ -15,6 +15,7 @@ cephadm_registry_url: "" cephadm_registry_username: "" cephadm_registry_password: "" # Bootstrap settings +cephadm_bootstrap_host: "{{ groups['mons'][0] }}" cephadm_enable_dashboard: False cephadm_enable_firewalld: False cephadm_enable_monitoring: False diff --git a/roles/cephadm/tasks/bootstrap.yml b/roles/cephadm/tasks/bootstrap.yml index e1b1982..1e59f85 100644 --- a/roles/cephadm/tasks/bootstrap.yml +++ b/roles/cephadm/tasks/bootstrap.yml @@ -1,83 +1,78 @@ --- - name: Bootstrap cephadm - block: - - name: Bootstrap cephadm - vars: - mon_ip: "{{ hostvars[inventory_hostname].ansible_facts[cephadm_public_interface].ipv4.address }}" - monitoring_stack: "{{ '--skip-monitoring-stack' if not (cephadm_enable_monitoring | bool) else '' }}" - dashboard: "{{ '--skip-dashboard' if not cephadm_enable_dashboard | bool else '' }}" - firewalld: "{{ '--skip-firewalld' if not cephadm_enable_firewalld | bool else '' }}" - command: - cmd: > - cephadm - {% if cephadm_image | length > 0 %} - --image={{ cephadm_image }} - {% endif %} - bootstrap - {{ monitoring_stack }} - {{ dashboard }} - {{ firewalld }} - --ssh-private-key={{ cephadm_ssh_private_key }} - --ssh-public-key={{ cephadm_ssh_public_key }} - {% if cephadm_ssh_user | length > 0 %} - --ssh-user "{{ cephadm_ssh_user }}" - {% endif %} - {% if cephadm_registry_url | length > 0 %} - --registry-url={{ cephadm_registry_url }} - --registry-username={{ cephadm_registry_username }} - --registry-password={{ cephadm_registry_password }} - {% endif %} - --skip-pull - {% if cephadm_fsid | length > 0 %} - --fsid={{ cephadm_fsid }} - {% endif %} - --mon-ip={{ mon_ip }} - {{ cephadm_bootstrap_additional_parameters }} - become: true - when: not cephadm_check_ceph_conf.stat.exists + vars: + mon_ip: "{{ hostvars[inventory_hostname].ansible_facts[cephadm_public_interface].ipv4.address }}" + monitoring_stack: "{{ '--skip-monitoring-stack' if not (cephadm_enable_monitoring | bool) else '' }}" + dashboard: "{{ '--skip-dashboard' if not cephadm_enable_dashboard | bool else '' }}" + firewalld: "{{ '--skip-firewalld' if not cephadm_enable_firewalld | bool else '' }}" + command: + cmd: > + cephadm + {% if cephadm_image | length > 0 %} + --image={{ cephadm_image }} + {% endif %} + bootstrap + {{ monitoring_stack }} + {{ dashboard }} + {{ firewalld }} + --ssh-private-key={{ cephadm_ssh_private_key }} + --ssh-public-key={{ cephadm_ssh_public_key }} + {% if cephadm_ssh_user | length > 0 %} + --ssh-user "{{ cephadm_ssh_user }}" + {% endif %} + {% if cephadm_registry_url | length > 0 %} + --registry-url={{ cephadm_registry_url }} + --registry-username={{ cephadm_registry_username }} + --registry-password={{ cephadm_registry_password }} + {% endif %} + --skip-pull + {% if cephadm_fsid | length > 0 %} + --fsid={{ cephadm_fsid }} + {% endif %} + --mon-ip={{ mon_ip }} + {{ cephadm_bootstrap_additional_parameters }} + become: true + when: not cephadm_check_ceph_conf.stat.exists - - name: Set public network - command: - cmd: "cephadm shell -- ceph config set global public_network {{ cephadm_public_network }}" - become: true +- name: Set public network + command: + cmd: "cephadm shell -- ceph config set global public_network {{ cephadm_public_network }}" + become: true - - name: Set cluster network - command: - cmd: "cephadm shell -- ceph config set global cluster_network {{ cephadm_cluster_network }}" - when: cephadm_cluster_network | length > 0 - become: true +- name: Set cluster network + command: + cmd: "cephadm shell -- ceph config set global cluster_network {{ cephadm_cluster_network }}" + when: cephadm_cluster_network | length > 0 + become: true - - name: Get cluster fsid - command: - cmd: "cephadm shell -- ceph fsid" - when: cephadm_fsid | length == 0 - become: true - register: cephadm_fsid_current +- name: Get cluster fsid + command: + cmd: "cephadm shell -- ceph fsid" + when: cephadm_fsid | length == 0 + become: true + register: cephadm_fsid_current - - name: Template out cluster.yml - vars: - fsid: "{{ cephadm_fsid if cephadm_fsid | length > 0 else cephadm_fsid_current.stdout }}" - template: - src: "templates/cluster.yml.j2" - dest: "/var/run/ceph/{{ fsid }}/cephadm_cluster.yml" - owner: root - group: root - mode: 0644 - become: true - run_once: True - - - name: Apply spec - command: - cmd: > - cephadm shell -- - ceph orch apply -i /var/run/ceph/cephadm_cluster.yml - become: true +- name: Template out cluster.yml + vars: + fsid: "{{ cephadm_fsid if cephadm_fsid | length > 0 else cephadm_fsid_current.stdout }}" + template: + src: "templates/cluster.yml.j2" + dest: "/var/run/ceph/{{ fsid }}/cephadm_cluster.yml" + owner: root + group: root + mode: 0644 + become: true + run_once: True - - name: Install ceph cli on mon hosts - command: - cmd: "cephadm install ceph" - become: true - when: cephadm_install_ceph_cli +- name: Apply spec + command: + cmd: > + cephadm shell -- + ceph orch apply -i /var/run/ceph/cephadm_cluster.yml + become: true - delegate_to: "{{ groups['mons'][0] }}" - run_once: True +- name: Install ceph cli on mon hosts + command: + cmd: "cephadm install ceph" + become: true + when: cephadm_install_ceph_cli diff --git a/roles/cephadm/tasks/destroy.yml b/roles/cephadm/tasks/destroy.yml index 48233de..70f6eaf 100644 --- a/roles/cephadm/tasks/destroy.yml +++ b/roles/cephadm/tasks/destroy.yml @@ -4,16 +4,17 @@ cmd: "cephadm shell -- ceph fsid" become: true register: cephadm_destroy_fsid - delegate_to: "{{ groups['mons'][0] }}" - run_once: True changed_when: false failed_when: false + when: inventory_hostname == cephadm_bootstrap_host - name: Destroy cluster + vars: + fsid_result: "{{ hostvars[cephadm_bootstrap_host].cephadm_destroy_fsid }}" command: - cmd: "cephadm rm-cluster --fsid {{ cephadm_destroy_fsid.stdout }} --force" + cmd: "cephadm rm-cluster --fsid {{ fsid_result.stdout }} --force" become: true - when: cephadm_destroy_fsid.rc != 1 + when: fsid_result.rc != 1 - name: Remove ssh keys file: diff --git a/roles/cephadm/tasks/main.yml b/roles/cephadm/tasks/main.yml index b6a3d4a..cb4b992 100644 --- a/roles/cephadm/tasks/main.yml +++ b/roles/cephadm/tasks/main.yml @@ -1,15 +1,27 @@ --- - include_tasks: "destroy.yml" - when: cephadm_recreate | bool + when: + - cephadm_recreate | bool - include_tasks: "prechecks.yml" when: not cephadm_skip_prechecks | bool - include_tasks: "pkg_{{ ansible_facts.os_family | lower }}.yml" -- include_tasks: "prereqs.yml" +- include_tasks: "prereqs_bootstrap.yml" + when: + - inventory_hostname == cephadm_bootstrap_host + +- import_tasks: "prereqs.yml" - include_tasks: "bootstrap.yml" - when: cephadm_bootstrap | bool + when: + - cephadm_bootstrap | bool + - inventory_hostname == cephadm_bootstrap_host + +- import_tasks: "osds.yml" -- include_tasks: "osds.yml" +- include_tasks: "osds_spec.yml" + when: + - cephadm_osd_spec | length > 0 + - inventory_hostname == cephadm_bootstrap_host diff --git a/roles/cephadm/tasks/osds.yml b/roles/cephadm/tasks/osds.yml index bc5e818..b35a69e 100644 --- a/roles/cephadm/tasks/osds.yml +++ b/roles/cephadm/tasks/osds.yml @@ -11,36 +11,3 @@ delegate_to: "{{ omit if 'mons' in group_names else groups['mons'][0] }}" when: cephadm_osd_devices | length > 0 with_items: "{{ cephadm_osd_devices }}" - -- name: Add OSDs - block: - - name: Get cluster fsid - command: - cmd: "cephadm shell -- ceph fsid" - when: cephadm_fsid | length == 0 - become: true - register: cephadm_fsid_current - changed_when: false - - - name: Template out osd_spec.yml - vars: - fsid: "{{ cephadm_fsid if cephadm_fsid | length > 0 else cephadm_fsid_current.stdout }}" - copy: - content: "{{ cephadm_osd_spec | to_nice_yaml if cephadm_osd_spec is mapping else cephadm_osd_spec }}" - dest: "/var/run/ceph/{{ fsid }}/osd_spec.yml" - owner: root - group: root - mode: 0644 - when: cephadm_osd_spec | length > 0 - become: true - - - name: Apply OSDs spec - command: - cmd: > - cephadm shell -- - ceph orch apply -i /var/run/ceph/osd_spec.yml - when: cephadm_osd_spec | length > 0 - become: true - - delegate_to: "{{ groups['mons'][0] }}" - run_once: True diff --git a/roles/cephadm/tasks/osds_spec.yml b/roles/cephadm/tasks/osds_spec.yml new file mode 100644 index 0000000..751fe66 --- /dev/null +++ b/roles/cephadm/tasks/osds_spec.yml @@ -0,0 +1,26 @@ +--- +- name: Get cluster fsid + command: + cmd: "cephadm shell -- ceph fsid" + when: cephadm_fsid | length == 0 + become: true + register: cephadm_fsid_current + changed_when: false + +- name: Template out osd_spec.yml + vars: + fsid: "{{ cephadm_fsid if cephadm_fsid | length > 0 else cephadm_fsid_current.stdout }}" + copy: + content: "{{ cephadm_osd_spec | to_nice_yaml if cephadm_osd_spec is mapping else cephadm_osd_spec }}" + dest: "/var/run/ceph/{{ fsid }}/osd_spec.yml" + owner: root + group: root + mode: 0644 + become: true + +- name: Apply OSDs spec + command: + cmd: > + cephadm shell -- + ceph orch apply -i /var/run/ceph/osd_spec.yml + become: true diff --git a/roles/cephadm/tasks/prereqs.yml b/roles/cephadm/tasks/prereqs.yml index 93357b1..bb002b0 100644 --- a/roles/cephadm/tasks/prereqs.yml +++ b/roles/cephadm/tasks/prereqs.yml @@ -1,60 +1,9 @@ --- -- name: Prepare cephadm bootstrap dependencies - block: - - name: Ensure /etc/ceph directory exists - file: - path: /etc/ceph - state: directory - owner: root - group: root - mode: 0755 - become: true - - - name: Check if /etc/ceph/ceph.conf exists - stat: - path: /etc/ceph/ceph.conf - register: cephadm_check_ceph_conf - - - name: Check if cephadm ssh key exists - stat: - path: "{{ cephadm_ssh_private_key }}" - register: cephadm_check_ceph_id - - - name: Generate ssh key for cephadm - openssh_keypair: - path: "{{ cephadm_ssh_private_key }}" - size: 4096 - comment: "ceph-{{ cephadm_fsid }}" - when: not cephadm_check_ceph_id.stat.exists - register: cephadm_ssh_key - become: true - - - name: Save public key - copy: - content: "{{ cephadm_ssh_key.public_key | default }}" - dest: "{{ cephadm_ssh_public_key }}" - owner: root - group: root - mode: 0644 - become: true - when: not cephadm_check_ceph_id.stat.exists - - - name: Slurp public key if already exist - slurp: - src: "{{ cephadm_ssh_public_key }}" - register: cephadm_ssh_public_key_slurp - when: cephadm_check_ceph_id.stat.exists - - delegate_to: "{{ groups['mons'][0] }}" - run_once: True - - name: Copy cephadm public key to all hosts - vars: - content: "{{ cephadm_ssh_public_key_slurp.content | b64decode if cephadm_check_ceph_id.stat.exists else cephadm_ssh_key.public_key }}" authorized_key: user: "{{ cephadm_ssh_user }}" state: present - key: "{{ content }}" + key: "{{ hostvars[cephadm_bootstrap_host].cephadm_ssh_public_key_content }}" when: "cephadm_ssh_user | length > 0" become: true diff --git a/roles/cephadm/tasks/prereqs_bootstrap.yml b/roles/cephadm/tasks/prereqs_bootstrap.yml new file mode 100644 index 0000000..ca5e494 --- /dev/null +++ b/roles/cephadm/tasks/prereqs_bootstrap.yml @@ -0,0 +1,48 @@ +--- +- name: Ensure /etc/ceph directory exists + file: + path: /etc/ceph + state: directory + owner: root + group: root + mode: 0755 + become: true + +- name: Check if /etc/ceph/ceph.conf exists + stat: + path: /etc/ceph/ceph.conf + register: cephadm_check_ceph_conf + +- name: Check if cephadm ssh key exists + stat: + path: "{{ cephadm_ssh_private_key }}" + register: cephadm_check_ceph_id + +- name: Generate ssh key for cephadm + openssh_keypair: + path: "{{ cephadm_ssh_private_key }}" + size: 4096 + comment: "ceph-{{ cephadm_fsid }}" + when: not cephadm_check_ceph_id.stat.exists + register: cephadm_ssh_key + become: true + +- name: Save public key + copy: + content: "{{ cephadm_ssh_key.public_key | default }}" + dest: "{{ cephadm_ssh_public_key }}" + owner: root + group: root + mode: 0644 + become: true + when: not cephadm_check_ceph_id.stat.exists + +- name: Slurp public key + slurp: + src: "{{ cephadm_ssh_public_key }}" + register: cephadm_ssh_public_key_slurp + when: cephadm_check_ceph_id.stat.exists + +- name: Set a fact about the SSH public key + set_fact: + cephadm_ssh_public_key_content: "{{ cephadm_ssh_public_key_slurp.content | b64decode if cephadm_check_ceph_id.stat.exists else cephadm_ssh_key.public_key }}"