diff --git a/README.md b/README.md index 4d59faf2..4c4de774 100644 --- a/README.md +++ b/README.md @@ -295,7 +295,7 @@ Path to the private key file to copy to SQL Server. Default: `null` Type: `str` -### 'mssql_tls_remote_src' +### `mssql_tls_remote_src` Influence whether files provided with `mssql_tls_cert` and `mssql_tls_private_key` need to be transferred or already are present remotely. @@ -365,6 +365,7 @@ Set to `true` to configure for high availability. Setting to `false` does not remove configuration for high availability. When set to `true`, the role performs the following tasks: + 1. Include the System Roles firewall role to configure firewall: 1.1. Open the firewall port set with the `mssql_ha_listener_port` variable. 1.2. Enable the `high-availability` service in firewall. @@ -372,10 +373,15 @@ When set to `true`, the role performs the following tasks: 2.1. Enable AlwaysOn Health events. 2.2. Create certificate on the primary replica and distribute to other replicas. 2.3. Configure endpoint and availability group. -2.4. Configure the user provided with the ``mssql_ha_login` variable for - Pacemaker. -3. Include the System Roles ha_cluster role to configure pacemaker. - Note that this role does not configure STONITH devices in Pacemaker. +2.4. Configure the user provided with the `mssql_ha_login` variable for +Pacemaker. +1. Optional: Include the System Roles `ha_cluster` role to configure Pacemaker. +You must set the following variables to enable this: + +* [`mssql_ha_cluster_run_role`](#mssql_ha_cluster_run_role) to `true` +* [`mssql_ha_cluster_password`](#mssql_ha_cluster_password) +* [`mssql_ha_cluster_virtual_ip`](#mssql_ha_cluster_virtual_ip) +* [`mssql_ha_cluster_stonith_resources`](#mssql_ha_cluster_stonith_resources) Default: `false` @@ -394,7 +400,7 @@ host. The available values are: `primary`, `synchronous`, `witness`. 
-Default: no +Default: no default Type: `str` @@ -512,7 +518,48 @@ Default: `null` Type: `string` -#### `mssql_ha_hacluster_password` +#### `mssql_ha_cluster_run_role` + +Whether to run the `ha_cluster` role from this role. + +Note that the `ha_cluster` role has the following limitation: +**The role replaces the configuration of HA Cluster on specified nodes. +Any settings not specified in the role variables will be lost.** + +This means that running the `microsoft.sql.server` role re-writes existing +Pacemaker configuration. You must verify that you want to run the `ha_cluster` +role by setting `mssql_ha_cluster_run_role: true`. + +If you want to run the `ha_cluster` role independently of the +`microsoft.sql.server` role, or provide custom variables for the +`ha_cluster` role, you can work around this limitation. To do this, +run the `microsoft.sql.server` role with +[`mssql_ha_cluster_print_vars:`](#mssql_ha_cluster_print_vars)`true` to print +planned `ha_cluster` variables. Then you can merge the printed variables with +your custom `ha_cluster` variables and specify the resulting set of variables +with the `microsoft.sql.server` role invocation. + +Default: `false` + +Type: `bool` + +#### `mssql_ha_cluster_print_vars` + +Use this variable to print `ha_cluster_*` variables that the +`microsoft.sql.server` role sets when running the `ha_cluster` role to +configure Pacemaker. + +After `ha_cluster_*` are printed the `microsoft.sql.server` role exits without +executing any tasks. + +To print variables in a more readable manner you can configure a `debug` stdout +callback plug-in or pipe the playbook output to `| sed 's|\\n|\n|g'`. + +Default: `false` + +Type: `bool` + +#### `mssql_ha_cluster_password` The password for the cluster to be created in Pacemaker.
@@ -520,7 +567,7 @@ Default: `null` Type: `string` -#### `mssql_ha_virtual_ip` +#### `mssql_ha_cluster_virtual_ip` A floating virtual IP address for accessing the primary SQL Server node in an Always On availability group to be created in Pacemaker. @@ -532,6 +579,56 @@ Default: `null` Type: `string` +#### `mssql_ha_cluster_stonith_resources` + +With this variable, define Pacemaker stonith resource to be configured by the +`ha_cluster` role. The `microsoft.sql.server` role feeds this as a list to the +`ha_cluster_resource_primitives` variable. + +The items are as follows: + +* `id` (mandatory) - ID of a resource. +* `agent` (mandatory) - Name of a resource or stonith agent, for example + `ocf:pacemaker:Dummy` or `stonith:fence_xvm`. It is mandatory to use + `stonith:` for stonith agents. For resource agents, it is possible to use a + short name, such as `Dummy` instead of `ocf:pacemaker:Dummy`. However, if + several agents with the same short name are installed, the role will fail as + it will be unable to decide which agent should be used. Therefore, it is + recommended to use full names. +* `instance_attrs` (optional) - List of sets of the resource's instance + attributes. Currently, only one set is supported. The exact names and values + of attributes, as well as whether they are mandatory or not, depend on the + resource or stonith agent. +* `meta_attrs` (optional) - List of sets of the resource's meta attributes. + Currently, only one set is supported. +* `operations` (optional) - List of the resource's operations. + * `action` (mandatory) - Operation action as defined by Pacemaker and the + resource or stonith agent. + * `attrs` (mandatory) - Operation options, at least one option must be + specified.
+ +Example of setting this variable to configure a `stonith:fence_apc_snmp` agent: + +```yaml +mssql_ha_cluster_stonith_resources: + id: myapc + agent: stonith:fence_apc_snmp + instance_attrs: + - attrs: + - name: ipaddr + value: apc-switch.example.com + - name: pcmk_host_map + value: rhel8-node1.example.com:1;rhel8-node2.example.com:2 + - name: login + value: apclogin + - name: passwd + value: apcpassword +``` + +Default: `[]` + +Type: `list` + ## Example Playbooks This section outlines example playbooks that you can use as a reference. @@ -668,8 +765,22 @@ Example playbook: mssql_ha_db_backup_path: /var/opt/mssql/data/{{ mssql_ha_db_name }}.bak mssql_ha_login: pacemakerLogin mssql_ha_login_password: "p@55w0rD3" - mssql_ha_hacluster_password: "p@55w0rD4" - mssql_ha_virtual_ip: 192.168.1.254 + mssql_ha_cluster_run_role: false + mssql_ha_cluster_password: "p@55w0rD4" + mssql_ha_cluster_virtual_ip: 192.168.122.10 + mssql_ha_stonith_resources: + id: myapc + agent: stonith:fence_apc_snmp + instance_attrs: + - attrs: + - name: ipaddr + value: apc-switch.example.com + - name: pcmk_host_map + value: rhel8-node1.example.com:1;rhel8-node2.example.com:2 + - name: login + value: apclogin + - name: passwd + value: apcpassword roles: - microsoft.sql.server ``` diff --git a/defaults/main.yml b/defaults/main.yml index 3b0a7bc4..1e02f5f9 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -38,8 +38,8 @@ mssql_ha_db_name: null mssql_ha_db_backup_path: /var/opt/mssql/data/{{ mssql_ha_db_name }}.bak mssql_ha_login: null mssql_ha_login_password: null -mssql_ha_hacluster_password: null -mssql_ha_virtual_ip: null -mssql_ha_sbd_enabled: false -mssql_ha_sbd_watchdog: /dev/watchdog -mssql_ha_sbd_devices: null +mssql_ha_cluster_run_role: false +mssql_ha_cluster_print_vars: false +mssql_ha_cluster_password: null +mssql_ha_cluster_virtual_ip: null +mssql_ha_cluster_stonith_resources: [] diff --git a/tasks/main.yml b/tasks/main.yml index 73b12fe5..86fd5098 100644 --- a/tasks/main.yml +++
b/tasks/main.yml @@ -72,6 +72,44 @@ run_once: true when: mssql_ha_configure | bool +- name: Verify that mssql_ha_cluster variables are provided correctly + assert: + that: + - mssql_ha_cluster_virtual_ip is not none + - mssql_ha_stonith_resources.id is defined + - mssql_ha_stonith_resources.agent is defined + fail_msg: >- + When setting mssql_ha_cluster_run_role=true you must also specify + mssql_ha_cluster_virtual_ip and mssql_ha_stonith_resources correctly + when: + - mssql_ha_cluster_run_role | bool + - mssql_ha_configure | bool + +- name: Print variables to be used with the ha_cluster role + when: mssql_ha_cluster_print_vars | bool + run_once: true + block: + - name: Set fact with ha_cluster_* variables and their values + set_fact: + __mssql_ha_cluster_vars: "{{ __mssql_ha_cluster_vars | d([]) + [ + { + lookup('varnames', '^ha_cluster*', wantlist=True)[item | int]: + lookup('vars', *lookup('varnames', '^ha_cluster*', wantlist=True)) + [item | int] + } + ] }}" + with_sequence: >- + start=0 + end={{ lookup('varnames', '^ha_cluster*', wantlist=True) | length - 1 }} + no_log: true + + - name: Print variables to be used with the ha_cluster role + debug: + msg: "{{ __mssql_ha_cluster_vars | to_nice_yaml(indent=2) }}" + + - name: End play + meta: end_play + - name: Gather package facts package_facts: manager: auto @@ -726,73 +764,7 @@ - name: Run ha_cluster to configure pacemaker include_role: name: fedora.linux_system_roles.ha_cluster - vars: - ha_cluster_cluster_name: "{{ mssql_ha_ag_name }}" - ha_cluster_hacluster_password: >- - {{ mssql_ha_hacluster_password | quote }} - ha_cluster_cluster_properties: - - attrs: - - name: cluster-recheck-interval - value: 2min - - name: start-failure-is-fatal - value: true - ha_cluster_resource_primitives: - - id: ag_cluster - agent: ocf:mssql:ag - instance_attrs: - - attrs: - - name: ag_name - value: "{{ mssql_ha_ag_name }}" - meta_attrs: - - attrs: - - name: failure-timeout - value: 60s - - id: virtualip - agent: 
ocf:heartbeat:IPaddr2 - instance_attrs: - - attrs: - - name: ip - value: "{{ mssql_ha_virtual_ip }}" - operations: - - action: monitor - attrs: - - name: interval - value: 30s - ha_cluster_resource_clones: - - resource_id: ag_cluster - promotable: yes - meta_attrs: - - attrs: - - name: notify - value: true - # If RHEL > 8.3, set on_fail: demote. - # Else, set notify: true again as a workaround - - name: "{{ - 'on_fail' - if ansible_distribution_version is version('8.3', '>') - else 'notify' - }}" - value: "{{ - 'demote' - if ansible_distribution_version is version('8.3', '>') - else true - }}" - ha_cluster_constraints_colocation: - - resource_leader: - id: ag_cluster-clone - role: Promoted - resource_follower: - id: virtualip - options: - - name: score - value: INFINITY - ha_cluster_constraints_order: - - resource_first: - id: ag_cluster-clone - action: promote - resource_then: - id: virtualip - action: start + when: mssql_ha_cluster_run_role | bool - name: Verify if the {{ mssql_ha_db_name }} database exists vars: diff --git a/tests/tests_configure_ha_cluster.yml b/tests/tests_configure_ha_cluster.yml index f8043482..61177a5a 100644 --- a/tests/tests_configure_ha_cluster.yml +++ b/tests/tests_configure_ha_cluster.yml @@ -64,8 +64,23 @@ mssql_ha_db_backup_path: /var/opt/mssql/data/{{ mssql_ha_db_name }}.bak mssql_ha_login: pacemakerLogin mssql_ha_login_password: "p@55w0rD3" - mssql_ha_hacluster_password: "p@55w0rD4" - mssql_ha_virtual_ip: 192.168.122.10 + mssql_ha_cluster_run_role: true + mssql_ha_cluster_password: "p@55w0rD4" + mssql_ha_cluster_virtual_ip: 192.168.122.10 + mssql_ha_stonith_resources: + id: myapc + agent: stonith:fence_apc_snmp + instance_attrs: + - attrs: + - name: ipaddr + value: apc-switch.example.com + - name: pcmk_host_map + value: rhel8-node1.example.com:1;rhel8-node2.example.com:2 + - name: login + value: apc + - name: passwd + value: apc + include_role: name: linux-system-roles.mssql @@ -97,9 +112,7 @@ mssql_ha_db_backup_path: 
/var/opt/mssql/data/{{ mssql_ha_db_name }}.bak mssql_ha_login: pacemakerLogin mssql_ha_login_password: "p@55w0rD3" - mssql_ha_hacluster_password: "p@55w0rD4" - mssql_ha_virtual_ip: 192.168.122.10 - mssql_ha_sbd_enabled: true + mssql_ha_cluster_run_role: false tasks: - name: Ensure ansible_facts and variables used by role include_vars: ../vars/main.yml diff --git a/vars/main.yml b/vars/main.yml index de2baeaf..d09ebc80 100644 --- a/vars/main.yml +++ b/vars/main.yml @@ -44,3 +44,72 @@ __mssql_required_facts: - distribution_major_version - distribution_version - os_family +ha_cluster_cluster_name: "{{ mssql_ha_ag_name }}" +ha_cluster_hacluster_password: >- + {{ mssql_ha_cluster_password | quote }} +ha_cluster_cluster_properties: + - attrs: + - name: cluster-recheck-interval + value: 2min + - name: start-failure-is-fatal + value: true + - name: stonith-enabled + value: true +ha_cluster_resource_primitives: + - "{{ mssql_ha_stonith_resources }}" + - id: ag_cluster + agent: ocf:mssql:ag + instance_attrs: + - attrs: + - name: ag_name + value: "{{ mssql_ha_ag_name }}" + meta_attrs: + - attrs: + - name: failure-timeout + value: 60s + - id: virtualip + agent: ocf:heartbeat:IPaddr2 + instance_attrs: + - attrs: + - name: ip + value: "{{ mssql_ha_cluster_virtual_ip }}" + operations: + - action: monitor + attrs: + - name: interval + value: 30s +ha_cluster_resource_clones: + - resource_id: ag_cluster + promotable: yes + meta_attrs: + - attrs: + - name: notify + value: true + # If RHEL > 8.3, set on_fail: demote. 
+ # Else, set notify: true again as a workaround + - name: "{{ + 'on_fail' + if ansible_distribution_version is version('8.3', '>') + else 'notify' + }}" + value: "{{ + 'demote' + if ansible_distribution_version is version('8.3', '>') + else true + }}" +ha_cluster_constraints_colocation: + - resource_leader: + id: ag_cluster-clone + role: Promoted + resource_follower: + id: virtualip + options: + - name: score + value: INFINITY +ha_cluster_constraints_order: + - resource_first: + id: ag_cluster-clone + action: promote + resource_then: + id: virtualip + action: start