Skip to content

Commit

Permalink
Send sbd data in diskless scenario (#321)
Browse files Browse the repository at this point in the history
  • Loading branch information
arbulu89 committed Feb 20, 2024
1 parent 9d901ab commit a0c73ff
Show file tree
Hide file tree
Showing 3 changed files with 170 additions and 99 deletions.
34 changes: 5 additions & 29 deletions internal/core/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"encoding/hex"
"io"
"os"
"strconv"
"strings"

// These packages were originally imported from github.com/ClusterLabs/ha_cluster_exporter/collector/pacemaker
Expand All @@ -23,7 +22,6 @@ const (
crmmonAdmPath string = "/usr/sbin/crm_mon"
corosyncKeyPath string = "/etc/corosync/authkey"
clusterNameProperty string = "cib-bootstrap-options-cluster-name"
stonithEnabled string = "cib-bootstrap-options-stonith-enabled"
stonithResourceMissing string = "notconfigured"
stonithAgent string = "stonith:"
sbdFencingAgentName string = "external/sbd"
Expand Down Expand Up @@ -110,15 +108,13 @@ func NewClusterWithDiscoveryTools(discoveryTools *DiscoveryTools) (*Cluster, err

cluster.Name = getName(cluster)

if cluster.IsFencingSBD() {
sbdData, err := NewSBD(discoveryTools.CommandExecutor, discoveryTools.SBDPath, discoveryTools.SBDConfigPath)
if err != nil {
return nil, err
}

cluster.SBD = sbdData
sbdData, err := NewSBD(discoveryTools.CommandExecutor, discoveryTools.SBDPath, discoveryTools.SBDConfigPath)
if err != nil && cluster.IsFencingSBD() {
log.Error(errors.Wrap(err, "Error discovering SBD data"))
}

cluster.SBD = sbdData

cluster.DC = cluster.IsDC()

cloudIdentifier := cloud.NewIdentifier(discoveryTools.CommandExecutor)
Expand Down Expand Up @@ -159,26 +155,6 @@ func (c *Cluster) IsDC() bool {
return false
}

// IsFencingEnabled reports whether the CIB cluster property
// "cib-bootstrap-options-stonith-enabled" is present and parses as true.
// It returns false when the property is absent or its value is not a
// valid boolean.
func (c *Cluster) IsFencingEnabled() bool {
	for _, prop := range c.Cib.Configuration.CrmConfig.ClusterProperties {
		if prop.ID != stonithEnabled {
			continue
		}
		// First matching property decides the result; a parse
		// failure counts as fencing disabled, as before.
		enabled, err := strconv.ParseBool(prop.Value)
		return err == nil && enabled
	}

	return false
}

// FencingResourceExists reports whether any fencing resource is
// configured, i.e. FencingType does not resolve to the
// "notconfigured" sentinel.
func (c *Cluster) FencingResourceExists() bool {
	return c.FencingType() != stonithResourceMissing
}

func (c *Cluster) FencingType() string {
for _, resource := range c.Crmmon.Resources {
if strings.HasPrefix(resource.Agent, stonithAgent) {
Expand Down
93 changes: 23 additions & 70 deletions internal/core/cluster/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,29 @@ func (suite *ClusterTestSuite) TestNewClusterWithDiscoveryTools() {
suite.NoError(err)
}

// TestNewClusterDisklessSBD verifies that cluster discovery succeeds in a
// diskless SBD scenario: the SBD configuration is loaded (watchdog device
// present) while the device list stays nil because no SBD_DEVICE is set.
func (suite *ClusterTestSuite) TestNewClusterDisklessSBD() {
	mockCommand := new(mocks.CommandExecutor)
	mockCommand.On("Exec", "dmidecode", "-s", "chassis-asset-tag").
		Return([]byte("7783-7084-3265-9085-8269-3286-77"), nil)

	c, err := cluster.NewClusterWithDiscoveryTools(&cluster.DiscoveryTools{
		CibAdmPath:      helpers.GetFixturePath("discovery/cluster/fake_cibadmin.sh"),
		CrmmonAdmPath:   helpers.GetFixturePath("discovery/cluster/fake_crm_mon_diskless_sbd.sh"),
		CorosyncKeyPath: helpers.GetFixturePath("discovery/cluster/authkey"),
		SBDPath:         "/usr/sbin/sbd",
		SBDConfigPath:   helpers.GetFixturePath("discovery/cluster/sbd/sbd_config_no_device"),
		CommandExecutor: mockCommand,
	})

	// Fail fast on a discovery error: if err is non-nil, c is nil and the
	// assertions below would panic on dereference instead of reporting a
	// clean test failure.
	suite.Require().NoError(err)

	suite.Equal("hana_cluster", c.Name)
	suite.Equal("47d1190ffb4f781974c8356d7f863b03", c.ID)
	suite.Equal(false, c.DC)
	suite.Equal("azure", c.Provider)
	suite.Equal("/dev/watchdog", c.SBD.Config["SBD_WATCHDOG_DEV"])
	suite.Equal([]*cluster.SBDDevice(nil), c.SBD.Devices)
}

func (suite *ClusterTestSuite) TestClusterName() {
root := new(cib.Root)

Expand Down Expand Up @@ -140,48 +163,6 @@ func (suite *ClusterTestSuite) TestIsDC() {
suite.Equal(false, c.IsDC())
}

// TestIsFencingEnabled checks IsFencingEnabled against the two boolean
// values of the stonith-enabled cluster property.
func (suite *ClusterTestSuite) TestIsFencingEnabled() {
	for _, tc := range []struct {
		value    string
		expected bool
	}{
		{value: "true", expected: true},
		{value: "false", expected: false},
	} {
		root := new(cib.Root)

		// Populate the anonymous CrmConfig struct with a single
		// stonith-enabled property carrying the case's value.
		root.Configuration.CrmConfig = struct {
			ClusterProperties []cib.Attribute `xml:"cluster_property_set>nvpair"`
		}{
			ClusterProperties: []cib.Attribute{
				{
					ID:    "cib-bootstrap-options-stonith-enabled",
					Value: tc.value,
				},
			},
		}

		c := cluster.Cluster{
			Cib: *root,
		}

		suite.Equal(tc.expected, c.IsFencingEnabled())
	}
}

func (suite *ClusterTestSuite) TestFencingType() {
c := cluster.Cluster{
Crmmon: crmmon.Root{
Expand Down Expand Up @@ -210,34 +191,6 @@ func (suite *ClusterTestSuite) TestFencingType() {
suite.Equal("notconfigured", c.FencingType())
}

// TestFencingResourceExists checks FencingResourceExists with and without
// a stonith-prefixed resource agent configured.
func (suite *ClusterTestSuite) TestFencingResourceExists() {
	cases := []struct {
		agent    string
		expected bool
	}{
		{agent: "stonith:myfencing", expected: true},
		{agent: "notstonith:myfencing", expected: false},
	}

	for _, tc := range cases {
		c := cluster.Cluster{
			Crmmon: crmmon.Root{
				Version: "1.2.3",
				Resources: []crmmon.Resource{
					{
						Agent: tc.agent,
					},
				},
			},
		}

		suite.Equal(tc.expected, c.FencingResourceExists())
	}
}

func (suite *ClusterTestSuite) TestIsFencingSBD() {
c := cluster.Cluster{
Crmmon: crmmon.Root{
Expand Down
142 changes: 142 additions & 0 deletions test/fixtures/discovery/cluster/fake_crm_mon_diskless_sbd.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
#!/usr/bin/env bash
# Test fixture: emulates the XML output of crm_mon for a two-node HANA
# cluster in a diskless SBD scenario (stonith-enabled with
# stonith-watchdog-timeout set, no SBD block devices).
#
# The heredoc delimiter is quoted so the XML payload is emitted verbatim,
# protected from shell parameter/command expansion.

cat <<'EOF'
<?xml version="1.0"?>
<crm_mon version="2.0.0">
<summary>
<stack type="corosync" />
<current_dc present="true" version="1.1.18+20180430.b12c320f5-3.15.1-b12c320f5" name="node01" id="1084783375" with_quorum="true" />
<last_update time="Fri Oct 18 11:48:54 2019" />
<last_change time="Fri Oct 18 11:48:22 2019" user="root" client="crm_attribute" origin="node01" />
<nodes_configured number="2" />
<resources_configured number="8" disabled="1" blocked="0" />
<cluster_options stonith-enabled="true" stonith-watchdog-timeout="10" symmetric-cluster="true" no-quorum-policy="stop" maintenance-mode="false" />
</summary>
<nodes>
<node name="node01" id="1084783375" online="true" standby="false" standby_onfail="false" maintenance="false" pending="false" unclean="false" shutdown="false" expected_up="true" is_dc="true" resources_running="7" type="member" />
<node name="node02" id="1084783376" online="true" standby="false" standby_onfail="false" maintenance="false" pending="false" unclean="false" shutdown="false" expected_up="true" is_dc="false" resources_running="5" type="member" />
</nodes>
<resources>
<resource id="test-stop" resource_agent="ocf::heartbeat:Dummy" role="Stopped" target_role="Stopped" active="false" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="0" />
<resource id="test" resource_agent="ocf::heartbeat:Dummy" role="Started" target_role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1">
<node name="node02" id="1084783376" cached="false"/>
</resource>
<resource id="rsc_ip_PRD_HDB00" resource_agent="ocf::heartbeat:IPaddr2" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
<node name="node01" id="1084783375" cached="false"/>
</resource>
<clone id="msl_SAPHana_PRD_HDB00" multi_state="true" unique="false" managed="true" failed="false" failure_ignored="false" >
<resource id="rsc_SAPHana_PRD_HDB00" resource_agent="ocf::suse:SAPHana" role="Master" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
<node name="node01" id="1084783375" cached="false"/>
</resource>
<resource id="rsc_SAPHana_PRD_HDB00" resource_agent="ocf::suse:SAPHana" role="Slave" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" pending="Monitoring" >
<node name="node02" id="1084783376" cached="false"/>
</resource>
</clone>
<clone id="cln_SAPHanaTopology_PRD_HDB00" multi_state="false" unique="false" managed="true" failed="false" failure_ignored="false" >
<resource id="rsc_SAPHanaTopology_PRD_HDB00" resource_agent="ocf::suse:SAPHanaTopology" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
<node name="node01" id="1084783375" cached="false"/>
</resource>
<resource id="rsc_SAPHanaTopology_PRD_HDB00" resource_agent="ocf::suse:SAPHanaTopology" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
<node name="node02" id="1084783376" cached="false"/>
</resource>
</clone>
<clone id="c-clusterfs" multi_state="false" unique="false" managed="true" failed="false" failure_ignored="false">
<resource id="clusterfs" resource_agent="ocf::heartbeat:Filesystem" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1">
<node name="node01" id="1084783225" cached="true"/>
</resource>
<resource id="clusterfs" resource_agent="ocf::heartbeat:Filesystem" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1">
<node name="node02" id="1084783226" cached="true"/>
</resource>
<resource id="clusterfs" resource_agent="ocf::heartbeat:Filesystem" role="Stopped" active="false" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="0"/>
<resource id="clusterfs" resource_agent="ocf::heartbeat:Filesystem" role="Stopped" active="false" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="0"/>
</clone>
<group id="grp_HA1_ASCS00" number_resources="3" >
<resource id="rsc_ip_HA1_ASCS00" resource_agent="ocf::heartbeat:IPaddr2" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
<node name="node01" id="1084783375" cached="false"/>
</resource>
<resource id="rsc_fs_HA1_ASCS00" resource_agent="ocf::heartbeat:Filesystem" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
<node name="node01" id="1084783375" cached="false"/>
</resource>
<resource id="rsc_sap_HA1_ASCS00" resource_agent="ocf::heartbeat:SAPInstance" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
<node name="node01" id="1084783375" cached="false"/>
</resource>
</group>
<group id="grp_HA1_ERS10" number_resources="3" >
<resource id="rsc_ip_HA1_ERS10" resource_agent="ocf::heartbeat:IPaddr2" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
<node name="node02" id="1084783376" cached="false"/>
</resource>
<resource id="rsc_fs_HA1_ERS10" resource_agent="ocf::heartbeat:Filesystem" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
<node name="node02" id="1084783376" cached="false"/>
</resource>
<resource id="rsc_sap_HA1_ERS10" resource_agent="ocf::heartbeat:SAPInstance" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
<node name="node02" id="1084783376" cached="false"/>
</resource>
</group>
</resources>
<node_attributes>
<node name="node01">
<attribute name="hana_prd_clone_state" value="PROMOTED" />
<attribute name="hana_prd_op_mode" value="logreplay" />
<attribute name="hana_prd_remoteHost" value="node02" />
<attribute name="hana_prd_roles" value="4:P:master1:master:worker:master" />
<attribute name="hana_prd_site" value="PRIMARY_SITE_NAME" />
<attribute name="hana_prd_srmode" value="sync" />
<attribute name="hana_prd_sync_state" value="PRIM" />
<attribute name="hana_prd_version" value="2.00.040.00.1553674765" />
<attribute name="hana_prd_vhost" value="node01" />
<attribute name="lpa_prd_lpt" value="1571392102" />
<attribute name="master-rsc_SAPHana_PRD_HDB00" value="150" />
</node>
<node name="node02">
<attribute name="hana_prd_clone_state" value="DEMOTED" />
<attribute name="hana_prd_op_mode" value="logreplay" />
<attribute name="hana_prd_remoteHost" value="node01" />
<attribute name="hana_prd_roles" value="4:S:master1:master:worker:master" />
<attribute name="hana_prd_site" value="SECONDARY_SITE_NAME" />
<attribute name="hana_prd_srmode" value="sync" />
<attribute name="hana_prd_sync_state" value="SOK" />
<attribute name="hana_prd_version" value="2.00.040.00.1553674765" />
<attribute name="hana_prd_vhost" value="node02" />
<attribute name="lpa_prd_lpt" value="30" />
<attribute name="master-rsc_SAPHana_PRD_HDB00" value="100" />
</node>
</node_attributes>
<node_history>
<node name="node01">
<resource_history id="rsc_SAPHana_PRD_HDB00" orphan="false" migration-threshold="5000" fail-count="1000000" last-failure="Wed Oct 23 12:37:22 2019">
<operation_history call="15" task="probe" last-rc-change="Thu Oct 10 12:57:33 2019" last-run="Thu Oct 10 12:57:33 2019" exec-time="4140ms" queue-time="0ms" rc="0" rc_text="ok" />
<operation_history call="31" task="promote" last-rc-change="Thu Oct 10 12:57:57 2019" last-run="Thu Oct 10 12:57:57 2019" exec-time="2015ms" queue-time="0ms" rc="0" rc_text="ok" />
<operation_history call="32" task="monitor" interval="60000ms" last-rc-change="Thu Oct 10 12:58:03 2019" exec-time="3589ms" queue-time="0ms" rc="8" rc_text="master" />
</resource_history>
<resource_history id="rsc_ip_PRD_HDB00" orphan="false" migration-threshold="5000" fail-count="2" last-failure="Wed Oct 23 12:37:22 2019">
<operation_history call="21" task="start" last-rc-change="Thu Oct 10 12:57:33 2019" last-run="Thu Oct 10 12:57:33 2019" exec-time="130ms" queue-time="0ms" rc="0" rc_text="ok" />
<operation_history call="22" task="monitor" interval="10000ms" last-rc-change="Thu Oct 10 12:57:33 2019" exec-time="78ms" queue-time="0ms" rc="0" rc_text="ok" />
</resource_history>
<resource_history id="rsc_SAPHanaTopology_PRD_HDB00" orphan="false" migration-threshold="1">
<operation_history call="24" task="start" last-rc-change="Thu Oct 10 12:57:39 2019" last-run="Thu Oct 10 12:57:39 2019" exec-time="4538ms" queue-time="0ms" rc="0" rc_text="ok" />
<operation_history call="26" task="monitor" interval="10000ms" last-rc-change="Thu Oct 10 12:57:46 2019" exec-time="4220ms" queue-time="0ms" rc="0" rc_text="ok" />
</resource_history>
</node>
<node name="node02">
<resource_history id="rsc_SAPHana_PRD_HDB00" orphan="false" migration-threshold="50" fail-count="300" last-failure="Wed Oct 23 12:37:22 2019">
<operation_history call="22" task="start" last-rc-change="Thu Oct 17 15:22:40 2019" last-run="Thu Oct 17 15:22:40 2019" exec-time="44083ms" queue-time="0ms" rc="0" rc_text="ok" />
<operation_history call="23" task="monitor" interval="61000ms" last-rc-change="Thu Oct 17 15:23:24 2019" exec-time="2605ms" queue-time="0ms" rc="0" rc_text="ok" />
</resource_history>
<resource_history id="rsc_SAPHanaTopology_PRD_HDB00" orphan="false" migration-threshold="3">
<operation_history call="20" task="start" last-rc-change="Thu Oct 17 15:22:37 2019" last-run="Thu Oct 17 15:22:37 2019" exec-time="2905ms" queue-time="0ms" rc="0" rc_text="ok" />
<operation_history call="21" task="monitor" interval="10000ms" last-rc-change="Thu Oct 17 15:22:40 2019" exec-time="3347ms" queue-time="0ms" rc="0" rc_text="ok" />
</resource_history>
<resource_history id="test" orphan="false" migration-threshold="5000">
<operation_history call="29" task="start" last-rc-change="Mon Feb 24 09:45:49 2020" last-run="Mon Feb 24 09:45:49 2020" exec-time="11ms" queue-time="0ms" rc="0" rc_text="ok" />
</resource_history>
<resource_history id="test-stop" orphan="false" migration-threshold="5000">
<operation_history call="35" task="stop" last-rc-change="Mon Feb 24 09:46:58 2020" last-run="Mon Feb 24 09:46:58 2020" exec-time="12ms" queue-time="0ms" rc="0" rc_text="ok" />
</resource_history>
</node>
</node_history>
<tickets>
</tickets>
<bans>
</bans>
</crm_mon>
EOF

0 comments on commit a0c73ff

Please sign in to comment.