Skip to content

Commit

Permalink
Merge pull request #7896 from rook/mergify/bp/release-1.6/pr-7884
Browse files Browse the repository at this point in the history
ceph: Persist expected mon endpoints immediately during mon failover (backport #7884)
  • Loading branch information
mergify[bot] committed May 12, 2021
2 parents f75d0f9 + 6f2cb5e commit 5dd2d38
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 23 deletions.
52 changes: 29 additions & 23 deletions pkg/operator/ceph/cluster/mon/mon.go
Original file line number Diff line number Diff line change
Expand Up @@ -502,11 +502,6 @@ func (c *Cluster) ensureMonsRunning(mons []*monConfig, i, targetCount int, requi
return errors.Wrap(err, "failed to save mons")
}

// make sure we have the connection info generated so connections can happen
if err := WriteConnectionConfig(c.context, c.ClusterInfo); err != nil {
return err
}

// Start the deployment
if err := c.startDeployments(mons[0:expectedMonCount], requireAllInQuorum); err != nil {
return errors.Wrap(err, "failed to start mon pods")
Expand Down Expand Up @@ -1056,6 +1051,29 @@ func (c *Cluster) waitForMonsToJoin(mons []*monConfig, requireAllInQuorum bool)
}

// saveMonConfig persists the expected mon daemons and then propagates the
// current c.ClusterInfo to the global config store, the connection config,
// and the CSI cluster config, in that order. It stops at the first failing
// step and returns that step's error wrapped with context.
func (c *Cluster) saveMonConfig() error {
	err := c.persistExpectedMonDaemons()
	if err != nil {
		return errors.Wrap(err, "failed to persist expected mons")
	}

	// Whenever the mon config changes the global config must be refreshed as
	// well, so that any daemon that restarts picks up the latest version.
	globalStore := config.GetStore(c.context, c.Namespace, c.ownerInfo)
	err = globalStore.CreateOrUpdate(c.ClusterInfo)
	if err != nil {
		return errors.Wrap(err, "failed to update the global config")
	}

	// Write the latest connection config to the config dir.
	err = WriteConnectionConfig(c.context, c.ClusterInfo)
	if err != nil {
		return errors.Wrap(err, "failed to write connection config for new mons")
	}

	err = csi.SaveClusterConfig(c.context.Clientset, c.Namespace, c.ClusterInfo, c.csiConfigMutex)
	if err != nil {
		return errors.Wrap(err, "failed to update csi cluster config")
	}

	return nil
}

func (c *Cluster) persistExpectedMonDaemons() error {
ctx := context.TODO()
configMap := &v1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Expand All @@ -1067,7 +1085,6 @@ func (c *Cluster) saveMonConfig() error {
if err != nil {
return errors.Wrapf(err, "failed to set owner reference mon configmap %q", configMap.Name)
}

monMapping, err := json.Marshal(c.mapping)
if err != nil {
return errors.Wrap(err, "failed to marshal mon mapping")
Expand Down Expand Up @@ -1106,24 +1123,7 @@ func (c *Cluster) saveMonConfig() error {
return errors.Wrap(err, "failed to update mon endpoint config map")
}
}

logger.Infof("saved mon endpoints to config map %+v", configMap.Data)

// Every time the mon config is updated, must also update the global config so that all daemons
// have the most updated version if they restart.
if err := config.GetStore(c.context, c.Namespace, c.ownerInfo).CreateOrUpdate(c.ClusterInfo); err != nil {
return errors.Wrap(err, "failed to update the global config")
}

// write the latest config to the config dir
if err := WriteConnectionConfig(c.context, c.ClusterInfo); err != nil {
return errors.Wrap(err, "failed to write connection config for new mons")
}

if err := csi.SaveClusterConfig(c.context.Clientset, c.Namespace, c.ClusterInfo, c.csiConfigMutex); err != nil {
return errors.Wrap(err, "failed to update csi cluster config")
}

return nil
}

Expand Down Expand Up @@ -1337,6 +1337,12 @@ func (c *Cluster) startMon(m *monConfig, schedule *MonScheduleInfo) error {
return errors.Wrapf(err, "failed to commit maxMonId after starting mon %q", m.DaemonName)
}

// Persist the expected list of mons to the configmap now, in case the operator is interrupted before the mon failover completes.
// The config on disk is not updated until the mon failover has completed.
if err := c.persistExpectedMonDaemons(); err != nil {
return errors.Wrap(err, "failed to persist expected mon daemons")
}

return nil
}

Expand Down
25 changes: 25 additions & 0 deletions pkg/operator/ceph/cluster/mon/mon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,31 @@ func validateStart(ctx context.Context, t *testing.T, c *Cluster) {
assert.NoError(t, err)
}

// TestPersistMons verifies that persistExpectedMonDaemons writes the current
// contents of c.ClusterInfo.Monitors to the mon endpoint configmap, both on the
// first call and again after the set of monitors has changed.
func TestPersistMons(t *testing.T) {
	fakeClient := test.New(t, 1)
	owner := cephclient.NewMinimumOwnerInfoWithOwnerRef()
	c := New(&clusterd.Context{Clientset: fakeClient}, "ns", cephv1.ClusterSpec{}, owner, &sync.Mutex{})
	// NOTE(review): presumably this seeds ClusterInfo with a single mon "a";
	// the first expectation below relies on it.
	setCommonMonProperties(c, 1, cephv1.MonSpec{Count: 3, AllowMultiplePerNode: true}, "myversion")

	// persistAndCheck persists the expected mons, reads the endpoint configmap
	// back from the clientset, and compares its endpoint data with wantEndpoints.
	persistAndCheck := func(wantEndpoints string) {
		persistErr := c.persistExpectedMonDaemons()
		assert.NoError(t, persistErr)

		endpointCM, getErr := c.context.Clientset.CoreV1().ConfigMaps(c.Namespace).Get(context.TODO(), EndpointConfigMapName, metav1.GetOptions{})
		assert.NoError(t, getErr)
		assert.Equal(t, wantEndpoints, endpointCM.Data[EndpointDataKey])
	}

	// Persist mon a.
	persistAndCheck("a=1.2.3.1:6789")

	// Add mon b and drop mon a, simply to confirm that the configmap is
	// updated on a subsequent persist.
	c.ClusterInfo.Monitors["b"] = &cephclient.MonInfo{Name: "b", Endpoint: "4.5.6.7:3300"}
	delete(c.ClusterInfo.Monitors, "a")
	persistAndCheck("b=4.5.6.7:3300")
}

func TestSaveMonEndpoints(t *testing.T) {
ctx := context.TODO()
clientset := test.New(t, 1)
Expand Down

0 comments on commit 5dd2d38

Please sign in to comment.