ceph: ability to enable mgr module via CRD
We can now enable any manager module via the CRD. To do so, simply add the
following to the cluster spec:

```
mgr:
  modules:
    - name: pg_autoscaler
```
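
Once the updated CR is applied, the enabled modules can be verified with
`ceph mgr module ls` from the Rook toolbox pod.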

Closes: rook#560
Signed-off-by: Sébastien Han <seb@redhat.com>
leseb committed Sep 4, 2019
1 parent 5c59a1e commit 1e37209
Showing 10 changed files with 84 additions and 2 deletions.
12 changes: 12 additions & 0 deletions Documentation/ceph-cluster-crd.md
@@ -130,6 +130,8 @@ For more details on the mons and when to choose a number other than `3`, see the
- `disruptionManagement`: The section for configuring management of daemon disruptions
- `managePodBudgets`: if `true`, the operator will create and manage PodDisruptionBudgets for OSD, Mon, RGW, and MDS daemons. OSD PDBs are managed dynamically via the strategy outlined in the [design](https://github.com/rook/rook/blob/master/design/ceph-managed-disruptionbudgets.md). The operator will block eviction of OSDs by default and unblock them safely when drains are detected.
- `osdMaintenanceTimeout`: is a duration in minutes that determines how long an entire failureDomain like `region/zone/host` will be held in `noout` (in addition to the default DOWN/OUT interval) when it is draining. This is only relevant when `managePodBudgets` is `true`. The default value is `30` minutes.
- `mgr`: the manager top-level section
- `modules`: the list of Ceph manager modules to enable

### Mon Settings

@@ -151,6 +153,16 @@ To change the defaults that the operator uses to determine the mon health and wh
- `ROOK_MON_HEALTHCHECK_INTERVAL`: The frequency with which to check if mons are in quorum (default is 45 seconds)
- `ROOK_MON_OUT_TIMEOUT`: The interval to wait before marking a mon as "out" and starting a new mon to replace it in the quorum (default is 600 seconds)

### Mgr Settings

You can use the cluster CR to enable any Ceph manager module, like so:

```yaml
mgr:
modules:
- name: <name of the module>
```
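
The list of available manager modules is documented at https://docs.ceph.com/docs/master/mgr/. When the `pg_autoscaler` module is enabled on Ceph Nautilus or newer, Rook also sets `osd_pool_default_pg_autoscale_mode` to `on` so that newly created pools are autoscaled.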

### Node Settings
In addition to the cluster level settings specified above, each individual node can also specify configuration to override the cluster level settings and defaults.
If a node does not specify any configuration then it will inherit the cluster level settings.
1 change: 1 addition & 0 deletions PendingReleaseNotes.md
@@ -54,6 +54,7 @@ an example usage
- The Ceph cluster custom resource now contains a `configOverrides` section where users can specify
configuration changes to Ceph which Rook should apply.
- Rook can now manage PodDisruptionBudgets for the following Daemons: OSD, Mon, RGW, MDS. OSD budgets are dynamically managed as documented in the [design](https://github.com/rook/rook/blob/master/design/ceph-managed-disruptionbudgets.md). This can be enabled with the `managePodBudgets` flag in the cluster CR. When this is enabled, drains on OSDs will be blocked by default and dynamically unblocked in a safe manner one failureDomain at a time. When a failure domain is draining, it will be marked as no out for a longer time than the default DOWN/OUT interval.
- The Ceph cluster CRD now has a `mgr` section that can be used to enable Ceph manager modules

### YugabyteDB

5 changes: 5 additions & 0 deletions cluster/examples/kubernetes/ceph/cluster.yaml
@@ -34,6 +34,11 @@ spec:
count: 3
allowMultiplePerNode: false
# enable the ceph dashboard for viewing cluster status
mgr:
# list of manager modules to enable
# available modules can be seen at https://docs.ceph.com/docs/master/mgr/
modules:
- name: pg_autoscaler
dashboard:
enabled: true
# serve the dashboard under a subpath (useful when you are accessing the dashboard via a reverse proxy)
13 changes: 13 additions & 0 deletions pkg/apis/ceph.rook.io/v1/types.go
@@ -92,6 +92,9 @@ type ClusterSpec struct {
// Whether the Ceph Cluster is running external to this Kubernetes cluster
// mon, mgr, osd, mds, and discover daemons will not be created for external clusters.
External ExternalSpec `json:"external"`

// A spec for mgr related options
Mgr MgrSpec `json:"mgr,omitempty"`
}

// VersionSpec represents the settings for the Ceph version that Rook is orchestrating.
@@ -176,6 +179,16 @@ type MonSpec struct {
VolumeClaimTemplate *v1.PersistentVolumeClaim `json:"volumeClaimTemplate,omitempty"`
}

// MgrSpec represents options to configure a ceph mgr
type MgrSpec struct {
Modules []Module `json:"modules,omitempty"`
}

// Module represents a mgr module that the user wants to enable
type Module struct {
Name string `json:"name,omitempty"`
}

// ExternalSpec represents the options supported by an external cluster
type ExternalSpec struct {
Enable bool `json:"enable"`
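
For illustration, a minimal sketch (an assumption for this write-up, not part of the change) of how the new `Mgr` field could be populated programmatically, using the `cephv1` alias for `github.com/rook/rook/pkg/apis/ceph.rook.io/v1` that the tests below also use:

```go
package main

import (
	"fmt"

	cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
)

func main() {
	// Mirror the YAML example: enable the pg_autoscaler module via the spec.
	spec := cephv1.ClusterSpec{
		Mgr: cephv1.MgrSpec{
			Modules: []cephv1.Module{
				{Name: "pg_autoscaler"},
			},
		},
	}

	// The operator iterates over these entries and enables each module.
	for _, m := range spec.Mgr.Modules {
		fmt.Println("module to enable:", m.Name)
	}
}
```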
17 changes: 17 additions & 0 deletions pkg/apis/ceph.rook.io/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion pkg/operator/ceph/cluster/cluster.go
@@ -246,7 +246,7 @@ func (c *cluster) doOrchestration(rookImage string, cephVersion cephver.CephVers

mgrs := mgr.New(c.Info, c.context, c.Namespace, rookImage,
spec.CephVersion, cephv1.GetMgrPlacement(spec.Placement), cephv1.GetMgrAnnotations(c.Spec.Annotations),
spec.Network, spec.Dashboard, spec.Monitoring, cephv1.GetMgrResources(spec.Resources), c.ownerRef, c.Spec.DataDirHostPath, c.isUpgrade)
spec.Network, spec.Dashboard, spec.Monitoring, spec.Mgr, cephv1.GetMgrResources(spec.Resources), c.ownerRef, c.Spec.DataDirHostPath, c.isUpgrade)
err = mgrs.Start()
if err != nil {
return fmt.Errorf("failed to start the ceph mgr. %+v", err)
26 changes: 26 additions & 0 deletions pkg/operator/ceph/cluster/mgr/mgr.go
@@ -68,6 +68,7 @@ type Cluster struct {
ownerRef metav1.OwnerReference
dashboard cephv1.DashboardSpec
monitoringSpec cephv1.MonitoringSpec
mgrSpec cephv1.MgrSpec
cephVersion cephv1.CephVersionSpec
rookVersion string
exitCode func(err error) (int, bool)
@@ -86,6 +87,7 @@ func New(
network cephv1.NetworkSpec,
dashboard cephv1.DashboardSpec,
monitoringSpec cephv1.MonitoringSpec,
mgrSpec cephv1.MgrSpec,
resources v1.ResourceRequirements,
ownerRef metav1.OwnerReference,
dataDirHostPath string,
@@ -102,6 +104,7 @@
dataDir: k8sutil.DataDir,
dashboard: dashboard,
monitoringSpec: monitoringSpec,
mgrSpec: mgrSpec,
Network: network,
resources: resources,
ownerRef: ownerRef,
@@ -190,6 +193,10 @@ func (c *Cluster) Start() error {
logger.Errorf("failed to enable mgr dashboard. %+v", err)
}

if err := c.enableMgrModules(c.context, c.Namespace, c.mgrSpec); err != nil {
logger.Errorf("failed to enable mgr module(s) from the spec. %+v", err)
}

}

// create the metrics service
@@ -240,6 +247,25 @@ func (c *Cluster) enablePrometheusModule(clusterName string) error {
return nil
}

func (c *Cluster) enableMgrModules(context *clusterd.Context, clusterName string, mgrSpec cephv1.MgrSpec) error {
// Enable mgr modules from the spec
for _, module := range mgrSpec.Modules {
if err := client.MgrEnableModule(c.context, c.Namespace, module.Name, true); err != nil {
return fmt.Errorf("failed to enable mgr %s module. %+v", module, err)
}
// Set the default pg autoscale mode for newly created pools (Ceph Octopus will have that option enabled by default)
if module.Name == "pg_autoscaler" && c.clusterInfo.CephVersion.IsAtLeastNautilus() {
monStore := config.GetMonStore(context, clusterName)
err := monStore.Set("global", "osd_pool_default_pg_autoscale_mode", "on")
if err != nil {
return fmt.Errorf("failed to enable pg autoscale mode for newly created pools. %+v", err)
}
}
}

return nil
}

// add a servicemonitor that allows prometheus to scrape from the monitoring endpoint of the cluster
func (c *Cluster) enableServiceMonitor(service *v1.Service) error {
name := service.GetName()
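
For context, a simplified standalone sketch of what `enableMgrModules` does, assuming `client.MgrEnableModule` issues a `ceph mgr module enable` call as its name suggests; this is not Rook's actual helper code, and the Ceph binary and cluster connection details are assumptions:

```go
package main

import (
	"fmt"
	"os/exec"
)

// enableModules shells out to the Ceph CLI for each configured module,
// roughly mirroring the loop in enableMgrModules above.
func enableModules(modules []string) error {
	for _, name := range modules {
		// ceph mgr module enable <name>
		if out, err := exec.Command("ceph", "mgr", "module", "enable", name).CombinedOutput(); err != nil {
			return fmt.Errorf("failed to enable mgr %s module. %s. %+v", name, string(out), err)
		}
		if name == "pg_autoscaler" {
			// Make newly created pools use the autoscaler (Nautilus and newer).
			if out, err := exec.Command("ceph", "config", "set", "global",
				"osd_pool_default_pg_autoscale_mode", "on").CombinedOutput(); err != nil {
				return fmt.Errorf("failed to enable pg autoscale mode for newly created pools. %s. %+v", string(out), err)
			}
		}
	}
	return nil
}

func main() {
	if err := enableModules([]string{"pg_autoscaler"}); err != nil {
		fmt.Println(err)
	}
}
```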
1 change: 1 addition & 0 deletions pkg/operator/ceph/cluster/mgr/mgr_test.go
@@ -64,6 +64,7 @@ func TestStartMGR(t *testing.T) {
cephv1.NetworkSpec{},
cephv1.DashboardSpec{Enabled: true},
cephv1.MonitoringSpec{Enabled: true, RulesNamespace: ""},
cephv1.MgrSpec{},
v1.ResourceRequirements{},
metav1.OwnerReference{},
"/var/lib/rook/",
4 changes: 4 additions & 0 deletions pkg/operator/ceph/cluster/mgr/spec_test.go
@@ -46,6 +46,7 @@ func TestPodSpec(t *testing.T) {
cephv1.NetworkSpec{},
cephv1.DashboardSpec{},
cephv1.MonitoringSpec{},
cephv1.MgrSpec{},
v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: *resource.NewQuantity(200.0, resource.BinarySI),
@@ -97,6 +98,7 @@ func TestServiceSpec(t *testing.T) {
cephv1.NetworkSpec{},
cephv1.DashboardSpec{},
cephv1.MonitoringSpec{},
cephv1.MgrSpec{},
v1.ResourceRequirements{},
metav1.OwnerReference{},
"/var/lib/rook/",
@@ -122,6 +124,7 @@ func TestHostNetwork(t *testing.T) {
cephv1.NetworkSpec{HostNetwork: true},
cephv1.DashboardSpec{},
cephv1.MonitoringSpec{},
cephv1.MgrSpec{},
v1.ResourceRequirements{},
metav1.OwnerReference{},
"/var/lib/rook/",
@@ -155,6 +158,7 @@ func TestHttpBindFix(t *testing.T) {
cephv1.NetworkSpec{},
cephv1.DashboardSpec{},
cephv1.MonitoringSpec{},
cephv1.MgrSpec{},
v1.ResourceRequirements{},
metav1.OwnerReference{},
"/var/lib/rook/",
5 changes: 4 additions & 1 deletion tests/framework/installer/ceph_manifests_v1.0.go
@@ -1198,7 +1198,10 @@ spec:
config:
storeType: "` + settings.StoreType + `"
databaseSizeMB: "1024"
journalSizeMB: "1024"`
journalSizeMB: "1024"
mgr:
modules:
- name: pg_autoscaler`
}

// GetRookToolBox returns rook-toolbox manifest
