From 17099f56f280823c85792dd4e9bcf97457580a13 Mon Sep 17 00:00:00 2001 From: parth-gr Date: Thu, 13 Jan 2022 18:59:42 +0530 Subject: [PATCH 01/16] csi: add osd blocklist capabilities to the external cephcluster The CSI users created by the external script(both for rbd and cephfs) need the blocklist capability so the CSI pods can execute the `osd blocklist` commands for metro DR update the mon caps of csi users with `profile simple-rados-client-with-blocklist` so they have privileges to run osd blocklist cmds Closes: https://issues.redhat.com/browse/RHSTOR-2451 Signed-off-by: parth-gr (cherry picked from commit 7e851f15c6506838329ce6b9d8012f73177996ef) --- .github/workflows/canary-integration-test.yml | 7 +++ .../create-external-cluster-resources.py | 51 ++++++++++--------- 2 files changed, 34 insertions(+), 24 deletions(-) diff --git a/.github/workflows/canary-integration-test.yml b/.github/workflows/canary-integration-test.yml index 5ff32fab4853..fc1ea7be8067 100644 --- a/.github/workflows/canary-integration-test.yml +++ b/.github/workflows/canary-integration-test.yml @@ -57,6 +57,13 @@ jobs: kubectl -n rook-ceph exec $toolbox -- mkdir -p /etc/ceph/test-data kubectl -n rook-ceph cp tests/ceph-status-out $toolbox:/etc/ceph/test-data/ kubectl -n rook-ceph cp deploy/examples/create-external-cluster-resources.py $toolbox:/etc/ceph + # remove the existing client auth that will be re-created by external script + kubectl -n rook-ceph exec $toolbox -- ceph auth del client.csi-cephfs-node + kubectl -n rook-ceph exec $toolbox -- ceph auth del client.csi-cephfs-provisioner + kubectl -n rook-ceph exec $toolbox -- ceph auth del client.csi-rbd-node + kubectl -n rook-ceph exec $toolbox -- ceph auth del client.csi-rbd-provisioner + # print existing client auth + kubectl -n rook-ceph exec $toolbox -- ceph auth ls timeout 10 sh -c "until kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool; do echo 'waiting for script to succeed' && sleep 1; done" - name: dry run external script create-external-cluster-resources.py diff --git a/deploy/examples/create-external-cluster-resources.py b/deploy/examples/create-external-cluster-resources.py index 688d9e5f2f3d..44a8e78d9b17 100644 --- a/deploy/examples/create-external-cluster-resources.py +++ b/deploy/examples/create-external-cluster-resources.py @@ -93,12 +93,12 @@ def _init_cmd_output_map(self): self.cmd_output_map[self.cmd_names['mgr services'] ] = '''{"dashboard":"https://ceph-dashboard:8443/","prometheus":"http://ceph-dashboard-db:9283/"}''' self.cmd_output_map['''{"caps": ["mon", "allow r, allow command quorum_status", "osd", "allow rwx pool=default.rgw.meta, allow r pool=.rgw.root, allow rw pool=default.rgw.control, allow x pool=default.rgw.buckets.index"], "entity": "client.healthchecker", "format": "json", "prefix": "auth get-or-create"}'''] = '''[{"entity":"client.healthchecker","key":"AQDFkbNeft5bFRAATndLNUSEKruozxiZi3lrdA==","caps":{"mon":"allow r, allow command quorum_status","osd":"allow rwx pool=default.rgw.meta, allow r pool=.rgw.root, allow rw pool=default.rgw.control, allow x pool=default.rgw.buckets.index"}}]''' - self.cmd_output_map['''{"caps": ["mon", "profile rbd", "osd", "profile rbd"], "entity": "client.csi-rbd-node", "format": "json", "prefix": "auth get-or-create"}'''] = '''[{"entity":"client.csi-rbd-node","key":"AQBOgrNeHbK1AxAAubYBeV8S1U/GPzq5SVeq6g==","caps":{"mon":"profile rbd","osd":"profile rbd"}}]''' - self.cmd_output_map['''{"caps": ["mon", "profile rbd", 
"mgr", "allow rw", "osd", "profile rbd"], "entity": "client.csi-rbd-provisioner", "format": "json", "prefix": "auth get-or-create"}'''] = '''[{"entity":"client.csi-rbd-provisioner","key":"AQBNgrNe1geyKxAA8ekViRdE+hss5OweYBkwNg==","caps":{"mgr":"allow rw","mon":"profile rbd","osd":"profile rbd"}}]''' - self.cmd_output_map['''{"caps": ["mon", "allow r", "mgr", "allow rw", "osd", "allow rw tag cephfs *=*", "mds", "allow rw"], "entity": "client.csi-cephfs-node", "format": "json", "prefix": "auth get-or-create"}'''] = '''[{"entity":"client.csi-cephfs-node","key":"AQBOgrNeENunKxAAPCmgE7R6G8DcXnaJ1F32qg==","caps":{"mds":"allow rw","mgr":"allow rw","mon":"allow r","osd":"allow rw tag cephfs *=*"}}]''' - self.cmd_output_map['''{"caps": ["mon", "allow r", "mgr", "allow rw", "osd", "allow rw tag cephfs metadata=*"], "entity": "client.csi-cephfs-provisioner", "format": "json", "prefix": "auth get-or-create"}'''] = '''[{"entity":"client.csi-cephfs-provisioner","key":"AQBOgrNeAFgcGBAAvGqKOAD0D3xxmVY0R912dg==","caps":{"mgr":"allow rw","mon":"allow r","osd":"allow rw tag cephfs metadata=*"}}]''' - self.cmd_output_map['''{"caps": ["mon", "allow r", "mgr", "allow rw", "osd", "allow rw tag cephfs metadata=*"], "entity": "client.csi-cephfs-provisioner-openshift-storage", "format": "json", "prefix": "auth get-or-create"}'''] = '''[{"entity":"client.csi-cephfs-provisioner-openshift-storage","key":"BQBOgrNeAFgcGBAAvGqKOAD0D3xxmVY0R912dg==","caps":{"mgr":"allow rw","mon":"allow r","osd":"allow rw tag cephfs metadata=*"}}]''' - self.cmd_output_map['''{"caps": ["mon", "allow r", "mgr", "allow rw", "osd", "allow rw tag cephfs metadata=myfs"], "entity": "client.csi-cephfs-provisioner-openshift-storage-myfs", "format": "json", "prefix": "auth get-or-create"}'''] = '''[{"entity":"client.csi-cephfs-provisioner-openshift-storage-myfs","key":"CQBOgrNeAFgcGBAAvGqKOAD0D3xxmVY0R912dg==","caps":{"mgr":"allow rw","mon":"allow r","osd":"allow rw tag cephfs metadata=myfs"}}]''' + self.cmd_output_map['''{"caps": ["mon", "profile rbd, allow command 'osd blocklist'", "osd", "profile rbd"], "entity": "client.csi-rbd-node", "format": "json", "prefix": "auth get-or-create"}'''] = '''[{"entity":"client.csi-rbd-node","key":"AQBOgrNeHbK1AxAAubYBeV8S1U/GPzq5SVeq6g==","caps":{"mon":"profile rbd, allow command 'osd blocklist'","osd":"profile rbd"}}]''' + self.cmd_output_map['''{"caps": ["mon", "profile rbd, allow command 'osd blocklist'", "mgr", "allow rw", "osd", "profile rbd"], "entity": "client.csi-rbd-provisioner", "format": "json", "prefix": "auth get-or-create"}'''] = '''[{"entity":"client.csi-rbd-provisioner","key":"AQBNgrNe1geyKxAA8ekViRdE+hss5OweYBkwNg==","caps":{"mgr":"allow rw","mon":"profile rbd, allow command 'osd blocklist'","osd":"profile rbd"}}]''' + self.cmd_output_map['''{"caps": ["mon", "allow r, allow command 'osd blocklist'", "mgr", "allow rw", "osd", "allow rw tag cephfs *=*", "mds", "allow rw"], "entity": "client.csi-cephfs-node", "format": "json", "prefix": "auth get-or-create"}'''] = '''[{"entity":"client.csi-cephfs-node","key":"AQBOgrNeENunKxAAPCmgE7R6G8DcXnaJ1F32qg==","caps":{"mds":"allow rw","mgr":"allow rw","mon":"allow r, allow command 'osd blocklist'","osd":"allow rw tag cephfs *=*"}}]''' + self.cmd_output_map['''{"caps": ["mon", "allow r, allow command 'osd blocklist'", "mgr", "allow rw", "osd", "allow rw tag cephfs metadata=*"], "entity": "client.csi-cephfs-provisioner", "format": "json", "prefix": "auth get-or-create"}'''] = 
'''[{"entity":"client.csi-cephfs-provisioner","key":"AQBOgrNeAFgcGBAAvGqKOAD0D3xxmVY0R912dg==","caps":{"mgr":"allow rw","mon":"allow r, allow command 'osd blocklist'","osd":"allow rw tag cephfs metadata=*"}}]''' + self.cmd_output_map['''{"caps": ["mon", "allow r, allow command 'osd blocklist'", "mgr", "allow rw", "osd", "allow rw tag cephfs metadata=*"], "entity": "client.csi-cephfs-provisioner-openshift-storage", "format": "json", "prefix": "auth get-or-create"}'''] = '''[{"entity":"client.csi-cephfs-provisioner-openshift-storage","key":"BQBOgrNeAFgcGBAAvGqKOAD0D3xxmVY0R912dg==","caps":{"mgr":"allow rw","mon":"allow r, allow command 'osd blocklist'","osd":"allow rw tag cephfs metadata=*"}}]''' + self.cmd_output_map['''{"caps": ["mon", "allow r, allow command 'osd blocklist'", "mgr", "allow rw", "osd", "allow rw tag cephfs metadata=myfs"], "entity": "client.csi-cephfs-provisioner-openshift-storage-myfs", "format": "json", "prefix": "auth get-or-create"}'''] = '''[{"entity":"client.csi-cephfs-provisioner-openshift-storage-myfs","key":"CQBOgrNeAFgcGBAAvGqKOAD0D3xxmVY0R912dg==","caps":{"mgr":"allow rw","mon":"allow r, allow command 'osd blocklist'","osd":"allow rw tag cephfs metadata=myfs"}}]''' self.cmd_output_map['''{"caps": ["mon", "allow r, allow command quorum_status, allow command version", "mgr", "allow command config", "osd", "allow rwx pool=default.rgw.meta, allow r pool=.rgw.root, allow rw pool=default.rgw.control, allow rx pool=default.rgw.log, allow x pool=default.rgw.buckets.index"], "entity": "client.healthchecker", "format": "json", "prefix": "auth get-or-create"}'''] = '''[{"entity":"client.healthchecker","key":"AQDFkbNeft5bFRAATndLNUSEKruozxiZi3lrdA==","caps":{"mon": "allow r, allow command quorum_status, allow command version", "mgr": "allow command config", "osd": "allow rwx pool=default.rgw.meta, allow r pool=.rgw.root, allow rw pool=default.rgw.control, allow rx pool=default.rgw.log, allow x pool=default.rgw.buckets.index"}}]''' self.cmd_output_map['''{"format": "json", "prefix": "mgr services"}'''] = '''{"dashboard": "http://rook-ceph-mgr-a-57cf9f84bc-f4jnl:7000/", "prometheus": "http://rook-ceph-mgr-a-57cf9f84bc-f4jnl:9283/"}''' self.cmd_output_map['''{"entity": "client.healthchecker", "format": "json", "prefix": "auth get"}'''] = '''{"dashboard": "http://rook-ceph-mgr-a-57cf9f84bc-f4jnl:7000/", "prometheus": "http://rook-ceph-mgr-a-57cf9f84bc-f4jnl:9283/"}''' @@ -525,20 +525,23 @@ def create_cephCSIKeyring_cephFSProvisioner(self): entity = "client.csi-cephfs-provisioner-{}".format(cluster_name) cmd_json = {"prefix": "auth get-or-create", "entity": entity, - "caps": ["mon", "allow r", "mgr", "allow rw", - "osd", "allow rw tag cephfs metadata=*"], + "caps": ["mon", "allow r, allow command 'osd blocklist'", + "mgr", "allow rw", + "osd", "allow rw tag cephfs metadata=*"], "format": "json"} else: entity = "client.csi-cephfs-provisioner-{}-{}".format(cluster_name,cephfs_filesystem) cmd_json = {"prefix": "auth get-or-create", "entity": entity, - "caps": ["mon", "allow r", "mgr", "allow rw", - "osd", "allow rw tag cephfs metadata={}".format(cephfs_filesystem)], + "caps": ["mon", "allow r, allow command 'osd blocklist'", + "mgr", "allow rw", + "osd", "allow rw tag cephfs metadata={}".format(cephfs_filesystem)], "format": "json"} else: cmd_json = {"prefix": "auth get-or-create", "entity": entity, - "caps": ["mon", "allow r", "mgr", "allow rw", + "caps": ["mon", "allow r, allow command 'osd blocklist'", + "mgr", "allow rw", "osd", "allow rw tag cephfs metadata=*"], "format": 
"json"} if self._arg_parser.dry_run: @@ -564,25 +567,25 @@ def create_cephCSIKeyring_cephFSNode(self): entity = "client.csi-cephfs-node-{}".format(cluster_name) cmd_json = {"prefix": "auth get-or-create", "entity": entity, - "caps": ["mon", "allow r", - "mgr", "allow rw", - "osd", "allow rw tag cephfs *=*", - "mds", "allow rw"], + "caps": ["mon", "allow r, allow command 'osd blocklist'", + "mgr", "allow rw", + "osd", "allow rw tag cephfs *=*", + "mds", "allow rw"], "format": "json"} else: entity = "client.csi-cephfs-node-{}-{}".format(cluster_name,cephfs_filesystem) cmd_json = {"prefix": "auth get-or-create", "entity": entity, - "caps": ["mon", "allow r", - "mgr", "allow rw", - "osd", "allow rw tag cephfs data={}".format( + "caps": ["mon", "allow r, allow command 'osd blocklist'", + "mgr", "allow rw", + "osd", "allow rw tag cephfs data={}".format( cephfs_filesystem), - "mds", "allow rw"], + "mds", "allow rw"], "format": "json"} else: cmd_json = {"prefix": "auth get-or-create", "entity": entity, - "caps": ["mon", "allow r", + "caps": ["mon", "allow r, allow command 'osd blocklist'", "mgr", "allow rw", "osd", "allow rw tag cephfs *=*", "mds", "allow rw"], @@ -610,14 +613,14 @@ def create_cephCSIKeyring_RBDProvisioner(self): entity = "client.csi-rbd-provisioner-{}-{}-{}".format(cluster_name,rbd_pool_name,rados_namespace) cmd_json = {"prefix": "auth get-or-create", "entity": entity, - "caps": ["mon", "profile rbd", + "caps": ["mon", "profile rbd, allow command 'osd blocklist'", "mgr", "allow rw", "osd", "profile rbd pool={}".format(rbd_pool_name)], "format": "json"} else: cmd_json = {"prefix": "auth get-or-create", "entity": entity, - "caps": ["mon", "profile rbd", + "caps": ["mon", "profile rbd, allow command 'osd blocklist'", "mgr", "allow rw", "osd", "profile rbd"], "format": "json"} @@ -733,13 +736,13 @@ def create_cephCSIKeyring_RBDNode(self): entity = "client.csi-rbd-node-{}-{}-{}".format(cluster_name,rbd_pool_name,rados_namespace) cmd_json = {"prefix": "auth get-or-create", "entity": entity, - "caps": ["mon", "profile rbd", + "caps": ["mon", "profile rbd, allow command 'osd blocklist'", "osd", "profile rbd pool={}".format(rbd_pool_name)], "format": "json"} else: cmd_json = {"prefix": "auth get-or-create", "entity": entity, - "caps": ["mon", "profile rbd", + "caps": ["mon", "profile rbd, allow command 'osd blocklist'", "osd", "profile rbd"], "format": "json"} if self._arg_parser.dry_run: From d229cd4c8dfdce2cc755de6c8f09ea16b4463ff0 Mon Sep 17 00:00:00 2001 From: yati1998 Date: Wed, 19 Jan 2022 12:07:46 +0530 Subject: [PATCH 02/16] csi: restrict csiaddons to use only 1 port This commit restrict csiaddons to open only 1 port as per the requirement which is required for security. 
Fixes: #9604 Signed-off-by: yati1998 (cherry picked from commit c559bbdf6bdf5727530b080129c21c4bf91a2437) --- build/rbac/rbac.yaml | 4 ++-- deploy/charts/rook-ceph/templates/psp.yaml | 4 ++-- deploy/examples/common.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/build/rbac/rbac.yaml b/build/rbac/rbac.yaml index 0a78706c367a..8e372e4bbbe4 100644 --- a/build/rbac/rbac.yaml +++ b/build/rbac/rbac.yaml @@ -758,8 +758,8 @@ spec: - min: 9283 max: 9283 # port for CSIAddons - - min: 9061 - max: 9079 + - min: 9070 + max: 9070 --- kind: Role apiVersion: rbac.authorization.k8s.io/v1 diff --git a/deploy/charts/rook-ceph/templates/psp.yaml b/deploy/charts/rook-ceph/templates/psp.yaml index c158e2e7b03e..23c5a415ba07 100644 --- a/deploy/charts/rook-ceph/templates/psp.yaml +++ b/deploy/charts/rook-ceph/templates/psp.yaml @@ -77,8 +77,8 @@ spec: - min: 9283 max: 9283 # port for CSIAddons - - min: 9061 - max: 9079 + - min: 9070 + max: 9070 {{- if .Values.rbacEnable }} --- apiVersion: rbac.authorization.k8s.io/v1 diff --git a/deploy/examples/common.yaml b/deploy/examples/common.yaml index 81df9f3a59d9..254a03b2eb28 100644 --- a/deploy/examples/common.yaml +++ b/deploy/examples/common.yaml @@ -842,8 +842,8 @@ spec: - min: 9283 max: 9283 # port for CSIAddons - - min: 9061 - max: 9079 + - min: 9070 + max: 9070 --- kind: Role apiVersion: rbac.authorization.k8s.io/v1 From 2e0c595d336562cebe39de0e75c89ef68cd21368 Mon Sep 17 00:00:00 2001 From: subhamkrai Date: Sun, 9 Jan 2022 19:22:56 +0530 Subject: [PATCH 03/16] mon: add annotation to mon secret and cm add annotations to secrets and configmaps to use different tools to sync secrets and configmap. Currently, these annotation is applied only on the `rook-ceph-mon-endpoints` configmap and the secrets `rook-ceph-mon` and `rook-ceph-admin-keyring`. Closes: https://github.com/rook/rook/issues/8999 Signed-off-by: subhamkrai (cherry picked from commit 02d43e82e04d080f7c98ec9aa96ed56a2a2673ad) --- Documentation/ceph-cluster-crd.md | 4 ++- deploy/examples/cluster.yaml | 4 +++ pkg/apis/ceph.rook.io/v1/annotations.go | 4 +++ pkg/apis/ceph.rook.io/v1/keys.go | 21 +++++------ pkg/operator/ceph/cluster/mon/mon.go | 9 ++++- pkg/operator/ceph/cluster/mon/mon_test.go | 13 ++++++- pkg/operator/ceph/config/keyring/admin.go | 35 +++++++++++++++++-- .../ceph/config/keyring/admin_test.go | 12 +++++-- 8 files changed, 84 insertions(+), 18 deletions(-) diff --git a/Documentation/ceph-cluster-crd.md b/Documentation/ceph-cluster-crd.md index f72625754804..654f26ed032d 100755 --- a/Documentation/ceph-cluster-crd.md +++ b/Documentation/ceph-cluster-crd.md @@ -516,13 +516,15 @@ Annotations and Labels can be specified so that the Rook components will have th You can set annotations / labels for Rook components for the list of key value pairs: -* `all`: Set annotations / labels for all components +* `all`: Set annotations / labels for all components except `clusterMetadata`. * `mgr`: Set annotations / labels for MGRs * `mon`: Set annotations / labels for mons * `osd`: Set annotations / labels for OSDs * `prepareosd`: Set annotations / labels for OSD Prepare Jobs * `monitoring`: Set annotations / labels for service monitor * `crashcollector`: Set annotations / labels for crash collectors +* `clusterMetadata`: Set annotations only to `rook-ceph-mon-endpoints` configmap and the `rook-ceph-mon` and `rook-ceph-admin-keyring` secrets. These annotations will not be merged with the `all` annotations. The common usage is for backing up these critical resources with `kubed`. 
+Note the clusterMetadata annotation will not be merged with the `all` annotation. When other keys are set, `all` will be merged together with the specific component. ### Placement Configuration Settings diff --git a/deploy/examples/cluster.yaml b/deploy/examples/cluster.yaml index 365908df3471..2c626aabadf0 100644 --- a/deploy/examples/cluster.yaml +++ b/deploy/examples/cluster.yaml @@ -166,6 +166,10 @@ spec: # osd: # cleanup: # prepareosd: +# clusterMetadata annotations will be applied to only `rook-ceph-mon-endpoints` configmap and the `rook-ceph-mon` and `rook-ceph-admin-keyring` secrets. +# And clusterMetadata annotations will not be merged with `all` annotations. +# clusterMetadata: +# kubed.appscode.com/sync: "true" # If no mgr annotations are set, prometheus scrape annotations will be set by default. # mgr: labels: diff --git a/pkg/apis/ceph.rook.io/v1/annotations.go b/pkg/apis/ceph.rook.io/v1/annotations.go index d68ab99910ee..4f611abf42f1 100644 --- a/pkg/apis/ceph.rook.io/v1/annotations.go +++ b/pkg/apis/ceph.rook.io/v1/annotations.go @@ -57,6 +57,10 @@ func GetCleanupAnnotations(a AnnotationsSpec) Annotations { return mergeAllAnnotationsWithKey(a, KeyCleanup) } +func GetClusterMetadataAnnotations(a AnnotationsSpec) Annotations { + return a[KeyClusterMetadata] +} + func mergeAllAnnotationsWithKey(a AnnotationsSpec, name KeyType) Annotations { all := a.All() if all != nil { diff --git a/pkg/apis/ceph.rook.io/v1/keys.go b/pkg/apis/ceph.rook.io/v1/keys.go index 3257a312f08f..3c4bf633b154 100644 --- a/pkg/apis/ceph.rook.io/v1/keys.go +++ b/pkg/apis/ceph.rook.io/v1/keys.go @@ -17,14 +17,15 @@ limitations under the License. package v1 const ( - KeyAll = "all" - KeyMds KeyType = "mds" - KeyMon KeyType = "mon" - KeyMonArbiter KeyType = "arbiter" - KeyMgr KeyType = "mgr" - KeyOSDPrepare KeyType = "prepareosd" - KeyOSD KeyType = "osd" - KeyCleanup KeyType = "cleanup" - KeyMonitoring KeyType = "monitoring" - KeyCrashCollector KeyType = "crashcollector" + KeyAll = "all" + KeyMds KeyType = "mds" + KeyMon KeyType = "mon" + KeyMonArbiter KeyType = "arbiter" + KeyMgr KeyType = "mgr" + KeyOSDPrepare KeyType = "prepareosd" + KeyOSD KeyType = "osd" + KeyCleanup KeyType = "cleanup" + KeyMonitoring KeyType = "monitoring" + KeyCrashCollector KeyType = "crashcollector" + KeyClusterMetadata KeyType = "clusterMetadata" ) diff --git a/pkg/operator/ceph/cluster/mon/mon.go b/pkg/operator/ceph/cluster/mon/mon.go index c0c30865f869..070ce2edca8e 100644 --- a/pkg/operator/ceph/cluster/mon/mon.go +++ b/pkg/operator/ceph/cluster/mon/mon.go @@ -493,6 +493,11 @@ func (c *Cluster) initClusterInfo(cephVersion cephver.CephVersion, clusterName s return errors.Wrap(err, "failed to get cluster info") } + err = keyring.ApplyClusterMetadataToSecret(c.ClusterInfo, AppName, c.context, c.spec.Annotations) + if err != nil { + return errors.Wrap(err, "failed to apply annotation") + } + c.ClusterInfo.CephVersion = cephVersion c.ClusterInfo.OwnerInfo = c.ownerInfo c.ClusterInfo.Context = context @@ -509,7 +514,7 @@ func (c *Cluster) initClusterInfo(cephVersion cephver.CephVersion, clusterName s return errors.Wrap(err, "failed to save mon keyring secret") } // also store the admin keyring for other daemons that might need it during init - if err := k.Admin().CreateOrUpdate(c.ClusterInfo); err != nil { + if err := k.Admin().CreateOrUpdate(c.ClusterInfo, c.context, c.spec.Annotations); err != nil { return errors.Wrap(err, "failed to save admin keyring secret") } @@ -1052,6 +1057,8 @@ func (c *Cluster) persistExpectedMonDaemons() 
error { Finalizers: []string{DisasterProtectionFinalizerName}, }, } + cephv1.GetClusterMetadataAnnotations(c.spec.Annotations).ApplyToObjectMeta(&configMap.ObjectMeta) + err := c.ownerInfo.SetControllerReference(configMap) if err != nil { return errors.Wrapf(err, "failed to set owner reference mon configmap %q", configMap.Name) diff --git a/pkg/operator/ceph/cluster/mon/mon_test.go b/pkg/operator/ceph/cluster/mon/mon_test.go index 31938d307303..14e6f37db521 100644 --- a/pkg/operator/ceph/cluster/mon/mon_test.go +++ b/pkg/operator/ceph/cluster/mon/mon_test.go @@ -181,11 +181,21 @@ func TestStartMonPods(t *testing.T) { assert.NoError(t, err) c := newCluster(context, namespace, true, v1.ResourceRequirements{}) c.ClusterInfo = clienttest.CreateTestClusterInfo(1) + c.spec.Annotations = cephv1.AnnotationsSpec{ + cephv1.KeyClusterMetadata: cephv1.Annotations{ + "key": "value", + }, + } // start a basic cluster _, err = c.Start(c.ClusterInfo, c.rookVersion, cephver.Octopus, c.spec) assert.NoError(t, err) + // test annotations + secret, err := c.context.Clientset.CoreV1().Secrets(c.Namespace).Get(c.ClusterInfo.Context, AppName, metav1.GetOptions{}) + assert.NoError(t, err) + assert.Equal(t, map[string]string{"key": "value"}, secret.Annotations) + validateStart(t, c) // starting again should be a no-op @@ -263,7 +273,7 @@ func validateStart(t *testing.T, c *Cluster) { func TestPersistMons(t *testing.T) { clientset := test.New(t, 1) ownerInfo := cephclient.NewMinimumOwnerInfoWithOwnerRef() - c := New(&clusterd.Context{Clientset: clientset}, "ns", cephv1.ClusterSpec{}, ownerInfo, &sync.Mutex{}) + c := New(&clusterd.Context{Clientset: clientset}, "ns", cephv1.ClusterSpec{Annotations: cephv1.AnnotationsSpec{cephv1.KeyClusterMetadata: cephv1.Annotations{"key": "value"}}}, ownerInfo, &sync.Mutex{}) setCommonMonProperties(c, 1, cephv1.MonSpec{Count: 3, AllowMultiplePerNode: true}, "myversion") // Persist mon a @@ -273,6 +283,7 @@ func TestPersistMons(t *testing.T) { cm, err := c.context.Clientset.CoreV1().ConfigMaps(c.Namespace).Get(context.TODO(), EndpointConfigMapName, metav1.GetOptions{}) assert.NoError(t, err) assert.Equal(t, "a=1.2.3.1:6789", cm.Data[EndpointDataKey]) + assert.Equal(t, map[string]string{"key": "value"}, cm.Annotations) // Persist mon b, and remove mon a for simply testing the configmap is updated c.ClusterInfo.Monitors["b"] = &cephclient.MonInfo{Name: "b", Endpoint: "4.5.6.7:3300"} diff --git a/pkg/operator/ceph/config/keyring/admin.go b/pkg/operator/ceph/config/keyring/admin.go index fa793f6a7245..cd3ac2d13a65 100644 --- a/pkg/operator/ceph/config/keyring/admin.go +++ b/pkg/operator/ceph/config/keyring/admin.go @@ -19,7 +19,11 @@ package keyring import ( "fmt" + "github.com/pkg/errors" + v1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1" + "github.com/rook/rook/pkg/clusterd" cephclient "github.com/rook/rook/pkg/daemon/ceph/client" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) const ( @@ -48,7 +52,34 @@ func (s *SecretStore) Admin() *AdminStore { } // CreateOrUpdate creates or updates the admin keyring secret with cluster information. 
-func (a *AdminStore) CreateOrUpdate(c *cephclient.ClusterInfo) error { +func (a *AdminStore) CreateOrUpdate(c *cephclient.ClusterInfo, context *clusterd.Context, annotation v1.AnnotationsSpec) error { keyring := fmt.Sprintf(adminKeyringTemplate, c.CephCred.Secret) - return a.secretStore.CreateOrUpdate(adminKeyringResourceName, keyring) + err := a.secretStore.CreateOrUpdate(adminKeyringResourceName, keyring) + if err != nil { + return err + } + err = ApplyClusterMetadataToSecret(c, keyringSecretName(adminKeyringResourceName), context, annotation) + if err != nil { + return errors.Errorf("failed to update admin secrets. %v", err) + } + + return nil +} + +func ApplyClusterMetadataToSecret(c *cephclient.ClusterInfo, secretName string, context *clusterd.Context, annotation v1.AnnotationsSpec) error { + // Get secret to update annotation + secret, err := context.Clientset.CoreV1().Secrets(c.Namespace).Get(c.Context, secretName, metav1.GetOptions{}) + if err != nil { + return errors.Wrapf(err, "failed to get %s secrets", secretName) + } + // We would need to reset the annotations back to empty, then reapply the annotations this is because the in some rook-ceph-mon secret is retrieved + // and then updated, instead of a new secret being generated. + secret.Annotations = map[string]string{} + v1.GetClusterMetadataAnnotations(annotation).ApplyToObjectMeta(&secret.ObjectMeta) + + _, err = context.Clientset.CoreV1().Secrets(c.Namespace).Update(c.Context, secret, metav1.UpdateOptions{}) + if err != nil { + return errors.Wrapf(err, "failed to update %s secret.", secretName) + } + return nil } diff --git a/pkg/operator/ceph/config/keyring/admin_test.go b/pkg/operator/ceph/config/keyring/admin_test.go index beee9a2e7a1a..fa5261dbe0e1 100644 --- a/pkg/operator/ceph/config/keyring/admin_test.go +++ b/pkg/operator/ceph/config/keyring/admin_test.go @@ -22,6 +22,7 @@ import ( "path" "testing" + v1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1" "github.com/rook/rook/pkg/clusterd" cephclient "github.com/rook/rook/pkg/daemon/ceph/client" clienttest "github.com/rook/rook/pkg/daemon/ceph/client/test" @@ -53,13 +54,18 @@ func TestAdminKeyringStore(t *testing.T) { // create key clusterInfo.CephCred.Secret = "adminsecretkey" - err := k.Admin().CreateOrUpdate(clusterInfo) + err := k.Admin().CreateOrUpdate(clusterInfo, ctx, v1.AnnotationsSpec{v1.KeyClusterMetadata: v1.Annotations{"key": "value"}}) assert.NoError(t, err) assertKeyringData(fmt.Sprintf(adminKeyringTemplate, "adminsecretkey")) + // test annotation + secret, err := ctx.Clientset.CoreV1().Secrets(clusterInfo.Namespace).Get(clusterInfo.Context, keyringSecretName(adminKeyringResourceName), metav1.GetOptions{}) + assert.NoError(t, err) + assert.Equal(t, map[string]string{"key": "value"}, secret.Annotations) + // update key clusterInfo.CephCred.Secret = "differentsecretkey" - err = k.Admin().CreateOrUpdate(clusterInfo) + err = k.Admin().CreateOrUpdate(clusterInfo, ctx, v1.AnnotationsSpec{}) assert.NoError(t, err) assertKeyringData(fmt.Sprintf(adminKeyringTemplate, "differentsecretkey")) } @@ -74,7 +80,7 @@ func TestAdminVolumeAndMount(t *testing.T) { s := GetSecretStore(ctx, clusterInfo, ownerInfo) clusterInfo.CephCred.Secret = "adminsecretkey" - err := s.Admin().CreateOrUpdate(clusterInfo) + err := s.Admin().CreateOrUpdate(clusterInfo, ctx, v1.AnnotationsSpec{}) assert.NoError(t, err) v := Volume().Admin() From 6e45d4730a242207033d8f2de22a7e6710aef3ab Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Mon, 17 Jan 2022 20:39:09 +0530 Subject: [PATCH 04/16] csi: 
add support to create volumesnapshotclass add support for helm charts to create default volumesnapshotclass for cephfs and rbd fixes: #9566 Signed-off-by: Madhu Rajanna (cherry picked from commit 85dc21270434b97b3710fcd5084eca2a1c586fff) --- .../templates/volumesnapshotclass.yaml | 48 +++++++++++++++++++ deploy/charts/rook-ceph-cluster/values.yaml | 16 +++++++ 2 files changed, 64 insertions(+) create mode 100644 deploy/charts/rook-ceph-cluster/templates/volumesnapshotclass.yaml diff --git a/deploy/charts/rook-ceph-cluster/templates/volumesnapshotclass.yaml b/deploy/charts/rook-ceph-cluster/templates/volumesnapshotclass.yaml new file mode 100644 index 000000000000..704eb43f9d7b --- /dev/null +++ b/deploy/charts/rook-ceph-cluster/templates/volumesnapshotclass.yaml @@ -0,0 +1,48 @@ +{{- $filesystemvsc := .Values.cephFileSystemVolumeSnapshotClass -}} +{{- $blockpoolvsc := .Values.cephBlockPoolsVolumeSnapshotClass -}} + +--- +{{- if default false $filesystemvsc.enabled }} +{{- if .Capabilities.APIVersions.Has "snapshot.storage.k8s.io/v1" }} +apiVersion: snapshot.storage.k8s.io/v1 +{{- else }} +apiVersion: snapshot.storage.k8s.io/v1beta1 +{{- end }} +kind: VolumeSnapshotClass +metadata: + name: {{ $filesystemvsc.name }} + annotations: + snapshot.storage.kubernetes.io/is-default-class: "{{ if default false $filesystemvsc.isDefault }}true{{ else }}false{{ end }}" +driver: {{ .Values.operatorNamespace }}.cephfs.csi.ceph.com +parameters: + clusterID: {{ .Release.Namespace }} + csi.storage.k8s.io/snapshotter-secret-name: rook-csi-cephfs-provisioner + csi.storage.k8s.io/snapshotter-secret-namespace: {{ .Release.Namespace }} +{{- if $filesystemvsc.parameters }} +{{ toYaml $filesystemvsc.parameters | indent 2 }} +{{- end }} +deletionPolicy: {{ default "Delete" $filesystemvsc.deletionPolicy }} +{{- end }} + +--- +{{- if default false $blockpoolvsc.enabled }} +{{- if .Capabilities.APIVersions.Has "snapshot.storage.k8s.io/v1" }} +apiVersion: snapshot.storage.k8s.io/v1 +{{- else }} +apiVersion: snapshot.storage.k8s.io/v1beta1 +{{- end }} +kind: VolumeSnapshotClass +metadata: + name: {{ $blockpoolvsc.name }} + annotations: + snapshot.storage.kubernetes.io/is-default-class: "{{ if default false $blockpoolvsc.isDefault }}true{{ else }}false{{ end }}" +driver: {{ .Values.operatorNamespace }}.rbd.csi.ceph.com +parameters: + clusterID: {{ .Release.Namespace }} + csi.storage.k8s.io/snapshotter-secret-name: rook-csi-rbd-provisioner + csi.storage.k8s.io/snapshotter-secret-namespace: {{ .Release.Namespace }} +{{- if $blockpoolvsc.parameters }} +{{ toYaml $blockpoolvsc.parameters | indent 2 }} +{{- end }} +deletionPolicy: {{ default "Delete" $blockpoolvsc.deletionPolicy }} +{{- end }} diff --git a/deploy/charts/rook-ceph-cluster/values.yaml b/deploy/charts/rook-ceph-cluster/values.yaml index 1ca0664ecb4d..6b7444bb892b 100644 --- a/deploy/charts/rook-ceph-cluster/values.yaml +++ b/deploy/charts/rook-ceph-cluster/values.yaml @@ -397,6 +397,22 @@ cephFileSystems: # in hyperconverged settings where the volume is mounted on the same node as the osds. 
csi.storage.k8s.io/fstype: ext4 +cephFileSystemVolumeSnapshotClass: + enabled: false + name: ceph-filesystem + isDefault: true + deletionPolicy: Delete + # see https://rook.io/docs/rook/latest/ceph-csi-snapshot.html#cephfs-snapshots for available configuration + parameters: {} + +cephBlockPoolsVolumeSnapshotClass: + enabled: false + name: ceph-block + isDefault: false + deletionPolicy: Delete + # see https://rook.io/docs/rook/latest/ceph-csi-snapshot.html#rbd-snapshots for available configuration + parameters: {} + cephObjectStores: - name: ceph-objectstore # see https://github.com/rook/rook/blob/master/Documentation/ceph-object-store-crd.md#object-store-settings for available configuration From 638741890cc7c62d9fcebd93481e119fb084c89f Mon Sep 17 00:00:00 2001 From: Satoru Takeuchi Date: Tue, 28 Dec 2021 03:03:29 +0000 Subject: [PATCH 05/16] core: support priority class for crashcollector Support priorityClass to crashcollectors as mons, mgrs, and osds. https://rook.io/docs/rook/v1.8/ceph-cluster-crd.html#priority-class-names-configuration-settings The main use case is applying the high priority to crashcollectors to preempt normal pods under heavy load. Without this feature, we might lose crash information. Closes: https://github.com/rook/rook/issues/9500 Signed-off-by: Satoru Takeuchi (cherry picked from commit 5465481e0b3cfcfeb09481080488c4bf62545fb7) --- Documentation/ceph-cluster-crd.md | 3 +- deploy/charts/rook-ceph-cluster/values.yaml | 1 + deploy/examples/cluster.yaml | 1 + pkg/apis/ceph.rook.io/v1/priorityclasses.go | 8 +++ .../ceph.rook.io/v1/priorityclasses_test.go | 10 +-- pkg/operator/ceph/cluster/crash/crash.go | 9 +-- pkg/operator/ceph/cluster/crash/crash_test.go | 63 +++++++++++++++++++ 7 files changed, 86 insertions(+), 9 deletions(-) diff --git a/Documentation/ceph-cluster-crd.md b/Documentation/ceph-cluster-crd.md index 654f26ed032d..00dc85f01549 100755 --- a/Documentation/ceph-cluster-crd.md +++ b/Documentation/ceph-cluster-crd.md @@ -603,10 +603,11 @@ Priority class names can be specified so that the Rook components will have thos You can set priority class names for Rook components for the list of key value pairs: -* `all`: Set priority class names for MGRs, Mons, OSDs. +* `all`: Set priority class names for MGRs, Mons, OSDs, and crashcollectors. * `mgr`: Set priority class names for MGRs. * `mon`: Set priority class names for Mons. * `osd`: Set priority class names for OSDs. +* `crashcollector`: Set priority class names for crashcollectors. The specific component keys will act as overrides to `all`. 
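[Editor's illustration, not part of the patch] Since the documentation change above only lists the new `crashcollector` key, a minimal sketch of how it could be combined with the existing keys in a CephCluster spec may help; the class names below are placeholders, and `system-node-critical` is only an example of a built-in Kubernetes priority class that would keep crash collectors from being evicted under node pressure, which is the use case the commit message describes.

apiVersion: ceph.rook.io/v1
kind: CephCluster
metadata:
  name: rook-ceph
  namespace: rook-ceph
spec:
  # ... other cluster settings elided ...
  priorityClassNames:
    # placeholder class applied to mgrs, mons, osds, and crash collectors
    all: rook-ceph-default-priority-class
    # specific key overrides `all`; example uses a built-in high-priority class
    crashcollector: system-node-critical

With a high-priority class on the crash collector pods, crash reports are less likely to be lost when normal pods are preempted under heavy load, which is the motivation stated in this patch.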
diff --git a/deploy/charts/rook-ceph-cluster/values.yaml b/deploy/charts/rook-ceph-cluster/values.yaml index 1ca0664ecb4d..743de21eac15 100644 --- a/deploy/charts/rook-ceph-cluster/values.yaml +++ b/deploy/charts/rook-ceph-cluster/values.yaml @@ -238,6 +238,7 @@ cephClusterSpec: # mon: rook-ceph-mon-priority-class # osd: rook-ceph-osd-priority-class # mgr: rook-ceph-mgr-priority-class + # crashcollector: rook-ceph-crashcollector-priority-class storage: # cluster level storage configuration and selection useAllNodes: true diff --git a/deploy/examples/cluster.yaml b/deploy/examples/cluster.yaml index 2c626aabadf0..619c8aaaf64c 100644 --- a/deploy/examples/cluster.yaml +++ b/deploy/examples/cluster.yaml @@ -211,6 +211,7 @@ spec: # mon: rook-ceph-mon-priority-class # osd: rook-ceph-osd-priority-class # mgr: rook-ceph-mgr-priority-class +# crashcollector: rook-ceph-crashcollector-priority-class storage: # cluster level storage configuration and selection useAllNodes: true useAllDevices: true diff --git a/pkg/apis/ceph.rook.io/v1/priorityclasses.go b/pkg/apis/ceph.rook.io/v1/priorityclasses.go index d60ed1acd8cf..c984b852cf51 100644 --- a/pkg/apis/ceph.rook.io/v1/priorityclasses.go +++ b/pkg/apis/ceph.rook.io/v1/priorityclasses.go @@ -55,3 +55,11 @@ func GetCleanupPriorityClassName(p PriorityClassNamesSpec) string { } return p[KeyCleanup] } + +// GetCrashCollectorPriorityClassName returns the priority class name for the crashcollector +func GetCrashCollectorPriorityClassName(p PriorityClassNamesSpec) string { + if _, ok := p[KeyCrashCollector]; !ok { + return p.All() + } + return p[KeyCrashCollector] +} diff --git a/pkg/apis/ceph.rook.io/v1/priorityclasses_test.go b/pkg/apis/ceph.rook.io/v1/priorityclasses_test.go index a92e584bcf31..cc2f018acd2b 100644 --- a/pkg/apis/ceph.rook.io/v1/priorityclasses_test.go +++ b/pkg/apis/ceph.rook.io/v1/priorityclasses_test.go @@ -30,6 +30,7 @@ all: all-class mgr: mgr-class mon: mon-class osd: osd-class +crashcollector: crashcollector-class `) // convert the raw spec yaml into JSON @@ -43,10 +44,11 @@ osd: osd-class // the unmarshalled priority class names spec should equal the expected spec below expected := PriorityClassNamesSpec{ - "all": "all-class", - "mgr": "mgr-class", - "mon": "mon-class", - "osd": "osd-class", + "all": "all-class", + "mgr": "mgr-class", + "mon": "mon-class", + "osd": "osd-class", + "crashcollector": "crashcollector-class", } assert.Equal(t, expected, priorityClassNames) } diff --git a/pkg/operator/ceph/cluster/crash/crash.go b/pkg/operator/ceph/cluster/crash/crash.go index d5836a02f369..b0db4acfc553 100644 --- a/pkg/operator/ceph/cluster/crash/crash.go +++ b/pkg/operator/ceph/cluster/crash/crash.go @@ -113,10 +113,11 @@ func (r *ReconcileNode) createOrUpdateCephCrash(node corev1.Node, tolerations [] Containers: []corev1.Container{ getCrashDaemonContainer(cephCluster, *cephVersion), }, - Tolerations: tolerations, - RestartPolicy: corev1.RestartPolicyAlways, - HostNetwork: cephCluster.Spec.Network.IsHost(), - Volumes: volumes, + Tolerations: tolerations, + RestartPolicy: corev1.RestartPolicyAlways, + HostNetwork: cephCluster.Spec.Network.IsHost(), + Volumes: volumes, + PriorityClassName: cephv1.GetCrashCollectorPriorityClassName(cephCluster.Spec.PriorityClassNames), }, } diff --git a/pkg/operator/ceph/cluster/crash/crash_test.go b/pkg/operator/ceph/cluster/crash/crash_test.go index 3ead1f591add..55e233b5edc4 100644 --- a/pkg/operator/ceph/cluster/crash/crash_test.go +++ b/pkg/operator/ceph/cluster/crash/crash_test.go @@ -18,6 +18,7 @@ 
package crash import ( "context" + "fmt" "testing" cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1" @@ -25,14 +26,18 @@ import ( "github.com/rook/rook/pkg/client/clientset/versioned/scheme" "github.com/rook/rook/pkg/clusterd" cephver "github.com/rook/rook/pkg/operator/ceph/version" + "github.com/rook/rook/pkg/operator/k8sutil" "github.com/rook/rook/pkg/operator/test" "github.com/stretchr/testify/assert" + appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/batch/v1" "k8s.io/api/batch/v1beta1" + corev1 "k8s.io/api/core/v1" kerrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" cntrlutil "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" ) @@ -105,3 +110,61 @@ func TestCreateOrUpdateCephCron(t *testing.T) { assert.Error(t, err) assert.True(t, kerrors.IsNotFound(err)) } + +func TestCreateOrUpdateCephCrash(t *testing.T) { + cephCluster := cephv1.CephCluster{ + ObjectMeta: metav1.ObjectMeta{Namespace: "rook-ceph"}, + } + cephCluster.Spec.Labels = cephv1.LabelsSpec{} + cephCluster.Spec.PriorityClassNames = cephv1.PriorityClassNamesSpec{} + cephVersion := &cephver.CephVersion{Major: 16, Minor: 2, Extra: 0} + ctx := context.TODO() + context := &clusterd.Context{ + Clientset: test.New(t, 1), + RookClientset: rookclient.NewSimpleClientset(), + } + + s := scheme.Scheme + err := appsv1.AddToScheme(s) + if err != nil { + assert.Fail(t, "failed to build scheme") + } + r := &ReconcileNode{ + scheme: s, + client: fake.NewClientBuilder().WithScheme(s).WithRuntimeObjects().Build(), + context: context, + } + + node := corev1.Node{} + nodeSelector := map[string]string{corev1.LabelHostname: "testnode"} + node.SetLabels(nodeSelector) + tolerations := []corev1.Toleration{{}} + res, err := r.createOrUpdateCephCrash(node, tolerations, cephCluster, cephVersion) + assert.NoError(t, err) + assert.Equal(t, controllerutil.OperationResult("created"), res) + name := k8sutil.TruncateNodeName(fmt.Sprintf("%s-%%s", AppName), "testnode") + deploy := appsv1.Deployment{} + err = r.client.Get(ctx, types.NamespacedName{Namespace: "rook-ceph", Name: name}, &deploy) + assert.NoError(t, err) + podSpec := deploy.Spec.Template + assert.Equal(t, nodeSelector, podSpec.Spec.NodeSelector) + assert.Equal(t, "", podSpec.ObjectMeta.Labels["foo"]) + assert.Equal(t, tolerations, podSpec.Spec.Tolerations) + assert.Equal(t, false, podSpec.Spec.HostNetwork) + assert.Equal(t, "", podSpec.Spec.PriorityClassName) + + cephCluster.Spec.Labels[cephv1.KeyCrashCollector] = map[string]string{"foo": "bar"} + cephCluster.Spec.Network.HostNetwork = true + cephCluster.Spec.PriorityClassNames[cephv1.KeyCrashCollector] = "test-priority-class" + tolerations = []corev1.Toleration{{Key: "key", Operator: "Equal", Value: "value", Effect: "NoSchedule"}} + res, err = r.createOrUpdateCephCrash(node, tolerations, cephCluster, cephVersion) + assert.NoError(t, err) + assert.Equal(t, controllerutil.OperationResult("updated"), res) + err = r.client.Get(ctx, types.NamespacedName{Namespace: "rook-ceph", Name: name}, &deploy) + assert.NoError(t, err) + podSpec = deploy.Spec.Template + assert.Equal(t, "bar", podSpec.ObjectMeta.Labels["foo"]) + assert.Equal(t, tolerations, podSpec.Spec.Tolerations) + assert.Equal(t, true, podSpec.Spec.HostNetwork) + assert.Equal(t, "test-priority-class", podSpec.Spec.PriorityClassName) +} From 08dd2b9732eb4d61fd7e01244081d402d62a8a1e Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Thu, 20 Jan 2022 12:11:47 +0100 Subject: [PATCH 06/16] core: reconcile operator configuration with env var MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, we were ignoring configuration changes coming from the operator's pod env variables. We were assuming most users were using the operator configmap "rook-ceph-operator-config" but most Helm users don't. Now we will reconcile if a cephcluster is found during a CREATE event (can be an operator restart or a cephcluster creation) AND no "rook-ceph-operator-config" is found which means the operator's pod env var are used. Also, unit tests have been added (long due) for the predicate! Closes: #9602, #9487, #9579 Signed-off-by: Sébastien Han (cherry picked from commit 1208bc141023ec0aa64ac9a8e7f2293f6b8a7043) --- pkg/operator/ceph/controller/predicate.go | 2 + pkg/operator/ceph/csi/controller.go | 8 +- pkg/operator/ceph/csi/predicate.go | 21 +++- pkg/operator/ceph/csi/predicate_test.go | 125 ++++++++++++++++++++++ 4 files changed, 151 insertions(+), 5 deletions(-) diff --git a/pkg/operator/ceph/controller/predicate.go b/pkg/operator/ceph/controller/predicate.go index 77eeac7ecc40..7c2ed2011bd6 100644 --- a/pkg/operator/ceph/controller/predicate.go +++ b/pkg/operator/ceph/controller/predicate.go @@ -745,6 +745,8 @@ func DuplicateCephClusters(ctx context.Context, c client.Client, object client.O return true } + logger.Debugf("found %d ceph clusters in namespace %q", len(cephClusterList.Items), object.GetNamespace()) + // This check is needed when the operator is down and a cluster was created if len(cephClusterList.Items) > 1 { // Since multiple predicate are using this function we don't want all of them to log the diff --git a/pkg/operator/ceph/csi/controller.go b/pkg/operator/ceph/csi/controller.go index 8fd48c264857..a905f1ddd9af 100644 --- a/pkg/operator/ceph/csi/controller.go +++ b/pkg/operator/ceph/csi/controller.go @@ -54,7 +54,7 @@ type ReconcileCSI struct { // Add creates a new Ceph CSI Controller and adds it to the Manager. The Manager will set fields on the Controller // and Start it when the Manager is Started. 
func Add(mgr manager.Manager, context *clusterd.Context, opManagerContext context.Context, opConfig opcontroller.OperatorConfig) error { - return add(opManagerContext, mgr, newReconciler(mgr, context, opManagerContext, opConfig)) + return add(opManagerContext, mgr, newReconciler(mgr, context, opManagerContext, opConfig), opConfig) } // newReconciler returns a new reconcile.Reconciler @@ -67,7 +67,7 @@ func newReconciler(mgr manager.Manager, context *clusterd.Context, opManagerCont } } -func add(ctx context.Context, mgr manager.Manager, r reconcile.Reconciler) error { +func add(ctx context.Context, mgr manager.Manager, r reconcile.Reconciler, opConfig opcontroller.OperatorConfig) error { // Create a new controller c, err := controller.New(controllerName, mgr, controller.Options{Reconciler: r}) if err != nil { @@ -77,14 +77,14 @@ func add(ctx context.Context, mgr manager.Manager, r reconcile.Reconciler) error // Watch for ConfigMap (operator config) err = c.Watch(&source.Kind{ - Type: &v1.ConfigMap{TypeMeta: metav1.TypeMeta{Kind: "ConfigMap", APIVersion: v1.SchemeGroupVersion.String()}}}, &handler.EnqueueRequestForObject{}, predicateController(ctx, mgr.GetClient())) + Type: &v1.ConfigMap{TypeMeta: metav1.TypeMeta{Kind: "ConfigMap", APIVersion: v1.SchemeGroupVersion.String()}}}, &handler.EnqueueRequestForObject{}, predicateController(ctx, mgr.GetClient(), opConfig.OperatorNamespace)) if err != nil { return err } // Watch for CephCluster err = c.Watch(&source.Kind{ - Type: &cephv1.CephCluster{TypeMeta: metav1.TypeMeta{Kind: "CephCluster", APIVersion: v1.SchemeGroupVersion.String()}}}, &handler.EnqueueRequestForObject{}, predicateController(ctx, mgr.GetClient())) + Type: &cephv1.CephCluster{TypeMeta: metav1.TypeMeta{Kind: "CephCluster", APIVersion: v1.SchemeGroupVersion.String()}}}, &handler.EnqueueRequestForObject{}, predicateController(ctx, mgr.GetClient(), opConfig.OperatorNamespace)) if err != nil { return err } diff --git a/pkg/operator/ceph/csi/predicate.go b/pkg/operator/ceph/csi/predicate.go index c6b3c539d0bf..2742475e9b3b 100644 --- a/pkg/operator/ceph/csi/predicate.go +++ b/pkg/operator/ceph/csi/predicate.go @@ -25,14 +25,16 @@ import ( "github.com/rook/rook/pkg/operator/ceph/controller" opcontroller "github.com/rook/rook/pkg/operator/ceph/controller" v1 "k8s.io/api/core/v1" + kerrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/predicate" ) // predicateController is the predicate function to trigger reconcile on operator configuration cm change -func predicateController(ctx context.Context, c client.Client) predicate.Funcs { +func predicateController(ctx context.Context, c client.Client, opNamespace string) predicate.Funcs { return predicate.Funcs{ CreateFunc: func(e event.CreateEvent) bool { // if the operator configuration file is created we want to reconcile @@ -48,12 +50,24 @@ func predicateController(ctx context.Context, c client.Client) predicate.Funcs { if controller.DuplicateCephClusters(ctx, c, e.Object, false) { return false } + + // We still have users that don't use the ConfigMap to configure the + // operator/csi and still rely on environment variables from the operator pod's + // spec. So we still want to reconcile the controller if the ConfigMap cannot be found. 
+ err := c.Get(ctx, types.NamespacedName{Name: opcontroller.OperatorSettingConfigMapName, Namespace: opNamespace}, &v1.ConfigMap{}) + if err != nil && kerrors.IsNotFound(err) { + logger.Debugf("could not find operator configuration ConfigMap, will reconcile the csi controller") + return true + } + // This allows us to avoid a double reconcile of the CSI controller if this is not // the first generation of the CephCluster. So only return true if this is the very // first instance of the CephCluster // Corner case is when the cluster is created but the operator is down AND the cm // does not exist... However, these days the operator config map is part of the // operator.yaml so it's probably acceptable? + // This does not account for the case where the cephcluster is already deployed and + // the upgrade the operator or restart it. However, the CM check above should catch that return cephCluster.Generation == 1 } @@ -76,6 +90,11 @@ func predicateController(ctx context.Context, c client.Client) predicate.Funcs { }, DeleteFunc: func(e event.DeleteEvent) bool { + // if the operator configuration file is deleted we want to reconcile to apply the + // configuration based on environment variables present in the operator's pod spec + if cm, ok := e.Object.(*v1.ConfigMap); ok { + return cm.Name == opcontroller.OperatorSettingConfigMapName + } return false }, diff --git a/pkg/operator/ceph/csi/predicate_test.go b/pkg/operator/ceph/csi/predicate_test.go index af718e63d7a4..95f86820dff6 100644 --- a/pkg/operator/ceph/csi/predicate_test.go +++ b/pkg/operator/ceph/csi/predicate_test.go @@ -17,9 +17,18 @@ limitations under the License. package csi import ( + "context" "testing" + "github.com/coreos/pkg/capnslog" + cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1" + "github.com/rook/rook/pkg/client/clientset/versioned/scheme" "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/predicate" ) func Test_findCSIChange(t *testing.T) { @@ -123,3 +132,119 @@ func Test_findCSIChange(t *testing.T) { assert.True(t, b) }) } + +func Test_predicateController(t *testing.T) { + capnslog.SetGlobalLogLevel(capnslog.DEBUG) + var ( + client client.WithWatch + namespace v1.Namespace + p predicate.Funcs + c event.CreateEvent + d event.DeleteEvent + u event.UpdateEvent + cm v1.ConfigMap + cm2 v1.ConfigMap + cluster cephv1.CephCluster + cluster2 cephv1.CephCluster + ) + + s := scheme.Scheme + s.AddKnownTypes(cephv1.SchemeGroupVersion, &v1.ConfigMap{}, &v1.ConfigMapList{}, &cephv1.CephCluster{}, &cephv1.CephClusterList{}) + client = fake.NewClientBuilder().WithScheme(s).Build() + + t.Run("create event is not the one we are looking for", func(t *testing.T) { + c = event.CreateEvent{Object: &namespace} + p = predicateController(context.TODO(), client, "rook-ceph") + assert.False(t, p.Create(c)) + }) + + t.Run("create event is a CM but not the operator config", func(t *testing.T) { + c = event.CreateEvent{Object: &cm} + p = predicateController(context.TODO(), client, "rook-ceph") + assert.False(t, p.Create(c)) + }) + + t.Run("delete event is a CM and it's not the operator config", func(t *testing.T) { + d = event.DeleteEvent{Object: &cm} + p = predicateController(context.TODO(), client, "rook-ceph") + assert.False(t, p.Delete(d)) + }) + + t.Run("create event is a CM and it's the operator config", func(t *testing.T) { + cm.Name = 
"rook-ceph-operator-config" + c = event.CreateEvent{Object: &cm} + p = predicateController(context.TODO(), client, "rook-ceph") + assert.True(t, p.Create(c)) + }) + + t.Run("delete event is a CM and it's the operator config", func(t *testing.T) { + d = event.DeleteEvent{Object: &cm} + p = predicateController(context.TODO(), client, "rook-ceph") + assert.True(t, p.Delete(d)) + }) + + t.Run("update event is a CM and it's the operator config but nothing changed", func(t *testing.T) { + u = event.UpdateEvent{ObjectOld: &cm, ObjectNew: &cm} + p = predicateController(context.TODO(), client, "rook-ceph") + assert.False(t, p.Update(u)) + }) + + t.Run("update event is a CM and the content changed but with incorrect setting", func(t *testing.T) { + cm.Data = map[string]string{"foo": "bar"} + cm2.Name = "rook-ceph-operator-config" + cm2.Data = map[string]string{"foo": "ooo"} + u = event.UpdateEvent{ObjectOld: &cm, ObjectNew: &cm2} + p = predicateController(context.TODO(), client, "rook-ceph") + assert.False(t, p.Update(u)) + }) + + t.Run("update event is a CM and the content changed with correct setting", func(t *testing.T) { + cm.Data = map[string]string{"foo": "bar"} + cm2.Name = "rook-ceph-operator-config" + cm2.Data = map[string]string{"CSI_PROVISIONER_REPLICAS": "2"} + u = event.UpdateEvent{ObjectOld: &cm, ObjectNew: &cm2} + p = predicateController(context.TODO(), client, "rook-ceph") + assert.True(t, p.Update(u)) + }) + + t.Run("create event is a CephCluster and it's the first instance and a cm is present", func(t *testing.T) { + cm.Namespace = "rook-ceph" + err := client.Create(context.TODO(), &cm) + assert.NoError(t, err) + cluster.Generation = 1 + c = event.CreateEvent{Object: &cluster} + p = predicateController(context.TODO(), client, "rook-ceph") + assert.True(t, p.Create(c)) + }) + + t.Run("create event is a CephCluster and it's not the first instance", func(t *testing.T) { + cluster.Generation = 2 + c = event.CreateEvent{Object: &cluster} + p = predicateController(context.TODO(), client, "rook-ceph") + assert.False(t, p.Create(c)) + }) + + t.Run("create event is a CephCluster and it's not the first instance but no cm so we reconcile", func(t *testing.T) { + err := client.Delete(context.TODO(), &cm) + assert.NoError(t, err) + cluster.Generation = 2 + c = event.CreateEvent{Object: &cluster} + p = predicateController(context.TODO(), client, "rook-ceph") + assert.True(t, p.Create(c)) + }) + + t.Run("create event is a CephCluster and a cluster already exists - not reconciling", func(t *testing.T) { + cluster.Name = "ceph1" + cluster2.Name = "ceph2" + cluster.Namespace = "rook-ceph" + cluster2.Namespace = "rook-ceph" + err := client.Create(context.TODO(), &cluster) + assert.NoError(t, err) + err = client.Create(context.TODO(), &cluster2) + assert.NoError(t, err) + c = event.CreateEvent{Object: &cluster2} + assert.Equal(t, "rook-ceph", c.Object.GetNamespace()) + p = predicateController(context.TODO(), client, "rook-ceph") + assert.False(t, p.Create(c)) + }) +} From 7470e51d5b8ecfe827422ebe1cc08b9da0ccd747 Mon Sep 17 00:00:00 2001 From: Travis Nielsen Date: Fri, 29 Oct 2021 14:47:59 -0600 Subject: [PATCH 07/16] helm: apply operator settings in configmap instead of deployment Some operator settings can be applied dynamically in a configmap instead of requiring the operator to restart. While these settings may be infrequently updated, applying these settings in the configmap can avoid an unnecessary operator restart. 
This will also make the operator deployment more consistent with the non-helm install. Signed-off-by: Travis Nielsen (cherry picked from commit 741f211a13b30b6c5ba41ea08f3549584700618e) --- .../charts/rook-ceph/templates/configmap.yaml | 163 ++++++++++++++ .../rook-ceph/templates/deployment.yaml | 210 ------------------ .../installer/ceph_helm_installer.go | 3 - tests/framework/utils/k8s_helper.go | 17 -- 4 files changed, 163 insertions(+), 230 deletions(-) create mode 100644 deploy/charts/rook-ceph/templates/configmap.yaml diff --git a/deploy/charts/rook-ceph/templates/configmap.yaml b/deploy/charts/rook-ceph/templates/configmap.yaml new file mode 100644 index 000000000000..ccc0a3a48468 --- /dev/null +++ b/deploy/charts/rook-ceph/templates/configmap.yaml @@ -0,0 +1,163 @@ +# Operator settings that can be updated without an operator restart +# Operator settings that require an operator restart are found in the operator env vars +kind: ConfigMap +apiVersion: v1 +metadata: + name: rook-ceph-operator-config +data: + ROOK_LOG_LEVEL: {{ .Values.logLevel | quote }} + ROOK_CEPH_COMMANDS_TIMEOUT_SECONDS: {{ .Values.cephCommandsTimeoutSeconds | quote }} + ROOK_OBC_WATCH_OPERATOR_NAMESPACE: {{ .Values.enableOBCWatchOperatorNamespace | quote }} +{{- if .Values.csi }} + ROOK_CSI_ENABLE_RBD: {{ .Values.csi.enableRbdDriver | quote }} + ROOK_CSI_ENABLE_CEPHFS: {{ .Values.csi.enableCephfsDriver | quote }} + CSI_ENABLE_CEPHFS_SNAPSHOTTER: {{ .Values.csi.enableCephfsSnapshotter | quote }} + CSI_ENABLE_RBD_SNAPSHOTTER: {{ .Values.csi.enableRBDSnapshotter | quote }} + CSI_PLUGIN_ENABLE_SELINUX_HOST_MOUNT: {{ .Values.csi.enablePluginSelinuxHostMount | quote }} + CSI_ENABLE_OMAP_GENERATOR: {{ .Values.csi.enableOMAPGenerator | quote }} +{{- if .Values.csi.pluginPriorityClassName }} + CSI_PLUGIN_PRIORITY_CLASSNAME: {{ .Values.csi.pluginPriorityClassName | quote }} +{{- end }} +{{- if .Values.csi.provisionerPriorityClassName }} + CSI_PROVISIONER_PRIORITY_CLASSNAME: {{ .Values.csi.provisionerPriorityClassName | quote }} +{{- end }} +{{- if .Values.csi.enableCSIHostNetwork }} + CSI_ENABLE_HOST_NETWORK: {{ .Values.csi.enableCSIHostNetwork | quote }} +{{- end }} +{{- if .Values.csi.cephFSPluginUpdateStrategy }} + CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY: {{ .Values.csi.cephFSPluginUpdateStrategy | quote }} +{{- end }} +{{- if .Values.csi.rbdFSGroupPolicy }} + CSI_RBD_FSGROUPPOLICY: {{ .Values.csi.rbdFSGroupPolicy | quote }} +{{- end }} +{{- if .Values.csi.cephFSFSGroupPolicy }} + CSI_CEPHFS_FSGROUPPOLICY: {{ .Values.csi.cephFSFSGroupPolicy | quote }} +{{- end }} +{{- if .Values.csi.rbdPluginUpdateStrategy }} + CSI_RBD_PLUGIN_UPDATE_STRATEGY: {{ .Values.csi.rbdPluginUpdateStrategy | quote }} +{{- end }} +{{- if .Values.csi.kubeletDirPath }} + ROOK_CSI_KUBELET_DIR_PATH: {{ .Values.csi.kubeletDirPath | quote }} +{{- end }} + ROOK_CSI_ENABLE_GRPC_METRICS: {{ .Values.csi.enableGrpcMetrics | quote }} +{{- if .Values.csi.cephcsi }} +{{- if .Values.csi.cephcsi.image }} + ROOK_CSI_CEPH_IMAGE: {{ .Values.csi.cephcsi.image | quote }} +{{- end }} +{{- end }} +{{- if .Values.csi.registrar }} +{{- if .Values.csi.registrar.image }} + ROOK_CSI_REGISTRAR_IMAGE: {{ .Values.csi.registrar.image | quote }} +{{- end }} +{{- end }} +{{- if .Values.csi.provisioner }} +{{- if .Values.csi.provisioner.image }} + ROOK_CSI_PROVISIONER_IMAGE: {{ .Values.csi.provisioner.image | quote }} +{{- end }} +{{- end }} +{{- if .Values.csi.snapshotter }} +{{- if .Values.csi.snapshotter.image }} + ROOK_CSI_SNAPSHOTTER_IMAGE: {{ .Values.csi.snapshotter.image | quote 
}} +{{- end }} +{{- end }} +{{- if .Values.csi.attacher }} +{{- if .Values.csi.attacher.image }} + ROOK_CSI_ATTACHER_IMAGE: {{ .Values.csi.attacher.image | quote }} +{{- end }} +{{- end }} +{{- if .Values.csi.resizer }} +{{- if .Values.csi.resizer.image }} + ROOK_CSI_RESIZER_IMAGE: {{ .Values.csi.resizer.image | quote }} +{{- end }} +{{- end }} +{{- if .Values.csi.volumeReplication }} + CSI_ENABLE_VOLUME_REPLICATION: {{ .Values.csi.volumeReplication.enabled | quote }} +{{- if .Values.csi.volumeReplication.image }} + CSI_VOLUME_REPLICATION_IMAGE: {{ .Values.csi.volumeReplication.image | quote }} +{{- end }} +{{- end }} +{{- if .Values.csi.csiAddons }} + CSI_ENABLE_CSIADDONS: {{ .Values.csi.csiAddons.enabled | quote }} +{{- if .Values.csi.csiAddons.image }} + ROOK_CSIADDONS_IMAGE: {{ .Values.csi.csiAddons.image | quote }} +{{- end }} +{{- end }} +{{- if .Values.csi.cephfsPodLabels }} + ROOK_CSI_CEPHFS_POD_LABELS: {{ .Values.csi.cephfsPodLabels | quote }} +{{- end }} +{{- if .Values.csi.rbdPodLabels }} + ROOK_CSI_RBD_POD_LABELS: {{ .Values.csi.rbdPodLabels | quote }} +{{- end }} +{{- if .Values.csi.provisionerTolerations }} + CSI_PROVISIONER_TOLERATIONS: {{ toYaml .Values.csi.provisionerTolerations | quote }} +{{- end }} +{{- if .Values.csi.provisionerNodeAffinity }} + CSI_PROVISIONER_NODE_AFFINITY: {{ .Values.csi.provisionerNodeAffinity }} +{{- end }} +{{- if .Values.csi.rbdProvisionerTolerations }} + CSI_RBD_PROVISIONER_TOLERATIONS: {{ toYaml .Values.csi.rbdProvisionerTolerations | quote }} +{{- end }} +{{- if .Values.csi.rbdProvisionerNodeAffinity }} + CSI_RBD_PROVISIONER_NODE_AFFINITY: {{ .Values.csi.rbdProvisionerNodeAffinity }} +{{- end }} +{{- if .Values.csi.cephFSProvisionerTolerations }} + CSI_CEPHFS_PROVISIONER_TOLERATIONS: {{ toYaml .Values.csi.cephFSProvisionerTolerations | quote }} +{{- end }} +{{- if .Values.csi.cephFSProvisionerNodeAffinity }} + CSI_CEPHFS_PROVISIONER_NODE_AFFINITY: {{ .Values.csi.cephFSProvisionerNodeAffinity }} +{{- end }} +{{- if .Values.csi.allowUnsupportedVersion }} + ROOK_CSI_ALLOW_UNSUPPORTED_VERSION: {{ .Values.csi.allowUnsupportedVersion | quote }} +{{- end }} +{{- if .Values.csi.pluginTolerations }} + CSI_PLUGIN_TOLERATIONS: {{ toYaml .Values.csi.pluginTolerations | quote }} +{{- end }} +{{- if .Values.csi.pluginNodeAffinity }} + CSI_PLUGIN_NODE_AFFINITY: {{ .Values.csi.pluginNodeAffinity }} +{{- end }} +{{- if .Values.csi.rbdPluginTolerations }} + CSI_RBD_PLUGIN_TOLERATIONS: {{ toYaml .Values.csi.rbdPluginTolerations | quote }} +{{- end }} +{{- if .Values.csi.rbdPluginNodeAffinity }} + CSI_RBD_PLUGIN_NODE_AFFINITY: {{ .Values.csi.rbdPluginNodeAffinity }} +{{- end }} +{{- if .Values.csi.cephFSPluginTolerations }} + CSI_CEPHFS_PLUGIN_TOLERATIONS: {{ toYaml .Values.csi.cephFSPluginTolerations | quote }} +{{- end }} +{{- if .Values.csi.cephFSPluginNodeAffinity }} + CSI_CEPHFS_PLUGIN_NODE_AFFINITY: {{ .Values.csi.cephFSPluginNodeAffinity }} +{{- end }} +{{- if .Values.csi.cephfsGrpcMetricsPort }} + CSI_CEPHFS_GRPC_METRICS_PORT: {{ .Values.csi.cephfsGrpcMetricsPort | quote }} +{{- end }} +{{- if .Values.csi.cephfsLivenessMetricsPort }} + CSI_CEPHFS_LIVENESS_METRICS_PORT: {{ .Values.csi.cephfsLivenessMetricsPort | quote }} +{{- end }} +{{- if .Values.csi.rbdGrpcMetricsPort }} + CSI_RBD_GRPC_METRICS_PORT: {{ .Values.csi.rbdGrpcMetricsPort | quote }} +{{- end }} +{{- if .Values.csi.rbdLivenessMetricsPort }} + CSI_RBD_LIVENESS_METRICS_PORT: {{ .Values.csi.rbdLivenessMetricsPort | quote }} +{{- end }} +{{- if .Values.csi.forceCephFSKernelClient }} + 
CSI_FORCE_CEPHFS_KERNEL_CLIENT: {{ .Values.csi.forceCephFSKernelClient | quote }} +{{- end }} +{{- if .Values.csi.logLevel }} + CSI_LOG_LEVEL: {{ .Values.csi.logLevel | quote }} +{{- end }} +{{- if .Values.csi.provisionerReplicas }} + CSI_PROVISIONER_REPLICAS: {{ .Values.csi.provisionerReplicas | quote }} +{{- end }} +{{- if .Values.csi.csiRBDProvisionerResource }} + CSI_RBD_PROVISIONER_RESOURCE: {{ .Values.csi.csiRBDProvisionerResource | quote }} +{{- end }} +{{- if .Values.csi.csiRBDPluginResource }} + CSI_RBD_PLUGIN_RESOURCE: {{ .Values.csi.csiRBDPluginResource | quote }} +{{- end }} +{{- if .Values.csi.csiCephFSProvisionerResource }} + CSI_CEPHFS_PROVISIONER_RESOURCE: {{ .Values.csi.csiCephFSProvisionerResource | quote }} +{{- end }} +{{- if .Values.csi.csiCephFSPluginResource }} + CSI_CEPHFS_PLUGIN_RESOURCE: {{ .Values.csi.csiCephFSPluginResource | quote }} +{{- end }} +{{- end }} diff --git a/deploy/charts/rook-ceph/templates/deployment.yaml b/deploy/charts/rook-ceph/templates/deployment.yaml index 446e59b24e41..9e98f86f7509 100644 --- a/deploy/charts/rook-ceph/templates/deployment.yaml +++ b/deploy/charts/rook-ceph/templates/deployment.yaml @@ -68,222 +68,12 @@ spec: {{- end }} - name: ROOK_HOSTPATH_REQUIRES_PRIVILEGED value: "{{ .Values.hostpathRequiresPrivileged }}" - - name: ROOK_LOG_LEVEL - value: {{ .Values.logLevel }} - name: ROOK_ENABLE_SELINUX_RELABELING value: "{{ .Values.enableSelinuxRelabeling }}" - name: ROOK_DISABLE_DEVICE_HOTPLUG value: "{{ .Values.disableDeviceHotplug }}" -{{- if .Values.csi }} - - name: ROOK_CSI_ENABLE_RBD - value: {{ .Values.csi.enableRbdDriver | quote }} - - name: ROOK_CSI_ENABLE_CEPHFS - value: {{ .Values.csi.enableCephfsDriver | quote }} - - name: CSI_ENABLE_CEPHFS_SNAPSHOTTER - value: {{ .Values.csi.enableCephfsSnapshotter | quote }} - - name: CSI_ENABLE_RBD_SNAPSHOTTER - value: {{ .Values.csi.enableRBDSnapshotter | quote }} - - name: CSI_PLUGIN_PRIORITY_CLASSNAME - value: {{ .Values.csi.pluginPriorityClassName | quote }} - - name: CSI_PROVISIONER_PRIORITY_CLASSNAME - value: {{ .Values.csi.provisionerPriorityClassName | quote }} - - name: CSI_ENABLE_OMAP_GENERATOR - value: {{ .Values.csi.enableOMAPGenerator | quote }} - - name: CSI_ENABLE_VOLUME_REPLICATION - value: {{ .Values.csi.volumeReplication.enabled | quote }} - - name: CSI_ENABLE_CSIADDONS - value: {{ .Values.csi.csiAddons.enabled | quote }} - - name: CSI_PLUGIN_ENABLE_SELINUX_HOST_MOUNT - value: {{ .Values.csi.enablePluginSelinuxHostMount | quote }} -{{- if .Values.csi.enableCSIHostNetwork }} - - name: CSI_ENABLE_HOST_NETWORK - value: {{ .Values.csi.enableCSIHostNetwork | quote }} -{{- end }} -{{- if .Values.csi.cephFSPluginUpdateStrategy }} - - name: CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY - value: {{ .Values.csi.cephFSPluginUpdateStrategy | quote }} -{{- end }} -{{- if .Values.csi.rbdFSGroupPolicy }} - - name: CSI_RBD_FSGROUPPOLICY - value: {{ .Values.csi.rbdFSGroupPolicy | quote }} -{{- end }} -{{- if .Values.csi.cephFSFSGroupPolicy }} - - name: CSI_CEPHFS_FSGROUPPOLICY - value: {{ .Values.csi.cephFSFSGroupPolicy | quote }} -{{- end }} -{{- if .Values.csi.rbdPluginUpdateStrategy }} - - name: CSI_RBD_PLUGIN_UPDATE_STRATEGY - value: {{ .Values.csi.rbdPluginUpdateStrategy | quote }} -{{- end }} -{{- if .Values.csi.kubeletDirPath }} - - name: ROOK_CSI_KUBELET_DIR_PATH - value: {{ .Values.csi.kubeletDirPath | quote }} -{{- end }} - - name: ROOK_CSI_ENABLE_GRPC_METRICS - value: {{ .Values.csi.enableGrpcMetrics | quote }} -{{- if .Values.csi.cephcsi }} -{{- if .Values.csi.cephcsi.image }} - - 
name: ROOK_CSI_CEPH_IMAGE - value: {{ .Values.csi.cephcsi.image | quote }} -{{- end }} -{{- end }} -{{- if .Values.csi.registrar }} -{{- if .Values.csi.registrar.image }} - - name: ROOK_CSI_REGISTRAR_IMAGE - value: {{ .Values.csi.registrar.image | quote }} -{{- end }} -{{- end }} -{{- if .Values.csi.provisioner }} -{{- if .Values.csi.provisioner.image }} - - name: ROOK_CSI_PROVISIONER_IMAGE - value: {{ .Values.csi.provisioner.image | quote }} -{{- end }} -{{- end }} -{{- if .Values.csi.snapshotter }} -{{- if .Values.csi.snapshotter.image }} - - name: ROOK_CSI_SNAPSHOTTER_IMAGE - value: {{ .Values.csi.snapshotter.image | quote }} -{{- end }} -{{- end }} -{{- if .Values.csi.attacher }} -{{- if .Values.csi.attacher.image }} - - name: ROOK_CSI_ATTACHER_IMAGE - value: {{ .Values.csi.attacher.image | quote }} -{{- end }} -{{- end }} -{{- if .Values.csi.resizer }} -{{- if .Values.csi.resizer.image }} - - name: ROOK_CSI_RESIZER_IMAGE - value: {{ .Values.csi.resizer.image | quote }} -{{- end }} -{{- end }} -{{- if .Values.csi.volumeReplication }} -{{- if .Values.csi.volumeReplication.image }} - - name: CSI_VOLUME_REPLICATION_IMAGE - value: {{ .Values.csi.volumeReplication.image | quote }} -{{- end }} -{{- end }} -{{- if .Values.csi.csiAddons }} -{{- if .Values.csi.csiAddons.image }} - - name: ROOK_CSIADDONS_IMAGE - value: {{ .Values.csi.csiAddons.image | quote }} -{{- end }} -{{- if .Values.csi.csiAddonsPort }} - - name: CSIADDONS_PORT - value: {{ .Values.csi.csiAddonsPort | quote }} -{{- end }} -{{- end }} -{{- if .Values.csi.cephfsPodLabels }} - - name: ROOK_CSI_CEPHFS_POD_LABELS - value: {{ .Values.csi.cephfsPodLabels | quote }} -{{- end }} -{{- if .Values.csi.rbdPodLabels }} - - name: ROOK_CSI_RBD_POD_LABELS - value: {{ .Values.csi.rbdPodLabels | quote }} -{{- end }} -{{- if .Values.csi.provisionerTolerations }} - - name: CSI_PROVISIONER_TOLERATIONS - value: {{ toYaml .Values.csi.provisionerTolerations | quote }} -{{- end }} -{{- if .Values.csi.provisionerNodeAffinity }} - - name: CSI_PROVISIONER_NODE_AFFINITY - value: {{ .Values.csi.provisionerNodeAffinity }} -{{- end }} -{{- if .Values.csi.rbdProvisionerTolerations }} - - name: CSI_RBD_PROVISIONER_TOLERATIONS - value: {{ toYaml .Values.csi.rbdProvisionerTolerations | quote }} -{{- end }} -{{- if .Values.csi.rbdProvisionerNodeAffinity }} - - name: CSI_RBD_PROVISIONER_NODE_AFFINITY - value: {{ .Values.csi.rbdProvisionerNodeAffinity }} -{{- end }} -{{- if .Values.csi.cephFSProvisionerTolerations }} - - name: CSI_CEPHFS_PROVISIONER_TOLERATIONS - value: {{ toYaml .Values.csi.cephFSProvisionerTolerations | quote }} -{{- end }} -{{- if .Values.csi.cephFSProvisionerNodeAffinity }} - - name: CSI_CEPHFS_PROVISIONER_NODE_AFFINITY - value: {{ .Values.csi.cephFSProvisionerNodeAffinity }} -{{- end }} -{{- if .Values.csi.allowUnsupportedVersion }} - - name: ROOK_CSI_ALLOW_UNSUPPORTED_VERSION - value: {{ .Values.csi.allowUnsupportedVersion | quote }} -{{- end }} -{{- if .Values.csi.pluginTolerations }} - - name: CSI_PLUGIN_TOLERATIONS - value: {{ toYaml .Values.csi.pluginTolerations | quote }} -{{- end }} -{{- if .Values.csi.pluginNodeAffinity }} - - name: CSI_PLUGIN_NODE_AFFINITY - value: {{ .Values.csi.pluginNodeAffinity }} -{{- end }} -{{- if .Values.csi.rbdPluginTolerations }} - - name: CSI_RBD_PLUGIN_TOLERATIONS - value: {{ toYaml .Values.csi.rbdPluginTolerations | quote }} -{{- end }} -{{- if .Values.csi.rbdPluginNodeAffinity }} - - name: CSI_RBD_PLUGIN_NODE_AFFINITY - value: {{ .Values.csi.rbdPluginNodeAffinity }} -{{- end }} -{{- if 
.Values.csi.cephFSPluginTolerations }} - - name: CSI_CEPHFS_PLUGIN_TOLERATIONS - value: {{ toYaml .Values.csi.cephFSPluginTolerations | quote }} -{{- end }} -{{- if .Values.csi.cephFSPluginNodeAffinity }} - - name: CSI_CEPHFS_PLUGIN_NODE_AFFINITY - value: {{ .Values.csi.cephFSPluginNodeAffinity }} -{{- end }} -{{- if .Values.csi.cephfsGrpcMetricsPort }} - - name: CSI_CEPHFS_GRPC_METRICS_PORT - value: {{ .Values.csi.cephfsGrpcMetricsPort | quote }} -{{- end }} -{{- if .Values.csi.cephfsLivenessMetricsPort }} - - name: CSI_CEPHFS_LIVENESS_METRICS_PORT - value: {{ .Values.csi.cephfsLivenessMetricsPort | quote }} -{{- end }} -{{- if .Values.csi.rbdGrpcMetricsPort }} - - name: CSI_RBD_GRPC_METRICS_PORT - value: {{ .Values.csi.rbdGrpcMetricsPort | quote }} -{{- end }} -{{- if .Values.csi.rbdLivenessMetricsPort }} - - name: CSI_RBD_LIVENESS_METRICS_PORT - value: {{ .Values.csi.rbdLivenessMetricsPort | quote }} -{{- end }} -{{- if .Values.csi.forceCephFSKernelClient }} - - name: CSI_FORCE_CEPHFS_KERNEL_CLIENT - value: {{ .Values.csi.forceCephFSKernelClient | quote }} -{{- end }} -{{- if .Values.csi.logLevel }} - - name: CSI_LOG_LEVEL - value: {{ .Values.csi.logLevel | quote }} -{{- end }} -{{- if .Values.csi.provisionerReplicas }} - - name: CSI_PROVISIONER_REPLICAS - value: {{ .Values.csi.provisionerReplicas | quote }} -{{- end }} -{{- if .Values.csi.csiRBDProvisionerResource }} - - name: CSI_RBD_PROVISIONER_RESOURCE - value: {{ .Values.csi.csiRBDProvisionerResource | quote }} -{{- end }} -{{- if .Values.csi.csiRBDPluginResource }} - - name: CSI_RBD_PLUGIN_RESOURCE - value: {{ .Values.csi.csiRBDPluginResource | quote }} -{{- end }} -{{- if .Values.csi.csiCephFSProvisionerResource }} - - name: CSI_CEPHFS_PROVISIONER_RESOURCE - value: {{ .Values.csi.csiCephFSProvisionerResource | quote }} -{{- end }} -{{- if .Values.csi.csiCephFSPluginResource }} - - name: CSI_CEPHFS_PLUGIN_RESOURCE - value: {{ .Values.csi.csiCephFSPluginResource | quote }} -{{- end }} -{{- end }} - name: ROOK_ENABLE_DISCOVERY_DAEMON value: "{{ .Values.enableDiscoveryDaemon }}" - - name: ROOK_CEPH_COMMANDS_TIMEOUT_SECONDS - value: "{{ .Values.cephCommandsTimeoutSeconds }}" - - name: ROOK_OBC_WATCH_OPERATOR_NAMESPACE - value: "{{ .Values.enableOBCWatchOperatorNamespace }}" - name: NODE_NAME valueFrom: diff --git a/tests/framework/installer/ceph_helm_installer.go b/tests/framework/installer/ceph_helm_installer.go index 7c9ecf03c282..93ac2c1a56d9 100644 --- a/tests/framework/installer/ceph_helm_installer.go +++ b/tests/framework/installer/ceph_helm_installer.go @@ -47,9 +47,6 @@ func (h *CephInstaller) CreateRookOperatorViaHelm(values map[string]interface{}) if err := h.k8shelper.CreateNamespace(h.settings.OperatorNamespace); err != nil { return errors.Errorf("failed to create namespace %s. %v", h.settings.Namespace, err) } - if err := h.k8shelper.CreateOpConfigMap(h.settings.OperatorNamespace); err != nil { - return errors.Errorf("failed to create operator config map. %v", err) - } if err := h.startAdmissionController(); err != nil { return errors.Errorf("failed to start admission controllers. 
%v", err) } diff --git a/tests/framework/utils/k8s_helper.go b/tests/framework/utils/k8s_helper.go index 8d4d71f57491..f57440c11e11 100644 --- a/tests/framework/utils/k8s_helper.go +++ b/tests/framework/utils/k8s_helper.go @@ -37,7 +37,6 @@ import ( rookclient "github.com/rook/rook/pkg/client/clientset/versioned" "github.com/rook/rook/pkg/clusterd" "github.com/rook/rook/pkg/operator/ceph/cluster/crash" - "github.com/rook/rook/pkg/operator/ceph/controller" "github.com/rook/rook/pkg/operator/k8sutil" "github.com/rook/rook/pkg/util/exec" "github.com/stretchr/testify/require" @@ -375,22 +374,6 @@ func (k8sh *K8sHelper) CreateNamespace(namespace string) error { return nil } -func (k8sh *K8sHelper) CreateOpConfigMap(namespace string) error { - ctx := context.TODO() - cm := &v1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: controller.OperatorSettingConfigMapName, - Namespace: namespace, - }, - } - _, err := k8sh.Clientset.CoreV1().ConfigMaps(namespace).Create(ctx, cm, metav1.CreateOptions{}) - if err != nil && !kerrors.IsAlreadyExists(err) { - return fmt.Errorf("failed to create operator config map %s. %+v", controller.OperatorSettingConfigMapName, err) - } - - return nil -} - func (k8sh *K8sHelper) CountPodsWithLabel(label string, namespace string) (int, error) { ctx := context.TODO() options := metav1.ListOptions{LabelSelector: label} From 989fb3438173cc2d20d0710f34f32421176cca95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Mon, 3 Jan 2022 18:24:38 +0100 Subject: [PATCH 08/16] nfs: use hashing function from existing k8sutil package MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have a function to hash a string so let's use it. Signed-off-by: Sébastien Han (cherry picked from commit 92ada526108f8bf93ff1672035725cf23568beff) --- pkg/operator/ceph/nfs/nfs.go | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pkg/operator/ceph/nfs/nfs.go b/pkg/operator/ceph/nfs/nfs.go index 236140003c86..2da06a4b8bd1 100644 --- a/pkg/operator/ceph/nfs/nfs.go +++ b/pkg/operator/ceph/nfs/nfs.go @@ -18,7 +18,6 @@ limitations under the License. package nfs import ( - "crypto/sha256" "fmt" "github.com/banzaicloud/k8s-objectmatcher/patch" @@ -215,13 +214,7 @@ func (r *ReconcileCephNFS) createConfigMap(n *cephv1.CephNFS, name string) (stri } } - h := sha256.New() - if _, err := h.Write([]byte(fmt.Sprintf("%v", configMap.Data))); err != nil { - return "", "", errors.Wrapf(err, "failed to get hash of ganesha config map") - } - configHash := fmt.Sprintf("%x", h.Sum(nil)) - - return configMap.Name, configHash, nil + return configMap.Name, k8sutil.Hash(fmt.Sprintf("%v", configMap.Data)), nil } // Down scale the ganesha server From 496fe25b844310a2f8fb678d40d697814f7084b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Tue, 4 Jan 2022 10:36:48 +0100 Subject: [PATCH 09/16] subvolumegroup: fix code deletion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to log properly when ENOTEMPTY is caught during deletion, so fixing the condition. 
Signed-off-by: Sébastien Han (cherry picked from commit 640d30339d51fb298be998191cc84426e1520ef1) --- pkg/operator/ceph/file/subvolumegroup/controller.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/operator/ceph/file/subvolumegroup/controller.go b/pkg/operator/ceph/file/subvolumegroup/controller.go index c65434723cf8..09895b9e614f 100644 --- a/pkg/operator/ceph/file/subvolumegroup/controller.go +++ b/pkg/operator/ceph/file/subvolumegroup/controller.go @@ -245,8 +245,8 @@ func (r *ReconcileCephFilesystemSubVolumeGroup) deleteSubVolumeGroup(cephFilesys code, ok := exec.ExitStatus(err) // If the subvolumegroup has subvolumes the command will fail with: // Error ENOTEMPTY: error in rmdir /volumes/csi - if ok && code != int(syscall.ENOTEMPTY) { - return errors.Wrapf(err, "failed to delete ceph filesystem subvolume group %q, remove the subvolumes first.", cephFilesystemSubVolumeGroup.Name) + if ok && code == int(syscall.ENOTEMPTY) { + return errors.Wrapf(err, "failed to delete ceph filesystem subvolume group %q, remove the subvolumes first", cephFilesystemSubVolumeGroup.Name) } return errors.Wrapf(err, "failed to delete ceph filesystem subvolume group %q", cephFilesystemSubVolumeGroup.Name) From b006674fea07a0e1b73c84af1e2b8003c5a7d97b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Wed, 22 Dec 2021 14:57:34 +0100 Subject: [PATCH 10/16] csi: change rook-ceph-csi-config to expose clusterID for subvolume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rook-ceph-csi-config config map now includes CephFS subvolumegroups when a subvolumegroup CR is created. It is useful for ceph-csi to understand which subvolumegroup to use for a given cluster. Signed-off-by: Sébastien Han (cherry picked from commit 739abf66627a531dcec04924b2b221520bea5d48) --- pkg/operator/ceph/cluster/cluster.go | 5 +- pkg/operator/ceph/cluster/cluster_external.go | 2 +- pkg/operator/ceph/cluster/controller.go | 11 +- pkg/operator/ceph/cluster/mon/drain_test.go | 3 +- pkg/operator/ceph/cluster/mon/health_test.go | 13 +- pkg/operator/ceph/cluster/mon/mon.go | 8 +- pkg/operator/ceph/cluster/mon/mon_test.go | 7 +- pkg/operator/ceph/cluster/mon/node_test.go | 5 +- pkg/operator/ceph/cluster/mon/service_test.go | 3 +- pkg/operator/ceph/cluster/mon/spec_test.go | 4 - pkg/operator/ceph/csi/cluster_config.go | 86 +++++--- pkg/operator/ceph/csi/cluster_config_test.go | 193 ++++++++++++------ pkg/operator/ceph/csi/predicate.go | 3 +- .../ceph/file/subvolumegroup/controller.go | 40 +++- .../file/subvolumegroup/controller_test.go | 29 ++- 15 files changed, 281 insertions(+), 131 deletions(-) diff --git a/pkg/operator/ceph/cluster/cluster.go b/pkg/operator/ceph/cluster/cluster.go index 3a6ffdb6bab7..c9cf926f4566 100755 --- a/pkg/operator/ceph/cluster/cluster.go +++ b/pkg/operator/ceph/cluster/cluster.go @@ -22,7 +22,6 @@ import ( "fmt" "os/exec" "path" - "sync" "syscall" "github.com/pkg/errors" @@ -67,7 +66,7 @@ type clusterHealth struct { internalCancel context.CancelFunc } -func newCluster(c *cephv1.CephCluster, context *clusterd.Context, csiMutex *sync.Mutex, ownerInfo *k8sutil.OwnerInfo) *cluster { +func newCluster(c *cephv1.CephCluster, context *clusterd.Context, ownerInfo *k8sutil.OwnerInfo) *cluster { return &cluster{ // at this phase of the cluster creation process, the identity components of the cluster are // not yet established. 
we reserve this struct which is filled in as soon as the cluster's @@ -79,7 +78,7 @@ func newCluster(c *cephv1.CephCluster, context *clusterd.Context, csiMutex *sync namespacedName: types.NamespacedName{Namespace: c.Namespace, Name: c.Name}, monitoringRoutines: make(map[string]*clusterHealth), ownerInfo: ownerInfo, - mons: mon.New(context, c.Namespace, c.Spec, ownerInfo, csiMutex), + mons: mon.New(context, c.Namespace, c.Spec, ownerInfo), } } diff --git a/pkg/operator/ceph/cluster/cluster_external.go b/pkg/operator/ceph/cluster/cluster_external.go index 284d34a9ab55..fb97a9adfc7c 100644 --- a/pkg/operator/ceph/cluster/cluster_external.go +++ b/pkg/operator/ceph/cluster/cluster_external.go @@ -111,7 +111,7 @@ func (c *ClusterController) configureExternalCephCluster(cluster *cluster) error } // Save CSI configmap - err = csi.SaveClusterConfig(c.context.Clientset, c.namespacedName.Namespace, cluster.ClusterInfo, c.csiConfigMutex) + err = csi.SaveClusterConfig(c.context.Clientset, c.namespacedName.Namespace, cluster.ClusterInfo, &csi.CsiClusterConfigEntry{Monitors: csi.MonEndpoints(cluster.ClusterInfo.Monitors)}) if err != nil { return errors.Wrap(err, "failed to update csi cluster config") } diff --git a/pkg/operator/ceph/cluster/controller.go b/pkg/operator/ceph/cluster/controller.go index d232a821faaf..f1fa428745f8 100644 --- a/pkg/operator/ceph/cluster/controller.go +++ b/pkg/operator/ceph/cluster/controller.go @@ -21,7 +21,6 @@ import ( "context" "fmt" "os" - "sync" "github.com/coreos/pkg/capnslog" "github.com/pkg/errors" @@ -85,7 +84,6 @@ type ClusterController struct { context *clusterd.Context rookImage string clusterMap map[string]*cluster - csiConfigMutex *sync.Mutex osdChecker *osd.OSDHealthMonitor client client.Client namespacedName types.NamespacedName @@ -327,10 +325,9 @@ func (r *ReconcileCephCluster) reconcileDelete(cephCluster *cephv1.CephCluster) // NewClusterController create controller for watching cluster custom resources created func NewClusterController(context *clusterd.Context, rookImage string) *ClusterController { return &ClusterController{ - context: context, - rookImage: rookImage, - clusterMap: make(map[string]*cluster), - csiConfigMutex: &sync.Mutex{}, + context: context, + rookImage: rookImage, + clusterMap: make(map[string]*cluster), } } @@ -343,7 +340,7 @@ func (c *ClusterController) reconcileCephCluster(clusterObj *cephv1.CephCluster, cluster, ok := c.clusterMap[clusterObj.Namespace] if !ok { // It's a new cluster so let's populate the struct - cluster = newCluster(clusterObj, c.context, c.csiConfigMutex, ownerInfo) + cluster = newCluster(clusterObj, c.context, ownerInfo) } cluster.namespacedName = c.namespacedName diff --git a/pkg/operator/ceph/cluster/mon/drain_test.go b/pkg/operator/ceph/cluster/mon/drain_test.go index f64e1f6e713f..2ff565d21ae7 100644 --- a/pkg/operator/ceph/cluster/mon/drain_test.go +++ b/pkg/operator/ceph/cluster/mon/drain_test.go @@ -18,7 +18,6 @@ package mon import ( "context" - "sync" "testing" cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1" @@ -48,7 +47,7 @@ func createFakeCluster(t *testing.T, cephClusterObj *cephv1.CephCluster, k8sVers cl := fake.NewClientBuilder().WithScheme(scheme).WithRuntimeObjects().Build() clientset := test.New(t, 3) - c := New(&clusterd.Context{Client: cl, Clientset: clientset}, mockNamespace, cephClusterObj.Spec, ownerInfo, &sync.Mutex{}) + c := New(&clusterd.Context{Client: cl, Clientset: clientset}, mockNamespace, cephClusterObj.Spec, ownerInfo) test.SetFakeKubernetesVersion(clientset, k8sVersion) 
return c } diff --git a/pkg/operator/ceph/cluster/mon/health_test.go b/pkg/operator/ceph/cluster/mon/health_test.go index f27a1a75bd37..7ba8724a89d3 100644 --- a/pkg/operator/ceph/cluster/mon/health_test.go +++ b/pkg/operator/ceph/cluster/mon/health_test.go @@ -22,7 +22,6 @@ import ( "io/ioutil" "os" "reflect" - "sync" "testing" "time" @@ -66,7 +65,7 @@ func TestCheckHealth(t *testing.T) { Executor: executor, } ownerInfo := cephclient.NewMinimumOwnerInfoWithOwnerRef() - c := New(context, "ns", cephv1.ClusterSpec{}, ownerInfo, &sync.Mutex{}) + c := New(context, "ns", cephv1.ClusterSpec{}, ownerInfo) // clusterInfo is nil so we return err err := c.checkHealth(ctx) assert.NotNil(t, err) @@ -155,7 +154,7 @@ func TestCheckHealth(t *testing.T) { } func TestSkipMonFailover(t *testing.T) { - c := New(&clusterd.Context{}, "ns", cephv1.ClusterSpec{}, nil, nil) + c := New(&clusterd.Context{}, "ns", cephv1.ClusterSpec{}, nil) c.ClusterInfo = clienttest.CreateTestClusterInfo(1) monName := "arb" @@ -201,7 +200,7 @@ func TestEvictMonOnSameNode(t *testing.T) { } context := &clusterd.Context{Clientset: clientset, ConfigDir: configDir, Executor: executor} ownerInfo := cephclient.NewMinimumOwnerInfoWithOwnerRef() - c := New(context, "ns", cephv1.ClusterSpec{}, ownerInfo, &sync.Mutex{}) + c := New(context, "ns", cephv1.ClusterSpec{}, ownerInfo) setCommonMonProperties(c, 1, cephv1.MonSpec{Count: 0}, "myversion") c.maxMonID = 2 c.waitForStart = false @@ -256,7 +255,7 @@ func TestScaleMonDeployment(t *testing.T) { clientset := test.New(t, 1) context := &clusterd.Context{Clientset: clientset} ownerInfo := cephclient.NewMinimumOwnerInfoWithOwnerRef() - c := New(context, "ns", cephv1.ClusterSpec{}, ownerInfo, &sync.Mutex{}) + c := New(context, "ns", cephv1.ClusterSpec{}, ownerInfo) setCommonMonProperties(c, 1, cephv1.MonSpec{Count: 0, AllowMultiplePerNode: true}, "myversion") name := "a" @@ -307,7 +306,7 @@ func TestCheckHealthNotFound(t *testing.T) { Executor: executor, } ownerInfo := cephclient.NewMinimumOwnerInfoWithOwnerRef() - c := New(context, "ns", cephv1.ClusterSpec{}, ownerInfo, &sync.Mutex{}) + c := New(context, "ns", cephv1.ClusterSpec{}, ownerInfo) setCommonMonProperties(c, 2, cephv1.MonSpec{Count: 3, AllowMultiplePerNode: true}, "myversion") c.waitForStart = false defer os.RemoveAll(c.context.ConfigDir) @@ -370,7 +369,7 @@ func TestAddRemoveMons(t *testing.T) { Executor: executor, } ownerInfo := cephclient.NewMinimumOwnerInfoWithOwnerRef() - c := New(context, "ns", cephv1.ClusterSpec{}, ownerInfo, &sync.Mutex{}) + c := New(context, "ns", cephv1.ClusterSpec{}, ownerInfo) setCommonMonProperties(c, 0, cephv1.MonSpec{Count: 5, AllowMultiplePerNode: true}, "myversion") c.maxMonID = 0 // "a" is max mon id c.waitForStart = false diff --git a/pkg/operator/ceph/cluster/mon/mon.go b/pkg/operator/ceph/cluster/mon/mon.go index 070ce2edca8e..bf7988ba3fcf 100644 --- a/pkg/operator/ceph/cluster/mon/mon.go +++ b/pkg/operator/ceph/cluster/mon/mon.go @@ -120,7 +120,6 @@ type Cluster struct { monTimeoutList map[string]time.Time mapping *Mapping ownerInfo *k8sutil.OwnerInfo - csiConfigMutex *sync.Mutex isUpgrade bool arbiterMon string } @@ -164,7 +163,7 @@ type SchedulingResult struct { } // New creates an instance of a mon cluster -func New(context *clusterd.Context, namespace string, spec cephv1.ClusterSpec, ownerInfo *k8sutil.OwnerInfo, csiConfigMutex *sync.Mutex) *Cluster { +func New(context *clusterd.Context, namespace string, spec cephv1.ClusterSpec, ownerInfo *k8sutil.OwnerInfo) *Cluster { return &Cluster{ context: 
context, spec: spec, @@ -175,8 +174,7 @@ func New(context *clusterd.Context, namespace string, spec cephv1.ClusterSpec, o mapping: &Mapping{ Schedule: map[string]*MonScheduleInfo{}, }, - ownerInfo: ownerInfo, - csiConfigMutex: csiConfigMutex, + ownerInfo: ownerInfo, } } @@ -1042,7 +1040,7 @@ func (c *Cluster) saveMonConfig() error { return errors.Wrap(err, "failed to write connection config for new mons") } - if err := csi.SaveClusterConfig(c.context.Clientset, c.Namespace, c.ClusterInfo, c.csiConfigMutex); err != nil { + if err := csi.SaveClusterConfig(c.context.Clientset, c.Namespace, c.ClusterInfo, &csi.CsiClusterConfigEntry{Monitors: csi.MonEndpoints(c.ClusterInfo.Monitors)}); err != nil { return errors.Wrap(err, "failed to update csi cluster config") } diff --git a/pkg/operator/ceph/cluster/mon/mon_test.go b/pkg/operator/ceph/cluster/mon/mon_test.go index 14e6f37db521..67fb0b4af54a 100644 --- a/pkg/operator/ceph/cluster/mon/mon_test.go +++ b/pkg/operator/ceph/cluster/mon/mon_test.go @@ -25,7 +25,6 @@ import ( "reflect" "strconv" "strings" - "sync" "testing" "time" @@ -273,7 +272,7 @@ func validateStart(t *testing.T, c *Cluster) { func TestPersistMons(t *testing.T) { clientset := test.New(t, 1) ownerInfo := cephclient.NewMinimumOwnerInfoWithOwnerRef() - c := New(&clusterd.Context{Clientset: clientset}, "ns", cephv1.ClusterSpec{Annotations: cephv1.AnnotationsSpec{cephv1.KeyClusterMetadata: cephv1.Annotations{"key": "value"}}}, ownerInfo, &sync.Mutex{}) + c := New(&clusterd.Context{Clientset: clientset}, "ns", cephv1.ClusterSpec{Annotations: cephv1.AnnotationsSpec{cephv1.KeyClusterMetadata: cephv1.Annotations{"key": "value"}}}, ownerInfo) setCommonMonProperties(c, 1, cephv1.MonSpec{Count: 3, AllowMultiplePerNode: true}, "myversion") // Persist mon a @@ -302,7 +301,7 @@ func TestSaveMonEndpoints(t *testing.T) { configDir, _ := ioutil.TempDir("", "") defer os.RemoveAll(configDir) ownerInfo := cephclient.NewMinimumOwnerInfoWithOwnerRef() - c := New(&clusterd.Context{Clientset: clientset, ConfigDir: configDir}, "ns", cephv1.ClusterSpec{}, ownerInfo, &sync.Mutex{}) + c := New(&clusterd.Context{Clientset: clientset, ConfigDir: configDir}, "ns", cephv1.ClusterSpec{}, ownerInfo) setCommonMonProperties(c, 1, cephv1.MonSpec{Count: 3, AllowMultiplePerNode: true}, "myversion") // create the initial config map @@ -350,7 +349,7 @@ func TestMaxMonID(t *testing.T) { configDir, _ := ioutil.TempDir("", "") defer os.RemoveAll(configDir) ownerInfo := cephclient.NewMinimumOwnerInfoWithOwnerRef() - c := New(&clusterd.Context{Clientset: clientset, ConfigDir: configDir}, "ns", cephv1.ClusterSpec{}, ownerInfo, &sync.Mutex{}) + c := New(&clusterd.Context{Clientset: clientset, ConfigDir: configDir}, "ns", cephv1.ClusterSpec{}, ownerInfo) c.ClusterInfo = clienttest.CreateTestClusterInfo(1) // when the configmap is not found, the maxMonID is -1 diff --git a/pkg/operator/ceph/cluster/mon/node_test.go b/pkg/operator/ceph/cluster/mon/node_test.go index 8fa24cf59d9f..98bf7fbe88a4 100644 --- a/pkg/operator/ceph/cluster/mon/node_test.go +++ b/pkg/operator/ceph/cluster/mon/node_test.go @@ -19,7 +19,6 @@ package mon import ( "context" "strings" - "sync" "testing" cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1" @@ -37,7 +36,7 @@ import ( func TestNodeAffinity(t *testing.T) { ctx := context.TODO() clientset := test.New(t, 4) - c := New(&clusterd.Context{Clientset: clientset}, "ns", cephv1.ClusterSpec{}, &k8sutil.OwnerInfo{}, &sync.Mutex{}) + c := New(&clusterd.Context{Clientset: clientset}, "ns", cephv1.ClusterSpec{}, 
&k8sutil.OwnerInfo{}) setCommonMonProperties(c, 0, cephv1.MonSpec{Count: 3, AllowMultiplePerNode: true}, "myversion") c.spec.Placement = map[cephv1.KeyType]cephv1.Placement{} @@ -167,7 +166,7 @@ func TestPodMemory(t *testing.T) { func TestHostNetwork(t *testing.T) { clientset := test.New(t, 3) - c := New(&clusterd.Context{Clientset: clientset}, "ns", cephv1.ClusterSpec{}, &k8sutil.OwnerInfo{}, &sync.Mutex{}) + c := New(&clusterd.Context{Clientset: clientset}, "ns", cephv1.ClusterSpec{}, &k8sutil.OwnerInfo{}) setCommonMonProperties(c, 0, cephv1.MonSpec{Count: 3, AllowMultiplePerNode: true}, "myversion") c.spec.Network.HostNetwork = true diff --git a/pkg/operator/ceph/cluster/mon/service_test.go b/pkg/operator/ceph/cluster/mon/service_test.go index 62b5dc574393..c8cf31e36d9b 100644 --- a/pkg/operator/ceph/cluster/mon/service_test.go +++ b/pkg/operator/ceph/cluster/mon/service_test.go @@ -18,7 +18,6 @@ package mon import ( "context" - "sync" "testing" cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1" @@ -33,7 +32,7 @@ import ( func TestCreateService(t *testing.T) { ctx := context.TODO() clientset := test.New(t, 1) - c := New(&clusterd.Context{Clientset: clientset}, "ns", cephv1.ClusterSpec{}, &k8sutil.OwnerInfo{}, &sync.Mutex{}) + c := New(&clusterd.Context{Clientset: clientset}, "ns", cephv1.ClusterSpec{}, &k8sutil.OwnerInfo{}) c.ClusterInfo = client.AdminTestClusterInfo("rook-ceph") m := &monConfig{ResourceName: "rook-ceph-mon-b", DaemonName: "b"} clusterIP, err := c.createService(m) diff --git a/pkg/operator/ceph/cluster/mon/spec_test.go b/pkg/operator/ceph/cluster/mon/spec_test.go index 2b213e2be6f8..c1f58ad9f42b 100644 --- a/pkg/operator/ceph/cluster/mon/spec_test.go +++ b/pkg/operator/ceph/cluster/mon/spec_test.go @@ -17,7 +17,6 @@ limitations under the License. 
package mon import ( - "sync" "testing" cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1" @@ -48,7 +47,6 @@ func testPodSpec(t *testing.T, monID string, pvc bool) { "ns", cephv1.ClusterSpec{}, ownerInfo, - &sync.Mutex{}, ) setCommonMonProperties(c, 0, cephv1.MonSpec{Count: 3, AllowMultiplePerNode: true}, "rook/rook:myversion") c.spec.CephVersion = cephv1.CephVersionSpec{Image: "quay.io/ceph/ceph:myceph"} @@ -98,7 +96,6 @@ func TestDeploymentPVCSpec(t *testing.T) { "ns", cephv1.ClusterSpec{}, ownerInfo, - &sync.Mutex{}, ) setCommonMonProperties(c, 0, cephv1.MonSpec{Count: 3, AllowMultiplePerNode: true}, "rook/rook:myversion") c.spec.CephVersion = cephv1.CephVersionSpec{Image: "quay.io/ceph/ceph:myceph"} @@ -158,7 +155,6 @@ func testRequiredDuringScheduling(t *testing.T, hostNetwork, allowMultiplePerNod "ns", cephv1.ClusterSpec{}, &k8sutil.OwnerInfo{}, - &sync.Mutex{}, ) c.spec.Network.HostNetwork = hostNetwork diff --git a/pkg/operator/ceph/csi/cluster_config.go b/pkg/operator/ceph/csi/cluster_config.go index 03e76934f348..3f4ad3b8c81b 100644 --- a/pkg/operator/ceph/csi/cluster_config.go +++ b/pkg/operator/ceph/csi/cluster_config.go @@ -33,15 +33,22 @@ import ( ) var ( - logger = capnslog.NewPackageLogger("github.com/rook/rook", "ceph-csi") + logger = capnslog.NewPackageLogger("github.com/rook/rook", "ceph-csi") + configMutex sync.Mutex ) -type csiClusterConfigEntry struct { - ClusterID string `json:"clusterID"` - Monitors []string `json:"monitors"` +type CsiClusterConfigEntry struct { + ClusterID string `json:"clusterID"` + Monitors []string `json:"monitors"` + CephFS *CsiCephFSSpec `json:"cephFS,omitempty"` + RadosNamespace string `json:"radosNamespace,omitempty"` } -type csiClusterConfig []csiClusterConfigEntry +type CsiCephFSSpec struct { + SubvolumeGroup string `json:"subvolumeGroup,omitempty"` +} + +type csiClusterConfig []CsiClusterConfigEntry // FormatCsiClusterConfig returns a json-formatted string containing // the cluster-to-mon mapping required to configure ceph csi. @@ -79,7 +86,7 @@ func formatCsiClusterConfig(cc csiClusterConfig) (string, error) { return string(ccJson), nil } -func monEndpoints(mons map[string]*cephclient.MonInfo) []string { +func MonEndpoints(mons map[string]*cephclient.MonInfo) []string { endpoints := make([]string, 0) for _, m := range mons { endpoints = append(endpoints, m.Endpoint) @@ -89,32 +96,59 @@ func monEndpoints(mons map[string]*cephclient.MonInfo) []string { // updateCsiClusterConfig returns a json-formatted string containing // the cluster-to-mon mapping required to configure ceph csi. -func updateCsiClusterConfig( - curr, clusterKey string, mons map[string]*cephclient.MonInfo) (string, error) { - +func updateCsiClusterConfig(curr, clusterKey string, newCsiClusterConfigEntry *CsiClusterConfigEntry) (string, error) { var ( cc csiClusterConfig - centry csiClusterConfigEntry + centry CsiClusterConfigEntry found bool ) + cc, err := parseCsiClusterConfig(curr) if err != nil { return "", errors.Wrap(err, "failed to parse current csi cluster config") } + // Regardless of which controllers call updateCsiClusterConfig(), the values will be preserved since + // a lock is acquired for the update operation. So concurrent updates (rare event) will block and + // wait for the other update to complete. Monitors and Subvolumegroup will be updated + // independently and won't collide. 
for i, centry := range cc { if centry.ClusterID == clusterKey { - centry.Monitors = monEndpoints(mons) + // If the new entry is nil, this means the entry is being deleted so remove it from the list + if newCsiClusterConfigEntry == nil { + cc = append(cc[:i], cc[i+1:]...) + found = true + break + } + centry.Monitors = newCsiClusterConfigEntry.Monitors + if newCsiClusterConfigEntry.CephFS != nil && newCsiClusterConfigEntry.CephFS.SubvolumeGroup != "" { + centry.CephFS = newCsiClusterConfigEntry.CephFS + } + if newCsiClusterConfigEntry.RadosNamespace != "" { + centry.RadosNamespace = newCsiClusterConfigEntry.RadosNamespace + } found = true cc[i] = centry break } } if !found { - centry.ClusterID = clusterKey - centry.Monitors = monEndpoints(mons) - cc = append(cc, centry) + // If it's the first time we create the cluster, the entry does not exist, so the removal + // will fail with a dangling pointer + if newCsiClusterConfigEntry != nil { + centry.ClusterID = clusterKey + centry.Monitors = newCsiClusterConfigEntry.Monitors + // Add a condition not to fill with empty values + if newCsiClusterConfigEntry.CephFS != nil && newCsiClusterConfigEntry.CephFS.SubvolumeGroup != "" { + centry.CephFS = newCsiClusterConfigEntry.CephFS + } + if newCsiClusterConfigEntry.RadosNamespace != "" { + centry.RadosNamespace = newCsiClusterConfigEntry.RadosNamespace + } + cc = append(cc, centry) + } } + return formatCsiClusterConfig(cc) } @@ -155,24 +189,24 @@ func CreateCsiConfigMap(namespace string, clientset kubernetes.Interface, ownerI // value that is provided to ceph-csi uses in the storage class. // The locker l is typically a mutex and is used to prevent the config // map from being updated for multiple clusters simultaneously. -func SaveClusterConfig( - clientset kubernetes.Interface, clusterNamespace string, - clusterInfo *cephclient.ClusterInfo, l sync.Locker) error { +func SaveClusterConfig(clientset kubernetes.Interface, clusterNamespace string, clusterInfo *cephclient.ClusterInfo, newCsiClusterConfigEntry *CsiClusterConfigEntry) error { if !CSIEnabled() { + logger.Debug("skipping csi config update because csi is not enabled") return nil } - l.Lock() - defer l.Unlock() + + configMutex.Lock() + defer configMutex.Unlock() + // csi is deployed into the same namespace as the operator csiNamespace := os.Getenv(k8sutil.PodNamespaceEnvVar) if csiNamespace == "" { - return errors.Errorf("namespace value missing for %s", k8sutil.PodNamespaceEnvVar) + return errors.Errorf("namespace value missing for %q", k8sutil.PodNamespaceEnvVar) } - logger.Debugf("Using %+v for CSI ConfigMap Namespace", csiNamespace) + logger.Debugf("using %q for csi configmap namespace", csiNamespace) // fetch current ConfigMap contents - configMap, err := clientset.CoreV1().ConfigMaps(csiNamespace).Get(clusterInfo.Context, - ConfigName, metav1.GetOptions{}) + configMap, err := clientset.CoreV1().ConfigMaps(csiNamespace).Get(clusterInfo.Context, ConfigName, metav1.GetOptions{}) if err != nil { return errors.Wrap(err, "failed to fetch current csi config map") } @@ -182,8 +216,8 @@ func SaveClusterConfig( if currData == "" { currData = "[]" } - newData, err := updateCsiClusterConfig( - currData, clusterNamespace, clusterInfo.Monitors) + + newData, err := updateCsiClusterConfig(currData, clusterNamespace, newCsiClusterConfigEntry) if err != nil { return errors.Wrap(err, "failed to update csi config map data") } @@ -191,7 +225,7 @@ func SaveClusterConfig( // update ConfigMap with new contents if _, err := 
clientset.CoreV1().ConfigMaps(csiNamespace).Update(clusterInfo.Context, configMap, metav1.UpdateOptions{}); err != nil { - return errors.Wrapf(err, "failed to update csi config map") + return errors.Wrap(err, "failed to update csi config map") } return nil diff --git a/pkg/operator/ceph/csi/cluster_config_test.go b/pkg/operator/ceph/csi/cluster_config_test.go index afa53e074b77..08c9e1cad82a 100644 --- a/pkg/operator/ceph/csi/cluster_config_test.go +++ b/pkg/operator/ceph/csi/cluster_config_test.go @@ -19,71 +19,142 @@ package csi import ( "testing" - cephclient "github.com/rook/rook/pkg/daemon/ceph/client" "github.com/stretchr/testify/assert" ) func TestUpdateCsiClusterConfig(t *testing.T) { - // initialize an empty list & add a simple mons list - mons := map[string]*cephclient.MonInfo{ - "foo": {Name: "foo", Endpoint: "1.2.3.4:5000"}, + csiClusterConfigEntry := CsiClusterConfigEntry{ + Monitors: []string{"1.2.3.4:5000"}, } - s, err := updateCsiClusterConfig("[]", "alpha", mons) - assert.NoError(t, err) - assert.Equal(t, s, - `[{"clusterID":"alpha","monitors":["1.2.3.4:5000"]}]`) - - // add a 2nd mon to the current cluster - mons["bar"] = &cephclient.MonInfo{ - Name: "bar", Endpoint: "10.11.12.13:5000"} - s, err = updateCsiClusterConfig(s, "alpha", mons) - assert.NoError(t, err) - cc, err := parseCsiClusterConfig(s) - assert.NoError(t, err) - assert.Equal(t, len(cc), 1) - assert.Equal(t, cc[0].ClusterID, "alpha") - assert.Contains(t, cc[0].Monitors, "1.2.3.4:5000") - assert.Contains(t, cc[0].Monitors, "10.11.12.13:5000") - assert.Equal(t, len(cc[0].Monitors), 2) - - // add a 2nd cluster with 3 mons - mons2 := map[string]*cephclient.MonInfo{ - "flim": {Name: "flim", Endpoint: "20.1.1.1:5000"}, - "flam": {Name: "flam", Endpoint: "20.1.1.2:5000"}, - "blam": {Name: "blam", Endpoint: "20.1.1.3:5000"}, + csiClusterConfigEntry2 := CsiClusterConfigEntry{ + Monitors: []string{"20.1.1.1:5000", "20.1.1.2:5000", "20.1.1.3:5000"}, } - s, err = updateCsiClusterConfig(s, "beta", mons2) - assert.NoError(t, err) - cc, err = parseCsiClusterConfig(s) - assert.NoError(t, err) - assert.Equal(t, len(cc), 2) - assert.Equal(t, cc[0].ClusterID, "alpha") - assert.Contains(t, cc[0].Monitors, "1.2.3.4:5000") - assert.Contains(t, cc[0].Monitors, "10.11.12.13:5000") - assert.Equal(t, len(cc[0].Monitors), 2) - assert.Equal(t, cc[1].ClusterID, "beta") - assert.Contains(t, cc[1].Monitors, "20.1.1.1:5000") - assert.Contains(t, cc[1].Monitors, "20.1.1.2:5000") - assert.Contains(t, cc[1].Monitors, "20.1.1.3:5000") - assert.Equal(t, len(cc[1].Monitors), 3) - - // remove a mon from the 2nd cluster - delete(mons2, "blam") - s, err = updateCsiClusterConfig(s, "beta", mons2) - assert.NoError(t, err) - cc, err = parseCsiClusterConfig(s) - assert.NoError(t, err) - assert.Equal(t, len(cc), 2) - assert.Equal(t, cc[0].ClusterID, "alpha") - assert.Contains(t, cc[0].Monitors, "1.2.3.4:5000") - assert.Contains(t, cc[0].Monitors, "10.11.12.13:5000") - assert.Equal(t, len(cc[0].Monitors), 2) - assert.Equal(t, cc[1].ClusterID, "beta") - assert.Contains(t, cc[1].Monitors, "20.1.1.1:5000") - assert.Contains(t, cc[1].Monitors, "20.1.1.2:5000") - assert.Equal(t, len(cc[1].Monitors), 2) - - // does it return error on garbage input? 
- _, err = updateCsiClusterConfig("qqq", "beta", mons2) - assert.Error(t, err) + csiClusterConfigEntry3 := CsiClusterConfigEntry{ + Monitors: []string{"10.1.1.1:5000", "10.1.1.2:5000", "10.1.1.3:5000"}, + CephFS: &CsiCephFSSpec{ + SubvolumeGroup: "mygroup", + }, + } + var s string + var err error + + t.Run("add a simple mons list", func(t *testing.T) { + s, err = updateCsiClusterConfig("[]", "alpha", &csiClusterConfigEntry) + assert.NoError(t, err) + assert.Equal(t, s, + `[{"clusterID":"alpha","monitors":["1.2.3.4:5000"]}]`) + }) + + t.Run("add a 2nd mon to the current cluster", func(t *testing.T) { + csiClusterConfigEntry.Monitors = append(csiClusterConfigEntry.Monitors, "10.11.12.13:5000") + s, err = updateCsiClusterConfig(s, "alpha", &csiClusterConfigEntry) + assert.NoError(t, err) + cc, err := parseCsiClusterConfig(s) + assert.NoError(t, err) + assert.Equal(t, 1, len(cc)) + assert.Equal(t, "alpha", cc[0].ClusterID) + assert.Contains(t, cc[0].Monitors, "1.2.3.4:5000") + assert.Contains(t, cc[0].Monitors, "10.11.12.13:5000") + assert.Equal(t, 2, len(cc[0].Monitors)) + }) + + t.Run("add a 2nd cluster with 3 mons", func(t *testing.T) { + s, err = updateCsiClusterConfig(s, "beta", &csiClusterConfigEntry2) + assert.NoError(t, err) + cc, err := parseCsiClusterConfig(s) + assert.NoError(t, err) + assert.Equal(t, 2, len(cc)) + assert.Equal(t, "alpha", cc[0].ClusterID) + assert.Contains(t, cc[0].Monitors, "1.2.3.4:5000") + assert.Contains(t, cc[0].Monitors, "10.11.12.13:5000") + assert.Equal(t, 2, len(cc[0].Monitors)) + assert.Equal(t, "beta", cc[1].ClusterID) + assert.Contains(t, cc[1].Monitors, "20.1.1.1:5000") + assert.Contains(t, cc[1].Monitors, "20.1.1.2:5000") + assert.Contains(t, cc[1].Monitors, "20.1.1.3:5000") + assert.Equal(t, 3, len(cc[1].Monitors)) + }) + + t.Run("remove a mon from the 2nd cluster", func(t *testing.T) { + i := 2 + // Remove last element of the slice + csiClusterConfigEntry2.Monitors = append(csiClusterConfigEntry2.Monitors[:i], csiClusterConfigEntry2.Monitors[i+1:]...) 
+ s, err = updateCsiClusterConfig(s, "beta", &csiClusterConfigEntry2) + assert.NoError(t, err) + cc, err := parseCsiClusterConfig(s) + assert.NoError(t, err) + assert.Equal(t, 2, len(cc)) + assert.Equal(t, "alpha", cc[0].ClusterID) + assert.Contains(t, cc[0].Monitors, "1.2.3.4:5000") + assert.Contains(t, cc[0].Monitors, "10.11.12.13:5000") + assert.Equal(t, 2, len(cc[0].Monitors)) + assert.Equal(t, "beta", cc[1].ClusterID) + assert.Contains(t, cc[1].Monitors, "20.1.1.1:5000") + assert.Contains(t, cc[1].Monitors, "20.1.1.2:5000") + assert.Equal(t, 2, len(cc[1].Monitors)) + }) + + t.Run("add a 3rd cluster with subvolumegroup", func(t *testing.T) { + s, err = updateCsiClusterConfig(s, "baba", &csiClusterConfigEntry3) + assert.NoError(t, err) + cc, err := parseCsiClusterConfig(s) + assert.NoError(t, err) + assert.Equal(t, 3, len(cc)) + assert.Equal(t, "alpha", cc[0].ClusterID) + assert.Equal(t, 2, len(cc[0].Monitors)) + assert.Equal(t, "beta", cc[1].ClusterID) + assert.Equal(t, "baba", cc[2].ClusterID) + assert.Equal(t, "10.1.1.1:5000", cc[2].Monitors[0]) + assert.Equal(t, 3, len(cc[2].Monitors)) + assert.Equal(t, "mygroup", cc[2].CephFS.SubvolumeGroup) + + }) + + t.Run("add a 4th mon to the 3rd cluster and subvolumegroup is preserved", func(t *testing.T) { + csiClusterConfigEntry3.Monitors = append(csiClusterConfigEntry3.Monitors, "10.11.12.13:5000") + s, err = updateCsiClusterConfig(s, "baba", &csiClusterConfigEntry3) + assert.NoError(t, err) + cc, err := parseCsiClusterConfig(s) + assert.NoError(t, err) + assert.Equal(t, 3, len(cc)) + assert.Equal(t, 4, len(cc[2].Monitors)) + assert.Equal(t, "mygroup", cc[2].CephFS.SubvolumeGroup) + }) + + t.Run("remove subvolumegroup", func(t *testing.T) { + csiClusterConfigEntry3.CephFS.SubvolumeGroup = "" + s, err = updateCsiClusterConfig(s, "baba", nil) + assert.NoError(t, err) + cc, err := parseCsiClusterConfig(s) + assert.NoError(t, err) + assert.Equal(t, 2, len(cc)) // we only have 2 clusters now + }) + + t.Run("add subvolumegroup and mons after", func(t *testing.T) { + csiClusterConfigEntry4 := CsiClusterConfigEntry{ + CephFS: &CsiCephFSSpec{ + SubvolumeGroup: "mygroup2", + }, + } + s, err = updateCsiClusterConfig(s, "quatre", &csiClusterConfigEntry4) + assert.NoError(t, err) + cc, err := parseCsiClusterConfig(s) + assert.NoError(t, err) + assert.Equal(t, 3, len(cc), cc) + assert.Equal(t, 0, len(cc[2].Monitors)) + assert.Equal(t, "mygroup2", cc[2].CephFS.SubvolumeGroup, cc) + + csiClusterConfigEntry4.Monitors = []string{"10.1.1.1:5000", "10.1.1.2:5000", "10.1.1.3:5000"} + s, err = updateCsiClusterConfig(s, "quatre", &csiClusterConfigEntry4) + assert.NoError(t, err) + cc, err = parseCsiClusterConfig(s) + assert.NoError(t, err) + assert.Equal(t, "mygroup2", cc[2].CephFS.SubvolumeGroup) + assert.Equal(t, 3, len(cc[2].Monitors)) + }) + + t.Run("does it return error on garbage input?", func(t *testing.T) { + _, err = updateCsiClusterConfig("qqq", "beta", &csiClusterConfigEntry2) + assert.Error(t, err) + }) } diff --git a/pkg/operator/ceph/csi/predicate.go b/pkg/operator/ceph/csi/predicate.go index 2742475e9b3b..bfe4055e8aed 100644 --- a/pkg/operator/ceph/csi/predicate.go +++ b/pkg/operator/ceph/csi/predicate.go @@ -22,7 +22,6 @@ import ( "github.com/google/go-cmp/cmp" cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1" - "github.com/rook/rook/pkg/operator/ceph/controller" opcontroller "github.com/rook/rook/pkg/operator/ceph/controller" v1 "k8s.io/api/core/v1" kerrors "k8s.io/apimachinery/pkg/api/errors" @@ -47,7 +46,7 @@ func predicateController(ctx 
context.Context, c client.Client, opNamespace strin // If a Ceph Cluster is created we want to reconcile the csi driver if cephCluster, ok := e.Object.(*cephv1.CephCluster); ok { // If there are more than one ceph cluster in the same namespace do not reconcile - if controller.DuplicateCephClusters(ctx, c, e.Object, false) { + if opcontroller.DuplicateCephClusters(ctx, c, e.Object, false) { return false } diff --git a/pkg/operator/ceph/file/subvolumegroup/controller.go b/pkg/operator/ceph/file/subvolumegroup/controller.go index 09895b9e614f..9c9af8fa6502 100644 --- a/pkg/operator/ceph/file/subvolumegroup/controller.go +++ b/pkg/operator/ceph/file/subvolumegroup/controller.go @@ -40,7 +40,9 @@ import ( "github.com/rook/rook/pkg/clusterd" "github.com/rook/rook/pkg/operator/ceph/cluster/mon" opcontroller "github.com/rook/rook/pkg/operator/ceph/controller" + "github.com/rook/rook/pkg/operator/ceph/csi" "github.com/rook/rook/pkg/operator/ceph/reporting" + "github.com/rook/rook/pkg/operator/k8sutil" kerrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -175,9 +177,18 @@ func (r *ReconcileCephFilesystemSubVolumeGroup) reconcile(request reconcile.Requ logger.Debugf("deleting subvolume group %q", cephFilesystemSubVolumeGroup.Name) err := r.deleteSubVolumeGroup(cephFilesystemSubVolumeGroup) if err != nil { + if strings.Contains(err.Error(), opcontroller.UninitializedCephConfigError) { + logger.Info(opcontroller.OperatorNotInitializedMessage) + return opcontroller.WaitForRequeueIfOperatorNotInitialized, nil + } return reconcile.Result{}, errors.Wrapf(err, "failed to delete ceph ceph filesystem subvolume group %q", cephFilesystemSubVolumeGroup.Name) } + err = csi.SaveClusterConfig(r.context.Clientset, buildClusterID(cephFilesystemSubVolumeGroup), r.clusterInfo, nil) + if err != nil { + return reconcile.Result{}, errors.Wrap(err, "failed to save cluster config") + } + // Remove finalizer err = opcontroller.RemoveFinalizer(r.opManagerContext, r.client, cephFilesystemSubVolumeGroup) if err != nil { @@ -218,6 +229,20 @@ func (r *ReconcileCephFilesystemSubVolumeGroup) reconcile(request reconcile.Requ return reconcile.Result{}, errors.Wrapf(err, "failed to create or update ceph filesystem subvolume group %q", cephFilesystemSubVolumeGroup.Name) } + // Update CSI config map + // If the mon endpoints change, the mon health check go routine will take care of updating the + // config map, so no special care is needed in this controller + csiClusterConfigEntry := csi.CsiClusterConfigEntry{ + Monitors: csi.MonEndpoints(r.clusterInfo.Monitors), + CephFS: &csi.CsiCephFSSpec{ + SubvolumeGroup: cephFilesystemSubVolumeGroup.Name, + }, + } + err = csi.SaveClusterConfig(r.context.Clientset, buildClusterID(cephFilesystemSubVolumeGroup), r.clusterInfo, &csiClusterConfigEntry) + if err != nil { + return reconcile.Result{}, errors.Wrap(err, "failed to save cluster config") + } + // Success! 
Let's update the status r.updateStatus(r.client, request.NamespacedName, cephv1.ConditionReady) @@ -241,11 +266,16 @@ func (r *ReconcileCephFilesystemSubVolumeGroup) createOrUpdateSubVolumeGroup(cep // Delete the ceph filesystem subvolume group func (r *ReconcileCephFilesystemSubVolumeGroup) deleteSubVolumeGroup(cephFilesystemSubVolumeGroup *cephv1.CephFilesystemSubVolumeGroup) error { logger.Infof("deleting ceph filesystem subvolume group object %q", cephFilesystemSubVolumeGroup.Name) - if err := cephclient.DeleteCephFSSubVolumeGroup(r.context, r.clusterInfo, cephFilesystemSubVolumeGroup.Name, cephFilesystemSubVolumeGroup.Spec.FilesystemName); err != nil { + if err := cephclient.DeleteCephFSSubVolumeGroup(r.context, r.clusterInfo, cephFilesystemSubVolumeGroup.Spec.FilesystemName, cephFilesystemSubVolumeGroup.Name); err != nil { code, ok := exec.ExitStatus(err) + // If the subvolumegroup does not exit, we should not return an error + if ok && code == int(syscall.ENOENT) { + logger.Debugf("ceph filesystem subvolume group %q do not exist", cephFilesystemSubVolumeGroup.Name) + return nil + } // If the subvolumegroup has subvolumes the command will fail with: // Error ENOTEMPTY: error in rmdir /volumes/csi - if ok && code == int(syscall.ENOTEMPTY) { + if ok && (code == int(syscall.ENOTEMPTY)) { return errors.Wrapf(err, "failed to delete ceph filesystem subvolume group %q, remove the subvolumes first", cephFilesystemSubVolumeGroup.Name) } @@ -272,9 +302,15 @@ func (r *ReconcileCephFilesystemSubVolumeGroup) updateStatus(client client.Clien } cephFilesystemSubVolumeGroup.Status.Phase = status + cephFilesystemSubVolumeGroup.Status.Info = map[string]string{"clusterID": buildClusterID(cephFilesystemSubVolumeGroup)} if err := reporting.UpdateStatus(client, cephFilesystemSubVolumeGroup); err != nil { logger.Errorf("failed to set ceph ceph filesystem subvolume group %q status to %q. 
%v", name, status, err) return } logger.Debugf("ceph ceph filesystem subvolume group %q status updated to %q", name, status) } + +func buildClusterID(cephFilesystemSubVolumeGroup *cephv1.CephFilesystemSubVolumeGroup) string { + clusterID := fmt.Sprintf("%s-%s-file-%s", cephFilesystemSubVolumeGroup.Namespace, cephFilesystemSubVolumeGroup.Spec.FilesystemName, cephFilesystemSubVolumeGroup.Name) + return k8sutil.Hash(clusterID) +} diff --git a/pkg/operator/ceph/file/subvolumegroup/controller_test.go b/pkg/operator/ceph/file/subvolumegroup/controller_test.go index 6fffbdea5661..1d0e5a555b36 100644 --- a/pkg/operator/ceph/file/subvolumegroup/controller_test.go +++ b/pkg/operator/ceph/file/subvolumegroup/controller_test.go @@ -26,6 +26,7 @@ import ( rookclient "github.com/rook/rook/pkg/client/clientset/versioned/fake" "github.com/rook/rook/pkg/client/clientset/versioned/scheme" "github.com/rook/rook/pkg/clusterd" + "github.com/rook/rook/pkg/operator/ceph/csi" "github.com/rook/rook/pkg/operator/k8sutil" testop "github.com/rook/rook/pkg/operator/test" exectest "github.com/rook/rook/pkg/util/exec/test" @@ -206,15 +207,39 @@ func TestCephClientController(t *testing.T) { client: cl, scheme: s, context: c, - opManagerContext: context.TODO(), + opManagerContext: ctx, } + // Enable CSI + csi.EnableRBD = true + os.Setenv("POD_NAMESPACE", namespace) + // Create CSI config map + ownerRef := &metav1.OwnerReference{} + ownerInfo := k8sutil.NewOwnerInfoWithOwnerRef(ownerRef, "") + err = csi.CreateCsiConfigMap(namespace, c.Clientset, ownerInfo) + assert.NoError(t, err) + res, err := r.Reconcile(ctx, req) assert.NoError(t, err) assert.False(t, res.Requeue) - err = r.client.Get(context.TODO(), req.NamespacedName, cephFilesystemSubVolumeGroup) + err = r.client.Get(ctx, req.NamespacedName, cephFilesystemSubVolumeGroup) assert.NoError(t, err) assert.Equal(t, cephv1.ConditionReady, cephFilesystemSubVolumeGroup.Status.Phase) + assert.NotEmpty(t, cephFilesystemSubVolumeGroup.Status.Info["clusterID"]) + + // test that csi configmap is created + cm, err := c.Clientset.CoreV1().ConfigMaps(namespace).Get(ctx, csi.ConfigName, metav1.GetOptions{}) + assert.NoError(t, err) + assert.NotEmpty(t, cm.Data[csi.ConfigKey]) + assert.Contains(t, cm.Data[csi.ConfigKey], "clusterID") + assert.Contains(t, cm.Data[csi.ConfigKey], "group-a") }) } + +func Test_buildClusterID(t *testing.T) { + longName := "foooooooooooooooooooooooooooooooooooooooooooo" + cephFilesystemSubVolumeGroup := &cephv1.CephFilesystemSubVolumeGroup{ObjectMeta: metav1.ObjectMeta{Namespace: "rook-ceph", Name: longName}, Spec: cephv1.CephFilesystemSubVolumeGroupSpec{FilesystemName: "myfs"}} + clusterID := buildClusterID(cephFilesystemSubVolumeGroup) + assert.Equal(t, "29e92135b7e8c014079b9f9f3566777d", clusterID) +} From aadd5f7ef8ccb73d9ca29e615a21e6941c863d9b Mon Sep 17 00:00:00 2001 From: yati1998 Date: Fri, 21 Jan 2022 17:09:59 +0530 Subject: [PATCH 11/16] csi: updates the default csi-addons image This commit updates the csi-addons image version from v0.1.0 to v0.2.1. Also, this updates the staging path from {{ .KubeletDirPath }}/plugins/kubernetes.io/csi/pv/ to {{ .KubeletDirPath }}/plugins/kubernetes.io/csi/. This changes is done as the reclaimspace on block mode was not working. 
For more details: https://github.com/csi-addons/kubernetes-csi-addons/pull/97 Signed-off-by: yati1998 (cherry picked from commit 07c5fc7e4654775254f4d8a9cabbfc7115dac47e) --- Documentation/ceph-upgrade.md | 4 ++-- Documentation/helm-operator.md | 2 +- deploy/charts/rook-ceph/values.yaml | 2 +- deploy/examples/images.txt | 2 +- deploy/examples/operator-openshift.yaml | 2 +- deploy/examples/operator.yaml | 2 +- pkg/operator/ceph/csi/spec.go | 2 +- .../ceph/csi/template/rbd/csi-rbdplugin-provisioner-dep.yaml | 2 +- pkg/operator/ceph/csi/template/rbd/csi-rbdplugin.yaml | 2 +- 9 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Documentation/ceph-upgrade.md b/Documentation/ceph-upgrade.md index d4f8591b8a58..90365fa80ced 100644 --- a/Documentation/ceph-upgrade.md +++ b/Documentation/ceph-upgrade.md @@ -552,7 +552,7 @@ ROOK_CSI_ATTACHER_IMAGE: "k8s.gcr.io/sig-storage/csi-attacher:v3.4.0" ROOK_CSI_RESIZER_IMAGE: "k8s.gcr.io/sig-storage/csi-resizer:v1.3.0" ROOK_CSI_SNAPSHOTTER_IMAGE: "k8s.gcr.io/sig-storage/csi-snapshotter:v4.2.0" CSI_VOLUME_REPLICATION_IMAGE: "quay.io/csiaddons/volumereplication-operator:v0.1.0" -ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.1.0" +ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.2.1" ``` ### **Use default images** @@ -578,5 +578,5 @@ k8s.gcr.io/sig-storage/csi-resizer:v1.3.0 k8s.gcr.io/sig-storage/csi-snapshotter:v4.2.0 quay.io/cephcsi/cephcsi:v3.5.0 quay.io/csiaddons/volumereplication-operator:v0.1.0 -quay.io/csiaddons/k8s-sidecar:v0.1.0 +quay.io/csiaddons/k8s-sidecar:v0.2.1 ``` diff --git a/Documentation/helm-operator.md b/Documentation/helm-operator.md index 919c5cfc315e..b135ec59f000 100644 --- a/Documentation/helm-operator.md +++ b/Documentation/helm-operator.md @@ -147,7 +147,7 @@ The following tables lists the configurable parameters of the rook-operator char | `csi.volumeReplication.enabled` | Enable Volume Replication. | `false` | | `csi.volumeReplication.image` | Volume Replication Controller image. | `quay.io/csiaddons/volumereplication-operator:v0.1.0` | | `csi.csiAddons.enabled` | Enable CSIAddons | `false` | -| `csi.csiAddons.image` | CSIAddons Sidecar image. | `quay.io/csiaddons/k8s-sidecar:v0.1.0` | +| `csi.csiAddons.image` | CSIAddons Sidecar image. | `quay.io/csiaddons/k8s-sidecar:v0.2.1` | | `admissionController.tolerations` | Array of tolerations in YAML format which will be added to admission controller deployment. | | | `admissionController.nodeAffinity` | The node labels for affinity of the admission controller deployment (***) | | | `allowMultipleFilesystems` | **(experimental in Octopus (v15))** Allows multiple filesystems to be deployed to a Ceph cluster. | `false` | diff --git a/deploy/charts/rook-ceph/values.yaml b/deploy/charts/rook-ceph/values.yaml index 8a869b761dde..1d6a9223c1bb 100644 --- a/deploy/charts/rook-ceph/values.yaml +++ b/deploy/charts/rook-ceph/values.yaml @@ -300,7 +300,7 @@ csi: # Enable the CSIAddons sidecar. 
csiAddons: enabled: false - #image: "quay.io/csiaddons/k8s-sidecar:v0.1.0" + #image: "quay.io/csiaddons/k8s-sidecar:v0.2.1" enableDiscoveryDaemon: false cephCommandsTimeoutSeconds: "15" diff --git a/deploy/examples/images.txt b/deploy/examples/images.txt index 2a4f7edac342..37025ad2d0b1 100644 --- a/deploy/examples/images.txt +++ b/deploy/examples/images.txt @@ -5,6 +5,6 @@ k8s.gcr.io/sig-storage/csi-snapshotter:v4.2.0 quay.io/ceph/ceph:v16.2.7 quay.io/cephcsi/cephcsi:v3.5.0 - quay.io/csiaddons/k8s-sidecar:v0.1.0 + quay.io/csiaddons/k8s-sidecar:v0.2.1 quay.io/csiaddons/volumereplication-operator:v0.1.0 rook/ceph:v1.8.2 diff --git a/deploy/examples/operator-openshift.yaml b/deploy/examples/operator-openshift.yaml index df3dec70e8ee..09316dc4477a 100644 --- a/deploy/examples/operator-openshift.yaml +++ b/deploy/examples/operator-openshift.yaml @@ -420,7 +420,7 @@ data: # CSI_VOLUME_REPLICATION_IMAGE: "quay.io/csiaddons/volumereplication-operator:v0.1.0" # Enable the csi addons sidecar. CSI_ENABLE_CSIADDONS: "false" - # ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.1.0" + # ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.2.1" --- # The deployment for the rook operator # OLM: BEGIN OPERATOR DEPLOYMENT diff --git a/deploy/examples/operator.yaml b/deploy/examples/operator.yaml index 1b3bf73c0ad0..45f890c7ed72 100644 --- a/deploy/examples/operator.yaml +++ b/deploy/examples/operator.yaml @@ -338,7 +338,7 @@ data: # CSI_VOLUME_REPLICATION_IMAGE: "quay.io/csiaddons/volumereplication-operator:v0.1.0" # Enable the csi addons sidecar. CSI_ENABLE_CSIADDONS: "false" - # ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.1.0" + # ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.2.1" --- # OLM: BEGIN OPERATOR DEPLOYMENT apiVersion: apps/v1 diff --git a/pkg/operator/ceph/csi/spec.go b/pkg/operator/ceph/csi/spec.go index cad77a8f102b..0fc086ad9c60 100644 --- a/pkg/operator/ceph/csi/spec.go +++ b/pkg/operator/ceph/csi/spec.go @@ -108,7 +108,7 @@ var ( DefaultSnapshotterImage = "k8s.gcr.io/sig-storage/csi-snapshotter:v4.2.0" DefaultResizerImage = "k8s.gcr.io/sig-storage/csi-resizer:v1.3.0" DefaultVolumeReplicationImage = "quay.io/csiaddons/volumereplication-operator:v0.1.0" - DefaultCSIAddonsImage = "quay.io/csiaddons/k8s-sidecar:v0.1.0" + DefaultCSIAddonsImage = "quay.io/csiaddons/k8s-sidecar:v0.2.1" // Local package template path for RBD //go:embed template/rbd/csi-rbdplugin.yaml diff --git a/pkg/operator/ceph/csi/template/rbd/csi-rbdplugin-provisioner-dep.yaml b/pkg/operator/ceph/csi/template/rbd/csi-rbdplugin-provisioner-dep.yaml index b586ffb94c61..9088d158d02d 100644 --- a/pkg/operator/ceph/csi/template/rbd/csi-rbdplugin-provisioner-dep.yaml +++ b/pkg/operator/ceph/csi/template/rbd/csi-rbdplugin-provisioner-dep.yaml @@ -139,7 +139,7 @@ spec: - "--pod=$(POD_NAME)" - "--namespace=$(POD_NAMESPACE)" - "--pod-uid=$(POD_UID)" - - "--stagingpath={{ .KubeletDirPath }}/plugins/kubernetes.io/csi/pv/" + - "--stagingpath={{ .KubeletDirPath }}/plugins/kubernetes.io/csi/" ports: - containerPort: {{ .CSIAddonsPort }} env: diff --git a/pkg/operator/ceph/csi/template/rbd/csi-rbdplugin.yaml b/pkg/operator/ceph/csi/template/rbd/csi-rbdplugin.yaml index 2b1ae4fcbee5..e688e6af2c4f 100644 --- a/pkg/operator/ceph/csi/template/rbd/csi-rbdplugin.yaml +++ b/pkg/operator/ceph/csi/template/rbd/csi-rbdplugin.yaml @@ -132,7 +132,7 @@ spec: - "--pod=$(POD_NAME)" - "--namespace=$(POD_NAMESPACE)" - "--pod-uid=$(POD_UID)" - - "--stagingpath={{ .KubeletDirPath }}/plugins/kubernetes.io/csi/pv/" + - 
"--stagingpath={{ .KubeletDirPath }}/plugins/kubernetes.io/csi/" ports: - containerPort: {{ .CSIAddonsPort }} env: From b7bbb2198cb97eb270b0f032036e342774221df5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Thu, 20 Jan 2022 14:43:21 +0100 Subject: [PATCH 12/16] osd: rename a few configuration flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For better consistency with IBM_KP_SERVICE_API_KEY and use "ibmkeyprotect" as a provider name for more clarity. Signed-off-by: Sébastien Han (cherry picked from commit 7cc6e23a0b710173e7b46971c4633d6365040db1) --- .github/workflows/canary-integration-test.yml | 2 +- .github/workflows/daily-nightly-jobs.yml | 2 +- .../workflows/encryption-pvc-kms-ibm-kp/action.yml | 10 +++++----- Documentation/ceph-kms.md | 4 ++-- pkg/apis/ceph.rook.io/v1/security.go | 2 +- pkg/daemon/ceph/osd/kms/envs_test.go | 12 ++++++------ pkg/daemon/ceph/osd/kms/ibm_key_protect.go | 8 ++++---- pkg/daemon/ceph/osd/kms/ibm_key_protect_test.go | 2 +- pkg/daemon/ceph/osd/kms/kms.go | 8 ++++---- pkg/daemon/ceph/osd/kms/kms_test.go | 6 +++--- tests/manifests/test-kms-ibm-kp-spec.in | 4 ++-- 11 files changed, 30 insertions(+), 30 deletions(-) diff --git a/.github/workflows/canary-integration-test.yml b/.github/workflows/canary-integration-test.yml index fc1ea7be8067..2111c1a6b7db 100644 --- a/.github/workflows/canary-integration-test.yml +++ b/.github/workflows/canary-integration-test.yml @@ -840,7 +840,7 @@ jobs: - name: run Encryption KMS IBM Key Protect uses: ./.github/workflows/encryption-pvc-kms-ibm-kp - if: "env.IBM_KP_INSTANCE_ID != '' && env.IBM_KP_SERVICE_API_KEY != ''" + if: "env.IBM_KP_SERVICE_INSTANCE_ID != '' && env.IBM_KP_SERVICE_API_KEY != ''" with: ibm-instance-id: ${{ secrets.IBM_INSTANCE_ID }} ibm-service-api-key: ${{ secrets.IBM_SERVICE_API_KEY }} diff --git a/.github/workflows/daily-nightly-jobs.yml b/.github/workflows/daily-nightly-jobs.yml index 5020b9e6532f..31499f4ba538 100644 --- a/.github/workflows/daily-nightly-jobs.yml +++ b/.github/workflows/daily-nightly-jobs.yml @@ -326,7 +326,7 @@ jobs: - name: run Encryption KMS IBM Key Protect uses: ./.github/workflows/encryption-pvc-kms-ibm-kp - if: "env.IBM_KP_INSTANCE_ID != '' && env.IBM_KP_SERVICE_API_KEY != ''" + if: "env.IBM_KP_SERVICE_INSTANCE_ID != '' && env.IBM_KP_SERVICE_API_KEY != ''" with: github-token: ${{ secrets.GITHUB_TOKEN }} ibm-instance-id: ${{ secrets.IBM_KP_INSTANCE_ID }} diff --git a/.github/workflows/encryption-pvc-kms-ibm-kp/action.yml b/.github/workflows/encryption-pvc-kms-ibm-kp/action.yml index 4b83110a846a..ee8fdc6db482 100644 --- a/.github/workflows/encryption-pvc-kms-ibm-kp/action.yml +++ b/.github/workflows/encryption-pvc-kms-ibm-kp/action.yml @@ -2,7 +2,7 @@ name: Encryption KMS IBM Key Protect description: Reusable workflow to test Encryption KMS IBM Key Protect inputs: ibm-instance-id: - description: IBM_KP_INSTANCE_ID from the calling workflow + description: IBM_KP_SERVICE_INSTANCE_ID from the calling workflow required: true ibm-service-api-key: description: IBM_KP_SERVICE_API_KEY from the calling workflow @@ -15,12 +15,12 @@ runs: using: "composite" steps: - name: fail if env no present - if: "env.IBM_KP_INSTANCE_ID == '' || env.IBM_KP_SERVICE_API_KEY == ''" + if: "env.IBM_KP_SERVICE_INSTANCE_ID == '' || env.IBM_KP_SERVICE_API_KEY == ''" env: - IBM_KP_INSTANCE_ID: ${{ inputs.ibm-instance-id }} + IBM_KP_SERVICE_INSTANCE_ID: ${{ inputs.ibm-instance-id }} IBM_KP_SERVICE_API_KEY: ${{ inputs.ibm-service-api-key }} shell: 
bash --noprofile --norc -eo pipefail -x {0} - run: echo "IBM_KP_INSTANCE_ID and IBM_KP_SERVICE_API_KEY must be set in the environment" && exit 0 + run: echo "IBM_KP_SERVICE_INSTANCE_ID and IBM_KP_SERVICE_API_KEY must be set in the environment" && exit 0 - name: setup cluster resources uses: ./.github/workflows/setup-cluster-resources @@ -42,7 +42,7 @@ runs: - name: deploy cluster shell: bash --noprofile --norc -eo pipefail -x {0} env: - IBM_KP_INSTANCE_ID: ${{ inputs.ibm-instance-id }} + IBM_KP_SERVICE_INSTANCE_ID: ${{ inputs.ibm-instance-id }} IBM_KP_SERVICE_API_KEY: ${{ inputs.ibm-service-api-key }} run: | tests/scripts/github-action-helper.sh deploy_manifest_with_local_build deploy/examples/operator.yaml diff --git a/Documentation/ceph-kms.md b/Documentation/ceph-kms.md index 24f0d373b7b6..24f84d55384b 100644 --- a/Documentation/ceph-kms.md +++ b/Documentation/ceph-kms.md @@ -266,8 +266,8 @@ security: kms: # name of the k8s config map containing all the kms connection details connectionDetails: - KMS_PROVIDER: ibm-kp - IBM_KP_INSTANCE_ID: + KMS_PROVIDER: ibmkeyprotect + IBM_KP_SERVICE_INSTANCE_ID: # name of the k8s secret containing the service API Key tokenSecretName: ibm-kp-svc-api-key ``` diff --git a/pkg/apis/ceph.rook.io/v1/security.go b/pkg/apis/ceph.rook.io/v1/security.go index 01f678898028..135c97e0b61d 100644 --- a/pkg/apis/ceph.rook.io/v1/security.go +++ b/pkg/apis/ceph.rook.io/v1/security.go @@ -50,7 +50,7 @@ func (kms *KeyManagementServiceSpec) IsVaultKMS() bool { // IsIBMKeyProtectKMS return whether IBM Key Protect KMS is configured func (kms *KeyManagementServiceSpec) IsIBMKeyProtectKMS() bool { - return getParam(kms.ConnectionDetails, "KMS_PROVIDER") == secrets.TypeIBM + return getParam(kms.ConnectionDetails, "KMS_PROVIDER") == "ibmkeyprotect" } // IsTLSEnabled return KMS TLS details are configured diff --git a/pkg/daemon/ceph/osd/kms/envs_test.go b/pkg/daemon/ceph/osd/kms/envs_test.go index aafb8b31247f..a8c562f936b7 100644 --- a/pkg/daemon/ceph/osd/kms/envs_test.go +++ b/pkg/daemon/ceph/osd/kms/envs_test.go @@ -63,7 +63,7 @@ func TestVaultTLSEnvVarFromSecret(t *testing.T) { t.Run("ibm kp", func(t *testing.T) { spec := cephv1.ClusterSpec{Security: cephv1.SecuritySpec{KeyManagementService: cephv1.KeyManagementServiceSpec{ TokenSecretName: "ibm-kp-token", - ConnectionDetails: map[string]string{"KMS_PROVIDER": "ibm-kp", "IBM_KP_INSTANCE_ID": "1"}}}, + ConnectionDetails: map[string]string{"KMS_PROVIDER": TypeIBM, "IBM_KP_SERVICE_INSTANCE_ID": "1"}}}, } envVars := ConfigToEnvVar(spec) areEnvVarsSorted := sort.SliceIsSorted(envVars, func(i, j int) bool { @@ -71,8 +71,8 @@ func TestVaultTLSEnvVarFromSecret(t *testing.T) { }) assert.True(t, areEnvVarsSorted) assert.Equal(t, 3, len(envVars)) - assert.Contains(t, envVars, v1.EnvVar{Name: "KMS_PROVIDER", Value: "ibm-kp"}) - assert.Contains(t, envVars, v1.EnvVar{Name: "IBM_KP_INSTANCE_ID", Value: "1"}) + assert.Contains(t, envVars, v1.EnvVar{Name: "KMS_PROVIDER", Value: TypeIBM}) + assert.Contains(t, envVars, v1.EnvVar{Name: "IBM_KP_SERVICE_INSTANCE_ID", Value: "1"}) assert.Contains(t, envVars, v1.EnvVar{Name: "IBM_KP_SERVICE_API_KEY", ValueFrom: &v1.EnvVarSource{SecretKeyRef: &v1.SecretKeySelector{LocalObjectReference: v1.LocalObjectReference{Name: "ibm-kp-token"}, Key: "IBM_KP_SERVICE_API_KEY"}}}) }) } @@ -140,11 +140,11 @@ func TestVaultConfigToEnvVar(t *testing.T) { }, { "ibm kp - IBM_KP_SERVICE_API_KEY is removed from the details", - args{spec: cephv1.ClusterSpec{Security: cephv1.SecuritySpec{KeyManagementService: 
cephv1.KeyManagementServiceSpec{ConnectionDetails: map[string]string{"KMS_PROVIDER": "ibm-kp", "IBM_KP_SERVICE_API_KEY": "foo", "IBM_KP_INSTANCE_ID": "1"}, TokenSecretName: "ibm-kp-token"}}}}, + args{spec: cephv1.ClusterSpec{Security: cephv1.SecuritySpec{KeyManagementService: cephv1.KeyManagementServiceSpec{ConnectionDetails: map[string]string{"KMS_PROVIDER": TypeIBM, "IBM_KP_SERVICE_API_KEY": "foo", "IBM_KP_SERVICE_INSTANCE_ID": "1"}, TokenSecretName: "ibm-kp-token"}}}}, []v1.EnvVar{ - {Name: "IBM_KP_INSTANCE_ID", Value: "1"}, {Name: "IBM_KP_SERVICE_API_KEY", ValueFrom: &v1.EnvVarSource{SecretKeyRef: &v1.SecretKeySelector{LocalObjectReference: v1.LocalObjectReference{Name: "ibm-kp-token"}, Key: "IBM_KP_SERVICE_API_KEY"}}}, - {Name: "KMS_PROVIDER", Value: "ibm-kp"}, + {Name: "IBM_KP_SERVICE_INSTANCE_ID", Value: "1"}, + {Name: "KMS_PROVIDER", Value: TypeIBM}, }, }, } diff --git a/pkg/daemon/ceph/osd/kms/ibm_key_protect.go b/pkg/daemon/ceph/osd/kms/ibm_key_protect.go index 675a5380d6b6..c673372f4d33 100644 --- a/pkg/daemon/ceph/osd/kms/ibm_key_protect.go +++ b/pkg/daemon/ceph/osd/kms/ibm_key_protect.go @@ -18,15 +18,15 @@ package kms import ( kp "github.com/IBM/keyprotect-go-client" - "github.com/libopenstorage/secrets" "github.com/pkg/errors" ) const ( + TypeIBM = "ibmkeyprotect" // IbmKeyProtectServiceApiKey is the IBM Key Protect service API key IbmKeyProtectServiceApiKey = "IBM_KP_SERVICE_API_KEY" // IbmKeyProtectInstanceIdKey is the IBM Key Protect instance id - IbmKeyProtectInstanceIdKey = "IBM_KP_INSTANCE_ID" + IbmKeyProtectInstanceIdKey = "IBM_KP_SERVICE_INSTANCE_ID" // IbmKeyProtectBaseUrlKey is the IBM Key Protect base url IbmKeyProtectBaseUrlKey = "IBM_KP_BASE_URL" // IbmKeyProtectTokenUrlKey is the IBM Key Protect token url @@ -39,7 +39,7 @@ var ( kmsIBMKeyProtectMandatoryConnectionDetails = []string{IbmKeyProtectInstanceIdKey, IbmKeyProtectServiceApiKey} // ErrIbmServiceApiKeyNotSet is returned when IBM_KP_SERVICE_API_KEY is not set ErrIbmServiceApiKeyNotSet = errors.Errorf("%s not set.", IbmKeyProtectServiceApiKey) - // ErrIbmInstanceIdKeyNotSet is returned when IBM_KP_INSTANCE_ID is not set + // ErrIbmInstanceIdKeyNotSet is returned when IBM_KP_SERVICE_INSTANCE_ID is not set ErrIbmInstanceIdKeyNotSet = errors.Errorf("%s not set.", IbmKeyProtectInstanceIdKey) ) @@ -83,4 +83,4 @@ func InitKeyProtect(config map[string]string) (*kp.Client, error) { } // IsIBMKeyProtect determines whether the configured KMS is IBM Key Protect -func (c *Config) IsIBMKeyProtect() bool { return c.Provider == secrets.TypeIBM } +func (c *Config) IsIBMKeyProtect() bool { return c.Provider == TypeIBM } diff --git a/pkg/daemon/ceph/osd/kms/ibm_key_protect_test.go b/pkg/daemon/ceph/osd/kms/ibm_key_protect_test.go index 5a19b164a389..aeeaeb9480fe 100644 --- a/pkg/daemon/ceph/osd/kms/ibm_key_protect_test.go +++ b/pkg/daemon/ceph/osd/kms/ibm_key_protect_test.go @@ -35,7 +35,7 @@ func TestInitKeyProtect(t *testing.T) { config[IbmKeyProtectServiceApiKey] = "foo" }) - t.Run("IBM_KP_INSTANCE_ID not set", func(t *testing.T) { + t.Run("IBM_KP_SERVICE_INSTANCE_ID not set", func(t *testing.T) { _, err := InitKeyProtect(config) assert.Error(t, err) assert.ErrorIs(t, err, ErrIbmInstanceIdKeyNotSet) diff --git a/pkg/daemon/ceph/osd/kms/kms.go b/pkg/daemon/ceph/osd/kms/kms.go index ab005c2bf512..cf1734284ab3 100644 --- a/pkg/daemon/ceph/osd/kms/kms.go +++ b/pkg/daemon/ceph/osd/kms/kms.go @@ -65,8 +65,8 @@ func NewConfig(context *clusterd.Context, clusterSpec *cephv1.ClusterSpec, clust config.Provider = secrets.TypeK8s 
case secrets.TypeVault: config.Provider = secrets.TypeVault - case secrets.TypeIBM: - config.Provider = secrets.TypeIBM + case TypeIBM: + config.Provider = TypeIBM default: logger.Errorf("unsupported kms type %q", Provider) } @@ -248,7 +248,7 @@ func ValidateConnectionDetails(ctx context.Context, clusterdContext *clusterd.Co return errors.Wrap(err, "failed to set vault kms token to an env var") } - case secrets.TypeIBM: + case TypeIBM: for _, config := range kmsIBMKeyProtectMandatoryTokenDetails { v, ok := kmsToken.Data[config] if !ok || len(v) == 0 { @@ -281,7 +281,7 @@ func ValidateConnectionDetails(ctx context.Context, clusterdContext *clusterd.Co } } - case secrets.TypeIBM: + case TypeIBM: for _, config := range kmsIBMKeyProtectMandatoryConnectionDetails { if GetParam(securitySpec.KeyManagementService.ConnectionDetails, config) == "" { return errors.Errorf("failed to validate kms config %q. cannot be empty", config) diff --git a/pkg/daemon/ceph/osd/kms/kms_test.go b/pkg/daemon/ceph/osd/kms/kms_test.go index 180f9b32b79c..459d8718beb0 100644 --- a/pkg/daemon/ceph/osd/kms/kms_test.go +++ b/pkg/daemon/ceph/osd/kms/kms_test.go @@ -58,7 +58,7 @@ func TestValidateConnectionDetails(t *testing.T) { } ibmSecuritySpec := &cephv1.SecuritySpec{KeyManagementService: cephv1.KeyManagementServiceSpec{ ConnectionDetails: map[string]string{ - "KMS_PROVIDER": "ibm-kp", + "KMS_PROVIDER": TypeIBM, }, }} @@ -183,8 +183,8 @@ func TestValidateConnectionDetails(t *testing.T) { assert.NoError(t, err) err = ValidateConnectionDetails(ctx, context, ibmSecuritySpec, ns) assert.Error(t, err, "") - assert.EqualError(t, err, "failed to validate kms config \"IBM_KP_INSTANCE_ID\". cannot be empty") - ibmSecuritySpec.KeyManagementService.ConnectionDetails["IBM_KP_INSTANCE_ID"] = "foo" + assert.EqualError(t, err, "failed to validate kms config \"IBM_KP_SERVICE_INSTANCE_ID\". cannot be empty") + ibmSecuritySpec.KeyManagementService.ConnectionDetails["IBM_KP_SERVICE_INSTANCE_ID"] = "foo" }) t.Run("ibm kp - success", func(t *testing.T) { diff --git a/tests/manifests/test-kms-ibm-kp-spec.in b/tests/manifests/test-kms-ibm-kp-spec.in index 4070971a8b79..6bbebd77d1c3 100644 --- a/tests/manifests/test-kms-ibm-kp-spec.in +++ b/tests/manifests/test-kms-ibm-kp-spec.in @@ -2,6 +2,6 @@ spec: security: kms: connectionDetails: - KMS_PROVIDER: ibm-kp - IBM_KP_INSTANCE_ID: $IBM_KP_INSTANCE_ID + KMS_PROVIDER: ibmkeyprotect + IBM_KP_SERVICE_INSTANCE_ID: $IBM_KP_SERVICE_INSTANCE_ID tokenSecretName: rook-ibm-kp-token From a7f48d7cd25aee4e61ee91625bb34ae7b644fd19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Fri, 21 Jan 2022 10:19:21 +0100 Subject: [PATCH 13/16] ci: fix encryption test condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For the "if" to work we need to bring the env in the step with the "env" keyword. Otherwise, the env is never imported and the condition is always false. 
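For reference, a minimal sketch of the pattern this fix applies (mirroring the workflow change in this patch; secret names follow the calling workflow): the secrets are surfaced at step level through `env` so the `if` expression can evaluate them.

```yaml
# Sketch only: without the step-level `env` block, env.IBM_KP_SERVICE_INSTANCE_ID
# is empty when the `if` expression is evaluated and the step is silently skipped.
- name: run encryption KMS IBM Key Protect
  uses: ./.github/workflows/encryption-pvc-kms-ibm-kp
  if: "env.IBM_KP_SERVICE_INSTANCE_ID != '' && env.IBM_KP_SERVICE_API_KEY != ''"
  env:
    IBM_KP_SERVICE_INSTANCE_ID: ${{ secrets.IBM_INSTANCE_ID }}
    IBM_KP_SERVICE_API_KEY: ${{ secrets.IBM_SERVICE_API_KEY }}
  with:
    ibm-instance-id: ${{ secrets.IBM_INSTANCE_ID }}
    ibm-service-api-key: ${{ secrets.IBM_SERVICE_API_KEY }}
```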
Signed-off-by: Sébastien Han (cherry picked from commit 802a86ccd5013da977695b91cc4d9b91db0ca53c) --- .github/workflows/canary-integration-test.yml | 5 ++++- .github/workflows/daily-nightly-jobs.yml | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/canary-integration-test.yml b/.github/workflows/canary-integration-test.yml index 2111c1a6b7db..9aa1c7d304cb 100644 --- a/.github/workflows/canary-integration-test.yml +++ b/.github/workflows/canary-integration-test.yml @@ -838,9 +838,12 @@ jobs: with: fetch-depth: 0 - - name: run Encryption KMS IBM Key Protect + - name: run encryption KMS IBM Key Protect uses: ./.github/workflows/encryption-pvc-kms-ibm-kp if: "env.IBM_KP_SERVICE_INSTANCE_ID != '' && env.IBM_KP_SERVICE_API_KEY != ''" + env: + IBM_KP_SERVICE_INSTANCE_ID: ${{ secrets.IBM_INSTANCE_ID }} + IBM_KP_SERVICE_API_KEY: ${{ secrets.IBM_SERVICE_API_KEY }} with: ibm-instance-id: ${{ secrets.IBM_INSTANCE_ID }} ibm-service-api-key: ${{ secrets.IBM_SERVICE_API_KEY }} diff --git a/.github/workflows/daily-nightly-jobs.yml b/.github/workflows/daily-nightly-jobs.yml index 31499f4ba538..0aa7d9782ab1 100644 --- a/.github/workflows/daily-nightly-jobs.yml +++ b/.github/workflows/daily-nightly-jobs.yml @@ -327,6 +327,9 @@ jobs: - name: run Encryption KMS IBM Key Protect uses: ./.github/workflows/encryption-pvc-kms-ibm-kp if: "env.IBM_KP_SERVICE_INSTANCE_ID != '' && env.IBM_KP_SERVICE_API_KEY != ''" + env: + IBM_KP_SERVICE_INSTANCE_ID: ${{ secrets.IBM_INSTANCE_ID }} + IBM_KP_SERVICE_API_KEY: ${{ secrets.IBM_SERVICE_API_KEY }} with: github-token: ${{ secrets.GITHUB_TOKEN }} ibm-instance-id: ${{ secrets.IBM_KP_INSTANCE_ID }} From daa636b9f12b018a2626bf9e40347ae1154a0870 Mon Sep 17 00:00:00 2001 From: Yuma Ogami Date: Tue, 18 Jan 2022 08:16:35 +0000 Subject: [PATCH 14/16] rgw: handle the non-existence of the bucket correctly when removing bucket ceph might return NoSuchKey than NoSuchBucket when the target bucket does not exist. then we can use GetBucketInfo() to judge the existence of the bucket. see: https://github.com/ceph/ceph/pull/44413 Closes: https://github.com/rook/rook/issues/9494 Signed-off-by: Yuma Ogami (cherry picked from commit c4ee218da18b0d45cb20352323ee11143605e3b0) --- .../ceph/object/bucket/rgw-handlers.go | 10 ++ .../ceph/object/bucket/rgw-handlers_test.go | 92 +++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 pkg/operator/ceph/object/bucket/rgw-handlers_test.go diff --git a/pkg/operator/ceph/object/bucket/rgw-handlers.go b/pkg/operator/ceph/object/bucket/rgw-handlers.go index bcce7954ed1e..6fc13dd781ff 100644 --- a/pkg/operator/ceph/object/bucket/rgw-handlers.go +++ b/pkg/operator/ceph/object/bucket/rgw-handlers.go @@ -101,6 +101,16 @@ func (p *Provisioner) deleteOBCResource(bucketName string) error { } else if errors.Is(err, admin.ErrNoSuchBucket) { // opinion: "not found" is not an error logger.Infof("bucket %q does not exist", p.bucketName) + } else if errors.Is(err, admin.ErrNoSuchKey) { + // ceph might return NoSuchKey than NoSuchBucket when the target bucket does not exist. + // then we can use GetBucketInfo() to judge the existence of the bucket. 
+ // see: https://github.com/ceph/ceph/pull/44413 + _, err2 := p.adminOpsClient.GetBucketInfo(p.clusterInfo.Context, admin.Bucket{Bucket: bucketName, PurgeObject: &thePurge}) + if errors.Is(err2, admin.ErrNoSuchBucket) { + logger.Infof("bucket info %q does not exist", p.bucketName) + } else { + return errors.Wrapf(err, "failed to delete bucket %q (could not get bucket info)", bucketName) + } } else { return errors.Wrapf(err, "failed to delete bucket %q", bucketName) } diff --git a/pkg/operator/ceph/object/bucket/rgw-handlers_test.go b/pkg/operator/ceph/object/bucket/rgw-handlers_test.go new file mode 100644 index 000000000000..ba487b8d16c4 --- /dev/null +++ b/pkg/operator/ceph/object/bucket/rgw-handlers_test.go @@ -0,0 +1,92 @@ +/* +Copyright 2022 The Rook Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package bucket + +import ( + "bytes" + "encoding/json" + "fmt" + "io/ioutil" + "net/http" + "testing" + + "github.com/ceph/go-ceph/rgw/admin" + rookclient "github.com/rook/rook/pkg/client/clientset/versioned/fake" + "github.com/rook/rook/pkg/clusterd" + "github.com/rook/rook/pkg/daemon/ceph/client" + cephobject "github.com/rook/rook/pkg/operator/ceph/object" + "github.com/rook/rook/pkg/operator/test" + "github.com/stretchr/testify/assert" +) + +type statusError struct { + Code string `json:"Code,omitempty"` + RequestID string `json:"RequestId,omitempty"` + HostID string `json:"HostId,omitempty"` +} + +func TestDeleteOBCResource(t *testing.T) { + clusterInfo := client.AdminTestClusterInfo("ns") + p := NewProvisioner(&clusterd.Context{RookClientset: rookclient.NewSimpleClientset(), Clientset: test.New(t, 1)}, clusterInfo) + mockClient := func(errCodeRemoveBucket string, errCodeGetBucketInfo string) *cephobject.MockClient { + return &cephobject.MockClient{ + MockDo: func(req *http.Request) (*http.Response, error) { + if req.URL.Path == "rook-ceph-rgw-my-store.mycluster.svc/admin/bucket" { + if req.Method == http.MethodDelete { + status, _ := json.Marshal(statusError{errCodeRemoveBucket, "requestid", "hostid"}) + return &http.Response{ + StatusCode: 404, + Body: ioutil.NopCloser(bytes.NewReader([]byte(status))), + }, nil + } + if req.Method == http.MethodGet { + status, _ := json.Marshal(statusError{errCodeGetBucketInfo, "requestid", "hostid"}) + return &http.Response{ + StatusCode: 404, + Body: ioutil.NopCloser(bytes.NewReader([]byte(status))), + }, nil + } + } + return nil, fmt.Errorf("unexpected request: %q. method %q. 
path %q", req.URL.RawQuery, req.Method, req.URL.Path) + }, + } + } + + t.Run("remove bucket returns NoSuchBucket", func(t *testing.T) { + adminClient, err := admin.New("rook-ceph-rgw-my-store.mycluster.svc", "53S6B9S809NUP19IJ2K3", "1bXPegzsGClvoGAiJdHQD1uOW2sQBLAZM9j9VtXR", mockClient("NoSuchBucket", "")) + assert.NoError(t, err) + p.adminOpsClient = adminClient + err = p.deleteOBCResource("bucket") + assert.NoError(t, err) + }) + + t.Run("remove bucket returns NoSuchKey and get bucket info returns NoSuchBucket", func(t *testing.T) { + adminClient, err := admin.New("rook-ceph-rgw-my-store.mycluster.svc", "53S6B9S809NUP19IJ2K3", "1bXPegzsGClvoGAiJdHQD1uOW2sQBLAZM9j9VtXR", mockClient("NoSuchKey", "NoSuchBucket")) + assert.NoError(t, err) + p.adminOpsClient = adminClient + err = p.deleteOBCResource("bucket") + assert.NoError(t, err) + }) + + t.Run("remove bucket returns NoSuchKey and get bucket info returns an error other than NoSuchBucket", func(t *testing.T) { + adminClient, err := admin.New("rook-ceph-rgw-my-store.mycluster.svc", "53S6B9S809NUP19IJ2K3", "1bXPegzsGClvoGAiJdHQD1uOW2sQBLAZM9j9VtXR", mockClient("NoSuchKey", "NoSuchKey")) + assert.NoError(t, err) + p.adminOpsClient = adminClient + err = p.deleteOBCResource("bucket") + assert.Error(t, err) + }) +} From 64641a6ce8293479f3eb4fb2b77507cc1a527b3f Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Fri, 21 Jan 2022 17:54:16 +0530 Subject: [PATCH 15/16] csi: update cephcsi to 3.5.1 release This PR updates the cephcsi version from 3.5.0 to 3.5.1 which includes minor fixes fixes: #9619 Signed-off-by: Madhu Rajanna (cherry picked from commit 39825f230211ca7bdc008793ecabae5956658d62) --- Documentation/ceph-upgrade.md | 4 ++-- Documentation/helm-operator.md | 2 +- deploy/charts/rook-ceph/values.yaml | 2 +- deploy/examples/images.txt | 2 +- deploy/examples/operator-openshift.yaml | 2 +- deploy/examples/operator.yaml | 2 +- pkg/operator/ceph/csi/spec.go | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/ceph-upgrade.md b/Documentation/ceph-upgrade.md index 90365fa80ced..0748ac4be7ca 100644 --- a/Documentation/ceph-upgrade.md +++ b/Documentation/ceph-upgrade.md @@ -545,7 +545,7 @@ kubectl -n $ROOK_OPERATOR_NAMESPACE edit configmap rook-ceph-operator-config The default upstream images are included below, which you can change to your desired images. ```yaml -ROOK_CSI_CEPH_IMAGE: "quay.io/cephcsi/cephcsi:v3.5.0" +ROOK_CSI_CEPH_IMAGE: "quay.io/cephcsi/cephcsi:v3.5.1" ROOK_CSI_REGISTRAR_IMAGE: "k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.4.0" ROOK_CSI_PROVISIONER_IMAGE: "k8s.gcr.io/sig-storage/csi-provisioner:v3.1.0" ROOK_CSI_ATTACHER_IMAGE: "k8s.gcr.io/sig-storage/csi-attacher:v3.4.0" @@ -576,7 +576,7 @@ k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.4.0 k8s.gcr.io/sig-storage/csi-provisioner:v3.1.0 k8s.gcr.io/sig-storage/csi-resizer:v1.3.0 k8s.gcr.io/sig-storage/csi-snapshotter:v4.2.0 -quay.io/cephcsi/cephcsi:v3.5.0 +quay.io/cephcsi/cephcsi:v3.5.1 quay.io/csiaddons/volumereplication-operator:v0.1.0 quay.io/csiaddons/k8s-sidecar:v0.2.1 ``` diff --git a/Documentation/helm-operator.md b/Documentation/helm-operator.md index b135ec59f000..0fcab3438c69 100644 --- a/Documentation/helm-operator.md +++ b/Documentation/helm-operator.md @@ -134,7 +134,7 @@ The following tables lists the configurable parameters of the rook-operator char | `csi.rbdLivenessMetricsPort` | Ceph CSI RBD driver metrics port. 
| `8080` | | `csi.forceCephFSKernelClient` | Enable Ceph Kernel clients on kernel < 4.17 which support quotas for Cephfs. | `true` | | `csi.kubeletDirPath` | Kubelet root directory path (if the Kubelet uses a different path for the `--root-dir` flag) | `/var/lib/kubelet` | -| `csi.cephcsi.image` | Ceph CSI image. | `quay.io/cephcsi/cephcsi:v3.5.0` | +| `csi.cephcsi.image` | Ceph CSI image. | `quay.io/cephcsi/cephcsi:v3.5.1` | | `csi.rbdPluginUpdateStrategy` | CSI Rbd plugin daemonset update strategy, supported values are OnDelete and RollingUpdate. | `OnDelete` | | `csi.cephFSPluginUpdateStrategy` | CSI CephFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate. | `OnDelete` | | `csi.registrar.image` | Kubernetes CSI registrar image. | `k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.4.0` | diff --git a/deploy/charts/rook-ceph/values.yaml b/deploy/charts/rook-ceph/values.yaml index 1d6a9223c1bb..1213e5cf8da0 100644 --- a/deploy/charts/rook-ceph/values.yaml +++ b/deploy/charts/rook-ceph/values.yaml @@ -276,7 +276,7 @@ csi: #rbdLivenessMetricsPort: 9080 #kubeletDirPath: /var/lib/kubelet #cephcsi: - #image: quay.io/cephcsi/cephcsi:v3.5.0 + #image: quay.io/cephcsi/cephcsi:v3.5.1 #registrar: #image: k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.4.0 #provisioner: diff --git a/deploy/examples/images.txt b/deploy/examples/images.txt index 37025ad2d0b1..42d6e220960f 100644 --- a/deploy/examples/images.txt +++ b/deploy/examples/images.txt @@ -4,7 +4,7 @@ k8s.gcr.io/sig-storage/csi-resizer:v1.3.0 k8s.gcr.io/sig-storage/csi-snapshotter:v4.2.0 quay.io/ceph/ceph:v16.2.7 - quay.io/cephcsi/cephcsi:v3.5.0 + quay.io/cephcsi/cephcsi:v3.5.1 quay.io/csiaddons/k8s-sidecar:v0.2.1 quay.io/csiaddons/volumereplication-operator:v0.1.0 rook/ceph:v1.8.2 diff --git a/deploy/examples/operator-openshift.yaml b/deploy/examples/operator-openshift.yaml index 09316dc4477a..58bfd846f171 100644 --- a/deploy/examples/operator-openshift.yaml +++ b/deploy/examples/operator-openshift.yaml @@ -161,7 +161,7 @@ data: # The default version of CSI supported by Rook will be started. To change the version # of the CSI driver to something other than what is officially supported, change # these images to the desired release of the CSI driver. - # ROOK_CSI_CEPH_IMAGE: "quay.io/cephcsi/cephcsi:v3.5.0" + # ROOK_CSI_CEPH_IMAGE: "quay.io/cephcsi/cephcsi:v3.5.1" # ROOK_CSI_REGISTRAR_IMAGE: "k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.4.0" # ROOK_CSI_RESIZER_IMAGE: "k8s.gcr.io/sig-storage/csi-resizer:v1.3.0" # ROOK_CSI_PROVISIONER_IMAGE: "k8s.gcr.io/sig-storage/csi-provisioner:v3.1.0" diff --git a/deploy/examples/operator.yaml b/deploy/examples/operator.yaml index 45f890c7ed72..9893f183efb3 100644 --- a/deploy/examples/operator.yaml +++ b/deploy/examples/operator.yaml @@ -79,7 +79,7 @@ data: # The default version of CSI supported by Rook will be started. To change the version # of the CSI driver to something other than what is officially supported, change # these images to the desired release of the CSI driver. 
- # ROOK_CSI_CEPH_IMAGE: "quay.io/cephcsi/cephcsi:v3.5.0" + # ROOK_CSI_CEPH_IMAGE: "quay.io/cephcsi/cephcsi:v3.5.1" # ROOK_CSI_REGISTRAR_IMAGE: "k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.4.0" # ROOK_CSI_RESIZER_IMAGE: "k8s.gcr.io/sig-storage/csi-resizer:v1.3.0" # ROOK_CSI_PROVISIONER_IMAGE: "k8s.gcr.io/sig-storage/csi-provisioner:v3.1.0" diff --git a/pkg/operator/ceph/csi/spec.go b/pkg/operator/ceph/csi/spec.go index 0fc086ad9c60..de394bef79aa 100644 --- a/pkg/operator/ceph/csi/spec.go +++ b/pkg/operator/ceph/csi/spec.go @@ -101,7 +101,7 @@ var ( // manually challenging. var ( // image names - DefaultCSIPluginImage = "quay.io/cephcsi/cephcsi:v3.5.0" + DefaultCSIPluginImage = "quay.io/cephcsi/cephcsi:v3.5.1" DefaultRegistrarImage = "k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.4.0" DefaultProvisionerImage = "k8s.gcr.io/sig-storage/csi-provisioner:v3.1.0" DefaultAttacherImage = "k8s.gcr.io/sig-storage/csi-attacher:v3.4.0" From c0e46c572130dfbd11e6049cca11b31815fc26d1 Mon Sep 17 00:00:00 2001 From: Travis Nielsen Date: Fri, 21 Jan 2022 11:57:49 -0700 Subject: [PATCH 16/16] build: update the release version to v1.8.3 With the patch release the example manifests and docs are updated to v1.8.3 Signed-off-by: Travis Nielsen --- Documentation/ceph-monitoring.md | 2 +- Documentation/ceph-upgrade.md | 30 ++++++++++++------------- Documentation/quickstart.md | 2 +- deploy/examples/direct-mount.yaml | 2 +- deploy/examples/images.txt | 2 +- deploy/examples/operator-openshift.yaml | 2 +- deploy/examples/operator.yaml | 2 +- deploy/examples/osd-purge.yaml | 2 +- deploy/examples/toolbox-job.yaml | 4 ++-- deploy/examples/toolbox.yaml | 2 +- 10 files changed, 25 insertions(+), 25 deletions(-) diff --git a/Documentation/ceph-monitoring.md b/Documentation/ceph-monitoring.md index bcc5976b1e0f..4625b733dc96 100644 --- a/Documentation/ceph-monitoring.md +++ b/Documentation/ceph-monitoring.md @@ -38,7 +38,7 @@ With the Prometheus operator running, we can create a service monitor that will From the root of your locally cloned Rook repo, go the monitoring directory: ```console -$ git clone --single-branch --branch v1.8.2 https://github.com/rook/rook.git +$ git clone --single-branch --branch v1.8.3 https://github.com/rook/rook.git cd rook/deploy/examples/monitoring ``` diff --git a/Documentation/ceph-upgrade.md b/Documentation/ceph-upgrade.md index 0748ac4be7ca..2849a555e0f8 100644 --- a/Documentation/ceph-upgrade.md +++ b/Documentation/ceph-upgrade.md @@ -71,12 +71,12 @@ With this upgrade guide, there are a few notes to consider: Unless otherwise noted due to extenuating requirements, upgrades from one patch release of Rook to another are as simple as updating the common resources and the image of the Rook operator. For -example, when Rook v1.8.2 is released, the process of updating from v1.8.0 is as simple as running +example, when Rook v1.8.3 is released, the process of updating from v1.8.0 is as simple as running the following: First get the latest common resources manifests that contain the latest changes for Rook v1.8. ```sh -git clone --single-branch --depth=1 --branch v1.8.2 https://github.com/rook/rook.git +git clone --single-branch --depth=1 --branch v1.8.3 https://github.com/rook/rook.git cd rook/deploy/examples ``` @@ -87,7 +87,7 @@ section for instructions on how to change the default namespaces in `common.yaml Then apply the latest changes from v1.8 and update the Rook Operator image. 
```console kubectl apply -f common.yaml -f crds.yaml -kubectl -n rook-ceph set image deploy/rook-ceph-operator rook-ceph-operator=rook/ceph:v1.8.2 +kubectl -n rook-ceph set image deploy/rook-ceph-operator rook-ceph-operator=rook/ceph:v1.8.3 ``` As exemplified above, it is a good practice to update Rook-Ceph common resources from the example @@ -266,7 +266,7 @@ Any pod that is using a Rook volume should also remain healthy: ## Rook Operator Upgrade Process In the examples given in this guide, we will be upgrading a live Rook cluster running `v1.7.8` to -the version `v1.8.2`. This upgrade should work from any official patch release of Rook v1.7 to any +the version `v1.8.3`. This upgrade should work from any official patch release of Rook v1.7 to any official patch release of v1.8. **Rook release from `master` are expressly unsupported.** It is strongly recommended that you use @@ -291,7 +291,7 @@ by the Operator. Also update the Custom Resource Definitions (CRDs). Get the latest common resources manifests that contain the latest changes. ```sh -git clone --single-branch --depth=1 --branch v1.8.2 https://github.com/rook/rook.git +git clone --single-branch --depth=1 --branch v1.8.3 https://github.com/rook/rook.git cd rook/deploy/examples ``` @@ -343,7 +343,7 @@ The largest portion of the upgrade is triggered when the operator's image is upd When the operator is updated, it will proceed to update all of the Ceph daemons. ```sh -kubectl -n $ROOK_OPERATOR_NAMESPACE set image deploy/rook-ceph-operator rook-ceph-operator=rook/ceph:v1.8.2 +kubectl -n $ROOK_OPERATOR_NAMESPACE set image deploy/rook-ceph-operator rook-ceph-operator=rook/ceph:v1.8.3 ``` #### Admission controller @@ -377,16 +377,16 @@ watch --exec kubectl -n $ROOK_CLUSTER_NAMESPACE get deployments -l rook_cluster= ``` As an example, this cluster is midway through updating the OSDs. When all deployments report `1/1/1` -availability and `rook-version=v1.8.2`, the Ceph cluster's core components are fully updated. +availability and `rook-version=v1.8.3`, the Ceph cluster's core components are fully updated. >``` >Every 2.0s: kubectl -n rook-ceph get deployment -o j... > ->rook-ceph-mgr-a req/upd/avl: 1/1/1 rook-version=v1.8.2 ->rook-ceph-mon-a req/upd/avl: 1/1/1 rook-version=v1.8.2 ->rook-ceph-mon-b req/upd/avl: 1/1/1 rook-version=v1.8.2 ->rook-ceph-mon-c req/upd/avl: 1/1/1 rook-version=v1.8.2 ->rook-ceph-osd-0 req/upd/avl: 1// rook-version=v1.8.2 +>rook-ceph-mgr-a req/upd/avl: 1/1/1 rook-version=v1.8.3 +>rook-ceph-mon-a req/upd/avl: 1/1/1 rook-version=v1.8.3 +>rook-ceph-mon-b req/upd/avl: 1/1/1 rook-version=v1.8.3 +>rook-ceph-mon-c req/upd/avl: 1/1/1 rook-version=v1.8.3 +>rook-ceph-osd-0 req/upd/avl: 1// rook-version=v1.8.3 >rook-ceph-osd-1 req/upd/avl: 1/1/1 rook-version=v1.7.8 >rook-ceph-osd-2 req/upd/avl: 1/1/1 rook-version=v1.7.8 >``` @@ -398,14 +398,14 @@ An easy check to see if the upgrade is totally finished is to check that there i # kubectl -n $ROOK_CLUSTER_NAMESPACE get deployment -l rook_cluster=$ROOK_CLUSTER_NAMESPACE -o jsonpath='{range .items[*]}{"rook-version="}{.metadata.labels.rook-version}{"\n"}{end}' | sort | uniq This cluster is not yet finished: rook-version=v1.7.8 - rook-version=v1.8.2 + rook-version=v1.8.3 This cluster is finished: - rook-version=v1.8.2 + rook-version=v1.8.3 ``` ### **5. Verify the updated cluster** -At this point, your Rook operator should be running version `rook/ceph:v1.8.2`. +At this point, your Rook operator should be running version `rook/ceph:v1.8.3`. 
Verify the Ceph cluster's health using the [health verification section](#health-verification). diff --git a/Documentation/quickstart.md b/Documentation/quickstart.md index aee8b046fdda..c09242c315cd 100644 --- a/Documentation/quickstart.md +++ b/Documentation/quickstart.md @@ -34,7 +34,7 @@ In order to configure the Ceph storage cluster, at least one of these local stor A simple Rook cluster can be created with the following kubectl commands and [example manifests](https://github.com/rook/rook/blob/{{ branchName }}/deploy/examples). ```console -$ git clone --single-branch --branch v1.8.2 https://github.com/rook/rook.git +$ git clone --single-branch --branch v1.8.3 https://github.com/rook/rook.git cd rook/deploy/examples kubectl create -f crds.yaml -f common.yaml -f operator.yaml kubectl create -f cluster.yaml diff --git a/deploy/examples/direct-mount.yaml b/deploy/examples/direct-mount.yaml index daa32229d0b6..b65a27ef3de0 100644 --- a/deploy/examples/direct-mount.yaml +++ b/deploy/examples/direct-mount.yaml @@ -18,7 +18,7 @@ spec: dnsPolicy: ClusterFirstWithHostNet containers: - name: rook-direct-mount - image: rook/ceph:v1.8.2 + image: rook/ceph:v1.8.3 command: ["/bin/bash"] args: ["-m", "-c", "/usr/local/bin/toolbox.sh"] imagePullPolicy: IfNotPresent diff --git a/deploy/examples/images.txt b/deploy/examples/images.txt index 42d6e220960f..1d566afa05f7 100644 --- a/deploy/examples/images.txt +++ b/deploy/examples/images.txt @@ -7,4 +7,4 @@ quay.io/cephcsi/cephcsi:v3.5.1 quay.io/csiaddons/k8s-sidecar:v0.2.1 quay.io/csiaddons/volumereplication-operator:v0.1.0 - rook/ceph:v1.8.2 + rook/ceph:v1.8.3 diff --git a/deploy/examples/operator-openshift.yaml b/deploy/examples/operator-openshift.yaml index 58bfd846f171..5f715576902f 100644 --- a/deploy/examples/operator-openshift.yaml +++ b/deploy/examples/operator-openshift.yaml @@ -449,7 +449,7 @@ spec: serviceAccountName: rook-ceph-system containers: - name: rook-ceph-operator - image: rook/ceph:v1.8.2 + image: rook/ceph:v1.8.3 args: ["ceph", "operator"] securityContext: runAsNonRoot: true diff --git a/deploy/examples/operator.yaml b/deploy/examples/operator.yaml index 9893f183efb3..c21aef1c50dc 100644 --- a/deploy/examples/operator.yaml +++ b/deploy/examples/operator.yaml @@ -366,7 +366,7 @@ spec: serviceAccountName: rook-ceph-system containers: - name: rook-ceph-operator - image: rook/ceph:v1.8.2 + image: rook/ceph:v1.8.3 args: ["ceph", "operator"] securityContext: runAsNonRoot: true diff --git a/deploy/examples/osd-purge.yaml b/deploy/examples/osd-purge.yaml index 64fe8b2dc508..46d32a367ccd 100644 --- a/deploy/examples/osd-purge.yaml +++ b/deploy/examples/osd-purge.yaml @@ -25,7 +25,7 @@ spec: serviceAccountName: rook-ceph-purge-osd containers: - name: osd-removal - image: rook/ceph:v1.8.2 + image: rook/ceph:v1.8.3 # TODO: Insert the OSD ID in the last parameter that is to be removed # The OSD IDs are a comma-separated list. For example: "0" or "0,2". # If you want to preserve the OSD PVCs, set `--preserve-pvc true`. 
diff --git a/deploy/examples/toolbox-job.yaml b/deploy/examples/toolbox-job.yaml index 1e6b97b05677..b35c6423c617 100644 --- a/deploy/examples/toolbox-job.yaml +++ b/deploy/examples/toolbox-job.yaml @@ -10,7 +10,7 @@ spec: spec: initContainers: - name: config-init - image: rook/ceph:v1.8.2 + image: rook/ceph:v1.8.3 command: ["/usr/local/bin/toolbox.sh"] args: ["--skip-watch"] imagePullPolicy: IfNotPresent @@ -32,7 +32,7 @@ spec: mountPath: /etc/rook containers: - name: script - image: rook/ceph:v1.8.2 + image: rook/ceph:v1.8.3 volumeMounts: - mountPath: /etc/ceph name: ceph-config diff --git a/deploy/examples/toolbox.yaml b/deploy/examples/toolbox.yaml index fe5a56071a9b..1e86f7399fb6 100644 --- a/deploy/examples/toolbox.yaml +++ b/deploy/examples/toolbox.yaml @@ -18,7 +18,7 @@ spec: dnsPolicy: ClusterFirstWithHostNet containers: - name: rook-ceph-tools - image: rook/ceph:v1.8.2 + image: rook/ceph:v1.8.3 command: ["/bin/bash"] args: ["-m", "-c", "/usr/local/bin/toolbox.sh"] imagePullPolicy: IfNotPresent
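
As a quick reference for the image bumps in this series, below is a minimal sketch of overriding the CSI images through the `rook-ceph-operator-config` ConfigMap referenced in the upgrade guide above. The keys and tags are taken from the manifests in these patches; the namespace and the decision to enable the CSI-Addons sidecar are illustrative assumptions, not part of the series.

```yaml
# Sketch only: keys and image tags follow deploy/examples/operator.yaml from this
# patch series; namespace and CSI_ENABLE_CSIADDONS=true are illustrative choices.
apiVersion: v1
kind: ConfigMap
metadata:
  name: rook-ceph-operator-config
  namespace: rook-ceph
data:
  ROOK_CSI_CEPH_IMAGE: "quay.io/cephcsi/cephcsi:v3.5.1"
  ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.2.1"
  CSI_ENABLE_CSIADDONS: "true"
```

Applying (or editing) a ConfigMap like this should let the operator pick up the new images and roll the CSI pods accordingly, as described in the upgrade documentation changed above.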