From 5445ccf4cbc8aa5794e125f1b66dcc16ae4a9572 Mon Sep 17 00:00:00 2001 From: Huamin Chen Date: Thu, 18 Aug 2016 16:50:59 -0400 Subject: [PATCH] support storage class in Ceph RBD volume Signed-off-by: Huamin Chen --- cmd/kube-controller-manager/app/plugins.go | 4 +- .../persistent-volume-provisioning/README.md | 94 ++++++++ .../rbd/ceph-secret-admin.yaml | 6 + .../rbd/ceph-secret-user.yaml | 6 + .../rbd/pod.yaml | 23 ++ .../rbd/rbd-storage-class.yaml | 14 ++ pkg/volume/rbd/disk_manager.go | 5 + pkg/volume/rbd/rbd.go | 227 ++++++++++++++++-- pkg/volume/rbd/rbd_util.go | 68 ++++++ 9 files changed, 432 insertions(+), 15 deletions(-) create mode 100644 examples/experimental/persistent-volume-provisioning/rbd/ceph-secret-admin.yaml create mode 100644 examples/experimental/persistent-volume-provisioning/rbd/ceph-secret-user.yaml create mode 100644 examples/experimental/persistent-volume-provisioning/rbd/pod.yaml create mode 100644 examples/experimental/persistent-volume-provisioning/rbd/rbd-storage-class.yaml diff --git a/cmd/kube-controller-manager/app/plugins.go b/cmd/kube-controller-manager/app/plugins.go index d015bb5b11a1..1a340edf87fb 100644 --- a/cmd/kube-controller-manager/app/plugins.go +++ b/cmd/kube-controller-manager/app/plugins.go @@ -43,6 +43,7 @@ import ( "k8s.io/kubernetes/pkg/volume/glusterfs" "k8s.io/kubernetes/pkg/volume/host_path" "k8s.io/kubernetes/pkg/volume/nfs" + "k8s.io/kubernetes/pkg/volume/rbd" "k8s.io/kubernetes/pkg/volume/vsphere_volume" ) @@ -99,7 +100,8 @@ func ProbeControllerVolumePlugins(cloud cloudprovider.Interface, config componen } allPlugins = append(allPlugins, nfs.ProbeVolumePlugins(nfsConfig)...) allPlugins = append(allPlugins, glusterfs.ProbeVolumePlugins()...) - + // add rbd provisioner + allPlugins = append(allPlugins, rbd.ProbeVolumePlugins()...) 
if cloud != nil { switch { case aws.ProviderName == cloud.ProviderName(): diff --git a/examples/experimental/persistent-volume-provisioning/README.md b/examples/experimental/persistent-volume-provisioning/README.md index 280cffb097b8..1457df7f0c10 100644 --- a/examples/experimental/persistent-volume-provisioning/README.md +++ b/examples/experimental/persistent-volume-provisioning/README.md @@ -121,6 +121,32 @@ parameters: * `type`: [VolumeType](http://docs.openstack.org/admin-guide/dashboard-manage-volumes.html) created in Cinder. Default is empty. * `availability`: Availability Zone. Default is empty. +#### Ceph RBD + +```yaml +apiVersion: extensions/v1beta1 +kind: StorageClass +metadata: + name: fast +provisioner: kubernetes.io/rbd +parameters: + monitors: 10.16.153.105:6789 + adminID: kube + adminSecretName: ceph-secret + adminSecretNamespace: kube-system + pool: kube + userId: kube + secretName: ceph-secret-user +``` + +* `monitors`: Ceph monitors, comma delimited. Required. +* `adminID`: Ceph client ID that is capable of creating images in the pool. Default is "admin". +* `adminSecretName`: Name of the Secret holding the key for `adminID`. Required. +* `adminSecretNamespace`: The namespace of `adminSecretName`. Default is "default". +* `pool`: Ceph RBD pool. Default is "rbd". +* `userId`: Ceph client ID that is used to map the RBD image. Default is the same as `adminID`. +* `secretName`: Name of the Ceph Secret for `userId`, used to map the RBD image. It must exist in the same namespace as the PVCs. Required. + ### User provisioning requests Users request dynamically provisioned storage by including a storage class in their `PersistentVolumeClaim`. @@ -152,6 +178,7 @@ In the future, the storage class may remain in an annotation or become a field o ### Sample output +#### GCE This example uses GCE but any provisioner would follow the same flow. First we note there are no Persistent Volumes in the cluster.
After creating a storage class and a claim including that storage class, we see a new PV is created @@ -184,6 +211,73 @@ $ kubectl get pv ``` + +#### Ceph RBD + +First create Ceph admin's Secret in the system namespace. Here the Secret is created in `kube-system`: + +``` +$ kubectl create -f examples/experimental/persistent-volume-provisioning/rbd/ceph-secret-admin.yaml --namespace=kube-system +``` + +Then create RBD Storage Class: + +``` +$ kubectl create -f examples/experimental/persistent-volume-provisioning/rbd/rbd-storage-class.yaml +``` + +Before creating PVC in user's namespace (e.g. myns), make sure the Ceph user's Secret exists, if not, create the Secret: + +``` +$ kubectl create -f examples/experimental/persistent-volume-provisioning/rbd/ceph-secret-user.yaml --namespace=myns +``` +Now create a PVC in user's namespace (e.g. myns): + +``` +$ kubectl create -f examples/experimental/persistent-volume-provisioning/claim1.json --namespace=myns +``` + +Check the PV and PVC are created: +``` +$ kubectl describe pvc --namespace=myns +Name: claim1 +Namespace: myns +Status: Bound +Volume: pvc-1cfa23b3-664b-11e6-9eb9-90b11c09520d +Labels: +Capacity: 3Gi +Access Modes: RWO +No events. + +$ kubectl describe pv +Name: pvc-1cfa23b3-664b-11e6-9eb9-90b11c09520d +Labels: +Status: Bound +Claim: myns/claim1 +Reclaim Policy: Delete +Access Modes: RWO +Capacity: 3Gi +Message: +Source: + Type: RBD (a Rados Block Device mount on the host that shares a pod's lifetime) + CephMonitors: [10.16.153.105:6789] + RBDImage: kubernetes-dynamic-pvc-1cfb1862-664b-11e6-9a5d-90b11c09520d + FSType: + RBDPool: kube + RadosUser: kube + Keyring: /etc/ceph/keyring + SecretRef: &{ceph-secret-user} + ReadOnly: false +No events. 
+``` + +Create a Pod to use the PVC: + +``` +$ kubectl create -f examples/experimental/persistent-volume-provisioning/rbd/pod.yaml --namespace=myns +``` + + [![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/examples/experimental/persistent-volume-provisioning/README.md?pixel)]() diff --git a/examples/experimental/persistent-volume-provisioning/rbd/ceph-secret-admin.yaml b/examples/experimental/persistent-volume-provisioning/rbd/ceph-secret-admin.yaml new file mode 100644 index 000000000000..5c7af1e13893 --- /dev/null +++ b/examples/experimental/persistent-volume-provisioning/rbd/ceph-secret-admin.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: Secret +metadata: + name: ceph-secret-admin +data: + key: QVFEQ1pMdFhPUnQrSmhBQUFYaERWNHJsZ3BsMmNjcDR6RFZST0E9PQ== diff --git a/examples/experimental/persistent-volume-provisioning/rbd/ceph-secret-user.yaml b/examples/experimental/persistent-volume-provisioning/rbd/ceph-secret-user.yaml new file mode 100644 index 000000000000..32f8420938b3 --- /dev/null +++ b/examples/experimental/persistent-volume-provisioning/rbd/ceph-secret-user.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: Secret +metadata: + name: ceph-secret-user +data: + key: QVFBTWdYaFZ3QkNlRGhBQTlubFBhRnlmVVNhdEdENGRyRldEdlE9PQ== diff --git a/examples/experimental/persistent-volume-provisioning/rbd/pod.yaml b/examples/experimental/persistent-volume-provisioning/rbd/pod.yaml new file mode 100644 index 000000000000..6eea26f94f22 --- /dev/null +++ b/examples/experimental/persistent-volume-provisioning/rbd/pod.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: ReplicationController +metadata: + name: server +spec: + replicas: 1 + selector: + role: server + template: + metadata: + labels: + role: server + spec: + containers: + - name: server + image: nginx + volumeMounts: + - mountPath: /var/lib/www/html + name: mypvc + volumes: + - name: mypvc + persistentVolumeClaim: + claimName: claim1 diff --git 
a/examples/experimental/persistent-volume-provisioning/rbd/rbd-storage-class.yaml b/examples/experimental/persistent-volume-provisioning/rbd/rbd-storage-class.yaml new file mode 100644 index 000000000000..26e3679df22c --- /dev/null +++ b/examples/experimental/persistent-volume-provisioning/rbd/rbd-storage-class.yaml @@ -0,0 +1,14 @@ +apiVersion: extensions/v1beta1 +kind: StorageClass +metadata: + name: slow +provisioner: kubernetes.io/rbd +parameters: + monitors: 10.16.153.105:6789 + adminID: admin + adminSecretName: ceph-secret-admin + adminSecretNamespace: "kube-system" + pool: kube + userId: kube + secretName: ceph-secret-user + diff --git a/pkg/volume/rbd/disk_manager.go b/pkg/volume/rbd/disk_manager.go index 15d1e8ef073a..065049bcffb6 100644 --- a/pkg/volume/rbd/disk_manager.go +++ b/pkg/volume/rbd/disk_manager.go @@ -26,6 +26,7 @@ import ( "os" "github.com/golang/glog" + "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/util/mount" "k8s.io/kubernetes/pkg/volume" ) @@ -37,6 +38,10 @@ type diskManager interface { AttachDisk(disk rbdMounter) error // Detaches the disk from the kubelet's host machine. 
DetachDisk(disk rbdUnmounter, mntPath string) error + // CreateImage creates an rbd image; the returned size is expressed in MB. + CreateImage(provisioner *rbdVolumeProvisioner) (r *api.RBDVolumeSource, volumeSizeMB int, err error) + // DeleteImage deletes an rbd image. + DeleteImage(deleter *rbdVolumeDeleter) error } // utility to mount a disk based filesystem diff --git a/pkg/volume/rbd/rbd.go b/pkg/volume/rbd/rbd.go index bfc852b17ef7..b32947f5b372 100644 --- a/pkg/volume/rbd/rbd.go +++ b/pkg/volume/rbd/rbd.go @@ -18,13 +18,16 @@ package rbd import ( "fmt" + dstrings "strings" "github.com/golang/glog" "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/api/resource" "k8s.io/kubernetes/pkg/types" "k8s.io/kubernetes/pkg/util/exec" "k8s.io/kubernetes/pkg/util/mount" "k8s.io/kubernetes/pkg/util/strings" + "k8s.io/kubernetes/pkg/util/uuid" "k8s.io/kubernetes/pkg/volume" ) @@ -40,9 +43,14 @@ type rbdPlugin struct { var _ volume.VolumePlugin = &rbdPlugin{} var _ volume.PersistentVolumePlugin = &rbdPlugin{} +var _ volume.DeletableVolumePlugin = &rbdPlugin{} +var _ volume.ProvisionableVolumePlugin = &rbdPlugin{} const ( - rbdPluginName = "kubernetes.io/rbd" + rbdPluginName = "kubernetes.io/rbd" + annCephAdminID = "rbd.kubernetes.io/admin" + annCephAdminSecretName = "rbd.kubernetes.io/adminsecretname" + annCephAdminSecretNameSpace = "rbd.kubernetes.io/adminsecretnamespace" ) func (plugin *rbdPlugin) Init(host volume.VolumeHost) error { @@ -86,30 +94,40 @@ func (plugin *rbdPlugin) GetAccessModes() []api.PersistentVolumeAccessMode { } func (plugin *rbdPlugin) NewMounter(spec *volume.Spec, pod *api.Pod, _ volume.VolumeOptions) (volume.Mounter, error) { - secret := "" + var secret string + var err error source, _ := plugin.getRBDVolumeSource(spec) if source.SecretRef != nil { - kubeClient := plugin.host.GetKubeClient() - if kubeClient == nil { - return nil, fmt.Errorf("Cannot get kube client") - } - - secretName, err := kubeClient.Core().Secrets(pod.Namespace).Get(source.SecretRef.Name) - if err != nil { + if secret, err =
plugin.getSecret(pod.Namespace, source.SecretRef.Name); err != nil { glog.Errorf("Couldn't get secret %v/%v", pod.Namespace, source.SecretRef) return nil, err } - for name, data := range secretName.Data { - secret = string(data) - glog.V(1).Infof("ceph secret info: %s/%s", name, secret) - } - } + // Inject real implementations here, test through the internal function. return plugin.newMounterInternal(spec, pod.UID, &RBDUtil{}, plugin.host.GetMounter(), secret) } +func (plugin *rbdPlugin) getSecret(namespace, secretName string) (string, error) { + secret := "" + kubeClient := plugin.host.GetKubeClient() + if kubeClient == nil { + return "", fmt.Errorf("Cannot get kube client") + } + // Fetch the Secret; if it holds several keys, map iteration order decides which value is returned. + secrets, err := kubeClient.Core().Secrets(namespace).Get(secretName) + if err != nil { + return "", err + } + for name, data := range secrets.Data { + secret = string(data) + glog.V(4).Infof("found ceph secret [%q/%q] key %q", namespace, secretName, name) + } + return secret, nil + +} + func (plugin *rbdPlugin) getRBDVolumeSource(spec *volume.Spec) (*api.RBDVolumeSource, bool) { // rbd volumes used directly in a pod have a ReadOnly flag set by the pod author.
// rbd volumes used as a PersistentVolume gets the ReadOnly flag indirectly through the persistent-claim volume used to mount the PV @@ -177,6 +195,187 @@ func (plugin *rbdPlugin) ConstructVolumeSpec(volumeName, mountPath string) (*vol return volume.NewSpecFromVolume(rbdVolume), nil } +func (plugin *rbdPlugin) NewDeleter(spec *volume.Spec) (volume.Deleter, error) { + if spec.PersistentVolume == nil || spec.PersistentVolume.Spec.RBD == nil { + return nil, fmt.Errorf("spec.PersistentVolume.Spec.RBD is nil") + } + admin, adminSecretName, adminSecretNamespace, err := selectorToParam(spec.PersistentVolume) + if err != nil { + return nil, fmt.Errorf("cannot find Ceph credentials to delete rbd PV: %v", err) + } + secret := "" + if secret, err = plugin.getSecret(adminSecretNamespace, adminSecretName); err != nil { + // log error but don't return yet + glog.Errorf("failed to get admin secret from [%q/%q]: %v", adminSecretNamespace, adminSecretName, err) + } + return plugin.newDeleterInternal(spec, admin, secret, &RBDUtil{}) +} + +func (plugin *rbdPlugin) newDeleterInternal(spec *volume.Spec, admin, secret string, manager diskManager) (volume.Deleter, error) { + return &rbdVolumeDeleter{ + rbdMounter: &rbdMounter{ + rbd: &rbd{ + volName: spec.Name(), + Image: spec.PersistentVolume.Spec.RBD.RBDImage, + Pool: spec.PersistentVolume.Spec.RBD.RBDPool, + manager: manager, + plugin: plugin, + }, + Mon: spec.PersistentVolume.Spec.RBD.CephMonitors, + Id: admin, + Secret: secret, + }}, nil +} + +func (plugin *rbdPlugin) NewProvisioner(options volume.VolumeOptions) (volume.Provisioner, error) { + if len(options.AccessModes) == 0 { + options.AccessModes = plugin.GetAccessModes() + } + return plugin.newProvisionerInternal(options, &RBDUtil{}) +} + +func (plugin *rbdPlugin) newProvisionerInternal(options volume.VolumeOptions, manager diskManager) (volume.Provisioner, error) { + return &rbdVolumeProvisioner{ + rbdMounter: &rbdMounter{ + rbd: &rbd{ + manager: manager, + plugin: plugin, + }, + }, +
options: options, + }, nil +} + +type rbdVolumeProvisioner struct { + *rbdMounter + options volume.VolumeOptions +} + +func (r *rbdVolumeProvisioner) Provision() (*api.PersistentVolume, error) { + if r.options.Selector != nil { + return nil, fmt.Errorf("claim Selector is not supported") + } + var err error + adminSecretName := "" + adminSecretNamespace := "default" + secretName := "" + userId := "" + secret := "" + + for k, v := range r.options.Parameters { + switch dstrings.ToLower(k) { + case "monitors": + // Split on commas and spaces, skipping empty fields, so that an empty + // or sloppy value cannot add a blank monitor and slip past the check below. + isSep := func(c rune) bool { return c == ',' || c == ' ' } + r.Mon = append(r.Mon, dstrings.FieldsFunc(v, isSep)...) + case "adminid": + r.Id = v + case "adminsecretname": + adminSecretName = v + case "adminsecretnamespace": + adminSecretNamespace = v + case "userid": + userId = v + case "pool": + r.Pool = v + case "secretname": + secretName = v + default: + return nil, fmt.Errorf("invalid option %q for volume plugin %s", k, r.plugin.GetPluginName()) + } + } + // sanity check + if adminSecretName == "" { + return nil, fmt.Errorf("missing Ceph admin secret name") + } + if secret, err = r.plugin.getSecret(adminSecretNamespace, adminSecretName); err != nil { + // log error but don't return yet + glog.Errorf("failed to get admin secret from [%q/%q]: %v", adminSecretNamespace, adminSecretName, err) + } + r.Secret = secret + if len(r.Mon) < 1 { + return nil, fmt.Errorf("missing Ceph monitors") + } + if secretName == "" { + return nil, fmt.Errorf("missing secret name") + } + if r.Id == "" { + r.Id = "admin" + } + if r.Pool == "" { + r.Pool = "rbd" + } + if userId == "" { + userId = r.Id + } + + // create random image name + image := fmt.Sprintf("kubernetes-dynamic-pvc-%s", uuid.NewUUID()) + r.rbdMounter.Image = image + rbd, sizeMB, err := r.manager.CreateImage(r) + if err != nil { + glog.Errorf("rbd: create volume failed, err: %v", err) + return nil, fmt.Errorf("rbd: create volume failed, err: %v", err) + } + pv := new(api.PersistentVolume) + rbd.SecretRef = new(api.LocalObjectReference) +
rbd.SecretRef.Name = secretName + rbd.RadosUser = userId + pv.Spec.PersistentVolumeSource.RBD = rbd + pv.Spec.PersistentVolumeReclaimPolicy = r.options.PersistentVolumeReclaimPolicy + pv.Spec.AccessModes = r.options.AccessModes + pv.Spec.Capacity = api.ResourceList{ + api.ResourceName(api.ResourceStorage): resource.MustParse(fmt.Sprintf("%dMi", sizeMB)), + } + // record the admin id and admin secret location in the PV annotations so the deleter can find them + paramToSelector(r.Id, adminSecretNamespace, adminSecretName, pv) + return pv, nil +} + +type rbdVolumeDeleter struct { + *rbdMounter +} + +func (r *rbdVolumeDeleter) GetPath() string { + name := rbdPluginName + return r.plugin.host.GetPodVolumeDir(r.podUID, strings.EscapeQualifiedNameForDisk(name), r.volName) +} + +func (r *rbdVolumeDeleter) Delete() error { + return r.manager.DeleteImage(r) +} + +func paramToSelector(admin, adminSecretNamespace, adminSecretName string, pv *api.PersistentVolume) { + if pv.Annotations == nil { + pv.Annotations = make(map[string]string) + } + pv.Annotations[annCephAdminID] = admin + pv.Annotations[annCephAdminSecretName] = adminSecretName + pv.Annotations[annCephAdminSecretNameSpace] = adminSecretNamespace +} + +func selectorToParam(pv *api.PersistentVolume) (string, string, string, error) { + if pv == nil || pv.Annotations == nil { + return "", "", "", fmt.Errorf("PV is nil or has no annotation, cannot get Ceph admin credentials") + } + var admin, adminSecretName, adminSecretNamespace string + found := false + admin, found = pv.Annotations[annCephAdminID] + if !found { + return "", "", "", fmt.Errorf("Cannot get Ceph admin id from PV annotations") + } + adminSecretName, found = pv.Annotations[annCephAdminSecretName] + if !found { + return "", "", "", fmt.Errorf("Cannot get Ceph admin secret from PV annotations") + } + adminSecretNamespace, found = pv.Annotations[annCephAdminSecretNameSpace] + if !found { + return "", "", "", fmt.Errorf("Cannot get Ceph admin secret namespace from PV annotations") + } + return admin, adminSecretName, adminSecretNamespace, nil +} +
type rbd struct { volName string podUID types.UID diff --git a/pkg/volume/rbd/rbd_util.go b/pkg/volume/rbd/rbd_util.go index 1cfc42b5d719..2fa2745fd075 100644 --- a/pkg/volume/rbd/rbd_util.go +++ b/pkg/volume/rbd/rbd_util.go @@ -34,6 +34,7 @@ import ( "time" "github.com/golang/glog" + "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/util/exec" "k8s.io/kubernetes/pkg/util/mount" "k8s.io/kubernetes/pkg/util/node" @@ -307,3 +308,70 @@ func (util *RBDUtil) DetachDisk(c rbdUnmounter, mntPath string) error { } return nil } + +func (util *RBDUtil) CreateImage(p *rbdVolumeProvisioner) (r *api.RBDVolumeSource, size int, err error) { + volSizeBytes := p.options.Capacity.Value() + // convert to MB (rounding up), the unit rbd uses by default for --size + const mb = 1024 * 1024 + sz := int((volSizeBytes + mb - 1) / mb) + volSz := fmt.Sprintf("%d", sz) + // rbd create; callers must supply at least one monitor + l := len(p.rbdMounter.Mon) + // pick a mon randomly + start := rand.Int() % l + // iterate all monitors until create succeeds. + for i := start; i < start+l; i++ { + mon := p.Mon[i%l] + glog.V(4).Infof("rbd: create %s size %s using mon %s, pool %s id %s", p.rbdMounter.Image, volSz, mon, p.rbdMounter.Pool, p.rbdMounter.Id) + var output []byte + output, err = p.rbdMounter.plugin.execCommand("rbd", + []string{"create", p.rbdMounter.Image, "--size", volSz, "--pool", p.rbdMounter.Pool, "--id", p.rbdMounter.Id, "-m", mon, "--key=" + p.rbdMounter.Secret}) + if err == nil { + break + } else { + glog.V(4).Infof("failed to create rbd image, error %v output %v", err, string(output)) + } + // if failed, fall back to image format 1 + output, err = p.rbdMounter.plugin.execCommand("rbd", + []string{"create", p.rbdMounter.Image, "--size", volSz, "--pool", p.rbdMounter.Pool, "--id", p.rbdMounter.Id, "-m", mon, "--key=" + p.rbdMounter.Secret, "--image-format", "1"}) + if err == nil { + break + } else { + glog.V(4).Infof("failed to create rbd image with image format 1, error %v output %v", err, string(output)) + } + + } + + if err != nil { + glog.Errorf("rbd: Error creating rbd
image: %v", err) + return nil, 0, err + } + + return &api.RBDVolumeSource{ + CephMonitors: p.rbdMounter.Mon, + RBDImage: p.rbdMounter.Image, + RBDPool: p.rbdMounter.Pool, + }, sz, nil +} + +func (util *RBDUtil) DeleteImage(p *rbdVolumeDeleter) error { + var err error + var output []byte + // rbd rm; callers must supply at least one monitor + l := len(p.rbdMounter.Mon) + // pick a mon randomly + start := rand.Int() % l + // iterate all monitors until rm succeeds. + for i := start; i < start+l; i++ { + mon := p.rbdMounter.Mon[i%l] + glog.V(4).Infof("rbd: rm %s using mon %s, pool %s id %s", p.rbdMounter.Image, mon, p.rbdMounter.Pool, p.rbdMounter.Id) + output, err = p.plugin.execCommand("rbd", + []string{"rm", p.rbdMounter.Image, "--pool", p.rbdMounter.Pool, "--id", p.rbdMounter.Id, "-m", mon, "--key=" + p.rbdMounter.Secret}) + if err == nil { + return nil + } else { + glog.Errorf("failed to delete rbd image, error %v output %v", err, string(output)) + } + } + return err +}