ceph: support OSD on PVC backed by LV
"OSD on PVC" doesn't work for PV backed by LV. Fixing this problem
by the following changes.

- Rook accepts LVM disk type.
- If a LV-backed device is passed, Rook/Ceph invokes
  "ceph-volume lvm prepare" with "--data vg/lv"
  instead of "--data /path/to/device".
- If a LV-backed device is passed, Rook/Ceph suppresses
  activation/deactivation of VG that owns this LV.
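
For example (illustrative names), a PV that maps to the LV "vg1/lv1"
now results in roughly

  ceph-volume lvm prepare --data vg1/lv1

instead of

  ceph-volume lvm prepare --data /path/to/device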

Fixes: rook#4185
Signed-off-by: dulltz <isrgnoe@gmail.com>
Co-authored-by: Satoru Takeuchi <satoru.takeuchi@gmail.com>
2 people authored and RAJAT SINGH committed Dec 17, 2019
1 parent ea45457 commit d517bfd
Showing 12 changed files with 100 additions and 25 deletions.
2 changes: 1 addition & 1 deletion Documentation/ceph-cluster-crd.md
@@ -242,7 +242,7 @@ However, if there are more OSDs than nodes, this anti-affinity will not be effec
* `portable`: If `true`, the OSDs will be allowed to move between nodes during failover. This requires a storage class that supports portability (e.g. `aws-ebs`, but not the local storage provisioner). If `false`, the OSDs will be assigned to a node permanently. Rook will configure Ceph's CRUSH map to support the portability.
* `volumeClaimTemplates`: A list of PVC templates to use for provisioning the underlying storage devices.
* `resources.requests.storage`: The desired capacity for the underlying storage devices.
* `storageClassName`: The StorageClass to provision PVCs from. Default would be to use the cluster-default StorageClass.
* `storageClassName`: The StorageClass to provision PVCs from. Default would be to use the cluster-default StorageClass. This StorageClass should provide a raw block device or logical volume. Other types are not supported.
* `volumeMode`: The volume mode to be set for the PVC, which should be `Block`.
* `accessModes`: The access mode for the PVC to be bound by the OSD.

1 change: 1 addition & 0 deletions PendingReleaseNotes.md
@@ -17,6 +17,7 @@
fix this to be `<dataDirHostPath>/log/<namespace>`, the same as other daemons.
- Use the mon configuration database for directory-based OSDs, and do not generate a config
- Add a new CRD property `devicePathFilter` to support device filtering with path names, e.g. `/dev/disk/by-path/pci-.*-sas-.*`.
- Support PersistentVolumes backed by LVM logical volumes for "OSD on PVC".
- A new ceph-crashcollector controller has been added; the new pod will run on any node where a Ceph pod is running. Read more about this in the [doc](Documentation/ceph-cluster-crd.html#cluster-wide-resources-configuration-settings)
- PriorityClassNames can now be added to the Rook/Ceph components to influence the scheduler's pod preemption.
- mgr/mon/osd/rbdmirror: [priority class names configuration settings](Documentation/ceph-cluster-crd.md#priority-class-names-configuration-settings)
4 changes: 3 additions & 1 deletion cmd/rook/ceph/osd.go
@@ -74,6 +74,7 @@ var (
osdIsDevice bool
pvcBackedOSD bool
lvPath string
lvBackedPV bool
)

func addOSDFlags(command *cobra.Command) {
@@ -103,6 +104,7 @@ func addOSDFlags(command *cobra.Command) {
osdStartCmd.Flags().StringVar(&osdStoreType, "osd-store-type", "", "whether the osd is bluestore or filestore")
osdStartCmd.Flags().BoolVar(&pvcBackedOSD, "pvc-backed-osd", false, "Whether the OSD backing store in PVC or not")
osdStartCmd.Flags().StringVar(&lvPath, "lv-path", "", "LV path for the OSD created by ceph volume")
osdStartCmd.Flags().BoolVar(&lvBackedPV, "lv-backed-pv", false, "Whether the PV located on LV")

// add the subcommands to the parent osd command
osdCmd.AddCommand(osdConfigCmd,
@@ -152,7 +154,7 @@ func startOSD(cmd *cobra.Command, args []string) error {
context := createContext()

// Run OSD start sequence
err := osddaemon.StartOSD(context, osdStoreType, osdStringID, osdUUID, lvPath, pvcBackedOSD, args)
err := osddaemon.StartOSD(context, osdStoreType, osdStringID, osdUUID, lvPath, pvcBackedOSD, lvBackedPV, args)
if err != nil {
rook.TerminateFatal(err)
}
3 changes: 3 additions & 0 deletions design/ceph/storage-class-device-set.md
@@ -108,6 +108,9 @@ workflow would look something like this:
1. Run an OsdPrepare Job on all unused and uninitialized PVCs
* This would be one Job per PVC
1. Create an OSD Deployment for each unused but initialized PVC
* Deploy the OSD with `ceph-volume` if available.
* If the PV is not backed by an LV, create an LV on this PV.
* If the PV is backed by an LV, use this PV as is.

### Additional considerations for local storage

2 changes: 1 addition & 1 deletion pkg/clusterd/disk.go
@@ -80,7 +80,7 @@ func PopulateDeviceInfo(d string, executor exec.Executor) (*sys.LocalDisk, error
}

diskType, ok := diskProps["TYPE"]
if !ok || (diskType != sys.SSDType && diskType != sys.CryptType && diskType != sys.DiskType && diskType != sys.PartType && diskType != sys.LinearType) {
if !ok || (diskType != sys.SSDType && diskType != sys.CryptType && diskType != sys.DiskType && diskType != sys.PartType && diskType != sys.LinearType && diskType != sys.LVMType) {
if !ok {
return nil, errors.New("diskType is empty")
} else {
2 changes: 1 addition & 1 deletion pkg/daemon/ceph/osd/agent.go
@@ -176,7 +176,7 @@ func (a *OsdAgent) configureDevices(context *clusterd.Context, devices *DeviceOs
}
skipLVRelease = true
}
return getCephVolumeOSDs(context, a.cluster.Name, a.cluster.FSID, lvPath, skipLVRelease)
return getCephVolumeOSDs(context, a.cluster.Name, a.cluster.FSID, lvPath, skipLVRelease, false)
}
return osds, nil
}
10 changes: 5 additions & 5 deletions pkg/daemon/ceph/osd/daemon.go
@@ -42,7 +42,7 @@ var (
)

// StartOSD starts an OSD on a device that was provisioned by ceph-volume
func StartOSD(context *clusterd.Context, osdType, osdID, osdUUID, lvPath string, pvcBackedOSD bool, cephArgs []string) error {
func StartOSD(context *clusterd.Context, osdType, osdID, osdUUID, lvPath string, pvcBackedOSD, lvBackedPV bool, cephArgs []string) error {

// ensure the config mount point exists
configDir := fmt.Sprintf("/var/lib/ceph/osd/ceph-%s", osdID)
@@ -52,12 +52,12 @@ func StartOSD(context *clusterd.Context, osdType, osdID, osdUUID, lvPath string,
}

// Update LVM config at runtime
if err := updateLVMConfig(context, pvcBackedOSD); err != nil {
if err := updateLVMConfig(context, pvcBackedOSD, lvBackedPV); err != nil {
return fmt.Errorf("failed to update lvm configuration file, %+v", err) // fail return here as validation provided by ceph-volume
}

var volumeGroupName string
if pvcBackedOSD {
if pvcBackedOSD && !lvBackedPV {
volumeGroupName, err = getVolumeGroupName(lvPath)
if err != nil {
return fmt.Errorf("error fetching volume group name for OSD %s. %+v", osdID, err)
@@ -85,7 +85,7 @@ func StartOSD(context *clusterd.Context, osdType, osdID, osdUUID, lvPath string,
logger.Errorf("failed to start osd or shutting down. %+v", err)
}

if pvcBackedOSD {
if pvcBackedOSD && !lvBackedPV {
if err := releaseLVMDevice(context, volumeGroupName); err != nil {
return fmt.Errorf("failed to release device from lvm. %+v", err)
}
@@ -272,7 +272,7 @@ func Provision(context *clusterd.Context, agent *OsdAgent, crushLocation string)

logger.Infof("device osds:%+v\ndir osds: %+v", deviceOSDs, dirOSDs)

if agent.pvcBacked && !deviceOSDs[0].SkipLVRelease {
if agent.pvcBacked && !deviceOSDs[0].SkipLVRelease && !deviceOSDs[0].LVBackedPV {
volumeGroupName, err := getVolumeGroupName(deviceOSDs[0].LVPath)
if err != nil {
return fmt.Errorf("error fetching volume group name. %+v", err)
76 changes: 63 additions & 13 deletions pkg/daemon/ceph/osd/volume.go
@@ -54,11 +54,12 @@ const (
func (a *OsdAgent) configureCVDevices(context *clusterd.Context, devices *DeviceOsdMapping) ([]oposd.OSDInfo, error) {
var osds []oposd.OSDInfo
var lv string
var lvBackedPV bool

var err error
if len(devices.Entries) == 0 {
logger.Infof("no new devices to configure. returning devices already configured with ceph-volume.")
osds, err = getCephVolumeOSDs(context, a.cluster.Name, a.cluster.FSID, lv, false)
osds, err = getCephVolumeOSDs(context, a.cluster.Name, a.cluster.FSID, lv, false, lvBackedPV)
if err != nil {
logger.Infof("failed to get devices already provisioned by ceph-volume. %+v", err)
}
@@ -70,11 +71,21 @@ func (a *OsdAgent) configureCVDevices(context *clusterd.Context, devices *Device
return nil, fmt.Errorf("failed to generate osd keyring. %+v", err)
}
// Update LVM configuration file
if err := updateLVMConfig(context, a.pvcBacked); err != nil {
if a.pvcBacked {
for _, device := range devices.Entries {
lvBackedPV, err = sys.IsLV(device.Config.Name, context.Executor)
if err != nil {
return nil, fmt.Errorf("failed to check device type. %+v", err)
}
break
}
}
if err := updateLVMConfig(context, a.pvcBacked, lvBackedPV); err != nil {
return nil, fmt.Errorf("failed to update lvm configuration file, %+v", err) // fail return here as validation provided by ceph-volume
}

if a.pvcBacked {
if lv, err = a.initializeBlockPVC(context, devices); err != nil {
if lv, err = a.initializeBlockPVC(context, devices, lvBackedPV); err != nil {
return nil, fmt.Errorf("failed to initialize devices. %+v", err)
}
} else {
@@ -83,11 +94,11 @@ func (a *OsdAgent) configureCVDevices(context *clusterd.Context, devices *Device
}
}

osds, err = getCephVolumeOSDs(context, a.cluster.Name, a.cluster.FSID, lv, false)
osds, err = getCephVolumeOSDs(context, a.cluster.Name, a.cluster.FSID, lv, lvBackedPV, lvBackedPV) // skip release if PV is LV
return osds, err
}

func (a *OsdAgent) initializeBlockPVC(context *clusterd.Context, devices *DeviceOsdMapping) (string, error) {
func (a *OsdAgent) initializeBlockPVC(context *clusterd.Context, devices *DeviceOsdMapping, lvBackedPV bool) (string, error) {
baseCommand := "stdbuf"
baseArgs := []string{"-oL", cephVolumeCmd, "lvm", "prepare"}
var lvpath string
@@ -98,7 +109,18 @@ func (a *OsdAgent) initializeBlockPVC(context *clusterd.Context, devices *Device
}
if device.Data == -1 {
logger.Infof("configuring new device %s", name)
deviceArg := device.Config.Name
var err error
var deviceArg string
if lvBackedPV {
// pass 'vg/lv' to ceph-volume
deviceArg, err = getLVNameFromDevicePath(context, device.Config.Name)
if err != nil {
return "", fmt.Errorf("failed to get lv name from device path %q. %+v", device.Config.Name, err)
}
} else {
deviceArg = device.Config.Name
}

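// the resulting call is roughly: stdbuf -oL ceph-volume lvm prepare --data <deviceArg> ...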
immediateExecuteArgs := append(baseArgs, []string{
"--data",
deviceArg,
@@ -109,9 +131,13 @@ func (a *OsdAgent) initializeBlockPVC(context *clusterd.Context, devices *Device
return "", fmt.Errorf("failed ceph-volume. %+v", err) // fail return here as validation provided by ceph-volume
} else {
logger.Infof("%v", op)
lvpath = getLVPath(op)
if lvpath == "" {
return "", fmt.Errorf("failed to get lvpath from ceph-volume lvm prepare output")
if lvBackedPV {
lvpath = deviceArg
} else {
lvpath = getLVPath(op)
if lvpath == "" {
return "", fmt.Errorf("failed to get lvpath from ceph-volume lvm prepare output")
}
}
}
} else {
@@ -137,7 +163,24 @@ func getLVPath(op string) string {
return ""
}

func updateLVMConfig(context *clusterd.Context, onPVC bool) error {
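// getLVNameFromDevicePath converts a device-mapper device path (e.g. /dev/dm-1)
// into the "vg/lv" form that "ceph-volume lvm prepare --data" expects, by asking
// dmsetup for the DM name and splitting it into its VG and LV parts.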
func getLVNameFromDevicePath(context *clusterd.Context, devicePath string) (string, error) {
devInfo, err := context.Executor.ExecuteCommandWithOutput(true, "",
"dmsetup", "info", "-c", "--noheadings", "-o", "name", devicePath)
if err != nil {
return "", fmt.Errorf("failed dmsetup info. output: %q. %+v", devInfo, err)
}
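// dmsetup splitname decodes the DM name (where "-" inside VG/LV names is doubled)
// and, with --noheadings, prints the VG, LV, and layer fields colon-separated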
out, err := context.Executor.ExecuteCommandWithOutput(true, "", "dmsetup", "splitname", devInfo, "--noheadings")
if err != nil {
return "", fmt.Errorf("failed dmsetup splitname %q. %+v", devInfo, err)
}
split := strings.Split(out, ":")
if len(split) < 2 {
return "", fmt.Errorf("dmsetup splitname returned unexpected result for %q. output: %q", devInfo, out)
}
return fmt.Sprintf("%s/%s", split[0], split[1]), nil
}

func updateLVMConfig(context *clusterd.Context, onPVC, lvBackedPV bool) error {

input, err := ioutil.ReadFile(lvmConfPath)
if err != nil {
@@ -158,8 +201,14 @@ func updateLVMConfig(context *clusterd.Context, onPVC bool) error {
// We have 2 different regex depending on the version of LVM present in the container...
// Since https://github.com/lvmteam/lvm2/commit/08396b4bce45fb8311979250623f04ec0ddb628c#diff-13c602a6258e57ce666a240e67c44f38
// the content changed, so depending on which version is installed, one of the two replacements will work
output = bytes.Replace(output, []byte(`# filter = [ "a|.*/|" ]`), []byte(`filter = [ "a|^/mnt/.*|", "r|.*|" ]`), 1)
output = bytes.Replace(output, []byte(`# filter = [ "a|.*|" ]`), []byte(`filter = [ "a|^/mnt/.*|", "r|.*|" ]`), 1)
if lvBackedPV {
// ceph-volume calls lvs to locate given "vg/lv", so allow "/dev" here. However, ignore loopback devices
output = bytes.Replace(output, []byte(`# filter = [ "a|.*/|" ]`), []byte(`filter = [ "a|^/mnt/.*|", "r|^/dev/loop.*|", "a|^/dev/.*|", "r|.*|" ]`), 1)
output = bytes.Replace(output, []byte(`# filter = [ "a|.*|" ]`), []byte(`filter = [ "a|^/mnt/.*|", "r|^/dev/loop.*|", "a|^/dev/.*|", "r|.*|" ]`), 1)
} else {
output = bytes.Replace(output, []byte(`# filter = [ "a|.*/|" ]`), []byte(`filter = [ "a|^/mnt/.*|", "r|.*|" ]`), 1)
output = bytes.Replace(output, []byte(`# filter = [ "a|.*|" ]`), []byte(`filter = [ "a|^/mnt/.*|", "r|.*|" ]`), 1)
}
}

if err = ioutil.WriteFile(lvmConfPath, output, 0644); err != nil {
@@ -388,7 +437,7 @@ func getCephVolumeSupported(context *clusterd.Context) (bool, error) {
return true, nil
}

func getCephVolumeOSDs(context *clusterd.Context, clusterName string, cephfsid string, lv string, skipLVRelease bool) ([]oposd.OSDInfo, error) {
func getCephVolumeOSDs(context *clusterd.Context, clusterName string, cephfsid string, lv string, skipLVRelease, lvBackedPV bool) ([]oposd.OSDInfo, error) {

result, err := context.Executor.ExecuteCommandWithCombinedOutput(false, "", cephVolumeCmd, "lvm", "list", lv, "--format", "json")
if err != nil {
@@ -439,6 +488,7 @@ func getCephVolumeOSDs(context *clusterd.Context, clusterName string, cephfsid s
IsFileStore: isFilestore,
LVPath: lv,
SkipLVRelease: skipLVRelease,
LVBackedPV: lvBackedPV,
}
osds = append(osds, osd)
}
6 changes: 3 additions & 3 deletions pkg/daemon/ceph/osd/volume_test.go
@@ -252,7 +252,7 @@ func TestParseCephVolumeResult(t *testing.T) {
}

context := &clusterd.Context{Executor: executor}
osds, err := getCephVolumeOSDs(context, "rook", "4bfe8b72-5e69-4330-b6c0-4d914db8ab89", "", false)
osds, err := getCephVolumeOSDs(context, "rook", "4bfe8b72-5e69-4330-b6c0-4d914db8ab89", "", false, false)
assert.Nil(t, err)
require.NotNil(t, osds)
assert.Equal(t, 2, len(osds))
@@ -272,7 +272,7 @@ func TestCephVolumeResultMultiClusterSingleOSD(t *testing.T) {
}

context := &clusterd.Context{Executor: executor}
osds, err := getCephVolumeOSDs(context, "rook", "451267e6-883f-4936-8dff-080d781c67d5", "", false)
osds, err := getCephVolumeOSDs(context, "rook", "451267e6-883f-4936-8dff-080d781c67d5", "", false, false)
assert.Nil(t, err)
require.NotNil(t, osds)
assert.Equal(t, 1, len(osds))
@@ -293,7 +293,7 @@ func TestCephVolumeResultMultiClusterMultiOSD(t *testing.T) {
}

context := &clusterd.Context{Executor: executor}
osds, err := getCephVolumeOSDs(context, "rook", "451267e6-883f-4936-8dff-080d781c67d5", "", false)
osds, err := getCephVolumeOSDs(context, "rook", "451267e6-883f-4936-8dff-080d781c67d5", "", false, false)
assert.Nil(t, err)
require.NotNil(t, osds)
assert.Equal(t, 1, len(osds))
1 change: 1 addition & 0 deletions pkg/operator/ceph/cluster/osd/osd.go
@@ -144,6 +144,7 @@ type OSDInfo struct {
LVPath string `json:"lv-path"`
SkipLVRelease bool `json:"skip-lv-release"`
Location string `json:"location"`
LVBackedPV bool `json:"lv-backed-pv"`
}

type OrchestrationStatus struct {
6 changes: 6 additions & 0 deletions pkg/operator/ceph/cluster/osd/spec.go
@@ -51,6 +51,7 @@ const (
osdMetadataDeviceEnvVarName = "ROOK_METADATA_DEVICE"
pvcBackedOSDVarName = "ROOK_PVC_BACKED_OSD"
lvPathVarName = "ROOK_LV_PATH"
lvBackedPVVarName = "ROOK_LV_BACKED_PV"
rookBinariesMountPath = "/rook"
rookBinariesVolumeName = "rook-binaries"
activateOSDVolumeName = "activate-osd"
@@ -357,6 +358,7 @@ func (c *Cluster) makeDeployment(osdProps osdProperties, osd OSDInfo, provisionC
volumeMounts = append(volumeMounts, getPvcOSDBridgeMount(osdProps.pvc.ClaimName))
envVars = append(envVars, pvcBackedOSDEnvVar("true"))
envVars = append(envVars, lvPathEnvVariable(osd.LVPath))
envVars = append(envVars, lvBackedPVEnvVar(strconv.FormatBool(osd.LVBackedPV)))
}

privileged := true
@@ -882,6 +884,10 @@ func lvPathEnvVariable(lvPath string) v1.EnvVar {
return v1.EnvVar{Name: lvPathVarName, Value: lvPath}
}

func lvBackedPVEnvVar(lvBackedPV string) v1.EnvVar {
return v1.EnvVar{Name: lvBackedPVVarName, Value: lvBackedPV}
}

func getDirectoriesFromContainer(osdContainer v1.Container) []rookalpha.Directory {
var dirsArg string
for _, envVar := range osdContainer.Env {
12 changes: 12 additions & 0 deletions pkg/util/sys/device.go
@@ -186,6 +186,18 @@ func GetDevicePropertiesFromPath(devicePath string, executor exec.Executor) (map
return parseKeyValuePairString(output), nil
}

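// IsLV returns true if the given device path refers to an LVM logical volume,
// based on the TYPE reported by its device properties.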
func IsLV(devicePath string, executor exec.Executor) (bool, error) {
devProps, err := GetDevicePropertiesFromPath(devicePath, executor)
if err != nil {
return false, fmt.Errorf("failed to get device properties for %q: %+v", devicePath, err)
}
diskType, ok := devProps["TYPE"]
if !ok {
return false, fmt.Errorf("TYPE property is not found for %q", devicePath)
}
return diskType == LVMType, nil
}

func GetUdevInfo(device string, executor exec.Executor) (map[string]string, error) {
cmd := fmt.Sprintf("udevadm info %s", device)
output, err := executor.ExecuteCommandWithOutput(false, cmd, "udevadm", "info", "--query=property", fmt.Sprintf("/dev/%s", device))
