Permalink
Browse files

Add support for availability zones to the crush map

If the deployment environment supports it, and the user would like to,
we can use Juju's Availability Zone information to set up the Ceph
cluster to use Availability Zones as the failure domain instead of
the host.

Change-Id: I4566696750b388918761ded0ed5beb0bf82ff501
Depends-On: Ie25ac1b001db558d6a40fe3eaca014e8f4174241
  • Loading branch information...
1 parent 3ba6011 commit aef61caa46e7fd6ba5b1280653d9aec5ffcd03d1 @ChrisMacNaughton ChrisMacNaughton committed with cholcombe973 Mar 17, 2016
Showing with 33 additions and 0 deletions.
  1. +6 −0 config.yaml
  2. +27 −0 hooks/ceph_hooks.py
View
@@ -104,6 +104,12 @@ options:
threads-max to a high value to avoid problems with large numbers (>20)
of OSDs recovering. very large clusters should set those values even
higher (e.g. max for kernel.pid_max is 4194303).
+ customize-failure-domain:
+ type: boolean
+ default: false
+ description: |
+ Setting this to true will tell Ceph to replicate across Juju's
+ Availability Zone instead of specifically by host.
nagios_context:
type: string
default: "juju"
View
@@ -417,6 +417,33 @@ def mon_relation():
ceph.bootstrap_monitor_cluster(leader_get('monitor-secret'))
ceph.wait_for_bootstrap()
ceph.wait_for_quorum()
+ # If we can and want to
+ if is_leader() and config('customize-failure-domain'):
+ # But only if the environment supports it
+ if os.environ.get('JUJU_AVAILABILITY_ZONE'):
+ cmds = [
+ "ceph osd getcrushmap -o /tmp/crush.map",
+ "crushtool -d /tmp/crush.map| "
+ "sed 's/step chooseleaf firstn 0 type host/step "
+ "chooseleaf firstn 0 type rack/' > "
+ "/tmp/crush.decompiled",
+ "crushtool -c /tmp/crush.decompiled -o /tmp/crush.map",
+ "crushtool -i /tmp/crush.map --test",
+ "ceph osd setcrushmap -i /tmp/crush.map"
+ ]
+ for cmd in cmds:
+ try:
+ subprocess.check_call(cmd, shell=True)
+ except subprocess.CalledProcessError as e:
+ log("Failed to modify crush map:", level='error')
+ log("Cmd: {}".format(cmd), level='error')
+ log("Error: {}".format(e.output), level='error')
+ break
+ else:
+ log(
+ "Your Juju environment doesn't"
+ "have support for Availability Zones"
+ )
notify_osds()
notify_radosgws()
notify_client()

0 comments on commit aef61ca

Please sign in to comment.