Add api-extra-args support to the kubernetes-master juju layer #47912

Merged
merged 1 commit into from Sep 23, 2017
Jump to file or symbol
Failed to load files and symbols.
+134 −23
Split
@@ -31,3 +31,12 @@ options:
default: ""
description: |
Password to be used for admin user (leave empty for random password).
+ api-extra-args:
+ type: string
+ default: ""
+ description: |
+ Space separated list of flags and key=value pairs that will be passed as arguments to
+ kube-apiserver. For example a value like this:
+ runtime-config=batch/v2alpha1=true profiling=true
+ will result in kube-apiserver being run with the following options:
+ --runtime-config=batch/v2alpha1=true --profiling=true
@@ -37,7 +37,7 @@
from charms.reactive import remove_state
from charms.reactive import set_state
from charms.reactive import is_state
-from charms.reactive import when, when_any, when_not
+from charms.reactive import when, when_any, when_not, when_all
from charms.reactive.helpers import data_changed, any_file_changed
from charms.kubernetes.common import get_version
from charms.kubernetes.common import retry
@@ -360,19 +360,17 @@ def start_master(etcd):
# the master unit disconnects from etcd and is ready to terminate.
# No point in trying to start master services and fail. Just return.
return
+
+ # TODO: Make sure below relation is handled on change
+ # https://github.com/kubernetes/kubernetes/issues/43461
handle_etcd_relation(etcd)
- configure_master_services()
- hookenv.status_set('maintenance',
- 'Starting the Kubernetes master services.')
- services = ['kube-apiserver',
- 'kube-controller-manager',
- 'kube-scheduler']
- for service in services:
- host.service_restart('snap.%s.daemon' % service)
+ # Add CLI options to all components
+ configure_apiserver()
+ configure_controller_manager()
+ configure_scheduler()
@Cynerva

Cynerva Jun 22, 2017

Contributor

This is a great refactor! Thanks for cleaning things up.

hookenv.open_port(6443)
- set_state('kubernetes-master.components.started')
@when('etcd.available')
@@ -675,6 +673,12 @@ def on_config_allow_privileged_change():
remove_state('config.changed.allow-privileged')
+@when('config.changed.api-extra-args')
+@when('kubernetes-master.components.started')
+def on_config_api_extra_args_change():
+ configure_apiserver()
+
+
@when('kube-control.gpu.available')
@when('kubernetes-master.components.started')
@when_not('kubernetes-master.gpu.enabled')
@@ -716,6 +720,44 @@ def shutdown():
service_stop('snap.kube-scheduler.daemon')
+@when('kube-apiserver.do-restart')
+def restart_apiserver():
+ prev_state, prev_msg = hookenv.status_get()
@chuckbutler

chuckbutler Jun 22, 2017

Member

👍 I didn't know we could do this.

+ hookenv.status_set('maintenance', 'Restarting kube-apiserver')
+ host.service_restart('snap.kube-apiserver.daemon')
+ hookenv.status_set(prev_state, prev_msg)
+ remove_state('kube-apiserver.do-restart')
+ set_state('kube-apiserver.started')
+
+
+@when('kube-controller-manager.do-restart')
+def restart_controller_manager():
+ prev_state, prev_msg = hookenv.status_get()
+ hookenv.status_set('maintenance', 'Restarting kube-controller-manager')
+ host.service_restart('snap.kube-controller-manager.daemon')
+ hookenv.status_set(prev_state, prev_msg)
+ remove_state('kube-controller-manager.do-restart')
+ set_state('kube-controller-manager.started')
+
+
+@when('kube-scheduler.do-restart')
+def restart_scheduler():
+ prev_state, prev_msg = hookenv.status_get()
+ hookenv.status_set('maintenance', 'Restarting kube-scheduler')
+ host.service_restart('snap.kube-scheduler.daemon')
+ hookenv.status_set(prev_state, prev_msg)
+ remove_state('kube-scheduler.do-restart')
+ set_state('kube-scheduler.started')
+
+
+@when_all('kube-apiserver.started',
+ 'kube-controller-manager.started',
+ 'kube-scheduler.started')
+@when_not('kubernetes-master.components.started')
+def componenets_started():
+ set_state('kubernetes-master.components.started')
+
+
def arch():
'''Return the package architecture as a string. Raise an exception if the
architecture is not supported by kubernetes.'''
@@ -840,14 +882,50 @@ def handle_etcd_relation(reldata):
api_opts.add('etcd-servers', connection_string, strict=True)
-def configure_master_services():
- ''' Add remaining flags for the master services and configure snaps to use
- them '''
+def get_config_args():
+ db = unitdata.kv()
+ old_config_args = db.get('api-extra-args', [])
+ # We have to convert them to tuples becuase we use sets
+ old_config_args = [tuple(i) for i in old_config_args]
+ new_config_args = []
+ new_config_arg_names = []
+ for arg in hookenv.config().get('api-extra-args', '').split():
+ new_config_arg_names.append(arg.split('=', 1)[0])
+ if len(arg.split('=', 1)) == 1: # handle flags ie. --profiling
@Cynerva

Cynerva Jun 22, 2017

Contributor

Is this a valid case? It'd be profiling=true wouldn't it?

@jacekn

jacekn Jun 23, 2017

Contributor

https://kubernetes.io/docs/admin/kube-apiserver/ suggests flags without values might be supported for example:
--contention-profiling Enable lock contention profiling, if profiling is enabled

Other arguments document expected value for example:
--etcd-cafile string
--audit-log-maxsize int

@Cynerva

Cynerva Jun 23, 2017

Contributor

They are supported by kube-apiserver, but not snap set kube-apiserver. It takes them as boolean values instead, e.g. contention-profiling=true and contention-profiling=false.

My specific concern here is that this case will break on the following line:

new_config_args.append(tuple(arg.split('=', 1), None))

I don't think tuple accepts 2 arguments. That and the None probably needs to be 'true' since it's eventually gonna go through FlagManager to a snap set call.

More generally, I get the feeling this case shouldn't exist at all. The description in config.yaml makes me think api-extra-args follows the same conventions as snap set, with the profiling=true example, but the case here says otherwise.

+ new_config_args.append(tuple([arg, 'true']))
+ else:
+ new_config_args.append(tuple(arg.split('=', 1)))
+
+ hookenv.log('Handling "api-extra-args" option.')
+ hookenv.log('Old arguments: {}'.format(old_config_args))
+ hookenv.log('New arguments: {}'.format(new_config_args))
+ if set(new_config_args) == set(old_config_args):
+ return (new_config_args, [])
+ # Store new args
+ db.set('api-extra-args', new_config_args)
+ to_add = set(new_config_args)
+ to_remove = set(old_config_args) - set(new_config_args)
+ # Extract option names only
+ to_remove = [i[0] for i in to_remove if i[0] not in new_config_arg_names]
+ return (to_add, to_remove)
+
+
+def configure_apiserver():
+ # TODO: investigate if it's possible to use config file to store args
+ # https://github.com/juju-solutions/bundle-canonical-kubernetes/issues/315
+ # Handle api-extra-args config option
+ to_add, to_remove = get_config_args()
api_opts = FlagManager('kube-apiserver')
- controller_opts = FlagManager('kube-controller-manager')
- scheduler_opts = FlagManager('kube-scheduler')
- scheduler_opts.add('v', '2')
+
+ # Remove arguments that are no longer provided as config option
+ # this allows them to be reverted to charm defaults
+ for arg in to_remove:
+ hookenv.log('Removing option: {}'.format(arg))
+ api_opts.destroy(arg)
+ # We need to "unset" options by settig their value to "null" string
+ cmd = ['snap', 'set', 'kube-apiserver', '{}=null'.format(arg)]
+ check_call(cmd)
@Cynerva

Cynerva Jun 22, 2017

Contributor

I think this could have confusing behavior when removing overridden args. What happens if I do this?

juju config kubernetes-master api-extra-args="storage-backend=etcd3"
juju config kubernetes-master api-extra-args=""
  1. By default, the charm uses storage-backend=etcd2.
  2. Setting api-extra-args will allow us to override it with storage-backend=etcd3, which seems reasonable.
  3. Once we clear api-extra-args, it will set it to storage-backend=null, instead of reverting to etcd2.

I think it'd be worthwhile to fix the logic here so it reverts back properly. That or prevent overrides from happening if that's something we don't want.

@Cynerva

Cynerva Jun 22, 2017

Contributor

Similarly, the positioning of this would not allow us to override admission-control even though it lets us override other options. Let's make the behavior consistent one way or the other.

@jacekn

jacekn Jun 23, 2017

Contributor

This problem is basically unsolvable. FlagManger supports options like this:
{'foo': ['bar', 'baz']}

Which will render to:
--foo=bar --foo=baz

Let's assume an environment has the following state:
{'foo': ['bar']}

And we run:
juju config kubernetes-master api-extra-args="foo=baz"

There is no way to tell if the operator wants end result to be:
{'foo': ['bar', 'baz']}
or
{'foo': ['baz']}

My feeling is that we should make it {'foo': ['baz']} and trust that the operator knows what they are doing.
Similarly if an operator sets and then unsets certain option it's not obvious that they want it reverted to a value harcoded in the charm. An example might be "--min-request-timeout" which the charm sets to 300s.
The operator might set it to 600s, realize it's too short and "unset" to revert expecting the flag to be removed (and in consequence having k8s default value of 1800s).

ACK your point about positioning above admission-control, I'll get it fixed

@Cynerva

Cynerva Jun 23, 2017

Contributor

My feeling is that we should make it {'foo': ['baz']} and trust that the operator knows what they are doing.

👍 to this

An example might be "--min-request-timeout" which the charm sets to 300s. The operator might set it to 600s, realize it's too short and "unset" to revert expecting the flag to be removed (and in consequence having k8s default value of 1800s).

Intuition tells me that juju config kubernetes-master api-extra-args="" should give the same result no matter what it was set to prior. I think that behavior is important.

If you take that stance, then I think it becomes clear: removing a value from api-extra-args has to revert to charm default (300s in your example). If the operator really wants to bypass the charm default, they can do it by overriding with an explicit value.

That said, I don't see a quick fix for this with how the charm uses FlagManager today. Seems like there might be a lot of refactoring involved to get it right. :\

@jacekn

jacekn Jun 23, 2017

Contributor

Point taken, I'll give it a go. It may actually not be too difficult to have the charm revert to defaults when api-extra-args option is unset.

@jacekn

jacekn Jun 26, 2017

Contributor

@Cynerva I pushed new (rebased) commit with that fixes it. Config options are not revert back to charm defaults when they are unset.
Please note that we are still affected by https://bugs.launchpad.net/snapd/+bug/1699768 so unsetting some options will break hooks. There is unfortunately nothing I can do about that.

# Get the tls paths from the layer data.
layer_options = layer.options('tls-client')
@@ -877,6 +955,7 @@ def configure_master_services():
api_opts.add('insecure-bind-address', '127.0.0.1')
api_opts.add('insecure-port', '8080')
api_opts.add('storage-backend', 'etcd2') # FIXME: add etcd3 support
+
admission_control = [
'Initializers',
'NamespaceLifecycle',
@@ -894,27 +973,50 @@ def configure_master_services():
admission_control.remove('Initializers')
api_opts.add('admission-control', ','.join(admission_control), strict=True)
+ # Add operator-provided arguments, this allows operators to override defaults
+ for arg in to_add:
+ hookenv.log('Adding option: {} {}'.format(arg[0], arg[1]))
+ # Make sure old value is gone
+ api_opts.destroy(arg[0])
+ api_opts.add(arg[0], arg[1])
+
+ cmd = ['snap', 'set', 'kube-apiserver'] + api_opts.to_s().split(' ')
+ check_call(cmd)
+ set_state('kube-apiserver.do-restart')
+
+
+def configure_controller_manager():
+ controller_opts = FlagManager('kube-controller-manager')
+
+ # Get the tls paths from the layer data.
+ layer_options = layer.options('tls-client')
+ ca_cert_path = layer_options.get('ca_certificate_path')
+
# Default to 3 minute resync. TODO: Make this configureable?
controller_opts.add('min-resync-period', '3m')
controller_opts.add('v', '2')
controller_opts.add('root-ca-file', ca_cert_path)
controller_opts.add('logtostderr', 'true')
controller_opts.add('master', 'http://127.0.0.1:8080')
- scheduler_opts.add('v', '2')
- scheduler_opts.add('logtostderr', 'true')
- scheduler_opts.add('master', 'http://127.0.0.1:8080')
-
- cmd = ['snap', 'set', 'kube-apiserver'] + api_opts.to_s().split(' ')
- check_call(cmd)
-
cmd = (
['snap', 'set', 'kube-controller-manager'] +
controller_opts.to_s().split(' ')
)
check_call(cmd)
+ set_state('kube-controller-manager.do-restart')
+
+
+def configure_scheduler():
+ scheduler_opts = FlagManager('kube-scheduler')
+
+ scheduler_opts.add('v', '2')
+ scheduler_opts.add('logtostderr', 'true')
+ scheduler_opts.add('master', 'http://127.0.0.1:8080')
+
cmd = ['snap', 'set', 'kube-scheduler'] + scheduler_opts.to_s().split(' ')
check_call(cmd)
+ set_state('kube-scheduler.do-restart')
def setup_basic_auth(password=None, username='admin', uid='admin'):