Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[release-4.5] Bug 1860311: Civilize logging #312

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 3 additions & 1 deletion kuryr_kubernetes/cni/binding/bridge.py
Expand Up @@ -112,5 +112,7 @@ def is_alive(self):
h_ipdb.interfaces[bridge_name]
return True
except Exception:
LOG.debug("Reporting Driver not healthy.")
LOG.error("The configured ovs_bridge=%s integration interface "
"does not exists. Reporting that driver is not healthy.",
bridge_name)
return False
56 changes: 13 additions & 43 deletions kuryr_kubernetes/cni/health.py
Expand Up @@ -13,15 +13,13 @@
from http import client as httplib
import os

from flask import Flask
from oslo_config import cfg
from oslo_log import log as logging
from pyroute2 import IPDB

from kuryr.lib._i18n import _
from kuryr_kubernetes import clients
from kuryr_kubernetes.cni import utils
from kuryr_kubernetes import exceptions as exc
from oslo_config import cfg
from oslo_log import log as logging
from kuryr_kubernetes import health as base_server

LOG = logging.getLogger(__name__)
CONF = cfg.CONF
Expand Down Expand Up @@ -88,7 +86,7 @@ def _get_memsw_usage(cgroup_mem_path):
return memsw_in_bytes / BYTES_AMOUNT


class CNIHealthServer(object):
class CNIHealthServer(base_server.BaseHealthServer):
"""Server used by readiness and liveness probe to manage CNI health checks.

Verifies presence of NET_ADMIN capabilities, IPDB in working order,
Expand All @@ -98,73 +96,45 @@ class CNIHealthServer(object):

def __init__(self, components_healthy):

self.ctx = None
super().__init__('daemon-health', CONF.cni_health_server.port)
self._components_healthy = components_healthy
self.application = Flask('cni-health-daemon')
self.application.add_url_rule(
'/ready', methods=['GET'], view_func=self.readiness_status)
self.application.add_url_rule(
'/alive', methods=['GET'], view_func=self.liveness_status)
self.headers = {'Connection': 'close'}

def readiness_status(self):
data = 'ok'
k8s_conn = self.verify_k8s_connection()

if not _has_cap(CAP_NET_ADMIN, EFFECTIVE_CAPS):
error_message = 'NET_ADMIN capabilities not present.'
LOG.error(error_message)
return error_message, httplib.INTERNAL_SERVER_ERROR, self.headers
return error_message, httplib.INTERNAL_SERVER_ERROR, {}
if not k8s_conn:
error_message = 'Error when processing k8s healthz request.'
error_message = 'K8s API healtz endpoint failed.'
LOG.error(error_message)
return error_message, httplib.INTERNAL_SERVER_ERROR, self.headers
return error_message, httplib.INTERNAL_SERVER_ERROR, {}

LOG.info('CNI driver readiness verified.')
return data, httplib.OK, self.headers
return 'ok', httplib.OK, {}

def liveness_status(self):
data = 'ok'
no_limit = -1
try:
with IPDB():
pass
except Exception:
error_message = 'IPDB not in working order.'
LOG.error(error_message)
return error_message, httplib.INTERNAL_SERVER_ERROR, self.headers
return error_message, httplib.INTERNAL_SERVER_ERROR, {}

if CONF.cni_health_server.max_memory_usage != no_limit:
mem_usage = _get_memsw_usage(_get_cni_cgroup_path())

if mem_usage > CONF.cni_health_server.max_memory_usage:
err_message = 'CNI daemon exceeded maximum memory usage.'
LOG.error(err_message)
return err_message, httplib.INTERNAL_SERVER_ERROR, self.headers
return err_message, httplib.INTERNAL_SERVER_ERROR, {}

with self._components_healthy.get_lock():
if not self._components_healthy.value:
err_message = 'Kuryr CNI components not healthy.'
LOG.error(err_message)
return err_message, httplib.INTERNAL_SERVER_ERROR, self.headers

LOG.debug('Kuryr CNI Liveness verified.')
return data, httplib.OK, self.headers
return err_message, httplib.INTERNAL_SERVER_ERROR, {}

def run(self):
address = '::'
try:
LOG.info('Starting CNI health check server.')
self.application.run(address, CONF.cni_health_server.port)
except Exception:
LOG.exception('Failed to start CNI health check server.')
raise

def verify_k8s_connection(self):
k8s = clients.get_kubernetes_client()
try:
k8s.get('/healthz', json=False, headers={'Connection': 'close'})
except exc.K8sClientException:
LOG.exception('Exception when trying to reach Kubernetes API.')
return False
return True
return 'ok', httplib.OK, {}
6 changes: 4 additions & 2 deletions kuryr_kubernetes/controller/handlers/namespace.py
Expand Up @@ -144,9 +144,11 @@ def _add_kuryrnetwork_crd(self, namespace, ns_labels):
raise

def is_ready(self, quota):
if not utils.has_kuryr_crd(constants.K8S_API_CRD_KURYRNETS):
if not (utils.has_kuryr_crd(constants.K8S_API_CRD_KURYRNETS) and
self._check_quota(quota)):
LOG.error('Marking NamespaceHandler as not ready.')
return False
return self._check_quota(quota)
return True

def _check_quota(self, quota):
resources = ('subnets', 'networks', 'security_groups')
Expand Down
6 changes: 4 additions & 2 deletions kuryr_kubernetes/controller/handlers/policy.py
Expand Up @@ -127,9 +127,11 @@ def on_deleted(self, policy):
self._drv_lbaas.update_lbaas_sg(svc, sgs)

def is_ready(self, quota):
if not utils.has_kuryr_crd(k_const.K8S_API_CRD_KURYRNETPOLICIES):
if not (utils.has_kuryr_crd(k_const.K8S_API_CRD_KURYRNETPOLICIES) and
self._check_quota(quota)):
LOG.error("Marking NetworkPolicyHandler as not ready.")
return False
return self._check_quota(quota)
return True

def _check_quota(self, quota):
if utils.has_limit(quota.security_groups):
Expand Down
6 changes: 4 additions & 2 deletions kuryr_kubernetes/controller/handlers/vif.py
Expand Up @@ -212,8 +212,10 @@ def on_deleted(self, pod):
self._update_services(services, crd_pod_selectors, project_id)

def is_ready(self, quota):
if utils.has_limit(quota.ports):
return utils.is_available('ports', quota.ports)
if (utils.has_limit(quota.ports) and
not utils.is_available('ports', quota.ports)):
LOG.error('Marking VIFHandler as not ready.')
return False
return True

@staticmethod
Expand Down
55 changes: 13 additions & 42 deletions kuryr_kubernetes/controller/managers/health.py
Expand Up @@ -15,7 +15,6 @@
from http import client as httplib
import os

from flask import Flask
from oslo_config import cfg
from oslo_log import log as logging

Expand All @@ -24,8 +23,8 @@
from kuryr.lib import utils
from kuryr_kubernetes import clients
from kuryr_kubernetes import config
from kuryr_kubernetes import exceptions as exc
from kuryr_kubernetes.handlers import health as h_health
from kuryr_kubernetes import health as base_server

LOG = logging.getLogger(__name__)
CONF = cfg.CONF
Expand All @@ -39,7 +38,7 @@
CONF.register_opts(health_server_opts, "health_server")


class HealthServer(object):
class HealthServer(base_server.BaseHealthServer):
"""Proxy server used by readiness and liveness probes to manage health checks.

Allows to verify connectivity with Kubernetes API, Keystone and Neutron.
Expand All @@ -49,14 +48,8 @@ class HealthServer(object):
"""

def __init__(self):
self.ctx = None
super().__init__('controller-health', CONF.health_server.port)
self._registry = h_health.HealthRegister.get_instance().registry
self.application = Flask('health-daemon')
self.application.add_url_rule(
'/ready', methods=['GET'], view_func=self.readiness_status)
self.application.add_url_rule(
'/alive', methods=['GET'], view_func=self.liveness_status)
self.headers = {'Connection': 'close'}

def _components_ready(self):
os_net = clients.get_network_client()
Expand All @@ -70,64 +63,42 @@ def _components_ready(self):
return True

def readiness_status(self):
data = 'ok'

if CONF.kubernetes.vif_pool_driver != 'noop':
if not os.path.exists('/tmp/pools_loaded'):
error_message = 'Ports not loaded into the pools.'
LOG.error(error_message)
return error_message, httplib.NOT_FOUND, self.headers
return error_message, httplib.NOT_FOUND, {}

k8s_conn = self.verify_k8s_connection()
if not k8s_conn:
error_message = 'Error when processing k8s healthz request.'
LOG.error(error_message)
return error_message, httplib.INTERNAL_SERVER_ERROR, self.headers
return error_message, httplib.INTERNAL_SERVER_ERROR, {}
try:
self.verify_keystone_connection()
except Exception as ex:
error_message = ('Error when creating a Keystone session and '
'getting a token: %s.' % ex)
LOG.exception(error_message)
return error_message, httplib.INTERNAL_SERVER_ERROR, self.headers
return error_message, httplib.INTERNAL_SERVER_ERROR, {}

try:
if not self._components_ready():
return '', httplib.INTERNAL_SERVER_ERROR, self.headers
return '', httplib.INTERNAL_SERVER_ERROR, {}
except Exception as ex:
error_message = ('Error when processing neutron request %s' % ex)
LOG.exception(error_message)
return error_message, httplib.INTERNAL_SERVER_ERROR, self.headers
return error_message, httplib.INTERNAL_SERVER_ERROR, {}

LOG.info('Kuryr Controller readiness verified.')
return data, httplib.OK, self.headers
return 'ok', httplib.OK, {}

def liveness_status(self):
data = 'ok'
for component in self._registry:
if not component.is_alive():
LOG.debug('Kuryr Controller not healthy.')
return '', httplib.INTERNAL_SERVER_ERROR, self.headers
LOG.debug('Kuryr Controller Liveness verified.')
return data, httplib.OK, self.headers

def run(self):
address = '::'
try:
LOG.info('Starting health check server.')
self.application.run(address, CONF.health_server.port)
except Exception:
LOG.exception('Failed to start health check server.')
raise

def verify_k8s_connection(self):
k8s = clients.get_kubernetes_client()
try:
k8s.get('/healthz', json=False, headers={'Connection': 'close'})
except exc.K8sClientException:
LOG.exception('Exception when trying to reach Kubernetes API.')
return False
return True
msg = 'Component %s is dead.' % component.__class__.__name__
LOG.error(msg)
return msg, httplib.INTERNAL_SERVER_ERROR, {}
return 'ok', httplib.OK, {}

def verify_keystone_connection(self):
# Obtain a new token to ensure connectivity with keystone
Expand Down
76 changes: 76 additions & 0 deletions kuryr_kubernetes/health.py
@@ -0,0 +1,76 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import abc

from flask import Flask
from oslo_config import cfg
from oslo_log import log as logging

from kuryr_kubernetes import clients

# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
# Handle to the global oslo.config configuration object.
CONF = cfg.CONF


class BaseHealthServer(abc.ABC):
    """Common scaffolding for the readiness/liveness HTTP probe servers.

    The constructor builds a Flask application exposing two GET endpoints,
    ``/ready`` and ``/alive``, which are served by the ``readiness_status``
    and ``liveness_status`` methods that concrete subclasses must implement.
    Every response sent by the application gets a ``Connection: close``
    header.
    """

    def __init__(self, app_name, port):
        """Create the Flask application and wire up the probe endpoints.

        :param app_name: name given to the Flask application; also used in
                         the log messages emitted by ``run``.
        :param port: port number passed to the Flask server in ``run``.
        """
        self.app_name = app_name
        self.port = port
        self.ctx = None

        app = Flask(app_name)
        self.application = app

        # Both probes are plain GET endpoints backed by the abstract hooks.
        probes = (
            ('/ready', self.readiness_status),
            ('/alive', self.liveness_status),
        )
        for url, handler in probes:
            app.add_url_rule(url, methods=['GET'], view_func=handler)

        # Stamp the header on every outgoing response in one place so the
        # individual handlers do not have to supply it themselves.
        @app.after_request
        def _close_connection(resp):
            resp.headers['Connection'] = 'close'
            return resp

    @abc.abstractmethod
    def readiness_status(self):
        """Serve ``GET /ready``; concrete subclasses must override this."""
        raise NotImplementedError()

    @abc.abstractmethod
    def liveness_status(self):
        """Serve ``GET /alive``; concrete subclasses must override this."""
        raise NotImplementedError()

    def run(self):
        """Start the Flask server on address ``::`` and the configured port.

        Any exception raised by the Flask server is logged with its
        traceback and then re-raised to the caller.
        """
        # Disable obtrusive werkzeug logs.
        werkzeug_log = logging.getLogger('werkzeug')
        werkzeug_log.setLevel(logging.WARNING)

        bind_address = '::'
        LOG.info('Starting %s health check server on %s:%d.',
                 self.app_name, bind_address, self.port)
        try:
            self.application.run(bind_address, self.port)
        except Exception:
            LOG.exception('Failed to start %s health check server.',
                          self.app_name)
            raise

    def verify_k8s_connection(self):
        """Probe the Kubernetes API ``/healthz`` endpoint.

        :returns: ``True`` when the request completes without raising,
                  ``False`` when any exception is raised by the request.
        """
        client = clients.get_kubernetes_client()
        try:
            client.get('/healthz', json=False,
                       headers={'Connection': 'close'})
        except Exception as err:
            # LOG.error rather than LOG.exception on purpose: the K8s API
            # error text can be long and should not be repeated in the log.
            LOG.error('Exception when trying to reach Kubernetes API: %s.',
                      err)
            return False
        else:
            return True