Fix logging in eventlet native threads

There is a bug in eventlet where logging within a native thread can lead to a deadlock situation: eventlet/eventlet#432 When encountered with this issue some projects in OpenStack using oslo.log, eg. Cinder, resolve them by removing any logging withing native threads. There is actually a better approach. The Swift team came up with a solution a long time ago [1], and in this patch that fix is included as part of the setup method, but will only be run if the eventlet library has already been loaded. This patch adds the eventlet library as a testing dependency for the PipeMutext unit tests. [1]: https://opendev.org/openstack/swift/commit/69c715c505cf9e5df29dc1dff2fa1a4847471cb6 Closes-Bug: #1983863 Change-Id: Iac1b0891ae584ce4b95964e6cdc0ff2483a4e57d
openstack · Aug 8, 2022 · 94b9dc3 · 94b9dc3
1 parent 6401da7
commit 94b9dc3
Show file tree

Hide file tree

Showing 5 changed files with 376 additions and 0 deletions.
diff --git a/oslo_log/log.py b/oslo_log/log.py
@@ -265,8 +265,25 @@ def register_options(conf):
     conf.register_mutate_hook(_mutate_hook)
 
 
+def _fix_eventlet_logging():
+    """Properly setup logging with eventlet on native threads.
+
+    Workaround for: https://github.com/eventlet/eventlet/issues/432
+    """
+
+    # If eventlet was not loaded before call to setup assume it's not used.
+    eventlet = sys.modules.get('eventlet')
+    if eventlet:
+        import eventlet.green.threading
+        from oslo_log import pipe_mutex
+        logging.threading = eventlet.green.threading
+        logging._lock = logging.threading.RLock()
+        logging.Handler.createLock = pipe_mutex.pipe_createLock
+
+
 def setup(conf, product_name, version='unknown'):
     """Setup logging for the current application."""
+    _fix_eventlet_logging()
     if conf.log_config_append:
         _load_log_config(conf.log_config_append)
     else:

diff --git a/oslo_log/pipe_mutex.py b/oslo_log/pipe_mutex.py
@@ -0,0 +1,142 @@
+# Copyright (c) 2010-2012 OpenStack Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import errno
+import fcntl
+import os
+
+import eventlet
+import eventlet.debug
+import eventlet.greenthread
+import eventlet.hubs
+
+
+class PipeMutex(object):
+    """Mutex using a pipe.
+
+    Works across both greenlets and real threads, even at the same time.
+
+    Class code copied from Swift's swift/common/utils.py
+    Related eventlet bug: https://github.com/eventlet/eventlet/issues/432
+    """
+    def __init__(self):
+        self.rfd, self.wfd = os.pipe()
+
+        # You can't create a pipe in non-blocking mode; you must set it
+        # later.
+        rflags = fcntl.fcntl(self.rfd, fcntl.F_GETFL)
+        fcntl.fcntl(self.rfd, fcntl.F_SETFL, rflags | os.O_NONBLOCK)
+        os.write(self.wfd, b'-')  # start unlocked
+
+        self.owner = None
+
+        self.recursion_depth = 0
+        # Usually, it's an error to have multiple greenthreads all waiting
+        # to read the same file descriptor. It's often a sign of inadequate
+        # concurrency control; for example, if you have two greenthreads
+        # trying to use the same memcache connection, they'll end up writing
+        # interleaved garbage to the socket or stealing part of each others'
+        # responses.
+        #
+        # In this case, we have multiple greenthreads waiting on the same
+        # file descriptor by design. This lets greenthreads in real thread A
+        # wait with greenthreads in real thread B for the same mutex.
+        # Therefore, we must turn off eventlet's multiple-reader detection.
+        #
+        # It would be better to turn off multiple-reader detection for only
+        # our calls to trampoline(), but eventlet does not support that.
+        eventlet.debug.hub_prevent_multiple_readers(False)
+
+    def acquire(self, blocking=True):
+        """Acquire the mutex.
+
+        If called with blocking=False, returns True if the mutex was
+        acquired and False if it wasn't. Otherwise, blocks until the mutex
+        is acquired and returns True.
+        This lock is recursive; the same greenthread may acquire it as many
+        times as it wants to, though it must then release it that many times
+        too.
+        """
+        current_greenthread_id = id(eventlet.greenthread.getcurrent())
+        if self.owner == current_greenthread_id:
+            self.recursion_depth += 1
+            return True
+
+        while True:
+            try:
+                # If there is a byte available, this will read it and remove
+                # it from the pipe. If not, this will raise OSError with
+                # errno=EAGAIN.
+                os.read(self.rfd, 1)
+                self.owner = current_greenthread_id
+                return True
+            except OSError as err:
+                if err.errno != errno.EAGAIN:
+                    raise
+
+                if not blocking:
+                    return False
+
+                # Tell eventlet to suspend the current greenthread until
+                # self.rfd becomes readable. This will happen when someone
+                # else writes to self.wfd.
+                eventlet.hubs.trampoline(self.rfd, read=True)
+
+    def release(self):
+        """Release the mutex."""
+        current_greenthread_id = id(eventlet.greenthread.getcurrent())
+        if self.owner != current_greenthread_id:
+            raise RuntimeError("cannot release un-acquired lock")
+
+        if self.recursion_depth > 0:
+            self.recursion_depth -= 1
+            return
+
+        self.owner = None
+        os.write(self.wfd, b'X')
+
+    def close(self):
+        """Close the mutex.
+
+        This releases its file descriptors.
+        You can't use a mutex after it's been closed.
+        """
+        if self.wfd is not None:
+            os.close(self.wfd)
+            self.wfd = None
+        if self.rfd is not None:
+            os.close(self.rfd)
+            self.rfd = None
+        self.owner = None
+        self.recursion_depth = 0
+
+    def __del__(self):
+        # We need this so we don't leak file descriptors. Otherwise, if you
+        # call get_logger() and don't explicitly dispose of it by calling
+        # logger.logger.handlers[0].lock.close() [1], the pipe file
+        # descriptors are leaked.
+        #
+        # This only really comes up in tests. Service processes tend to call
+        # get_logger() once and then hang on to it until they exit, but the
+        # test suite calls get_logger() a lot.
+        #
+        # [1] and that's a completely ridiculous thing to expect callers to
+        # do, so nobody does it and that's okay.
+        self.close()
+
+
+def pipe_createLock(self):
+    """Replacement for logging.Handler.createLock method."""
+    self.lock = PipeMutex()
diff --git a/oslo_log/tests/unit/test_pipe_mutex.py b/oslo_log/tests/unit/test_pipe_mutex.py
@@ -0,0 +1,209 @@
+# Copyright (c) 2010-2012 OpenStack Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import contextlib
+import unittest
+from unittest import mock
+
+import eventlet
+from eventlet import debug as eventlet_debug
+from eventlet import greenpool
+
+from oslo_log import pipe_mutex
+
+
+@contextlib.contextmanager
+def quiet_eventlet_exceptions():
+    orig_state = greenpool.DEBUG
+    eventlet_debug.hub_exceptions(False)
+    try:
+        yield
+    finally:
+        eventlet_debug.hub_exceptions(orig_state)
+
+
+class TestPipeMutex(unittest.TestCase):
+    """From  Swift's test/unit/common/test_utils.py"""
+    def setUp(self):
+        self.mutex = pipe_mutex.PipeMutex()
+
+    def tearDown(self):
+        self.mutex.close()
+
+    def test_nonblocking(self):
+        evt_lock1 = eventlet.event.Event()
+        evt_lock2 = eventlet.event.Event()
+        evt_unlock = eventlet.event.Event()
+
+        def get_the_lock():
+            self.mutex.acquire()
+            evt_lock1.send('got the lock')
+            evt_lock2.wait()
+            self.mutex.release()
+            evt_unlock.send('released the lock')
+
+        eventlet.spawn(get_the_lock)
+        evt_lock1.wait()  # Now, the other greenthread has the lock.
+
+        self.assertFalse(self.mutex.acquire(blocking=False))
+        evt_lock2.send('please release the lock')
+        evt_unlock.wait()  # The other greenthread has released the lock.
+        self.assertTrue(self.mutex.acquire(blocking=False))
+
+    def test_recursive(self):
+        self.assertTrue(self.mutex.acquire(blocking=False))
+        self.assertTrue(self.mutex.acquire(blocking=False))
+
+        def try_acquire_lock():
+            return self.mutex.acquire(blocking=False)
+
+        self.assertFalse(eventlet.spawn(try_acquire_lock).wait())
+        self.mutex.release()
+        self.assertFalse(eventlet.spawn(try_acquire_lock).wait())
+        self.mutex.release()
+        self.assertTrue(eventlet.spawn(try_acquire_lock).wait())
+
+    def test_release_without_acquire(self):
+        self.assertRaises(RuntimeError, self.mutex.release)
+
+    def test_too_many_releases(self):
+        self.mutex.acquire()
+        self.mutex.release()
+        self.assertRaises(RuntimeError, self.mutex.release)
+
+    def test_wrong_releaser(self):
+        self.mutex.acquire()
+        with quiet_eventlet_exceptions():
+            self.assertRaises(RuntimeError,
+                              eventlet.spawn(self.mutex.release).wait)
+
+    def test_blocking(self):
+        evt = eventlet.event.Event()
+
+        sequence = []
+
+        def coro1():
+            eventlet.sleep(0)  # let coro2 go
+
+            self.mutex.acquire()
+            sequence.append('coro1 acquire')
+            evt.send('go')
+            self.mutex.release()
+            sequence.append('coro1 release')
+
+        def coro2():
+            evt.wait()  # wait for coro1 to start us
+            self.mutex.acquire()
+            sequence.append('coro2 acquire')
+            self.mutex.release()
+            sequence.append('coro2 release')
+
+        c1 = eventlet.spawn(coro1)
+        c2 = eventlet.spawn(coro2)
+
+        c1.wait()
+        c2.wait()
+
+        self.assertEqual(sequence, [
+            'coro1 acquire',
+            'coro1 release',
+            'coro2 acquire',
+            'coro2 release'])
+
+    def test_blocking_tpool(self):
+        # Note: this test's success isn't a guarantee that the mutex is
+        # working. However, this test's failure means that the mutex is
+        # definitely broken.
+        sequence = []
+
+        def do_stuff():
+            n = 10
+            while n > 0:
+                self.mutex.acquire()
+                sequence.append("<")
+                eventlet.sleep(0.0001)
+                sequence.append(">")
+                self.mutex.release()
+                n -= 1
+
+        greenthread1 = eventlet.spawn(do_stuff)
+        greenthread2 = eventlet.spawn(do_stuff)
+
+        real_thread1 = eventlet.patcher.original('threading').Thread(
+            target=do_stuff)
+        real_thread1.start()
+
+        real_thread2 = eventlet.patcher.original('threading').Thread(
+            target=do_stuff)
+        real_thread2.start()
+
+        greenthread1.wait()
+        greenthread2.wait()
+        real_thread1.join()
+        real_thread2.join()
+
+        self.assertEqual(''.join(sequence), "<>" * 40)
+
+    def test_blocking_preserves_ownership(self):
+        pthread1_event = eventlet.patcher.original('threading').Event()
+        pthread2_event1 = eventlet.patcher.original('threading').Event()
+        pthread2_event2 = eventlet.patcher.original('threading').Event()
+        thread_id = []
+        owner = []
+
+        def pthread1():
+            thread_id.append(id(eventlet.greenthread.getcurrent()))
+            self.mutex.acquire()
+            owner.append(self.mutex.owner)
+            pthread2_event1.set()
+
+            orig_os_write = pipe_mutex.os.write
+
+            def patched_os_write(*a, **kw):
+                try:
+                    return orig_os_write(*a, **kw)
+                finally:
+                    pthread1_event.wait()
+
+            with mock.patch.object(pipe_mutex.os, 'write', patched_os_write):
+                self.mutex.release()
+            pthread2_event2.set()
+
+        def pthread2():
+            pthread2_event1.wait()  # ensure pthread1 acquires lock first
+            thread_id.append(id(eventlet.greenthread.getcurrent()))
+            self.mutex.acquire()
+            pthread1_event.set()
+            pthread2_event2.wait()
+            owner.append(self.mutex.owner)
+            self.mutex.release()
+
+        real_thread1 = eventlet.patcher.original('threading').Thread(
+            target=pthread1)
+        real_thread1.start()
+
+        real_thread2 = eventlet.patcher.original('threading').Thread(
+            target=pthread2)
+        real_thread2.start()
+
+        real_thread1.join()
+        real_thread2.join()
+        self.assertEqual(thread_id, owner)
+        self.assertIsNone(self.mutex.owner)
+
+    @classmethod
+    def tearDownClass(cls):
+        # PipeMutex turns this off when you instantiate one
+        eventlet.debug.hub_prevent_multiple_readers(True)
diff --git a/releasenotes/notes/native-threads-logging-cc84f7288c4835a0.yaml b/releasenotes/notes/native-threads-logging-cc84f7288c4835a0.yaml
@@ -0,0 +1,6 @@
+---
+fixes:
+  - |
+    `Bug #1983863
+    <https://bugs.launchpad.net/oslo.log/+bug/1983863>`_: Fixed logging in
+    eventlet native threads.
diff --git a/test-requirements.txt b/test-requirements.txt
@@ -16,3 +16,5 @@ bandit>=1.6.0,<1.7.0 # Apache-2.0
 fixtures>=3.0.0 # Apache-2.0/BSD
 
 pre-commit>=2.6.0 # MIT
+
+eventlet>=0.30.1,!=0.32.0 # MIT