Skip to content

Commit dc8da5b

Browse files
committed
Use "poll" or "selects" Eventlet hub for all Swift daemons.
Previously, Swift's WSGI servers, the object replicator, and the object reconstructor were setting Eventlet's hub to either "poll" or "selects", depending on availability. Other daemons were letting Eventlet use its default hub, which is "epoll". In any daemons that fork, we really don't want to use epoll. Epoll instances end up shared between the parent and all children, and you get some awful messes when file descriptors are shared. Here's an example where two processes are trying to wait on the same file descriptor using the same epoll instance, and everything goes wrong: [proc A] epoll_ctl(6, EPOLL_CTL_ADD, 3, ...) = 0 [proc B] epoll_ctl(6, EPOLL_CTL_ADD, 3, ...) = -1 EEXIST (File exists) [proc B] epoll_wait(6, ...) = 1 [proc B] epoll_ctl(6, EPOLL_CTL_DEL, 3, ...) = 0 [proc A] epoll_wait(6, ...) This primarily affects the container updater and object updater since they fork. I've decided to change the hub for all Swift daemons so that we don't add multiprocessing support to some other daemon someday and suffer through this same bug again. This problem was made more apparent by commit 6d16079, which made our logging mutex use file descriptors. However, it could have struck on any shared file descriptor on which a read or write returned EAGAIN. Change-Id: Ic2c1178ac918c88b0b901e581eb4fab3b2666cfe Closes-Bug: 1722951
1 parent b70436f commit dc8da5b

File tree

5 files changed

+38
-18
lines changed

5 files changed

+38
-18
lines changed

swift/common/daemon.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from re import sub
2222

2323
import eventlet.debug
24+
from eventlet.hubs import use_hub
2425

2526
from swift.common import utils
2627

@@ -266,6 +267,8 @@ def run_daemon(klass, conf_file, section_name='', once=False, **kwargs):
266267
# and results in an exit code of 1.
267268
sys.exit(e)
268269

270+
use_hub(utils.get_hub())
271+
269272
# once on command line (i.e. daemonize=false) will over-ride config
270273
once = once or not utils.config_true_value(conf.get('daemonize', 'true'))
271274

swift/common/utils.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1982,6 +1982,25 @@ def get_hub():
19821982
getting swallowed somewhere. Then when that file descriptor
19831983
was re-used, eventlet would freak right out because it still
19841984
thought it was waiting for activity from it in some other coro.
1985+
1986+
Another note about epoll: it's hard to use when forking. epoll works
1987+
like so:
1988+
1989+
* create an epoll instance: efd = epoll_create(...)
1990+
1991+
* register file descriptors of interest with epoll_ctl(efd,
1992+
EPOLL_CTL_ADD, fd, ...)
1993+
1994+
* wait for events with epoll_wait(efd, ...)
1995+
1996+
If you fork, you and all your child processes end up using the same
1997+
epoll instance, and everyone becomes confused. It is possible to use
1998+
epoll and fork and still have a correct program as long as you do the
1999+
right things, but eventlet doesn't do those things. Really, it can't
2000+
even try to do those things since it doesn't get notified of forks.
2001+
2002+
In contrast, both poll() and select() specify the set of interesting
2003+
file descriptors with each call, so there's no problem with forking.
19852004
"""
19862005
try:
19872006
import select

swift/obj/reconstructor.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,13 @@
2626
import six.moves.cPickle as pickle
2727
import shutil
2828

29-
from eventlet import (GreenPile, GreenPool, Timeout, sleep, hubs, tpool,
30-
spawn)
29+
from eventlet import (GreenPile, GreenPool, Timeout, sleep, tpool, spawn)
3130
from eventlet.support.greenlets import GreenletExit
3231

3332
from swift import gettext_ as _
3433
from swift.common.utils import (
3534
whataremyips, unlink_older_than, compute_eta, get_logger,
36-
dump_recon_cache, mkdirs, config_true_value, list_from_csv, get_hub,
35+
dump_recon_cache, mkdirs, config_true_value, list_from_csv,
3736
tpool_reraise, GreenAsyncPile, Timestamp, remove_file)
3837
from swift.common.header_key_dict import HeaderKeyDict
3938
from swift.common.bufferedhttp import http_connect
@@ -51,9 +50,6 @@
5150
SYNC, REVERT = ('sync_only', 'sync_revert')
5251

5352

54-
hubs.use_hub(get_hub())
55-
56-
5753
def _get_partners(frag_index, part_nodes):
5854
"""
5955
Returns the left and right partners of the node whose index is

swift/obj/replicator.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,15 @@
2525
from swift import gettext_ as _
2626

2727
import eventlet
28-
from eventlet import GreenPool, tpool, Timeout, sleep, hubs
28+
from eventlet import GreenPool, tpool, Timeout, sleep
2929
from eventlet.green import subprocess
3030
from eventlet.support.greenlets import GreenletExit
3131

3232
from swift.common.ring.utils import is_local_device
3333
from swift.common.utils import whataremyips, unlink_older_than, \
3434
compute_eta, get_logger, dump_recon_cache, ismount, \
3535
rsync_module_interpolation, mkdirs, config_true_value, list_from_csv, \
36-
get_hub, tpool_reraise, config_auto_int_value, storage_directory
36+
tpool_reraise, config_auto_int_value, storage_directory
3737
from swift.common.bufferedhttp import http_connect
3838
from swift.common.daemon import Daemon
3939
from swift.common.http import HTTP_OK, HTTP_INSUFFICIENT_STORAGE
@@ -43,8 +43,6 @@
4343

4444
DEFAULT_RSYNC_TIMEOUT = 900
4545

46-
hubs.use_hub(get_hub())
47-
4846

4947
def _do_listdir(partition, replication_cycle):
5048
return (((partition + replication_cycle) % 10) == 0)

test/unit/common/test_daemon.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -139,13 +139,16 @@ def test_signal(self):
139139

140140
def test_run_daemon(self):
141141
sample_conf = "[my-daemon]\nuser = %s\n" % getuser()
142-
with tmpfile(sample_conf) as conf_file:
143-
with mock.patch.dict('os.environ', {'TZ': ''}):
144-
with mock.patch('time.tzset') as mock_tzset:
145-
daemon.run_daemon(MyDaemon, conf_file)
146-
self.assertTrue(MyDaemon.forever_called)
147-
self.assertEqual(os.environ['TZ'], 'UTC+0')
148-
self.assertEqual(mock_tzset.mock_calls, [mock.call()])
142+
with tmpfile(sample_conf) as conf_file, \
143+
mock.patch('swift.common.daemon.use_hub') as mock_use_hub:
144+
with mock.patch.dict('os.environ', {'TZ': ''}), \
145+
mock.patch('time.tzset') as mock_tzset:
146+
daemon.run_daemon(MyDaemon, conf_file)
147+
self.assertTrue(MyDaemon.forever_called)
148+
self.assertEqual(os.environ['TZ'], 'UTC+0')
149+
self.assertEqual(mock_tzset.mock_calls, [mock.call()])
150+
self.assertEqual(mock_use_hub.mock_calls,
151+
[mock.call(utils.get_hub())])
149152
daemon.run_daemon(MyDaemon, conf_file, once=True)
150153
self.assertEqual(MyDaemon.once_called, True)
151154

@@ -182,7 +185,8 @@ def test_run_daemon_diff_tz(self):
182185
self.assertEqual(18000, time.timezone)
183186

184187
sample_conf = "[my-daemon]\nuser = %s\n" % getuser()
185-
with tmpfile(sample_conf) as conf_file:
188+
with tmpfile(sample_conf) as conf_file, \
189+
mock.patch('swift.common.daemon.use_hub'):
186190
daemon.run_daemon(MyDaemon, conf_file)
187191
self.assertFalse(MyDaemon.once_called)
188192
self.assertTrue(MyDaemon.forever_called)

0 commit comments

Comments
 (0)