Lock the data file and config file properly
When we write to the config file, we lock it exclusively instead of dumping the
config to a temp file and renaming it.
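
A minimal sketch of that write path, assuming POSIX fcntl locking (the
path and contents here are illustrative only, not what r2e actually uses):

    import fcntl
    import os

    # Illustrative path; r2e writes back whichever config file it loaded.
    config_path = '/tmp/r2e-demo.cfg'

    # Lock the real file and write through it, rather than writing a
    # '.tmp' sibling and os.replace()-ing it over the original.
    with open(config_path, 'w') as f:
        fcntl.lockf(f, fcntl.LOCK_EX)  # blocks until no one else holds the lock
        f.write('[DEFAULT]\nto = user@example.com\n')
        f.flush()
        os.fsync(f.fileno())  # commit to disk before the lock is dropped
    # closing the file releases the lock

Unlike the rename dance, this serializes concurrent writers instead of
letting whichever rename lands last silently win.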

We lock the data file for the whole lifetime of the program, so it will
hopefully never get corrupted, and r2e instances running in parallel will
wait for the one holding the lock to exit before reading or writing the
data file.
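
The data file is handled the same way, except that the exclusive lock is
taken when the Feeds object is constructed and held until the process is
done. A minimal sketch of that pattern (again POSIX-only, with an
illustrative path):

    import fcntl
    import os

    path = '/tmp/r2e-lock-demo.json'  # illustrative; r2e derives the real path

    # Create an empty data file if none exists yet, as __init__ now does
    # before opening it.
    if not os.path.exists(path):
        with open(path, 'w') as f:
            f.write('{}')

    # Open read/write without truncating, then take an exclusive lock.
    # lockf() blocks until whoever holds the lock releases it (or exits),
    # so two instances never touch the data file concurrently.
    datafile = open(path, 'r+')
    fcntl.lockf(datafile, fcntl.LOCK_EX)

    # ... read, update, and rewrite the data file under the lock ...

    datafile.close()  # closing the file releases the lock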
kaashif committed Feb 1, 2020
1 parent 595ce9e commit 18d58b4
Showing 2 changed files with 49 additions and 40 deletions.
87 changes: 47 additions & 40 deletions rss2email/feeds.py
@@ -132,10 +132,33 @@ def __init__(self, configfiles=None, datafile_path=None, config=None):
         if datafile_path is None:
             datafile_path = self._get_datafile_path()
         self.datafile_path = _os.path.realpath(datafile_path)
+
+        if not _os.path.exists(self.datafile_path):
+            _LOG.info('feed data file not found at {}'.format(self.datafile_path))
+            _LOG.debug('creating an empty data file')
+            dirname = _os.path.dirname(self.datafile_path)
+            if dirname and not _os.path.isdir(dirname):
+                _os.makedirs(dirname, mode=0o700, exist_ok=True)
+            with open(self.datafile_path, 'w') as f:
+                self._save_feed_states(feeds=[], stream=f)
+
         if config is None:
             config = _config.CONFIG
         self.config = config
-        self._datafile = None
+
+        # We immediately acquire a lock so that there are never two instances of
+        # rss2email working on the same data file at the same time. This
+        # avoids errors when e.g. a user is running r2e at the same time cron
+        # is.
+        try:
+            self.datafile = _codecs.open(self.datafile_path, 'r+', self.datafile_encoding)
+        except IOError as e:
+            raise _error.DataFileError(feeds=self) from e
+
+        _fcntl.lockf(self.datafile, _fcntl.LOCK_EX)
+
+    def close(self):
+        self.datafile.close()
 
     def __getitem__(self, key):
         for feed in self:
@@ -242,37 +265,19 @@ def load(self, lock=True, require=False):
 
     def _load_feeds(self, lock, require):
         _LOG.debug('load feed data from {}'.format(self.datafile_path))
-        if not _os.path.exists(self.datafile_path):
-            if require:
-                raise _error.NoDataFile(feeds=self)
-            _LOG.info('feed data file not found at {}'.format(self.datafile_path))
-            _LOG.debug('creating an empty data file')
-            dirname = _os.path.dirname(self.datafile_path)
-            if dirname and not _os.path.isdir(dirname):
-                _os.makedirs(dirname, mode=0o700, exist_ok=True)
-            with _codecs.open(self.datafile_path, 'w', self.datafile_encoding) as f:
-                self._save_feed_states(feeds=[], stream=f)
-        try:
-            self._datafile = _codecs.open(
-                self.datafile_path, 'r', self.datafile_encoding)
-        except IOError as e:
-            raise _error.DataFileError(feeds=self) from e
-
-        locktype = 0
-        if lock:
-            locktype = _fcntl.LOCK_SH
-            _fcntl.lockf(self._datafile.fileno(), locktype)
+
         self.clear()
 
         level = _LOG.level
         handlers = list(_LOG.handlers)
         feeds = []
 
         try:
-            data = _json.load(self._datafile)
+            data = _json.load(self.datafile)
         except ValueError as e:
             _LOG.info('could not load data file using JSON')
-            data = self._load_pickled_data(self._datafile)
+            data = self._load_pickled_data()
+
         version = data.get('version', None)
         if version != self.datafile_version:
             data = self._upgrade_state_data(data)
@@ -289,10 +294,6 @@ def _load_feeds(self, lock, require):
         _LOG.handlers = handlers
         self.extend(feeds)
 
-        if locktype == 0:
-            self._datafile.close()
-            self._datafile = None
-
         for feed in self:
             feed.load_from_config(self.config)
 
@@ -312,8 +313,9 @@ def key(feed):
                 return order[feed.name]
             self.sort(key=key)
 
-    def _load_pickled_data(self, stream):
+    def _load_pickled_data(self):
         _LOG.info('try and load data file using Pickle')
+        # Need to open again because self.datafile is opened for text, not binary
         with open(self.datafile_path, 'rb') as f:
             feeds = list(feed.get_state() for feed in _pickle.load(f))
         return {
@@ -337,32 +339,37 @@ def save(self):
         dst_config_file = _os.path.realpath(self.configfiles[-1])
         _LOG.debug('save feed configuration to {}'.format(dst_config_file))
         for feed in self:
+            # Saves feed config to the config object, not the actual file (yet)
             feed.save_to_config()
         dirname = _os.path.dirname(dst_config_file)
         if dirname and not _os.path.isdir(dirname):
             _os.makedirs(dirname, mode=0o700, exist_ok=True)
-        tmpfile = dst_config_file + '.tmp'
-        with open(tmpfile, 'w') as f:
+
+        with open(dst_config_file, 'w') as f:
+            _fcntl.lockf(f, _fcntl.LOCK_EX) # lock for writing
             self.config.write(f)
             f.flush()
             _os.fsync(f.fileno())
-        _os.replace(tmpfile, dst_config_file)
+
         self._save_feeds()
 
     def _save_feeds(self):
         _LOG.debug('save feed data to {}'.format(self.datafile_path))
         dirname = _os.path.dirname(self.datafile_path)
         if dirname and not _os.path.isdir(dirname):
             _os.makedirs(dirname, mode=0o700, exist_ok=True)
-        tmpfile = self.datafile_path + '.tmp'
-        with _codecs.open(tmpfile, 'w', self.datafile_encoding) as f:
-            self._save_feed_states(feeds=self, stream=f)
-            f.flush()
-            _os.fsync(f.fileno())
-        _os.replace(tmpfile, self.datafile_path)
-        if self._datafile is not None:
-            self._datafile.close() # release the lock
-            self._datafile = None
+
+        # Note: the data file is locked for writing for the entire lifetime
+        # of the feeds object, so the following is safe.
+
+        # We don't want to close the file and release the lock (allowing
+        # another rss2email to overwrite the data under us), so we do a
+        # seek and truncate
+        self.datafile.seek(0)
+        self._save_feed_states(feeds=self, stream=self.datafile)
+        self.datafile.truncate()
+        self.datafile.flush()
+        _os.fsync(self.datafile.fileno())
 
     def _save_feed_states(self, feeds, stream):
         _json.dump(
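The in-place rewrite in _save_feeds above is the subtle part: the old
tmpfile-plus-os.replace approach would swap a new inode into place, so the
lock held on the still-open old file would no longer protect what other
processes see at that path. A rough sketch of why each step of the
seek/truncate sequence is there (the helper and its arguments are
illustrative, not part of the patch):

    import json
    import os

    def rewrite_in_place(datafile, state):
        """Rewrite an open, already-locked file without closing it."""
        # Closing the file (or replacing the path) would drop the fcntl
        # lock and let another process cut in; seeking back to the start
        # keeps the same descriptor, and therefore the lock, alive.
        datafile.seek(0)             # overwrite from the beginning
        json.dump(state, datafile)
        datafile.truncate()          # drop any tail left from a longer old state
        datafile.flush()             # push Python's buffer to the OS
        os.fsync(datafile.fileno())  # ask the OS to commit it to disk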
2 changes: 2 additions & 0 deletions rss2email/main.py
@@ -174,6 +174,8 @@ def run(*args, **kwargs):
             if _logging.ERROR - 10 * args.verbose < _logging.DEBUG:
                 raise # don't mask the traceback
             _sys.exit(1)
+    finally:
+        feeds.close()
 
 
 if __name__ == '__main__':
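Because the lock is acquired in Feeds.__init__ and released only by the new
close() method, the finally clause guarantees the lock is dropped even when
a run fails. Roughly, the calling pattern now looks like this (subcommand
dispatch elided):

    import rss2email.feeds

    feeds = rss2email.feeds.Feeds()  # opens and locks the data file
    try:
        feeds.load()
        # ... run the requested subcommand ...
    finally:
        feeds.close()  # always release the lock, even on error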
