
Add persistence to cache.FileCache #90

Merged 2 commits on Dec 2, 2019
73 changes: 65 additions & 8 deletions chainerio/cache/file_cache.py
@@ -100,9 +100,6 @@ class FileCache(cache.Cache):
dir (str): The path to the directory to place cache data in
case home directory is not backed by fast storage device.

TODO(kuenishi): retain cache file in case of correct process
termination and reuse for future process re-invocation.

'''

def __init__(self, length, multithread_safe=False, do_pickle=False,
@@ -126,9 +123,7 @@ def __init__(self, length, multithread_safe=False, do_pickle=False,

self.closed = False
self.indexfp = tempfile.NamedTemporaryFile(delete=True, dir=self.dir)
self.indexfile = self.indexfp.name
self.datafp = tempfile.NamedTemporaryFile(delete=True, dir=self.dir)
self.datafile = self.datafp.name

# allocate space to store 2n 64bit unsigned integers
# 16 bytes * n chunks
@@ -142,15 +137,23 @@ def __init__(self, length, multithread_safe=False, do_pickle=False,
assert r == self.buflen
self.verbose = verbose
if self.verbose:
print('created index file:', self.indexfile)
print('created data file:', self.datafile)
print('created index file:', self.indexfp.name)
print('created data file:', self.datafp.name)

self._frozen = False

def __len__(self):
return self.length

@property
def frozen(self):
return self._frozen

@property
def multiprocess_safe(self):
return False
# If it's preserved/preloaded, then the file contents are
# fixed.
return self._frozen

@property
def multithread_safe(self):
@@ -178,6 +181,7 @@ def _get(self, i):
return data

def put(self, i, data):
assert not self._frozen
try:
if self.do_pickle:
data = pickle.dumps(data)
@@ -251,3 +255,56 @@ def close(self):
self.datafp.close()
self.indexfp = None
self.datafp = None

def preload(self, name):
Member:

Is it possible to split the preload and the freeze of the cache?
In the current implementation, the user has to do the following to freeze the cache and make it multiprocess-safe:
load => preserve => preload

If we can split these two functionalities, then we can simply do "load" => "freeze". Moreover, the cache could still be modified before "freeze".
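The split being proposed here can be sketched as a hypothetical standalone freeze() API. The class and method names below are illustrative only and are not part of this PR (the merged code instead freezes inside preserve()/preload()):

```python
# Hypothetical sketch of the proposed split: an in-memory stand-in for
# a cache that accepts writes until freeze() is called, after which the
# contents are fixed and can be shared read-only across worker processes.
class FreezableCache:
    def __init__(self):
        self._data = {}
        self._frozen = False

    def put(self, i, data):
        # Writes are rejected once the cache is frozen.
        assert not self._frozen
        self._data[i] = data

    def get(self, i):
        return self._data.get(i)

    def freeze(self):
        # After this point no new entries can be added, so concurrent
        # readers (e.g. forked workers) see a fixed set of contents.
        self._frozen = True


cache = FreezableCache()
cache.put(1, b'bar')   # "load": read from remote storage, then put
cache.freeze()         # no preserve() needed just for multiprocess reads
```

Under this sketch, a job that fails during a put dies before freeze() is ever called, which is the reviewer's argument that the design does not add broken-cache risk.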

Member Author:

What do you mean by "load"?
If "load" means writing data into cache files, that amounts to creating a file cache with a specific name, if I understand correctly. But that is unacceptable, because it leads to partially written named files being left behind after a job failure, and those will definitely mess up the cache and any subsequent re-run of the job.

Also, the principle of the cache in this module is that data is immutable and deterministic; there should be no modification. What do you mean by "modified"?

Member:

Sorry for causing confusion: by "load" I mean reading data from remote storage. If I understand correctly, in order to use this module the user has to do the following:

  1. read data from remote storage
  2. preserve
  3. preload

My suggestion is to have a "freeze" function, so that we can instead do:

  1. read data from remote storage
  2. freeze

to support multiprocessing. I do not think it would increase the risk of having a broken cache.

By "modifying", I do not mean changing the data itself, but the ability to add new cache entries.

Member Author (@kuenishi, Nov 29, 2019):

> I do not think it would increase the risk of having a broken cache.

I believe it definitely happens. For example:

  1. Create a cache file named "foo.cache"
  2. Write 50% of the data to "foo.cache", e.g. put(1, b'bar')
  3. Job fails

Even in this case a partial write may happen, e.g. b'ba' was written to disk but b'r' wasn't. Then, on restarting the job:

  1. Open the 50%-written cache file named "foo.cache"
  2. Can't run put(1, b'bar'), as entry 1 is immutable
  3. Job fails

In this case it is neither possible to put the data nor to run get(1) if a partial write happened. This is a typical scenario of broken data, and broken cache files must be thrown away.

Member (@belldandyxtq, Nov 29, 2019):

I think we are misunderstanding each other. In your code you integrated the "freeze" into preserve, which I had misunderstood, so step 3 is not necessary. But that design forces the user to preserve the cache, which is not always necessary: for example, they may just want to enable multiprocessing, which only needs "freezing" the cache. Preserving the data disables the automatic deletion of the cache files.

In my example, the cache should not be frozen until all the data is loaded, and after "freeze" no data can be added to the cache. Then, when the job fails at a "put", which is necessarily before the "freeze", the cache will be neither frozen nor preserved. That is why I think such a design would not increase the risk of having a broken cache.

Sorry for the confusion.

Member Author:

Automatic deletion of the cache among multiple processes does not work correctly, because there is no synchronization on closing the file objects: child processes can't read the contents after the parent process exits. This is because Python's tempfile module explicitly unlinks named temporary files. Please try the following script:

import tempfile
import os
import time

with tempfile.NamedTemporaryFile() as f:
    f.write(b'foo')
    f.flush()
    name = f.name

    pid = os.fork()
    print('pid', pid)
    if 0 == pid:
        pname = 'child'
    else:
        pname = 'parent'

    f2 = open(name, 'rb')
    assert f2
    time.sleep(1)
    if pname == 'child':
        time.sleep(1)

You'll see this:

Traceback (most recent call last):
  File "t.py", line 22, in <module>
    time.sleep(1)
  File "/usr/lib/python3.7/tempfile.py", line 500, in __exit__
    self.close()
  File "/usr/lib/python3.7/tempfile.py", line 507, in close
    self._closer.close()
  File "/usr/lib/python3.7/tempfile.py", line 444, in close
    unlink(self.name)
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/tmplpb16mgk'

Member:

I see, thank you for the explanation. Can you fix the other issues?

'''Load the cache saved by ``preserve()``

After loading the files, no data can be added to the cache.
``name`` is the prefix of the persistent files. To use cache
in ``multiprocessing`` environment, call this method at every
forked process, except the process that called ``preserve()``.

.. note:: This feature is experimental.

'''
if self._frozen:
return

indexfile = os.path.join(self.dir, '{}.cachei'.format(name))
datafile = os.path.join(self.dir, '{}.cached'.format(name))

with self.lock.wrlock():
# Hard link and save them
self.indexfp.close()
self.datafp.close()

self.indexfp = open(indexfile, 'rb')
self.datafp = open(datafile, 'rb')
self._frozen = True

def preserve(self, name):
'''Preserve the cache as persistent files on the disk

Once the cache is preserved, cache files will not be removed
at cache close. To read data from preserved files, use
``preload()`` method. After preservation, no data can be added
to the cache. ``name`` is the prefix of the persistent
files.

.. note:: This feature is experimental.

'''

indexfile = os.path.join(self.dir, '{}.cachei'.format(name))
datafile = os.path.join(self.dir, '{}.cached'.format(name))

with self.lock.wrlock():
# Hard link and save them
os.link(self.indexfp.name, indexfile)
os.link(self.datafp.name, datafile)
self.indexfp.close()
self.datafp.close()

self.indexfp = open(indexfile, 'rb')
self.datafp = open(datafile, 'rb')
self._frozen = True
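The core of preserve() above is the os.link() call: the anonymous temporary file gains a second, named directory entry, so unlinking the temporary name no longer deletes the data. A minimal standalone sketch of that trick, independent of chainerio (the file names here are illustrative):

```python
import os
import tempfile

with tempfile.TemporaryDirectory() as d:
    # Anonymous cache file; tempfile normally unlinks it on close.
    tmp = tempfile.NamedTemporaryFile(delete=True, dir=d)
    tmp.write(b'cached bytes')
    tmp.flush()

    # Hard link: a second name pointing at the same inode.
    persistent = os.path.join(d, 'preserved.cached')
    os.link(tmp.name, persistent)

    # Closing unlinks only the temporary name; the data survives
    # under the persistent name.
    tmp.close()
    with open(persistent, 'rb') as f:
        recovered = f.read()
```

This is why a preserved cache outlives close(): the inode keeps a positive link count even after the temporary name is gone.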
2 changes: 1 addition & 1 deletion docs/source/reference.rst
@@ -40,7 +40,7 @@ Cache API
:members:

.. autoclass:: FileCache
:members:
:members: preserve, preload

Chainer Extensions API
----------------------
23 changes: 23 additions & 0 deletions tests/cache_tests/test_file_cache.py
@@ -1,5 +1,6 @@
from chainerio.cache import FileCache
import os
import tempfile

import pytest

@@ -28,3 +29,25 @@ def mock_pread(_fd, _buf, _offset):

with pytest.raises(OSError):
cache.put(4, str(4))


def test_preservation():
with tempfile.TemporaryDirectory() as d:
cache = FileCache(10, dir=d, do_pickle=True)

for i in range(10):
cache.put(i, str(i))

cache.preserve('preserved')

for i in range(10):
assert str(i) == cache.get(i)

cache.close()

# Imitating a new process, fresh load
cache2 = FileCache(10, dir=d, do_pickle=True)

cache2.preload('preserved')
for i in range(10):
assert str(i) == cache2.get(i)