From e08f7f000c9d8fcbe3af0307bf8c1be4376820d4 Mon Sep 17 00:00:00 2001 From: xzhu1900 Date: Mon, 17 Jun 2019 18:29:15 -0700 Subject: [PATCH 1/5] add save/load support --- test/cpp/api/dataloader.cpp | 204 +++++++++++++++++- .../api/include/torch/data/datasets/chunk.h | 40 +++- .../include/torch/data/datasets/stateful.h | 3 + 3 files changed, 237 insertions(+), 10 deletions(-) diff --git a/test/cpp/api/dataloader.cpp b/test/cpp/api/dataloader.cpp index bb8552d38d225..8a125d3e7ebbe 100644 --- a/test/cpp/api/dataloader.cpp +++ b/test/cpp/api/dataloader.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -98,7 +99,10 @@ TEST(DataTest, ChunkDataSetWithInvalidInitParameter) { samplers::SequentialSampler sampler(0); auto initialization_function = - [&](size_t preloader_count, size_t batch_size, size_t cache_size) { + [&](size_t preloader_count, + size_t batch_size, + size_t cache_size, + std::string checkpoint_file_name = "") { datasets::SharedBatchDatasetbegin(); -} \ No newline at end of file +} + +TEST(DataLoaderTest, ChunkDatasetSave) { + const size_t chunk_count_ = 6; + const size_t chunk_size = 10; + + struct DummyTestChunkDataReader : datasets::ChunkDataReader { + public: + using BatchType = datasets::ChunkDataReader::ChunkType; + + BatchType read_chunk(size_t chunk_index) override { + return batch_data_; + } + + size_t chunk_count() override { + return chunk_count_; + }; + + void reset() override{}; + BatchType batch_data_ = BatchType(chunk_size, 0); + }; + + const size_t prefetch_count = 1; + const size_t batch_size = chunk_size; + const size_t dataloader_worker_count = 0; + samplers::SequentialSampler sampler(0); + const int epoch_count = 2; + + DummyTestChunkDataReader data_reader; + + // tested save_intervals + const size_t save_intervals[] = {1, 2}; + + using datasets::ChunkDatasetOptions; + + for (auto save_interval : save_intervals) { + auto tempfile = c10::make_tempfile(); + + datasets::SharedBatchDataset> + dataset = datasets::make_shared_dataset>( + data_reader, + sampler, + sampler, + ChunkDatasetOptions( + prefetch_count, + batch_size, + chunk_size /*cache size*/)); + + auto data_loader = torch::data::make_data_loader( + dataset, + DataLoaderOptions(batch_size).workers(dataloader_worker_count)); + + for (int epoch_index = 0; epoch_index < epoch_count; ++epoch_index) { + int iteration_count = 0; + for (auto iterator = data_loader->begin(); iterator != data_loader->end(); + ++iterator, ++iteration_count) { + + if ((iteration_count + 1) % save_interval == 0) { + dataset->save(tempfile.name); + + samplers::SequentialSampler new_sampler(0); + torch::load(new_sampler, tempfile.name); + + // Verify save logic. For ChunkDataset, the chunk data is stored in a + // cache inside the dataset. One pool of threads are constantly + // writing to the cache, and a different pool of thread are constantly + // reading from the cache. Due to the nature of asynchronization, at + // the time of get_batch(), which chunk is written to the cache is not + // fully deterministic. + // But we can still calculate a restricted window on the expected + // output, hence verify the logic. In this test, the cache size is + // configured to be the same as chunk size and batch size. So the + // chunk data is written to the cache one by one. Only the current + // batch is retrieved, the next chunk is writen. Now after the first + // batch is retrieved, when we tries to retrive the second batch, + // there are three possible scenarios for the writer thread: + // 1. 
it hasn't started loading the next chunk data yet, so the + // sequential sampler index is still 0; + // 2. it started to load the second chunk, so the sequencial sampler + // index is at 1; + // 3. it finished loading the second chunk, and start to load the + // third chunk, because the cache is still fully occupied by the data + // from the second chunk, it is waiting to write to the cache. At this + // point, the sampler index is at 2. + // So now we have a window of [0, 2], which is what we expected the + // sampler to save the index from. Now noted for sequential sampler, + // it advances to the next index automatically in the call next(). So + // when save the index, it saves the next index in stead of the + // current one. In other word, after getting the first index from + // sequential sampler, it already moves to the second index. So when + // we save it, it is the second index we save. As a result, + // we need to advance the window by one. Now we have the expected + // window of [0, 3]. + // This analysis applies to all scenarios. So extend it to a more + // general case: the expected saved index should falling into the + // range of [iteration - 1, iteration + 2], which is the validation + // below. + ASSERT_TRUE( + new_sampler.index() >= std::max(0, iteration_count - 1) && + new_sampler.index() <= iteration_count + 2); + } + } + } + } +} + +TEST(DataLoaderTest, ChunkDatasetResume) { + auto tempfile = c10::make_tempfile(); + + const size_t prefetch_count = 1; + const size_t batch_size = 10; + const size_t dataloader_worker_count = 0; + const size_t save_interval = 2; + + DummyChunkDataReader data_reader; + samplers::SequentialSampler sampler(0); + + const size_t skipped_chunk = 2; + + // Configure sampler to skip 2 chunks + { + sampler.reset(data_reader.chunk_count()); + sampler.next(skipped_chunk); + torch::save(sampler, tempfile.name); + } + + // test functionality across epoch boundary. The first epoch should be + // affected by the checkpoint, but the second should start normally. + const int epoch_count = 2; + + datasets::SharedBatchDataset> + dataset = datasets::make_shared_dataset>( + data_reader, + sampler, + sampler, + datasets::ChunkDatasetOptions( + prefetch_count, + batch_size, + 20 /*cache size*/, + + tempfile.name)); + + auto data_loader = torch::data::make_data_loader( + dataset, DataLoaderOptions(batch_size).workers(dataloader_worker_count)); + + for (int epoch_index = 0; epoch_index < epoch_count; ++epoch_index) { + int iteration_count = 0; + + // For the first epoch, the returned batch should be returned from the + // third chunk, because the check point skipped the first two chunks. But + // for the next epoch, it should start from the first batch. + int initial_value = epoch_index == 0 ? 15 : 0; + + for (auto iterator = data_loader->begin(); iterator != data_loader->end(); + ++iterator, ++iteration_count) { + DummyChunkDataReader::BatchType batch = *iterator; + + std::vector expected_result; + size_t expected_size = (epoch_index > 0 && iteration_count == 3) ? 
5 : 10; + expected_result.resize(expected_size); + std::iota(expected_result.begin(), expected_result.end(), initial_value); + + ASSERT_EQ(batch.size(), expected_result.size()); + ASSERT_TRUE( + std::equal(batch.begin(), batch.end(), expected_result.begin())); + + initial_value += batch_size; + } + } + + samplers::SequentialSampler new_sampler(0); + torch::load(new_sampler, tempfile.name); + + ASSERT_EQ(new_sampler.index(), skipped_chunk); +} diff --git a/torch/csrc/api/include/torch/data/datasets/chunk.h b/torch/csrc/api/include/torch/data/datasets/chunk.h index c519a964d42f6..314804e9ae3dd 100644 --- a/torch/csrc/api/include/torch/data/datasets/chunk.h +++ b/torch/csrc/api/include/torch/data/datasets/chunk.h @@ -7,6 +7,8 @@ #include #include +#include + namespace torch { namespace data { namespace datasets { @@ -245,10 +247,12 @@ struct ChunkDatasetOptions { ChunkDatasetOptions( size_t preloader_count, size_t batch_size, - size_t cache_size = 2048) + size_t cache_size = 2048, + std::string resume_from_file = "") : preloader_count_(preloader_count), batch_size_(batch_size), - cache_size_(cache_size) { + cache_size_(cache_size), + resume_from_file_(std::move(resume_from_file)) { TORCH_CHECK( preloader_count_ > 0, "Preloader count is 0. At least one preloader needs to be specified."); @@ -270,8 +274,14 @@ struct ChunkDatasetOptions { /// The size of each batch. TORCH_ARG(size_t, batch_size); - // the capacity of the queue for batch caching. + /// The capacity of the queue for batch caching. TORCH_ARG(size_t, cache_size) = 2048; + + /// The file name from where to load ChunkDatset's state. Default to empty + /// string meaning start ChunkDataset from fresh begining; when specified with + /// a file name, ChunkDataset::reset() will try to load the sampler state from + /// that file. + TORCH_ARG(std::string, resume_from_file) = ""; }; /// A stateful dataset that support hierarchical sampling and prefetching of @@ -308,7 +318,8 @@ class ChunkDataset final example_sampler_(std::move(example_sampler)), options_(std::move(options)), quit_worker_(false), - running_preloaders_(0) {} + running_preloaders_(0), + load_checkpoint_(!options_.resume_from_file_.empty()) {} virtual ~ChunkDataset() { // stop batch buffer first. @@ -332,10 +343,14 @@ class ChunkDataset final "The requested batch size does not match with the initialized batch size.\n" " The requested batch size is ", batch_size, ", while the dataset is created with batch size equal to ", options_.batch_size_); - return batch_buffer_->get_batch(); } + void save(const std::string& save_file_name) override { + std::lock_guard lock(chunk_index_guard_); + torch::save(this->chunk_sampler(), save_file_name); + } + /// This will clear any internal state and starts the internal prefetching /// mechanism for the chunk dataset. void reset() override { @@ -347,9 +362,17 @@ class ChunkDataset final free_workers(); preload_threads_.clear(); - chunk_reader_.reset(); + if (!load_checkpoint_){ + chunk_reader_.reset(); + chunk_sampler_.reset(chunk_reader_.chunk_count()); + } + else { + torch::load(chunk_sampler_, options_.resume_from_file_); + + // After the checkpoint is loaded, mark the boolean to false to prevent future loading. + load_checkpoint_ = false; + } - chunk_sampler_.reset(chunk_reader_.chunk_count()); // Throw out any existing cached batch in the buffer and re-creates a new // chunk buffer. @@ -451,6 +474,9 @@ class ChunkDataset final // mutex to synchronize chunk sampler next() call. 
std::mutex chunk_index_guard_; + + // boolean value to indicate whether we need to load the checkpoint for chunk_sampler_. + bool load_checkpoint_; }; } // namespace datasets } // namespace data diff --git a/torch/csrc/api/include/torch/data/datasets/stateful.h b/torch/csrc/api/include/torch/data/datasets/stateful.h index eba22cbb3f035..f42ac5f2f23f2 100644 --- a/torch/csrc/api/include/torch/data/datasets/stateful.h +++ b/torch/csrc/api/include/torch/data/datasets/stateful.h @@ -30,6 +30,9 @@ class StatefulDataset public: /// Resets internal state of the dataset. virtual void reset() = 0; + + /// Saves the dataset's state to file. + virtual void save(const std::string& save_file_name) = 0; }; } // namespace datasets } // namespace data From 2c5777c6548346344256c8344b63e65f581892fd Mon Sep 17 00:00:00 2001 From: xzhu1900 Date: Mon, 17 Jun 2019 19:04:12 -0700 Subject: [PATCH 2/5] test --- test/cpp/api/dataloader.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/cpp/api/dataloader.cpp b/test/cpp/api/dataloader.cpp index 8a125d3e7ebbe..6244cc2662cee 100644 --- a/test/cpp/api/dataloader.cpp +++ b/test/cpp/api/dataloader.cpp @@ -1971,9 +1971,9 @@ TEST(DataLoaderTest, ChunkDatasetSave) { // output, hence verify the logic. In this test, the cache size is // configured to be the same as chunk size and batch size. So the // chunk data is written to the cache one by one. Only the current - // batch is retrieved, the next chunk is writen. Now after the first - // batch is retrieved, when we tries to retrive the second batch, - // there are three possible scenarios for the writer thread: + // batch is retrieved, the next chunk is writen. Now in iteration 0, + // after the first batch is retrieved, when we save the dataset + // statues, there are three possible scenarios for the writer thread: // 1. it hasn't started loading the next chunk data yet, so the // sequential sampler index is still 0; // 2. it started to load the second chunk, so the sequencial sampler @@ -1990,14 +1990,14 @@ TEST(DataLoaderTest, ChunkDatasetSave) { // sequential sampler, it already moves to the second index. So when // we save it, it is the second index we save. As a result, // we need to advance the window by one. Now we have the expected - // window of [0, 3]. + // window of [1, 3]. // This analysis applies to all scenarios. So extend it to a more // general case: the expected saved index should falling into the - // range of [iteration - 1, iteration + 2], which is the validation + // range of [iteration, iteration + 3], which is the validation // below. 
ASSERT_TRUE( - new_sampler.index() >= std::max(0, iteration_count - 1) && - new_sampler.index() <= iteration_count + 2); + new_sampler.index() >= iteration_count + 1 && + new_sampler.index() <= iteration_count + 3); } } } From 1e1a814e8fc20b866e7e252b22b1decb87bbab63 Mon Sep 17 00:00:00 2001 From: xzhu1900 Date: Tue, 25 Jun 2019 19:25:15 -0700 Subject: [PATCH 3/5] change the API to be conforment with existing PyTorch style --- test/cpp/api/dataloader.cpp | 27 +++++++------- .../api/include/torch/data/datasets/chunk.h | 37 +++++++------------ .../include/torch/data/datasets/stateful.h | 26 ++++++++++++- 3 files changed, 52 insertions(+), 38 deletions(-) diff --git a/test/cpp/api/dataloader.cpp b/test/cpp/api/dataloader.cpp index 6244cc2662cee..ff78197d90adb 100644 --- a/test/cpp/api/dataloader.cpp +++ b/test/cpp/api/dataloader.cpp @@ -101,8 +101,7 @@ TEST(DataTest, ChunkDataSetWithInvalidInitParameter) { auto initialization_function = [&](size_t preloader_count, size_t batch_size, - size_t cache_size, - std::string checkpoint_file_name = "") { + size_t cache_size) { datasets::SharedBatchDatasetsave(tempfile.name); + torch::save(*dataset, tempfile.name); samplers::SequentialSampler new_sampler(0); torch::load(new_sampler, tempfile.name); @@ -2004,7 +2005,7 @@ TEST(DataLoaderTest, ChunkDatasetSave) { } } -TEST(DataLoaderTest, ChunkDatasetResume) { +TEST(DataLoaderTest, ChunkDatasetLoad) { auto tempfile = c10::make_tempfile(); const size_t prefetch_count = 1; @@ -2042,9 +2043,9 @@ TEST(DataLoaderTest, ChunkDatasetResume) { datasets::ChunkDatasetOptions( prefetch_count, batch_size, - 20 /*cache size*/, + 20 /*cache size*/)); - tempfile.name)); + torch::load(*dataset, tempfile.name); auto data_loader = torch::data::make_data_loader( dataset, DataLoaderOptions(batch_size).workers(dataloader_worker_count)); diff --git a/torch/csrc/api/include/torch/data/datasets/chunk.h b/torch/csrc/api/include/torch/data/datasets/chunk.h index 4ee16e890e272..5b627a2714001 100644 --- a/torch/csrc/api/include/torch/data/datasets/chunk.h +++ b/torch/csrc/api/include/torch/data/datasets/chunk.h @@ -247,12 +247,10 @@ struct ChunkDatasetOptions { ChunkDatasetOptions( size_t preloader_count, size_t batch_size, - size_t cache_size = 2048, - std::string resume_from_file = "") + size_t cache_size = 2048) : preloader_count_(preloader_count), batch_size_(batch_size), - cache_size_(cache_size), - resume_from_file_(std::move(resume_from_file)) { + cache_size_(cache_size) { TORCH_CHECK( preloader_count_ > 0, "Preloader count is 0. At least one preloader needs to be specified."); @@ -276,12 +274,6 @@ struct ChunkDatasetOptions { /// The capacity of the queue for batch caching. TORCH_ARG(size_t, cache_size) = 2048; - - /// The file name from where to load ChunkDatset's state. Default to empty - /// string meaning start ChunkDataset from fresh begining; when specified with - /// a file name, ChunkDataset::reset() will try to load the sampler state from - /// that file. - TORCH_ARG(std::string, resume_from_file) = ""; }; /// A stateful dataset that support hierarchical sampling and prefetching of @@ -319,7 +311,7 @@ class ChunkDataset final options_(std::move(options)), quit_worker_(false), running_preloaders_(0), - load_checkpoint_(!options_.resume_from_file_.empty()) {} + load_checkpoint_(false) {} virtual ~ChunkDataset() { // stop batch buffer first. 
@@ -346,11 +338,6 @@ class ChunkDataset final return batch_buffer_->get_batch(); } - void save(const std::string& save_file_name) override { - std::lock_guard lock(chunk_index_guard_); - torch::save(this->chunk_sampler(), save_file_name); - } - /// Helper method around get_batch as `batch_size` is not strictly necessary BatchType get_batch() { return get_batch(options_.batch_size_); @@ -370,15 +357,9 @@ class ChunkDataset final if (!load_checkpoint_){ chunk_reader_.reset(); chunk_sampler_.reset(chunk_reader_.chunk_count()); - } - else { - torch::load(chunk_sampler_, options_.resume_from_file_); - - // After the checkpoint is loaded, mark the boolean to false to prevent future loading. load_checkpoint_ = false; } - // Throw out any existing cached batch in the buffer and re-creates a new // chunk buffer. batch_buffer_ = torch::make_unique< @@ -408,6 +389,16 @@ class ChunkDataset final return chunk_sampler_; } + void save(serialize::OutputArchive& archive) const override { + std::lock_guard lock(chunk_index_guard_); + chunk_sampler_.save(archive); + } + + void load(serialize::InputArchive& archive) override{ + chunk_sampler_.load(archive); + load_checkpoint_ = true; + } + private: /// running on worker thread to preload chunk data. void preloader(size_t id) { @@ -478,7 +469,7 @@ class ChunkDataset final std::atomic running_preloaders_; // mutex to synchronize chunk sampler next() call. - std::mutex chunk_index_guard_; + mutable std::mutex chunk_index_guard_; // boolean value to indicate whether we need to load the checkpoint for chunk_sampler_. bool load_checkpoint_; diff --git a/torch/csrc/api/include/torch/data/datasets/stateful.h b/torch/csrc/api/include/torch/data/datasets/stateful.h index f42ac5f2f23f2..ae0f0b060472a 100644 --- a/torch/csrc/api/include/torch/data/datasets/stateful.h +++ b/torch/csrc/api/include/torch/data/datasets/stateful.h @@ -31,9 +31,31 @@ class StatefulDataset /// Resets internal state of the dataset. virtual void reset() = 0; - /// Saves the dataset's state to file. - virtual void save(const std::string& save_file_name) = 0; + /// Saves the statefulDataset's state to OutputArchive. + virtual void save(serialize::OutputArchive& archive) const = 0; + + /// Deserializes the statefulDataset's state from the `archive`. + virtual void load(serialize::InputArchive& archive) = 0; }; + +/// Serializes a statefulDataset to `OutputArchive`. +template +serialize::OutputArchive& operator<<( + serialize::OutputArchive& archive, + const StatefulDataset& statefulDataset) { + statefulDataset.save(archive); + return archive; +} + +/// Deserializes a statefulDataset from an `InputArchive`. +template +serialize::InputArchive& operator>>( + serialize::InputArchive& archive, + StatefulDataset& statefulDataset) { + statefulDataset.load(archive); + return archive; +} + } // namespace datasets } // namespace data } // namespace torch From 280a9e45af447cb1d93ad878299f823f7d8504e7 Mon Sep 17 00:00:00 2001 From: xzhu1900 Date: Wed, 26 Jun 2019 11:55:53 -0700 Subject: [PATCH 4/5] address feedback --- test/cpp/api/dataloader.cpp | 29 ++++++++++++++----- .../api/include/torch/data/datasets/chunk.h | 1 + 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/test/cpp/api/dataloader.cpp b/test/cpp/api/dataloader.cpp index ff78197d90adb..5b1ea8cafdbd1 100644 --- a/test/cpp/api/dataloader.cpp +++ b/test/cpp/api/dataloader.cpp @@ -1895,6 +1895,17 @@ TEST(DataLoaderTest, ChunkDatasetDoesNotHang) { auto iterator = data_loader->begin(); } +// Test ChunkDataset save function. 
+// Note [save/load ChunkDataset as ChunkSampler]: +// The chunk sampler inside ChunkDataset is used in a seperate thread pool other +// than the main thread. Thus it is very hard to acuratly estimate its status +// when ChunkDataset::save/ChunkDataset::load is called. For the pure purpose of +// testing, we utilize the implementation fact that the file format for sampler +// serialization is the same as ChunkDataset serialization, and manually control +// the chunk sampler by calling the sampler's save/load method for value +// validation. This is only for testing the specific save/load functionality. In +// real user case, the user should still use matching ChunkDataset::save and +// ChunkDataset::load method. TEST(DataLoaderTest, ChunkDatasetSave) { const size_t chunk_count_ = 6; const size_t chunk_size = 10; @@ -1943,9 +1954,7 @@ TEST(DataLoaderTest, ChunkDatasetSave) { sampler, sampler, ChunkDatasetOptions( - prefetch_count, - batch_size, - chunk_size /*cache size*/)); + prefetch_count, batch_size, chunk_size /*cache size*/)); auto data_loader = torch::data::make_data_loader( dataset, @@ -1955,11 +1964,12 @@ TEST(DataLoaderTest, ChunkDatasetSave) { int iteration_count = 0; for (auto iterator = data_loader->begin(); iterator != data_loader->end(); ++iterator, ++iteration_count) { - if ((iteration_count + 1) % save_interval == 0) { torch::save(*dataset, tempfile.name); samplers::SequentialSampler new_sampler(0); + + // See Note [save/load ChunkDataset as ChunkSampler] torch::load(new_sampler, tempfile.name); // Verify save logic. For ChunkDataset, the chunk data is stored in a @@ -2005,6 +2015,7 @@ TEST(DataLoaderTest, ChunkDatasetSave) { } } +// Test ChunkDataset load function. TEST(DataLoaderTest, ChunkDatasetLoad) { auto tempfile = c10::make_tempfile(); @@ -2022,6 +2033,8 @@ TEST(DataLoaderTest, ChunkDatasetLoad) { { sampler.reset(data_reader.chunk_count()); sampler.next(skipped_chunk); + + // See Note [save/load ChunkDataset as ChunkSampler] torch::save(sampler, tempfile.name); } @@ -2041,9 +2054,7 @@ TEST(DataLoaderTest, ChunkDatasetLoad) { sampler, sampler, datasets::ChunkDatasetOptions( - prefetch_count, - batch_size, - 20 /*cache size*/)); + prefetch_count, batch_size, 20 /*cache size*/)); torch::load(*dataset, tempfile.name); @@ -2076,7 +2087,9 @@ TEST(DataLoaderTest, ChunkDatasetLoad) { } samplers::SequentialSampler new_sampler(0); + + // See Note [save/load ChunkDataset as ChunkSampler] torch::load(new_sampler, tempfile.name); ASSERT_EQ(new_sampler.index(), skipped_chunk); -} +} \ No newline at end of file diff --git a/torch/csrc/api/include/torch/data/datasets/chunk.h b/torch/csrc/api/include/torch/data/datasets/chunk.h index 5b627a2714001..1a5eaf15c7658 100644 --- a/torch/csrc/api/include/torch/data/datasets/chunk.h +++ b/torch/csrc/api/include/torch/data/datasets/chunk.h @@ -395,6 +395,7 @@ class ChunkDataset final } void load(serialize::InputArchive& archive) override{ + std::lock_guard lock(chunk_index_guard_); chunk_sampler_.load(archive); load_checkpoint_ = true; } From ee05fe3ed0eb37500093d19ad608cbf28b79570b Mon Sep 17 00:00:00 2001 From: xzhu1900 Date: Wed, 26 Jun 2019 13:30:26 -0700 Subject: [PATCH 5/5] fix typo --- test/cpp/api/dataloader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/cpp/api/dataloader.cpp b/test/cpp/api/dataloader.cpp index 5b1ea8cafdbd1..23cf0462f3b8c 100644 --- a/test/cpp/api/dataloader.cpp +++ b/test/cpp/api/dataloader.cpp @@ -1897,8 +1897,8 @@ TEST(DataLoaderTest, ChunkDatasetDoesNotHang) { // Test 
ChunkDataset save function. // Note [save/load ChunkDataset as ChunkSampler]: -// The chunk sampler inside ChunkDataset is used in a seperate thread pool other -// than the main thread. Thus it is very hard to acuratly estimate its status +// The chunk sampler inside ChunkDataset is used in a separate thread pool other +// than the main thread. Thus it is very hard to accurately estimate its status // when ChunkDataset::save/ChunkDataset::load is called. For the pure purpose of // testing, we utilize the implementation fact that the file format for sampler // serialization is the same as ChunkDataset serialization, and manually control
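
Usage sketch (not part of the patches): after patch 3, checkpointing goes through the generic torch::save / torch::load entry points, which pick up the operator<< / operator>> overloads added to StatefulDataset and forward to ChunkDataset::save / ChunkDataset::load. The sketch below shows how a training loop might periodically save the chunk sampler position and later resume from it, which is the "matching ChunkDataset::save and ChunkDataset::load" usage the note in patch 4 recommends. It is a minimal illustration only: MyChunkReader, the checkpoint file name, and the save interval are made-up placeholders, and the include and template arguments are assumed to match the test setup above rather than copied verbatim from the patch.

#include <torch/torch.h>  // assumed umbrella header for torch::data and torch::save/load

using namespace torch::data;

// Illustrative reader, modeled on DummyTestChunkDataReader above: six chunks
// of ten ints each, where every example carries its chunk index.
struct MyChunkReader : datasets::ChunkDataReader<int> {
  using BatchType = datasets::ChunkDataReader<int>::ChunkType;

  BatchType read_chunk(size_t chunk_index) override {
    return BatchType(10, static_cast<int>(chunk_index));
  }
  size_t chunk_count() override {
    return 6;
  }
  void reset() override {}
};

int main() {
  MyChunkReader reader;
  samplers::SequentialSampler chunk_sampler(0);
  samplers::SequentialSampler example_sampler(0);

  auto dataset = datasets::make_shared_dataset<datasets::ChunkDataset<
      MyChunkReader,
      samplers::SequentialSampler,
      samplers::SequentialSampler>>(
      reader,
      chunk_sampler,
      example_sampler,
      datasets::ChunkDatasetOptions(
          /*preloader_count=*/1, /*batch_size=*/10, /*cache_size=*/20));

  // To resume a previous run, restore the sampler state before iterating;
  // ChunkDataset::load() records the checkpoint and reset() picks it up at
  // the start of the next epoch. (Commented out so the sketch also runs when
  // no checkpoint file exists yet.)
  // torch::load(*dataset, "chunk_dataset.pt");

  auto data_loader = torch::data::make_data_loader(
      dataset, DataLoaderOptions(10).workers(0));

  const size_t save_interval = 2;
  size_t iteration = 0;
  for (auto it = data_loader->begin(); it != data_loader->end();
       ++it, ++iteration) {
    MyChunkReader::BatchType batch = *it;
    // ... consume `batch` ...

    if ((iteration + 1) % save_interval == 0) {
      // Dispatches through the StatefulDataset operator<< added in patch 3 to
      // ChunkDataset::save(), which serializes the chunk sampler state.
      torch::save(*dataset, "chunk_dataset.pt");
    }
  }
  return 0;
}

Because ChunkDataset::save() acquires chunk_index_guard_ before serializing, it is safe to call mid-epoch while the preloader threads are still advancing the chunk sampler; as the ChunkDatasetSave test explains, the saved index is then only guaranteed to fall inside a small window around the current iteration rather than at an exact position.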