From bd4dd0cab193c36d7fbc0430562e03d6107c097c Mon Sep 17 00:00:00 2001 From: Badrish Chandramouli Date: Tue, 14 Aug 2018 13:47:05 -0700 Subject: [PATCH] Initial commit --- .gitattributes | 63 + .gitignore | 194 + README.md | 34 + cc/CMakeLists.txt | 102 + cc/CMakeLists.txt.in | 17 + cc/README.md | 68 + cc/benchmark-dir/CMakeLists.txt | 10 + cc/benchmark-dir/README.md | 17 + cc/benchmark-dir/benchmark.cc | 628 +++ cc/benchmark-dir/benchmark.vcxproj | 170 + cc/benchmark-dir/benchmark.vcxproj.filters | 30 + cc/benchmark-dir/file.h | 61 + cc/benchmark-dir/process_ycsb.cc | 38 + cc/playground/CMakeLists.txt | 1 + cc/playground/sum_store-dir/CMakeLists.txt | 9 + .../sum_store-dir/concurrent_recovery_test.h | 276 ++ .../single_threaded_recovery_test.h | 140 + cc/playground/sum_store-dir/sum_store.cc | 77 + cc/playground/sum_store-dir/sum_store.h | 160 + cc/src/CMakeLists.txt | 63 + cc/src/core/address.cc | 12 + cc/src/core/address.h | 177 + cc/src/core/alloc.h | 35 + cc/src/core/async.h | 132 + cc/src/core/async_result_types.h | 60 + cc/src/core/auto_ptr.h | 123 + cc/src/core/checkpoint_locks.h | 192 + cc/src/core/checkpoint_state.h | 166 + cc/src/core/constants.h | 20 + cc/src/core/faster.h | 2558 +++++++++++ cc/src/core/gc_state.h | 40 + cc/src/core/grow_state.h | 44 + cc/src/core/guid.h | 142 + cc/src/core/hash_bucket.h | 201 + cc/src/core/hash_table.h | 294 ++ cc/src/core/internal_contexts.h | 379 ++ cc/src/core/key_hash.h | 54 + cc/src/core/light_epoch.h | 328 ++ cc/src/core/lss_allocator.cc | 169 + cc/src/core/lss_allocator.h | 237 ++ cc/src/core/malloc_fixed_page_size.h | 582 +++ cc/src/core/native_buffer_pool.h | 188 + cc/src/core/persistent_memory_malloc.h | 1021 +++++ cc/src/core/phase.h | 51 + cc/src/core/record.h | 151 + cc/src/core/recovery_status.h | 59 + cc/src/core/state_transitions.h | 162 + cc/src/core/status.h | 30 + cc/src/core/thread.cc | 26 + cc/src/core/thread.h | 103 + cc/src/core/utility.h | 56 + cc/src/device/file_system_disk.h | 527 +++ cc/src/device/null_disk.h | 124 + cc/src/environment/file.h | 10 + cc/src/environment/file_common.h | 60 + cc/src/environment/file_linux.cc | 199 + cc/src/environment/file_linux.h | 254 ++ cc/src/environment/file_windows.cc | 372 ++ cc/src/environment/file_windows.h | 415 ++ cc/test/CMakeLists.txt | 11 + cc/test/in_memory_test.cc | 1912 +++++++++ cc/test/malloc_fixed_page_size_test.cc | 81 + cc/test/paging_queue_test.cc | 27 + cc/test/paging_test.h | 1017 +++++ cc/test/paging_threadpool_test.cc | 27 + cc/test/recovery_queue_test.cc | 31 + cc/test/recovery_test.h | 3753 +++++++++++++++++ cc/test/recovery_threadpool_test.cc | 31 + cc/test/utility_test.cc | 25 + cs/src/FASTER.sln | 149 + cs/src/benchmark/App.config | 17 + cs/src/benchmark/FASTER.benchmark.csproj | 43 + cs/src/benchmark/FasterYcsbBenchmark.cs | 585 +++ cs/src/benchmark/Program.cs | 58 + cs/src/benchmark/Properties/AssemblyInfo.cs | 39 + cs/src/benchmark/RandomGenerator.cs | 85 + cs/src/core/Allocator/IAllocator.cs | 14 + cs/src/core/Allocator/MallocFixedPageSize.cs | 615 +++ .../core/Allocator/PersistentMemoryMalloc.cs | 899 ++++ cs/src/core/Codegen/CompilerBase.cs | 228 + .../core/Codegen/FasterHashTableCompiler.cs | 132 + cs/src/core/Codegen/HashTableManager.cs | 49 + ...MixedBlitManagedFasterHashTableCompiler.cs | 257 ++ cs/src/core/Codegen/RoslynHelpers.cs | 224 + cs/src/core/Codegen/TypeReplacer.cs | 108 + cs/src/core/Codegen/TypeReplacerCompiler.cs | 38 + cs/src/core/Codegen/Utilities.cs | 240 ++ cs/src/core/Device/IDevice.cs | 29 + 
cs/src/core/Device/ISegmentedDevice.cs | 24 + cs/src/core/Device/LocalStorageDevice.cs | 268 ++ cs/src/core/Device/MemoryDevice.cs | 195 + cs/src/core/Device/NullDevice.cs | 48 + .../Device/SegmentedLocalStorageDevice.cs | 317 ++ cs/src/core/Device/SegmentedNullDevice.cs | 59 + cs/src/core/Device/WrappedDevice.cs | 69 + cs/src/core/Epochs/LightEpoch.cs | 531 +++ cs/src/core/FASTER.core.csproj | 67 + cs/src/core/FASTER.core.nuspec | 33 + cs/src/core/Index/Common/AddressInfo.cs | 99 + cs/src/core/Index/Common/Contexts.cs | 424 ++ cs/src/core/Index/Common/Layout.cs | 144 + cs/src/core/Index/Common/RecordInfo.cs | 241 ++ cs/src/core/Index/FASTER/AsyncIO.cs | 561 +++ cs/src/core/Index/FASTER/Checkpoint.cs | 723 ++++ cs/src/core/Index/FASTER/FASTER.cs | 288 ++ cs/src/core/Index/FASTER/FASTERBase.cs | 709 ++++ cs/src/core/Index/FASTER/FASTERImpl.cs | 1736 ++++++++ cs/src/core/Index/FASTER/FASTERThread.cs | 315 ++ cs/src/core/Index/FASTER/IFASTER.cs | 38 + cs/src/core/Index/FASTER/IndexCheckpoint.cs | 173 + cs/src/core/Index/FASTER/IndexRecovery.cs | 148 + cs/src/core/Index/FASTER/Recovery.cs | 544 +++ cs/src/core/Index/UserCode/Context.cs | 22 + cs/src/core/Index/UserCode/Functions.cs | 154 + cs/src/core/Index/UserCode/Input.cs | 17 + cs/src/core/Index/UserCode/Key.cs | 170 + cs/src/core/Index/UserCode/Output.cs | 23 + cs/src/core/Index/UserCode/Value.cs | 314 ++ .../core/ManagedLayer/BlittableTypeWrapper.cs | 32 + cs/src/core/ManagedLayer/FASTERFactory.cs | 58 + cs/src/core/ManagedLayer/IFASTERKey.cs | 16 + cs/src/core/ManagedLayer/IFASTERValue.cs | 14 + cs/src/core/ManagedLayer/IFASTER_Mixed.cs | 33 + cs/src/core/ManagedLayer/IManagedFAST.cs | 30 + cs/src/core/ManagedLayer/IUserFunctions.cs | 24 + .../core/ManagedLayer/MixedContextWrapper.cs | 43 + .../ManagedLayer/MixedFunctionsWrapper.cs | 132 + cs/src/core/ManagedLayer/MixedInputWrapper.cs | 55 + cs/src/core/ManagedLayer/MixedKeyWrapper.cs | 133 + cs/src/core/ManagedLayer/MixedManagedFAST.cs | 481 +++ .../core/ManagedLayer/MixedOutputWrapper.cs | 38 + .../core/ManagedLayer/MixedUnwrappedTypes.cs | 96 + .../core/ManagedLayer/MixedUserFunctions.cs | 44 + cs/src/core/ManagedLayer/MixedValueWrapper.cs | 345 ++ cs/src/core/Properties/AssemblyInfo.cs | 24 + cs/src/core/Properties/Resources.Designer.cs | 713 ++++ cs/src/core/Properties/Resources.resx | 190 + cs/src/core/Utilities/AsyncResultTypes.cs | 152 + cs/src/core/Utilities/Native32.cs | 364 ++ cs/src/core/Utilities/NativeBufferPool.cs | 144 + .../Utilities/SafeConcurrentDictionary.cs | 233 + cs/src/core/Utilities/StateTransitions.cs | 73 + cs/src/core/Utilities/Status.cs | 29 + cs/src/core/Utilities/Utility.cs | 243 ++ cs/src/native/adv-file-ops/adv-file-ops.cpp | 134 + .../native/adv-file-ops/adv-file-ops.vcxproj | 80 + cs/src/native/readtsc/readtsc.cpp | 10 + cs/src/native/readtsc/readtsc.vcxproj | 89 + cs/src/native/readtsc/readtsc.vcxproj.filters | 22 + .../playground/ClassCache/ClassCache.csproj | 15 + cs/src/playground/ClassCache/Program.cs | 81 + cs/src/playground/ClassCache/Types.cs | 122 + cs/src/playground/ManagedSample1/App.config | 6 + cs/src/playground/ManagedSample1/Functions.cs | 89 + .../ManagedSample1/ICustomFaster.cs | 29 + .../playground/ManagedSample1/InputStruct.cs | 18 + cs/src/playground/ManagedSample1/KeyStruct.cs | 66 + .../ManagedSample1/ManagedSample1.csproj | 39 + .../playground/ManagedSample1/OutputStruct.cs | 16 + cs/src/playground/ManagedSample1/Program.cs | 56 + .../ManagedSample1/Properties/AssemblyInfo.cs | 22 + .../playground/ManagedSample1/ValueStruct.cs | 
72 + cs/src/playground/ManagedSample2/App.config | 6 + .../ManagedSample2/CustomFunctions.cs | 48 + .../playground/ManagedSample2/CustomTypes.cs | 29 + .../ManagedSample2/ManagedSample2.csproj | 39 + cs/src/playground/ManagedSample2/Program.cs | 53 + .../ManagedSample2/Properties/AssemblyInfo.cs | 23 + cs/src/playground/ManagedSample3/App.config | 6 + .../ManagedSample3/ManagedSample3.csproj | 44 + cs/src/playground/ManagedSample3/Program.cs | 144 + .../ManagedSample3/Properties/AssemblyInfo.cs | 22 + cs/src/playground/ManagedSample4/App.config | 6 + .../ManagedSample4/ManagedSample4.csproj | 38 + cs/src/playground/ManagedSample4/Program.cs | 158 + cs/src/playground/NestedTypesTest/App.config | 6 + .../playground/NestedTypesTest/Functions.cs | 172 + .../NestedTypesTest/NestedTypesTest.csproj | 45 + cs/src/playground/NestedTypesTest/Program.cs | 129 + cs/src/playground/NestedTypesTest/Types.cs | 353 ++ cs/src/playground/SumStore/AdId.cs | 62 + cs/src/playground/SumStore/App.config | 9 + .../SumStore/ConcurrentRecoveryTest.cs | 362 ++ cs/src/playground/SumStore/ConcurrentTest.cs | 244 ++ cs/src/playground/SumStore/Functions.cs | 87 + cs/src/playground/SumStore/ICustomFaster.cs | 36 + cs/src/playground/SumStore/Input.cs | 19 + cs/src/playground/SumStore/NumClicks.cs | 69 + cs/src/playground/SumStore/Output.cs | 16 + cs/src/playground/SumStore/Program.cs | 67 + .../SumStore/Properties/AssemblyInfo.cs | 39 + .../SumStore/SingleThreadedRecoveryTest.cs | 171 + cs/src/playground/SumStore/SumStore.csproj | 70 + cs/src/test/BasicFASTERTests.cs | 137 + cs/src/test/ComponentRecoveryTests.cs | 150 + cs/src/test/FASTER.test.csproj | 127 + cs/src/test/FullRecoveryTests.cs | 205 + cs/src/test/ObjectFASTERTests.cs | 69 + cs/src/test/ObjectTestTypes.cs | 119 + cs/src/test/Properties/AssemblyInfo.cs | 39 + cs/src/test/RecoveryTestTypes.cs | 253 ++ cs/src/test/TestTypes.cs | 253 ++ cs/src/test/app.config | 21 + 203 files changed, 39739 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 README.md create mode 100644 cc/CMakeLists.txt create mode 100644 cc/CMakeLists.txt.in create mode 100644 cc/README.md create mode 100644 cc/benchmark-dir/CMakeLists.txt create mode 100644 cc/benchmark-dir/README.md create mode 100644 cc/benchmark-dir/benchmark.cc create mode 100644 cc/benchmark-dir/benchmark.vcxproj create mode 100644 cc/benchmark-dir/benchmark.vcxproj.filters create mode 100644 cc/benchmark-dir/file.h create mode 100644 cc/benchmark-dir/process_ycsb.cc create mode 100644 cc/playground/CMakeLists.txt create mode 100644 cc/playground/sum_store-dir/CMakeLists.txt create mode 100644 cc/playground/sum_store-dir/concurrent_recovery_test.h create mode 100644 cc/playground/sum_store-dir/single_threaded_recovery_test.h create mode 100644 cc/playground/sum_store-dir/sum_store.cc create mode 100644 cc/playground/sum_store-dir/sum_store.h create mode 100644 cc/src/CMakeLists.txt create mode 100644 cc/src/core/address.cc create mode 100644 cc/src/core/address.h create mode 100644 cc/src/core/alloc.h create mode 100644 cc/src/core/async.h create mode 100644 cc/src/core/async_result_types.h create mode 100644 cc/src/core/auto_ptr.h create mode 100644 cc/src/core/checkpoint_locks.h create mode 100644 cc/src/core/checkpoint_state.h create mode 100644 cc/src/core/constants.h create mode 100644 cc/src/core/faster.h create mode 100644 cc/src/core/gc_state.h create mode 100644 cc/src/core/grow_state.h create mode 100644 cc/src/core/guid.h create mode 100644 cc/src/core/hash_bucket.h 
create mode 100644 cc/src/core/hash_table.h create mode 100644 cc/src/core/internal_contexts.h create mode 100644 cc/src/core/key_hash.h create mode 100644 cc/src/core/light_epoch.h create mode 100644 cc/src/core/lss_allocator.cc create mode 100644 cc/src/core/lss_allocator.h create mode 100644 cc/src/core/malloc_fixed_page_size.h create mode 100644 cc/src/core/native_buffer_pool.h create mode 100644 cc/src/core/persistent_memory_malloc.h create mode 100644 cc/src/core/phase.h create mode 100644 cc/src/core/record.h create mode 100644 cc/src/core/recovery_status.h create mode 100644 cc/src/core/state_transitions.h create mode 100644 cc/src/core/status.h create mode 100644 cc/src/core/thread.cc create mode 100644 cc/src/core/thread.h create mode 100644 cc/src/core/utility.h create mode 100644 cc/src/device/file_system_disk.h create mode 100644 cc/src/device/null_disk.h create mode 100644 cc/src/environment/file.h create mode 100644 cc/src/environment/file_common.h create mode 100644 cc/src/environment/file_linux.cc create mode 100644 cc/src/environment/file_linux.h create mode 100644 cc/src/environment/file_windows.cc create mode 100644 cc/src/environment/file_windows.h create mode 100644 cc/test/CMakeLists.txt create mode 100644 cc/test/in_memory_test.cc create mode 100644 cc/test/malloc_fixed_page_size_test.cc create mode 100644 cc/test/paging_queue_test.cc create mode 100644 cc/test/paging_test.h create mode 100644 cc/test/paging_threadpool_test.cc create mode 100644 cc/test/recovery_queue_test.cc create mode 100644 cc/test/recovery_test.h create mode 100644 cc/test/recovery_threadpool_test.cc create mode 100644 cc/test/utility_test.cc create mode 100644 cs/src/FASTER.sln create mode 100644 cs/src/benchmark/App.config create mode 100644 cs/src/benchmark/FASTER.benchmark.csproj create mode 100644 cs/src/benchmark/FasterYcsbBenchmark.cs create mode 100644 cs/src/benchmark/Program.cs create mode 100644 cs/src/benchmark/Properties/AssemblyInfo.cs create mode 100644 cs/src/benchmark/RandomGenerator.cs create mode 100644 cs/src/core/Allocator/IAllocator.cs create mode 100644 cs/src/core/Allocator/MallocFixedPageSize.cs create mode 100644 cs/src/core/Allocator/PersistentMemoryMalloc.cs create mode 100644 cs/src/core/Codegen/CompilerBase.cs create mode 100644 cs/src/core/Codegen/FasterHashTableCompiler.cs create mode 100644 cs/src/core/Codegen/HashTableManager.cs create mode 100644 cs/src/core/Codegen/MixedBlitManagedFasterHashTableCompiler.cs create mode 100644 cs/src/core/Codegen/RoslynHelpers.cs create mode 100644 cs/src/core/Codegen/TypeReplacer.cs create mode 100644 cs/src/core/Codegen/TypeReplacerCompiler.cs create mode 100644 cs/src/core/Codegen/Utilities.cs create mode 100644 cs/src/core/Device/IDevice.cs create mode 100644 cs/src/core/Device/ISegmentedDevice.cs create mode 100644 cs/src/core/Device/LocalStorageDevice.cs create mode 100644 cs/src/core/Device/MemoryDevice.cs create mode 100644 cs/src/core/Device/NullDevice.cs create mode 100644 cs/src/core/Device/SegmentedLocalStorageDevice.cs create mode 100644 cs/src/core/Device/SegmentedNullDevice.cs create mode 100644 cs/src/core/Device/WrappedDevice.cs create mode 100644 cs/src/core/Epochs/LightEpoch.cs create mode 100644 cs/src/core/FASTER.core.csproj create mode 100644 cs/src/core/FASTER.core.nuspec create mode 100644 cs/src/core/Index/Common/AddressInfo.cs create mode 100644 cs/src/core/Index/Common/Contexts.cs create mode 100644 cs/src/core/Index/Common/Layout.cs create mode 100644 cs/src/core/Index/Common/RecordInfo.cs create 
mode 100644 cs/src/core/Index/FASTER/AsyncIO.cs create mode 100644 cs/src/core/Index/FASTER/Checkpoint.cs create mode 100644 cs/src/core/Index/FASTER/FASTER.cs create mode 100644 cs/src/core/Index/FASTER/FASTERBase.cs create mode 100644 cs/src/core/Index/FASTER/FASTERImpl.cs create mode 100644 cs/src/core/Index/FASTER/FASTERThread.cs create mode 100644 cs/src/core/Index/FASTER/IFASTER.cs create mode 100644 cs/src/core/Index/FASTER/IndexCheckpoint.cs create mode 100644 cs/src/core/Index/FASTER/IndexRecovery.cs create mode 100644 cs/src/core/Index/FASTER/Recovery.cs create mode 100644 cs/src/core/Index/UserCode/Context.cs create mode 100644 cs/src/core/Index/UserCode/Functions.cs create mode 100644 cs/src/core/Index/UserCode/Input.cs create mode 100644 cs/src/core/Index/UserCode/Key.cs create mode 100644 cs/src/core/Index/UserCode/Output.cs create mode 100644 cs/src/core/Index/UserCode/Value.cs create mode 100644 cs/src/core/ManagedLayer/BlittableTypeWrapper.cs create mode 100644 cs/src/core/ManagedLayer/FASTERFactory.cs create mode 100644 cs/src/core/ManagedLayer/IFASTERKey.cs create mode 100644 cs/src/core/ManagedLayer/IFASTERValue.cs create mode 100644 cs/src/core/ManagedLayer/IFASTER_Mixed.cs create mode 100644 cs/src/core/ManagedLayer/IManagedFAST.cs create mode 100644 cs/src/core/ManagedLayer/IUserFunctions.cs create mode 100644 cs/src/core/ManagedLayer/MixedContextWrapper.cs create mode 100644 cs/src/core/ManagedLayer/MixedFunctionsWrapper.cs create mode 100644 cs/src/core/ManagedLayer/MixedInputWrapper.cs create mode 100644 cs/src/core/ManagedLayer/MixedKeyWrapper.cs create mode 100644 cs/src/core/ManagedLayer/MixedManagedFAST.cs create mode 100644 cs/src/core/ManagedLayer/MixedOutputWrapper.cs create mode 100644 cs/src/core/ManagedLayer/MixedUnwrappedTypes.cs create mode 100644 cs/src/core/ManagedLayer/MixedUserFunctions.cs create mode 100644 cs/src/core/ManagedLayer/MixedValueWrapper.cs create mode 100644 cs/src/core/Properties/AssemblyInfo.cs create mode 100644 cs/src/core/Properties/Resources.Designer.cs create mode 100644 cs/src/core/Properties/Resources.resx create mode 100644 cs/src/core/Utilities/AsyncResultTypes.cs create mode 100644 cs/src/core/Utilities/Native32.cs create mode 100644 cs/src/core/Utilities/NativeBufferPool.cs create mode 100644 cs/src/core/Utilities/SafeConcurrentDictionary.cs create mode 100644 cs/src/core/Utilities/StateTransitions.cs create mode 100644 cs/src/core/Utilities/Status.cs create mode 100644 cs/src/core/Utilities/Utility.cs create mode 100644 cs/src/native/adv-file-ops/adv-file-ops.cpp create mode 100644 cs/src/native/adv-file-ops/adv-file-ops.vcxproj create mode 100644 cs/src/native/readtsc/readtsc.cpp create mode 100644 cs/src/native/readtsc/readtsc.vcxproj create mode 100644 cs/src/native/readtsc/readtsc.vcxproj.filters create mode 100644 cs/src/playground/ClassCache/ClassCache.csproj create mode 100644 cs/src/playground/ClassCache/Program.cs create mode 100644 cs/src/playground/ClassCache/Types.cs create mode 100644 cs/src/playground/ManagedSample1/App.config create mode 100644 cs/src/playground/ManagedSample1/Functions.cs create mode 100644 cs/src/playground/ManagedSample1/ICustomFaster.cs create mode 100644 cs/src/playground/ManagedSample1/InputStruct.cs create mode 100644 cs/src/playground/ManagedSample1/KeyStruct.cs create mode 100644 cs/src/playground/ManagedSample1/ManagedSample1.csproj create mode 100644 cs/src/playground/ManagedSample1/OutputStruct.cs create mode 100644 cs/src/playground/ManagedSample1/Program.cs create mode 100644 
cs/src/playground/ManagedSample1/Properties/AssemblyInfo.cs create mode 100644 cs/src/playground/ManagedSample1/ValueStruct.cs create mode 100644 cs/src/playground/ManagedSample2/App.config create mode 100644 cs/src/playground/ManagedSample2/CustomFunctions.cs create mode 100644 cs/src/playground/ManagedSample2/CustomTypes.cs create mode 100644 cs/src/playground/ManagedSample2/ManagedSample2.csproj create mode 100644 cs/src/playground/ManagedSample2/Program.cs create mode 100644 cs/src/playground/ManagedSample2/Properties/AssemblyInfo.cs create mode 100644 cs/src/playground/ManagedSample3/App.config create mode 100644 cs/src/playground/ManagedSample3/ManagedSample3.csproj create mode 100644 cs/src/playground/ManagedSample3/Program.cs create mode 100644 cs/src/playground/ManagedSample3/Properties/AssemblyInfo.cs create mode 100644 cs/src/playground/ManagedSample4/App.config create mode 100644 cs/src/playground/ManagedSample4/ManagedSample4.csproj create mode 100644 cs/src/playground/ManagedSample4/Program.cs create mode 100644 cs/src/playground/NestedTypesTest/App.config create mode 100644 cs/src/playground/NestedTypesTest/Functions.cs create mode 100644 cs/src/playground/NestedTypesTest/NestedTypesTest.csproj create mode 100644 cs/src/playground/NestedTypesTest/Program.cs create mode 100644 cs/src/playground/NestedTypesTest/Types.cs create mode 100644 cs/src/playground/SumStore/AdId.cs create mode 100644 cs/src/playground/SumStore/App.config create mode 100644 cs/src/playground/SumStore/ConcurrentRecoveryTest.cs create mode 100644 cs/src/playground/SumStore/ConcurrentTest.cs create mode 100644 cs/src/playground/SumStore/Functions.cs create mode 100644 cs/src/playground/SumStore/ICustomFaster.cs create mode 100644 cs/src/playground/SumStore/Input.cs create mode 100644 cs/src/playground/SumStore/NumClicks.cs create mode 100644 cs/src/playground/SumStore/Output.cs create mode 100644 cs/src/playground/SumStore/Program.cs create mode 100644 cs/src/playground/SumStore/Properties/AssemblyInfo.cs create mode 100644 cs/src/playground/SumStore/SingleThreadedRecoveryTest.cs create mode 100644 cs/src/playground/SumStore/SumStore.csproj create mode 100644 cs/src/test/BasicFASTERTests.cs create mode 100644 cs/src/test/ComponentRecoveryTests.cs create mode 100644 cs/src/test/FASTER.test.csproj create mode 100644 cs/src/test/FullRecoveryTests.cs create mode 100644 cs/src/test/ObjectFASTERTests.cs create mode 100644 cs/src/test/ObjectTestTypes.cs create mode 100644 cs/src/test/Properties/AssemblyInfo.cs create mode 100644 cs/src/test/RecoveryTestTypes.cs create mode 100644 cs/src/test/TestTypes.cs create mode 100644 cs/src/test/app.config diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..1ff0c4230 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,63 @@ +############################################################################### +# Set default behavior to automatically normalize line endings. +############################################################################### +* text=auto + +############################################################################### +# Set default behavior for command prompt diff. +# +# This is need for earlier builds of msysgit that does not have it on by +# default for csharp files. 
+# Note: This is only used by command line +############################################################################### +#*.cs diff=csharp + +############################################################################### +# Set the merge driver for project and solution files +# +# Merging from the command prompt will add diff markers to the files if there +# are conflicts (Merging from VS is not affected by the settings below, in VS +# the diff markers are never inserted). Diff markers may cause the following +# file extensions to fail to load in VS. An alternative would be to treat +# these files as binary and thus will always conflict and require user +# intervention with every merge. To do so, just uncomment the entries below +############################################################################### +#*.sln merge=binary +#*.csproj merge=binary +#*.vbproj merge=binary +#*.vcxproj merge=binary +#*.vcproj merge=binary +#*.dbproj merge=binary +#*.fsproj merge=binary +#*.lsproj merge=binary +#*.wixproj merge=binary +#*.modelproj merge=binary +#*.sqlproj merge=binary +#*.wwaproj merge=binary + +############################################################################### +# behavior for image files +# +# image files are treated as binary by default. +############################################################################### +#*.jpg binary +#*.png binary +#*.gif binary + +############################################################################### +# diff behavior for common document formats +# +# Convert binary document formats to text before diffing them. This feature +# is only available from the command line. Turn it on by uncommenting the +# entries below. +############################################################################### +#*.doc diff=astextplain +#*.DOC diff=astextplain +#*.docx diff=astextplain +#*.DOCX diff=astextplain +#*.dot diff=astextplain +#*.DOT diff=astextplain +#*.pdf diff=astextplain +#*.PDF diff=astextplain +#*.rtf diff=astextplain +#*.RTF diff=astextplain diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..51b2e9ed1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,194 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. 
+ +# User-specific files +*.suo +*.user +*.sln.docstates + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +x64/ +build/ +bld/ +[Bb]in/ +[Oo]bj/ + +# Roslyn cache directories +*.ide/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +#NUNIT +*.VisualState.xml +TestResult.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +*_i.c +*_p.c +*_i.h +*.ilk +*.meta +*.obj +*.pch +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opensdf +*.sdf +*.cachefile + +# Visual Studio profiler +*.psess +*.vsp +*.vspx + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# JustCode is a .NET coding addin-in +.JustCode + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# NCrunch +_NCrunch_* +.*crunch*.local.xml + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +## TODO: Comment the next line if you want to checkin your +## web deploy settings but do note that will include unencrypted +## passwords +#*.pubxml + +# NuGet Packages Directory +packages/* +## TODO: If the tool you use requires repositories.config +## uncomment the next line +#!packages/repositories.config + +# Enable "build/" folder in the NuGet Packages folder since +# NuGet packages use it for MSBuild targets. +# This line needs to be after the ignore of the build folder +# (and the packages folder if the line above has been uncommented) +!packages/build/ + +# Windows Azure Build Output +csx/ +*.build.csdef + +# Windows Store app package directory +AppPackages/ + +# Others +sql/ +*.Cache +ClientBin/ +[Ss]tyle[Cc]op.* +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.pfx +*.publishsettings +node_modules/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm + +# SQL Server files +*.mdf +*.ldf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings + +# Microsoft Fakes +FakesAssemblies/ + +# LightSwitch generated files +GeneratedArtifacts/ +_Pvt_Extensions/ +ModelManifest.xml +packages/ +*.VC.db* +*.VC.opendb +/.vs/ +/cs/src/.vs/ +*.lib \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 000000000..46e0bf211 --- /dev/null +++ b/README.md @@ -0,0 +1,34 @@ +# Introduction + +Managing large application state easily and with high performance is one of the hardest problems +in the cloud today. We present FASTER, a new concurrent key-value store designed for point lookups +and heavy updates. FASTER supports data larger than memory, by leveraging fast external storage. 
+What differentiates FASTER are its cache-optimized index that achieves very high performance — up
+to 160 million operations per second when data fits in memory; its unique “hybrid record log” design
+that combines a traditional persistent log with in-place updates, to shape the memory working set
+and retain performance; and its architecture as a component that can be embedded in cloud apps. FASTER
+achieves higher throughput than current systems, by more than two orders of magnitude, and scales better
+than current pure in-memory data structures, for in-memory working sets. FASTER also offers a new consistent
+recovery scheme that achieves better performance at the expense of slightly higher commit latency.
+
+# Getting Started
+
+Go to [our website](http://aka.ms/FASTER) for more details and papers.
+
+# Build and Test in C#
+
+Clone the repo, open /cs/src/FASTER.sln, build using VS 2017.
+
+# Contributing
+
+This project welcomes contributions and suggestions. Most contributions require you to agree to a
+Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
+the rights to use your contribution. For details, visit https://cla.microsoft.com.
+
+When you submit a pull request, a CLA-bot will automatically determine whether you need to provide
+a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions
+provided by the bot. You will only need to do this once across all repos using our CLA.
+
+This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
+For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
+contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
\ No newline at end of file
diff --git a/cc/CMakeLists.txt b/cc/CMakeLists.txt
new file mode 100644
index 000000000..8a8b8d7e6
--- /dev/null
+++ b/cc/CMakeLists.txt
@@ -0,0 +1,102 @@
+cmake_minimum_required (VERSION 3.2.2)
+
+enable_testing()
+
+include(ExternalProject)
+project(FASTER)
+
+if (MSVC)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /nologo /Gm- /W3 /WX /EHsc /GS /fp:precise /permissive- /Zc:wchar_t /Zc:forScope /Zc:inline /Gd /TP")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /FC /wd4996")
+
+  set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Od /RTC1 /MDd")
+  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2 /Oi /Gy- /MD")
+
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /DEBUG /OPT:REF /OPT:NOICF /INCREMENTAL:NO")
+  set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /DEBUG /OPT:REF /OPT:NOICF /INCREMENTAL:NO")
+else()
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
+
+  set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og -g -D_DEBUG")
+  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -g")
+endif()
+
+#Always set _DEBUG compiler directive when compiling bits regardless of target OS
+set_directory_properties(PROPERTIES COMPILE_DEFINITIONS_DEBUG "_DEBUG")
+
+##### BEGIN GOOGLE TEST INSTALLATION #####
+# Copied from https://github.com/google/googletest/tree/master/googletest#incorporating-into-an-existing-cmake-project
+# Download and unpack googletest at configure time
+configure_file(CMakeLists.txt.in googletest-download/CMakeLists.txt)
+execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
+ RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download ) +if(result) + message(FATAL_ERROR "CMake step for googletest failed: ${result}") +endif() +execute_process(COMMAND ${CMAKE_COMMAND} --build . + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download ) +if(result) + message(FATAL_ERROR "Build step for googletest failed: ${result}") +endif() + +# Prevent overriding the parent project's compiler/linker +# settings on Windows +set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) + +# Add googletest directly to our build. This defines +# the gtest and gtest_main targets. +add_subdirectory(${CMAKE_BINARY_DIR}/googletest-src + ${CMAKE_BINARY_DIR}/googletest-build + EXCLUDE_FROM_ALL) + +##### END GOOGLE TEST INSTALLATION ##### + +include_directories(${CMAKE_SOURCE_DIR}/src) + +# Set the directory targets when build in libs and binaries +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +set (FAST_LINK_LIBS + faster +) + +# Set the link libraries to for test compilation +set (FAST_TEST_LINK_LIBS ${FAST_LINK_LIBS} gtest) +if(WIN32) +else() + set (FAST_TEST_LINK_LIBS ${FAST_TEST_LINK_LIBS} stdc++fs uuid tbb gcc aio m stdc++ pthread) +endif() + +# Set the link libraries to for benchmark binary compilation +set (FAST_BENCHMARK_LINK_LIBS ${FAST_LINK_LIBS}) +if(WIN32) +set (FAST_BENCHMARK_LINK_LIBS ${FAST_LINK_LIBS} wsock32 Ws2_32) +else() + set (FAST_BENCHMARK_LINK_LIBS ${FAST_BENCHMARK_LINK_LIBS} stdc++fs uuid tbb gcc aio m stdc++ pthread) +endif() + +#Function to automate building test binaries +FUNCTION(ADD_FAST_TEST TEST_NAME HEADERS) + add_executable(${TEST_NAME} ${HEADERS} ${TEST_NAME}.cc) + + target_link_libraries(${TEST_NAME} ${FAST_TEST_LINK_LIBS}) + add_test(${TEST_NAME} ${CMAKE_BINARY_DIR}/${TEST_NAME}) +ENDFUNCTION() + +#Function to automate building benchmark binaries +FUNCTION(ADD_FAST_BENCHMARK BENCHMARK_NAME) + add_executable(${BENCHMARK_NAME} ${BENCHMARK_HEADERS} ${BENCHMARK_NAME}.cc) + + target_link_libraries(${BENCHMARK_NAME} ${FAST_BENCHMARK_LINK_LIBS}) +ENDFUNCTION() + +# Build each subdirectory +add_subdirectory(benchmark-dir) +add_subdirectory(playground) +add_subdirectory(src) +add_subdirectory(test) + diff --git a/cc/CMakeLists.txt.in b/cc/CMakeLists.txt.in new file mode 100644 index 000000000..30a6ab711 --- /dev/null +++ b/cc/CMakeLists.txt.in @@ -0,0 +1,17 @@ +# Copied from https://github.com/google/googletest/tree/master/googletest#incorporating-into-an-existing-cmake-project + +cmake_minimum_required(VERSION 2.8.2) + +project(googletest-download NONE) + +include(ExternalProject) +ExternalProject_Add(googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG master + SOURCE_DIR "${CMAKE_BINARY_DIR}/googletest-src" + BINARY_DIR "${CMAKE_BINARY_DIR}/googletest-build" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) diff --git a/cc/README.md b/cc/README.md new file mode 100644 index 000000000..7e5a57579 --- /dev/null +++ b/cc/README.md @@ -0,0 +1,68 @@ +Building C++ FASTER +=================== +The C++ version of FASTER uses CMake for builds. To build C++ FASTER, create +one or more build directories and use CMake to set up build scripts for your +target OS. Once CMake has generated the build scripts, it will try to update +them, as needed, during ordinary build. 
+
+Building on Windows
+-------------------
+Create new directory "build" off the root directory (FAST\cc). From the new
+"build" directory, execute:
+
+    cmake .. -G "<MSVC compiler> Win64"
+
+To see a list of supported MSVC compiler versions, just run "cmake -G". As of
+this writing, we're using Visual Studio 2017, so you would execute:
+
+    cmake .. -G "Visual Studio 15 2017 Win64"
+
+That will create build scripts inside your new "build" directory, including
+a "FASTER.sln" file that you can use inside Visual Studio. CMake will add several
+build profiles to FASTER.sln, including Debug/x64 and Release/x64.
+
+Building on Linux
+-----------------
+The Linux build requires several packages (both libraries and header files);
+see "CMakeLists.txt" in the root directory (FAST/cc) for the list of libraries
+being linked to, on Linux.
+
+As of this writing, the required libraries are:
+  - stdc++fs : for <experimental/filesystem>, used for cross-platform directory
+    creation.
+  - uuid : support for GUIDs.
+  - tbb : Intel's Thread Building Blocks library, used for concurrent_queue.
+  - gcc
+  - aio : Kernel Async I/O, used by QueueFile / QueueIoHandler.
+  - stdc++
+  - pthread : thread library.
+
+Also, CMake on Linux, for the gcc compiler, generates build scripts for either
+Debug or Release build, but not both; so you'll have to run CMake twice, in two
+different directories, to get both Debug and Release build scripts.
+
+Create new directories "build/Debug" and "build/Release" off the root directory
+(FAST/cc). From "build/Debug", run:
+
+    cmake -DCMAKE_BUILD_TYPE=Debug ../..
+
+--and from "build/Release", run:
+
+    cmake -DCMAKE_BUILD_TYPE=Release ../..
+
+Then you can build Debug or Release binaries by running "make" inside the
+relevant build directory.
+
+Other options
+-------------
+You can try other generators (compilers) supported by CMake. The main CMake
+build script is the CMakeLists.txt located in the root directory (FAST/cc).
+
+Examples
+========
+There are some unit tests in FAST/cc/test.
+
+Sum-store, located in FAST/cc/playground/sum_store-dir, is a good example of
+checkpointing and recovery.
+
+There's a basic YCSB test driver in FAST/cc/benchmark-dir.
diff --git a/cc/benchmark-dir/CMakeLists.txt b/cc/benchmark-dir/CMakeLists.txt
new file mode 100644
index 000000000..1629d18ba
--- /dev/null
+++ b/cc/benchmark-dir/CMakeLists.txt
@@ -0,0 +1,10 @@
+set(BENCHMARK_HEADERS
+  file.h
+)
+
+set(BENCHMARK_SOURCES
+)
+
+ADD_FAST_BENCHMARK(benchmark)
+
+add_executable(process_ycsb process_ycsb.cc)
diff --git a/cc/benchmark-dir/README.md b/cc/benchmark-dir/README.md
new file mode 100644
index 000000000..3fb5f8e4d
--- /dev/null
+++ b/cc/benchmark-dir/README.md
@@ -0,0 +1,17 @@
+Setting up YCSB
+===============
+First, download and install YCSB, from
+https://github.com/brianfrankcooper/YCSB/ . Configure YCSB for your intended
+workload, and run the "basic" driver (both "load" and "run," as required),
+redirecting the output to a file.
+
+The output of YCSB's "basic" driver is verbose. A typical line looks like:
+
+    INSERT usertable user5575651532496486335 [ field1='...' ... ]
+
+To speed up file ingestion, our basic YCSB benchmark assumes that the input
+file consists only of the 8-byte-integer portion of the key--e.g.:
+
+    5575651532496486335
+
+To convert YCSB "basic" output to the format we expect, run "process_ycsb."
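+
+For example, assuming you run from the CMake build directory and using
+placeholder file names (process_ycsb takes the source file and the destination
+file as its two arguments):
+
+    ./process_ycsb ycsb.load.txt load_keys.dat
+    ./process_ycsb ycsb.run.txt run_txns.dat
+
+The two output files can then be passed to the benchmark driver as its load
+and run inputs.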
diff --git a/cc/benchmark-dir/benchmark.cc b/cc/benchmark-dir/benchmark.cc new file mode 100644 index 000000000..29090237c --- /dev/null +++ b/cc/benchmark-dir/benchmark.cc @@ -0,0 +1,628 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include +#include +#include +#include + +#include "file.h" + +#include "core/auto_ptr.h" +#include "core/faster.h" +#include "device/null_disk.h" + +using namespace std::chrono_literals; +using namespace FASTER::core; + +/// Basic YCSB benchmark. + +enum class Op : uint8_t { + Insert = 0, + Read = 1, + Upsert = 2, + Scan = 3, + ReadModifyWrite = 4, +}; + +enum class Workload { + A_50_50 = 0, + RMW_100 = 1, +}; + +static constexpr uint64_t kInitCount = 250000000; +static constexpr uint64_t kTxnCount = 1000000000; +static constexpr uint64_t kChunkSize = 3200; +static constexpr uint64_t kRefreshInterval = 64; +static constexpr uint64_t kCompletePendingInterval = 1600; + +static_assert(kInitCount % kChunkSize == 0, "kInitCount % kChunkSize != 0"); +static_assert(kTxnCount % kChunkSize == 0, "kTxnCount % kChunkSize != 0"); +static_assert(kCompletePendingInterval % kRefreshInterval == 0, + "kCompletePendingInterval % kRefreshInterval != 0"); + +static constexpr uint64_t kNanosPerSecond = 1000000000; + +static constexpr uint64_t kMaxKey = 268435456; +static constexpr uint64_t kRunSeconds = 360; +static constexpr uint64_t kCheckpointSeconds = 30; + +aligned_unique_ptr_t init_keys_; +aligned_unique_ptr_t txn_keys_; +std::atomic idx_{ 0 }; +std::atomic done_{ false }; +std::atomic total_duration_{ 0 }; +std::atomic total_reads_done_{ 0 }; +std::atomic total_writes_done_{ 0 }; + +class ReadContext; +class UpsertContext; +class RmwContext; + +/// This benchmark stores 8-byte keys in key-value store. +class Key { + public: + Key(uint64_t key) + : key_{ key } { + } + + /// Methods and operators required by the (implicit) interface: + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + return KeyHash{ Utility::GetHashCode(key_) }; + } + + /// Comparison operators. + inline bool operator==(const Key& other) const { + return key_ == other.key_; + } + inline bool operator!=(const Key& other) const { + return key_ != other.key_; + } + + private: + uint64_t key_; +}; + +/// This benchmark stores an 8-byte value in the key-value store. +class Value { + public: + Value() + : value_{ 0 } { + } + + Value(const Value& other) + : value_{ other.value_ } { + } + + Value(uint64_t value) + : value_{ value } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Value)); + } + + friend class ReadContext; + friend class UpsertContext; + friend class RmwContext; + + private: + union { + uint64_t value_; + std::atomic atomic_value_; + }; +}; + +/// Class passed to store_t::Read(). +class ReadContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext(uint64_t key) + : key_{ key } { + } + + /// Copy (and deep-copy) constructor. + ReadContext(const ReadContext& other) + : key_{ other.key_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + // For this benchmark, we don't copy out, so these are no-ops. + inline void Get(const value_t& value) { } + inline void GetAtomic(const value_t& value) { } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. 
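+  /// (FASTER invokes DeepCopy_Internal() when an operation cannot complete
+  /// synchronously and goes async; the context is copied to the heap so it
+  /// remains valid after the calling frame returns. See core/async.h.)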
+ Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; +}; + +/// Class passed to store_t::Upsert(). +class UpsertContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + UpsertContext(uint64_t key, uint64_t input) + : key_{ key } + , input_{ input } { + } + + /// Copy (and deep-copy) constructor. + UpsertContext(const UpsertContext& other) + : key_{ other.key_ } + , input_{ other.input_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + + /// Non-atomic and atomic Put() methods. + inline void Put(value_t& value) { + value.value_ = input_; + } + inline bool PutAtomic(value_t& value) { + value.atomic_value_.store(input_); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint64_t input_; +}; + +/// Class passed to store_t::RMW(). +class RmwContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + RmwContext(uint64_t key, uint64_t incr) + : key_{ key } + , incr_{ incr } { + } + + /// Copy (and deep-copy) constructor. + RmwContext(const RmwContext& other) + : key_{ other.key_ } + , incr_{ other.incr_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + + /// Initial, non-atomic, and atomic RMW methods. + inline void RmwInitial(value_t& value) { + value.value_ = incr_; + } + inline void RmwCopy(const value_t& old_value, value_t& value) { + value.value_ = old_value.value_ + incr_; + } + inline bool RmwAtomic(value_t& value) { + value.atomic_value_.fetch_add(incr_); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint64_t incr_; +}; + +/// Key-value store, specialized to our key and value types. +#ifdef _WIN32 +typedef FASTER::environment::ThreadPoolIoHandler handler_t; +#else +typedef FASTER::environment::QueueIoHandler handler_t; +#endif +typedef FASTER::device::FileSystemDisk disk_t; +using store_t = FasterKv; + +inline Op ycsb_a_50_50(std::mt19937& rng) { + if(rng() % 100 < 50) { + return Op::Read; + } else { + return Op::Upsert; + } +} + +inline Op ycsb_rmw_100(std::mt19937& rng) { + return Op::ReadModifyWrite; +} + +/// Affinitize to hardware threads on the same core first, before +/// moving on to the next core. +void SetThreadAffinity(size_t core) { + + // For now, assume 36 cores. (Set this correctly for your test system.) + constexpr size_t kCoreCount = 36; +#ifdef _WIN32 + HANDLE thread_handle = ::GetCurrentThread(); + GROUP_AFFINITY group; + group.Group = WORD(core / kCoreCount); + group.Mask = KAFFINITY(0x1llu << (core - kCoreCount * group.Group)); + ::SetThreadGroupAffinity(thread_handle, &group, nullptr); +#else + // On our 28-core test system, we see CPU 0, Core 0 assigned to 0, 28; + // CPU 1, Core 0 assigned to 1, 29; etc. 
+ cpu_set_t mask; + CPU_ZERO(&mask); +#ifdef NUMA + switch(core % 4) { + case 0: + // 0 |-> 0 + // 4 |-> 2 + // 8 |-> 4 + core = core / 2; + break; + case 1: + // 1 |-> 28 + // 5 |-> 30 + // 9 |-> 32 + core = kCoreCount + (core - 1) / 2; + break; + case 2: + // 2 |-> 1 + // 6 |-> 3 + // 10 |-> 5 + core = core / 2; + break; + case 3: + // 3 |-> 29 + // 7 |-> 31 + // 11 |-> 33 + core = kCoreCount + (core - 1) / 2; + break; + } +#else + switch(core % 2) { + case 0: + // 0 |-> 0 + // 2 |-> 2 + // 4 |-> 4 + core = core; + break; + case 1: + // 1 |-> 28 + // 3 |-> 30 + // 5 |-> 32 + core = (core - 1) + kCoreCount; + break; + } +#endif + CPU_SET(core, &mask); + + ::sched_setaffinity(0, sizeof(mask), &mask); +#endif +} + +void load_files(const std::string& load_filename, const std::string& run_filename) { + constexpr size_t kFileChunkSize = 131072; + + auto chunk_guard = alloc_aligned(512, kFileChunkSize); + uint64_t* chunk = chunk_guard.get(); + + FASTER::benchmark::File init_file{ load_filename }; + + printf("loading keys from %s into memory...\n", load_filename.c_str()); + + init_keys_ = alloc_aligned(64, kInitCount * sizeof(uint64_t)); + uint64_t count = 0; + + uint64_t offset = 0; + while(true) { + uint64_t size = init_file.Read(chunk, kFileChunkSize, offset); + for(uint64_t idx = 0; idx < size / 8; ++idx) { + init_keys_.get()[count] = chunk[idx]; + ++count; + } + if(size == kFileChunkSize) { + offset += kFileChunkSize; + } else { + break; + } + } + if(kInitCount != count) { + printf("Init file load fail!\n"); + exit(1); + } + + printf("loaded %" PRIu64 " keys.\n", count); + + FASTER::benchmark::File txn_file{ run_filename }; + + printf("loading txns from %s into memory...\n", run_filename.c_str()); + + txn_keys_ = alloc_aligned(64, kTxnCount * sizeof(uint64_t)); + + count = 0; + offset = 0; + + while(true) { + uint64_t size = txn_file.Read(chunk, kFileChunkSize, offset); + for(uint64_t idx = 0; idx < size / 8; ++idx) { + txn_keys_.get()[count] = chunk[idx]; + ++count; + } + if(size == kFileChunkSize) { + offset += kFileChunkSize; + } else { + break; + } + } + if(kTxnCount != count) { + printf("Txn file load fail!\n"); + exit(1); + } + printf("loaded %" PRIu64 " txns.\n", count); +} + +void thread_setup_store(store_t* store, size_t thread_idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + assert(result == Status::Ok); + }; + + SetThreadAffinity(thread_idx); + + Guid guid = store->StartSession(); + + uint64_t value = 42; + for(uint64_t chunk_idx = idx_.fetch_add(kChunkSize); chunk_idx < kInitCount; + chunk_idx = idx_.fetch_add(kChunkSize)) { + for(uint64_t idx = chunk_idx; idx < chunk_idx + kChunkSize; ++idx) { + if(idx % kRefreshInterval == 0) { + store->Refresh(); + if(idx % kCompletePendingInterval == 0) { + store->CompletePending(false); + } + } + + UpsertContext context{ init_keys_.get()[idx], value }; + store->Upsert(context, callback, 1); + } + } + + store->CompletePending(true); + store->StopSession(); +} + +void setup_store(store_t* store, size_t num_threads) { + idx_ = 0; + std::deque threads; + for(size_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) { + threads.emplace_back(&thread_setup_store, store, thread_idx); + } + for(auto& thread : threads) { + thread.join(); + } + + init_keys_.reset(); + + printf("Finished populating store: contains ?? 
elements.\n"); +} + + +static std::atomic async_reads_done{ 0 }; +static std::atomic async_writes_done{ 0 }; + +template +void thread_run_benchmark(store_t* store, size_t thread_idx) { + SetThreadAffinity(thread_idx); + + std::random_device rd{}; + std::mt19937 rng{ rd() }; + + auto start_time = std::chrono::high_resolution_clock::now(); + + uint64_t upsert_value = 0; + int64_t reads_done = 0; + int64_t writes_done = 0; + + Guid guid = store->StartSession(); + + while(!done_) { + uint64_t chunk_idx = idx_.fetch_add(kChunkSize); + while(chunk_idx >= kTxnCount) { + if(chunk_idx == kTxnCount) { + idx_ = 0; + } + chunk_idx = idx_.fetch_add(kChunkSize); + } + for(uint64_t idx = chunk_idx; idx < chunk_idx + kChunkSize; ++idx) { + if(idx % kRefreshInterval == 0) { + store->Refresh(); + if(idx % kCompletePendingInterval == 0) { + store->CompletePending(false); + } + } + switch(FN(rng)) { + case Op::Insert: + case Op::Upsert: { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + }; + + UpsertContext context{ txn_keys_.get()[idx], upsert_value }; + Status result = store->Upsert(context, callback, 1); + ++writes_done; + break; + } + case Op::Scan: + printf("Scan currently not supported!\n"); + exit(1); + break; + case Op::Read: { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + }; + + ReadContext context{ txn_keys_.get()[idx] }; + + Status result = store->Read(context, callback, 1); + ++reads_done; + break; + } + case Op::ReadModifyWrite: + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + }; + + RmwContext context{ txn_keys_.get()[idx], 5 }; + Status result = store->Rmw(context, callback, 1); + if(result == Status::Ok) { + ++writes_done; + } + break; + } + } + } + + store->CompletePending(true); + store->StopSession(); + + auto end_time = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds duration = end_time - start_time; + total_duration_ += duration.count(); + total_reads_done_ += reads_done; + total_writes_done_ += writes_done; + printf("Finished thread %" PRIu64 " : %" PRIu64 " reads, %" PRIu64 " writes, in %.2f seconds.\n", + thread_idx, reads_done, writes_done, (double)duration.count() / kNanosPerSecond); +} + +template +void run_benchmark(store_t* store, size_t num_threads) { + idx_ = 0; + total_duration_ = 0; + total_reads_done_ = 0; + total_writes_done_ = 0; + done_ = false; + std::deque threads; + for(size_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) { + threads.emplace_back(&thread_run_benchmark, store, thread_idx); + } + + static std::atomic num_checkpoints; + num_checkpoints = 0; + + if(kCheckpointSeconds == 0) { + std::this_thread::sleep_for(std::chrono::seconds(kRunSeconds)); + } else { + auto callback = [](uint64_t persistent_serial_num) { + ++num_checkpoints; + }; + + auto start_time = std::chrono::high_resolution_clock::now(); + auto last_checkpoint_time = start_time; + auto current_time = start_time; + + uint64_t checkpoint_num = 0; + + while(current_time - start_time < std::chrono::seconds(kRunSeconds)) { + std::this_thread::sleep_for(std::chrono::seconds(1)); + current_time = std::chrono::high_resolution_clock::now(); + if(current_time - last_checkpoint_time >= std::chrono::seconds(kCheckpointSeconds)) { + bool success = store->Checkpoint(callback); + if(success) { + printf("Starting checkpoint %" PRIu64 ".\n", checkpoint_num); + ++checkpoint_num; + } else { + printf("Failed to start checkpoint.\n"); + } + 
last_checkpoint_time = current_time; + } + } + + done_ = true; + } + + for(auto& thread : threads) { + thread.join(); + } + + printf("Finished benchmark: %" PRIu64 " thread checkpoints completed; %.2f ops/second/thread\n", + num_checkpoints.load(), + ((double)total_reads_done_ + (double)total_writes_done_) / ((double)total_duration_ / + kNanosPerSecond)); +} + +void run(Workload workload, size_t num_threads) { + // FASTER store has a hash table with approx. kInitCount / 2 entries, a log of size 16 GB, + // and a null device (it's in-memory only). + size_t init_size = next_power_of_two(kInitCount / 2); + store_t store{ init_size, 17179869184, "storage" }; + + printf("Populating the store...\n"); + + setup_store(&store, num_threads); + + store.DumpDistribution(); + + printf("Running benchmark on %" PRIu64 " threads...\n", num_threads); + switch(workload) { + case Workload::A_50_50: + run_benchmark(&store, num_threads); + break; + case Workload::RMW_100: + run_benchmark(&store, num_threads); + break; + default: + printf("Unknown workload!\n"); + exit(1); + } +} + +int main(int argc, char* argv[]) { + constexpr size_t kNumArgs = 4; + if(argc != kNumArgs + 1) { + printf("Usage: benchmark.exe <# threads> \n"); + exit(0); + } + + Workload workload = static_cast(std::atol(argv[1])); + size_t num_threads = ::atol(argv[2]); + std::string load_filename{ argv[3] }; + std::string run_filename{ argv[4] }; + + load_files(load_filename, run_filename); + + run(workload, num_threads); + + return 0; +} diff --git a/cc/benchmark-dir/benchmark.vcxproj b/cc/benchmark-dir/benchmark.vcxproj new file mode 100644 index 000000000..36b05c245 --- /dev/null +++ b/cc/benchmark-dir/benchmark.vcxproj @@ -0,0 +1,170 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {109C58E8-A9A2-49CC-86F4-64D25FB40773} + Win32Proj + benchmark + 10.0.16299.0 + + + + Application + true + v141 + Unicode + + + Application + false + v141 + true + Unicode + + + Application + true + v141 + Unicode + + + Application + false + v141 + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + true + + + false + + + false + + + + NotUsing + Level3 + Disabled + true + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + 4996 + true + + + Console + true + + + + + NotUsing + Level3 + Disabled + true + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + 4996 + true + + + Console + true + + + + + NotUsing + Level3 + MaxSpeed + false + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + 4996 + + + Console + true + true + true + + + + + NotUsing + Level3 + MaxSpeed + false + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + 4996 + + + Console + true + true + true + + + + + + + + {419e0f92-c483-416e-ada3-292a1c6cce7c} + + + + + + + + + + \ No newline at end of file diff --git a/cc/benchmark-dir/benchmark.vcxproj.filters b/cc/benchmark-dir/benchmark.vcxproj.filters new file mode 100644 index 000000000..11c6a1e87 --- /dev/null +++ b/cc/benchmark-dir/benchmark.vcxproj.filters @@ -0,0 +1,30 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files + + + Header Files + + + + + Source Files + + + \ No newline at end of file diff --git a/cc/benchmark-dir/file.h b/cc/benchmark-dir/file.h new file 
mode 100644 index 000000000..b50e7e485 --- /dev/null +++ b/cc/benchmark-dir/file.h @@ -0,0 +1,61 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include + +#ifdef _WIN32 +#define NOMINMAX +#define _WINSOCKAPI_ +#include +#else +#include +#include +#include +#include +#endif + +namespace FASTER { +namespace benchmark { + +/// Basic wrapper around synchronous file read. +class File { + public: + File(const std::string& filename) { +#ifdef _WIN32 + file_handle_ = ::CreateFileA(filename.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, + OPEN_EXISTING, FILE_FLAG_NO_BUFFERING, nullptr); +#else + fd_ = ::open(filename.c_str(), O_RDONLY | O_DIRECT, S_IRUSR); +#endif + } + + ~File() { +#ifdef _WIN32 + ::CloseHandle(file_handle_); +#else + ::close(fd_); +#endif + } + + size_t Read(void* buf, size_t count, uint64_t offset) { +#ifdef _WIN32 + DWORD bytes_read { 0 }; + ::ReadFile(file_handle_, buf, static_cast(count), &bytes_read, nullptr); + return bytes_read; +#else + return ::pread(fd_, buf, count, offset); +#endif + } + + private: +#ifdef _WIN32 + HANDLE file_handle_; +#else + int fd_; +#endif +}; + +} +} // namespace FASTER::benchmark diff --git a/cc/benchmark-dir/process_ycsb.cc b/cc/benchmark-dir/process_ycsb.cc new file mode 100644 index 000000000..a972a6b2c --- /dev/null +++ b/cc/benchmark-dir/process_ycsb.cc @@ -0,0 +1,38 @@ +#include +#include +#include +#include +#include + +#include +#include +#include + +int main(int argc, char* argv[]) { + if (argc != 3) { + fprintf(stderr, "Requires two arguments: file copied from, file copied to.\n"); + exit(-1); + } + + std::string from_filename{ argv[1] }; + std::string to_filename{ argv[2] }; + + std::ifstream from_file{ from_filename }; + std::ofstream to_file{ to_filename }; + + const std::string prefix{ "usertable user" }; + + while (!from_file.eof()) { + char buffer[256]; + from_file.getline(buffer, sizeof(buffer)); + std::string line{ buffer }; + std::string::size_type pos = line.find(prefix); + if (pos == std::string::npos) { + continue; + } + line = line.substr(pos + prefix.size()); + uint64_t key = stol(line); + + to_file.write(reinterpret_cast(&key), sizeof(key)); + } +} diff --git a/cc/playground/CMakeLists.txt b/cc/playground/CMakeLists.txt new file mode 100644 index 000000000..89fef048e --- /dev/null +++ b/cc/playground/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(sum_store-dir) diff --git a/cc/playground/sum_store-dir/CMakeLists.txt b/cc/playground/sum_store-dir/CMakeLists.txt new file mode 100644 index 000000000..f2e536431 --- /dev/null +++ b/cc/playground/sum_store-dir/CMakeLists.txt @@ -0,0 +1,9 @@ +set(SUM_STORE_HEADERS + concurrent_recovery_test.h + single_threaded_recovery_test.h + sum_store.h +) + +add_executable(sum_store ${SUM_STORE_HEADERS} sum_store.cc) +target_link_libraries(sum_store ${FAST_BENCHMARK_LINK_LIBS}) + diff --git a/cc/playground/sum_store-dir/concurrent_recovery_test.h b/cc/playground/sum_store-dir/concurrent_recovery_test.h new file mode 100644 index 000000000..63007de88 --- /dev/null +++ b/cc/playground/sum_store-dir/concurrent_recovery_test.h @@ -0,0 +1,276 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
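+
+// This header exercises FASTER's checkpoint/recovery path under concurrency:
+// PopulateWorker() threads issue RMW increments over a shared key space and
+// periodically trigger Checkpoint(); RecoverAndTest() later recovers a
+// checkpoint, resumes the saved sessions to obtain their persisted serial
+// numbers, reads back every key, and compares the sums against the counts
+// those serial numbers imply.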
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "core/auto_ptr.h" +#include "core/faster.h" +#include "core/thread.h" +#include "sum_store.h" + +namespace sum_store { + +class ConcurrentRecoveryTest { + public: + static constexpr uint64_t kNumUniqueKeys = (1L << 22); + static constexpr uint64_t kKeySpace = (1L << 14); + static constexpr uint64_t kNumOps = (1L << 25); + static constexpr uint64_t kRefreshInterval = (1L << 8); + static constexpr uint64_t kCompletePendingInterval = (1L << 12); + static constexpr uint64_t kCheckpointInterval = (1L << 22); + + ConcurrentRecoveryTest(store_t& store_, size_t num_threads_) + : store{ store_ } + , num_threads{ num_threads_ } + , num_active_threads{ 0 } + , num_checkpoints{ 0 } { + } + + private: + static void PopulateWorker(store_t* store, size_t thread_idx, + std::atomic* num_active_threads, size_t num_threads, + std::atomic* num_checkpoints) { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + assert(result == Status::Ok); + }; + + auto persistence_callback = [](uint64_t persistent_serial_num) { + printf("Thread %" PRIu32 " reports persistence until %" PRIu64 "\n", + Thread::id(), persistent_serial_num); + }; + + // Register thread with the store + store->StartSession(); + + ++(*num_active_threads); + + // Process the batch of input data + for(size_t idx = 0; idx < kNumOps; ++idx) { + RmwContext context{ idx % kNumUniqueKeys, 1 }; + store->Rmw(context, callback, idx); + if(idx % kCheckpointInterval == 0 && *num_active_threads == num_threads) { + if(store->Checkpoint(persistence_callback)) { + printf("Thread %" PRIu32 " calling Checkpoint(), %" PRIu32 "\n", Thread::id(), + ++(*num_checkpoints)); + } + } + if(idx % kCompletePendingInterval == 0) { + store->CompletePending(false); + } else if(idx % kRefreshInterval == 0) { + store->Refresh(); + } + } + + // Make sure operations are completed + store->CompletePending(true); + + // Deregister thread from FASTER + store->StopSession(); + + printf("Populate successful on thread %" PRIu32 ".\n", Thread::id()); + } + + public: + void Populate() { + std::deque threads; + for(size_t idx = 0; idx < num_threads; ++idx) { + threads.emplace_back(&PopulateWorker, &store, idx, &num_active_threads, num_threads, + &num_checkpoints); + } + for(auto& thread : threads) { + thread.join(); + } + // Verify the records. 
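  // Every worker issued kNumOps read-modify-writes that cycle round-robin over
  // kNumUniqueKeys keys, so a fully completed run should leave each AdId with a
  // count of (num_threads * kNumOps) / kNumUniqueKeys; the reads below check that.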
+ auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + assert(result == Status::Ok); + }; + // Create array for reading + auto read_results = alloc_aligned(64, sizeof(uint64_t) * kNumUniqueKeys); + std::memset(read_results.get(), 0, sizeof(uint64_t) * kNumUniqueKeys); + + // Register with thread + store.StartSession(); + + // Issue read requests + for(uint64_t idx = 0; idx < kNumUniqueKeys; ++idx) { + ReadContext context{ AdId{ idx }, read_results.get() + idx }; + store.Read(context, callback, idx); + } + + // Complete all pending requests + store.CompletePending(true); + + // Release + store.StopSession(); + for(uint64_t idx = 0; idx < kNumUniqueKeys; ++idx) { + uint64_t expected_result = (num_threads * kNumOps) / kNumUniqueKeys; + if(read_results.get()[idx] != expected_result) { + printf("Debug error for AdId %" PRIu64 ": Expected (%" PRIu64 "), Found(%" PRIu64 ")\n", + idx, + expected_result, + read_results.get()[idx]); + } + } + } + + void RecoverAndTest(uint32_t cpr_version, uint32_t index_version) { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + assert(result == Status::Ok); + }; + + // Recover + std::vector session_ids; + FASTER::core::Status result = store.Recover(cpr_version, index_version, session_ids); + if(result != FASTER::core::Status::Ok) { + printf("Recovery failed with error %u\n", static_cast(result)); + exit(1); + } + + std::vector serial_nums; + for(const auto& session_id : session_ids) { + serial_nums.push_back(store.ContinueSession(session_id)); + store.StopSession(); + } + + // Create array for reading + auto read_results = alloc_aligned(64, sizeof(uint64_t) * kNumUniqueKeys); + std::memset(read_results.get(), 0, sizeof(uint64_t) * kNumUniqueKeys); + + // Register with thread + store.StartSession(); + + // Issue read requests + for(uint64_t idx = 0; idx < kNumUniqueKeys; ++idx) { + ReadContext context{ AdId{ idx}, read_results.get() + idx }; + store.Read(context, callback, idx); + } + + // Complete all pending requests + store.CompletePending(true); + + // Release + store.StopSession(); + + // Test outputs + // Compute expected array + auto expected_results = alloc_aligned(64, + sizeof(uint64_t) * kNumUniqueKeys); + std::memset(expected_results.get(), 0, sizeof(uint64_t) * kNumUniqueKeys); + + // Sessions that were active during checkpoint: + for(uint64_t serial_num : serial_nums) { + for(uint64_t idx = 0; idx <= serial_num; ++idx) { + ++expected_results.get()[idx % kNumUniqueKeys]; + } + } + // Sessions that were finished at time of checkpoint. 
+ size_t num_completed = num_threads - serial_nums.size(); + for(size_t thread_idx = 0; thread_idx < num_completed; ++thread_idx) { + uint64_t serial_num = kNumOps; + for(uint64_t idx = 0; idx < serial_num; ++idx) { + ++expected_results.get()[idx % kNumUniqueKeys]; + } + } + + // Assert if expected is same as found + for(uint64_t idx = 0; idx < kNumUniqueKeys; ++idx) { + if(expected_results.get()[idx] != read_results.get()[idx]) { + printf("Debug error for AdId %" PRIu64 ": Expected (%" PRIu64 "), Found(%" PRIu64 ")\n", + idx, + expected_results.get()[idx], + read_results.get()[idx]); + } + } + printf("Test successful\n"); + } + + static void ContinueWorker(store_t* store, size_t thread_idx, + std::atomic* num_active_threads, size_t num_threads, + std::atomic* num_checkpoints, Guid guid) { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + assert(result == Status::Ok); + }; + + auto persistence_callback = [](uint64_t persistent_serial_num) { + printf("Thread %" PRIu32 " reports persistence until %" PRIu64 "\n", + Thread::id(), persistent_serial_num); + }; + + // Register thread with the store + uint64_t start_num = store->ContinueSession(guid); + + ++(*num_active_threads); + + // Process the batch of input data + for(size_t idx = start_num + 1; idx < kNumOps; ++idx) { + RmwContext context{ idx % kNumUniqueKeys, 1 }; + store->Rmw(context, callback, idx); + if(idx % kCheckpointInterval == 0 && *num_active_threads == num_threads) { + if(store->Checkpoint(persistence_callback)) { + printf("Thread %" PRIu32 " calling Checkpoint(), %" PRIu32 "\n", Thread::id(), + ++(*num_checkpoints)); + } + } + if(idx % kCompletePendingInterval == 0) { + store->CompletePending(false); + } else if(idx % kRefreshInterval == 0) { + store->Refresh(); + } + } + + // Make sure operations are completed + store->CompletePending(true); + + // Deregister thread from FASTER + store->StopSession(); + + printf("Populate successful on thread %" PRIu32 ".\n", Thread::id()); + } + + void Continue(uint32_t cpr_version, uint32_t index_version) { + // Recover + printf("Recovering version (%" PRIu32 ", %" PRIu32 ")\n", cpr_version, index_version); + std::vector session_ids; + FASTER::core::Status result = store.Recover(cpr_version, index_version, session_ids); + if(result != FASTER::core::Status::Ok) { + printf("Recovery failed with error %u\n", static_cast(result)); + exit(1); + } else { + printf("Recovery Done!\n"); + } + + num_checkpoints.store(cpr_version); + // Some threads may have already completed. + num_threads = session_ids.size(); + + std::deque threads; + for(size_t idx = 0; idx < num_threads; ++idx) { + threads.emplace_back(&ContinueWorker, &store, idx, &num_active_threads, num_threads, + &num_checkpoints, session_ids[idx]); + } + for(auto& thread : threads) { + thread.join(); + } + } + + store_t& store; + size_t num_threads; + std::atomic num_active_threads; + std::atomic num_checkpoints; +}; + +} // namespace sum_store diff --git a/cc/playground/sum_store-dir/single_threaded_recovery_test.h b/cc/playground/sum_store-dir/single_threaded_recovery_test.h new file mode 100644 index 000000000..cf9a642b9 --- /dev/null +++ b/cc/playground/sum_store-dir/single_threaded_recovery_test.h @@ -0,0 +1,140 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
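The arithmetic in RecoverAndTest() above captures the CPR recovery guarantee: a session that was still active at the checkpoint is recovered up to and including its persisted serial number, while a session that had already finished contributes all kNumOps operations. The same expected-count computation as a stand-alone sketch (parameter values would come from Recover()):

#include <cstdint>
#include <vector>

std::vector<uint64_t> ExpectedCounts(const std::vector<uint64_t>& serial_nums,
                                     size_t num_threads, uint64_t num_unique_keys,
                                     uint64_t num_ops) {
  std::vector<uint64_t> expected(num_unique_keys, 0);
  // Sessions still active at the checkpoint: operations 0..serial_num inclusive.
  for(uint64_t serial_num : serial_nums) {
    for(uint64_t idx = 0; idx <= serial_num; ++idx) {
      ++expected[idx % num_unique_keys];
    }
  }
  // Sessions that had already finished: all num_ops operations.
  for(size_t t = 0; t < num_threads - serial_nums.size(); ++t) {
    for(uint64_t idx = 0; idx < num_ops; ++idx) {
      ++expected[idx % num_unique_keys];
    }
  }
  return expected;
}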
+ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "core/auto_ptr.h" +#include "core/faster.h" +#include "sum_store.h" + +using namespace FASTER; + +namespace sum_store { + +class SingleThreadedRecoveryTest { + public: + static constexpr uint64_t kNumUniqueKeys = (1L << 23); + static constexpr uint64_t kNumOps = (1L << 25); + static constexpr uint64_t kRefreshInterval = (1L << 8); + static constexpr uint64_t kCompletePendingInterval = (1L << 12); + static constexpr uint64_t kCheckpointInterval = (1L << 20); + + SingleThreadedRecoveryTest(store_t& store_) + : store{ store_ } { + } + + private: + + public: + void Populate() { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + assert(result == Status::Ok); + }; + + auto persistence_callback = [](uint64_t persistent_serial_num) { + printf("Thread %" PRIu32 " reports persistence until %" PRIu64 "\n", + Thread::id(), persistent_serial_num); + }; + + // Register thread with FASTER + store.StartSession(); + + // Process the batch of input data + for(uint64_t idx = 0; idx < kNumOps; ++idx) { + RmwContext context{ AdId{ idx % kNumUniqueKeys}, 1 }; + store.Rmw(context, callback, idx); + + if(idx % kCheckpointInterval == 0) { + store.Checkpoint(persistence_callback); + } + if(idx % kCompletePendingInterval == 0) { + store.CompletePending(false); + } else if(idx % kRefreshInterval == 0) { + store.Refresh(); + } + } + // Make sure operations are completed + store.CompletePending(true); + + // Deregister thread from FASTER + store.StopSession(); + + printf("Populate successful\n"); + + std::string discard; + std::getline(std::cin, discard); + } + + void RecoverAndTest(uint32_t cpr_version, uint32_t index_version) { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + assert(result == Status::Ok); + }; + + // Recover + std::vector session_ids; + store.Recover(cpr_version, index_version, session_ids); + + // Create array for reading + auto read_results = alloc_aligned(64, sizeof(uint64_t) * kNumUniqueKeys); + std::memset(read_results.get(), 0, sizeof(uint64_t) * kNumUniqueKeys); + + Guid session_id = session_ids[0]; + + // Register with thread + uint64_t sno = store.ContinueSession(session_id); + + // Issue read requests + for(uint64_t idx = 0; idx < kNumUniqueKeys; ++idx) { + ReadContext context{ AdId{ idx}, read_results.get() + idx }; + store.Read(context, callback, idx); + } + + // Complete all pending requests + store.CompletePending(true); + + // Release + store.StopSession(); + + // Test outputs + // Compute expected array + auto expected_results = alloc_aligned(64, + sizeof(uint64_t) * kNumUniqueKeys); + std::memset(expected_results.get(), 0, sizeof(uint64_t) * kNumUniqueKeys); + + for(uint64_t idx = 0; idx <= sno; ++idx) { + ++expected_results.get()[idx % kNumUniqueKeys]; + } + + // Assert if expected is same as found + for(uint64_t idx = 0; idx < kNumUniqueKeys; ++idx) { + if(expected_results.get()[idx] != read_results.get()[idx]) { + printf("Debug error for AdId %" PRIu64 ": Expected (%" PRIu64 "), Found(%" PRIu64 ")\n", + idx, + expected_results.get()[idx], + read_results.get()[idx]); + } + } + printf("Test successful\n"); + + std::string discard; + std::getline(std::cin, discard); + } + + void Continue() { + // Not implemented. 
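    // A single-threaded "continue" path would mirror ConcurrentRecoveryTest::ContinueWorker():
    // ContinueSession(guid) returns the persisted serial number, and execution resumes at
    // serial number + 1.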
+ assert(false); + } + + store_t& store; +}; + +} // namespace sum_store diff --git a/cc/playground/sum_store-dir/sum_store.cc b/cc/playground/sum_store-dir/sum_store.cc new file mode 100644 index 000000000..f42edfd90 --- /dev/null +++ b/cc/playground/sum_store-dir/sum_store.cc @@ -0,0 +1,77 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include +#include + +#include "concurrent_recovery_test.h" +#include "sum_store.h" +#include "single_threaded_recovery_test.h" + +int main(int argc, char* argv[]) { + if(argc < 3) { + printf("Usage: sum_store.exe single \n"); + printf("Where is one of \"populate\", \"recover \", or \"continue\".\n"); + exit(0); + } + + std::experimental::filesystem::create_directory("sum_storage"); + + static constexpr uint64_t kKeySpace = (1L << 15); + + sum_store::store_t store{ kKeySpace, 17179869184, "sum_storage" }; + + + std::string type{ argv[1] }; + if(type == "single") { + sum_store::SingleThreadedRecoveryTest test{ store }; + + std::string task{ argv[2] }; + if(task == "populate") { + test.Populate(); + } else if(task == "recover") { + if(argc != 4) { + printf("Must specify version to recover to.\n"); + exit(1); + } + uint32_t version = std::atoi(argv[3]); + test.RecoverAndTest(version, version); + } + } else if(type == "concurrent") { + if(argc < 4) { + printf("Must specify number of threads to execute concurrently.\n"); + exit(1); + } + + size_t num_threads = std::atoi(argv[2]); + + sum_store::ConcurrentRecoveryTest test{ store, num_threads }; + + std::string task{ argv[3] }; + if(task == "populate") { + test.Populate(); + } else if(task == "recover") { + if(argc != 5) { + printf("Must specify version to recover to.\n"); + exit(1); + } + uint32_t version = std::atoi(argv[4]); + test.RecoverAndTest(version, version); + } else if(task == "continue") { + if(argc != 5) { + printf("Must specify version to continue from.\n"); + exit(1); + } + uint32_t version = std::atoi(argv[4]); + test.Continue(version, version); + } + + } + + + return 0; +} + diff --git a/cc/playground/sum_store-dir/sum_store.h b/cc/playground/sum_store-dir/sum_store.h new file mode 100644 index 000000000..38c4861b6 --- /dev/null +++ b/cc/playground/sum_store-dir/sum_store.h @@ -0,0 +1,160 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include "core/faster.h" +#include "core/utility.h" +#include "device/file_system_disk.h" + +using namespace FASTER::core; + +namespace sum_store { + +// Sum store's key type. +class AdId { + public: + AdId(uint64_t key) + : key_{ key } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(AdId)); + } + inline KeyHash GetHash() const { + return KeyHash{ Utility::GetHashCode(key_) }; + } + + /// Comparison operators. + inline bool operator==(const AdId& other) const { + return key_ == other.key_; + } + inline bool operator!=(const AdId& other) const { + return key_ != other.key_; + } + + private: + uint64_t key_; +}; +static_assert(sizeof(AdId) == 8, "sizeof(AdId) != 8)"); + +// Sum store's value type. +class NumClicks { + public: + NumClicks() + : num_clicks{ 0 } { + } + NumClicks(const NumClicks& other) + : num_clicks{ other.num_clicks } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(NumClicks)); + } + + union { + uint64_t num_clicks; + std::atomic atomic_num_clicks; + }; +}; + +/// Key is an 8-byte advertising ID. 
+typedef AdId key_t; + +/// Value is an 8-byte count of clicks. +typedef NumClicks value_t; + +/// Context to update the sum store (via read-modify-write). +class RmwContext : public IAsyncContext { + public: + typedef sum_store::key_t key_t; + typedef sum_store::value_t value_t; + + RmwContext(const AdId& key, uint64_t increment) + : key_{ key } + , increment_{ increment } { + } + + /// Copy (and deep-copy) constructor. + RmwContext(const RmwContext& other) + : key_{ other.key_ } + , increment_{ other.increment_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const AdId& key() const { + return key_; + } + + inline void RmwInitial(NumClicks& value) { + value.num_clicks = increment_; + } + inline void RmwCopy(const NumClicks& old_value, NumClicks& value) { + value.num_clicks = old_value.num_clicks + increment_; + } + inline bool RmwAtomic(NumClicks& value) { + value.atomic_num_clicks.fetch_add(increment_); + return true; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + AdId key_; + uint64_t increment_; +}; + +/// Context to read the store (after recovery). +class ReadContext : public IAsyncContext { + public: + typedef sum_store::key_t key_t; + typedef sum_store::value_t value_t; + + ReadContext(const AdId& key, uint64_t* result) + : key_{ key } + , result_{ result } { + } + + /// Copy (and deep-copy) constructor. + ReadContext(const ReadContext& other) + : key_{ other.key_ } + , result_{ other.result_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const AdId& key() const { + return key_; + } + + inline void Get(const value_t& value) { + *result_ = value.num_clicks; + } + inline void GetAtomic(const value_t& value) { + *result_ = value.atomic_num_clicks; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + AdId key_; + uint64_t* result_; +}; + +typedef FasterKv> store_t; + +} // namespace sum_store diff --git a/cc/src/CMakeLists.txt b/cc/src/CMakeLists.txt new file mode 100644 index 000000000..4ea429cc2 --- /dev/null +++ b/cc/src/CMakeLists.txt @@ -0,0 +1,63 @@ +# Build the FASTER library. 
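RmwContext and ReadContext above are everything a caller of the sum store has to supply; the store itself is driven with the same session and serial-number discipline used by the recovery tests. A condensed, illustrative sketch (table size, log size, and storage directory are placeholders, and the directory must already exist):

#include <cassert>
#include <cstdint>
#include "sum_store.h"

void SumStoreExample() {
  // Table size must be a power of two; the log here is 16 GB.
  sum_store::store_t store{ 1 << 15, 17179869184, "sum_storage" };

  // Callbacks only run if an operation goes async (i.e., the record is not in memory).
  auto rmw_callback = [](IAsyncContext* ctxt, Status result) {
    CallbackContext<sum_store::RmwContext> context{ ctxt };
    assert(result == Status::Ok);
  };
  auto read_callback = [](IAsyncContext* ctxt, Status result) {
    CallbackContext<sum_store::ReadContext> context{ ctxt };
    assert(result == Status::Ok);
  };

  store.StartSession();  // register this thread with the store

  sum_store::RmwContext rmw{ sum_store::AdId{ 42 }, /* increment */ 5 };
  store.Rmw(rmw, rmw_callback, /* monotonic serial number */ 1);

  uint64_t num_clicks = 0;
  sum_store::ReadContext read{ sum_store::AdId{ 42 }, &num_clicks };
  store.Read(read, read_callback, 2);

  store.CompletePending(true);  // drain any pending operations
  store.StopSession();          // deregister this thread
}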
+set (FAST_HEADERS + core/address.h + core/alloc.h + core/async.h + core/async_result_types.h + core/auto_ptr.h + core/checkpoint_locks.h + core/checkpoint_state.h + core/constants.h + core/faster.h + core/gc_state.h + core/grow_state.h + core/guid.h + core/hash_bucket.h + core/hash_table.h + core/internal_contexts.h + core/key_hash.h + core/light_epoch.h + core/lss_allocator.h + core/malloc_fixed_page_size.h + core/native_buffer_pool.h + core/persistent_memory_malloc.h + core/phase.h + core/record.h + core/recovery_status.h + core/state_transitions.h + core/status.h + core/thread.h + core/utility.h + device/file_system_disk.h + device/null_disk.h + environment/file.h + environment/file_common.h +) + +if (MSVC) +set (FAST_HEADERS ${FAST_HEADERS} + environment/file_windows.h +) +else() +set (FAST_HEADERS ${FAST_HEADERS} + environment/file_linux.h +) +endif() + +set (FAST_SOURCES + core/address.cc + core/lss_allocator.cc + core/thread.cc +) + +if (MSVC) +set (FAST_SOURCES ${FAST_SOURCES} + environment/file_windows.cc +) +else() +set (FAST_SOURCES ${FAST_SOURCES} + environment/file_linux.cc +) +endif() + +add_library(faster STATIC ${FAST_SOURCES} ${FAST_HEADERS}) diff --git a/cc/src/core/address.cc b/cc/src/core/address.cc new file mode 100644 index 000000000..1d9e289af --- /dev/null +++ b/cc/src/core/address.cc @@ -0,0 +1,12 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include "address.h" + +namespace FASTER { +namespace core { + +constexpr uint32_t Address::kMaxOffset; + +} +} // namespace FASTER::core diff --git a/cc/src/core/address.h b/cc/src/core/address.h new file mode 100644 index 000000000..aeb52d3fb --- /dev/null +++ b/cc/src/core/address.h @@ -0,0 +1,177 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include + +namespace FASTER { +namespace core { + +class PageOffset; + +/// (Logical) address into persistent memory. Identifies a page and an offset within that page. +/// Uses 48 bits: 25 bits for the offset and 23 bits for the page. (The remaining 16 bits are +/// reserved for use by the hash table.) +/// Address +class Address { + public: + friend class PageOffset; + + /// An invalid address, used when you need to initialize an address but you don't have a valid + /// value for it yet. NOTE: set to 1, not 0, to distinguish an invalid hash bucket entry + /// (initialized to all zeros) from a valid hash bucket entry that points to an invalid address. + static constexpr uint64_t kInvalidAddress = 1; + + /// A logical address is 8 bytes. + /// --of which 48 bits are used for the address. (The remaining 16 bits are used by the hash + /// table, for control bits and the tag.) + static constexpr uint64_t kAddressBits = 48; + static constexpr uint64_t kMaxAddress = ((uint64_t)1 << kAddressBits) - 1; + /// --of which 25 bits are used for offsets into a page, of size 2^25 = 32 MB. + static constexpr uint64_t kOffsetBits = 25; + static constexpr uint32_t kMaxOffset = ((uint32_t)1 << kOffsetBits) - 1; + /// --and the remaining 23 bits are used for the page index, allowing for approximately 8 million + /// pages. + static constexpr uint64_t kPageBits = kAddressBits - kOffsetBits; + static constexpr uint32_t kMaxPage = ((uint32_t)1 << kPageBits) - 1; + + /// Default constructor. 
+ Address() + : control_{ 0 } { + } + Address(uint32_t page, uint32_t offset) + : reserved_{ 0 } + , page_{ page } + , offset_{ offset } { + } + /// Copy constructor. + Address(const Address& other) + : control_{ other.control_ } { + } + Address(uint64_t control) + : control_{ control } { + assert(reserved_ == 0); + } + + inline Address& operator=(const Address& other) { + control_ = other.control_; + return *this; + } + inline Address& operator+=(uint64_t delta) { + assert(delta < UINT32_MAX); + control_ += delta; + return *this; + } + inline Address operator-(const Address& other) { + return control_ - other.control_; + } + + /// Comparison operators. + inline bool operator<(const Address& other) const { + assert(reserved_ == 0); + assert(other.reserved_ == 0); + return control_ < other.control_; + } + inline bool operator<=(const Address& other) const { + assert(reserved_ == 0); + assert(other.reserved_ == 0); + return control_ <= other.control_; + } + inline bool operator>(const Address& other) const { + assert(reserved_ == 0); + assert(other.reserved_ == 0); + return control_ > other.control_; + } + inline bool operator>=(const Address& other) const { + assert(reserved_ == 0); + assert(other.reserved_ == 0); + return control_ >= other.control_; + } + inline bool operator==(const Address& other) const { + return control_ == other.control_; + } + inline bool operator!=(const Address& other) const { + return control_ != other.control_; + } + + /// Accessors. + inline uint32_t page() const { + return static_cast(page_); + } + inline uint32_t offset() const { + return static_cast(offset_); + } + inline uint64_t control() const { + return control_; + } + + private: + union { + struct { + uint64_t offset_ : kOffsetBits; // 25 bits + uint64_t page_ : kPageBits; // 23 bits + uint64_t reserved_ : 64 - kAddressBits; // 16 bits + }; + uint64_t control_; + }; +}; +static_assert(sizeof(Address) == 8, "sizeof(Address) != 8"); + +} +} // namespace FASTER::core + +/// Implement std::min() for Address type. +namespace std { +template <> +inline const FASTER::core::Address& min(const FASTER::core::Address& a, + const FASTER::core::Address& b) { + return (b < a) ? b : a; +} +} + +namespace FASTER { +namespace core { + +/// Atomic (logical) address. +class AtomicAddress { + public: + AtomicAddress(const Address& address) + : control_{ address.control() } { + } + + /// Atomic access. + inline Address load() const { + return Address{ control_.load() }; + } + inline void store(Address value) { + control_.store(value.control()); + } + inline bool compare_exchange_strong(Address& expected, Address desired) { + uint64_t expected_control = expected.control(); + bool result = control_.compare_exchange_strong(expected_control, desired.control()); + expected = Address{ expected_control }; + return result; + } + + /// Accessors. + inline uint32_t page() const { + return load().page(); + } + inline uint32_t offset() const { + return load().offset(); + } + inline uint64_t control() const { + return load().control(); + } + + private: + /// Atomic access to the address. + std::atomic control_; +}; + +} +} // namespace FASTER::core diff --git a/cc/src/core/alloc.h b/cc/src/core/alloc.h new file mode 100644 index 000000000..fefe5e806 --- /dev/null +++ b/cc/src/core/alloc.h @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
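Address above packs a 25-bit in-page offset and a 23-bit page index into the low 48 bits of one 64-bit control word, so pages are 2^25 = 32 MB and the top 16 bits remain free for the hash table's tag and control bits. A small sanity-check sketch of that packing (the include path is assumed):

#include <cassert>
#include <cstdint>
#include "core/address.h"

using namespace FASTER::core;

int main() {
  Address a{ /* page */ 3, /* offset */ 100 };
  assert(a.page() == 3 && a.offset() == 100);
  // The control word is (page << kOffsetBits) | offset.
  assert(a.control() == ((uint64_t)3 << Address::kOffsetBits) + 100);

  Address b = a;
  b += Address::kMaxOffset + 1;  // advancing by a full page bumps the page index
  assert(b.page() == 4 && b.offset() == 100);
  return 0;
}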
+ +#pragma once + +#include + +#ifdef _WIN32 +#include +#endif + +namespace FASTER { +namespace core { + +/// Windows and standard C++/Linux have incompatible implementations of aligned malloc(). (Windows +/// defines a corresponding aligned free(), while Linux relies on the ordinary free().) +inline void* aligned_alloc(size_t alignment, size_t size) { +#ifdef _WIN32 + return _aligned_malloc(size, alignment); +#else + return ::aligned_alloc(alignment, size); +#endif +} + +inline void aligned_free(void* ptr) { +#ifdef _WIN32 + _aligned_free(ptr); +#else + ::free(ptr); +#endif +} + +} +} // namespace FASTER::core + diff --git a/cc/src/core/async.h b/cc/src/core/async.h new file mode 100644 index 000000000..ce1786d82 --- /dev/null +++ b/cc/src/core/async.h @@ -0,0 +1,132 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include + +#include "auto_ptr.h" +#include "status.h" + +namespace FASTER { +namespace core { + +#define RETURN_NOT_OK(s) do { \ + Status _s = (s); \ + if (_s != Status::Ok) return _s; \ + } while (0) + +class IAsyncContext; + +/// Signature of the async callback for I/Os. +typedef void(*AsyncIOCallback)(IAsyncContext* context, Status result, size_t bytes_transferred); + +/// Standard interface for contexts used by async callbacks. +class IAsyncContext { + public: + IAsyncContext() + : from_deep_copy_{ false } { + } + + virtual ~IAsyncContext() { } + + /// Contexts are initially allocated (as local variables) on the stack. When an operation goes + /// async, it deep copies its context to a new heap allocation; this context must also deep copy + /// its parent context, if any. Once a context has been deep copied, subsequent DeepCopy() calls + /// just return the original, heap-allocated copy. + Status DeepCopy(IAsyncContext*& context_copy) { + if(from_deep_copy_) { + // Already on the heap: nothing to do. + context_copy = this; + return Status::Ok; + } else { + RETURN_NOT_OK(DeepCopy_Internal(context_copy)); + context_copy->from_deep_copy_ = true; + return Status::Ok; + } + } + + /// Whether the internal state for the async context has been copied to a heap-allocated memory + /// block. + bool from_deep_copy() const { + return from_deep_copy_; + } + + protected: + /// Override this method to make a deep, persistent copy of your context. A context should: + /// 1. Allocate memory for its copy. If the allocation fails, return Status::OutOfMemory. + /// 2. If it has a parent/caller context, call DeepCopy() on that context. If the call fails, + /// free the memory it just allocated and return the call's error code. + /// 3. Initialize its copy and return Status::Ok.. + virtual Status DeepCopy_Internal(IAsyncContext*& context_copy) = 0; + + /// A common pattern: deep copy, when context has no parent/caller context. + template + inline static Status DeepCopy_Internal(C& context, IAsyncContext*& context_copy) { + context_copy = nullptr; + auto ctxt = alloc_context(sizeof(C)); + if(!ctxt.get()) return Status::OutOfMemory; + new(ctxt.get()) C{ context }; + context_copy = ctxt.release(); + return Status::Ok; + } + /// Another common pattern: deep copy, when context has a parent/caller context. 
+ template + inline static Status DeepCopy_Internal(C& context, IAsyncContext* caller_context, + IAsyncContext*& context_copy) { + context_copy = nullptr; + auto ctxt = alloc_context(sizeof(C)); + if(!ctxt.get()) return Status::OutOfMemory; + IAsyncContext* caller_context_copy; + RETURN_NOT_OK(caller_context->DeepCopy(caller_context_copy)); + new(ctxt.get()) C{ context, caller_context_copy }; + context_copy = ctxt.release(); + return Status::Ok; + } + + private: + /// Whether the internal state for the async context has been copied to a heap-allocated memory + /// block. + bool from_deep_copy_; +}; + +/// User-defined callbacks for async FASTER operations. Async callback equivalent of: +/// Status some_function(context* arg). +typedef void(*AsyncCallback)(IAsyncContext* ctxt, Status result); + +/// Helper class, for use inside a continuation callback, that ensures the context will be freed +/// when the callback exits. +template +class CallbackContext { + public: + CallbackContext(IAsyncContext* context) + : async{ false } { + context_ = make_context_unique_ptr(static_cast(context)); + } + + ~CallbackContext() { + if(async || !context_->from_deep_copy()) { + // The callback went async again, or it never went async. The next callback or the caller is + // responsible for freeing the context. + context_.release(); + } + } + + C* get() const { + return context_.get(); + } + C* operator->() const { + return context_.get(); + } + + public: + bool async; + protected: + context_unique_ptr_t context_; +}; + +} +} // namespace FASTER::core \ No newline at end of file diff --git a/cc/src/core/async_result_types.h b/cc/src/core/async_result_types.h new file mode 100644 index 000000000..698bdedac --- /dev/null +++ b/cc/src/core/async_result_types.h @@ -0,0 +1,60 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include "address.h" +#include "async.h" +#include "native_buffer_pool.h" + +#ifdef _WIN32 +#include + +template +using concurrent_queue = concurrency::concurrent_queue; +#endif + +namespace FASTER { +namespace core { + +class AsyncIOContext : public IAsyncContext { + public: + AsyncIOContext(void* faster_, Address address_, + IAsyncContext* caller_context_, + concurrent_queue* thread_io_responses_, + uint64_t io_id_) + : faster{ faster_ } + , address{ address_ } + , caller_context{ caller_context_ } + , thread_io_responses{ thread_io_responses_ } + , io_id{ io_id_ } { + } + /// No copy constructor. + AsyncIOContext(const AsyncIOContext& other) = delete; + /// The deep-copy constructor. + AsyncIOContext(AsyncIOContext& other, IAsyncContext* caller_context_) + : faster{ other.faster } + , address{ other.address } + , caller_context{ caller_context_ } + , thread_io_responses{ other.thread_io_responses } + , record{ std::move(other.record) } + , io_id{ other.io_id } { + } + protected: + Status DeepCopy_Internal(IAsyncContext*& context_copy) final { + return IAsyncContext::DeepCopy_Internal(*this, caller_context, context_copy); + } + public: + void* faster; + Address address; + IAsyncContext* caller_context; + concurrent_queue* thread_io_responses; + uint64_t io_id; + + SectorAlignedMemory record; +}; + +} +} // namespace FASTER::core \ No newline at end of file diff --git a/cc/src/core/auto_ptr.h b/cc/src/core/auto_ptr.h new file mode 100644 index 000000000..31d9edaaf --- /dev/null +++ b/cc/src/core/auto_ptr.h @@ -0,0 +1,123 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include +#include + +#include "alloc.h" +#include "lss_allocator.h" + +#ifdef _WIN32 +#include +#pragma intrinsic(_BitScanReverse64) +#else +namespace FASTER { +/// Convert GCC's __builtin_clzl() to Microsoft's _BitScanReverse64(). +inline uint8_t _BitScanReverse64(unsigned long* index, uint64_t mask) { + bool found = mask > 0; + *index = 63 - __builtin_clzl(mask); + return found; +} +} +#endif + +/// Wrappers for C++ std::unique_ptr<>. + +namespace FASTER { +namespace core { + +/// Round the specified size up to the next power of 2. +inline size_t next_power_of_two(size_t size) { + assert(size > 0); + // BSR returns the index k of the most-significant 1 bit. So 2^(k+1) > (size - 1) >= 2^k, + // which means 2^(k+1) >= size > 2^k. + unsigned long k; + uint8_t found = _BitScanReverse64(&k, size - 1); + return (uint64_t)1 << (found * (k + 1)); +} + +/// Pad alignment to specified. Declared "constexpr" so that the calculation can be performed at +/// compile time, assuming parameters "size" and "alignment" are known then. +constexpr inline size_t pad_alignment(size_t size, size_t alignment) { + assert(alignment > 0); + // Function implemented only for powers of 2. + assert((alignment & (alignment - 1)) == 0); + size_t max_padding = alignment - 1; + return (size + max_padding) & ~max_padding; +} + +/// Pad alignment to specified type. +template +constexpr inline size_t pad_alignment(size_t size) { + return pad_alignment(size, alignof(T)); +} + +/// Defined in C++ 14; copying the definition here for older compilers. +template +using remove_const_t = typename std::remove_const::type; + +/// alloc_aligned(): allocate a unique_ptr with a particular alignment. +template +void unique_ptr_aligned_deleter(T* p) { + auto q = const_cast*>(p); + q->~T(); + aligned_free(q); +} + +template +struct AlignedDeleter { + void operator()(T* p) const { + unique_ptr_aligned_deleter(p); + } +}; + +template +using aligned_unique_ptr_t = std::unique_ptr>; +static_assert(sizeof(aligned_unique_ptr_t) == 8, "sizeof(unique_aligned_ptr_t)"); + +template +aligned_unique_ptr_t make_aligned_unique_ptr(T* p) { + return aligned_unique_ptr_t(p, AlignedDeleter()); +} + +template +aligned_unique_ptr_t alloc_aligned(size_t alignment, size_t size) { + return make_aligned_unique_ptr(reinterpret_cast(aligned_alloc(alignment, size))); +} + +/// alloc_context(): allocate a small chunk of memory for a callback context. +template +void unique_ptr_context_deleter(T* p) { + auto q = const_cast*>(p); + q->~T(); + lss_allocator.Free(q); +} + +template +struct ContextDeleter { + void operator()(T* p) const { + unique_ptr_context_deleter(p); + } +}; + +template +using context_unique_ptr_t = std::unique_ptr>; +static_assert(sizeof(context_unique_ptr_t) == 8, "sizeof(context_unique_ptr_t)"); + +template +context_unique_ptr_t make_context_unique_ptr(T* p) { + return context_unique_ptr_t(p, ContextDeleter()); +} + +template +context_unique_ptr_t alloc_context(uint32_t size) { + return make_context_unique_ptr(reinterpret_cast(lss_allocator.Allocate(size))); +} + +} +} // namespace FASTER::core diff --git a/cc/src/core/checkpoint_locks.h b/cc/src/core/checkpoint_locks.h new file mode 100644 index 000000000..c1f31cd36 --- /dev/null +++ b/cc/src/core/checkpoint_locks.h @@ -0,0 +1,192 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
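The helpers above recur throughout the code base: next_power_of_two() rounds a requested table size up to a power of two, pad_alignment() rounds a size up to an alignment boundary, and alloc_aligned<T>() / alloc_context<T>() return unique_ptrs whose deleters invoke the matching aligned or log-structured free. A quick illustrative sketch (include path assumed):

#include <cassert>
#include <cstdint>
#include "core/auto_ptr.h"

using namespace FASTER::core;

int main() {
  // next_power_of_two(): smallest 2^k with 2^k >= size.
  assert(next_power_of_two(1000) == 1024);
  assert(next_power_of_two(1024) == 1024);

  // pad_alignment(): round 10 bytes up to an 8-byte boundary.
  assert(pad_alignment(10, 8) == 16);
  assert(pad_alignment<uint64_t>(9) == 16);

  // Cache-line-aligned buffer; the deleter calls the matching aligned free.
  auto buffer = alloc_aligned<uint64_t>(64, 64 * sizeof(uint64_t));
  assert(reinterpret_cast<uintptr_t>(buffer.get()) % 64 == 0);
  return 0;
}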
+ +#pragma once + +#include +#include +#include +#include + +#include "alloc.h" +#include "constants.h" +#include "key_hash.h" + +namespace FASTER { +namespace core { + +struct CheckpointLock { + CheckpointLock() + : control_{ 0 } { + } + CheckpointLock(uint64_t control) + : control_{ control } { + } + CheckpointLock(uint32_t old_lock_count, uint32_t new_lock_count) + : old_lock_count_{ old_lock_count } + , new_lock_count_{ new_lock_count } { + } + + union { + struct { + uint32_t old_lock_count_; + uint32_t new_lock_count_; + }; + uint64_t control_; + }; +}; +static_assert(sizeof(CheckpointLock) == 8, "sizeof(CheckpointLock) != 8"); + +class AtomicCheckpointLock { + public: + AtomicCheckpointLock() + : control_{ 0 } { + } + + /// Try to lock the old version of a record. + inline bool try_lock_old() { + CheckpointLock expected{ control_.load() }; + while(expected.new_lock_count_ == 0) { + CheckpointLock desired{ expected.old_lock_count_ + 1, 0 }; + if(control_.compare_exchange_strong(expected.control_, desired.control_)) { + return true; + } + } + return false; + } + inline void unlock_old() { + control_ -= CheckpointLock{ 1, 0 } .control_; + } + + /// Try to lock the new version of a record. + inline bool try_lock_new() { + CheckpointLock expected{ control_.load() }; + while(expected.old_lock_count_ == 0) { + CheckpointLock desired{ 0, expected.new_lock_count_ + 1 }; + if(control_.compare_exchange_strong(expected.control_, desired.control_)) { + return true; + } + } + return false; + } + inline void unlock_new() { + control_ -= CheckpointLock{ 0, 1 } .control_; + } + + inline bool old_locked() const { + CheckpointLock result{ control_ }; + return result.old_lock_count_ > 0; + } + inline bool new_locked() const { + CheckpointLock result{ control_ }; + return result.new_lock_count_ > 0; + } + + private: + union { + std::atomic control_; + }; +}; +static_assert(sizeof(AtomicCheckpointLock) == 8, "sizeof(AtomicCheckpointLock) != 8"); + +class CheckpointLocks { + public: + CheckpointLocks() + : size_{ 0 } + , locks_{ nullptr } { + } + + ~CheckpointLocks() { + if(locks_) { + aligned_free(locks_); + } + } + + void Initialize(uint64_t size) { + assert(size < INT32_MAX); + assert(Utility::IsPowerOfTwo(size)); + if(locks_) { + aligned_free(locks_); + } + size_ = size; + locks_ = reinterpret_cast(aligned_alloc(Constants::kCacheLineBytes, + size_ * sizeof(AtomicCheckpointLock))); + std::memset(locks_, 0, size_ * sizeof(AtomicCheckpointLock)); + } + + void Free() { + assert(locks_); +#ifdef _DEBUG + for(uint64_t idx = 0; idx < size_; ++idx) { + assert(!locks_[idx].old_locked()); + assert(!locks_[idx].new_locked()); + } +#endif + aligned_free(locks_); + size_ = 0; + locks_ = nullptr; + } + + inline uint64_t size() const { + return size_; + } + + inline AtomicCheckpointLock& get_lock(KeyHash hash) { + return locks_[hash.idx(size_)]; + } + + private: + uint64_t size_; + AtomicCheckpointLock* locks_; +}; + +class CheckpointLockGuard { + public: + CheckpointLockGuard(CheckpointLocks& locks, KeyHash hash) + : lock_{ nullptr } + , locked_old_{ false } + , locked_new_{ false } { + if(locks.size() > 0) { + lock_ = &locks.get_lock(hash); + } + } + ~CheckpointLockGuard() { + if(lock_) { + if(locked_old_) { + lock_->unlock_old(); + } + if(locked_new_) { + lock_->unlock_new(); + } + } + } + inline bool try_lock_old() { + assert(lock_); + assert(!locked_old_); + locked_old_ = lock_->try_lock_old(); + return locked_old_; + } + inline bool try_lock_new() { + assert(lock_); + assert(!locked_new_); + locked_new_ = 
lock_->try_lock_new(); + return locked_new_; + } + + inline bool old_locked() const { + assert(lock_); + return lock_->old_locked(); + } + inline bool new_locked() const { + assert(lock_); + return lock_->new_locked(); + } + + private: + AtomicCheckpointLock* lock_; + bool locked_old_; + bool locked_new_; +}; + +} +} // namespace FASTER::core diff --git a/cc/src/core/checkpoint_state.h b/cc/src/core/checkpoint_state.h new file mode 100644 index 000000000..44b4b2ec4 --- /dev/null +++ b/cc/src/core/checkpoint_state.h @@ -0,0 +1,166 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include "address.h" +#include "guid.h" +#include "malloc_fixed_page_size.h" +#include "thread.h" + +namespace FASTER { +namespace core { + +/// Checkpoint metadata for the index itself. +class IndexMetadata { + public: + IndexMetadata() + : version{ 0 } + , table_size{ 0 } + , num_ht_bytes{ 0 } + , num_ofb_bytes{ 0 } + , ofb_count{ FixedPageAddress::kInvalidAddress } + , log_begin_address{ Address::kInvalidAddress } + , checkpoint_start_address{ Address::kInvalidAddress } { + } + + inline void Initialize(uint32_t version_, uint64_t size_, Address log_begin_address_, + Address checkpoint_start_address_) { + version = version_; + table_size = size_; + log_begin_address = log_begin_address_; + checkpoint_start_address = checkpoint_start_address_; + num_ht_bytes = 0; + num_ofb_bytes = 0; + ofb_count = FixedPageAddress::kInvalidAddress; + } + inline void Reset() { + version = 0; + table_size = 0; + num_ht_bytes = 0; + num_ofb_bytes = 0; + ofb_count = FixedPageAddress::kInvalidAddress; + log_begin_address = Address::kInvalidAddress; + checkpoint_start_address = Address::kInvalidAddress; + } + + uint32_t version; + uint64_t table_size; + uint64_t num_ht_bytes; + uint64_t num_ofb_bytes; + FixedPageAddress ofb_count; + /// Earliest address that is valid for the log. + Address log_begin_address; + /// Address as of which this checkpoint was taken. + Address checkpoint_start_address; +}; +static_assert(sizeof(IndexMetadata) == 56, "sizeof(IndexMetadata) != 56"); + +/// Checkpoint metadata, for the log. +class LogMetadata { + public: + LogMetadata() + : use_snapshot_file{ false } + , version{ UINT32_MAX } + , num_threads{ 0 } + , flushed_address{ Address::kInvalidAddress } + , final_address{ Address::kMaxAddress } { + std::memset(guids, 0, sizeof(guids)); + std::memset(monotonic_serial_nums, 0, sizeof(monotonic_serial_nums)); + } + + inline void Initialize(bool use_snapshot_file_, uint32_t version_, Address flushed_address_) { + use_snapshot_file = use_snapshot_file_; + version = version_; + num_threads = 0; + flushed_address = flushed_address_; + final_address = Address::kMaxAddress; + std::memset(guids, 0, sizeof(guids)); + std::memset(monotonic_serial_nums, 0, sizeof(monotonic_serial_nums)); + } + inline void Reset() { + Initialize(false, UINT32_MAX, Address::kInvalidAddress); + } + + bool use_snapshot_file; + uint32_t version; + std::atomic num_threads; + Address flushed_address; + Address final_address; + uint64_t monotonic_serial_nums[Thread::kMaxNumThreads]; + Guid guids[Thread::kMaxNumThreads]; +}; +static_assert(sizeof(LogMetadata) == 32 + (24 * Thread::kMaxNumThreads), + "sizeof(LogMetadata) != 32 + (24 * Thread::kMaxNumThreads)"); + +/// State of the active Checkpoint()/Recover() call, including metadata written to disk. 
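AtomicCheckpointLock above admits any number of holders of the old version or any number of holders of the new version, but never both at once; because the two 32-bit counts share a single atomic word, one compare-and-swap decides which side wins. A small illustrative sketch (include path assumed):

#include <cassert>
#include "core/checkpoint_locks.h"

using namespace FASTER::core;

int main() {
  AtomicCheckpointLock lock;
  bool old1 = lock.try_lock_old();  // succeeds: no new-version holders
  bool old2 = lock.try_lock_old();  // succeeds: old-version locks are shared
  bool new1 = lock.try_lock_new();  // fails: old-version locks are still held
  assert(old1 && old2 && !new1);
  lock.unlock_old();
  lock.unlock_old();
  bool new2 = lock.try_lock_new();  // succeeds once the old side has drained
  assert(new2);
  lock.unlock_new();
  return 0;
}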
+template +class CheckpointState { + public: + typedef F file_t; + typedef void(*persistence_callback_t)(uint64_t persistent_serial_num); + + CheckpointState() + : index_checkpoint_started{ false } + , failed{ false } + , flush_pending{ UINT32_MAX } + , persistence_callback{ nullptr } { + } + + void InitializeCheckpoint(uint32_t version, uint64_t table_size, Address log_begin_address, + Address checkpoint_start_address, bool use_snapshot_file, + Address flushed_until_address, + persistence_callback_t persistence_callback_) { + failed = false; + index_checkpoint_started = false; + continue_tokens.clear(); + index_metadata.Initialize(version, table_size, log_begin_address, checkpoint_start_address); + log_metadata.Initialize(use_snapshot_file, version, flushed_until_address); + if(use_snapshot_file) { + flush_pending = UINT32_MAX; + } else { + flush_pending = 0; + } + persistence_callback = persistence_callback_; + } + + void CheckpointDone() { + assert(!failed); + assert(index_checkpoint_started); + assert(continue_tokens.empty()); + assert(flush_pending == 0); + index_metadata.Reset(); + log_metadata.Reset(); + snapshot_file.Close(); + persistence_callback = nullptr; + } + + inline void InitializeRecover() { + failed = false; + } + + void RecoverDone() { + assert(!failed); + index_metadata.Reset(); + log_metadata.Reset(); + snapshot_file.Close(); + } + + std::atomic index_checkpoint_started; + std::atomic failed; + IndexMetadata index_metadata; + LogMetadata log_metadata; + /// State used when fold_over_snapshot = false. + file_t snapshot_file; + std::atomic flush_pending; + + persistence_callback_t persistence_callback; + std::unordered_map continue_tokens; +}; + +} +} // namespace FASTER::core + diff --git a/cc/src/core/constants.h b/cc/src/core/constants.h new file mode 100644 index 000000000..a1746f11b --- /dev/null +++ b/cc/src/core/constants.h @@ -0,0 +1,20 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include + +namespace FASTER { +namespace core { + +struct Constants { + /// Size of cache line in bytes + static constexpr uint32_t kCacheLineBytes = 64; + + /// We issue 256 writes to disk, to checkpoint the hash table. + static constexpr uint32_t kNumMergeChunks = 256; +}; + +} +} // namespace FASTER::cire \ No newline at end of file diff --git a/cc/src/core/faster.h b/cc/src/core/faster.h new file mode 100644 index 000000000..887dab6d4 --- /dev/null +++ b/cc/src/core/faster.h @@ -0,0 +1,2558 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "device/file_system_disk.h" + +#include "alloc.h" +#include "checkpoint_locks.h" +#include "checkpoint_state.h" +#include "constants.h" +#include "gc_state.h" +#include "grow_state.h" +#include "guid.h" +#include "hash_table.h" +#include "internal_contexts.h" +#include "key_hash.h" +#include "malloc_fixed_page_size.h" +#include "persistent_memory_malloc.h" +#include "record.h" +#include "recovery_status.h" +#include "state_transitions.h" +#include "status.h" +#include "utility.h" + +using namespace std::chrono_literals; + +/// The FASTER key-value store, and related classes. 
+ +namespace FASTER { +namespace core { + +class alignas(Constants::kCacheLineBytes) ThreadContext { + public: + ThreadContext() + : contexts_{} + , cur_{ 0 } { + } + + inline const ExecutionContext& cur() const { + return contexts_[cur_]; + } + inline ExecutionContext& cur() { + return contexts_[cur_]; + } + + inline const ExecutionContext& prev() const { + return contexts_[(cur_ + 1) % 2]; + } + inline ExecutionContext& prev() { + return contexts_[(cur_ + 1) % 2]; + } + + inline void swap() { + cur_ = (cur_ + 1) % 2; + } + + private: + ExecutionContext contexts_[2]; + uint8_t cur_; +}; +static_assert(sizeof(ThreadContext) == 448, "sizeof(ThreadContext) != 448"); + +/// The FASTER key-value store. +template +class FasterKv { + public: + typedef FasterKv faster_t; + + /// Keys and values stored in this key-value store. + typedef K key_t; + typedef V value_t; + + typedef D disk_t; + typedef typename D::file_t file_t; + typedef typename D::log_file_t log_file_t; + + typedef PersistentMemoryMalloc hlog_t; + + /// Contexts that have been deep-copied, for async continuations, and must be accessed via + /// virtual function calls. + typedef AsyncPendingReadContext async_pending_read_context_t; + typedef AsyncPendingUpsertContext async_pending_upsert_context_t; + typedef AsyncPendingRmwContext async_pending_rmw_context_t; + + FasterKv(uint64_t table_size, uint64_t log_size, const std::string& filename, + double log_mutable_fraction = 0.9) + : min_table_size_{ table_size } + , disk{ filename, epoch_ } + , hlog{ log_size, epoch_, disk, disk.log(), log_mutable_fraction } + , system_state_{ Action::None, Phase::REST, 1 } + , num_pending_ios{ 0 } { + if(!Utility::IsPowerOfTwo(table_size)) { + throw std::invalid_argument{ " Size is not a power of 2" }; + } + if(table_size > INT32_MAX) { + throw std::invalid_argument{ " Cannot allocate such a large hash table " }; + } + + resize_info_.version = 0; + state_[0].Initialize(table_size, disk.log().alignment()); + overflow_buckets_allocator_[0].Initialize(disk.log().alignment(), epoch_); + } + + // No copy constructor. + FasterKv(const FasterKv& other) = delete; + + public: + /// Thread-related operations + Guid StartSession(); + uint64_t ContinueSession(const Guid& guid); + void StopSession(); + void Refresh(); + + /// Store interface + template + inline Status Read(RC& context, AsyncCallback callback, uint64_t monotonic_serial_num); + + template + inline Status Upsert(UC& context, AsyncCallback callback, uint64_t monotonic_serial_num); + + template + inline Status Rmw(MC& context, AsyncCallback callback, uint64_t monotonic_serial_num); + /// Delete() not yet implemented! + // void Delete(const Key& key, Context& context, uint64_t lsn); + inline bool CompletePending(bool wait = false); + + /// Checkpoint/recovery operations. + bool Checkpoint(void(*persistence_callback)(uint64_t persistent_serial_num)); + Status Recover(uint32_t cpr_version, uint32_t index_version, std::vector& session_ids); + + /// Truncating the head of the log. + bool ShiftBeginAddress(Address address, GcState::truncate_callback_t truncate_callback, + GcState::complete_callback_t complete_callback); + + /// Make the hash table larger. 
+ bool GrowIndex(GrowState::callback_t caller_callback); + + /// Statistics + inline uint64_t Size() const { + return hlog.GetTailAddress().control(); + } + inline void DumpDistribution() { + state_[resize_info_.version].DumpDistribution( + overflow_buckets_allocator_[resize_info_.version]); + } + + private: + typedef Record record_t; + + typedef PendingContext pending_context_t; + + template + inline OperationStatus InternalRead(C& pending_context) const; + + template + inline OperationStatus InternalUpsert(C& pending_context); + + template + inline OperationStatus InternalRmw(C& pending_context, bool retrying); + + inline OperationStatus InternalRetryPendingRmw(async_pending_rmw_context_t& pending_context); + + OperationStatus InternalContinuePendingRead(ExecutionContext& ctx, + AsyncIOContext& io_context); + OperationStatus InternalContinuePendingRmw(ExecutionContext& ctx, + AsyncIOContext& io_context); + + // Find the hash bucket entry, if any, corresponding to the specified hash. + inline const AtomicHashBucketEntry* FindEntry(KeyHash hash) const; + // If a hash bucket entry corresponding to the specified hash exists, return it; otherwise, + // create a new entry. The caller can use the "expected_entry" to CAS its desired address into + // the entry. + inline AtomicHashBucketEntry* FindOrCreateEntry(KeyHash hash, HashBucketEntry& expected_entry, + HashBucket*& bucket); + inline Address TraceBackForKeyMatch(const key_t& key, Address from_address, + Address min_offset) const; + Address TraceBackForOtherChainStart(uint64_t old_size, uint64_t new_size, Address from_address, + Address min_address, uint8_t side); + + // If a hash bucket entry corresponding to the specified hash exists, return it; otherwise, + // return an unused bucket entry. + inline AtomicHashBucketEntry* FindTentativeEntry(KeyHash hash, HashBucket* bucket, + uint8_t version, HashBucketEntry& expected_entry); + // Looks for an entry that has the same + inline bool HasConflictingEntry(KeyHash hash, const HashBucket* bucket, uint8_t version, + const AtomicHashBucketEntry* atomic_entry) const; + + inline Address BlockAllocate(uint32_t record_size); + + inline Status HandleOperationStatus(ExecutionContext& ctx, + pending_context_t& pending_context, + OperationStatus internal_status, bool& async); + inline Status PivotAndRetry(ExecutionContext& ctx, pending_context_t& pending_context, + bool& async); + inline Status RetryLater(ExecutionContext& ctx, pending_context_t& pending_context, + bool& async); + inline constexpr uint32_t MinIoRequestSize() const; + inline Status IssueAsyncIoRequest(ExecutionContext& ctx, pending_context_t& pending_context, + bool& async); + + void AsyncGetFromDisk(Address address, uint32_t num_records, AsyncIOCallback callback, + AsyncIOContext& context); + static void AsyncGetFromDiskCallback(IAsyncContext* ctxt, Status result, + size_t bytes_transferred); + + void CompleteIoPendingRequests(ExecutionContext& context); + void CompleteRetryRequests(ExecutionContext& context); + + void InitializeCheckpointLocks(); + + /// Checkpoint/recovery methods. 
+ void HandleSpecialPhases(); + bool GlobalMoveToNextState(SystemState current_state); + + Status CheckpointFuzzyIndex(); + Status CheckpointFuzzyIndexComplete(); + Status RecoverFuzzyIndex(); + Status RecoverFuzzyIndexComplete(bool wait); + + Status WriteIndexMetadata(); + Status ReadIndexMetadata(uint32_t version); + Status WriteCprMetadata(); + Status ReadCprMetadata(uint32_t version); + Status WriteCprContext(); + Status ReadCprContexts(uint32_t version, const Guid* guids); + + Status RecoverHybridLog(); + Status RecoverHybridLogFromSnapshotFile(); + Status RecoverFromPage(Address from_address, Address to_address); + Status RestoreHybridLog(); + + void MarkAllPendingRequests(); + + inline void HeavyEnter(); + bool CleanHashTableBuckets(); + void SplitHashTableBuckets(); + void AddHashEntry(HashBucket*& bucket, uint32_t& next_idx, uint8_t version, + HashBucketEntry entry); + + /// Access the current and previous (thread-local) execution contexts. + const ExecutionContext& thread_ctx() const { + return thread_contexts_[Thread::id()].cur(); + } + ExecutionContext& thread_ctx() { + return thread_contexts_[Thread::id()].cur(); + } + ExecutionContext& prev_thread_ctx() { + return thread_contexts_[Thread::id()].prev(); + } + + private: + LightEpoch epoch_; + + public: + disk_t disk; + hlog_t hlog; + + private: + static constexpr bool kCopyReadsToTail = false; + static constexpr uint64_t kGcHashTableChunkSize = 16384; + static constexpr uint64_t kGrowHashTableChunkSize = 16384; + + bool fold_over_snapshot = true; + + /// Initial size of the table + uint64_t min_table_size_; + + // Allocator for the hash buckets that don't fit in the hash table. + MallocFixedPageSize overflow_buckets_allocator_[2]; + + // An array of size two, that contains the old and new versions of the hash-table + InternalHashTable state_[2]; + + CheckpointLocks checkpoint_locks_; + + ResizeInfo resize_info_; + + AtomicSystemState system_state_; + + /// Checkpoint/recovery state. + CheckpointState checkpoint_; + /// Garbage collection state. + GcState gc_; + /// Grow (hash table) state. + GrowState grow_; + + /// Global count of pending I/Os, used for throttling. + std::atomic num_pending_ios; + + /// Space for two contexts per thread, stored inline. + ThreadContext thread_contexts_[Thread::kMaxNumThreads]; +}; + +// Implementations. +template +inline Guid FasterKv::StartSession() { + SystemState state = system_state_.load(); + if(state.phase != Phase::REST) { + throw std::runtime_error{ "Can acquire only in REST phase!" }; + } + thread_ctx().Initialize(state.phase, state.version, Guid::Create(), 0); + Refresh(); + return thread_ctx().guid; +} + +template +inline uint64_t FasterKv::ContinueSession(const Guid& session_id) { + auto iter = checkpoint_.continue_tokens.find(session_id); + if(iter == checkpoint_.continue_tokens.end()) { + throw std::invalid_argument{ "Unknown session ID" }; + } + + SystemState state = system_state_.load(); + if(state.phase != Phase::REST) { + throw std::runtime_error{ "Can continue only in REST phase!" 
}; + } + thread_ctx().Initialize(state.phase, state.version, session_id, iter->second); + Refresh(); + return iter->second; +} + +template +inline void FasterKv::Refresh() { + epoch_.ProtectAndDrain(); + // We check if we are in normal mode + SystemState new_state = system_state_.load(); + if(thread_ctx().phase == Phase::REST && new_state.phase == Phase::REST) { + return; + } + HandleSpecialPhases(); +} + +template +inline void FasterKv::StopSession() { + // If this thread is still involved in some activity, wait until it finishes. + while(thread_ctx().phase != Phase::REST || + !thread_ctx().pending_ios.empty() || + !thread_ctx().retry_requests.empty()) { + CompletePending(false); + std::this_thread::yield(); + } + + assert(thread_ctx().retry_requests.empty()); + assert(thread_ctx().pending_ios.empty()); + assert(thread_ctx().io_responses.empty()); + + assert(prev_thread_ctx().retry_requests.empty()); + assert(prev_thread_ctx().pending_ios.empty()); + assert(prev_thread_ctx().io_responses.empty()); + + assert(thread_ctx().phase == Phase::REST); + + epoch_.Unprotect(); +} + +template +inline const AtomicHashBucketEntry* FasterKv::FindEntry(KeyHash hash) const { + // Truncate the hash to get a bucket page_index < state[version].size. + uint32_t version = resize_info_.version; + const HashBucket* bucket = &state_[version].bucket(hash); + assert(reinterpret_cast(bucket) % Constants::kCacheLineBytes == 0); + + while(true) { + // Search through the bucket looking for our key. Last entry is reserved + // for the overflow pointer. + for(uint32_t entry_idx = 0; entry_idx < HashBucket::kNumEntries; ++entry_idx) { + HashBucketEntry entry = bucket->entries[entry_idx].load(); + if(entry.unused()) { + continue; + } + if(hash.tag() == entry.tag()) { + // Found a matching tag. (So, the input hash matches the entry on 14 tag bits + + // log_2(table size) address bits.) + if(!entry.tentative()) { + // If (final key, return immediately) + return &bucket->entries[entry_idx]; + } + } + } + + // Go to next bucket in the chain + HashBucketOverflowEntry entry = bucket->overflow_entry.load(); + if(entry.unused()) { + // No more buckets in the chain. + return nullptr; + } + bucket = &overflow_buckets_allocator_[version].Get(entry.address()); + assert(reinterpret_cast(bucket) % Constants::kCacheLineBytes == 0); + } + assert(false); + return nullptr; // NOT REACHED +} + +template +inline AtomicHashBucketEntry* FasterKv::FindTentativeEntry(KeyHash hash, + HashBucket* bucket, + uint8_t version, HashBucketEntry& expected_entry) { + expected_entry = HashBucketEntry::kInvalidEntry; + AtomicHashBucketEntry* atomic_entry = nullptr; + // Try to find a slot that contains the right tag or that's free. + while(true) { + // Search through the bucket looking for our key. Last entry is reserved + // for the overflow pointer. + for(uint32_t entry_idx = 0; entry_idx < HashBucket::kNumEntries; ++entry_idx) { + HashBucketEntry entry = bucket->entries[entry_idx].load(); + if(entry.unused()) { + if(!atomic_entry) { + // Found a free slot; keep track of it, and continue looking for a match. + atomic_entry = &bucket->entries[entry_idx]; + } + continue; + } + if(hash.tag() == entry.tag() && !entry.tentative()) { + // Found a match. (So, the input hash matches the entry on 14 tag bits + + // log_2(table size) address bits.) Return it to caller. 
+ expected_entry = entry; + return &bucket->entries[entry_idx]; + } + } + // Go to next bucket in the chain + HashBucketOverflowEntry overflow_entry = bucket->overflow_entry.load(); + if(overflow_entry.unused()) { + // No more buckets in the chain. + if(atomic_entry) { + // We found a free slot earlier (possibly inside an earlier bucket). + assert(expected_entry == HashBucketEntry::kInvalidEntry); + return atomic_entry; + } + // We didn't find any free slots, so allocate new bucket. + FixedPageAddress new_bucket_addr = overflow_buckets_allocator_[version].Allocate(); + bool success; + do { + HashBucketOverflowEntry new_bucket_entry{ new_bucket_addr }; + success = bucket->overflow_entry.compare_exchange_strong(overflow_entry, + new_bucket_entry); + } while(!success && overflow_entry.unused()); + if(!success) { + // Install failed, undo allocation; use the winner's entry + overflow_buckets_allocator_[version].FreeAtEpoch(new_bucket_addr, 0); + } else { + // Install succeeded; we have a new bucket on the chain. Return its first slot. + bucket = &overflow_buckets_allocator_[version].Get(new_bucket_addr); + assert(expected_entry == HashBucketEntry::kInvalidEntry); + return &bucket->entries[0]; + } + } + // Go to the next bucket. + bucket = &overflow_buckets_allocator_[version].Get(overflow_entry.address()); + assert(reinterpret_cast(bucket) % Constants::kCacheLineBytes == 0); + } + assert(false); + return nullptr; // NOT REACHED +} + +template +bool FasterKv::HasConflictingEntry(KeyHash hash, const HashBucket* bucket, uint8_t version, + const AtomicHashBucketEntry* atomic_entry) const { + uint16_t tag = atomic_entry->load().tag(); + while(true) { + for(uint32_t entry_idx = 0; entry_idx < HashBucket::kNumEntries; ++entry_idx) { + HashBucketEntry entry = bucket->entries[entry_idx].load(); + if(entry != HashBucketEntry::kInvalidEntry && + entry.tag() == tag && + atomic_entry != &bucket->entries[entry_idx]) { + // Found a conflict. + return true; + } + } + // Go to next bucket in the chain + HashBucketOverflowEntry entry = bucket->overflow_entry.load(); + if(entry.unused()) { + // Reached the end of the bucket chain; no conflicts found. + return false; + } + // Go to the next bucket. + bucket = &overflow_buckets_allocator_[version].Get(entry.address()); + assert(reinterpret_cast(bucket) % Constants::kCacheLineBytes == 0); + } +} + +template +inline AtomicHashBucketEntry* FasterKv::FindOrCreateEntry(KeyHash hash, + HashBucketEntry& expected_entry, HashBucket*& bucket) { + bucket = nullptr; + // Truncate the hash to get a bucket page_index < state[version].size. + uint32_t version = resize_info_.version; + assert(version <= 1); + + while(true) { + bucket = &state_[version].bucket(hash); + assert(reinterpret_cast(bucket) % Constants::kCacheLineBytes == 0); + + AtomicHashBucketEntry* atomic_entry = FindTentativeEntry(hash, bucket, version, + expected_entry); + if(expected_entry != HashBucketEntry::kInvalidEntry) { + // Found an existing hash bucket entry; nothing further to check. + return atomic_entry; + } + // We have a free slot. + assert(atomic_entry); + assert(expected_entry == HashBucketEntry::kInvalidEntry); + // Try to install tentative tag in free slot. + HashBucketEntry entry{ Address::kInvalidAddress, hash.tag(), true }; + if(atomic_entry->compare_exchange_strong(expected_entry, entry)) { + // See if some other thread is also trying to install this tag. + if(HasConflictingEntry(hash, bucket, version, atomic_entry)) { + // Back off and try again. 
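FindTentativeEntry and FindOrCreateEntry above implement a two-phase insert: a thread first claims a free slot with the entry's tentative bit set, then rescans the chain for another entry carrying the same tag, and either backs off (conflict) or clears the tentative bit (no conflict). Below is a minimal single-bucket sketch of that protocol, reusing the simplified 64-bit entry encoding from the earlier sketch (again an assumption, not the real layout).

// Two-phase ("tentative") tag install, reduced to a single seven-slot bucket.
#include <atomic>
#include <cstdint>

constexpr uint64_t kTentativeBitSketch = 1ull << 63;
constexpr int kNumSlots = 7;

// Install `tag` into a free slot; returns the slot, or nullptr if the bucket is full.
std::atomic<uint64_t>* TryInstallTag(std::atomic<uint64_t> (&slots)[kNumSlots], uint64_t tag) {
  while(true) {
    std::atomic<uint64_t>* free_slot = nullptr;
    for(int i = 0; i < kNumSlots; ++i) {
      if(slots[i].load() == 0 && !free_slot) free_slot = &slots[i];
    }
    if(!free_slot) return nullptr;
    uint64_t expected = 0;
    // Phase 1: claim the slot, but mark the entry tentative.
    if(!free_slot->compare_exchange_strong(expected, tag | kTentativeBitSketch)) {
      continue;                                      // lost the race for this slot; rescan
    }
    // Phase 2: look for any other entry (tentative or not) with the same tag.
    bool conflict = false;
    for(int i = 0; i < kNumSlots; ++i) {
      uint64_t e = slots[i].load();
      if(&slots[i] != free_slot && e != 0 && (e & ~kTentativeBitSketch) == tag) conflict = true;
    }
    if(conflict) {
      free_slot->store(0);                           // back off and try again
    } else {
      free_slot->store(tag);                         // finalize: clear the tentative bit
      return free_slot;
    }
  }
}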
+ atomic_entry->store(HashBucketEntry::kInvalidEntry); + } else { + // No other thread was trying to install this tag, so we can clear our entry's "tentative" + // bit. + expected_entry = HashBucketEntry{ Address::kInvalidAddress, hash.tag(), false }; + atomic_entry->store(expected_entry); + return atomic_entry; + } + } + } + assert(false); + return nullptr; // NOT REACHED +} + +template +template +inline Status FasterKv::Read(RC& context, AsyncCallback callback, + uint64_t monotonic_serial_num) { + typedef RC read_context_t; + typedef PendingReadContext pending_read_context_t; + static_assert(std::is_base_of::value, + "value_t is not a base class of read_context_t::value_t"); + static_assert(alignof(value_t) == alignof(typename read_context_t::value_t), + "alignof(value_t) != alignof(typename read_context_t::value_t)"); + + pending_read_context_t pending_context{ context, callback }; + OperationStatus internal_status = InternalRead(pending_context); + Status status; + if(internal_status == OperationStatus::SUCCESS) { + status = Status::Ok; + } else if(internal_status == OperationStatus::NOT_FOUND) { + status = Status::NotFound; + } else { + assert(internal_status == OperationStatus::RECORD_ON_DISK); + bool async; + status = HandleOperationStatus(thread_ctx(), pending_context, internal_status, async); + } + thread_ctx().serial_num = monotonic_serial_num; + return status; +} + +template +template +inline Status FasterKv::Upsert(UC& context, AsyncCallback callback, + uint64_t monotonic_serial_num) { + typedef UC upsert_context_t; + typedef PendingUpsertContext pending_upsert_context_t; + static_assert(std::is_base_of::value, + "value_t is not a base class of upsert_context_t::value_t"); + static_assert(alignof(value_t) == alignof(typename upsert_context_t::value_t), + "alignof(value_t) != alignof(typename upsert_context_t::value_t)"); + + pending_upsert_context_t pending_context{ context, callback }; + OperationStatus internal_status = InternalUpsert(pending_context); + Status status; + + if(internal_status == OperationStatus::SUCCESS) { + status = Status::Ok; + } else { + bool async; + status = HandleOperationStatus(thread_ctx(), pending_context, internal_status, async); + } + thread_ctx().serial_num = monotonic_serial_num; + return status; +} + +template +template +inline Status FasterKv::Rmw(MC& context, AsyncCallback callback, + uint64_t monotonic_serial_num) { + typedef MC rmw_context_t; + typedef PendingRmwContext pending_rmw_context_t; + static_assert(std::is_base_of::value, + "value_t is not a base class of rmw_context_t::value_t"); + static_assert(alignof(value_t) == alignof(typename rmw_context_t::value_t), + "alignof(value_t) != alignof(typename rmw_context_t::value_t)"); + + pending_rmw_context_t pending_context{ context, callback }; + OperationStatus internal_status = InternalRmw(pending_context, false); + Status status; + if(internal_status == OperationStatus::SUCCESS) { + status = Status::Ok; + } else { + bool async; + status = HandleOperationStatus(thread_ctx(), pending_context, internal_status, async); + } + thread_ctx().serial_num = monotonic_serial_num; + return status; +} + +template +inline bool FasterKv::CompletePending(bool wait) { + do { + disk.TryComplete(); + + bool done = true; + if(thread_ctx().phase != Phase::WAIT_PENDING && thread_ctx().phase != Phase::IN_PROGRESS) { + CompleteIoPendingRequests(thread_ctx()); + } + Refresh(); + CompleteRetryRequests(thread_ctx()); + + done = (thread_ctx().pending_ios.empty() && thread_ctx().retry_requests.empty()); + + 
if(thread_ctx().phase != Phase::REST) { + CompleteIoPendingRequests(prev_thread_ctx()); + Refresh(); + CompleteRetryRequests(prev_thread_ctx()); + done = false; + } + if(done) { + return true; + } + } while(wait); + return false; +} + +template +inline void FasterKv::CompleteIoPendingRequests(ExecutionContext& context) { + AsyncIOContext* ctxt; + // Clear this thread's I/O response queue. (Does not clear I/Os issued by this thread that have + // not yet completed.) + while(context.io_responses.try_pop(ctxt)) { + CallbackContext io_context{ ctxt }; + CallbackContext pending_context{ io_context->caller_context }; + // This I/O is no longer pending, since we popped its response off the queue. + auto pending_io = context.pending_ios.find(io_context->io_id); + assert(pending_io != context.pending_ios.end()); + context.pending_ios.erase(pending_io); + + // Issue the continue command + OperationStatus internal_status; + if(pending_context->type == OperationType::Read) { + internal_status = InternalContinuePendingRead(context, *io_context.get()); + } else { + assert(pending_context->type == OperationType::RMW); + internal_status = InternalContinuePendingRmw(context, *io_context.get()); + } + Status result; + if(internal_status == OperationStatus::SUCCESS) { + result = Status::Ok; + } else if(internal_status == OperationStatus::NOT_FOUND) { + result = Status::NotFound; + } else { + result = HandleOperationStatus(context, *pending_context.get(), internal_status, + pending_context.async); + } + if(!pending_context.async) { + pending_context->caller_callback(pending_context->caller_context, result); + } + } +} + +template +inline void FasterKv::CompleteRetryRequests(ExecutionContext& context) { + // If we can't complete a request, it will be pushed back onto the deque. Retry each request + // only once. + size_t size = context.retry_requests.size(); + for(size_t idx = 0; idx < size; ++idx) { + CallbackContext pending_context{ context.retry_requests.front() }; + context.retry_requests.pop_front(); + // Issue retry command + OperationStatus internal_status; + switch(pending_context->type) { + case OperationType::RMW: + internal_status = InternalRetryPendingRmw( + *static_cast(pending_context.get())); + break; + case OperationType::Upsert: + internal_status = InternalUpsert( + *static_cast(pending_context.get())); + break; + default: + assert(false); + throw std::runtime_error{ "Cannot happen!" }; + } + // Handle operation status + Status result; + if(internal_status == OperationStatus::SUCCESS) { + result = Status::Ok; + } else { + result = HandleOperationStatus(context, *pending_context.get(), internal_status, + pending_context.async); + } + + // If done, callback user code. 
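CompleteRetryRequests above bounds work per call by snapshotting the deque's size before the loop, so a request that fails again (and is re-enqueued via HandleOperationStatus) is not retried until the next pass. A generic sketch of that pattern, with a hypothetical try_complete callable standing in for the internal retry logic:

// Retry each queued item at most once per pass by snapshotting the size up front.
#include <deque>
#include <iostream>
#include <utility>

template <typename T, typename TryFn>
void RetryOnce(std::deque<T>& retry_queue, TryFn try_complete) {
  const size_t count = retry_queue.size();          // snapshot: only pre-existing items
  for(size_t i = 0; i < count; ++i) {
    T item = std::move(retry_queue.front());
    retry_queue.pop_front();
    if(!try_complete(item)) {
      retry_queue.push_back(std::move(item));       // failed again; defer to the next pass
    }
  }
}

int main() {
  std::deque<int> q{ 1, 2, 3 };
  RetryOnce(q, [](int v) { return v % 2 == 0; });   // odd values stay queued
  std::cout << q.size() << " requests still pending\n";  // prints 2
}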
+ if(!pending_context.async) { + pending_context->caller_callback(pending_context->caller_context, result); + } + } +} + +template +template +inline OperationStatus FasterKv::InternalRead(C& pending_context) const { + typedef C pending_read_context_t; + + if(thread_ctx().phase != Phase::REST) { + const_cast(this)->HeavyEnter(); + } + + const key_t& key = pending_context.key(); + KeyHash hash = key.GetHash(); + const AtomicHashBucketEntry* atomic_entry = FindEntry(hash); + if(!atomic_entry) { + // no record found + return OperationStatus::NOT_FOUND; + } + + HashBucketEntry entry = atomic_entry->load(); + Address address = entry.address(); + Address begin_address = hlog.begin_address.load(); + Address head_address = hlog.head_address.load(); + Address safe_read_only_address = hlog.safe_read_only_address.load(); + Address read_only_address = hlog.read_only_address.load(); + uint64_t latest_record_version = 0; + + if(address >= head_address) { + // Look through the in-memory portion of the log, to find the first record (if any) whose key + // matches. + const record_t* record = reinterpret_cast(hlog.Get(address)); + latest_record_version = record->header.checkpoint_version; + if(key != record->key()) { + address = TraceBackForKeyMatch(key, record->header.previous_address(), head_address); + } + } + + switch(thread_ctx().phase) { + case Phase::PREPARE: + // Reading old version (v). + if(latest_record_version > thread_ctx().version) { + // CPR shift detected: we are in the "PREPARE" phase, and a record has a version later than + // what we've seen. + pending_context.go_async(thread_ctx().phase, thread_ctx().version, address, entry); + return OperationStatus::CPR_SHIFT_DETECTED; + } + break; + default: + break; + } + + if(address >= safe_read_only_address) { + // Mutable or fuzzy region + // concurrent read + pending_context.GetAtomic(hlog.Get(address)); + return OperationStatus::SUCCESS; + } else if(address >= head_address) { + // Immutable region + // single-thread read + pending_context.Get(hlog.Get(address)); + return OperationStatus::SUCCESS; + } else if(address >= begin_address) { + // Record not available in-memory + pending_context.go_async(thread_ctx().phase, thread_ctx().version, address, entry); + return OperationStatus::RECORD_ON_DISK; + } else { + // No record found + return OperationStatus::NOT_FOUND; + } +} + +template +template +inline OperationStatus FasterKv::InternalUpsert(C& pending_context) { + typedef C pending_upsert_context_t; + + if(thread_ctx().phase != Phase::REST) { + HeavyEnter(); + } + + const key_t& key = pending_context.key(); + KeyHash hash = key.GetHash(); + HashBucketEntry expected_entry; + HashBucket* bucket; + AtomicHashBucketEntry* atomic_entry = FindOrCreateEntry(hash, expected_entry, bucket); + + // (Note that address will be Address::kInvalidAddress, if the atomic_entry was created.) + Address address = expected_entry.address(); + Address head_address = hlog.head_address.load(); + Address read_only_address = hlog.read_only_address.load(); + uint64_t latest_record_version = 0; + + if(address >= head_address) { + // Multiple keys may share the same hash. Try to find the most recent record with a matching + // key that we might be able to update in place. 
+ record_t* record = reinterpret_cast(hlog.Get(address)); + latest_record_version = record->header.checkpoint_version; + if(key != record->key()) { + address = TraceBackForKeyMatch(key, record->header.previous_address(), head_address); + } + } + + CheckpointLockGuard lock_guard{ checkpoint_locks_, hash }; + + // The common case + if(thread_ctx().phase == Phase::REST && address >= read_only_address) { + record_t* record = reinterpret_cast(hlog.Get(address)); + if(pending_context.PutAtomic(record)) { + return OperationStatus::SUCCESS; + } else { + // Must retry as RCU. + goto create_record; + } + } + + // Acquire necessary locks. + switch(thread_ctx().phase) { + case Phase::PREPARE: + // Working on old version (v). + if(!lock_guard.try_lock_old()) { + pending_context.go_async(thread_ctx().phase, thread_ctx().version, address, expected_entry); + return OperationStatus::CPR_SHIFT_DETECTED; + } else { + if(latest_record_version > thread_ctx().version) { + // CPR shift detected: we are in the "PREPARE" phase, and a record has a version later than + // what we've seen. + pending_context.go_async(thread_ctx().phase, thread_ctx().version, address, + expected_entry); + return OperationStatus::CPR_SHIFT_DETECTED; + } + } + break; + case Phase::IN_PROGRESS: + // All other threads are in phase {PREPARE,IN_PROGRESS,WAIT_PENDING}. + if(latest_record_version < thread_ctx().version) { + // Will create new record or update existing record to new version (v+1). + if(!lock_guard.try_lock_new()) { + pending_context.go_async(thread_ctx().phase, thread_ctx().version, address, + expected_entry); + return OperationStatus::RETRY_LATER; + } else { + // Update to new version (v+1) requires RCU. + goto create_record; + } + } + break; + case Phase::WAIT_PENDING: + // All other threads are in phase {IN_PROGRESS,WAIT_PENDING,WAIT_FLUSH}. + if(latest_record_version < thread_ctx().version) { + if(lock_guard.old_locked()) { + pending_context.go_async(thread_ctx().phase, thread_ctx().version, address, + expected_entry); + return OperationStatus::RETRY_LATER; + } else { + // Update to new version (v+1) requires RCU. + goto create_record; + } + } + break; + case Phase::WAIT_FLUSH: + // All other threads are in phase {WAIT_PENDING,WAIT_FLUSH,PERSISTENCE_CALLBACK}. + if(latest_record_version < thread_ctx().version) { + goto create_record; + } + break; + default: + break; + } + + if(address >= read_only_address) { + // Mutable region; try to update in place. + if(atomic_entry->load() != expected_entry) { + // Some other thread may have RCUed the record before we locked it; try again. + return OperationStatus::RETRY_NOW; + } + // We acquired the necessary locks, so so we can update the record's bucket atomically. + record_t* record = reinterpret_cast(hlog.Get(address)); + if(pending_context.PutAtomic(record)) { + // Host successfully replaced record, atomically. + return OperationStatus::SUCCESS; + } else { + // Must retry as RCU. + goto create_record; + } + } + + // Create a record and attempt RCU. 
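The PREPARE/IN_PROGRESS/WAIT_PENDING arms above coordinate through per-hash checkpoint locks: try_lock_old succeeds only while no thread holds a new-version lock on that key, and try_lock_new only while no old-version lock is held. The sketch below is one plausible way to build such a lock from two reference counts packed into a 64-bit word; the real implementation lives in checkpoint_locks.h and its layout may differ.

// Hedged sketch of an old/new checkpoint lock (field widths are assumptions).
#include <atomic>
#include <cstdint>

class CheckpointLockSketch {
 public:
  // Lock for the old version (v): allowed only while no new-version holders exist.
  bool try_lock_old() {
    uint64_t expected = word_.load();
    while(new_count(expected) == 0) {
      if(word_.compare_exchange_weak(expected, expected + 1)) return true;
    }
    return false;
  }
  // Lock for the new version (v+1): allowed only while no old-version holders exist.
  bool try_lock_new() {
    uint64_t expected = word_.load();
    while(old_count(expected) == 0) {
      if(word_.compare_exchange_weak(expected, expected + (1ull << 32))) return true;
    }
    return false;
  }
  void unlock_old() { word_ -= 1; }
  void unlock_new() { word_ -= (1ull << 32); }
  bool old_locked() const { return old_count(word_.load()) > 0; }

 private:
  static uint32_t old_count(uint64_t w) { return static_cast<uint32_t>(w & 0xFFFFFFFFull); }
  static uint32_t new_count(uint64_t w) { return static_cast<uint32_t>(w >> 32); }
  std::atomic<uint64_t> word_{ 0 };
};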
+create_record: + uint32_t record_size = record_t::size(key, pending_context.value_size()); + Address new_address = BlockAllocate(record_size); + record_t* record = reinterpret_cast(hlog.Get(new_address)); + new(record) record_t{ + RecordInfo{ + static_cast(thread_ctx().version), true, false, false, + expected_entry.address() }, + key }; + pending_context.Put(record); + + HashBucketEntry updated_entry{ new_address, hash.tag(), false }; + + if(atomic_entry->compare_exchange_strong(expected_entry, updated_entry)) { + // Installed the new record in the hash table. + return OperationStatus::SUCCESS; + } else { + // Try again. + record->header.invalid = true; + return InternalUpsert(pending_context); + } +} + +template +template +inline OperationStatus FasterKv::InternalRmw(C& pending_context, bool retrying) { + typedef C pending_rmw_context_t; + + Phase phase = retrying ? pending_context.phase : thread_ctx().phase; + uint32_t version = retrying ? pending_context.version : thread_ctx().version; + + if(phase != Phase::REST) { + HeavyEnter(); + } + + const key_t& key = pending_context.key(); + KeyHash hash = key.GetHash(); + HashBucketEntry expected_entry; + HashBucket* bucket; + AtomicHashBucketEntry* atomic_entry = FindOrCreateEntry(hash, expected_entry, bucket); + + // (Note that address will be Address::kInvalidAddress, if the atomic_entry was created.) + Address address = expected_entry.address(); + Address begin_address = hlog.begin_address.load(); + Address head_address = hlog.head_address.load(); + Address read_only_address = hlog.read_only_address.load(); + Address safe_read_only_address = hlog.safe_read_only_address.load(); + uint64_t latest_record_version = 0; + + if(address >= head_address) { + // Multiple keys may share the same hash. Try to find the most recent record with a matching + // key that we might be able to update in place. + record_t* record = reinterpret_cast(hlog.Get(address)); + latest_record_version = record->header.checkpoint_version; + if(key != record->key()) { + address = TraceBackForKeyMatch(key, record->header.previous_address(), head_address); + } + } + + CheckpointLockGuard lock_guard{ checkpoint_locks_, hash }; + + // The common case. + if(phase == Phase::REST && address >= read_only_address) { + record_t* record = reinterpret_cast(hlog.Get(address)); + if(pending_context.RmwAtomic(record)) { + // In-place RMW succeeded. + return OperationStatus::SUCCESS; + } else { + // Must retry as RCU. + goto create_record; + } + } + + // Acquire necessary locks. + switch(phase) { + case Phase::PREPARE: + // Working on old version (v). + if(!lock_guard.try_lock_old()) { + // If we're retrying the operation, then we already have an old lock, so we'll always + // succeed in obtaining a second. Otherwise, another thread has acquired the new lock, so + // a CPR shift has occurred. + assert(!retrying); + pending_context.go_async(phase, version, address, expected_entry); + return OperationStatus::CPR_SHIFT_DETECTED; + } else { + if(latest_record_version > version) { + // CPR shift detected: we are in the "PREPARE" phase, and a mutable record has a version + // later than what we've seen. + assert(!retrying); + pending_context.go_async(phase, version, address, expected_entry); + return OperationStatus::CPR_SHIFT_DETECTED; + } + } + break; + case Phase::IN_PROGRESS: + // All other threads are in phase {PREPARE,IN_PROGRESS,WAIT_PENDING}. + if(latest_record_version < version) { + // Will create new record or update existing record to new version (v+1). 
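Both InternalUpsert above and InternalRmw fall back to the same copy-on-write path: allocate a new record at the log's tail, link it back to the address read from the hash entry, and publish it with a compare-and-swap on that entry, marking the new record invalid if the CAS loses. A simplified sketch of that publish step (the vector stands in for the hybrid log; the real Allocate() is a thread-safe tail bump, and the types here are stand-ins):

// Copy-on-write publish: append, back-link, then CAS the hash slot.
#include <atomic>
#include <cstdint>
#include <vector>

struct RecordSketch {
  uint64_t previous_address;  // back-pointer to the prior version in the log
  bool invalid;               // set when a racing CAS wins and this copy is abandoned
  int value;
};

std::vector<RecordSketch> log_sketch;   // stand-in for the hybrid log's in-memory tail

bool UpsertRcu(std::atomic<uint64_t>& slot, uint64_t expected_address, int new_value) {
  // Allocate at the tail and fill in the new version of the record.
  uint64_t new_address = log_sketch.size();
  log_sketch.push_back(RecordSketch{ expected_address, false, new_value });

  // Publish: the slot must still point at the version we copied from.
  if(slot.compare_exchange_strong(expected_address, new_address)) {
    return true;                        // the new record is now the head of the chain
  }
  log_sketch[new_address].invalid = true;  // lost the race; mark our copy dead and let the caller retry
  return false;
}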
+ if(!lock_guard.try_lock_new()) { + if(!retrying) { + pending_context.go_async(phase, version, address, expected_entry); + } else { + pending_context.continue_async(address, expected_entry); + } + return OperationStatus::RETRY_LATER; + } else { + // Update to new version (v+1) requires RCU. + goto create_record; + } + } + break; + case Phase::WAIT_PENDING: + // All other threads are in phase {IN_PROGRESS,WAIT_PENDING,WAIT_FLUSH}. + if(latest_record_version < version) { + if(lock_guard.old_locked()) { + if(!retrying) { + pending_context.go_async(phase, version, address, expected_entry); + } else { + pending_context.continue_async(address, expected_entry); + } + return OperationStatus::RETRY_LATER; + } else { + // Update to new version (v+1) requires RCU. + goto create_record; + } + } + break; + case Phase::WAIT_FLUSH: + // All other threads are in phase {WAIT_PENDING,WAIT_FLUSH,PERSISTENCE_CALLBACK}. + if(latest_record_version < version) { + goto create_record; + } + break; + default: + break; + } + + if(address >= read_only_address) { + // Mutable region. Try to update in place. + if(atomic_entry->load() != expected_entry) { + // Some other thread may have RCUed the record before we locked it; try again. + return OperationStatus::RETRY_NOW; + } + // We acquired the necessary locks, so so we can update the record's bucket atomically. + record_t* record = reinterpret_cast(hlog.Get(address)); + if(pending_context.RmwAtomic(record)) { + // In-place RMW succeeded. + return OperationStatus::SUCCESS; + } else { + // Must retry as RCU. + goto create_record; + } + } else if(address >= safe_read_only_address) { + // Fuzzy Region: Must go pending due to lost-update anomaly + if(!retrying) { + pending_context.go_async(phase, version, address, expected_entry); + } else { + pending_context.continue_async(address, expected_entry); + } + return OperationStatus::RETRY_LATER; + } else if(address >= head_address) { + goto create_record; + } else if(address >= begin_address) { + // Need to obtain old record from disk. + if(!retrying) { + pending_context.go_async(phase, version, address, expected_entry); + } else { + pending_context.continue_async(address, expected_entry); + } + return OperationStatus::RECORD_ON_DISK; + } else { + // Create a new record. + goto create_record; + } + + // Create a record and attempt RCU. +create_record: + uint32_t record_size = record_t::size(key, pending_context.value_size()); + Address new_address = BlockAllocate(record_size); + record_t* new_record = reinterpret_cast(hlog.Get(new_address)); + + // Allocating a block may have the side effect of advancing the head address. + head_address = hlog.head_address.load(); + // Allocating a block may have the side effect of advancing the thread context's version and + // phase. + if(!retrying) { + phase = thread_ctx().phase; + version = thread_ctx().version; + } + + new(new_record) record_t{ + RecordInfo{ + static_cast(version), true, false, false, + expected_entry.address() }, + key }; + if(address < hlog.begin_address.load()) { + pending_context.RmwInitial(new_record); + } else if(address >= head_address) { + const record_t* old_record = reinterpret_cast(hlog.Get(address)); + pending_context.RmwCopy(old_record, new_record); + } else { + // The block we allocated for the new record caused the head address to advance beyond + // the old record. Need to obtain the old record from disk. 
+ new_record->header.invalid = true; + if(!retrying) { + pending_context.go_async(phase, version, address, expected_entry); + } else { + pending_context.continue_async(address, expected_entry); + } + return OperationStatus::RECORD_ON_DISK; + } + + HashBucketEntry updated_entry{ new_address, hash.tag(), false }; + if(atomic_entry->compare_exchange_strong(expected_entry, updated_entry)) { + return OperationStatus::SUCCESS; + } else { + // CAS failed; try again. + new_record->header.invalid = true; + if(!retrying) { + pending_context.go_async(phase, version, address, expected_entry); + } else { + pending_context.continue_async(address, expected_entry); + } + return OperationStatus::RETRY_NOW; + } +} + +template +inline OperationStatus FasterKv::InternalRetryPendingRmw( + async_pending_rmw_context_t& pending_context) { + OperationStatus status = InternalRmw(pending_context, true); + if(status == OperationStatus::SUCCESS && pending_context.version != thread_ctx().version) { + status = OperationStatus::SUCCESS_UNMARK; + } + return status; +} + +template +inline Address FasterKv::TraceBackForKeyMatch(const key_t& key, Address from_address, + Address min_offset) const { + while(from_address >= min_offset) { + const record_t* record = reinterpret_cast(hlog.Get(from_address)); + if(key == record->key()) { + return from_address; + } else { + from_address = record->header.previous_address(); + continue; + } + } + return from_address; +} + +template +inline Status FasterKv::HandleOperationStatus(ExecutionContext& ctx, + pending_context_t& pending_context, OperationStatus internal_status, bool& async) { + async = false; + switch(internal_status) { + case OperationStatus::RETRY_NOW: + switch(pending_context.type) { + case OperationType::Read: { + async_pending_read_context_t& read_context = + *static_cast(&pending_context); + internal_status = InternalRead(read_context); + break; + } + case OperationType::Upsert: { + async_pending_upsert_context_t& upsert_context = + *static_cast(&pending_context); + internal_status = InternalUpsert(upsert_context); + break; + } + case OperationType::RMW: { + async_pending_rmw_context_t& rmw_context = + *static_cast(&pending_context); + internal_status = InternalRmw(rmw_context, false); + break; + } + } + + if(internal_status == OperationStatus::SUCCESS) { + return Status::Ok; + } else { + return HandleOperationStatus(ctx, pending_context, internal_status, async); + } + case OperationStatus::RETRY_LATER: + if(thread_ctx().phase == Phase::PREPARE) { + assert(pending_context.type == OperationType::RMW); + // Can I be marking an operation again and again? + if(!checkpoint_locks_.get_lock(pending_context.key().GetHash()).try_lock_old()) { + return PivotAndRetry(ctx, pending_context, async); + } + } + return RetryLater(ctx, pending_context, async); + case OperationStatus::RECORD_ON_DISK: + if(thread_ctx().phase == Phase::PREPARE) { + assert(pending_context.type == OperationType::Read || + pending_context.type == OperationType::RMW); + // Can I be marking an operation again and again? 
+ if(!checkpoint_locks_.get_lock(pending_context.key().GetHash()).try_lock_old()) { + return PivotAndRetry(ctx, pending_context, async); + } + } + return IssueAsyncIoRequest(ctx, pending_context, async); + case OperationStatus::SUCCESS_UNMARK: + checkpoint_locks_.get_lock(pending_context.key().GetHash()).unlock_old(); + return Status::Ok; + case OperationStatus::NOT_FOUND_UNMARK: + checkpoint_locks_.get_lock(pending_context.key().GetHash()).unlock_old(); + return Status::NotFound; + case OperationStatus::CPR_SHIFT_DETECTED: + return PivotAndRetry(ctx, pending_context, async); + } + // not reached + assert(false); + return Status::Corruption; +} + +template +inline Status FasterKv::PivotAndRetry(ExecutionContext& ctx, + pending_context_t& pending_context, bool& async) { + // Some invariants + assert(ctx.version == thread_ctx().version); + assert(thread_ctx().phase == Phase::PREPARE); + Refresh(); + // thread must have moved to IN_PROGRESS phase + assert(thread_ctx().version == ctx.version + 1); + // retry with new contexts + pending_context.phase = thread_ctx().phase; + pending_context.version = thread_ctx().version; + return HandleOperationStatus(thread_ctx(), pending_context, OperationStatus::RETRY_NOW, async); +} + +template +inline Status FasterKv::RetryLater(ExecutionContext& ctx, + pending_context_t& pending_context, bool& async) { + IAsyncContext* context_copy; + Status result = pending_context.DeepCopy(context_copy); + if(result == Status::Ok) { + async = true; + ctx.retry_requests.push_back(context_copy); + return Status::Pending; + } else { + async = false; + return result; + } +} + +template +inline constexpr uint32_t FasterKv::MinIoRequestSize() const { + return static_cast( + sizeof(value_t) + pad_alignment(record_t::min_disk_key_size(), + alignof(value_t))); +} + +template +inline Status FasterKv::IssueAsyncIoRequest(ExecutionContext& ctx, + pending_context_t& pending_context, bool& async) { + // Issue asynchronous I/O request + uint64_t io_id = thread_ctx().io_id++; + thread_ctx().pending_ios.insert({ io_id, pending_context.key().GetHash() }); + async = true; + AsyncIOContext io_request{ this, pending_context.address, &pending_context, + &thread_ctx().io_responses, io_id }; + AsyncGetFromDisk(pending_context.address, MinIoRequestSize(), AsyncGetFromDiskCallback, + io_request); + return Status::Pending; +} + +template +inline Address FasterKv::BlockAllocate(uint32_t record_size) { + uint32_t page; + Address retval = hlog.Allocate(record_size, page); + while(retval < hlog.read_only_address.load()) { + Refresh(); + // Don't overrun the hlog's tail offset. + bool page_closed = (retval == Address::kInvalidAddress); + while(page_closed) { + page_closed = !hlog.NewPage(page); + Refresh(); + } + retval = hlog.Allocate(record_size, page); + } + return retval; +} + +template +void FasterKv::AsyncGetFromDisk(Address address, uint32_t num_records, + AsyncIOCallback callback, AsyncIOContext& context) { + if(epoch_.IsProtected()) { + /// Throttling. (Thread pool, unprotected threads are not throttled.) + while(num_pending_ios.load() > 120) { + disk.TryComplete(); + std::this_thread::yield(); + epoch_.ProtectAndDrain(); + } + } + ++num_pending_ios; + hlog.AsyncGetFromDisk(address, num_records, callback, context); +} + +template +void FasterKv::AsyncGetFromDiskCallback(IAsyncContext* ctxt, Status result, + size_t bytes_transferred) { + CallbackContext context{ ctxt }; + faster_t* faster = reinterpret_cast(context->faster); + /// Context stack is: AsyncIOContext, PendingContext. 
+ pending_context_t* pending_context = static_cast(context->caller_context); + + /// This I/O is finished. + --faster->num_pending_ios; + /// Always "goes async": context is freed by the issuing thread, when processing thread I/O + /// responses. + context.async = true; + + pending_context->result = result; + if(result == Status::Ok) { + record_t* record = reinterpret_cast(context->record.GetValidPointer()); + // Size of the record we read from disk (might not have read the entire record, yet). + size_t record_size = context->record.available_bytes; + if(record->min_disk_key_size() > record_size) { + // Haven't read the full record in yet; I/O is not complete! + faster->AsyncGetFromDisk(context->address, record->min_disk_key_size(), + AsyncGetFromDiskCallback, *context.get()); + context.async = true; + } else if(record->min_disk_value_size() > record_size) { + // Haven't read the full record in yet; I/O is not complete! + faster->AsyncGetFromDisk(context->address, record->min_disk_value_size(), + AsyncGetFromDiskCallback, *context.get()); + context.async = true; + } else if(record->disk_size() > record_size) { + // Haven't read the full record in yet; I/O is not complete! + faster->AsyncGetFromDisk(context->address, record->disk_size(), + AsyncGetFromDiskCallback, *context.get()); + context.async = true; + } else if(pending_context->key() == record->key()) { + //The keys are same, so I/O is complete + context->thread_io_responses->push(context.get()); + } else { + //keys are not same. I/O is not complete + context->address = record->header.previous_address(); + if(context->address >= faster->hlog.begin_address.load()) { + faster->AsyncGetFromDisk(context->address, faster->MinIoRequestSize(), + AsyncGetFromDiskCallback, *context.get()); + context.async = true; + } else { + // Record not found, so I/O is complete. + context->thread_io_responses->push(context.get()); + } + } + } +} + +template +OperationStatus FasterKv::InternalContinuePendingRead(ExecutionContext& context, + AsyncIOContext& io_context) { + if(io_context.address >= hlog.begin_address.load()) { + async_pending_read_context_t* pending_context = static_cast( + io_context.caller_context); + record_t* record = reinterpret_cast(io_context.record.GetValidPointer()); + pending_context->Get(record); + assert(!kCopyReadsToTail); + return (thread_ctx().version > context.version) ? OperationStatus::SUCCESS_UNMARK : + OperationStatus::SUCCESS; + } else { + return (thread_ctx().version > context.version) ? OperationStatus::NOT_FOUND_UNMARK : + OperationStatus::NOT_FOUND; + } +} + +template +OperationStatus FasterKv::InternalContinuePendingRmw(ExecutionContext& context, + AsyncIOContext& io_context) { + async_pending_rmw_context_t* pending_context = static_cast( + io_context.caller_context); + + // Find a hash bucket entry to store the updated value in. + const key_t& key = pending_context->key(); + KeyHash hash = key.GetHash(); + HashBucketEntry expected_entry; + HashBucket* bucket; + AtomicHashBucketEntry* atomic_entry = FindOrCreateEntry(hash, expected_entry, bucket); + + // (Note that address will be Address::kInvalidAddress, if the atomic_entry was created.) + Address address = expected_entry.address(); + Address head_address = hlog.head_address.load(); + + // Make sure that atomic_entry is OK to update. 
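AsyncGetFromDiskCallback above may run before the whole record is in memory, so it re-issues the read with progressively better size estimates (minimum key size, then minimum value size, then the full disk size) until the record is complete. A synchronous, error-handling-free sketch of that "read the header, then re-read at the true size" idea, with a fake ReadAt device and illustrative on-disk layout:

// Grow the read until the whole record has been fetched.
#include <cstdint>
#include <cstring>
#include <vector>

struct DiskRecordHeaderSketch {
  uint32_t key_size;
  uint32_t value_size;
};

// Pretend device: copies up to `length` bytes of whatever is stored at `address`.
size_t ReadAt(const std::vector<uint8_t>& device, uint64_t address,
              uint8_t* buffer, size_t length) {
  size_t available = device.size() - address;       // assumes a valid, header-complete address
  size_t n = length < available ? length : available;
  std::memcpy(buffer, device.data() + address, n);
  return n;
}

std::vector<uint8_t> ReadWholeRecord(const std::vector<uint8_t>& device, uint64_t address) {
  std::vector<uint8_t> buffer(sizeof(DiskRecordHeaderSketch));
  size_t have = ReadAt(device, address, buffer.data(), buffer.size());  // minimal first read
  DiskRecordHeaderSketch header;
  std::memcpy(&header, buffer.data(), sizeof(header));
  size_t full_size = sizeof(header) + header.key_size + header.value_size;
  if(have < full_size) {
    buffer.resize(full_size);
    ReadAt(device, address, buffer.data(), full_size);  // re-issue with the true record size
  }
  return buffer;
}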
+ if(address >= head_address) { + record_t* record = reinterpret_cast(hlog.Get(address)); + if(key != record->key()) { + address = TraceBackForKeyMatch(key, record->header.previous_address(), head_address); + } + } + + if(address > pending_context->entry.address()) { + // We can't trace the current hash bucket entry back to the record we read. + pending_context->continue_async(address, expected_entry); + return OperationStatus::RETRY_NOW; + } + assert(address < hlog.begin_address.load() || address == pending_context->entry.address()); + + // We have to do copy-on-write/RCU and write the updated value to the tail of the log. + uint32_t record_size = record_t::size(key, pending_context->value_size()); + Address new_address = BlockAllocate(record_size); + record_t* new_record = reinterpret_cast(hlog.Get(new_address)); + + new(new_record) record_t{ + RecordInfo{ + static_cast(context.version), true, false, false, + expected_entry.address() }, + key }; + if(io_context.address < hlog.begin_address.load()) { + // The on-disk trace back failed to find a key match. + pending_context->RmwInitial(new_record); + } else { + // The record we read from disk. + const record_t* disk_record = reinterpret_cast( + io_context.record.GetValidPointer()); + pending_context->RmwCopy(disk_record, new_record); + } + + HashBucketEntry updated_entry{ new_address, hash.tag(), false }; + if(atomic_entry->compare_exchange_strong(expected_entry, updated_entry)) { + assert(thread_ctx().version >= context.version); + return (thread_ctx().version == context.version) ? OperationStatus::SUCCESS : + OperationStatus::SUCCESS_UNMARK; + } else { + // CAS failed; try again. + new_record->header.invalid = true; + pending_context->continue_async(address, expected_entry); + return OperationStatus::RETRY_NOW; + } +} + +template +void FasterKv::InitializeCheckpointLocks() { + uint32_t table_version = resize_info_.version; + uint64_t size = state_[table_version].size(); + checkpoint_locks_.Initialize(size); +} + +template +Status FasterKv::WriteIndexMetadata() { + uint32_t checkpoint_version = checkpoint_.index_metadata.version; + std::string filename = disk.index_checkpoint_path(checkpoint_version) + "info.dat"; + // (This code will need to be refactored into the disk_t interface, if we want to support + // unformatted disks.) + std::FILE* file = std::fopen(filename.c_str(), "wb"); + if(!file) { + return Status::IOError; + } + if(std::fwrite(&checkpoint_.index_metadata, sizeof(checkpoint_.index_metadata), 1, file) != 1) { + std::fclose(file); + return Status::IOError; + } + if(std::fclose(file) != 0) { + return Status::IOError; + } + return Status::Ok; +} + +template +Status FasterKv::ReadIndexMetadata(uint32_t version) { + std::string filename = disk.index_checkpoint_path(version) + "info.dat"; + // (This code will need to be refactored into the disk_t interface, if we want to support + // unformatted disks.) 
+ std::FILE* file = std::fopen(filename.c_str(), "rb"); + if(!file) { + return Status::IOError; + } + if(std::fread(&checkpoint_.index_metadata, sizeof(checkpoint_.index_metadata), 1, file) != 1) { + std::fclose(file); + return Status::IOError; + } + if(std::fclose(file) != 0) { + return Status::IOError; + } + return Status::Ok; +} + +template +Status FasterKv::WriteCprMetadata() { + uint32_t checkpoint_version = checkpoint_.log_metadata.version; + std::string filename = disk.cpr_checkpoint_path(checkpoint_version) + "info.dat"; + // (This code will need to be refactored into the disk_t interface, if we want to support + // unformatted disks.) + std::FILE* file = std::fopen(filename.c_str(), "wb"); + if(!file) { + return Status::IOError; + } + if(std::fwrite(&checkpoint_.log_metadata, sizeof(checkpoint_.log_metadata), 1, file) != 1) { + std::fclose(file); + return Status::IOError; + } + if(std::fclose(file) != 0) { + return Status::IOError; + } + return Status::Ok; +} + +template +Status FasterKv::ReadCprMetadata(uint32_t version) { + std::string filename = disk.cpr_checkpoint_path(version) + "info.dat"; + // (This code will need to be refactored into the disk_t interface, if we want to support + // unformatted disks.) + std::FILE* file = std::fopen(filename.c_str(), "rb"); + if(!file) { + return Status::IOError; + } + if(std::fread(&checkpoint_.log_metadata, sizeof(checkpoint_.log_metadata), 1, file) != 1) { + std::fclose(file); + return Status::IOError; + } + if(std::fclose(file) != 0) { + return Status::IOError; + } + return Status::Ok; +} + +template +Status FasterKv::WriteCprContext() { + uint32_t checkpoint_version = prev_thread_ctx().version; + std::string filename = disk.cpr_checkpoint_path(checkpoint_version); + const Guid& guid = prev_thread_ctx().guid; + filename += guid.ToString(); + filename += ".dat"; + // (This code will need to be refactored into the disk_t interface, if we want to support + // unformatted disks.) + std::FILE* file = std::fopen(filename.c_str(), "wb"); + if(!file) { + return Status::IOError; + } + if(std::fwrite(static_cast(&prev_thread_ctx()), + sizeof(PersistentExecContext), 1, file) != 1) { + std::fclose(file); + return Status::IOError; + } + if(std::fclose(file) != 0) { + return Status::IOError; + } + return Status::Ok; +} + +template +Status FasterKv::ReadCprContexts(uint32_t version, const Guid* guids) { + for(size_t idx = 0; idx < Thread::kMaxNumThreads; ++idx) { + const Guid& guid = guids[idx]; + if(guid == Guid{}) { + continue; + } + std::string filename = disk.cpr_checkpoint_path(version); + filename += guid.ToString(); + filename += ".dat"; + // (This code will need to be refactored into the disk_t interface, if we want to support + // unformatted disks.) + std::FILE* file = std::fopen(filename.c_str(), "rb"); + if(!file) { + return Status::IOError; + } + PersistentExecContext context{}; + if(std::fread(&context, sizeof(PersistentExecContext), 1, file) != 1) { + std::fclose(file); + return Status::IOError; + } + if(std::fclose(file) != 0) { + return Status::IOError; + } + auto result = checkpoint_.continue_tokens.insert({ context.guid, context.serial_num }); + assert(result.second); + } + if(checkpoint_.continue_tokens.size() != checkpoint_.log_metadata.num_threads) { + return Status::Corruption; + } else { + return Status::Ok; + } +} + +template +Status FasterKv::CheckpointFuzzyIndex() { + uint32_t hash_table_version = resize_info_.version; + uint32_t checkpoint_version = checkpoint_.index_metadata.version; + // Checkpoint the main hash table. 
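The metadata routines above all follow the same pattern: a trivially copyable struct is written with a single fwrite and read back with a single fread, and every fopen/fwrite/fread/fclose return value is checked. A standalone round-trip of that pattern (the struct fields and the "info.dat" name here are illustrative):

// Round-trip a POD metadata struct through a file, checking every call.
#include <cstdint>
#include <cstdio>

struct IndexMetadataSketch {
  uint32_t version;
  uint64_t table_size;
  uint64_t num_ht_bytes;
};

bool WriteMetadata(const char* path, const IndexMetadataSketch& m) {
  std::FILE* file = std::fopen(path, "wb");
  if(!file) return false;
  bool ok = std::fwrite(&m, sizeof(m), 1, file) == 1;
  ok = (std::fclose(file) == 0) && ok;
  return ok;
}

bool ReadMetadata(const char* path, IndexMetadataSketch& m) {
  std::FILE* file = std::fopen(path, "rb");
  if(!file) return false;
  bool ok = std::fread(&m, sizeof(m), 1, file) == 1;
  ok = (std::fclose(file) == 0) && ok;
  return ok;
}

int main() {
  IndexMetadataSketch out{ 1, 1ull << 20, (1ull << 20) * 64 }, in{};
  if(!WriteMetadata("info.dat", out) || !ReadMetadata("info.dat", in)) return 1;
  return (in.version == out.version && in.num_ht_bytes == out.num_ht_bytes) ? 0 : 1;
}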
+ file_t ht_file = disk.NewFile(disk.relative_index_checkpoint_path(checkpoint_version) + + "ht.dat"); + RETURN_NOT_OK(ht_file.Open(&disk.handler())); + RETURN_NOT_OK(state_[hash_table_version].Checkpoint(disk, std::move(ht_file), + checkpoint_.index_metadata.num_ht_bytes)); + // Checkpoint the hash table's overflow buckets. + file_t ofb_file = disk.NewFile(disk.relative_index_checkpoint_path(checkpoint_version) + + "ofb.dat"); + RETURN_NOT_OK(ofb_file.Open(&disk.handler())); + RETURN_NOT_OK(overflow_buckets_allocator_[hash_table_version].Checkpoint(disk, + std::move(ofb_file), checkpoint_.index_metadata.num_ofb_bytes)); + checkpoint_.index_checkpoint_started = true; + return Status::Ok; +} + +template +Status FasterKv::CheckpointFuzzyIndexComplete() { + if(!checkpoint_.index_checkpoint_started) { + return Status::Pending; + } + uint32_t hash_table_version = resize_info_.version; + Status result = state_[hash_table_version].CheckpointComplete(false); + if(result == Status::Pending) { + return Status::Pending; + } else if(result != Status::Ok) { + return result; + } else { + return overflow_buckets_allocator_[hash_table_version].CheckpointComplete(false); + } +} + +template +Status FasterKv::RecoverFuzzyIndex() { + uint8_t hash_table_version = resize_info_.version; + uint32_t checkpoint_version = checkpoint_.index_metadata.version; + assert(state_[hash_table_version].size() == checkpoint_.index_metadata.table_size); + + // Recover the main hash table. + file_t ht_file = disk.NewFile(disk.relative_index_checkpoint_path(checkpoint_version) + + "ht.dat"); + RETURN_NOT_OK(ht_file.Open(&disk.handler())); + RETURN_NOT_OK(state_[hash_table_version].Recover(disk, std::move(ht_file), + checkpoint_.index_metadata.num_ht_bytes)); + // Recover the hash table's overflow buckets. + file_t ofb_file = disk.NewFile(disk.relative_index_checkpoint_path(checkpoint_version) + + "ofb.dat"); + RETURN_NOT_OK(ofb_file.Open(&disk.handler())); + return overflow_buckets_allocator_[hash_table_version].Recover(disk, std::move(ofb_file), + checkpoint_.index_metadata.num_ofb_bytes, checkpoint_.index_metadata.ofb_count); +} + +template +Status FasterKv::RecoverFuzzyIndexComplete(bool wait) { + uint8_t hash_table_version = resize_info_.version; + Status result = state_[hash_table_version].RecoverComplete(true); + if(result != Status::Ok) { + return result; + } + result = overflow_buckets_allocator_[hash_table_version].RecoverComplete(true); + if(result != Status::Ok) { + return result; + } + + // Clear all tentative entries. + for(uint64_t bucket_idx = 0; bucket_idx < state_[hash_table_version].size(); ++bucket_idx) { + HashBucket* bucket = &state_[hash_table_version].bucket(bucket_idx); + while(true) { + for(uint32_t entry_idx = 0; entry_idx < HashBucket::kNumEntries; ++entry_idx) { + if(bucket->entries[entry_idx].load().tentative()) { + bucket->entries[entry_idx].store(HashBucketEntry::kInvalidEntry); + } + } + // Go to next bucket in the chain + HashBucketOverflowEntry entry = bucket->overflow_entry.load(); + if(entry.unused()) { + // No more buckets in the chain. 
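The checkpoint and recovery routines lean on RETURN_NOT_OK, whose definition is not part of this hunk; presumably it is an early-return macro along these lines (the Status enum and the calling function below are stand-ins, and the exact definition may differ):

// Hedged sketch of an early-return-on-error macro in the RETURN_NOT_OK style.
enum class StatusSketch { Ok, IOError };

#define RETURN_NOT_OK_SKETCH(expr)              \
  do {                                          \
    StatusSketch _s = (expr);                   \
    if(_s != StatusSketch::Ok) return _s;       \
  } while(0)

StatusSketch OpenAndCheckpoint(StatusSketch open_result, StatusSketch checkpoint_result) {
  RETURN_NOT_OK_SKETCH(open_result);            // propagate the first failure
  RETURN_NOT_OK_SKETCH(checkpoint_result);
  return StatusSketch::Ok;
}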
+ break; + } + bucket = &overflow_buckets_allocator_[hash_table_version].Get(entry.address()); + assert(reinterpret_cast(bucket) % Constants::kCacheLineBytes == 0); + } + } + return Status::Ok; +} + +template +Status FasterKv::RecoverHybridLog() { + class Context : public IAsyncContext { + public: + Context(hlog_t& hlog_, uint32_t page_, RecoveryStatus& recovery_status_) + : hlog{ &hlog_} + , page{ page_ } + , recovery_status{ &recovery_status_ } { + } + /// The deep-copy constructor + Context(const Context& other) + : hlog{ other.hlog } + , page{ other.page } + , recovery_status{ other.recovery_status } { + } + protected: + Status DeepCopy_Internal(IAsyncContext*& context_copy) final { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + public: + hlog_t* hlog; + uint32_t page; + RecoveryStatus* recovery_status; + }; + + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + result = context->hlog->AsyncReadPagesFromLog(context->page, 1, *context->recovery_status); + }; + + Address from_address = checkpoint_.index_metadata.checkpoint_start_address; + Address to_address = checkpoint_.log_metadata.final_address; + + uint32_t start_page = from_address.page(); + uint32_t end_page = to_address.offset() > 0 ? to_address.page() + 1 : to_address.page(); + uint32_t capacity = hlog.buffer_size(); + RecoveryStatus recovery_status{ start_page, end_page }; + // Initially issue read request for all pages that can be held in memory + uint32_t total_pages_to_read = end_page - start_page; + uint32_t pages_to_read_first = std::min(capacity, total_pages_to_read); + RETURN_NOT_OK(hlog.AsyncReadPagesFromLog(start_page, pages_to_read_first, recovery_status)); + + for(uint32_t page = start_page; page < end_page; ++page) { + while(recovery_status.page_status(page) != PageRecoveryStatus::ReadDone) { + disk.TryComplete(); + std::this_thread::sleep_for(10ms); + } + + // handle start and end at non-page boundaries + RETURN_NOT_OK(RecoverFromPage(page == start_page ? from_address : Address{ page, 0 }, + page + 1 == end_page ? 
to_address : + Address{ page, Address::kMaxOffset })); + + // OS thread flushes current page and issues a read request if necessary + if(page + capacity < end_page) { + Context context{ hlog, page + capacity, recovery_status }; + RETURN_NOT_OK(hlog.AsyncFlushPage(page, recovery_status, callback, &context)); + } else { + RETURN_NOT_OK(hlog.AsyncFlushPage(page, recovery_status, nullptr, nullptr)); + } + } + // Wait until all pages have been flushed + for(uint32_t page = start_page; page < end_page; ++page) { + while(recovery_status.page_status(page) != PageRecoveryStatus::FlushDone) { + disk.TryComplete(); + std::this_thread::sleep_for(10ms); + } + } + return Status::Ok; +} + +template +Status FasterKv::RecoverHybridLogFromSnapshotFile() { + class Context : public IAsyncContext { + public: + Context(hlog_t& hlog_, file_t& file_, uint32_t file_start_page_, uint32_t page_, + RecoveryStatus& recovery_status_) + : hlog{ &hlog_ } + , file{ &file_ } + , file_start_page{ file_start_page_ } + , page{ page_ } + , recovery_status{ &recovery_status_ } { + } + /// The deep-copy constructor + Context(const Context& other) + : hlog{ other.hlog } + , file{ other.file } + , file_start_page{ other.file_start_page } + , page{ other.page } + , recovery_status{ other.recovery_status } { + } + protected: + Status DeepCopy_Internal(IAsyncContext*& context_copy) final { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + public: + hlog_t* hlog; + file_t* file; + uint32_t file_start_page; + uint32_t page; + RecoveryStatus* recovery_status; + }; + + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + result = context->hlog->AsyncReadPagesFromSnapshot(*context->file, + context->file_start_page, context->page, 1, *context->recovery_status); + }; + + Address file_start_address = checkpoint_.log_metadata.flushed_address; + Address from_address = checkpoint_.index_metadata.checkpoint_start_address; + Address to_address = checkpoint_.log_metadata.final_address; + + uint32_t start_page = file_start_address.page(); + uint32_t end_page = to_address.offset() > 0 ? to_address.page() + 1 : to_address.page(); + uint32_t capacity = hlog.buffer_size(); + RecoveryStatus recovery_status{ start_page, end_page }; + checkpoint_.snapshot_file = disk.NewFile(disk.relative_cpr_checkpoint_path( + checkpoint_.log_metadata.version) + "snapshot.dat"); + RETURN_NOT_OK(checkpoint_.snapshot_file.Open(&disk.handler())); + + // Initially issue read request for all pages that can be held in memory + uint32_t total_pages_to_read = end_page - start_page; + uint32_t pages_to_read_first = std::min(capacity, total_pages_to_read); + RETURN_NOT_OK(hlog.AsyncReadPagesFromSnapshot(checkpoint_.snapshot_file, start_page, start_page, + pages_to_read_first, recovery_status)); + + for(uint32_t page = start_page; page < end_page; ++page) { + while(recovery_status.page_status(page) != PageRecoveryStatus::ReadDone) { + disk.TryComplete(); + std::this_thread::sleep_for(10ms); + } + + // Perform recovery if page in fuzzy portion of the log + if(Address{ page + 1, 0 } > from_address) { + // handle start and end at non-page boundaries + RETURN_NOT_OK(RecoverFromPage(page == from_address.page() ? from_address : + Address{ page, 0 }, + page + 1 == end_page ? 
to_address : + Address{ page, Address::kMaxOffset })); + } + + // OS thread flushes current page and issues a read request if necessary + if(page + capacity < end_page) { + Context context{ hlog, checkpoint_.snapshot_file, start_page, page + capacity, + recovery_status }; + RETURN_NOT_OK(hlog.AsyncFlushPage(page, recovery_status, callback, &context)); + } else { + RETURN_NOT_OK(hlog.AsyncFlushPage(page, recovery_status, nullptr, nullptr)); + } + } + // Wait until all pages have been flushed + for(uint32_t page = start_page; page < end_page; ++page) { + while(recovery_status.page_status(page) != PageRecoveryStatus::FlushDone) { + disk.TryComplete(); + std::this_thread::sleep_for(10ms); + } + } + return Status::Ok; +} + +template +Status FasterKv::RecoverFromPage(Address from_address, Address to_address) { + assert(from_address.page() == to_address.page()); + for(Address address = from_address; address < to_address;) { + record_t* record = reinterpret_cast(hlog.Get(address)); + if(record->header.IsNull()) { + address += sizeof(record->header); + continue; + } + if(record->header.invalid) { + address += record->size(); + continue; + } + const key_t& key = record->key(); + KeyHash hash = key.GetHash(); + HashBucketEntry expected_entry; + HashBucket* bucket; + AtomicHashBucketEntry* atomic_entry = FindOrCreateEntry(hash, expected_entry, bucket); + + if(record->header.checkpoint_version <= checkpoint_.log_metadata.version) { + HashBucketEntry new_entry{ address, hash.tag(), false }; + atomic_entry->store(new_entry); + } else { + record->header.invalid = true; + if(record->header.previous_address() < checkpoint_.index_metadata.checkpoint_start_address) { + HashBucketEntry new_entry{ record->header.previous_address(), hash.tag(), false }; + atomic_entry->store(new_entry); + } + } + address += record->size(); + } + + return Status::Ok; +} + +template +Status FasterKv::RestoreHybridLog() { + Address tail_address = checkpoint_.log_metadata.final_address; + uint32_t end_page = tail_address.offset() > 0 ? tail_address.page() + 1 : tail_address.page(); + uint32_t capacity = hlog.buffer_size(); + // Restore as much of the log as will fit in memory. + uint32_t start_page; + if(end_page < capacity - hlog.kNumHeadPages) { + start_page = 0; + } else { + start_page = end_page - (capacity - hlog.kNumHeadPages); + } + RecoveryStatus recovery_status{ start_page, end_page }; + + uint32_t num_pages = end_page - start_page; + RETURN_NOT_OK(hlog.AsyncReadPagesFromLog(start_page, num_pages, recovery_status)); + + // Wait until all pages have been read. + for(uint32_t page = start_page; page < end_page; ++page) { + while(recovery_status.page_status(page) != PageRecoveryStatus::ReadDone) { + disk.TryComplete(); + std::this_thread::sleep_for(10ms); + } + } + // Skip the null page. + Address head_address = start_page == 0 ? 
Address{ 0, Constants::kCacheLineBytes } : + Address{ start_page, 0 }; + hlog.RecoveryReset(checkpoint_.index_metadata.log_begin_address, head_address, tail_address); + return Status::Ok; +} + +template +void FasterKv::HeavyEnter() { + if(thread_ctx().phase == Phase::GC_IO_PENDING || thread_ctx().phase == Phase::GC_IN_PROGRESS) { + CleanHashTableBuckets(); + return; + } + while(thread_ctx().phase == Phase::GROW_PREPARE) { + // We spin-wait as a simplification + // Could instead do a "heavy operation" here + std::this_thread::yield(); + Refresh(); + } + if(thread_ctx().phase == Phase::GROW_IN_PROGRESS) { + SplitHashTableBuckets(); + } +} + +template +bool FasterKv::CleanHashTableBuckets() { + uint64_t chunk = gc_.next_chunk++; + if(chunk >= gc_.num_chunks) { + // No chunk left to clean. + return false; + } + uint8_t version = resize_info_.version; + Address begin_address = hlog.begin_address.load(); + uint64_t upper_bound; + if(chunk + 1 < grow_.num_chunks) { + // All chunks but the last chunk contain kGrowHashTableChunkSize elements. + upper_bound = kGrowHashTableChunkSize; + } else { + // Last chunk might contain more or fewer elements. + upper_bound = state_[version].size() - (chunk * kGcHashTableChunkSize); + } + for(uint64_t idx = 0; idx < upper_bound; ++idx) { + HashBucket* bucket = &state_[version].bucket(chunk * kGcHashTableChunkSize + idx); + while(true) { + for(uint32_t entry_idx = 0; entry_idx < HashBucket::kNumEntries; ++entry_idx) { + AtomicHashBucketEntry& atomic_entry = bucket->entries[entry_idx]; + HashBucketEntry expected_entry = atomic_entry.load(); + if(!expected_entry.unused() && expected_entry.address() != Address::kInvalidAddress && + expected_entry.address() < begin_address) { + // The record that this entry points to was truncated; try to delete the entry. + atomic_entry.compare_exchange_strong(expected_entry, HashBucketEntry::kInvalidEntry); + // If deletion failed, then some other thread must have added a new record to the entry. + } + } + // Go to next bucket in the chain. + HashBucketOverflowEntry overflow_entry = bucket->overflow_entry.load(); + if(overflow_entry.unused()) { + // No more buckets in the chain. + break; + } + bucket = &overflow_buckets_allocator_[version].Get(overflow_entry.address()); + } + } + // Done with this chunk--did some work. + return true; +} + +template +void FasterKv::AddHashEntry(HashBucket*& bucket, uint32_t& next_idx, uint8_t version, + HashBucketEntry entry) { + if(next_idx == HashBucket::kNumEntries) { + // Need to allocate a new bucket, first. + FixedPageAddress new_bucket_addr = overflow_buckets_allocator_[version].Allocate(); + HashBucketOverflowEntry new_bucket_entry{ new_bucket_addr }; + bucket->overflow_entry.store(new_bucket_entry); + bucket = &overflow_buckets_allocator_[version].Get(new_bucket_addr); + next_idx = 0; + } + bucket->entries[next_idx].store(entry); + ++next_idx; +} + +template +Address FasterKv::TraceBackForOtherChainStart(uint64_t old_size, uint64_t new_size, + Address from_address, Address min_address, uint8_t side) { + assert(side == 0 || side == 1); + // Search back as far as min_address. + while(from_address >= min_address) { + const record_t* record = reinterpret_cast(hlog.Get(from_address)); + KeyHash hash = record->key().GetHash(); + if((hash.idx(new_size) < old_size) != (side == 0)) { + // Record's key hashes to the other side. 
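CleanHashTableBuckets above distributes garbage collection across threads by handing out fixed-size chunks of the bucket array through an atomic counter and CAS-deleting entries whose address has fallen below the log's begin address; note that its last-chunk sizing appears to mix gc_ with grow_.num_chunks and kGrowHashTableChunkSize. The sketch below shows the same chunked scan over a flat array of 64-bit entries with a single chunk-size constant; the entry encoding is a stand-in.

// Chunked, CAS-based cleanup of stale hash entries.
#include <algorithm>
#include <atomic>
#include <cstdint>
#include <vector>

constexpr uint64_t kChunkSizeSketch = 16384;

struct GcStateSketch {
  std::atomic<uint64_t> next_chunk{ 0 };
  uint64_t num_chunks = 0;
};

// Returns false when there are no chunks left to clean.
bool CleanChunk(std::vector<std::atomic<uint64_t>>& entries,   // entry value == log address
                GcStateSketch& gc, uint64_t begin_address) {
  uint64_t chunk = gc.next_chunk++;                 // claim a unit of work
  if(chunk >= gc.num_chunks) return false;
  uint64_t start = chunk * kChunkSizeSketch;
  uint64_t end = std::min<uint64_t>(start + kChunkSizeSketch, entries.size());
  for(uint64_t idx = start; idx < end; ++idx) {
    uint64_t expected = entries[idx].load();
    if(expected != 0 && expected < begin_address) {
      // The entry points at a truncated record; try to drop it. If the CAS fails,
      // another thread installed a newer record and the entry must stay.
      entries[idx].compare_exchange_strong(expected, 0);
    }
  }
  return true;                                      // did some work
}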
+ return from_address; + } + from_address = record->header.previous_address(); + } + return from_address; +} + +template +void FasterKv::SplitHashTableBuckets() { + // This thread won't exit until all hash table buckets have been split. + Address head_address = hlog.head_address.load(); + Address begin_address = hlog.begin_address.load(); + for(uint64_t chunk = grow_.next_chunk++; chunk < grow_.num_chunks; chunk = grow_.next_chunk++) { + uint64_t old_size = state_[grow_.old_version].size(); + uint64_t new_size = state_[grow_.new_version].size(); + assert(new_size == old_size * 2); + // Split this chunk. + uint64_t upper_bound; + if(chunk + 1 < grow_.num_chunks) { + // All chunks but the last chunk contain kGrowHashTableChunkSize elements. + upper_bound = kGrowHashTableChunkSize; + } else { + // Last chunk might contain more or fewer elements. + upper_bound = old_size - (chunk * kGrowHashTableChunkSize); + } + for(uint64_t idx = 0; idx < upper_bound; ++idx) { + + // Split this (chain of) bucket(s). + HashBucket* old_bucket = &state_[grow_.old_version].bucket( + chunk * kGrowHashTableChunkSize + idx); + HashBucket* new_bucket0 = &state_[grow_.new_version].bucket( + chunk * kGrowHashTableChunkSize + idx); + HashBucket* new_bucket1 = &state_[grow_.new_version].bucket( + old_size + chunk * kGrowHashTableChunkSize + idx); + uint32_t new_entry_idx0 = 0; + uint32_t new_entry_idx1 = 0; + while(true) { + for(uint32_t old_entry_idx = 0; old_entry_idx < HashBucket::kNumEntries; ++old_entry_idx) { + HashBucketEntry old_entry = old_bucket->entries[old_entry_idx].load(); + if(old_entry.unused()) { + // Nothing to do. + continue; + } else if(old_entry.address() < head_address) { + // Can't tell which new bucket the entry should go into; put it in both. + AddHashEntry(new_bucket0, new_entry_idx0, grow_.new_version, old_entry); + AddHashEntry(new_bucket1, new_entry_idx1, grow_.new_version, old_entry); + continue; + } + + const record_t* record = reinterpret_cast(hlog.Get( + old_entry.address())); + KeyHash hash = record->key().GetHash(); + if(hash.idx(new_size) < old_size) { + // Record's key hashes to the 0 side of the new hash table. + AddHashEntry(new_bucket0, new_entry_idx0, grow_.new_version, old_entry); + Address other_address = TraceBackForOtherChainStart(old_size, new_size, + record->header.previous_address(), head_address, 0); + if(other_address >= begin_address) { + // We found a record that either is on disk or has a key that hashes to the 1 side of + // the new hash table. + AddHashEntry(new_bucket1, new_entry_idx1, grow_.new_version, + HashBucketEntry{ other_address, old_entry.tag(), false }); + } + } else { + // Record's key hashes to the 1 side of the new hash table. + AddHashEntry(new_bucket1, new_entry_idx1, grow_.new_version, old_entry); + Address other_address = TraceBackForOtherChainStart(old_size, new_size, + record->header.previous_address(), head_address, 1); + if(other_address >= begin_address) { + // We found a record that either is on disk or has a key that hashes to the 0 side of + // the new hash table. + AddHashEntry(new_bucket0, new_entry_idx0, grow_.new_version, + HashBucketEntry{ other_address, old_entry.tag(), false }); + } + } + } + // Go to next bucket in the chain. + HashBucketOverflowEntry overflow_entry = old_bucket->overflow_entry.load(); + if(overflow_entry.unused()) { + // No more buckets in the chain. + break; + } + old_bucket = &overflow_buckets_allocator_[grow_.old_version].Get(overflow_entry.address()); + } + } + // Done with this chunk. 
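SplitHashTableBuckets above relies on the fact that doubling a power-of-two table splits each old bucket i into new buckets i and i + old_size, with the side chosen by the one hash bit the larger mask newly exposes; that is what the hash.idx(new_size) < old_size test decides. Assuming idx() is the usual mask of the low-order bits, the small program below checks that equivalence:

// Verify: idx(new_size) < old_size iff the newly-exposed hash bit is zero,
// and the low-order bits (the old bucket index) are preserved by the split.
#include <cstdint>
#include <cstdio>

uint64_t idx(uint64_t hash, uint64_t table_size) {   // table_size is a power of two
  return hash & (table_size - 1);
}

int main() {
  const uint64_t old_size = 1ull << 20;
  const uint64_t new_size = old_size * 2;
  for(uint64_t hash = 0; hash < (1ull << 22); hash += 7919) {
    bool side0 = idx(hash, new_size) < old_size;          // test used by the split
    bool bit_clear = (hash & old_size) == 0;              // the newly-exposed hash bit
    bool same_low_bits = idx(hash, new_size) % old_size == idx(hash, old_size);
    if(side0 != bit_clear || !same_low_bits) {
      std::printf("mismatch at %llu\n", (unsigned long long)hash);
      return 1;
    }
  }
  std::printf("split rule verified\n");
  return 0;
}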
+ if(--grow_.num_pending_chunks == 0) { + // Free the old hash table. + state_[grow_.old_version].Uninitialize(); + overflow_buckets_allocator_[grow_.old_version].Uninitialize(); + break; + } + } + // Thread has finished growing its part of the hash table. + thread_ctx().phase = Phase::REST; + // Thread ack that it has finished growing the hash table. + if(epoch_.FinishThreadPhase(Phase::GROW_IN_PROGRESS)) { + // Let other threads know that they can use the new hash table now. + GlobalMoveToNextState(SystemState{ Action::GrowIndex, Phase::GROW_IN_PROGRESS, + thread_ctx().version }); + } else { + while(system_state_.load().phase == Phase::GROW_IN_PROGRESS) { + // Spin until all other threads have finished splitting their chunks. + std::this_thread::yield(); + } + } +} + +template +bool FasterKv::GlobalMoveToNextState(SystemState current_state) { + SystemState next_state = current_state.GetNextState(); + if(!system_state_.compare_exchange_strong(current_state, next_state)) { + return false; + } + + switch(next_state.action) { + case Action::Checkpoint: + switch(next_state.phase) { + case Phase::PREP_INDEX_CHKPT: + // This case is handled directly inside Checkpoint(). + assert(false); + break; + case Phase::INDEX_CHKPT: + // Issue async request for fuzzy checkpoint + assert(!checkpoint_.failed); + if(CheckpointFuzzyIndex() != Status::Ok) { + checkpoint_.failed = true; + } + break; + case Phase::PREPARE: + // INDEX_CHKPT -> PREPARE + // Get an overestimate for the ofb's tail, after we've finished fuzzy-checkpointing the ofb. + // (Ensures that recovery won't accidentally reallocate from the ofb.) + checkpoint_.index_metadata.ofb_count = + overflow_buckets_allocator_[resize_info_.version].count(); + // Write index meta data on disk + if(WriteIndexMetadata() != Status::Ok) { + checkpoint_.failed = true; + } + break; + case Phase::IN_PROGRESS: { + // PREPARE -> IN_PROGRESS + // Do nothing + break; + } + case Phase::WAIT_PENDING: + // IN_PROGRESS -> WAIT_PENDING + // Do nothing + break; + case Phase::WAIT_FLUSH: { + // WAIT_PENDING -> WAIT_FLUSH + if(fold_over_snapshot) { + // Move read-only to tail + Address tail_address = hlog.ShiftReadOnlyToTail(); + // Get final address for CPR + checkpoint_.log_metadata.final_address = tail_address; + } else { + Address tail_address = hlog.GetTailAddress(); + // Get final address for CPR + checkpoint_.log_metadata.final_address = tail_address; + checkpoint_.snapshot_file = disk.NewFile(disk.relative_cpr_checkpoint_path( + checkpoint_.log_metadata.version) + "snapshot.dat"); + if(checkpoint_.snapshot_file.Open(&disk.handler()) != Status::Ok) { + checkpoint_.failed = true; + } + // Flush the log to a snapshot. + hlog.AsyncFlushPagesToFile(checkpoint_.log_metadata.flushed_address.page(), + checkpoint_.log_metadata.final_address, checkpoint_.snapshot_file, + checkpoint_.flush_pending); + } + // Write CPR meta data file + if(WriteCprMetadata() != Status::Ok) { + checkpoint_.failed = true; + } + break; + } + case Phase::PERSISTENCE_CALLBACK: + // WAIT_FLUSH -> PERSISTENCE_CALLBACK + break; + case Phase::REST: + // PERSISTENCE_CALLBACK -> REST + // All persistence callbacks have been called; we can reset the contexts now. (Have to reset + // contexts before another checkpoint can be started.) + checkpoint_.CheckpointDone(); + // Free checkpoint locks! + checkpoint_locks_.Free(); + // Checkpoint is done--no more work for threads to do. 
+ system_state_.store(SystemState{ Action::None, Phase::REST, next_state.version }); + break; + default: + // not reached + assert(false); + break; + } + break; + case Action::GC: + switch(next_state.phase) { + case Phase::GC_IO_PENDING: + // This case is handled directly inside ShiftBeginAddress(). + assert(false); + break; + case Phase::GC_IN_PROGRESS: + // GC_IO_PENDING -> GC_IN_PROGRESS + // Tell the disk to truncate the log. + hlog.Truncate(gc_.truncate_callback); + break; + case Phase::REST: + // GC_IN_PROGRESS -> REST + // GC is done--no more work for threads to do. + if(gc_.complete_callback) { + gc_.complete_callback(); + } + system_state_.store(SystemState{ Action::None, Phase::REST, next_state.version }); + break; + default: + // not reached + assert(false); + break; + } + break; + case Action::GrowIndex: + switch(next_state.phase) { + case Phase::GROW_PREPARE: + // This case is handled directly inside GrowIndex(). + assert(false); + break; + case Phase::GROW_IN_PROGRESS: + // Swap hash table versions so that all threads will use the new version after populating it. + resize_info_.version = grow_.new_version; + break; + case Phase::REST: + if(grow_.callback) { + grow_.callback(state_[grow_.new_version].size()); + } + system_state_.store(SystemState{ Action::None, Phase::REST, next_state.version }); + break; + default: + // not reached + assert(false); + break; + } + break; + default: + // not reached + assert(false); + break; + } + return true; +} + +template +void FasterKv::MarkAllPendingRequests() { + uint32_t table_version = resize_info_.version; + uint64_t table_size = state_[table_version].size(); + + for(const IAsyncContext* ctxt : thread_ctx().retry_requests) { + const pending_context_t* context = static_cast(ctxt); + // We will succeed, since no other thread can currently advance the entry's version, since this + // thread hasn't acked "PENDING" phase completion yet. + bool result = checkpoint_locks_.get_lock(context->key().GetHash()).try_lock_old(); + assert(result); + } + for(const auto& pending_io : thread_ctx().pending_ios) { + // We will succeed, since no other thread can currently advance the entry's version, since this + // thread hasn't acked "PENDING" phase completion yet. + bool result = checkpoint_locks_.get_lock(pending_io.second).try_lock_old(); + assert(result); + } +} + +template +void FasterKv::HandleSpecialPhases() { + SystemState final_state = system_state_.load(); + if(final_state.phase == Phase::REST) { + // Nothing to do; just reset thread context. + thread_ctx().phase = Phase::REST; + thread_ctx().version = final_state.version; + return; + } + SystemState previous_state{ final_state.action, thread_ctx().phase, thread_ctx().version }; + do { + // Identify the transition (currentState -> nextState) + SystemState current_state = (previous_state == final_state) ? final_state : + previous_state.GetNextState(); + switch(current_state.action) { + case Action::Checkpoint: + switch(current_state.phase) { + case Phase::PREP_INDEX_CHKPT: + // Both from REST -> PREP_INDEX_CHKPT and PREP_INDEX_CHKPT -> PREP_INDEX_CHKPT + if(previous_state.phase == Phase::REST) { + // Thread ack that we're performing a checkpoint. 
+ if(epoch_.FinishThreadPhase(Phase::PREP_INDEX_CHKPT)) { + GlobalMoveToNextState(current_state); + } + } + break; + case Phase::INDEX_CHKPT: { + // Both from PREP_INDEX_CHKPT -> INDEX_CHKPT and INDEX_CHKPT -> INDEX_CHKPT + Status result = CheckpointFuzzyIndexComplete(); + if(result != Status::Pending && result != Status::Ok) { + checkpoint_.failed = true; + } + if(result != Status::Pending) { + GlobalMoveToNextState(current_state); + } + break; + } + case Phase::PREPARE: + // Handle INDEX_CHKPT -> PREPARE and PREPARE -> PREPARE + if(previous_state.phase == Phase::INDEX_CHKPT) { + // mark pending requests + MarkAllPendingRequests(); + // keep a count of number of threads + ++checkpoint_.log_metadata.num_threads; + // set the thread index + checkpoint_.log_metadata.guids[Thread::id()] = thread_ctx().guid; + // Thread ack that it has finished marking its pending requests. + if(epoch_.FinishThreadPhase(Phase::PREPARE)) { + GlobalMoveToNextState(current_state); + } + } + break; + case Phase::IN_PROGRESS: + // Handle PREPARE -> IN_PROGRESS and IN_PROGRESS -> IN_PROGRESS + if(previous_state.phase == Phase::PREPARE) { + assert(prev_thread_ctx().retry_requests.empty()); + assert(prev_thread_ctx().pending_ios.empty()); + assert(prev_thread_ctx().io_responses.empty()); + + // Get a new thread context; keep track of the old one as "previous." + thread_contexts_[Thread::id()].swap(); + // initialize a new local context + thread_ctx().Initialize(Phase::IN_PROGRESS, current_state.version, + prev_thread_ctx().guid, prev_thread_ctx().serial_num); + // Thread ack that it has swapped contexts. + if(epoch_.FinishThreadPhase(Phase::IN_PROGRESS)) { + GlobalMoveToNextState(current_state); + } + } + break; + case Phase::WAIT_PENDING: + // Handle IN_PROGRESS -> WAIT_PENDING and WAIT_PENDING -> WAIT_PENDING + if(!epoch_.HasThreadFinishedPhase(Phase::WAIT_PENDING)) { + if(prev_thread_ctx().pending_ios.empty() && + prev_thread_ctx().retry_requests.empty()) { + // Thread ack that it has completed its pending I/Os. + if(epoch_.FinishThreadPhase(Phase::WAIT_PENDING)) { + GlobalMoveToNextState(current_state); + } + } + } + break; + case Phase::WAIT_FLUSH: + // Handle WAIT_PENDING -> WAIT_FLUSH and WAIT_FLUSH -> WAIT_FLUSH + if(!epoch_.HasThreadFinishedPhase(Phase::WAIT_FLUSH)) { + bool flushed; + if(fold_over_snapshot) { + flushed = hlog.flushed_until_address.load() >= checkpoint_.log_metadata.final_address; + } else { + flushed = checkpoint_.flush_pending.load() == 0; + } + if(flushed) { + // write context info + WriteCprContext(); + // Thread ack that it has written its CPU context. + if(epoch_.FinishThreadPhase(Phase::WAIT_FLUSH)) { + GlobalMoveToNextState(current_state); + } + } + } + break; + case Phase::PERSISTENCE_CALLBACK: + // Handle WAIT_FLUSH -> PERSISTENCE_CALLBACK and PERSISTENCE_CALLBACK -> PERSISTENCE_CALLBACK + if(previous_state.phase == Phase::WAIT_FLUSH) { + // Persistence callback + if(checkpoint_.persistence_callback) { + checkpoint_.persistence_callback(prev_thread_ctx().serial_num); + } + // Thread has finished checkpointing. + thread_ctx().phase = Phase::REST; + // Thread ack that it has finished checkpointing. + if(epoch_.FinishThreadPhase(Phase::PERSISTENCE_CALLBACK)) { + GlobalMoveToNextState(current_state); + } + } + break; + default: + // nothing to do. + break; + } + break; + case Action::GC: + switch(current_state.phase) { + case Phase::GC_IO_PENDING: + // Handle REST -> GC_IO_PENDING and GC_IO_PENDING -> GC_IO_PENDING. 
+ if(previous_state.phase == Phase::REST) { + assert(prev_thread_ctx().retry_requests.empty()); + assert(prev_thread_ctx().pending_ios.empty()); + assert(prev_thread_ctx().io_responses.empty()); + // Get a new thread context; keep track of the old one as "previous." + thread_contexts_[Thread::id()].swap(); + // initialize a new local context + thread_ctx().Initialize(Phase::GC_IO_PENDING, current_state.version, + prev_thread_ctx().guid, prev_thread_ctx().serial_num); + } + + // See if the old thread context has completed its pending I/Os. + if(!epoch_.HasThreadFinishedPhase(Phase::GC_IO_PENDING)) { + if(prev_thread_ctx().pending_ios.empty() && + prev_thread_ctx().retry_requests.empty()) { + // Thread ack that it has completed its pending I/Os. + if(epoch_.FinishThreadPhase(Phase::GC_IO_PENDING)) { + GlobalMoveToNextState(current_state); + } + } + } + break; + case Phase::GC_IN_PROGRESS: + // Handle GC_IO_PENDING -> GC_IN_PROGRESS and GC_IN_PROGRESS -> GC_IN_PROGRESS. + if(!epoch_.HasThreadFinishedPhase(Phase::GC_IN_PROGRESS)) { + if(!CleanHashTableBuckets()) { + // No more buckets for this thread to clean; thread has finished GC. + thread_ctx().phase = Phase::REST; + // Thread ack that it has finished GC. + if(epoch_.FinishThreadPhase(Phase::GC_IN_PROGRESS)) { + GlobalMoveToNextState(current_state); + } + } + } + break; + default: + assert(false); // not reached + break; + } + break; + case Action::GrowIndex: + switch(current_state.phase) { + case Phase::GROW_PREPARE: + if(previous_state.phase == Phase::REST) { + // Thread ack that we're going to grow the hash table. + if(epoch_.FinishThreadPhase(Phase::GROW_PREPARE)) { + GlobalMoveToNextState(current_state); + } + } else { + // Wait for all other threads to finish their outstanding (synchronous) hash table + // operations. + std::this_thread::yield(); + } + break; + case Phase::GROW_IN_PROGRESS: + SplitHashTableBuckets(); + break; + } + break; + } + thread_ctx().phase = current_state.phase; + thread_ctx().version = current_state.version; + previous_state = current_state; + } while(previous_state != final_state); +} + +template +bool FasterKv::Checkpoint(void(*persistence_callback)(uint64_t persistent_serial_num)) { + // Only one thread can initiate a checkpoint at a time. (This assumption is implicit in the C# + /// version, and explicit here.) + SystemState expected{ Action::None, Phase::REST, system_state_.load().version }; + SystemState desired{ Action::Checkpoint, Phase::REST, expected.version }; + if(!system_state_.compare_exchange_strong(expected, desired)) { + // Can't start a new checkpoint while a checkpoint or recovery is already in progress. + return false; + } + // We are going to start a checkpoint. + epoch_.ResetPhaseFinished(); + // Initialize all contexts + disk.CreateIndexCheckpointDirectory(desired.version); + disk.CreateCprCheckpointDirectory(desired.version); + // Obtain tail address for fuzzy index checkpoint + if(!fold_over_snapshot) { + checkpoint_.InitializeCheckpoint(desired.version, state_[resize_info_.version].size(), + hlog.begin_address.load(), hlog.GetTailAddress(), true, + hlog.flushed_until_address.load(), persistence_callback); + } else { + checkpoint_.InitializeCheckpoint(desired.version, state_[resize_info_.version].size(), + hlog.begin_address.load(), hlog.GetTailAddress(), false, + Address::kInvalidAddress, persistence_callback); + } + InitializeCheckpointLocks(); + // Let other threads know that the checkpoint has started. 
+ system_state_.store(desired.GetNextState()); + return true; +} + +template +Status FasterKv::Recover(uint32_t cpr_version, uint32_t index_version, + std::vector& session_ids) { + session_ids.clear(); + SystemState expected = SystemState{ Action::None, Phase::REST, system_state_.load().version }; + if(!system_state_.compare_exchange_strong(expected, + SystemState{ Action::Recover, Phase::REST, expected.version })) { + return Status::Aborted; + } + checkpoint_.InitializeRecover(); + Status status; +#define BREAK_NOT_OK(s) \ + status = (s); \ + if (status != Status::Ok) break \ + + do { + // Index and log metadata. + BREAK_NOT_OK(ReadIndexMetadata(index_version)); + BREAK_NOT_OK(ReadCprMetadata(cpr_version)); + system_state_.store(SystemState{ Action::Recover, Phase::REST, cpr_version + 1 }); + + BREAK_NOT_OK(ReadCprContexts(cpr_version, checkpoint_.log_metadata.guids)); + // The index itself (including overflow buckets). + BREAK_NOT_OK(RecoverFuzzyIndex()); + BREAK_NOT_OK(RecoverFuzzyIndexComplete(true)); + // Any changes made to the log while the index was being fuzzy-checkpointed. + if(fold_over_snapshot) { + BREAK_NOT_OK(RecoverHybridLog()); + } else { + BREAK_NOT_OK(RecoverHybridLogFromSnapshotFile()); + } + BREAK_NOT_OK(RestoreHybridLog()); + } while(false); + if(status == Status::Ok) { + for(const auto& token : checkpoint_.continue_tokens) { + session_ids.push_back(token.first); + } + } + checkpoint_.RecoverDone(); + system_state_.store(SystemState{ Action::None, Phase::REST, cpr_version + 1 }); + return status; +#undef BREAK_NOT_OK +} + +template +bool FasterKv::ShiftBeginAddress(Address address, + GcState::truncate_callback_t truncate_callback, + GcState::complete_callback_t complete_callback) { + SystemState expected = SystemState{ Action::None, Phase::REST, system_state_.load().version }; + if(!system_state_.compare_exchange_strong(expected, + SystemState{ Action::GC, Phase::REST, expected.version })) { + // Can't start a GC while an action is already in progress. + return false; + } + hlog.begin_address.store(address); + // Each active thread will notify the epoch when all pending I/Os have completed. + epoch_.ResetPhaseFinished(); + uint64_t num_chunks = std::max(state_[resize_info_.version].size() / kGcHashTableChunkSize, + (uint64_t)1); + gc_.Initialize(truncate_callback, complete_callback, num_chunks); + // Let other threads know to complete their pending I/Os, so that the log can be truncated. + system_state_.store(SystemState{ Action::GC, Phase::GC_IO_PENDING, expected.version }); + return true; +} + +template +bool FasterKv::GrowIndex(GrowState::callback_t caller_callback) { + SystemState expected = SystemState{ Action::None, Phase::REST, system_state_.load().version }; + if(!system_state_.compare_exchange_strong(expected, + SystemState{ Action::GrowIndex, Phase::REST, expected.version })) { + // An action is already in progress. + return false; + } + epoch_.ResetPhaseFinished(); + uint8_t current_version = resize_info_.version; + assert(current_version == 0 || current_version == 1); + uint8_t next_version = 1 - current_version; + uint64_t num_chunks = std::max(state_[current_version].size() / kGrowHashTableChunkSize, + (uint64_t)1); + grow_.Initialize(caller_callback, current_version, num_chunks); + // Initialize the next version of our hash table to be twice the size of the current version. 
+ state_[next_version].Initialize(state_[current_version].size() * 2, disk.log().alignment()); + overflow_buckets_allocator_[next_version].Initialize(disk.log().alignment(), epoch_); + + SystemState next = SystemState{ Action::GrowIndex, Phase::GROW_PREPARE, expected.version }; + system_state_.store(next); + + // Let this thread know it should be growing the index. + Refresh(); + return true; +} + +} +} // namespace FASTER::core \ No newline at end of file diff --git a/cc/src/core/gc_state.h b/cc/src/core/gc_state.h new file mode 100644 index 000000000..4b7a74251 --- /dev/null +++ b/cc/src/core/gc_state.h @@ -0,0 +1,40 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include + +namespace FASTER { +namespace core { + +/// State of the active garbage-collection call. +class GcState { + public: + typedef void(*truncate_callback_t)(uint64_t offset); + typedef void(*complete_callback_t)(void); + + GcState() + : truncate_callback{ nullptr } + , complete_callback{ nullptr } + , num_chunks{ 0 } + , next_chunk{ 0 } { + } + + void Initialize(truncate_callback_t truncate_callback_, complete_callback_t complete_callback_, + uint64_t num_chunks_) { + truncate_callback = truncate_callback_; + complete_callback = complete_callback_; + num_chunks = num_chunks_; + next_chunk = 0; + } + + truncate_callback_t truncate_callback; + complete_callback_t complete_callback; + uint64_t num_chunks; + std::atomic next_chunk; +}; + +} +} // namespace FASTER::core diff --git a/cc/src/core/grow_state.h b/cc/src/core/grow_state.h new file mode 100644 index 000000000..fd64860c3 --- /dev/null +++ b/cc/src/core/grow_state.h @@ -0,0 +1,44 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include + +namespace FASTER { +namespace core { + +/// State of the active grow-index call. +class GrowState { + public: + typedef void(*callback_t)(uint64_t new_size); + + GrowState() + : callback{ nullptr } + , num_pending_chunks{ 0 } + , old_version{ UINT8_MAX } + , new_version{ UINT8_MAX } { + } + + void Initialize(callback_t callback_, uint8_t current_version, uint64_t num_chunks_) { + callback = callback_; + assert(current_version == 0 || current_version == 1); + old_version = current_version; + new_version = 1 - current_version; + num_chunks = num_chunks_; + num_pending_chunks = num_chunks_; + next_chunk = 0; + } + + callback_t callback; + uint8_t old_version; + uint8_t new_version; + uint64_t num_chunks; + std::atomic num_pending_chunks; + std::atomic next_chunk; +}; + +} +} // namespace FASTER::core diff --git a/cc/src/core/guid.h b/cc/src/core/guid.h new file mode 100644 index 000000000..a41f5818f --- /dev/null +++ b/cc/src/core/guid.h @@ -0,0 +1,142 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#define NOMINMAX +#define _WINSOCKAPI_ +#include +#else +#include +#endif + +namespace FASTER { +namespace core { + +/// Wrapper for GUIDs, for Windows and Linux. 
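A minimal usage sketch for the Guid wrapper declared below; the include path and the example function are illustrative, not part of this commit, but the calls it exercises (Create, ToString, operator==, std::hash) are the ones the class defines:

#include <functional>
#include <string>
#include "core/guid.h"   // assumed include path

void guid_usage_sketch() {
  FASTER::core::Guid session_id = FASTER::core::Guid::Create();   // CoCreateGuid / uuid_generate
  std::string text = session_id.ToString();                       // canonical xxxxxxxx-xxxx-... form
  bool same = (session_id == session_id);                         // compares the raw 128-bit value
  size_t slot = std::hash<FASTER::core::Guid>{}(session_id);      // routed to Guid::GetHashCode()
  (void)text; (void)same; (void)slot;
}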
+class Guid { + public: +#ifdef _WIN32 + Guid() { + guid_.Data1 = 0; + guid_.Data2 = 0; + guid_.Data3 = 0; + std::memset(guid_.Data4, 0, 8); + } +#else + Guid() { + uuid_clear(uuid_); + } +#endif + + private: +#ifdef _WIN32 + Guid(const GUID& guid) + : guid_{ guid } { + } +#else + Guid(const uuid_t uuid) { + uuid_copy(uuid_, uuid); + } +#endif + + public: +#ifdef _WIN32 + static Guid Create() { + GUID guid; + HRESULT result = ::CoCreateGuid(&guid); + assert(result == S_OK); + return guid; + } +#else + static Guid Create() { + uuid_t uuid; + uuid_generate(uuid); + return uuid; + } +#endif + +#ifdef _WIN32 + std::string ToString() const { + char buffer[37]; + size_t offset = sprintf(buffer, "%.8lX-%.4hX-%.4hX-", guid_.Data1, guid_.Data2, guid_.Data3); + for(size_t idx = 0; idx < 2; ++idx) { + offset += sprintf(buffer + offset, "%.2hhX", guid_.Data4[idx]); + } + offset += sprintf(buffer + offset, "-"); + for(size_t idx = 2; idx < sizeof(guid_.Data4); ++idx) { + offset += sprintf(buffer + offset, "%.2hhX", guid_.Data4[idx]); + } + buffer[36] = '\0'; + return std::string{ buffer }; + } +#else + std::string ToString() const { + char buffer[37]; + uuid_unparse(uuid_, buffer); + return std::string{ buffer }; + } +#endif + +#ifdef _WIN32 + bool operator==(const Guid& other) const { + return guid_.Data1 == other.guid_.Data1 && + guid_.Data2 == other.guid_.Data2 && + guid_.Data3 == other.guid_.Data3 && + std::memcmp(guid_.Data4, other.guid_.Data4, 8) == 0; + } +#else + bool operator==(const Guid& other) const { + return uuid_compare(uuid_, other.uuid_) == 0; + } +#endif + +#ifdef _WIN32 + uint32_t GetHashCode() const { + // From C#, .NET Reference Framework. + return guid_.Data1 ^ ((static_cast(guid_.Data2) << 16) | + static_cast(guid_.Data3)) ^ + ((static_cast(guid_.Data4[2]) << 24) | guid_.Data4[7]); + } +#else + uint32_t GetHashCode() const { + uint32_t Data1; + uint16_t Data2; + uint16_t Data3; + std::memcpy(&Data1, uuid_, sizeof(Data1)); + std::memcpy(&Data2, uuid_ + 4, sizeof(Data2)); + std::memcpy(&Data3, uuid_ + 6, sizeof(Data3)); + // From C#, .NET Reference Framework. + return Data1 ^ ((static_cast(Data2) << 16) | + static_cast(Data3)) ^ + ((static_cast(uuid_[10]) << 24) | uuid_[15]); + } +#endif + + private: +#ifdef _WIN32 + GUID guid_; +#else + uuid_t uuid_; +#endif +}; + +} +} // namespace FASTER::core + +/// Implement std::hash<> for GUIDs. +namespace std { +template<> +struct hash { + size_t operator()(const FASTER::core::Guid& val) const { + return val.GetHashCode(); + } +}; +} diff --git a/cc/src/core/hash_bucket.h b/cc/src/core/hash_bucket.h new file mode 100644 index 000000000..081e8de14 --- /dev/null +++ b/cc/src/core/hash_bucket.h @@ -0,0 +1,201 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include + +#include "address.h" +#include "constants.h" +#include "malloc_fixed_page_size.h" + +namespace FASTER { +namespace core { + +static_assert(Address::kAddressBits == 48, "Address::kAddressBits != 48"); + +/// Entry stored in a hash bucket. Packed into 8 bytes. 
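An illustrative check of the 8-byte packing described above, with field widths taken from the bit-field declaration in the struct below (48-bit address, 14-bit tag, one reserved bit, one tentative bit); the include path is an assumption:

#include <cassert>
#include "core/hash_bucket.h"   // assumed include path

void hash_bucket_entry_sketch() {
  using namespace FASTER::core;
  HashBucketEntry entry{ Address{ 0x1234 }, /*tag=*/ 7, /*tentative=*/ false };
  assert(entry.address().control() == 0x1234);   // bits 0..47: logical hybrid-log address
  assert(entry.tag() == 7);                      // bits 48..61: key-hash discriminator
  assert(!entry.tentative());                    // bit 63: marks an in-progress (tentative) insert
  assert(!entry.unused());                       // unused() means the whole 64-bit word is zero
}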
+struct HashBucketEntry { + /// Invalid value in the hash table + static constexpr uint64_t kInvalidEntry = 0; + + HashBucketEntry() + : control_{ 0 } { + } + HashBucketEntry(Address address, uint16_t tag, bool tentative) + : address_{ address.control() } + , tag_{ tag } + , reserved_{ 0 } + , tentative_{ tentative } { + } + HashBucketEntry(uint64_t code) + : control_{ code } { + } + HashBucketEntry(const HashBucketEntry& other) + : control_{ other.control_ } { + } + + inline HashBucketEntry& operator=(const HashBucketEntry& other) { + control_ = other.control_; + return *this; + } + inline bool operator ==(const HashBucketEntry& other) const { + return control_ == other.control_; + } + inline bool operator !=(const HashBucketEntry& other) const { + return control_ != other.control_; + } + inline bool unused() const { + return control_ == 0; + } + inline Address address() const { + return Address{ address_ }; + } + inline uint16_t tag() const { + return static_cast(tag_); + } + inline bool tentative() const { + return static_cast(tentative_); + } + inline void set_tentative(bool desired) { + tentative_ = desired; + } + + union { + struct { + uint64_t address_ : 48; // corresponds to logical address + uint64_t tag_ : 14; + uint64_t reserved_ : 1; + uint64_t tentative_ : 1; + }; + uint64_t control_; + }; +}; +static_assert(sizeof(HashBucketEntry) == 8, "sizeof(HashBucketEntry) != 8"); + +/// Atomic hash-bucket entry. +class AtomicHashBucketEntry { + public: + AtomicHashBucketEntry(const HashBucketEntry& entry) + : control_{ entry.control_ } { + } + /// Default constructor + AtomicHashBucketEntry() + : control_{ HashBucketEntry::kInvalidEntry } { + } + + /// Atomic access. + inline HashBucketEntry load() const { + return HashBucketEntry{ control_.load() }; + } + inline void store(const HashBucketEntry& desired) { + control_.store(desired.control_); + } + inline bool compare_exchange_strong(HashBucketEntry& expected, HashBucketEntry desired) { + uint64_t expected_control = expected.control_; + bool result = control_.compare_exchange_strong(expected_control, desired.control_); + expected = HashBucketEntry{ expected_control }; + return result; + } + + private: + /// Atomic address to the hash bucket entry. + std::atomic control_; +}; + +/// Entry stored in a hash bucket that points to the next overflow bucket (if any). +struct HashBucketOverflowEntry { + HashBucketOverflowEntry() + : control_{ 0 } { + } + HashBucketOverflowEntry(FixedPageAddress address) + : address_{ address.control() } + , unused_{ 0 } { + } + HashBucketOverflowEntry(const HashBucketOverflowEntry& other) + : control_{ other.control_ } { + } + HashBucketOverflowEntry(uint64_t code) + : control_{ code } { + } + + inline HashBucketOverflowEntry& operator=(const HashBucketOverflowEntry& other) { + control_ = other.control_; + return *this; + } + inline bool operator ==(const HashBucketOverflowEntry& other) const { + return control_ == other.control_; + } + inline bool operator !=(const HashBucketOverflowEntry& other) const { + return control_ != other.control_; + } + inline bool unused() const { + return address_ == 0; + } + inline FixedPageAddress address() const { + return FixedPageAddress{ address_ }; + } + + union { + struct { + uint64_t address_ : 48; // corresponds to logical address + uint64_t unused_ : 16; + }; + uint64_t control_; + }; +}; +static_assert(sizeof(HashBucketOverflowEntry) == 8, "sizeof(HashBucketOverflowEntry) != 8"); + +/// Atomic hash-bucket overflow entry. 
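The AtomicHashBucketEntry wrapper above is updated with the usual lock-free idiom: load the current value, build the desired value, and publish it with compare_exchange_strong, treating a failed exchange as "another thread won" (this is how CleanHashTableBuckets() in faster.h drops stale entries). A small sketch of that idiom, with a hypothetical helper name:

// Returns true if this thread installed `desired`; on failure, another thread changed the entry
// first and `expected` has been refreshed with the value that thread installed.
inline bool try_replace_entry(FASTER::core::AtomicHashBucketEntry& atomic_entry,
                              FASTER::core::HashBucketEntry desired) {
  FASTER::core::HashBucketEntry expected = atomic_entry.load();
  return atomic_entry.compare_exchange_strong(expected, desired);
}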
+class AtomicHashBucketOverflowEntry { + private: + static constexpr uint64_t kPinIncrement = (uint64_t)1 << 48; + static constexpr uint64_t kLocked = (uint64_t)1 << 63; + + public: + AtomicHashBucketOverflowEntry(const HashBucketOverflowEntry& entry) + : control_{ entry.control_ } { + } + /// Default constructor + AtomicHashBucketOverflowEntry() + : control_{ HashBucketEntry::kInvalidEntry } { + } + + /// Atomic access. + inline HashBucketOverflowEntry load() const { + return HashBucketOverflowEntry{ control_.load() }; + } + inline void store(const HashBucketOverflowEntry& desired) { + control_.store(desired.control_); + } + inline bool compare_exchange_strong(HashBucketOverflowEntry& expected, + HashBucketOverflowEntry desired) { + uint64_t expected_control = expected.control_; + bool result = control_.compare_exchange_strong(expected_control, desired.control_); + expected = HashBucketOverflowEntry{ expected_control }; + return result; + } + + private: + /// Atomic address to the hash bucket entry. + std::atomic control_; +}; + +/// A bucket consisting of 7 hash bucket entries, plus one hash bucket overflow entry. Fits in +/// a cache line. +struct alignas(Constants::kCacheLineBytes) HashBucket { + /// Number of entries per bucket (excluding overflow entry). + static constexpr uint32_t kNumEntries = 7; + /// The entries. + AtomicHashBucketEntry entries[kNumEntries]; + /// Overflow entry points to next overflow bucket, if any. + AtomicHashBucketOverflowEntry overflow_entry; +}; +static_assert(sizeof(HashBucket) == Constants::kCacheLineBytes, + "sizeof(HashBucket) != Constants::kCacheLineBytes"); + +} +} // namespace FASTER::core diff --git a/cc/src/core/hash_table.h b/cc/src/core/hash_table.h new file mode 100644 index 000000000..4fc05a482 --- /dev/null +++ b/cc/src/core/hash_table.h @@ -0,0 +1,294 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include + +#include "hash_bucket.h" +#include "key_hash.h" + +namespace FASTER { +namespace core { + +/// The hash table itself: a sized array of HashBuckets. 
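A sketch of how a key hash is routed through the table declared below: idx() selects one of the size() buckets, the seven in-line entries are scanned for a matching 14-bit tag, and the overflow entry is followed when the bucket is full. The helper is illustrative (the real lookup lives in faster.h and also handles tentative entries); the template parameters avoid committing to the overflow allocator's exact signature, and the include paths are assumptions:

#include <cstdint>
#include "core/hash_bucket.h"   // assumed include paths
#include "core/hash_table.h"
#include "core/key_hash.h"

template <class hash_table_t, class overflow_allocator_t>
FASTER::core::HashBucketEntry find_entry_sketch(hash_table_t& table,
                                                overflow_allocator_t& overflow_allocator,
                                                FASTER::core::KeyHash hash) {
  using namespace FASTER::core;
  const HashBucket* bucket = &table.bucket(hash);          // bucket index = hash.idx(table.size())
  while(true) {
    for(uint32_t entry_idx = 0; entry_idx < HashBucket::kNumEntries; ++entry_idx) {
      HashBucketEntry entry = bucket->entries[entry_idx].load();
      if(!entry.unused() && entry.tag() == hash.tag()) {
        return entry;                                      // candidate chain for this key hash
      }
    }
    HashBucketOverflowEntry overflow_entry = bucket->overflow_entry.load();
    if(overflow_entry.unused()) {
      return HashBucketEntry{ HashBucketEntry::kInvalidEntry };   // no entry with this tag
    }
    bucket = &overflow_allocator.Get(overflow_entry.address());   // walk the overflow chain
  }
}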
+template +class InternalHashTable { + public: + typedef D disk_t; + typedef typename D::file_t file_t; + + InternalHashTable() + : size_{ 0 } + , buckets_{ nullptr } + , disk_{ nullptr } + , pending_checkpoint_writes_{ 0 } + , pending_recover_reads_{ 0 } + , checkpoint_pending_{ false } + , checkpoint_failed_{ false } + , recover_pending_{ false } + , recover_failed_{ false } { + } + + ~InternalHashTable() { + if(buckets_) { + aligned_free(buckets_); + } + } + + inline void Initialize(uint64_t new_size, uint64_t alignment) { + assert(new_size < INT32_MAX); + assert(Utility::IsPowerOfTwo(new_size)); + assert(Utility::IsPowerOfTwo(alignment)); + assert(alignment >= Constants::kCacheLineBytes); + if(size_ != new_size) { + size_ = new_size; + if(buckets_) { + aligned_free(buckets_); + } + buckets_ = reinterpret_cast(aligned_alloc(alignment, + size_ * sizeof(HashBucket))); + } + std::memset(buckets_, 0, size_ * sizeof(HashBucket)); + assert(pending_checkpoint_writes_ == 0); + assert(pending_recover_reads_ == 0); + assert(checkpoint_pending_ == false); + assert(checkpoint_failed_ == false); + assert(recover_pending_ == false); + assert(recover_failed_ == false); + } + + inline void Uninitialize() { + if(buckets_) { + aligned_free(buckets_); + buckets_ = nullptr; + } + size_ = 0; + assert(pending_checkpoint_writes_ == 0); + assert(pending_recover_reads_ == 0); + assert(checkpoint_pending_ == false); + assert(checkpoint_failed_ == false); + assert(recover_pending_ == false); + assert(recover_failed_ == false); + } + + /// Get the bucket specified by the hash. + inline const HashBucket& bucket(KeyHash hash) const { + return buckets_[hash.idx(size_)]; + } + inline HashBucket& bucket(KeyHash hash) { + return buckets_[hash.idx(size_)]; + } + + /// Get the bucket specified by the index. (Used by checkpoint/recovery.) + inline const HashBucket& bucket(uint64_t idx) const { + assert(idx < size_); + return buckets_[idx]; + } + /// (Used by GC and called by unit tests.) + inline HashBucket& bucket(uint64_t idx) { + assert(idx < size_); + return buckets_[idx]; + } + + inline uint64_t size() const { + return size_; + } + + // Checkpointing and recovery. + Status Checkpoint(disk_t& disk, file_t&& file, uint64_t& checkpoint_size); + inline Status CheckpointComplete(bool wait); + + Status Recover(disk_t& disk, file_t&& file, uint64_t checkpoint_size); + inline Status RecoverComplete(bool wait); + + void DumpDistribution(MallocFixedPageSize& overflow_buckets_allocator); + + private: + // Checkpointing and recovery. + class AsyncIoContext : public IAsyncContext { + public: + AsyncIoContext(InternalHashTable* table_) + : table{ table_ } { + } + /// The deep-copy constructor + AsyncIoContext(AsyncIoContext& other) + : table{ other.table } { + } + protected: + Status DeepCopy_Internal(IAsyncContext*& context_copy) final { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + public: + InternalHashTable* table; + }; + + private: + uint64_t size_; + HashBucket* buckets_; + + /// State for ongoing checkpoint/recovery. + disk_t* disk_; + file_t file_; + std::atomic pending_checkpoint_writes_; + std::atomic pending_recover_reads_; + std::atomic checkpoint_pending_; + std::atomic checkpoint_failed_; + std::atomic recover_pending_; + std::atomic recover_failed_; +}; + +/// Implementations. 
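Before the implementations that follow, a worked example of the chunked I/O that Checkpoint() and Recover() perform: the bucket array is written as Constants::kNumMergeChunks equal slices, each as one async request. The concrete numbers below (table size, a chunk count of 32) are assumptions chosen only to make the arithmetic concrete:

#include <cstdint>

constexpr uint64_t kAssumedBuckets = uint64_t{ 1 } << 20;                   // 1 Mi buckets (illustrative)
constexpr uint64_t kAssumedMergeChunks = 32;                                // assumed Constants::kNumMergeChunks
constexpr uint64_t kBucketBytes = 64;                                       // sizeof(HashBucket): one cache line
constexpr uint64_t kChunkBuckets = kAssumedBuckets / kAssumedMergeChunks;   // 32,768 buckets per I/O
constexpr uint64_t kWriteBytes = kChunkBuckets * kBucketBytes;              // 2 MiB per WriteAsync
constexpr uint64_t kCheckpointBytes = kAssumedBuckets * kBucketBytes;       // 64 MiB for the whole table
static_assert(kWriteBytes == 2 * 1024 * 1024, "2 MiB per chunk under these assumptions");
static_assert(kCheckpointBytes == 64 * 1024 * 1024, "64 MiB total under these assumptions");

Each completed write decrements pending_checkpoint_writes_; the callback that brings it to zero closes the file and clears checkpoint_pending_, which is what CheckpointComplete() polls.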
+template +Status InternalHashTable::Checkpoint(disk_t& disk, file_t&& file, uint64_t& checkpoint_size) { + auto callback = [](IAsyncContext* ctxt, Status result, size_t bytes_transferred) { + CallbackContext context{ ctxt }; + if(result != Status::Ok) { + context->table->checkpoint_failed_ = true; + } + if(--context->table->pending_checkpoint_writes_ == 0) { + result = context->table->file_.Close(); + if(result != Status::Ok) { + context->table->checkpoint_failed_ = true; + } + context->table->checkpoint_pending_ = false; + } + }; + + assert(size_ % Constants::kNumMergeChunks == 0); + disk_ = &disk; + file_ = std::move(file); + + checkpoint_size = 0; + checkpoint_failed_ = false; + uint32_t chunk_size = static_cast(size_ / Constants::kNumMergeChunks); + uint32_t write_size = static_cast(chunk_size * sizeof(HashBucket)); + assert(write_size % file_.alignment() == 0); + assert(!checkpoint_pending_); + assert(pending_checkpoint_writes_ == 0); + checkpoint_pending_ = true; + pending_checkpoint_writes_ = Constants::kNumMergeChunks; + for(uint32_t idx = 0; idx < Constants::kNumMergeChunks; ++idx) { + AsyncIoContext context{ this }; + RETURN_NOT_OK(file_.WriteAsync(&bucket(idx * chunk_size), idx * write_size, write_size, + callback, context)); + } + checkpoint_size = size_ * sizeof(HashBucket); + return Status::Ok; +} + +template +inline Status InternalHashTable::CheckpointComplete(bool wait) { + disk_->TryComplete(); + bool complete = !checkpoint_pending_.load(); + while(wait && !complete) { + disk_->TryComplete(); + complete = !checkpoint_pending_.load(); + std::this_thread::yield(); + } + if(!complete) { + return Status::Pending; + } else { + return checkpoint_failed_ ? Status::IOError : Status::Ok; + } +} + +template +Status InternalHashTable::Recover(disk_t& disk, file_t&& file, uint64_t checkpoint_size) { + auto callback = [](IAsyncContext* ctxt, Status result, size_t bytes_transferred) { + CallbackContext context{ ctxt }; + if(result != Status::Ok) { + context->table->recover_failed_ = true; + } + if(--context->table->pending_recover_reads_ == 0) { + result = context->table->file_.Close(); + if(result != Status::Ok) { + context->table->recover_failed_ = true; + } + context->table->recover_pending_ = false; + } + }; + + assert(checkpoint_size > 0); + assert(checkpoint_size % sizeof(HashBucket) == 0); + assert(checkpoint_size % Constants::kNumMergeChunks == 0); + disk_ = &disk; + file_ = std::move(file); + + recover_failed_ = false; + uint32_t read_size = static_cast(checkpoint_size / Constants::kNumMergeChunks); + uint32_t chunk_size = static_cast(read_size / sizeof(HashBucket)); + assert(read_size % file_.alignment() == 0); + + Initialize(checkpoint_size / sizeof(HashBucket), file_.alignment()); + assert(!recover_pending_); + assert(pending_recover_reads_.load() == 0); + recover_pending_ = true; + pending_recover_reads_ = Constants::kNumMergeChunks; + for(uint32_t idx = 0; idx < Constants::kNumMergeChunks; ++idx) { + AsyncIoContext context{ this }; + RETURN_NOT_OK(file_.ReadAsync(idx * read_size, &bucket(idx * chunk_size), read_size, + callback, context)); + } + return Status::Ok; +} + +template +inline Status InternalHashTable::RecoverComplete(bool wait) { + disk_->TryComplete(); + bool complete = !recover_pending_.load(); + while(wait && !complete) { + disk_->TryComplete(); + complete = !recover_pending_.load(); + std::this_thread::yield(); + } + if(!complete) { + return Status::Pending; + } else { + return recover_failed_ ? 
Status::IOError : Status::Ok; + } +} + +template +inline void InternalHashTable::DumpDistribution( + MallocFixedPageSize& overflow_buckets_allocator) { + uint64_t table_size = size(); + uint64_t total_record_count = 0; + uint64_t histogram[16] = { 0 }; + for(uint64_t bucket_idx = 0; bucket_idx < table_size; ++bucket_idx) { + const HashBucket* bucket = &buckets_[bucket_idx]; + uint64_t count = 0; + while(bucket) { + for(uint32_t entry_idx = 0; entry_idx < HashBucket::kNumEntries; ++entry_idx) { + if(!bucket->entries[entry_idx].load().unused()) { + ++count; + ++total_record_count; + } + } + HashBucketOverflowEntry overflow_entry = bucket->overflow_entry.load(); + if(overflow_entry.unused()) { + bucket = nullptr; + } else { + bucket = &overflow_buckets_allocator.Get(overflow_entry.address()); + } + } + if(count < 15) { + ++histogram[count]; + } else { + ++histogram[15]; + } + } + + printf("number of hash buckets: %" PRIu64 "\n", table_size); + printf("total record count: %" PRIu64 "\n", total_record_count); + printf("histogram:\n"); + for(uint8_t idx = 0; idx < 15; ++idx) { + printf("%2u : %" PRIu64 "\n", idx, histogram[idx]); + } + printf("15+: %" PRIu64 "\n", histogram[15]); +} + +} +} // namespace FASTER::core diff --git a/cc/src/core/internal_contexts.h b/cc/src/core/internal_contexts.h new file mode 100644 index 000000000..8a0cfd787 --- /dev/null +++ b/cc/src/core/internal_contexts.h @@ -0,0 +1,379 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "address.h" +#include "guid.h" +#include "hash_bucket.h" +#include "native_buffer_pool.h" +#include "record.h" +#include "state_transitions.h" +#include "thread.h" + +namespace FASTER { +namespace core { + +/// Internal contexts, used by FASTER. + +enum class OperationType : uint8_t { + Read, + RMW, + Upsert, + Insert, + Delete +}; + +enum class OperationStatus : uint8_t { + SUCCESS, + NOT_FOUND, + RETRY_NOW, + RETRY_LATER, + RECORD_ON_DISK, + SUCCESS_UNMARK, + NOT_FOUND_UNMARK, + CPR_SHIFT_DETECTED +}; + +/// Internal FASTER context. +template +class PendingContext : public IAsyncContext { + public: + typedef K key_t; + + protected: + PendingContext(OperationType type_, IAsyncContext& caller_context_, + AsyncCallback caller_callback_) + : type{ type_ } + , caller_context{ &caller_context_ } + , caller_callback{ caller_callback_ } + , version{ UINT32_MAX } + , phase{ Phase::INVALID } + , result{ Status::Pending } + , address{ Address::kInvalidAddress } + , entry{ HashBucketEntry::kInvalidEntry } { + } + + public: + /// The deep-copy constructor. + PendingContext(const PendingContext& other, IAsyncContext* caller_context_) + : type{ other.type } + , caller_context{ caller_context_ } + , caller_callback{ other.caller_callback } + , version{ other.version } + , phase{ other.phase } + , result{ other.result } + , address{ other.address } + , entry{ other.entry } { + } + + public: + /// Go async, for the first time. + void go_async(Phase phase_, uint32_t version_, Address address_, HashBucketEntry entry_) { + phase = phase_; + version = version_; + address = address_; + entry = entry_; + } + + /// Go async, again. + void continue_async(Address address_, HashBucketEntry entry_) { + address = address_; + entry = entry_; + } + + virtual const key_t& key() const = 0; + + /// Caller context. + IAsyncContext* caller_context; + /// Caller callback. + AsyncCallback caller_callback; + /// Checkpoint version. 
+ uint32_t version; + /// Checkpoint phase. + Phase phase; + /// Type of operation (Read, Upsert, RMW, etc.). + OperationType type; + /// Result of operation. + Status result; + /// Address of the record being read or modified. + Address address; + /// Hash table entry that (indirectly) leads to the record being read or modified. + HashBucketEntry entry; +}; + +/// FASTER's internal Read() context. + +/// An internal Read() context that has gone async and lost its type information. +template +class AsyncPendingReadContext : public PendingContext { + public: + typedef K key_t; + protected: + AsyncPendingReadContext(IAsyncContext& caller_context_, AsyncCallback caller_callback_) + : PendingContext(OperationType::Read, caller_context_, caller_callback_) { + } + /// The deep copy constructor. + AsyncPendingReadContext(AsyncPendingReadContext& other, IAsyncContext* caller_context) + : PendingContext(other, caller_context) { + } + public: + virtual void Get(const void* rec) = 0; + virtual void GetAtomic(const void* rec) = 0; +}; + +/// A synchronous Read() context preserves its type information. +template +class PendingReadContext : public AsyncPendingReadContext { + public: + typedef RC read_context_t; + typedef typename read_context_t::key_t key_t; + typedef typename read_context_t::value_t value_t; + typedef Record record_t; + + PendingReadContext(read_context_t& caller_context_, AsyncCallback caller_callback_) + : AsyncPendingReadContext(caller_context_, caller_callback_) { + } + /// The deep copy constructor. + PendingReadContext(PendingReadContext& other, IAsyncContext* caller_context_) + : AsyncPendingReadContext(other, caller_context_) { + } + protected: + Status DeepCopy_Internal(IAsyncContext*& context_copy) final { + return IAsyncContext::DeepCopy_Internal(*this, PendingContext::caller_context, + context_copy); + } + private: + inline const read_context_t& read_context() const { + return *static_cast(PendingContext::caller_context); + } + inline read_context_t& read_context() { + return *static_cast(PendingContext::caller_context); + } + public: + /// Accessors. + inline const key_t& key() const final { + return read_context().key(); + } + inline void Get(const void* rec) final { + const record_t* record = reinterpret_cast(rec); + read_context().Get(record->value()); + } + inline void GetAtomic(const void* rec) final { + const record_t* record = reinterpret_cast(rec); + read_context().GetAtomic(record->value()); + } +}; + +/// FASTER's internal Upsert() context. + +/// An internal Upsert() context that has gone async and lost its type information. +template +class AsyncPendingUpsertContext : public PendingContext { + public: + typedef K key_t; + protected: + AsyncPendingUpsertContext(IAsyncContext& caller_context_, AsyncCallback caller_callback_) + : PendingContext(OperationType::Upsert, caller_context_, caller_callback_) { + } + /// The deep copy constructor. + AsyncPendingUpsertContext(AsyncPendingUpsertContext& other, IAsyncContext* caller_context) + : PendingContext(other, caller_context) { + } + public: + virtual void Put(void* rec) = 0; + virtual bool PutAtomic(void* rec) = 0; + virtual uint32_t value_size() const = 0; +}; + +/// A synchronous Upsert() context preserves its type information. 
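The upsert_context_t that the wrapper below forwards to is caller-supplied. A hypothetical example of that surface, using made-up SketchKey/SketchValue types (none of these names exist in this commit; Utility::GetHashCode and the IAsyncContext deep-copy helper are assumed to be available from the surrounding headers):

#include <atomic>
#include <cstdint>
#include "core/async.h"      // assumed include paths
#include "core/key_hash.h"
#include "core/utility.h"

// Hypothetical 8-byte key satisfying the implicit key concept (hashable, equality-comparable).
struct SketchKey {
  uint64_t key;
  inline FASTER::core::KeyHash GetHash() const {
    return FASTER::core::KeyHash{ FASTER::core::Utility::GetHashCode(key) };
  }
  inline bool operator==(const SketchKey& other) const { return key == other.key; }
  inline bool operator!=(const SketchKey& other) const { return key != other.key; }
};

// Hypothetical value; the atomic member keeps in-place updates of mutable records safe.
struct SketchValue {
  std::atomic<uint64_t> counter{ 0 };
};

// Hypothetical caller-side upsert context: exactly the members the wrapper below forwards to.
class SketchUpsertContext : public FASTER::core::IAsyncContext {
 public:
  typedef SketchKey key_t;
  typedef SketchValue value_t;

  SketchUpsertContext(uint64_t key, uint64_t value)
    : key_{ key }
    , value_{ value } {
  }

  inline const SketchKey& key() const {
    return key_;
  }
  inline static constexpr uint32_t value_size() {
    return sizeof(value_t);
  }
  // Fills a freshly allocated record that no other thread can see yet.
  inline void Put(SketchValue& value) {
    value.counter.store(value_, std::memory_order_relaxed);
  }
  // Updates a record in the mutable region in place; returning false forces a copy instead.
  inline bool PutAtomic(SketchValue& value) {
    value.counter.store(value_);
    return true;
  }

 protected:
  // Copies this context to the heap if the operation goes async.
  FASTER::core::Status DeepCopy_Internal(FASTER::core::IAsyncContext*& context_copy) final {
    return IAsyncContext::DeepCopy_Internal(*this, context_copy);
  }

 private:
  SketchKey key_;
  uint64_t value_;
};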
+template +class PendingUpsertContext : public AsyncPendingUpsertContext { + public: + typedef UC upsert_context_t; + typedef typename upsert_context_t::key_t key_t; + typedef typename upsert_context_t::value_t value_t; + typedef Record record_t; + + PendingUpsertContext(upsert_context_t& caller_context_, AsyncCallback caller_callback_) + : AsyncPendingUpsertContext(caller_context_, caller_callback_) { + } + /// The deep copy constructor. + PendingUpsertContext(PendingUpsertContext& other, IAsyncContext* caller_context_) + : AsyncPendingUpsertContext(other, caller_context_) { + } + protected: + Status DeepCopy_Internal(IAsyncContext*& context_copy) final { + return IAsyncContext::DeepCopy_Internal(*this, PendingContext::caller_context, + context_copy); + } + private: + inline const upsert_context_t& upsert_context() const { + return *static_cast(PendingContext::caller_context); + } + inline upsert_context_t& upsert_context() { + return *static_cast(PendingContext::caller_context); + } + public: + /// Accessors. + inline const key_t& key() const final { + return upsert_context().key(); + } + inline void Put(void* rec) final { + record_t* record = reinterpret_cast(rec); + upsert_context().Put(record->value()); + } + inline bool PutAtomic(void* rec) final { + record_t* record = reinterpret_cast(rec); + return upsert_context().PutAtomic(record->value()); + } + inline constexpr uint32_t value_size() const final { + return upsert_context().value_size(); + } +}; + +/// FASTER's internal Rmw() context. +/// An internal Rmw() context that has gone async and lost its type information. +template +class AsyncPendingRmwContext : public PendingContext { + public: + typedef K key_t; + protected: + AsyncPendingRmwContext(IAsyncContext& caller_context_, AsyncCallback caller_callback_) + : PendingContext(OperationType::RMW, caller_context_, caller_callback_) { + } + /// The deep copy constructor. + AsyncPendingRmwContext(AsyncPendingRmwContext& other, IAsyncContext* caller_context) + : PendingContext(other, caller_context) { + } + public: + /// Set initial value. + virtual void RmwInitial(void* rec) = 0; + /// RCU. + virtual void RmwCopy(const void* old_rec, void* rec) = 0; + /// in-place update. + virtual bool RmwAtomic(void* rec) = 0; + virtual uint32_t value_size() const = 0; +}; + +/// A synchronous Rmw() context preserves its type information. +template +class PendingRmwContext : public AsyncPendingRmwContext { + public: + typedef MC rmw_context_t; + typedef typename rmw_context_t::key_t key_t; + typedef typename rmw_context_t::value_t value_t; + typedef Record record_t; + + PendingRmwContext(rmw_context_t& caller_context_, AsyncCallback caller_callback_) + : AsyncPendingRmwContext(caller_context_, caller_callback_) { + } + /// The deep copy constructor. + PendingRmwContext(PendingRmwContext& other, IAsyncContext* caller_context_) + : AsyncPendingRmwContext(other, caller_context_) { + } + protected: + Status DeepCopy_Internal(IAsyncContext*& context_copy) final { + return IAsyncContext::DeepCopy_Internal(*this, PendingContext::caller_context, + context_copy); + } + private: + const rmw_context_t& rmw_context() const { + return *static_cast(PendingContext::caller_context); + } + rmw_context_t& rmw_context() { + return *static_cast(PendingContext::caller_context); + } + public: + /// Accessors. + const key_t& key() const { + return rmw_context().key(); + } + /// Set initial value. 
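A companion sketch for the read-modify-write contract (RmwInitial, RmwCopy, RmwAtomic) that the PendingRmwContext wrapper forwards to, implementing a counter increment; it reuses the hypothetical SketchKey and SketchValue types from the upsert sketch above:

// Hypothetical caller-side RMW context: an atomic add over SketchValue::counter.
class SketchRmwContext : public FASTER::core::IAsyncContext {
 public:
  typedef SketchKey key_t;
  typedef SketchValue value_t;

  SketchRmwContext(uint64_t key, uint64_t delta)
    : key_{ key }
    , delta_{ delta } {
  }

  inline const SketchKey& key() const {
    return key_;
  }
  inline static constexpr uint32_t value_size() {
    return sizeof(value_t);
  }
  // First write for this key: seed the counter.
  inline void RmwInitial(SketchValue& value) {
    value.counter.store(delta_, std::memory_order_relaxed);
  }
  // Copy-on-write path: derive the new record from the immutable old one.
  inline void RmwCopy(const SketchValue& old_value, SketchValue& value) {
    value.counter.store(old_value.counter.load() + delta_, std::memory_order_relaxed);
  }
  // In-place path for records in the mutable region.
  inline bool RmwAtomic(SketchValue& value) {
    value.counter.fetch_add(delta_);
    return true;
  }

 protected:
  FASTER::core::Status DeepCopy_Internal(FASTER::core::IAsyncContext*& context_copy) final {
    return IAsyncContext::DeepCopy_Internal(*this, context_copy);
  }

 private:
  SketchKey key_;
  uint64_t delta_;
};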
+ inline void RmwInitial(void* rec) final { + record_t* record = reinterpret_cast(rec); + rmw_context().RmwInitial(record->value()); + } + /// RCU. + inline void RmwCopy(const void* old_rec, void* rec) final { + const record_t* old_record = reinterpret_cast(old_rec); + record_t* record = reinterpret_cast(rec); + rmw_context().RmwCopy(old_record->value(), record->value()); + } + /// in-place update. + inline bool RmwAtomic(void* rec) final { + record_t* record = reinterpret_cast(rec); + return rmw_context().RmwAtomic(record->value()); + } + inline constexpr uint32_t value_size() const final { + return rmw_context().value_size(); + } +}; + +class AsyncIOContext; + +/// Per-thread execution context. (Just the stuff that's checkpointed to disk.) +struct PersistentExecContext { + PersistentExecContext() + : serial_num{ 0 } + , version{ 0 } + , guid{} { + } + + void Initialize(uint32_t version_, const Guid& guid_, uint64_t serial_num_) { + serial_num = serial_num_; + version = version_; + guid = guid_; + } + + uint64_t serial_num; + uint32_t version; + /// Unique identifier for this session. + Guid guid; +}; +static_assert(sizeof(PersistentExecContext) == 32, "sizeof(PersistentExecContext) != 32"); + +/// Per-thread execution context. (Also includes state kept in-memory-only.) +struct ExecutionContext : public PersistentExecContext { + /// Default constructor. + ExecutionContext() + : phase{ Phase::INVALID } + , io_id{ 0 } { + } + + void Initialize(Phase phase_, uint32_t version_, const Guid& guid_, uint64_t serial_num_) { + assert(retry_requests.empty()); + assert(pending_ios.empty()); + assert(io_responses.empty()); + + PersistentExecContext::Initialize(version_, guid_, serial_num_); + phase = phase_; + retry_requests.clear(); + io_id = 0; + pending_ios.clear(); + io_responses.clear(); + } + + Phase phase; + + /// Retry request contexts are stored inside the deque. + std::deque retry_requests; + /// Assign a unique ID to every I/O request. + uint64_t io_id; + /// For each pending I/O, maps io_id to the hash of the key being retrieved. + std::unordered_map pending_ios; + + /// The I/O completion thread hands the PendingContext back to the thread that issued the + /// request. + concurrent_queue io_responses; +}; + +} +} // namespace FASTER::core diff --git a/cc/src/core/key_hash.h b/cc/src/core/key_hash.h new file mode 100644 index 000000000..5d3521b2a --- /dev/null +++ b/cc/src/core/key_hash.h @@ -0,0 +1,54 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include "utility.h" + +namespace FASTER { +namespace core { + +/// Hash of a key is 8 bytes, compatible with hash bucket entry. +struct KeyHash { + KeyHash() + : control_{ 0 } { + } + explicit KeyHash(uint64_t code) + : control_{ code } { + } + KeyHash(const KeyHash& other) + : control_{ other.control_ } { + } + + KeyHash& operator=(const KeyHash& other) { + control_ = other.control_; + } + + /// Truncate the key hash's address to get the page_index into a hash table of specified size. + inline uint64_t idx(uint64_t size) const { + assert(Utility::IsPowerOfTwo(size)); + return address_ & (size - 1); + } + + /// The tag (14 bits) serves as a discriminator inside a hash bucket. (Hash buckets use 2 bits + /// for control and 48 bits for log-structured store offset; the remaining 14 bits discriminate + /// between different key hashes stored in the same bucket.) 
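A worked example (with an arbitrary hash value) of how idx() and tag() carve up the 64 bits; this split is also what lets SplitHashTableBuckets() in faster.h test hash.idx(new_size) < old_size to route an entry when the table doubles:

// hash = 0x0123456789ABCDEF: the low 48 bits are the address part, the next 14 bits the tag.
static_assert((0x0123456789ABCDEFull & ((1ull << 20) - 1)) == 0xBCDEF,
              "idx() over a 2^20-bucket table keeps the low 20 address bits");
static_assert(((0x0123456789ABCDEFull >> 48) & 0x3FFF) == 0x0123,
              "tag() is the 14 bits above the 48-bit address");
// Doubling the table to 2^21 buckets exposes exactly one more address bit to idx(), so every
// entry lands either in its old slot or in its old slot + 2^20.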
+ inline uint16_t tag() const { + return static_cast(tag_); + } + + private: + union { + struct { + uint64_t address_ : 48; + uint64_t tag_ : 14; + uint64_t not_used_ : 2; + }; + uint64_t control_; + }; +}; +static_assert(sizeof(KeyHash) == 8, "sizeof(KeyHash) != 8"); + +} +} // namespace FASTER::core diff --git a/cc/src/core/light_epoch.h b/cc/src/core/light_epoch.h new file mode 100644 index 000000000..f08219191 --- /dev/null +++ b/cc/src/core/light_epoch.h @@ -0,0 +1,328 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "alloc.h" +#include "async.h" +#include "constants.h" +#include "phase.h" +#include "thread.h" +#include "utility.h" + +namespace FASTER { +namespace core { + +class LightEpoch { + private: + /// Entry in epoch table + struct alignas(Constants::kCacheLineBytes) Entry { + Entry() + : local_current_epoch{ 0 } + , reentrant{ 0 } + , phase_finished{ Phase::REST } { + } + + uint64_t local_current_epoch; + uint32_t reentrant; + std::atomic phase_finished; + }; + static_assert(sizeof(Entry) == 64, "sizeof(Entry) != 64"); + + struct EpochAction { + typedef void(*callback_t)(IAsyncContext*); + + static constexpr uint64_t kFree = UINT64_MAX; + static constexpr uint64_t kLocked = UINT64_MAX - 1; + + EpochAction() + : epoch{ kFree } + , callback{ nullptr } + , context{ nullptr } { + } + + void Initialize() { + callback = nullptr; + context = nullptr; + epoch = kFree; + } + + bool IsFree() const { + return epoch.load() == kFree; + } + + bool TryPop(uint64_t expected_epoch) { + bool retval = epoch.compare_exchange_strong(expected_epoch, kLocked); + if(retval) { + callback_t callback_ = callback; + IAsyncContext* context_ = context; + callback = nullptr; + context = nullptr; + // Release the lock. + epoch.store(kFree); + // Perform the action. + callback_(context_); + } + return retval; + } + + bool TryPush(uint64_t prior_epoch, callback_t new_callback, IAsyncContext* new_context) { + uint64_t expected_epoch = kFree; + bool retval = epoch.compare_exchange_strong(expected_epoch, kLocked); + if(retval) { + callback = new_callback; + context = new_context; + // Release the lock. + epoch.store(prior_epoch); + } + return retval; + } + + bool TrySwap(uint64_t expected_epoch, uint64_t prior_epoch, callback_t new_callback, + IAsyncContext* new_context) { + bool retval = epoch.compare_exchange_strong(expected_epoch, kLocked); + if(retval) { + callback_t existing_callback = callback; + IAsyncContext* existing_context = context; + callback = new_callback; + context = new_context; + // Release the lock. + epoch.store(prior_epoch); + // Perform the action. + existing_callback(existing_context); + } + return retval; + } + + /// The epoch field is atomic--always read it first and write it last. + std::atomic epoch; + + void(*callback)(IAsyncContext* context); + IAsyncContext* context; + }; + + public: + /// Default invalid page_index entry. + static constexpr uint32_t kInvalidIndex = 0; + /// This thread is not protecting any epoch. + static constexpr uint64_t kUnprotected = 0; + + private: + /// Default number of entries in the entries table + static constexpr uint32_t kTableSize = Thread::kMaxNumThreads; + /// Default drainlist size + static constexpr uint32_t kDrainListSize = 256; + /// Epoch table + Entry* table_; + /// Number of entries in epoch table. 
+ uint32_t num_entries_; + + /// List of action, epoch pairs containing actions to performed when an epoch becomes + /// safe to reclaim. + EpochAction drain_list_[kDrainListSize]; + /// Count of drain actions + std::atomic drain_count_; + + public: + /// Current system epoch (global state) + std::atomic current_epoch; + /// Cached value of epoch that is safe to reclaim + std::atomic safe_to_reclaim_epoch; + + LightEpoch(uint32_t size = kTableSize) + : table_{ nullptr } + , num_entries_{ 0 } + , drain_count_{ 0 } + , drain_list_{} { + Initialize(size); + } + + ~LightEpoch() { + Uninitialize(); + } + + private: + void Initialize(uint32_t size) { + num_entries_ = size; + // do cache-line alignment + table_ = reinterpret_cast(aligned_alloc(Constants::kCacheLineBytes, + (size + 2) * sizeof(Entry))); + new(table_) Entry[size + 2]; + current_epoch = 1; + safe_to_reclaim_epoch = 0; + for(uint32_t idx = 0; idx < kDrainListSize; ++idx) { + drain_list_[idx].Initialize(); + } + drain_count_ = 0; + } + + void Uninitialize() { + aligned_free(table_); + table_ = nullptr; + num_entries_ = 0; + current_epoch = 1; + safe_to_reclaim_epoch = 0; + } + + public: + /// Enter the thread into the protected code region + inline uint64_t Protect() { + uint32_t entry = Thread::id(); + table_[entry].local_current_epoch = current_epoch.load(); + return table_[entry].local_current_epoch; + } + + /// Enter the thread into the protected code region + /// Process entries in drain list if possible + inline uint64_t ProtectAndDrain() { + uint32_t entry = Thread::id(); + table_[entry].local_current_epoch = current_epoch.load(); + if(drain_count_.load() > 0) { + Drain(table_[entry].local_current_epoch); + } + return table_[entry].local_current_epoch; + } + + uint64_t ReentrantProtect() { + uint32_t entry = Thread::id(); + if(table_[entry].local_current_epoch != kUnprotected) + return table_[entry].local_current_epoch; + table_[entry].local_current_epoch = current_epoch.load(); + table_[entry].reentrant++; + return table_[entry].local_current_epoch; + } + + inline bool IsProtected() { + uint32_t entry = Thread::id(); + return table_[entry].local_current_epoch != kUnprotected; + } + + /// Exit the thread from the protected code region. 
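A sketch of the intended call pattern around the protected region; the method names come from this class, while the callback and context in the commented-out line are illustrative placeholders:

void epoch_usage_sketch(FASTER::core::LightEpoch& epoch) {
  epoch.ProtectAndDrain();   // enter: publishes current_epoch for this thread, drains ready actions
  // ... read or update epoch-protected structures (hash-table buckets, hybrid-log pages) ...
  epoch.Unprotect();         // leave: this thread no longer pins any epoch

  // Deferred reclamation: the callback runs only after no thread still holds an epoch older than
  // the bump, i.e., once the pre-bump epoch becomes safe to reclaim.
  // epoch.BumpCurrentEpoch(&MyFreeCallback, my_context);
}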
+ void Unprotect() { + table_[Thread::id()].local_current_epoch = kUnprotected; + } + + void ReentrantUnprotect() { + uint32_t entry = Thread::id(); + if(--(table_[entry].reentrant) == 0) { + table_[entry].local_current_epoch = kUnprotected; + } + } + + void Drain(uint64_t nextEpoch) { + ComputeNewSafeToReclaimEpoch(nextEpoch); + for(uint32_t idx = 0; idx < kDrainListSize; ++idx) { + uint64_t trigger_epoch = drain_list_[idx].epoch.load(); + if(trigger_epoch <= safe_to_reclaim_epoch) { + if(drain_list_[idx].TryPop(trigger_epoch)) { + if(--drain_count_ == 0) { + break; + } + } + } + } + } + + /// Increment the current epoch (global system state) + uint64_t BumpCurrentEpoch() { + uint64_t nextEpoch = ++current_epoch; + if(drain_count_ > 0) { + Drain(nextEpoch); + } + return nextEpoch; + } + + /// Increment the current epoch (global system state) and register + /// a trigger action for when older epoch becomes safe to reclaim + uint64_t BumpCurrentEpoch(EpochAction::callback_t callback, IAsyncContext* context) { + uint64_t prior_epoch = BumpCurrentEpoch() - 1; + uint32_t i = 0, j = 0; + while(true) { + uint64_t trigger_epoch = drain_list_[i].epoch.load(); + if(trigger_epoch == EpochAction::kFree) { + if(drain_list_[i].TryPush(prior_epoch, callback, context)) { + ++drain_count_; + break; + } + } else if(trigger_epoch <= safe_to_reclaim_epoch.load()) { + if(drain_list_[i].TrySwap(trigger_epoch, prior_epoch, callback, context)) { + break; + } + } + if(++i == kDrainListSize) { + i = 0; + if(++j == 500) { + j = 0; + std::this_thread::sleep_for(std::chrono::seconds(1)); + fprintf(stderr, "Slowdown: Unable to add trigger to epoch\n"); + } + } + } + return prior_epoch + 1; + } + + /// Compute latest epoch that is safe to reclaim, by scanning the epoch table + uint64_t ComputeNewSafeToReclaimEpoch(uint64_t current_epoch_) { + uint64_t oldest_ongoing_call = current_epoch_; + for(uint32_t index = 1; index <= num_entries_; ++index) { + uint64_t entry_epoch = table_[index].local_current_epoch; + if(entry_epoch != kUnprotected && entry_epoch < oldest_ongoing_call) { + oldest_ongoing_call = entry_epoch; + } + } + safe_to_reclaim_epoch = oldest_ongoing_call - 1; + return safe_to_reclaim_epoch; + } + + void SpinWaitForSafeToReclaim(uint64_t current_epoch_, uint64_t safe_to_reclaim_epoch_) { + do { + ComputeNewSafeToReclaimEpoch(current_epoch_); + } while(safe_to_reclaim_epoch_ > safe_to_reclaim_epoch); + } + + bool IsSafeToReclaim(uint64_t epoch) const { + return (epoch <= safe_to_reclaim_epoch); + } + + /// CPR checkpoint functions. + inline void ResetPhaseFinished() { + for(size_t idx = 0; idx < Thread::kMaxNumThreads; ++idx) { + assert(table_[idx].phase_finished.load() == Phase::REST || + table_[idx].phase_finished.load() == Phase::PERSISTENCE_CALLBACK || + table_[idx].phase_finished.load() == Phase::GC_IN_PROGRESS || + table_[idx].phase_finished.load() == Phase::GROW_IN_PROGRESS); + table_[idx].phase_finished.store(Phase::REST); + } + } + /// This thread has completed the specified phase. + inline bool FinishThreadPhase(Phase phase) { + uint32_t entry = Thread::id(); + table_[entry].phase_finished = phase; + // Check if other threads have reported complete. 
+ for(size_t idx = 0; idx < Thread::kMaxNumThreads; ++idx) { + Phase entry_phase = table_[idx].phase_finished.load(); + uint64_t entry_epoch = table_[idx].local_current_epoch; + if(entry_epoch != 0 && entry_phase != phase) { + return false; + } + } + return true; + } + /// Has this thread completed the specified phase (i.e., is it waiting for other threads to + /// finish the specified phase, before it can advance the global phase)? + inline bool HasThreadFinishedPhase(Phase phase) const { + uint32_t entry = Thread::id(); + return table_[entry].phase_finished == phase; + } +}; + +} +} // namespace FASTER::core diff --git a/cc/src/core/lss_allocator.cc b/cc/src/core/lss_allocator.cc new file mode 100644 index 000000000..deb85e723 --- /dev/null +++ b/cc/src/core/lss_allocator.cc @@ -0,0 +1,169 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include + +#include "alloc.h" +#include "auto_ptr.h" +#include "lss_allocator.h" +#include "thread.h" + +namespace FASTER { +namespace core { + +#define thread_index_ Thread::id() + +LssAllocator lss_allocator{}; + +namespace lss_memory { + +static_assert(sizeof(Header) < kBaseAlignment, "Unexpected header size!"); + +void SegmentAllocator::Free(void* bytes) { +#ifdef _DEBUG + Header* header = reinterpret_cast(bytes) - 1; + assert(header->offset < kSegmentSize); + assert(header->offset + header->size <= kSegmentSize); + // - 0xDA - freed. + ::memset(header + 1, 0xDA, header->size); +#endif + Free(); +} + +void SegmentAllocator::Seal(uint32_t allocations) { + SegmentState delta_state{ allocations, 1 }; + SegmentState old_state{ state.control.fetch_add(delta_state.control) }; + assert(old_state.allocations == 0); + assert(old_state.frees < allocations); + if(allocations == old_state.frees + 1) { + // We were the last to free a block inside this segment, so we must free it. + this->~SegmentAllocator(); + aligned_free(this); + } +} + +void SegmentAllocator::Free() { + SegmentState delta_state{ 0, 1 }; + SegmentState old_state{ state.control.fetch_add(delta_state.control) }; + assert(old_state.allocations == 0 || old_state.frees < old_state.allocations); + if(old_state.allocations == old_state.frees + 1) { + // We were the last to free a block inside this segment, so we must free it. + this->~SegmentAllocator(); + aligned_free(this); + } +} + +void* ThreadAllocator::Allocate(uint32_t size) { + if(!segment_allocator_) { + segment_allocator_ = reinterpret_cast(aligned_alloc(kCacheLineSize, + sizeof(SegmentAllocator))); + if(!segment_allocator_) { + return nullptr; + } + new(segment_allocator_) SegmentAllocator{}; + } + // Block is 16-byte aligned, after a 2-byte (8-byte in _DEBUG mode) header. + uint32_t block_size = static_cast(pad_alignment(size + sizeof(Header), + kBaseAlignment)); + uint32_t offset = Reserve(block_size); + if(segment_offset_ <= kSegmentSize) { + // The allocation succeeded inside the active segment. + uint8_t* buffer = segment_allocator_->buffer; +#ifdef _DEBUG + // - 0xCA - allocated. + ::memset(&buffer[offset], 0xCA, block_size); +#endif + Header* header = reinterpret_cast(&buffer[offset]); +#ifdef _DEBUG + new(header) Header(size, offset); +#else + new(header) Header(offset); +#endif + return header + 1; + } else { + // We filled the active segment; seal it. + segment_allocator_->Seal(allocations_); + segment_allocator_ = nullptr; + allocations_ = 0; + segment_offset_ = 0; + // Call self recursively, to allocate inside a new segment. 
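Seal() and Free() rely on packing the free count and the sealed allocation count into one 64-bit word, so a single fetch_add both publishes a change and reveals whether the caller released the last live block; the count passed to Seal() includes the Reserve() that overflowed the segment, which is why Seal() also contributes one "free". A standalone model of that accounting (not FASTER code; the field layout mirrors SegmentState and reuses its union-punning style):

#include <atomic>
#include <cassert>
#include <cstdint>
#include <cstdio>

union State {
  struct {
    uint32_t frees;
    uint32_t allocations;   // non-zero only once the segment has been sealed
  };
  uint64_t control;
};

std::atomic<uint64_t> segment_state{ 0 };

// Returns true when the caller is the one who must reclaim the segment.
bool Free() {
  State delta{}; delta.frees = 1;
  State old{}; old.control = segment_state.fetch_add(delta.control);
  return old.allocations != 0 && old.allocations == old.frees + 1;
}

bool Seal(uint32_t allocations) {   // count includes the Reserve() that overflowed
  State delta{}; delta.frees = 1; delta.allocations = allocations;
  State old{}; old.control = segment_state.fetch_add(delta.control);
  return allocations == old.frees + 1;
}

int main() {
  // Two live blocks were handed out; a third Reserve() overflowed, so Seal(3) is called.
  assert(!Free());     // first live block freed; segment not sealed yet
  assert(!Seal(3));    // sealed, but one live block remains
  assert(Free());      // last live block freed; this caller reclaims the segment
  std::puts("ok");
  return 0;
}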
+ return Allocate(size); + } +} + +void* ThreadAllocator::AllocateAligned(uint32_t size, uint32_t alignment) { + if(!segment_allocator_) { + segment_allocator_ = reinterpret_cast(aligned_alloc(kCacheLineSize, + sizeof(SegmentAllocator))); + if(!segment_allocator_) { + return nullptr; + } + new(segment_allocator_) SegmentAllocator{}; + } + // Alignment must be >= base alignment, and a power of 2. + assert(alignment >= kBaseAlignment); + assert((alignment & (alignment - 1)) == 0); + // Block needs to be large enough to hold the user block, the header, and the align land fill. + // Max align land fill size is (alignment - kBaseAlignment). + uint32_t block_size = static_cast(pad_alignment( + size + sizeof(Header) + alignment - kBaseAlignment, + kBaseAlignment)); + uint32_t block_offset = Reserve(block_size); + if(segment_offset_ <= kSegmentSize) { + // The allocation succeeded inside the active segment. + uint8_t* buffer = segment_allocator_->buffer; +#ifdef _DEBUG + // - 0xEA - align land fill. + ::memset(&buffer[block_offset], 0xEA, block_size); +#endif + // Align the user block. + uint32_t user_offset = static_cast(pad_alignment(reinterpret_cast( + &buffer[block_offset]) + sizeof(Header), alignment) - + reinterpret_cast(&buffer[block_offset]) - sizeof(Header)); + assert(user_offset + sizeof(Header) + size <= block_size); + uint32_t offset = block_offset + user_offset; +#ifdef _DEBUG + // - 0xCA - allocated. + ::memset(&buffer[offset], 0xCA, size + sizeof(Header)); +#endif + Header* header = reinterpret_cast(&buffer[offset]); +#ifdef _DEBUG + new(header) Header(size, offset); +#else + new(header) Header(offset); +#endif + return header + 1; + } else { + // We filled the active segment; seal it. + segment_allocator_->Seal(allocations_); + segment_allocator_ = nullptr; + allocations_ = 0; + segment_offset_ = 0; + // Call self recursively, to allocate inside a new segment. + return AllocateAligned(size, alignment); + } +} +} // namespace lss_memory + +void* LssAllocator::Allocate(uint32_t size) { + return thread_allocators_[thread_index_].Allocate(size); +} + +void* LssAllocator::AllocateAligned(uint32_t size, uint32_t alignment) { + return thread_allocators_[thread_index_].AllocateAligned(size, alignment); +} + +void LssAllocator::Free(void* bytes) { + lss_memory::Header* header = reinterpret_cast(bytes) - 1; + uint8_t* block = reinterpret_cast(header); + uint32_t offset = header->offset + lss_memory::SegmentAllocator::kBufferOffset; + lss_memory::SegmentAllocator* segment_allocator = + reinterpret_cast(block - offset); + segment_allocator->Free(bytes); +} + +#undef thread_index_ + +} +} // namespace FASTER::core diff --git a/cc/src/core/lss_allocator.h b/cc/src/core/lss_allocator.h new file mode 100644 index 000000000..d1fe14504 --- /dev/null +++ b/cc/src/core/lss_allocator.h @@ -0,0 +1,237 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#ifdef _DEBUG +#include +#endif + +#include "status.h" +#include "thread.h" + +/// A fast allocator intended for mostly-FIFO workloads (e.g., allocating contexts for file-I/O +/// callbacks). Each thread allocates by bumping the tail of its current segment; when it fills a +/// segment, it malloc()s a new one. Any thread frees by decrementing the allocation's segment's +/// ref count; when a (filled) segment's ref count reaches 0, we free() it. So long as the workload +/// is mostly FIFO, we don't leak memory. 
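A usage sketch of the allocator just described, for the mostly-FIFO pattern it targets: allocate an I/O callback context, free it when the callback fires. The include path and the IoContext type are illustrative assumptions:

#include <cstdint>
#include <new>

#include "core/lss_allocator.h"   // include path assumed for illustration

struct IoContext {
  uint64_t offset;
  uint32_t length;
};

void IssueRead(uint64_t offset, uint32_t length) {
  // Bump-allocate the context from the calling thread's active segment.
  void* mem = FASTER::core::lss_allocator.Allocate(sizeof(IoContext));
  if(!mem) return;
  IoContext* context = new(mem) IoContext{ offset, length };
  // ... pass 'context' to an asynchronous read; the completion callback, possibly on
  // another thread, destroys and frees it, keeping lifetimes roughly FIFO ...
  context->~IoContext();
  FASTER::core::lss_allocator.Free(context);
}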
+ +namespace FASTER { +namespace core { + +/// Internal classes and structures. +namespace lss_memory { + +/// Size of each segment (in bytes). (In experiments, a segment size of 16,000 worked well for +/// on Windows, while 8,000 worked well on Linux.) +#ifdef _WIN32 +static constexpr uint32_t kSegmentSize = 16000; +#else +static constexpr uint32_t kSegmentSize = 8000; +#endif + +/// Preserving Windows malloc() behavior, all LSS allocations are aligned to 16 bytes. +static constexpr uint32_t kBaseAlignment = 16; + +/// Header, prepended to all allocated blocks; used to find the ref count variable, to decrement it +/// when the block is freed. (The allocation size isn't needed, since LSS allocations are +/// essentially stack allocations; but _DEBUG mode includes it for the benefit of the caller.) +#ifdef _DEBUG +struct alignas(8) Header { + Header(uint32_t size_, uint32_t offset_) + : offset{ offset_ } + , size{ size_ } { + } + + /// Offset from the head of the segment allocator's buffer to the memory block. + uint32_t offset; + + /// Size of the memory block. + uint32_t size; +}; +static_assert(sizeof(Header) == 8, "Header is not 8 bytes!"); +#else +struct alignas(8) Header { + Header(uint16_t offset_) + : offset{ offset_ } { + } + + /// Offset from the head of the segment allocator's buffer to the memory block. + uint16_t offset; +}; +static_assert(sizeof(Header) == 8, "Header is not 8 bytes!"); +#endif + +class ThreadAllocator; + +class SegmentState { + public: + SegmentState() + : control{ 0 } { + } + + SegmentState(uint64_t control_) + : control{ control_ } { + } + + SegmentState(uint32_t allocations_, uint32_t frees_) + : frees{ frees_ } + , allocations{ allocations_ } { + } + + union { + struct { + /// Count of memory blocks freed inside this segment. Incremented on each free. Frees can + /// take place on any thread. + uint32_t frees; + /// If this segment is sealed, then the count of memory blocks allocated inside this + /// segment. Otherwise, zero. + uint32_t allocations; + }; + /// 64-bit control field, used so that threads can read the allocation count atomically at + /// the same time they increment the free count atomically. + std::atomic control; + }; +}; +static_assert(kSegmentSize < UINT16_MAX / 2, "kSegmentSize too large for offset size!"); + +/// Allocation takes place inside segments. When a segment is no longer needed, we add it to the +/// garbage list. +class SegmentAllocator { + public: + /// Offset from the head of the class to the head of its buffer_ field. +#ifdef _DEBUG + static constexpr uint32_t kBufferOffset = 8; +#else + static constexpr uint32_t kBufferOffset = 14; +#endif + + /// Initialize the segment allocator and allocate the segment. + SegmentAllocator() + : state{} { +#ifdef _DEBUG + // Debug LSS memory codes: + // - 0xBA - initialized, not allocated. + std::memset(buffer, 0xBA, kSegmentSize); +#endif + } + + /// Free the specified memory block. The block must be inside this segment! Returns true if the + /// segment was freed; otherwise, returns false. + void Free(void* bytes); + + /// Seal the segment--no more blocks will be allocated inside this segment. Returns true if the + /// segment was freed; otherwise, returns false. + void Seal(uint32_t blocks_allocated); + + private: + /// Decrement the active references count, effectively freeing one allocation. Also frees the + /// segment if (1) it is sealed and (2) its active references count is now zero. Returns true if + /// the segment was freed; otherwise, returns false. 
+ void Free(); + + public: + /// Segment allocator state (8 bytes). + SegmentState state; + + /// This segment's memory. (First allocation's 8-byte Header starts at 8 (mod 16), so the + /// allocation's contents will start at 0 (mod 16), as desired.) + uint8_t buffer[kSegmentSize]; +}; + +/// Allocator for a single thread. Allocates only; frees are directed by the global allocator +/// object directly to the relevant segment allocator. +class alignas(64) ThreadAllocator { + public: + static constexpr uint32_t kCacheLineSize = 64; + + /// Initialize the thread allocator. The real work happens lazily, when Allocate() is called for + /// the first time. + ThreadAllocator() + : segment_allocator_{ nullptr } + , segment_offset_{ 0 } + , allocations_{ 0 } { + } + + /// Allocate a memory block of the specified size < kSegmentSize. If allocation fails, returns + /// nullptr. + void* Allocate(uint32_t size); + void* AllocateAligned(uint32_t size, uint32_t offset); + + private: + inline uint32_t Reserve(uint32_t block_size) { + assert(block_size <= kSegmentSize); + ++allocations_; + uint32_t result = segment_offset_; + assert(result <= kSegmentSize); + segment_offset_ += block_size; + return result; + } + + /// Segment inside which each thread's new allocations occur (pointer, 8 bytes). + SegmentAllocator* segment_allocator_; + + /// Offset, into the active segment, of the next allocation. + uint32_t segment_offset_; + + /// Number of blocks allocated inside the active segment. + uint32_t allocations_; +}; +static_assert(sizeof(ThreadAllocator) == 64, "sizeof(ThreadAllocator) != 64."); + +} // namespace lss_memory + +/// The LSS allocator allocates memory from a log-structured store, but does not perform garbage +/// collection. Memory is allocated from segments; each segment is freed only after all of its +/// allocations have been freed. This means that if a single allocation inside a segment is still +/// alive, the entire segment is still alive. +/// The LSS allocator works well in the case where memory usage is almost FIFO. In that case, all +/// of the segment's allocations will eventually be freed, so the segment will be freed. The LSS +/// allocator is intended to replace the (synchronous) function call stack, for asynchronous +/// continuations. +class LssAllocator { + public: + /// Maximum number of threads supported. For each possible thread, we reserve an 8-byte + /// ThreadAllocator; so the memory required is 8 * (kMaxThreadCount) bytes. For each actual + /// thread, we reserve a full SegmentAllocator, of size approximately kSegmentSize. + static constexpr size_t kMaxThreadCount = Thread::kMaxNumThreads; + + /// Size of each segment (in bytes). + static constexpr uint32_t kSegmentSize = lss_memory::kSegmentSize; + + /// Preserving Windows malloc() behavior, all LSS allocations are aligned to 16 bytes. + static constexpr uint32_t kBaseAlignment = lss_memory::kBaseAlignment; + + /// Initialize the LSS allocator. The real work happens lazily, when a thread calls Allocate() + /// for the first time. + LssAllocator() { + for(size_t idx = 0; idx < kMaxThreadCount; ++idx) { + thread_allocators_[idx] = lss_memory::ThreadAllocator{}; + } + } + + /// Allocate a memory block of the specified size. Note that size must be < kSegmentSize, since + /// the allocation will take place inside a segment. The Allocate() code is ultimately single- + /// threaded, since we maintain a separate ThreadAllocator per thread, each with its own + /// SegmentAllocator. If allocation fails, returns nullptr. 
+ void* Allocate(uint32_t size); + void* AllocateAligned(uint32_t size, uint32_t alignment); + + /// Free the specified memory block. The Free() code is thread-safe, since the Free() request is + /// always directed to the SegmentAllocator() that originally allocated the code--regardless of + /// what thread it is issued from. + void Free(void* bytes); + + private: + /// To reduce contention (and avoid needing atomic primitives in the allocation path), we + /// maintain a unique allocator per thread. + lss_memory::ThreadAllocator thread_allocators_[kMaxThreadCount]; +}; + +/// The global LSS allocator instance. +extern LssAllocator lss_allocator; + +} +} // namespace FASTER::core diff --git a/cc/src/core/malloc_fixed_page_size.h b/cc/src/core/malloc_fixed_page_size.h new file mode 100644 index 000000000..730e6df3f --- /dev/null +++ b/cc/src/core/malloc_fixed_page_size.h @@ -0,0 +1,582 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "alloc.h" +#include "light_epoch.h" + +namespace FASTER { +namespace core { + +/// The allocator used for the hash table's overflow buckets. + +/// Address into a fixed page. +struct FixedPageAddress { + static constexpr uint64_t kInvalidAddress = 0; + + /// A fixed-page address is 8 bytes. + /// --of which 48 bits are used for the address. (The remaining 16 bits are used by the hash + /// table, for control bits and the tag.) + static constexpr uint64_t kAddressBits = 48; + static constexpr uint64_t kMaxAddress = ((uint64_t)1 << kAddressBits) - 1; + + /// --of which 20 bits are used for offsets into a page, of size 2^20 = 1 million items. + static constexpr uint64_t kOffsetBits = 20; + static constexpr uint64_t kMaxOffset = ((uint64_t)1 << kOffsetBits) - 1; + + /// --and the remaining 28 bits are used for the page index, allowing for approximately 256 + /// million pages. + static constexpr uint64_t kPageBits = kAddressBits - kOffsetBits; + static constexpr uint64_t kMaxPage = ((uint64_t)1 << kPageBits) - 1; + + FixedPageAddress() + : control_{ 0 } { + } + FixedPageAddress(uint64_t control) + : control_{ control } { + } + + bool operator==(const FixedPageAddress& other) const { + assert(reserved == 0); + assert(other.reserved == 0); + return control_ == other.control_; + } + bool operator<(const FixedPageAddress& other) const { + assert(reserved == 0); + assert(other.reserved == 0); + return control_ < other.control_; + } + bool operator>(const FixedPageAddress& other) const { + assert(reserved == 0); + assert(other.reserved == 0); + return control_ > other.control_; + } + bool operator>=(const FixedPageAddress& other) const { + assert(reserved == 0); + assert(other.reserved == 0); + return control_ >= other.control_; + } + FixedPageAddress operator++() { + return FixedPageAddress{ ++control_ }; + } + + uint32_t offset() const { + return static_cast(offset_); + } + uint64_t page() const { + return page_; + } + uint64_t control() const { + return control_; + } + + union { + struct { + uint64_t offset_ : kOffsetBits; // 20 bits + uint64_t page_ : kPageBits; // 28 bits + uint64_t reserved : 64 - kAddressBits; // 16 bits + }; + uint64_t control_; + }; +}; +static_assert(sizeof(FixedPageAddress) == 8, "sizeof(FixedPageAddress) != 8"); + +/// Atomic address into a fixed page. 
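FixedPageAddress packs a 20-bit page offset, a 28-bit page index, and 16 reserved bits into one 64-bit word, so the control value doubles as a global slot index and a bare increment walks offsets first, then pages. A standalone check of that layout (not FASTER code):

#include <cassert>
#include <cstdint>
#include <cstdio>

constexpr uint64_t kOffsetBits = 20;
constexpr uint64_t kMaxOffset = (uint64_t{ 1 } << kOffsetBits) - 1;

int main() {
  uint64_t page = 3, offset = 42;
  uint64_t control = (page << kOffsetBits) | offset;   // reserved bits stay zero

  assert((control & kMaxOffset) == offset);            // low 20 bits: offset within the page
  assert((control >> kOffsetBits) == page);            // next 28 bits: page index
  // Because a page holds exactly 2^20 slots, control is also the global slot number,
  // which is why incrementing the control word implements bump allocation.
  assert(control == page * (kMaxOffset + 1) + offset);
  std::puts("ok");
  return 0;
}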
+class AtomicFixedPageAddress { + public: + AtomicFixedPageAddress(const FixedPageAddress& address) + : control_{ address.control_ } { + } + + /// Atomic access. + inline FixedPageAddress load() const { + return FixedPageAddress{ control_.load() }; + } + void store(FixedPageAddress value) { + control_.store(value.control_); + } + FixedPageAddress operator++(int) { + return FixedPageAddress{ control_++ }; + } + + + private: + /// Atomic access to the address. + std::atomic control_; +}; +static_assert(sizeof(AtomicFixedPageAddress) == 8, "sizeof(AtomicFixedPageAddress) != 8"); + +struct FreeAddress { + FixedPageAddress removed_addr; + uint64_t removal_epoch; +}; + +template +class FixedPage { + public: + typedef T item_t; + static constexpr uint64_t kPageSize = FixedPageAddress::kMaxOffset + 1; + + /// Accessors. + inline const item_t& element(uint32_t offset) const { + assert(offset <= FixedPageAddress::kMaxOffset); + return elements_[offset]; + } + inline item_t& element(uint32_t offset) { + assert(offset <= FixedPageAddress::kMaxOffset); + return elements_[offset]; + } + + private: + /// The page's contents. + item_t elements_[kPageSize]; + static_assert(alignof(item_t) <= Constants::kCacheLineBytes, + "alignof(item_t) > Constants::kCacheLineBytes"); +}; + +template +class FixedPageArray { + public: + typedef T item_t; + typedef FixedPage page_t; + typedef FixedPageArray array_t; + + protected: + FixedPageArray(uint64_t alignment_, uint64_t size_, const array_t* old_array) + : alignment{ alignment_ } + , size{ size_ } { + assert(Utility::IsPowerOfTwo(size)); + uint64_t idx = 0; + if(old_array) { + assert(old_array->size < size); + for(; idx < old_array->size; ++idx) { + page_t* page; + page = old_array->pages()[idx].load(std::memory_order_acquire); + while(page == nullptr) { + std::this_thread::yield(); + page = old_array->pages()[idx].load(std::memory_order_acquire); + } + pages()[idx] = page; + } + } + for(; idx < size; ++idx) { + pages()[idx] = nullptr; + } + } + + public: + static FixedPageArray* Create(uint64_t alignment, uint64_t size, const array_t* old_array) { + void* buffer = std::malloc(sizeof(array_t) + size * sizeof(std::atomic)); + return new(buffer) array_t{ alignment, size, old_array }; + } + + static void Delete(array_t* arr, bool owns_pages) { + assert(arr); + if(owns_pages) { + for(uint64_t idx = 0; idx < arr->size; ++idx) { + page_t* page = arr->pages()[idx].load(std::memory_order_acquire); + if(page) { + page->~FixedPage(); + aligned_free(page); + } + } + } + arr->~FixedPageArray(); + std::free(arr); + } + + /// Used by allocator.Get(). + inline page_t* Get(uint64_t page_idx) { + assert(page_idx < size); + return pages()[page_idx].load(std::memory_order_acquire); + } + + /// Used by allocator.Allocate(). + inline page_t* GetOrAdd(uint64_t page_idx) { + assert(page_idx < size); + page_t* page = pages()[page_idx].load(std::memory_order_acquire); + while(page == nullptr) { + page = AddPage(page_idx); + } + return page; + } + + inline page_t* AddPage(uint64_t page_idx) { + assert(page_idx < size); + void* buffer = aligned_alloc(alignment, sizeof(page_t)); + page_t* new_page = new(buffer) page_t{}; + page_t* expected = nullptr; + if(pages()[page_idx].compare_exchange_strong(expected, new_page, std::memory_order_release)) { + return new_page; + } else { + new_page->~page_t(); + aligned_free(new_page); + return expected; + } + } + + private: + /// Accessors, since zero-length arrays at the ends of structs aren't standard in C++. 
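FixedPageArray::AddPage() uses the usual lazy-publication idiom: any thread may race to create page i, exactly one compare-and-swap wins, and losers destroy their copy and adopt the published page. A standalone sketch of that idiom (not FASTER code; plain new/delete stands in for the aligned page allocation):

#include <atomic>
#include <cassert>

struct Page { int data[4] = {}; };

std::atomic<Page*> slot{ nullptr };

Page* GetOrAdd() {
  Page* page = slot.load(std::memory_order_acquire);
  while(page == nullptr) {
    Page* candidate = new Page{};
    Page* expected = nullptr;
    if(slot.compare_exchange_strong(expected, candidate, std::memory_order_release)) {
      page = candidate;          // we won: our page is now the published one
    } else {
      delete candidate;          // we lost: discard ours and use the published page
      page = expected;
    }
  }
  return page;
}

int main() {
  Page* a = GetOrAdd();
  Page* b = GetOrAdd();
  assert(a == b);                // every caller sees the same page
  delete a;
  return 0;
}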
+ const std::atomic* pages() const { + return reinterpret_cast*>(this + 1); + } + std::atomic* pages() { + return reinterpret_cast*>(this + 1); + } + + public: + /// Alignment at which each page is allocated. + const uint64_t alignment; + /// Maximum number of pages in the array; fixed at time of construction. + const uint64_t size; + /// Followed by [size] std::atomic<> pointers to (page_t) pages. (Not shown here.) +}; + +class alignas(Constants::kCacheLineBytes) FreeList { + public: + std::deque free_list; +}; + +template +class MallocFixedPageSize { + public: + typedef T item_t; + typedef D disk_t; + typedef typename D::file_t file_t; + typedef FixedPage page_t; + typedef FixedPageArray array_t; + typedef MallocFixedPageSize alloc_t; + + MallocFixedPageSize() + : alignment_{ UINT64_MAX } + , count_{ 0 } + , epoch_{ nullptr } + , page_array_{ nullptr } + , disk_{ nullptr } + , pending_checkpoint_writes_{ 0 } + , pending_recover_reads_{ 0 } + , checkpoint_pending_{ false } + , checkpoint_failed_{ false } + , recover_pending_{ false } + , recover_failed_{ false } { + } + + ~MallocFixedPageSize() { + if(page_array_.load() != nullptr) { + array_t::Delete(page_array_.load(), true); + } + } + + inline void Initialize(uint64_t alignment, LightEpoch& epoch) { + if(page_array_.load() != nullptr) { + array_t::Delete(page_array_.load(), true); + } + alignment_ = alignment; + count_.store(0); + epoch_ = &epoch; + disk_ = nullptr; + pending_checkpoint_writes_ = 0; + pending_recover_reads_ = 0; + checkpoint_pending_ = false; + checkpoint_failed_ = false; + recover_pending_ = false; + recover_failed_ = false; + + array_t* page_array = array_t::Create(alignment, 2, nullptr); + page_array->AddPage(0); + page_array_.store(page_array, std::memory_order_release); + // Allocate the null pointer. + Allocate(); + } + + inline void Uninitialize() { + if(page_array_.load() != nullptr) { + array_t::Delete(page_array_.load(), true); + page_array_.store(nullptr); + } + } + + inline item_t& Get(FixedPageAddress address) { + page_t* page = page_array_.load(std::memory_order_acquire)->Get(address.page()); + assert(page); + return page->element(address.offset()); + } + inline const item_t& Get(FixedPageAddress address) const { + page_t* page = page_array_.load(std::memory_order_acquire)->Get(address.page()); + assert(page); + return page->element(address.offset()); + } + + FixedPageAddress Allocate(); + + void FreeAtEpoch(FixedPageAddress addr, uint64_t removed_epoch) { + free_list().push_back(FreeAddress{ addr, removed_epoch }); + } + + /// Checkpointing and recovery. + Status Checkpoint(disk_t& disk, file_t&& file, uint64_t& size); + Status CheckpointComplete(bool wait); + + Status Recover(disk_t& disk, file_t&& file, uint64_t file_size, FixedPageAddress count); + Status RecoverComplete(bool wait); + + std::deque& free_list() { + return free_list_[Thread::id()].free_list; + } + const std::deque& free_list() const { + return free_list_[Thread::id()].free_list; + } + + FixedPageAddress count() const { + return count_.load(); + } + + private: + /// Checkpointing and recovery. 
+ class AsyncIoContext : public IAsyncContext { + public: + AsyncIoContext(alloc_t* allocator_) + : allocator{ allocator_ } { + } + + /// The deep-copy constructor + AsyncIoContext(AsyncIoContext& other) + : allocator{ other.allocator } { + } + + protected: + Status DeepCopy_Internal(IAsyncContext*& context_copy) final { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + public: + alloc_t* allocator; + }; + + array_t* ExpandArray(array_t* expected, uint64_t new_size); + + private: + /// Alignment at which each page is allocated. + uint64_t alignment_; + /// Array of all of the pages we've allocated. + std::atomic page_array_; + /// How many elements we've allocated. + AtomicFixedPageAddress count_; + + LightEpoch* epoch_; + + /// State for ongoing checkpoint/recovery. + disk_t* disk_; + file_t file_; + std::atomic pending_checkpoint_writes_; + std::atomic pending_recover_reads_; + std::atomic checkpoint_pending_; + std::atomic checkpoint_failed_; + std::atomic recover_pending_; + std::atomic recover_failed_; + + FreeList free_list_[Thread::kMaxNumThreads]; +}; + +/// Implementations. +template +Status MallocFixedPageSize::Checkpoint(disk_t& disk, file_t&& file, uint64_t& size) { + constexpr uint32_t kWriteSize = page_t::kPageSize * sizeof(item_t); + + auto callback = [](IAsyncContext* ctxt, Status result, size_t bytes_transferred) { + CallbackContext context{ ctxt }; + if(result != Status::Ok) { + context->allocator->checkpoint_failed_ = true; + } + if(--context->allocator->pending_checkpoint_writes_ == 0) { + result = context->allocator->file_.Close(); + if(result != Status::Ok) { + context->allocator->checkpoint_failed_ = true; + } + context->allocator->checkpoint_pending_ = false; + } + }; + + disk_ = &disk; + file_ = std::move(file); + size = 0; + checkpoint_failed_ = false; + array_t* page_array = page_array_.load(); + FixedPageAddress count = count_.load(); + + uint64_t num_levels = count.page() + (count.offset() > 0 ? 1 : 0); + assert(!checkpoint_pending_); + assert(pending_checkpoint_writes_ == 0); + checkpoint_pending_ = true; + pending_checkpoint_writes_ = num_levels; + for(uint64_t idx = 0; idx < num_levels; ++idx) { + AsyncIoContext context{ this }; + RETURN_NOT_OK(file_.WriteAsync(page_array->Get(idx), idx * kWriteSize, kWriteSize, callback, + context)); + } + size = count.control_ * sizeof(item_t); + return Status::Ok; +} + +template +Status MallocFixedPageSize::CheckpointComplete(bool wait) { + disk_->TryComplete(); + bool complete = !checkpoint_pending_.load(); + while(wait && !complete) { + disk_->TryComplete(); + complete = !checkpoint_pending_.load(); + std::this_thread::yield(); + } + if(!complete) { + return Status::Pending; + } else { + return checkpoint_failed_ ? 
Status::IOError : Status::Ok; + } +} + +template +Status MallocFixedPageSize::Recover(disk_t& disk, file_t&& file, uint64_t file_size, + FixedPageAddress count) { + constexpr uint64_t kReadSize = page_t::kPageSize * sizeof(item_t); + + auto callback = [](IAsyncContext* ctxt, Status result, size_t bytes_transferred) { + CallbackContext context{ ctxt }; + if(result != Status::Ok) { + context->allocator->recover_failed_ = true; + } + if(--context->allocator->pending_recover_reads_ == 0) { + result = context->allocator->file_.Close(); + if(result != Status::Ok) { + context->allocator->recover_failed_ = true; + } + context->allocator->recover_pending_ = false; + } + }; + + assert(file_size % sizeof(item_t) == 0); + disk_ = &disk; + file_ = std::move(file); + recover_failed_ = false; + + // The size reserved by recovery is >= the size checkpointed to disk. + FixedPageAddress file_end_addr{ file_size / sizeof(item_t) }; + uint64_t num_file_levels = file_end_addr.page() + (file_end_addr.offset() > 0 ? 1 : 0); + assert(num_file_levels > 0); + assert(count >= file_end_addr); + uint64_t num_levels = count.page() + (count.offset() > 0 ? 1 : 0); + assert(num_levels > 0); + + array_t* page_array = page_array_.load(); + // Ensure that the allocator has enough pages. + if(page_array->size < num_levels) { + uint64_t new_size = next_power_of_two(num_levels); + page_array = ExpandArray(page_array, new_size); + } + count_.store(count); + assert(!recover_pending_); + assert(pending_recover_reads_.load() == 0); + recover_pending_ = true; + pending_recover_reads_ = num_file_levels; + for(uint64_t idx = 0; idx < num_file_levels; ++idx) { + //read a full page + AsyncIoContext context{ this }; + RETURN_NOT_OK(file_.ReadAsync(idx * kReadSize, page_array->GetOrAdd(idx), kReadSize, callback, + context)); + } + return Status::Ok; +} + +template +Status MallocFixedPageSize::RecoverComplete(bool wait) { + disk_->TryComplete(); + bool complete = !recover_pending_.load(); + while(wait && !complete) { + disk_->TryComplete(); + complete = !recover_pending_.load(); + std::this_thread::yield(); + } + if(!complete) { + return Status::Pending; + } else { + return recover_failed_ ? Status::IOError : Status::Ok; + } +} + +template +FixedPageArray* MallocFixedPageSize::ExpandArray(array_t* expected, uint64_t new_size) { + class Delete_Context : public IAsyncContext { + public: + Delete_Context(array_t* arr_) + : arr{ arr_ } { + } + /// The deep-copy constructor. + Delete_Context(const Delete_Context& other) + : arr{ other.arr } { + } + protected: + Status DeepCopy_Internal(IAsyncContext*& context_copy) final { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + public: + array_t* arr; + }; + + auto delete_callback = [](IAsyncContext* ctxt) { + CallbackContext context{ ctxt }; + array_t::Delete(context->arr, false); + }; + + assert(Utility::IsPowerOfTwo(new_size)); + do { + array_t* new_array = array_t::Create(alignment_, new_size, expected); + if(page_array_.compare_exchange_strong(expected, new_array, std::memory_order_release)) { + // Have to free the old array, under epoch protection. 
+ Delete_Context context{ expected }; + IAsyncContext* context_copy; + Status result = context.DeepCopy(context_copy); + assert(result == Status::Ok); + epoch_->BumpCurrentEpoch(delete_callback, context_copy); + return new_array; + } else { + new_array->~array_t(); + std::free(new_array); + } + } while(expected->size < new_size); + return expected; +} + +template +inline FixedPageAddress MallocFixedPageSize::Allocate() { + if(!free_list().empty()) { + // Check the head of the free list. + if(free_list().front().removal_epoch <= epoch_->safe_to_reclaim_epoch.load()) { + FixedPageAddress removed_addr = free_list().front().removed_addr; + free_list().pop_front(); + return removed_addr; + } + } + // Determine insertion page_index. + FixedPageAddress addr = count_++; + array_t* page_array = page_array_.load(std::memory_order_acquire); + if(addr.page() >= page_array->size) { + // Need to resize the page array. + page_array = ExpandArray(page_array, next_power_of_two(addr.page() + 1)); + } + if(addr.offset() == 0 && addr.page() + 1 < page_array->size) { + // Add the next page early, to try to avoid blocking other threads. + page_array->AddPage(addr.page() + 1); + } + page_array->GetOrAdd(addr.page()); + return addr; +} + +} +} // namespace FASTER::core diff --git a/cc/src/core/native_buffer_pool.h b/cc/src/core/native_buffer_pool.h new file mode 100644 index 000000000..d1a2c387d --- /dev/null +++ b/cc/src/core/native_buffer_pool.h @@ -0,0 +1,188 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include + +#include "alloc.h" +#include "utility.h" + +#ifdef _WIN32 +#include +#pragma intrinsic(_BitScanReverse) + +/// Microsoft's concurrency::concurrent_queue is based on Intel's tbb::concurrent_queue. +#include +template +using concurrent_queue = concurrency::concurrent_queue; +#else +namespace FASTER { +/// Convert GCC's __builtin_clz() to Microsoft's _BitScanReverse. +inline uint8_t _BitScanReverse(unsigned long* index, uint32_t mask) { + bool found = mask > 0; + *index = 31 - __builtin_clz(mask); + return found; +} +} + +#include +template +using concurrent_queue = tbb::concurrent_queue; +#endif + +namespace FASTER { +namespace core { + +/// A buffer pool used for file I/Os. + +class NativeSectorAlignedBufferPool; + +/// A sector-aligned memory block, along with offsets into the block. +class SectorAlignedMemory { + public: + /// Default constructor. + SectorAlignedMemory() + : buffer_{ nullptr } + , valid_offset{ 0 } + , required_bytes{ 0 } + , available_bytes{ 0 } + , level_{ 0 } + , pool_{ nullptr } { + } + SectorAlignedMemory(uint8_t* buffer, uint32_t level, NativeSectorAlignedBufferPool* pool) + : buffer_{ buffer } + , valid_offset{ 0 } + , required_bytes{ 0 } + , available_bytes{ 0 } + , level_{ level } + , pool_{ pool } { + } + /// No copy constructor. + SectorAlignedMemory(const SectorAlignedMemory&) = delete; + /// Move constructor. + SectorAlignedMemory(SectorAlignedMemory&& other) + : buffer_{ other.buffer_ } + , valid_offset{ other.valid_offset } + , required_bytes{ other.required_bytes } + , available_bytes{ other.available_bytes } + , level_{ other.level_ } + , pool_{ other.pool_ } { + other.buffer_ = nullptr; + other.pool_ = nullptr; + } + + inline ~SectorAlignedMemory(); + + /// Move assignment operator. 
+ inline SectorAlignedMemory& operator=(SectorAlignedMemory&& other); + + inline void CopyValidBytesToAddress(uint8_t* pt) const { + std::memcpy(pt, &buffer_[valid_offset], required_bytes); + } + inline uint8_t* GetValidPointer() { + return &buffer_[valid_offset]; + } + inline uint8_t* buffer() { + return buffer_; + } + + private: + uint8_t* buffer_; + public: + uint32_t valid_offset; + uint32_t required_bytes; + uint32_t available_bytes; + private: + uint32_t level_; + NativeSectorAlignedBufferPool* pool_; +}; +static_assert(sizeof(SectorAlignedMemory) == 32, "sizeof(SectorAlignedMemory) != 32"); + +/// Aligned buffer pool is a pool of memory. +/// Internally, it is organized as an array of concurrent queues where each concurrent +/// queue represents a memory of size in particular range. queue_[i] contains memory +/// segments each of size (2^i * sectorSize). +class NativeSectorAlignedBufferPool { + private: + static constexpr uint32_t kLevels = 32; + + public: + NativeSectorAlignedBufferPool(uint32_t recordSize, uint32_t sectorSize) + : record_size_{ recordSize } + , sector_size_{ sectorSize } { + } + + inline void Return(uint32_t level, uint8_t* buffer) { + assert(level < kLevels); + queue_[level].push(buffer); + } + inline SectorAlignedMemory Get(uint32_t numRecords); + + private: + uint32_t Level(uint32_t sectors) { + assert(sectors > 0); + if(sectors == 1) { + return 0; + } + // BSR returns the page_index k of the most-significant 1 bit. So 2^(k+1) > (sectors - 1) >= + // 2^k, which means 2^(k+1) >= sectors > 2^k. + unsigned long k; + _BitScanReverse(&k, sectors - 1); + return k + 1; + } + + uint32_t record_size_; + uint32_t sector_size_; + /// Level 0 caches memory allocations of size (sectorSize); level n+1 caches allocations of size + /// (sectorSize) * 2^n. + concurrent_queue queue_[kLevels]; +}; + +/// Implementations. +inline SectorAlignedMemory& SectorAlignedMemory::operator=(SectorAlignedMemory&& other) { + if(buffer_ == other.buffer_) { + // Self-assignment is a no-op. + return *this; + } + if(buffer_ != nullptr) { + // Return our buffer to the pool, before taking ownership of a new buffer. + pool_->Return(level_, buffer_); + } + buffer_ = other.buffer_; + valid_offset = other.valid_offset; + required_bytes = other.required_bytes; + available_bytes = other.available_bytes; + level_ = other.level_; + pool_ = other.pool_; + + // We own the buffer now; other SectorAlignedMemory does not. + other.buffer_ = nullptr; + other.pool_ = nullptr; + return *this; +} + +inline SectorAlignedMemory::~SectorAlignedMemory() { + if(buffer_) { + pool_->Return(level_, buffer_); + } +} + +inline SectorAlignedMemory NativeSectorAlignedBufferPool::Get(uint32_t numRecords) { + // How many sectors do we need? + uint32_t sectors_required = (numRecords * record_size_ + sector_size_ - 1) / sector_size_; + uint32_t level = Level(sectors_required); + uint8_t* buffer; + if(queue_[level].try_pop(buffer)) { + return SectorAlignedMemory{ buffer, level, this }; + } else { + uint8_t* buffer = reinterpret_cast(aligned_alloc(sector_size_, + sector_size_ * (1 << level))); + return SectorAlignedMemory{ buffer, level, this }; + } +} + +} +} // namespace FASTER::core \ No newline at end of file diff --git a/cc/src/core/persistent_memory_malloc.h b/cc/src/core/persistent_memory_malloc.h new file mode 100644 index 000000000..abaa0b9d0 --- /dev/null +++ b/cc/src/core/persistent_memory_malloc.h @@ -0,0 +1,1021 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
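NativeSectorAlignedBufferPool::Get() maps a request of n sectors to queue_[k], where k is the smallest level with 2^k >= n, so each queue recycles buffers of size sector_size * 2^k. A standalone model of that level computation (not FASTER code; a portable loop replaces _BitScanReverse):

#include <cassert>
#include <cstdint>

uint32_t Level(uint32_t sectors) {
  assert(sectors > 0);
  uint32_t level = 0;
  while((uint32_t{ 1 } << level) < sectors) {
    ++level;                 // round up to the next power-of-two bucket
  }
  return level;
}

int main() {
  assert(Level(1) == 0);
  assert(Level(2) == 1);
  assert(Level(3) == 2);     // rounded up to 4 sectors
  assert(Level(8) == 3);
  assert(Level(9) == 4);     // rounded up to 16 sectors
  return 0;
}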
+ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "device/file_system_disk.h" +#include "address.h" +#include "async_result_types.h" +#include "gc_state.h" +#include "light_epoch.h" +#include "native_buffer_pool.h" +#include "recovery_status.h" +#include "status.h" + +namespace FASTER { +namespace core { + +/// The log allocator, used by FASTER to store records. + +enum class FlushStatus : uint8_t { + Flushed, + InProgress +}; + +enum class CloseStatus : uint8_t { + Closed, + Open +}; + +/// Pack flush- and close-status into a single 16-bit value. +/// State transitions are: +/// { Flushed, Closed } (default state) +/// --> { InProgress, Open } (when issuing the flush to disk) +/// --> either { . , Closed} (when moving the head address forward) +/// or { Flushed, . } (when the flush completes). +struct FlushCloseStatus { + FlushCloseStatus() + : flush{ FlushStatus::Flushed } + , close{ CloseStatus::Closed } { + } + + FlushCloseStatus(FlushStatus flush_, CloseStatus close_) + : flush{ flush_ } + , close{ close_ } { + } + + FlushCloseStatus(uint16_t control_) + : control{ control_ } { + } + + /// Is the page ready for use? + inline bool Ready() const { + return flush == FlushStatus::Flushed && close == CloseStatus::Open; + } + + union { + struct { + FlushStatus flush; + CloseStatus close; + }; + uint16_t control; + }; +}; +static_assert(sizeof(FlushCloseStatus) == 2, "sizeof(FlushCloseStatus) != 2"); + +/// Atomic version of FlushCloseStatus. Can set and get flush- and close- status, together, +/// atomically. +class AtomicFlushCloseStatus { + public: + AtomicFlushCloseStatus() + : status_{} { + } + + inline void store(FlushStatus flush, CloseStatus close) { + // Sets flush and close statuses, atomically. + FlushCloseStatus status{ flush, close }; + control_.store(status.control); + } + + inline FlushCloseStatus load() const { + // Gets flush and close statuses, atomically. + return FlushCloseStatus{ control_.load() }; + } + + inline bool compare_exchange_weak(FlushCloseStatus& expected, FlushCloseStatus value) { + uint16_t expected_control = expected.control; + bool result = control_.compare_exchange_weak(expected_control, value.control); + expected.control = expected_control; + return result; + } + inline bool compare_exchange_strong(FlushCloseStatus& expected, FlushCloseStatus value) { + uint16_t expected_control = expected.control; + bool result = control_.compare_exchange_strong(expected_control, value.control); + expected.control = expected_control; + return result; + } + + union { + FlushCloseStatus status_; + std::atomic control_; + }; +}; +static_assert(sizeof(AtomicFlushCloseStatus) == 2, "sizeof(FlushCloseStatus) != 2"); + +struct FullPageStatus { + FullPageStatus() + : LastFlushedUntilAddress{ 0 } + , status{} { + } + + AtomicAddress LastFlushedUntilAddress; + AtomicFlushCloseStatus status; +}; +static_assert(sizeof(FullPageStatus) == 16, "sizeof(FullPageStatus) != 16"); + +/// Page and offset of the tail of the log. Can reserve space within the current page or move to a +/// new page. +class PageOffset { + public: + PageOffset(uint32_t page, uint64_t offset) + : offset_{ offset } + , page_{ page } { + assert(page <= Address::kMaxPage); + } + + PageOffset(uint64_t control) + : control_{ control } { + } + + PageOffset(const Address& address) + : offset_{ address.offset() } + , page_{ address.page() } { + } + + /// Accessors. 
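The flush and close states below share a 16-bit word precisely so that one field can be updated without losing a concurrent change to the other. A standalone model of the compare-exchange loop used for that (not FASTER code; it reuses the same union-punning style as the header):

#include <atomic>
#include <cassert>
#include <cstdint>

enum class FlushStatus : uint8_t { Flushed, InProgress };
enum class CloseStatus : uint8_t { Closed, Open };

union PackedStatus {
  struct {
    FlushStatus flush;
    CloseStatus close;
  };
  uint16_t control;
};

std::atomic<uint16_t> status{ 0 };   // { Flushed, Closed }, the default state

// Mark the page flushed while preserving whatever close status is current.
void MarkFlushed() {
  PackedStatus old_status;
  old_status.control = status.load();
  PackedStatus new_status;
  do {
    new_status.flush = FlushStatus::Flushed;
    new_status.close = old_status.close;
  } while(!status.compare_exchange_weak(old_status.control, new_status.control));
}

int main() {
  // A racing thread closed the page while its flush was still in progress.
  PackedStatus in_flight;
  in_flight.flush = FlushStatus::InProgress;
  in_flight.close = CloseStatus::Closed;
  status.store(in_flight.control);

  MarkFlushed();

  PackedStatus now;
  now.control = status.load();
  assert(now.flush == FlushStatus::Flushed && now.close == CloseStatus::Closed);
  return 0;
}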
+ inline uint64_t offset() const { + return offset_; + } + inline uint32_t page() const { + return static_cast(page_); + } + inline uint64_t control() const { + return control_; + } + + /// Conversion operator. + inline operator Address() const { + assert(offset_ <= Address::kMaxOffset); + return Address{ page(), static_cast(offset()) }; + } + + private: + /// Use 41 bits for offset, which gives us approximately 2 PB of overflow space, for + /// Reserve(). + union { + struct { + uint64_t offset_ : 64 - Address::kPageBits; + uint64_t page_ : Address::kPageBits; + }; + uint64_t control_; + }; +}; +static_assert(sizeof(PageOffset) == 8, "sizeof(PageOffset) != 8"); + +/// Atomic page + offset marker. Can Reserve() space from current page, or move to NewPage(). +class AtomicPageOffset { + public: + AtomicPageOffset() + : control_{ 0 } { + } + + AtomicPageOffset(uint32_t page, uint64_t offset) + : control_{ PageOffset{ page, offset } .control() } { + } + + AtomicPageOffset(const Address& address) { + PageOffset page_offset{ address }; + control_.store(page_offset.control()); + } + + /// Reserve space within the current page. Can overflow the page boundary (so result offset > + /// Address::kMaxOffset). + inline PageOffset Reserve(uint32_t num_slots) { + assert(num_slots <= Address::kMaxOffset); + PageOffset offset{ 0, num_slots }; + return PageOffset{ control_.fetch_add(offset.control()) }; + } + + /// Move to the next page. The compare-and-swap can fail. Returns "true" if some thread advanced + /// the thread; sets "won_cas" = "true" if this thread won the CAS, which means it has been + /// chosen to set up the new page. + inline bool NewPage(uint32_t old_page, bool& won_cas) { + assert(old_page < Address::kMaxPage); + won_cas = false; + PageOffset expected_page_offset = load(); + if(old_page != expected_page_offset.page()) { + // Another thread already moved to the new page. + assert(old_page < expected_page_offset.page()); + return true; + } + PageOffset new_page{ old_page + 1, 0 }; + uint64_t expected = expected_page_offset.control(); + // Try to move to a new page. + won_cas = control_.compare_exchange_strong(expected, new_page.control()); + return PageOffset{ expected } .page() > old_page; + } + + inline PageOffset load() const { + return PageOffset{ control_.load() }; + } + inline void store(Address address) { + PageOffset page_offset{ address.page(), address.offset() }; + control_.store(page_offset.control()); + } + + private: + union { + /// Atomic access to the page+offset. + std::atomic control_; + }; +}; +static_assert(sizeof(AtomicPageOffset) == 8, "sizeof(AtomicPageOffset) != 8"); + +/// The main allocator. +template +class PersistentMemoryMalloc { + public: + typedef D disk_t; + typedef typename D::file_t file_t; + typedef typename D::log_file_t log_file_t; + typedef PersistentMemoryMalloc alloc_t; + + /// Each page in the buffer is 2^25 bytes (= 32 MB). + static constexpr uint64_t kPageSize = Address::kMaxOffset + 1; + + /// The first 4 HLOG pages should be below the head (i.e., being flushed to disk). 
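Tail allocation works by a single fetch_add on the packed page+offset word: because the offset field is 41 bits wide, reservations may run past the 2^25-byte page, and a thread whose reservation spills over discards it and requests a new page instead. A standalone model of that overflow detection (not FASTER code):

#include <atomic>
#include <cassert>
#include <cstdint>
#include <cstdio>

constexpr uint64_t kOffsetBits = 41;                          // offset field width in PageOffset
constexpr uint64_t kOffsetMask = (uint64_t{ 1 } << kOffsetBits) - 1;
constexpr uint64_t kPageSize = uint64_t{ 1 } << 25;           // 32 MB log pages

std::atomic<uint64_t> tail{ 0 };                              // page in the high bits, offset low

// Reserve num_slots bytes at the tail; the caller must check for page overflow.
void Reserve(uint32_t num_slots, uint64_t& page, uint64_t& offset) {
  uint64_t old_control = tail.fetch_add(num_slots);           // bumps only the offset bits
  page = old_control >> kOffsetBits;
  offset = old_control & kOffsetMask;
}

int main() {
  uint64_t page, offset;
  Reserve(512, page, offset);
  assert(page == 0 && offset == 0 && offset + 512 <= kPageSize);   // fits: use the address

  tail.store(kPageSize - 8);                                  // page 0 is nearly full
  Reserve(512, page, offset);
  assert(offset + 512 > kPageSize);                           // spilled: caller must move to a new page
  std::puts("ok");
  return 0;
}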
+ static constexpr uint32_t kNumHeadPages = 4; + + PersistentMemoryMalloc(uint64_t log_size, LightEpoch& epoch, disk_t& disk_, log_file_t& file_, + Address start_address, double log_mutable_fraction) + : sector_size{ static_cast(file_.alignment()) } + , epoch_{ &epoch } + , disk{ &disk_ } + , file{ &file_ } + , read_buffer_pool{ 1, sector_size } + , io_buffer_pool{ 1, sector_size } + , read_only_address{ start_address } + , safe_read_only_address{ start_address } + , head_address{ start_address } + , safe_head_address{ start_address } + , flushed_until_address{ start_address } + , begin_address{ start_address } + , tail_page_offset_{ start_address } + , buffer_size_{ 0 } + , pages_{ nullptr } + , page_status_{ nullptr } { + assert(start_address.page() <= Address::kMaxPage); + + if(log_size % kPageSize != 0) { + throw std::invalid_argument{ "Log size must be a multiple of 32 MB" }; + } + if(log_size % kPageSize > UINT32_MAX) { + throw std::invalid_argument{ "Log size must be <= 128 PB" }; + } + buffer_size_ = static_cast(log_size / kPageSize); + + if(buffer_size_ <= kNumHeadPages + 1) { + throw std::invalid_argument{ "Must have at least 2 non-head pages" }; + } + // The latest N pages should be mutable. + num_mutable_pages_ = static_cast(log_mutable_fraction * buffer_size_); + if(num_mutable_pages_ <= 1) { + // Need at least two mutable pages: one to write to, and one to open up when the previous + // mutable page is full. + throw std::invalid_argument{ "Must have at least 2 mutable pages" }; + } + + pages_ = new uint8_t* [buffer_size_]; + for(uint32_t idx = 0; idx < buffer_size_; ++idx) { + pages_[idx] = nullptr; + } + + page_status_ = new FullPageStatus[buffer_size_]; + + PageOffset tail_page_offset = tail_page_offset_.load(); + AllocatePage(tail_page_offset.page()); + AllocatePage(tail_page_offset.page() + 1); + } + + PersistentMemoryMalloc(uint64_t log_size, LightEpoch& epoch, disk_t& disk_, log_file_t& file_, + double log_mutable_fraction) + : PersistentMemoryMalloc(log_size, epoch, disk_, file_, Address{ 0 }, log_mutable_fraction) { + /// Allocate the invalid page. Supports allocations aligned up to kCacheLineBytes. + uint32_t discard; + Allocate(Constants::kCacheLineBytes, discard); + assert(discard == UINT32_MAX); + /// Move the head and read-only address past the invalid page. + Address tail_address = tail_page_offset_.load(); + begin_address.store(tail_address); + read_only_address.store(tail_address); + safe_read_only_address.store(tail_address); + head_address.store(tail_address); + safe_head_address.store(tail_address); + } + + ~PersistentMemoryMalloc() { + if(pages_) { + for(uint32_t idx = 0; idx < buffer_size_; ++idx) { + if(pages_[idx]) { + aligned_free(pages_[idx]); + } + } + delete[] pages_; + } + if(page_status_) { + delete[] page_status_; + } + } + + inline const uint8_t* Page(uint32_t page) const { + assert(page <= Address::kMaxPage); + return pages_[page % buffer_size_]; + } + inline uint8_t* Page(uint32_t page) { + assert(page <= Address::kMaxPage); + return pages_[page % buffer_size_]; + } + + inline const FullPageStatus& PageStatus(uint32_t page) const { + assert(page <= Address::kMaxPage); + return page_status_[page % buffer_size_]; + } + inline FullPageStatus& PageStatus(uint32_t page) { + assert(page <= Address::kMaxPage); + return page_status_[page % buffer_size_]; + } + + inline uint32_t buffer_size() const { + return buffer_size_; + } + + /// Read the tail page + offset, atomically, and convert it to an address. 
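The constructor derives the circular-buffer geometry from the requested log size and mutable fraction; a standalone arithmetic check of those rules (not FASTER code), using an illustrative 1 GB in-memory log:

#include <cassert>
#include <cstdint>

int main() {
  constexpr uint64_t kPageSize = uint64_t{ 1 } << 25;        // 32 MB pages
  uint64_t log_size = uint64_t{ 1 } << 30;                   // 1 GB in-memory region
  double log_mutable_fraction = 0.9;

  assert(log_size % kPageSize == 0);                         // must be a multiple of 32 MB
  uint32_t buffer_size = static_cast<uint32_t>(log_size / kPageSize);
  uint32_t num_mutable_pages = static_cast<uint32_t>(log_mutable_fraction * buffer_size);

  assert(buffer_size == 32);
  assert(num_mutable_pages == 28);                           // 0.9 * 32, truncated
  assert(buffer_size > 4 + 1);                               // more than kNumHeadPages + 1 pages
  assert(num_mutable_pages > 1);                             // at least two mutable pages
  return 0;
}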
+ inline Address GetTailAddress() const { + PageOffset tail_page_offset = tail_page_offset_.load(); + return Address{ tail_page_offset.page(), std::min(Address::kMaxOffset, + static_cast(tail_page_offset.offset())) }; + } + + inline const uint8_t* Get(Address address) const { + return Page(address.page()) + address.offset(); + } + inline uint8_t* Get(Address address) { + return Page(address.page()) + address.offset(); + } + + /// Key function used to allocate memory for a specified number of items. If the current page is + /// full, returns Address::kInvalidAddress and sets closed_page to the current page index. The + /// caller should Refresh() the epoch and call NewPage() until successful, before trying to + /// Allocate() again. + inline Address Allocate(uint32_t num_slots, uint32_t& closed_page); + + /// Tries to move the allocator to a new page; used when the current page is full. Returns "true" + /// if the page advanced (so the caller can try to allocate, again). + inline bool NewPage(uint32_t old_page); + + /// Invoked by users to obtain a record from disk. It uses sector aligned memory to read + /// the record efficiently into memory. + inline void AsyncGetFromDisk(Address address, uint32_t num_records, AsyncIOCallback callback, + AsyncIOContext& context); + + /// Used by applications to make the current state of the database immutable quickly + Address ShiftReadOnlyToTail(); + + void Truncate(GcState::truncate_callback_t callback); + + /// Action to be performed for when all threads have agreed that a page range is closed. + class OnPagesClosed_Context : public IAsyncContext { + public: + OnPagesClosed_Context(alloc_t* allocator_, + Address new_safe_head_address_, + bool replace_with_clean_page_) + : allocator{ allocator_ } + , new_safe_head_address{ new_safe_head_address_ } + , replace_with_clean_page{ replace_with_clean_page_ } { + } + + /// The deep-copy constructor. + OnPagesClosed_Context(const OnPagesClosed_Context& other) + : allocator{ other.allocator } + , new_safe_head_address{ other.new_safe_head_address } + , replace_with_clean_page{ other.replace_with_clean_page } { + } + + protected: + Status DeepCopy_Internal(IAsyncContext*& context_copy) final { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + public: + alloc_t* allocator; + Address new_safe_head_address; + bool replace_with_clean_page; + }; + + static void OnPagesClosed(IAsyncContext* ctxt); + + /// Seal: make sure there are no longer any threads writing to the page + /// Flush: send page to secondary store + class OnPagesMarkedReadOnly_Context : public IAsyncContext { + public: + OnPagesMarkedReadOnly_Context(alloc_t* allocator_, + Address new_safe_read_only_address_, + bool wait_for_pending_flush_complete_) + : allocator{ allocator_ } + , new_safe_read_only_address{ new_safe_read_only_address_ } + , wait_for_pending_flush_complete{ wait_for_pending_flush_complete_ } { + } + + /// The deep-copy constructor. 
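AsyncGetFromDisk() (declared above) must issue reads at sector granularity; the helper it uses rounds the requested byte range outward to sector boundaries and records where the caller's bytes begin inside the aligned buffer. A standalone model of that arithmetic (not FASTER code), assuming sector_size is a power of two:

#include <cassert>
#include <cstdint>

void GetFileReadBoundaries(uint64_t read_offset, uint32_t read_length, uint64_t sector_size,
                           uint64_t& begin_read, uint64_t& end_read,
                           uint32_t& offset, uint32_t& length) {
  uint64_t alignment_mask = sector_size - 1;                 // sector_size must be a power of two
  begin_read = read_offset & ~alignment_mask;                // round start down to a sector
  end_read = (read_offset + read_length + alignment_mask) & ~alignment_mask;  // round end up
  offset = static_cast<uint32_t>(read_offset & alignment_mask);   // caller's data starts here
  length = static_cast<uint32_t>(end_read - begin_read);
}

int main() {
  uint64_t begin, end;
  uint32_t offset, length;
  GetFileReadBoundaries(/*read_offset=*/5000, /*read_length=*/100, /*sector_size=*/4096,
                        begin, end, offset, length);
  assert(begin == 4096 && end == 8192 && offset == 904 && length == 4096);
  return 0;
}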
+ OnPagesMarkedReadOnly_Context(const OnPagesMarkedReadOnly_Context& other) + : allocator{ other.allocator } + , new_safe_read_only_address{ other.new_safe_read_only_address } + , wait_for_pending_flush_complete{ other.wait_for_pending_flush_complete } { + } + + protected: + Status DeepCopy_Internal(IAsyncContext*& context_copy) final { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + public: + alloc_t* allocator; + Address new_safe_read_only_address; + bool wait_for_pending_flush_complete; + }; + + static void OnPagesMarkedReadOnly(IAsyncContext* ctxt); + + private: + inline void GetFileReadBoundaries(Address read_offset, uint32_t read_length, + uint64_t& begin_read, uint64_t& end_read, uint32_t& offset, + uint32_t& length) const { + assert(sector_size > 0); + assert(Utility::IsPowerOfTwo(sector_size)); + assert(sector_size <= UINT32_MAX); + size_t alignment_mask = sector_size - 1; + // Align read to sector boundary. + begin_read = read_offset.control() & ~alignment_mask; + end_read = (read_offset.control() + read_length + alignment_mask) & ~alignment_mask; + offset = static_cast(read_offset.control() & alignment_mask); + assert(end_read - begin_read <= UINT32_MAX); + length = static_cast(end_read - begin_read); + } + + /// Allocate memory page, in sector aligned form + inline void AllocatePage(uint32_t index); + + /// Used by several functions to update the variable to newValue. Ignores if newValue is smaller + /// than the current value. + template + inline bool MonotonicUpdate(A& variable, T new_value, + T& old_value) { + old_value = variable.load(); + while(old_value < new_value) { + if(variable.compare_exchange_strong(old_value, new_value)) { + return true; + } + } + return false; + } + + Status AsyncFlushPages(uint32_t start_page, Address until_address, + bool serialize_objects = false); + + public: + Status AsyncFlushPagesToFile(uint32_t start_page, Address until_address, file_t& file, + std::atomic& flush_pending); + + /// Recovery. + Status AsyncReadPagesFromLog(uint32_t start_page, uint32_t num_pages, + RecoveryStatus& recovery_status); + Status AsyncReadPagesFromSnapshot(file_t& snapshot_file, uint32_t file_start_page, + uint32_t start_page, uint32_t num_pages, + RecoveryStatus& recovery_status); + + Status AsyncFlushPage(uint32_t page, RecoveryStatus& recovery_status, + AsyncCallback caller_callback, IAsyncContext* caller_context); + void RecoveryReset(Address begin_address_, Address head_address_, Address tail_address); + + private: + template + Status AsyncReadPages(F& read_file, uint32_t file_start_page, uint32_t start_page, + uint32_t num_pages, RecoveryStatus& recovery_status); + inline void PageAlignedShiftHeadAddress(uint32_t tail_page); + inline void PageAlignedShiftReadOnlyAddress(uint32_t tail_page); + + /// Every async flush callback tries to update the flushed until address to the latest value + /// possible + /// Is there a better way to do this with enabling fine-grained addresses (not necessarily at + /// page boundaries)? 
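MonotonicUpdate() is the workhorse for the read-only, head, and flushed-until markers: it advances an atomic only when the proposed value is larger, and reports whether this thread performed the advance. A standalone sketch (not FASTER code), specialized to std::atomic<T> for brevity:

#include <atomic>
#include <cassert>
#include <cstdint>

template <typename T>
bool MonotonicUpdate(std::atomic<T>& variable, T new_value, T& old_value) {
  old_value = variable.load();
  while(old_value < new_value) {
    if(variable.compare_exchange_strong(old_value, new_value)) {
      return true;     // this thread advanced the marker
    }                  // otherwise old_value was reloaded with the current value; retry or stop
  }
  return false;        // another thread already moved it at least this far
}

int main() {
  std::atomic<uint64_t> marker{ 10 };
  uint64_t old_value;
  assert(MonotonicUpdate(marker, uint64_t{ 15 }, old_value) && old_value == 10);
  assert(!MonotonicUpdate(marker, uint64_t{ 12 }, old_value) && old_value == 15);
  return 0;
}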
+ inline void ShiftFlushedUntilAddress() { + Address current_flushed_until_address = flushed_until_address.load(); + uint32_t page = current_flushed_until_address.page(); + + bool update = false; + Address page_last_flushed_address = PageStatus(page).LastFlushedUntilAddress.load(); + while(page_last_flushed_address >= current_flushed_until_address) { + current_flushed_until_address = page_last_flushed_address; + update = true; + ++page; + page_last_flushed_address = PageStatus(page).LastFlushedUntilAddress.load(); + } + + if(update) { + Address discard; + MonotonicUpdate(flushed_until_address, current_flushed_until_address, discard); + } + } + + public: + uint32_t sector_size; + + private: + LightEpoch* epoch_; + disk_t* disk; + + public: + log_file_t* file; + // Read buffer pool + NativeSectorAlignedBufferPool read_buffer_pool; + NativeSectorAlignedBufferPool io_buffer_pool; + + /// Every address < ReadOnlyAddress is read-only. + AtomicAddress read_only_address; + /// The minimum ReadOnlyAddress that every thread has seen. + AtomicAddress safe_read_only_address; + + /// The circular buffer can drop any page < HeadAddress.page()--must read those pages from disk. + AtomicAddress head_address; + /// The minimum HeadPage that every thread has seen. + AtomicAddress safe_head_address; + + AtomicAddress flushed_until_address; + + /// The address of the true head of the log--everything before this address has been truncated + /// by garbage collection. + AtomicAddress begin_address; + + private: + uint32_t buffer_size_; + + /// -- the latest N pages should be mutable. + uint32_t num_mutable_pages_; + + // Circular buffer definition + uint8_t** pages_; + + // Array that indicates the status of each buffer page + FullPageStatus* page_status_; + + // Global address of the current tail (next element to be allocated from the circular buffer) + AtomicPageOffset tail_page_offset_; +}; + +/// Implementations. +template +inline void PersistentMemoryMalloc::AllocatePage(uint32_t index) { + index = index % buffer_size_; + assert(pages_[index] == nullptr); + pages_[index] = reinterpret_cast(aligned_alloc(sector_size, kPageSize));; + std::memset(pages_[index], 0, kPageSize); + + // Mark the page as accessible. + page_status_[index].status.store(FlushStatus::Flushed, CloseStatus::Open); +} + +template +inline Address PersistentMemoryMalloc::Allocate(uint32_t num_slots, uint32_t& closed_page) { + closed_page = UINT32_MAX; + PageOffset page_offset = tail_page_offset_.Reserve(num_slots); + + if(page_offset.offset() + num_slots > kPageSize) { + // The current page is full. The caller should Refresh() the epoch and wait until + // NewPage() is successful before trying to Allocate() again. + closed_page = page_offset.page(); + return Address::kInvalidAddress; + } else { + assert(Page(page_offset.page())); + return static_cast
(page_offset); + } +} + +template +inline bool PersistentMemoryMalloc::NewPage(uint32_t old_page) { + assert(old_page < Address::kMaxPage); + PageOffset new_tail_offset{ old_page + 1, 0 }; + // When the tail advances to page k+1, we clear page k+2. + if(old_page + 2 >= safe_head_address.page() + buffer_size_) { + // No room in the circular buffer for a new page; try to advance the head address, to make + // more room available. + disk->TryComplete(); + PageAlignedShiftReadOnlyAddress(old_page + 1); + PageAlignedShiftHeadAddress(old_page + 1); + return false; + } + FlushCloseStatus status = PageStatus(old_page + 1).status.load(); + if(!status.Ready()) { + // Can't access the next page yet; try to advance the head address, to make the page + // available. + disk->TryComplete(); + PageAlignedShiftReadOnlyAddress(old_page + 1); + PageAlignedShiftHeadAddress(old_page + 1); + return false; + } + bool won_cas; + bool retval = tail_page_offset_.NewPage(old_page, won_cas); + if(won_cas) { + // We moved the tail to (page + 1), so we are responsible for moving the head and + // read-only addresses. + PageAlignedShiftReadOnlyAddress(old_page + 1); + PageAlignedShiftHeadAddress(old_page + 1); + if(!Page(old_page + 2)) { + // We are also responsible for allocating (page + 2). + AllocatePage(old_page + 2); + } + } + return retval; +} + +template +inline void PersistentMemoryMalloc::AsyncGetFromDisk(Address address, uint32_t num_records, + AsyncIOCallback callback, AsyncIOContext& context) { + uint64_t begin_read, end_read; + uint32_t offset, length; + GetFileReadBoundaries(address, num_records, begin_read, end_read, offset, length); + context.record = read_buffer_pool.Get(length); + context.record.valid_offset = offset; + context.record.available_bytes = length - offset; + context.record.required_bytes = num_records; + + file->ReadAsync(begin_read, context.record.buffer(), length, callback, context); +} + +template +Address PersistentMemoryMalloc::ShiftReadOnlyToTail() { + Address tail_address = GetTailAddress(); + Address old_read_only_address; + if(MonotonicUpdate(read_only_address, tail_address, old_read_only_address)) { + OnPagesMarkedReadOnly_Context context{ this, tail_address, false }; + IAsyncContext* context_copy; + Status result = context.DeepCopy(context_copy); + assert(result == Status::Ok); + epoch_->BumpCurrentEpoch(OnPagesMarkedReadOnly, context_copy); + } + return tail_address; +} + +template +void PersistentMemoryMalloc::Truncate(GcState::truncate_callback_t callback) { + assert(sector_size > 0); + assert(Utility::IsPowerOfTwo(sector_size)); + assert(sector_size <= UINT32_MAX); + size_t alignment_mask = sector_size - 1; + // Align read to sector boundary. 
+ uint64_t begin_offset = begin_address.control() & ~alignment_mask; + file->Truncate(begin_offset, callback); +} + +template +void PersistentMemoryMalloc::OnPagesClosed(IAsyncContext* ctxt) { + CallbackContext context{ ctxt }; + Address old_safe_head_address; + if(context->allocator->MonotonicUpdate(context->allocator->safe_head_address, + context->new_safe_head_address, + old_safe_head_address)) { + for(uint32_t idx = old_safe_head_address.page(); idx < context->new_safe_head_address.page(); + ++idx) { + FlushCloseStatus old_status = context->allocator->PageStatus(idx).status.load(); + FlushCloseStatus new_status; + do { + new_status = FlushCloseStatus{ old_status.flush, CloseStatus::Closed }; + } while(!context->allocator->PageStatus(idx).status.compare_exchange_weak(old_status, + new_status)); + + if(old_status.flush == FlushStatus::Flushed) { + // We closed the page after it was flushed, so we are responsible for clearing and + // reopening it. + std::memset(context->allocator->Page(idx), 0, kPageSize); + context->allocator->PageStatus(idx).status.store(FlushStatus::Flushed, CloseStatus::Open); + } + } + } +} + +template +void PersistentMemoryMalloc::OnPagesMarkedReadOnly(IAsyncContext* ctxt) { + CallbackContext context{ ctxt }; + Address old_safe_read_only_address; + if(context->allocator->MonotonicUpdate(context->allocator->safe_read_only_address, + context->new_safe_read_only_address, + old_safe_read_only_address)) { + context->allocator->AsyncFlushPages(old_safe_read_only_address.page(), + context->new_safe_read_only_address); + } +} + +template +Status PersistentMemoryMalloc::AsyncFlushPages(uint32_t start_page, Address until_address, + bool serialize_objects) { + class Context : public IAsyncContext { + public: + Context(alloc_t* allocator_, uint32_t page_, Address until_address_) + : allocator{ allocator_ } + , page{ page_ } + , until_address{ until_address_ } { + } + /// The deep-copy constructor + Context(const Context& other) + : allocator{ other.allocator } + , page{ other.page } + , until_address{ other.until_address } { + } + protected: + Status DeepCopy_Internal(IAsyncContext*& context_copy) final { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + public: + alloc_t* allocator; + uint32_t page; + Address until_address; + }; + + auto callback = [](IAsyncContext* ctxt, Status result, size_t bytes_transferred) { + CallbackContext context{ ctxt }; + if(result != Status::Ok) { + fprintf(stderr, "AsyncFlushPages(), error: %u\n", static_cast(result)); + } + context->allocator->PageStatus(context->page).LastFlushedUntilAddress.store( + context->until_address); + //Set the page status to flushed + FlushCloseStatus old_status = context->allocator->PageStatus(context->page).status.load(); + FlushCloseStatus new_status; + do { + new_status = FlushCloseStatus{ FlushStatus::Flushed, old_status.close }; + } while(!context->allocator->PageStatus(context->page).status.compare_exchange_weak( + old_status, new_status)); + if(old_status.close == CloseStatus::Closed) { + // We finished flushing the page after it was closed, so we are responsible for clearing and + // reopening it. 
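+      // Illustrative note: a page is recycled only once it is both Flushed and Closed. This
+      // flush callback and the OnPagesClosed() epoch callback each CAS their half of the status;
+      // whichever one observes the other half already set performs the memset and reopens the
+      // page:
+      //
+      //   flush completes last:  { InProgress, Closed } -> { Flushed, Closed } -> reopened here
+      //   close completes last:  { Flushed, Open }      -> { Flushed, Closed } -> reopened in OnPagesClosed()
+      //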
+ std::memset(context->allocator->Page(context->page), 0, kPageSize); + context->allocator->PageStatus(context->page).status.store(FlushStatus::Flushed, + CloseStatus::Open); + } + context->allocator->ShiftFlushedUntilAddress(); + }; + + uint32_t num_pages = until_address.page() - start_page; + if(until_address.offset() > 0) { + ++num_pages; + } + assert(num_pages > 0); + + for(uint32_t flush_page = start_page; flush_page < start_page + num_pages; ++flush_page) { + Address page_start_address{ flush_page, 0 }; + Address page_end_address{ flush_page + 1, 0 }; + + Context context{ this, flush_page, std::min(page_end_address, until_address) }; + + //Set status to in-progress + FlushCloseStatus old_status = PageStatus(flush_page).status.load(); + FlushCloseStatus new_status; + do { + new_status = FlushCloseStatus{ FlushStatus::InProgress, old_status.close }; + } while(!PageStatus(flush_page).status.compare_exchange_weak(old_status, new_status)); + PageStatus(flush_page).LastFlushedUntilAddress.store(0); + + RETURN_NOT_OK(file->WriteAsync(Page(flush_page), kPageSize * flush_page, kPageSize, callback, + context)); + } + return Status::Ok; +} + +template +Status PersistentMemoryMalloc::AsyncFlushPagesToFile(uint32_t start_page, Address until_address, + file_t& file, std::atomic& flush_pending) { + class Context : public IAsyncContext { + public: + Context(std::atomic& flush_pending_) + : flush_pending{ flush_pending_ } { + } + /// The deep-copy constructor + Context(Context& other) + : flush_pending{ other.flush_pending } { + } + protected: + Status DeepCopy_Internal(IAsyncContext*& context_copy) final { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + public: + std::atomic& flush_pending; + }; + + auto callback = [](IAsyncContext* ctxt, Status result, size_t bytes_transferred) { + CallbackContext context{ ctxt }; + if(result != Status::Ok) { + fprintf(stderr, "AsyncFlushPagesToFile(), error: %u\n", static_cast(result)); + } + assert(context->flush_pending > 0); + --context->flush_pending; + }; + + uint32_t num_pages = until_address.page() - start_page; + if(until_address.offset() > 0) { + ++num_pages; + } + assert(num_pages > 0); + flush_pending = num_pages; + + for(uint32_t flush_page = start_page; flush_page < start_page + num_pages; ++flush_page) { + Address page_start_address{ flush_page, 0 }; + Address page_end_address{ flush_page + 1, 0 }; + Context context{ flush_pending }; + RETURN_NOT_OK(file.WriteAsync(Page(flush_page), kPageSize * (flush_page - start_page), + kPageSize, callback, context)); + } + return Status::Ok; +} + +template +Status PersistentMemoryMalloc::AsyncReadPagesFromLog(uint32_t start_page, uint32_t num_pages, + RecoveryStatus& recovery_status) { + return AsyncReadPages(*file, 0, start_page, num_pages, recovery_status); +} + +template +Status PersistentMemoryMalloc::AsyncReadPagesFromSnapshot(file_t& snapshot_file, + uint32_t file_start_page, uint32_t start_page, uint32_t num_pages, + RecoveryStatus& recovery_status) { + return AsyncReadPages(snapshot_file, file_start_page, start_page, num_pages, recovery_status); +} + +template +template +Status PersistentMemoryMalloc::AsyncReadPages(F& read_file, uint32_t file_start_page, + uint32_t start_page, uint32_t num_pages, RecoveryStatus& recovery_status) { + class Context : public IAsyncContext { + public: + Context(std::atomic& page_status_) + : page_status{ &page_status_ } { + } + /// The deep-copy constructor + Context(const Context& other) + : page_status{ other.page_status } { + } + protected: + Status 
DeepCopy_Internal(IAsyncContext*& context_copy) final { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + public: + std::atomic* page_status; + }; + + auto callback = [](IAsyncContext* ctxt, Status result, size_t bytes_transferred) { + CallbackContext context{ ctxt }; + if(result != Status::Ok) { + fprintf(stderr, "Error: %u\n", static_cast(result)); + } + assert(context->page_status->load() == PageRecoveryStatus::IssuedRead); + context->page_status->store(PageRecoveryStatus::ReadDone); + }; + + for(uint32_t read_page = start_page; read_page < start_page + num_pages; ++read_page) { + if(!Page(read_page)) { + // Allocate a new page. + AllocatePage(read_page); + } else { + // Clear an old used page. + std::memset(Page(read_page), 0, kPageSize); + } + assert(recovery_status.page_status(read_page) == PageRecoveryStatus::NotStarted); + recovery_status.page_status(read_page).store(PageRecoveryStatus::IssuedRead); + PageStatus(read_page).LastFlushedUntilAddress.store(Address{ read_page + 1, 0 }); + Context context{ recovery_status.page_status(read_page) }; + RETURN_NOT_OK(read_file.ReadAsync(kPageSize * (read_page - file_start_page), Page(read_page), + kPageSize, callback, context)); + } + return Status::Ok; +} + +template +Status PersistentMemoryMalloc::AsyncFlushPage(uint32_t page, RecoveryStatus& recovery_status, + AsyncCallback caller_callback, IAsyncContext* caller_context) { + class Context : public IAsyncContext { + public: + Context(std::atomic& page_status_, AsyncCallback caller_callback_, + IAsyncContext* caller_context_) + : page_status{ &page_status_ } + , caller_callback{ caller_callback_ } + , caller_context{ caller_context_ } { + } + /// The deep-copy constructor + Context(const Context& other, IAsyncContext* caller_context_copy) + : page_status{ other.page_status } + , caller_callback{ other.caller_callback } + , caller_context{ caller_context_copy } { + } + protected: + Status DeepCopy_Internal(IAsyncContext*& context_copy) final { + if(caller_callback) { + return IAsyncContext::DeepCopy_Internal(*this, caller_context, context_copy); + } else { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + } + public: + std::atomic* page_status; + AsyncCallback caller_callback; + IAsyncContext* caller_context; + }; + + auto callback = [](IAsyncContext* ctxt, Status result, size_t bytes_transferred) { + CallbackContext context{ ctxt }; + if(result != Status::Ok) { + fprintf(stderr, "Error: %u\n", static_cast(result)); + } + assert(context->page_status->load() == PageRecoveryStatus::IssuedFlush); + context->page_status->store(PageRecoveryStatus::FlushDone); + if(context->caller_callback) { + context->caller_callback(context->caller_context, result); + } + }; + + assert(recovery_status.page_status(page) == PageRecoveryStatus::ReadDone); + recovery_status.page_status(page).store(PageRecoveryStatus::IssuedFlush); + PageStatus(page).LastFlushedUntilAddress.store(Address{ page + 1, 0 }); + Context context{ recovery_status.page_status(page), caller_callback, caller_context }; + return file->WriteAsync(Page(page), kPageSize * page, kPageSize, callback, context); +} + +template +void PersistentMemoryMalloc::RecoveryReset(Address begin_address_, Address head_address_, + Address tail_address) { + begin_address.store(begin_address_); + tail_page_offset_.store(tail_address); + // issue read request to all pages until head lag + head_address.store(head_address_); + safe_head_address.store(head_address_); + + flushed_until_address.store(Address{ tail_address.page(), 0 
}); + read_only_address.store(tail_address); + safe_read_only_address.store(tail_address); + + uint32_t start_page = head_address_.page(); + uint32_t end_page = tail_address.offset() == 0 ? tail_address.page() : tail_address.page() + 1; + if(!Page(end_page)) { + AllocatePage(end_page); + } + if(!Page(end_page + 1)) { + AllocatePage(end_page + 1); + } + + for(uint32_t idx = 0; idx < buffer_size_; ++idx) { + PageStatus(idx).status.store(FlushStatus::Flushed, CloseStatus::Open); + } +} + +template +inline void PersistentMemoryMalloc::PageAlignedShiftHeadAddress(uint32_t tail_page) { + //obtain local values of variables that can change + Address current_head_address = head_address.load(); + Address current_flushed_until_address = flushed_until_address.load(); + + if(tail_page <= (buffer_size_ - kNumHeadPages)) { + // Desired head address is <= 0. + return; + } + + Address desired_head_address{ tail_page - (buffer_size_ - kNumHeadPages), 0 }; + + if(current_flushed_until_address < desired_head_address) { + desired_head_address = Address{ current_flushed_until_address.page(), 0 }; + } + + Address old_head_address; + if(MonotonicUpdate(head_address, desired_head_address, old_head_address)) { + OnPagesClosed_Context context{ this, desired_head_address, false }; + IAsyncContext* context_copy; + Status result = context.DeepCopy(context_copy); + assert(result == Status::Ok); + epoch_->BumpCurrentEpoch(OnPagesClosed, context_copy); + } +} + +template +inline void PersistentMemoryMalloc::PageAlignedShiftReadOnlyAddress(uint32_t tail_page) { + Address current_read_only_address = read_only_address.load(); + if(tail_page <= num_mutable_pages_) { + // Desired read-only address is <= 0. + return; + } + + Address desired_read_only_address{ tail_page - num_mutable_pages_, 0 }; + Address old_read_only_address; + if(MonotonicUpdate(read_only_address, desired_read_only_address, old_read_only_address)) { + OnPagesMarkedReadOnly_Context context{ this, desired_read_only_address, false }; + IAsyncContext* context_copy; + Status result = context.DeepCopy(context_copy); + assert(result == Status::Ok); + epoch_->BumpCurrentEpoch(OnPagesMarkedReadOnly, context_copy); + } +} + +} +} // namespace FASTER::core diff --git a/cc/src/core/phase.h b/cc/src/core/phase.h new file mode 100644 index 000000000..9b3ab1dd1 --- /dev/null +++ b/cc/src/core/phase.h @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "alloc.h" +#include "async.h" +#include "constants.h" +#include "thread.h" +#include "utility.h" + +namespace FASTER { +namespace core { + +/// Phases, used internally by FASTER to keep track of how far along FASTER has gotten during +/// checkpoint, gc, and grow actions. +enum class Phase : uint8_t { + /// Checkpoint phases. + PREP_INDEX_CHKPT, + INDEX_CHKPT, + PREPARE, + IN_PROGRESS, + WAIT_PENDING, + WAIT_FLUSH, + REST, + PERSISTENCE_CALLBACK, + /// Garbage-collection phases. + /// - The log's begin-address has been shifted; finish all outstanding I/Os before trying to + /// truncate the log. + GC_IO_PENDING, + /// - The log has been truncated, but threads are still cleaning the hash table. + GC_IN_PROGRESS, + /// Grow-index phases. + /// - Each thread waits for all other threads to complete outstanding (synchronous) operations + /// against the hash table. + GROW_PREPARE, + /// - Each thread copies a chunk of the old hash table into the new hash table. 
+ GROW_IN_PROGRESS, + INVALID +}; + +} +} // namespace FASTER::core \ No newline at end of file diff --git a/cc/src/core/record.h b/cc/src/core/record.h new file mode 100644 index 000000000..9117564fb --- /dev/null +++ b/cc/src/core/record.h @@ -0,0 +1,151 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include "address.h" +#include "auto_ptr.h" + +namespace FASTER { +namespace core { + +/// Record header, internal to FASTER. +class RecordInfo { + public: + RecordInfo(uint16_t checkpoint_version_, bool final_bit_, bool tombstone_, bool invalid_, + Address previous_address) + : checkpoint_version{ checkpoint_version_ } + , final_bit{ final_bit_ } + , tombstone{ tombstone_ } + , invalid{ invalid_ } + , previous_address_{ previous_address.control() } { + } + + RecordInfo(const RecordInfo& other) + : control_{ other.control_ } { + } + + inline bool IsNull() const { + return control_ == 0; + } + inline Address previous_address() const { + return Address{ previous_address_ }; + } + + union { + struct { + uint64_t previous_address_ : 48; + uint64_t checkpoint_version : 13; + uint64_t invalid : 1; + uint64_t tombstone : 1; + uint64_t final_bit : 1; + }; + + uint64_t control_; + }; +}; +static_assert(sizeof(RecordInfo) == 8, "sizeof(RecordInfo) != 8"); +static_assert(sizeof(RecordInfo) == 8, "sizeof(RecordInfo) != 8"); + +/// A record stored in the log. The log starts at 0 (mod 64), and consists of Records, one after +/// the other. Each record's header is 8 bytes. +template +struct Record { + // To support records with alignment > 64, modify the persistent-memory allocator to allocate + // a larger NULL page on startup. + static_assert(alignof(key_t) <= Constants::kCacheLineBytes, + "alignof(key_t) > Constants::kCacheLineBytes)"); + static_assert(alignof(value_t) <= Constants::kCacheLineBytes, + "alignof(value_t) > Constants::kCacheLineBytes)"); + + /// For placement new() operator. Can't set value, since it might be set by value = input (for + /// upsert), or rmw_initial(...) (for RMW). + Record(RecordInfo header_, const key_t& key_) + : header{ header_ } { + void* buffer = const_cast(&key()); + new(buffer)key_t{ key_ }; + } + + /// Key appears immediately after record header (subject to alignment padding). Keys are + /// immutable. + inline constexpr const key_t& key() const { + const uint8_t* head = reinterpret_cast(this); + size_t offset = pad_alignment(sizeof(RecordInfo), alignof(key_t)); + return *reinterpret_cast(head + offset); + } + + /// Value appears immediately after key (subject to alignment padding). Values can be modified. + inline constexpr const value_t& value() const { + const uint8_t* head = reinterpret_cast(this); + size_t offset = pad_alignment(key().size() + + pad_alignment(sizeof(RecordInfo), alignof(key_t)), + alignof(value_t)); + return *reinterpret_cast(head + offset); + } + inline constexpr value_t& value() { + uint8_t* head = reinterpret_cast(this); + size_t offset = pad_alignment(key().size() + + pad_alignment(sizeof(RecordInfo), alignof(key_t)), + alignof(value_t)); + return *reinterpret_cast(head + offset); + } + + /// Size of a record to be created, in memory. (Includes padding, if any, after the value, so + /// that the next record stored in the log is properly aligned.) + static inline constexpr uint32_t size(const key_t& key_, uint32_t value_size) { + return static_cast( + // --plus Value size, all padded to Header alignment. 
+ pad_alignment(value_size + + // --plus Key size, all padded to Value alignment. + pad_alignment(key_.size() + + // Header, padded to Key alignment. + pad_alignment(sizeof(RecordInfo), alignof(key_t)), + alignof(value_t)), + alignof(RecordInfo))); + } + /// Size of the existing record, in memory. (Includes padding, if any, after the value.) + inline constexpr uint32_t size() const { + return size(key(), value().size()); + } + + /// Minimum size of a read from disk that is guaranteed to include the record's header + whatever + /// information class key_t needs to determine its key size. + static inline constexpr uint32_t min_disk_key_size() { + return static_cast( + // -- plus sizeof(key_t). + sizeof(key_t) + + // Header size, padded to Key alignment. + pad_alignment(sizeof(RecordInfo), alignof(key_t))); + } + + /// Minimum size of a read from disk that is guaranteed to include the record's header, key, + // and whatever information the host needs to determine the value size. + inline constexpr uint32_t min_disk_value_size() const { + return static_cast( + // -- plus size of the Value's header. + sizeof(value_t) + + // --plus Key size, padded to Base Value alignment. + pad_alignment(key().size() + + // Header, padded to Key alignment. + pad_alignment(sizeof(RecordInfo), alignof(key_t)), + alignof(value_t)) + ); + } + + /// Size of a record, on disk. (Excludes padding, if any, after the value.) + inline constexpr uint32_t disk_size() const { + return static_cast(value().size() + + pad_alignment(key().size() + + // Header, padded to Key alignment. + pad_alignment(sizeof(RecordInfo), alignof(key_t)), + alignof(value_t))); + } + + public: + RecordInfo header; +}; + +} +} // namespace FASTER::core diff --git a/cc/src/core/recovery_status.h b/cc/src/core/recovery_status.h new file mode 100644 index 000000000..111bfade1 --- /dev/null +++ b/cc/src/core/recovery_status.h @@ -0,0 +1,59 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include + +namespace FASTER { +namespace core { + +/// Used by FASTER to track status, during recovery action. + +enum class PageRecoveryStatus { + NotStarted = 0, + IssuedRead, + ReadDone, + IssuedFlush, + FlushDone +}; + +class RecoveryStatus { + public: + RecoveryStatus(uint32_t start_page_, uint32_t end_page_) + : start_page{ start_page_ } + , end_page{ end_page_ } + , page_status_{ nullptr } { + assert(end_page >= start_page); + uint32_t buffer_size = end_page - start_page; + page_status_ = new std::atomic[buffer_size]; + std::memset(page_status_, 0, sizeof(std::atomic) * buffer_size); + } + + ~RecoveryStatus() { + if(page_status_) { + delete page_status_; + } + } + + const std::atomic& page_status(uint32_t page) const { + assert(page >= start_page); + assert(page < end_page); + return page_status_[page - start_page]; + } + std::atomic& page_status(uint32_t page) { + assert(page >= start_page); + assert(page < end_page); + return page_status_[page - start_page]; + } + + uint32_t start_page; + uint32_t end_page; + + private: + std::atomic* page_status_; +}; + +} +} // namespace FASTER::core diff --git a/cc/src/core/state_transitions.h b/cc/src/core/state_transitions.h new file mode 100644 index 000000000..5796fb271 --- /dev/null +++ b/cc/src/core/state_transitions.h @@ -0,0 +1,162 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
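+// Illustrative sketch of the checkpoint state machine defined below (assumes a caller that
+// simply drives GetNextState() in a loop): starting from { Checkpoint, REST, v }, the phases
+// advance PREP_INDEX_CHKPT -> INDEX_CHKPT -> PREPARE -> IN_PROGRESS (version becomes v + 1)
+// -> WAIT_PENDING -> WAIT_FLUSH -> PERSISTENCE_CALLBACK -> REST, e.g.
+//
+//   SystemState state{ Action::Checkpoint, Phase::REST, 1 };
+//   do {
+//     state = state.GetNextState();
+//   } while(state.phase != Phase::REST);
+//   assert(state.version == 2);
+//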
+ +#pragma once + +#include +#include +#include "phase.h" + +namespace FASTER { +namespace core { + +struct ResizeInfo { + uint8_t version; +}; + +/// Each FASTER store can perform only one action at a time (checkpoint, recovery, garbage +// collect, or grow index). +enum class Action : uint8_t { + None = 0, + Checkpoint, + Recover, + GC, + GrowIndex +}; + +struct SystemState { + SystemState(Action action_, Phase phase_, uint32_t version_) + : control_{ 0 } { + action = action_; + phase = phase_; + version = version_; + } + SystemState(uint64_t control) + : control_{ control } { + } + SystemState(const SystemState& other) + : control_{ other.control_ } { + } + + inline SystemState& operator=(const SystemState& other) { + control_ = other.control_; + return *this; + } + inline bool operator==(const SystemState& other) { + return control_ == other.control_; + } + inline bool operator!=(const SystemState& other) { + return control_ != other.control_; + } + + /// The state transitions. + inline SystemState GetNextState() const { + switch(action) { + case Action::Checkpoint: + switch(phase) { + case Phase::REST: + return SystemState{ Action::Checkpoint, Phase::PREP_INDEX_CHKPT, version }; + case Phase::PREP_INDEX_CHKPT: + return SystemState{ Action::Checkpoint, Phase::INDEX_CHKPT, version }; + case Phase::INDEX_CHKPT: + return SystemState{ Action::Checkpoint, Phase::PREPARE, version }; + case Phase::PREPARE: + return SystemState{ Action::Checkpoint, Phase::IN_PROGRESS, version + 1 }; + case Phase::IN_PROGRESS: + return SystemState{ Action::Checkpoint, Phase::WAIT_PENDING, version }; + case Phase::WAIT_PENDING: + return SystemState{ Action::Checkpoint, Phase::WAIT_FLUSH, version }; + case Phase::WAIT_FLUSH: + return SystemState{ Action::Checkpoint, Phase::PERSISTENCE_CALLBACK, version }; + case Phase::PERSISTENCE_CALLBACK: + return SystemState{ Action::Checkpoint, Phase::REST, version }; + default: + // not reached + assert(false); + return SystemState(UINT64_MAX); + } + break; + case Action::GC: + switch(phase) { + case Phase::REST: + return SystemState{ Action::GC, Phase::GC_IO_PENDING, version }; + case Phase::GC_IO_PENDING: + return SystemState{ Action::GC, Phase::GC_IN_PROGRESS, version }; + case Phase::GC_IN_PROGRESS: + return SystemState{ Action::GC, Phase::REST, version }; + default: + // not reached + assert(false); + return SystemState(UINT64_MAX); + } + break; + case Action::GrowIndex: + switch(phase) { + case Phase::REST: + return SystemState{ Action::GrowIndex, Phase::GROW_PREPARE, version }; + case Phase::GROW_PREPARE: + return SystemState{ Action::GrowIndex, Phase::GROW_IN_PROGRESS, version }; + case Phase::GROW_IN_PROGRESS: + return SystemState{ Action::GrowIndex, Phase::REST, version }; + default: + // not reached + assert(false); + return SystemState(UINT64_MAX); + } + default: + // not reached + assert(false); + return SystemState(UINT64_MAX); + } + } + + union { + struct { + /// Action being performed (checkpoint, recover, or gc). + Action action; + /// Phase of that action currently being executed. + Phase phase; + /// Checkpoint version (used for CPR). + uint32_t version; + }; + uint64_t control_; + }; +}; +static_assert(sizeof(SystemState) == 8, "sizeof(SystemState) != 8"); + +class AtomicSystemState { + public: + AtomicSystemState(Action action_, Phase phase_, uint32_t version_) { + SystemState state{ action_, phase_, version_ }; + control_.store(state.control_); + } + + /// Atomic access. 
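+  /// Illustrative sketch (system_state_ is a hypothetical AtomicSystemState member, as a store
+  /// might hold): threads advance the global state by CAS-ing in the successor of the state they
+  /// last observed, so at most one thread wins any given transition:
+  ///
+  ///   SystemState expected = system_state_.load();
+  ///   SystemState desired = expected.GetNextState();
+  ///   if(system_state_.compare_exchange_strong(expected, desired)) {
+  ///     // This thread won the transition; others observe the new state on their next load.
+  ///   }
+  ///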
+ inline SystemState load() const { + return SystemState{ control_.load() }; + } + inline void store(SystemState value) { + control_.store(value.control_); + } + inline bool compare_exchange_strong(SystemState& expected, SystemState desired) { + uint64_t expected_control = expected.control_; + bool result = control_.compare_exchange_strong(expected_control, desired.control_); + expected = SystemState{ expected_control }; + return result; + } + + /// Accessors. + inline Phase phase() const { + return load().phase; + } + inline uint32_t version() const { + return load().version; + } + + private: + /// Atomic access to the system state. + std::atomic control_; +}; + +} +} // namespace FASTER::core diff --git a/cc/src/core/status.h b/cc/src/core/status.h new file mode 100644 index 000000000..0ecb48bed --- /dev/null +++ b/cc/src/core/status.h @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once +#include + +namespace FASTER { +namespace core { + +enum class Status : uint8_t { + Ok = 0, + Pending = 1, + NotFound = 2, + OutOfMemory = 3, + IOError = 4, + Corruption = 5, + Aborted = 6, +}; + +enum class InternalStatus : uint8_t { + Ok, + RETRY_NOW, + RETRY_LATER, + RECORD_ON_DISK, + SUCCESS_UNMARK, + CPR_SHIFT_DETECTED +}; + +} +} // namespace FASTER::core diff --git a/cc/src/core/thread.cc b/cc/src/core/thread.cc new file mode 100644 index 000000000..2ebefa0ba --- /dev/null +++ b/cc/src/core/thread.cc @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include "thread.h" + +namespace FASTER { +namespace core { + +/// The first thread will have index 0. +std::atomic Thread::next_index_{ 0 }; + +/// No thread IDs have been used yet. +std::atomic Thread::id_used_[kMaxNumThreads] = {}; + +#ifdef COUNT_ACTIVE_THREADS +std::atomic Thread::current_num_threads_ { 0 }; +#endif + +/// Give the new thread an ID. (In this implementation, threads get IDs when they are created, and +/// release them when they are freed. We will eventually merge chkulk's improvements, from another +/// branch, and then threads will get IDs on their first call to FasterKv::StartSession(), while +/// still releasing IDs when they are freed.) +thread_local Thread::ThreadId Thread::id_{}; + +} +} // namespace FASTER::core diff --git a/cc/src/core/thread.h b/cc/src/core/thread.h new file mode 100644 index 000000000..24af3474a --- /dev/null +++ b/cc/src/core/thread.h @@ -0,0 +1,103 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include +#include + +/// Turn this on to have Thread::current_num_threads_ keep a count of currently-active threads. +#undef COUNT_ACTIVE_THREADS + +namespace FASTER { +namespace core { + +/// Gives every thread a unique, numeric thread ID, and recycles IDs when threads exit. +class Thread { + public: + /// The number of entries in table. Currently, this is fixed at 64 and never changes or grows. + /// If the table runs out of entries, then the current implementation will throw a + /// std::runtime_error. + static constexpr size_t kMaxNumThreads = 96; + + private: + /// Encapsulates a thread ID, getting a free ID from the Thread class when the thread starts, and + /// releasing it back to the Thread class, when the thread exits. 
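+  /// Illustrative sketch: the ID is backed by the thread_local id_ below, so the value returned
+  /// by Thread::id() is stable for the lifetime of the thread, and the slot in id_used_[] is
+  /// recycled once the thread exits (names in the example are arbitrary):
+  ///
+  ///   std::thread worker{ [] {
+  ///     uint32_t my_id = Thread::id();   // this thread's slot in id_used_[]
+  ///     assert(my_id == Thread::id());   // same value for this thread from now on
+  ///   } };
+  ///   worker.join();                     // slot recycled when the thread exits
+  ///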
+ class ThreadId { + public: + static constexpr uint32_t kInvalidId = UINT32_MAX; + + inline ThreadId(); + inline ~ThreadId(); + + inline uint32_t id() const { + return id_; + } + + private: + uint32_t id_; + }; + + public: + /// Call static method Thread::id() to get the executing thread's ID. + inline static uint32_t id() { + return id_.id(); + } + + private: + /// Methods ReserveEntry() and ReleaseEntry() do the real work. + inline static uint32_t ReserveEntry() { +#ifdef COUNT_ACTIVE_THREADS + int32_t result = ++current_num_threads_; + assert(result < kMaxNumThreads); +#endif + uint32_t start = next_index_++; + uint32_t end = start + 2 * kMaxNumThreads; + for(uint32_t id = start; id < end; ++id) { + bool expected = false; + if(id_used_[id % kMaxNumThreads].compare_exchange_strong(expected, true)) { + return id % kMaxNumThreads; + } + } + // Already have 64 active threads. + throw std::runtime_error{ "Too many threads!" }; + } + + inline static void ReleaseEntry(uint32_t id) { + assert(id != ThreadId::kInvalidId); + assert(id_used_[id].load()); + id_used_[id] = false; +#ifdef COUNT_ACTIVE_THREADS + int32_t result = --current_num_threads_; +#endif + } + + /// The current thread's page_index. + static thread_local ThreadId id_; + + /// Next thread index to consider. + static std::atomic next_index_; + /// Which thread IDs have already been taken. + static std::atomic id_used_[kMaxNumThreads]; + +#ifdef COUNT_ACTIVE_THREADS + static std::atomic current_num_threads_; +#endif + + friend class ThreadId; +}; + +inline Thread::ThreadId::ThreadId() + : id_{ kInvalidId } { + id_ = Thread::ReserveEntry(); +} + +inline Thread::ThreadId::~ThreadId() { + Thread::ReleaseEntry(id_); +} + +} +} // namespace FASTER::core diff --git a/cc/src/core/utility.h b/cc/src/core/utility.h new file mode 100644 index 000000000..83211bddd --- /dev/null +++ b/cc/src/core/utility.h @@ -0,0 +1,56 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include + +namespace FASTER { +namespace core { + +class Utility { + public: + static inline uint64_t Rotr64(uint64_t x, std::size_t n) { + return (((x) >> n) | ((x) << (64 - n))); + } + + static inline uint64_t GetHashCode(uint64_t input) { + uint64_t local_rand = input; + uint64_t local_rand_hash = 8; + local_rand_hash = 40343 * local_rand_hash + ((local_rand) & 0xFFFF); + local_rand_hash = 40343 * local_rand_hash + ((local_rand >> 16) & 0xFFFF); + local_rand_hash = 40343 * local_rand_hash + ((local_rand >> 32) & 0xFFFF); + local_rand_hash = 40343 * local_rand_hash + (local_rand >> 48); + local_rand_hash = 40343 * local_rand_hash; + return Rotr64(local_rand_hash, 43); + //Func hash = + // e => 40343 * (40343 * (40343 * (40343 * (40343 * 8 + (long)((e) & 0xFFFF)) + (long)((e >> 16) & 0xFFFF)) + (long)((e >> 32) & 0xFFFF)) + (long)(e >> 48)); + } + + static inline uint64_t HashBytes(const uint16_t* str, size_t len) { + // 40343 is a "magic constant" that works well, + // 38299 is another good value. + // Both are primes and have a good distribution of bits. + const uint64_t kMagicNum = 40343; + uint64_t hashState = len; + + for(size_t idx = 0; idx < len; ++idx) { + hashState = kMagicNum * hashState + str[idx]; + } + + // The final scrambling helps with short keys that vary only on the high order bits. + // Low order bits are not always well distributed so shift them to the high end, where they'll + // form part of the 14-bit tag. 
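+    // Usage sketch (the key is supplied as 16-bit words, per the signature above):
+    //
+    //   const uint16_t key[] = { 1, 2, 3, 4 };
+    //   uint64_t h = Utility::HashBytes(key, 4);
+    //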
+ return Rotr64(kMagicNum * hashState, 6); + } + + static constexpr inline bool IsPowerOfTwo(uint64_t x) { + return (x > 0) && ((x & (x - 1)) == 0); + } +}; + +} +} // namespace FASTER::core diff --git a/cc/src/device/file_system_disk.h b/cc/src/device/file_system_disk.h new file mode 100644 index 000000000..3c076e1d1 --- /dev/null +++ b/cc/src/device/file_system_disk.h @@ -0,0 +1,527 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include + +#include "../core/gc_state.h" +#include "../core/light_epoch.h" +#include "../core/utility.h" +#include "../environment/file.h" + +/// Wrapper that exposes files to FASTER. Encapsulates segmented files, etc. + +namespace FASTER { +namespace device { + +template +class FileSystemDisk; + +template +class FileSystemFile { + public: + typedef H handler_t; + typedef typename handler_t::async_file_t file_t; + + /// Default constructor + FileSystemFile() + : file_{} + , file_options_{} { + } + + FileSystemFile(const std::string& filename, const environment::FileOptions& file_options) + : file_{ filename } + , file_options_{ file_options } { + } + + /// Move constructor. + FileSystemFile(FileSystemFile&& other) + : file_{ std::move(other.file_) } + , file_options_{ other.file_options_ } { + } + + /// Move assignment operator. + FileSystemFile& operator=(FileSystemFile&& other) { + file_ = std::move(other.file_); + file_options_ = other.file_options_; + return *this; + } + + Status Open(handler_t* handler) { + return file_.Open(FASTER::environment::FileCreateDisposition::OpenOrCreate, file_options_, + handler, nullptr); + } + Status Close() { + return file_.Close(); + } + Status Delete() { + return file_.Delete(); + } + void Truncate(uint64_t new_begin_offset, GcState::truncate_callback_t callback) { + // Truncation is a no-op. + if(callback) { + callback(new_begin_offset); + } + } + + Status ReadAsync(uint64_t source, void* dest, uint32_t length, + AsyncIOCallback callback, IAsyncContext& context) const { + return file_.Read(source, length, reinterpret_cast(dest), context, callback); + } + Status WriteAsync(const void* source, uint64_t dest, uint32_t length, + AsyncIOCallback callback, IAsyncContext& context) { + return file_.Write(dest, length, reinterpret_cast(source), context, callback); + } + + size_t alignment() const { + return file_.device_alignment(); + } + + private: + file_t file_; + environment::FileOptions file_options_; +}; + +/// Manages a bundle of segment files. 
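+/// Illustrative note: a "segment" is one fixed-size slice of the logical log, stored as its own
+/// file named <base filename><segment#>. With a segment size S of 1 GB (an example value only),
+/// a logical offset maps to a segment and an offset within it as in FileSystemSegmentedFile:
+///
+///   uint64_t offset  = 5368709120ULL;           // some logical log offset (5 GB)
+///   uint64_t segment = offset / kSegmentSize;   // -> 5, i.e. file "log.log5"
+///   uint64_t within  = offset % kSegmentSize;   // -> 0
+///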
+template +class FileSystemSegmentBundle { + public: + typedef H handler_t; + typedef FileSystemFile file_t; + typedef FileSystemSegmentBundle bundle_t; + + FileSystemSegmentBundle(const std::string& filename, + const environment::FileOptions& file_options, handler_t* handler, + uint64_t begin_segment_, uint64_t end_segment_) + : filename_{ filename } + , file_options_{ file_options } + , begin_segment{ begin_segment_ } + , end_segment{ end_segment_ } + , owner_{ true } { + for(uint64_t idx = begin_segment; idx < end_segment; ++idx) { + new(files() + (idx - begin_segment)) file_t{ filename_ + std::to_string(idx), + file_options_ }; + Status result = file(idx).Open(handler); + assert(result == Status::Ok); + } + } + + FileSystemSegmentBundle(handler_t* handler, uint64_t begin_segment_, uint64_t end_segment_, + bundle_t& other) + : filename_{ std::move(other.filename_) } + , file_options_{ other.file_options_ } + , begin_segment{ begin_segment_ } + , end_segment{ end_segment_ } + , owner_{ true } { + assert(end_segment >= other.end_segment); + + uint64_t begin_new = begin_segment; + uint64_t begin_copy = std::max(begin_segment, other.begin_segment); + uint64_t end_copy = std::min(end_segment, other.end_segment); + uint64_t end_new = end_segment; + + for(uint64_t idx = begin_segment; idx < begin_copy; ++idx) { + new(files() + (idx - begin_segment)) file_t{ filename_ + std::to_string(idx), + file_options_ }; + Status result = file(idx).Open(handler); + assert(result == Status::Ok); + } + for(uint64_t idx = begin_copy; idx < end_copy; ++idx) { + // Move file handles for segments already opened. + new(files() + (idx - begin_segment)) file_t{ std::move(other.file(idx)) }; + } + for(uint64_t idx = end_copy; idx < end_new; ++idx) { + new(files() + (idx - begin_segment)) file_t{ filename_ + std::to_string(idx), + file_options_ }; + Status result = file(idx).Open(handler); + assert(result == Status::Ok); + } + + other.owner_ = false; + } + + ~FileSystemSegmentBundle() { + if(owner_) { + for(uint64_t idx = begin_segment; idx < end_segment; ++idx) { + file(idx).~file_t(); + } + } + } + + Status Close() { + assert(owner_); + Status result = Status::Ok; + for(uint64_t idx = begin_segment; idx < end_segment; ++idx) { + Status r = file(idx).Close(); + if(r != Status::Ok) { + // We'll report the last error. + result = r; + } + } + return result; + } + + Status Delete() { + assert(owner_); + Status result = Status::Ok; + for(uint64_t idx = begin_segment; idx < end_segment; ++idx) { + Status r = file(idx).Delete(); + if(r != Status::Ok) { + // We'll report the last error. 
+ result = r; + } + } + return result; + } + + file_t* files() { + return reinterpret_cast(this + 1); + } + file_t& file(uint64_t segment) { + assert(segment >= begin_segment); + return files()[segment - begin_segment]; + } + bool exists(uint64_t segment) const { + return segment >= begin_segment && segment < end_segment; + } + + static constexpr uint64_t size(uint64_t num_segments) { + return sizeof(bundle_t) + num_segments * sizeof(file_t); + } + + public: + const uint64_t begin_segment; + const uint64_t end_segment; + private: + std::string filename_; + environment::FileOptions file_options_; + bool owner_; +}; + +template +class FileSystemSegmentedFile { + public: + typedef H handler_t; + typedef FileSystemFile file_t; + typedef FileSystemSegmentBundle bundle_t; + + static constexpr uint64_t kSegmentSize = S; + static_assert(Utility::IsPowerOfTwo(S), "template parameter S is not a power of two!"); + + FileSystemSegmentedFile(const std::string& filename, + const environment::FileOptions& file_options, LightEpoch* epoch) + : begin_segment_{ 0 } + , files_{ nullptr } + , handler_{ nullptr } + , filename_{ filename } + , file_options_{ file_options } + , epoch_{ epoch } { + } + + ~FileSystemSegmentedFile() { + bundle_t* files = files_.load(); + if(files) { + files->~bundle_t(); + std::free(files); + } + } + + Status Open(handler_t* handler) { + handler_ = handler; + return Status::Ok; + } + Status Close() { + return (files_) ? files_->Close() : Status::Ok; + } + Status Delete() { + return (files_) ? files_->Delete() : Status::Ok; + } + void Truncate(uint64_t new_begin_offset, GcState::truncate_callback_t callback) { + uint64_t new_begin_segment = new_begin_offset / kSegmentSize; + begin_segment_ = new_begin_segment; + TruncateSegments(new_begin_segment, callback); + } + + Status ReadAsync(uint64_t source, void* dest, uint32_t length, AsyncIOCallback callback, + IAsyncContext& context) const { + uint64_t segment = source / kSegmentSize; + assert(source % kSegmentSize + length <= kSegmentSize); + + bundle_t* files = files_.load(); + + if(!files || !files->exists(segment)) { + Status result = const_cast*>(this)->OpenSegment(segment); + if(result != Status::Ok) { + return result; + } + files = files_.load(); + } + return files->file(segment).ReadAsync(source % kSegmentSize, dest, length, callback, context); + } + + Status WriteAsync(const void* source, uint64_t dest, uint32_t length, + AsyncIOCallback callback, IAsyncContext& context) { + uint64_t segment = dest / kSegmentSize; + assert(dest % kSegmentSize + length <= kSegmentSize); + + bundle_t* files = files_.load(); + + if(!files || !files->exists(segment)) { + Status result = OpenSegment(segment); + if(result != Status::Ok) { + return result; + } + files = files_.load(); + } + return files->file(segment).WriteAsync(source, dest % kSegmentSize, length, callback, context); + } + + size_t alignment() const { + return 512; // For now, assume all disks have 512-bytes alignment. + } + + private: + Status OpenSegment(uint64_t segment) { + class Context : public IAsyncContext { + public: + Context(void* files_) + : files{ files_ } { + } + /// The deep-copy constructor. 
+ Context(const Context& other) + : files{ other.files} { + } + protected: + Status DeepCopy_Internal(IAsyncContext*& context_copy) final { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + public: + void* files; + }; + + auto callback = [](IAsyncContext* ctxt) { + CallbackContext context{ ctxt }; + std::free(context->files); + }; + + // Only one thread can modify the list of files at a given time. + std::lock_guard lock{ mutex_ }; + bundle_t* files = files_.load(); + + if(segment < begin_segment_) { + // The requested segment has been truncated. + return Status::IOError; + } + if(files && files->exists(segment)) { + // Some other thread already opened this segment for us. + return Status::Ok; + } + + if(!files) { + // First segment opened. + void* buffer = std::malloc(bundle_t::size(1)); + bundle_t* new_files = new(buffer) bundle_t{ filename_, file_options_, handler_, + segment, segment + 1 }; + files_.store(new_files); + return Status::Ok; + } + + // Expand the list of files_. + uint64_t new_begin_segment = std::min(files->begin_segment, segment); + uint64_t new_end_segment = std::max(files->end_segment, segment + 1); + void* buffer = std::malloc(bundle_t::size(new_end_segment - new_begin_segment)); + bundle_t* new_files = new(buffer) bundle_t{ handler_, new_begin_segment, new_end_segment, + *files }; + files_.store(new_files); + // Delete the old list only after all threads have finished looking at it. + Context context{ files }; + IAsyncContext* context_copy; + Status result = context.DeepCopy(context_copy); + assert(result == Status::Ok); + epoch_->BumpCurrentEpoch(callback, context_copy); + return Status::Ok; + } + + void TruncateSegments(uint64_t new_begin_segment, GcState::truncate_callback_t caller_callback) { + class Context : public IAsyncContext { + public: + Context(bundle_t* files_, uint64_t new_begin_segment_, + GcState::truncate_callback_t caller_callback_) + : files{ files_ } + , new_begin_segment{ new_begin_segment_ } + , caller_callback{ caller_callback_ } { + } + /// The deep-copy constructor. + Context(const Context& other) + : files{ other.files } + , new_begin_segment{ other.new_begin_segment } + , caller_callback{ other.caller_callback } { + } + protected: + Status DeepCopy_Internal(IAsyncContext*& context_copy) final { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + public: + bundle_t* files; + uint64_t new_begin_segment; + GcState::truncate_callback_t caller_callback; + }; + + auto callback = [](IAsyncContext* ctxt) { + CallbackContext context{ ctxt }; + for(uint64_t idx = context->files->begin_segment; idx < context->new_begin_segment; ++idx) { + file_t& file = context->files->file(idx); + file.Close(); + file.Delete(); + } + std::free(context->files); + if(context->caller_callback) { + context->caller_callback(context->new_begin_segment * kSegmentSize); + } + }; + + // Only one thread can modify the list of files at a given time. + std::lock_guard lock{ mutex_ }; + bundle_t* files = files_.load(); + assert(files); + if(files->begin_segment >= new_begin_segment) { + // Segments have already been truncated. + if(caller_callback) { + caller_callback(files->begin_segment * kSegmentSize); + } + return; + } + + // Make a copy of the list, excluding the files to be truncated. 
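+    // Illustrative note on the reclamation pattern used here (and in OpenSegment() above):
+    // readers use files_ without taking mutex_, so the old bundle cannot be freed inline.
+    // Instead the new list is published first and the old one is handed to the epoch manager,
+    // which frees it only after every thread has moved past the current epoch:
+    //
+    //   files_.store(new_files);                           // publish the new list
+    //   epoch_->BumpCurrentEpoch(callback, context_copy);  // old list freed later, safely
+    //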
+ void* buffer = std::malloc(bundle_t::size(files->end_segment - new_begin_segment)); + bundle_t* new_files = new(buffer) bundle_t{ handler_, new_begin_segment, files->end_segment, + *files }; + files_.store(new_files); + // Delete the old list only after all threads have finished looking at it. + Context context{ files, new_begin_segment, caller_callback }; + IAsyncContext* context_copy; + Status result = context.DeepCopy(context_copy); + assert(result == Status::Ok); + epoch_->BumpCurrentEpoch(callback, context_copy); + } + + std::atomic begin_segment_; + std::atomic files_; + handler_t* handler_; + std::string filename_; + environment::FileOptions file_options_; + LightEpoch* epoch_; + std::mutex mutex_; +}; + +template +class FileSystemDisk { + public: + typedef H handler_t; + typedef FileSystemFile file_t; + typedef FileSystemSegmentedFile log_file_t; + + private: + static std::string NormalizePath(std::string root_path) { + if(root_path.empty() || root_path.back() != FASTER::environment::kPathSeparator[0]) { + root_path += FASTER::environment::kPathSeparator; + } + return root_path; + } + + public: + FileSystemDisk(const std::string& root_path, LightEpoch& epoch, bool enablePrivileges = false, + bool unbuffered = true, bool delete_on_close = false) + : root_path_{ NormalizePath(root_path) } + , handler_{ 16 /*max threads*/ } + , default_file_options_{ unbuffered, delete_on_close } + , log_{ root_path_ + "log.log", default_file_options_, &epoch} { + Status result = log_.Open(&handler_); + assert(result == Status::Ok); + } + + /// Methods required by the (implicit) disk interface. + uint32_t sector_size() const { + return static_cast(log_.alignment()); + } + + const log_file_t& log() const { + return log_; + } + log_file_t& log() { + return log_; + } + + std::string relative_index_checkpoint_path(uint32_t version) const { + std::string retval = "index-checkpoints"; + retval += FASTER::environment::kPathSeparator; + retval += std::to_string(version); + retval += FASTER::environment::kPathSeparator; + return retval; + } + std::string index_checkpoint_path(uint32_t version) const { + return root_path_ + relative_index_checkpoint_path(version); + } + + std::string relative_cpr_checkpoint_path(uint32_t version) const { + std::string retval = "cpr-checkpoints"; + retval += FASTER::environment::kPathSeparator; + retval += std::to_string(version); + retval += FASTER::environment::kPathSeparator; + return retval; + } + std::string cpr_checkpoint_path(uint32_t version) const { + return root_path_ + relative_cpr_checkpoint_path(version); + } + + void CreateIndexCheckpointDirectory(uint32_t version) { + std::string index_dir = index_checkpoint_path(version); + std::experimental::filesystem::path path{ index_dir }; + try { + std::experimental::filesystem::remove_all(path); + } catch(std::experimental::filesystem::filesystem_error&) { + // Ignore; throws when path doesn't exist yet. + } + std::experimental::filesystem::create_directories(path); + } + + void CreateCprCheckpointDirectory(uint32_t version) { + std::string cpr_dir = cpr_checkpoint_path(version); + std::experimental::filesystem::path path{ cpr_dir }; + try { + std::experimental::filesystem::remove_all(path); + } catch(std::experimental::filesystem::filesystem_error&) { + // Ignore; throws when path doesn't exist yet. 
+ } + std::experimental::filesystem::create_directories(path); + } + + file_t NewFile(const std::string& relative_path) { + return file_t{ root_path_ + relative_path, default_file_options_ }; + } + + /// Implementation-specific accessor. + handler_t& handler() { + return handler_; + } + + bool TryComplete() { + return handler_.TryComplete(); + } + + private: + std::string root_path_; + handler_t handler_; + + environment::FileOptions default_file_options_; + + /// Store the log (contains all records). + log_file_t log_; +}; + +} +} // namespace FASTER::device diff --git a/cc/src/device/null_disk.h b/cc/src/device/null_disk.h new file mode 100644 index 000000000..0034fa290 --- /dev/null +++ b/cc/src/device/null_disk.h @@ -0,0 +1,124 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include + +#include "../core/gc_state.h" +#include "../core/light_epoch.h" +#include "../environment/file.h" + +namespace FASTER { +namespace device { + +/// A dummy (null) disk, used when you want an in-memory-only FASTER store. + +struct NullHandler { +}; + +class NullFile { + public: + Status Open(NullHandler* handler) { + return Status::Ok; + } + Status Close() { + return Status::Ok; + } + Status Delete() { + return Status::Ok; + } + void Truncate(uint64_t new_begin_offset, GcState::truncate_callback_t callback) { + if(callback) { + callback(new_begin_offset); + } + } + + Status ReadAsync(uint64_t source, void* dest, uint32_t length, + AsyncIOCallback callback, IAsyncContext& context) const { + callback(&context, Status::Ok, length); + return Status::Ok; + } + Status WriteAsync(const void* source, uint64_t dest, uint32_t length, + AsyncIOCallback callback, IAsyncContext& context) { + callback(&context, Status::Ok, length); + return Status::Ok; + } + + static size_t alignment() { + // Align null device to cache line. + return 64; + } + + void set_handler(NullHandler* handler) { + } +}; + +class NullDisk { + public: + typedef NullHandler handler_t; + typedef NullFile file_t; + typedef NullFile log_file_t; + + NullDisk(const std::string& filename, LightEpoch& epoch) { + } + + static uint32_t sector_size() { + return 64; + } + + /// Methods required by the (implicit) disk interface. + const file_t& log() const { + return log_; + } + file_t& log() { + return log_; + } + + std::string relative_index_checkpoint_path(uint32_t version) const { + assert(false); + return ""; + } + std::string index_checkpoint_path(uint32_t version) const { + assert(false); + return ""; + } + + std::string relative_cpr_checkpoint_path(uint32_t version) const { + assert(false); + return ""; + } + std::string cpr_checkpoint_path(uint32_t version) const { + assert(false); + return ""; + } + + void CreateIndexCheckpointDirectory(uint32_t version) { + assert(false); + } + void CreateCprCheckpointDirectory(uint32_t version) { + assert(false); + } + + file_t NewFile(const std::string& relative_path) { + assert(false); + return file_t{}; + } + + handler_t& handler() { + return handler_; + } + + inline static constexpr bool TryComplete() { + return false; + } + + private: + handler_t handler_; + file_t log_; +}; + +} +} // namespace FASTER::device \ No newline at end of file diff --git a/cc/src/environment/file.h b/cc/src/environment/file.h new file mode 100644 index 000000000..34cac012c --- /dev/null +++ b/cc/src/environment/file.h @@ -0,0 +1,10 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
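+// Illustrative note: the two platform headers expose the same surface (a File type plus an I/O
+// handler -- QueueIoHandler on Linux, ThreadPoolIoHandler on Windows), so device code such as
+// FileSystemDisk can be instantiated against either one. A sketch of the usual selection
+// (assuming FileSystemDisk's template parameters are the handler type and the segment size,
+// mirroring FileSystemSegmentedFile):
+//
+//   #ifdef _WIN32
+//   typedef FASTER::environment::ThreadPoolIoHandler handler_t;
+//   #else
+//   typedef FASTER::environment::QueueIoHandler handler_t;
+//   #endif
+//   typedef FASTER::device::FileSystemDisk<handler_t, 1073741824ULL /*1 GB segments*/> disk_t;
+//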
+ +#pragma once + +#ifdef _WIN32 +#include "file_windows.h" +#else +#include "file_linux.h" +#endif diff --git a/cc/src/environment/file_common.h b/cc/src/environment/file_common.h new file mode 100644 index 000000000..af8c8fa3b --- /dev/null +++ b/cc/src/environment/file_common.h @@ -0,0 +1,60 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include + +#include "../core/async.h" +#include "../core/lss_allocator.h" + +using namespace FASTER::core; + +namespace FASTER { +namespace environment { + +enum class FileCreateDisposition : uint8_t { + /// Creates the file if it does not exist; truncates it if it does. + CreateOrTruncate, + /// Opens the file if it exists; creates it if it does not. + OpenOrCreate, + /// Opens the file if it exists. + OpenExisting +}; + +inline std::ostream& operator<<(std::ostream& os, FileCreateDisposition val) { + switch(val) { + case FileCreateDisposition::CreateOrTruncate: + os << "CreateOrTruncate"; + break; + case FileCreateDisposition::OpenOrCreate: + os << "OpenOrCreate"; + break; + case FileCreateDisposition::OpenExisting: + os << "OpenExisting"; + break; + default: + os << "UNKNOWN: " << static_cast(val); + break; + } + return os; +} + +enum class FileOperationType : uint8_t { Read, Write }; + +struct FileOptions { + FileOptions() + : unbuffered{ false } + , delete_on_close{ false } { + } + FileOptions(bool unbuffered_, bool delete_on_close_) + : unbuffered{ unbuffered_ } + , delete_on_close{ delete_on_close_ } { + } + + bool unbuffered; + bool delete_on_close; +}; + +} +} // namespace FASTER::environment \ No newline at end of file diff --git a/cc/src/environment/file_linux.cc b/cc/src/environment/file_linux.cc new file mode 100644 index 000000000..2cbcf7b5c --- /dev/null +++ b/cc/src/environment/file_linux.cc @@ -0,0 +1,199 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include +#include +#include +#include +#include +#include "file_linux.h" + +namespace FASTER { +namespace environment { + +#ifdef _DEBUG +#define DCHECK_ALIGNMENT(o, l, b) \ +do { \ + assert(reinterpret_cast(b) % device_alignment() == 0); \ + assert((o) % device_alignment() == 0); \ + assert((l) % device_alignment() == 0); \ +} while (0) +#else +#define DCHECK_ALIGNMENT(o, l, b) do {} while(0) +#endif + +Status File::Open(int flags, FileCreateDisposition create_disposition, bool* exists) { + if(exists) { + *exists = false; + } + + int create_flags = GetCreateDisposition(create_disposition); + + /// Always unbuffered (O_DIRECT). + fd_ = ::open(filename_.c_str(), flags | O_RDWR | create_flags, S_IRUSR | S_IWUSR); + + if(exists) { + // Let the caller know whether the file we tried to open or create (already) exists. + if(create_disposition == FileCreateDisposition::CreateOrTruncate || + create_disposition == FileCreateDisposition::OpenOrCreate) { + *exists = (errno == EEXIST); + } else if(create_disposition == FileCreateDisposition::OpenExisting) { + *exists = (errno != ENOENT); + if(!*exists) { + // The file doesn't exist. Don't return an error, since the caller is expecting this case. 
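+      // Illustrative caller-side sketch of this protocol (QueueFile is the async file type
+      // declared in file_linux.h; handler and options are the caller's):
+      //
+      //   bool exists = false;
+      //   Status s = file.Open(FileCreateDisposition::OpenExisting, options, &handler, &exists);
+      //   if(s == Status::Ok && !exists) {
+      //     // Not an I/O error: the file simply was not there, and was not opened.
+      //   }
+      //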
+ return Status::Ok; + } + } + } + if(fd_ == -1) { + int error = errno; + return Status::IOError; + } + + Status result = GetDeviceAlignment(); + if(result != Status::Ok) { + Close(); + } + owner_ = true; + return result; +} + +Status File::Close() { + if(fd_ != -1) { + int result = ::close(fd_); + fd_ = -1; + if(result == -1) { + int error = errno; + return Status::IOError; + } + } + owner_ = false; + return Status::Ok; +} + +Status File::Delete() { + int result = ::remove(filename_.c_str()); + if(result == -1) { + int error = errno; + return Status::IOError; + } +} + +Status File::GetDeviceAlignment() { + // For now, just hardcode 512-byte alignment. + device_alignment_ = 512; + return Status::Ok; +} + +int File::GetCreateDisposition(FileCreateDisposition create_disposition) { + switch(create_disposition) { + case FileCreateDisposition::CreateOrTruncate: + return O_CREAT | O_TRUNC; + case FileCreateDisposition::OpenOrCreate: + return O_CREAT; + case FileCreateDisposition::OpenExisting: + return 0; + default: + assert(false); + return 0; // not reached + } +} + +void QueueIoHandler::IoCompletionCallback(io_context_t ctx, struct iocb* iocb, long res, + long res2) { + auto callback_context = make_context_unique_ptr( + reinterpret_cast(iocb)); + size_t bytes_transferred; + Status return_status; + if(res < 0) { + return_status = Status::IOError; + bytes_transferred = 0; + } else { + return_status = Status::Ok; + bytes_transferred = res; + } + callback_context->callback(callback_context->caller_context, return_status, bytes_transferred); +} + +bool QueueIoHandler::TryComplete() { + struct timespec timeout; + std::memset(&timeout, 0, sizeof(timeout)); + struct io_event events[1]; + int result = ::io_getevents(io_object_, 1, 1, events, &timeout); + if(result == 1) { + io_callback_t callback = reinterpret_cast(events[0].data); + callback(io_object_, events[0].obj, events[0].res, events[0].res2); + return true; + } else { + return false; + } +} + +Status QueueFile::Open(FileCreateDisposition create_disposition, const FileOptions& options, + QueueIoHandler* handler, bool* exists) { + int flags = 0; + if(options.unbuffered) { + flags |= O_DIRECT; + } + RETURN_NOT_OK(File::Open(flags, create_disposition, exists)); + if(exists && !*exists) { + return Status::Ok; + } + + io_object_ = handler->io_object(); + return Status::Ok; +} + +Status QueueFile::Read(size_t offset, uint32_t length, uint8_t* buffer, + IAsyncContext& context, AsyncIOCallback callback) const { + DCHECK_ALIGNMENT(offset, length, buffer); +#ifdef IO_STATISTICS + ++read_count_; + bytes_read_ += length; +#endif + return const_cast(this)->ScheduleOperation(FileOperationType::Read, buffer, + offset, length, context, callback); +} + +Status QueueFile::Write(size_t offset, uint32_t length, const uint8_t* buffer, + IAsyncContext& context, AsyncIOCallback callback) { + DCHECK_ALIGNMENT(offset, length, buffer); +#ifdef IO_STATISTICS + bytes_written_ += length; +#endif + return ScheduleOperation(FileOperationType::Write, const_cast(buffer), offset, length, + context, callback); +} + +Status QueueFile::ScheduleOperation(FileOperationType operationType, uint8_t* buffer, + size_t offset, uint32_t length, IAsyncContext& context, + AsyncIOCallback callback) { + auto io_context = alloc_context(sizeof( + QueueIoHandler::IoCallbackContext)); + if(!io_context.get()) return Status::OutOfMemory; + + IAsyncContext* caller_context_copy; + RETURN_NOT_OK(context.DeepCopy(caller_context_copy)); + + new(io_context.get()) 
QueueIoHandler::IoCallbackContext(operationType, fd_, offset, length, + buffer, caller_context_copy, callback); + + struct iocb* iocbs[1]; + iocbs[0] = reinterpret_cast(io_context.get()); + + int result = ::io_submit(io_object_, 1, iocbs); + if(result != 1) { + return Status::IOError; + } + + io_context.release(); + return Status::Ok; +} + +#undef DCHECK_ALIGNMENT + +} +} // namespace FASTER::environment diff --git a/cc/src/environment/file_linux.h b/cc/src/environment/file_linux.h new file mode 100644 index 000000000..500576e42 --- /dev/null +++ b/cc/src/environment/file_linux.h @@ -0,0 +1,254 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "../core/async.h" +#include "../core/status.h" +#include "file_common.h" + +namespace FASTER { +namespace environment { + +constexpr const char* kPathSeparator = "/"; + +/// The File class encapsulates the OS file handle. +class File { + protected: + File() + : fd_{ -1 } + , device_alignment_{ 0 } + , filename_{} + , owner_{ false } +#ifdef IO_STATISTICS + , bytes_written_ { 0 } + , read_count_{ 0 } + , bytes_read_{ 0 } +#endif + { + } + + File(const std::string& filename) + : fd_{ -1 } + , device_alignment_{ 0 } + , filename_{ filename } + , owner_{ false } +#ifdef IO_STATISTICS + , bytes_written_ { 0 } + , read_count_{ 0 } + , bytes_read_{ 0 } +#endif + { + } + + /// Move constructor. + File(File&& other) + : fd_{ other.fd_ } + , device_alignment_{ other.device_alignment_ } + , filename_{ std::move(other.filename_) } + , owner_{ other.owner_ } +#ifdef IO_STATISTICS + , bytes_written_ { other.bytes_written_ } + , read_count_{ other.read_count_ } + , bytes_read_{ other.bytes_read_ } +#endif + { + other.owner_ = false; + } + + ~File() { + if(owner_) { + Status s = Close(); + } + } + + /// Move assignment operator. + File& operator=(File&& other) { + fd_ = other.fd_; + device_alignment_ = other.device_alignment_; + filename_ = std::move(other.filename_); + owner_ = other.owner_; +#ifdef IO_STATISTICS + bytes_written_ = other.bytes_written_; + read_count_ = other.read_count_; + bytes_read_ = other.bytes_read_; +#endif + other.owner_ = -1; + return *this; + } + + protected: + Status Open(int flags, FileCreateDisposition create_disposition, bool* exists = nullptr); + + public: + Status Close(); + Status Delete(); + + uint64_t size() const { + struct stat stat_buffer; + int result = ::fstat(fd_, &stat_buffer); + return (result == 0) ? stat_buffer.st_size : 0; + } + + size_t device_alignment() const { + return device_alignment_; + } + + const std::string& filename() const { + return filename_; + } + +#ifdef IO_STATISTICS + uint64_t bytes_written() const { + return bytes_written_.load(); + } + uint64_t read_count() const { + return read_count_.load(); + } + uint64_t bytes_read() const { + return bytes_read_.load(); + } +#endif + + private: + Status GetDeviceAlignment(); + static int GetCreateDisposition(FileCreateDisposition create_disposition); + + protected: + int fd_; + + private: + size_t device_alignment_; + std::string filename_; + bool owner_; + +#ifdef IO_STATISTICS + protected: + std::atomic bytes_written_; + std::atomic read_count_; + std::atomic bytes_read_; +#endif +}; + +class QueueFile; + +/// The QueueIoHandler class encapsulates completions for async file I/O, where the completions +/// are put on the AIO completion queue. 
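+/// Illustrative sketch: completions are not delivered on a background thread; whoever owns the
+/// handler polls for them (FASTER does so via the disk's TryComplete()), e.g.
+///
+///   QueueIoHandler handler{ 16 /*max threads*/ };
+///   // ... issue QueueFile::Read()/Write() calls on files opened against this handler ...
+///   while(handler.TryComplete()) {
+///     // each successful call drained one completed I/O and ran its callback
+///   }
+///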
+class QueueIoHandler { + public: + typedef QueueFile async_file_t; + + private: + constexpr static int kMaxEvents = 128; + + public: + QueueIoHandler() + : io_object_{ 0 } { + } + QueueIoHandler(size_t max_threads) + : io_object_{ 0 } { + int result = ::io_setup(kMaxEvents, &io_object_); + assert(result >= 0); + } + + /// Move constructor + QueueIoHandler(QueueIoHandler&& other) { + io_object_ = other.io_object_; + other.io_object_ = 0; + } + + ~QueueIoHandler() { + if(io_object_ != 0) + ::io_destroy(io_object_); + } + + /// Invoked whenever a Linux AIO completes. + static void IoCompletionCallback(io_context_t ctx, struct iocb* iocb, long res, long res2); + + struct IoCallbackContext { + IoCallbackContext(FileOperationType operation, int fd, size_t offset, uint32_t length, + uint8_t* buffer, IAsyncContext* context_, AsyncIOCallback callback_) + : caller_context{ context_ } + , callback{ callback_ } { + if(FileOperationType::Read == operation) { + ::io_prep_pread(&this->parent_iocb, fd, buffer, length, offset); + } else { + ::io_prep_pwrite(&this->parent_iocb, fd, buffer, length, offset); + } + ::io_set_callback(&this->parent_iocb, IoCompletionCallback); + } + + // WARNING: "parent_iocb" must be the first field in AioCallbackContext. This class is a C-style + // subclass of "struct iocb". + + /// The iocb structure for Linux AIO. + struct iocb parent_iocb; + + /// Caller callback context. + IAsyncContext* caller_context; + + /// The caller's asynchronous callback function + AsyncIOCallback callback; + }; + + inline io_context_t io_object() const { + return io_object_; + } + + /// Try to execute the next IO completion on the queue, if any. + bool TryComplete(); + + private: + /// The Linux AIO context used for IO completions. + io_context_t io_object_; +}; + +/// The QueueFile class encapsulates asynchronous reads and writes, using the specified AIO +/// context. +class QueueFile : public File { + public: + QueueFile() + : File() + , io_object_{ nullptr } { + } + QueueFile(const std::string& filename) + : File(filename) + , io_object_{ nullptr } { + } + /// Move constructor + QueueFile(QueueFile&& other) + : File(std::move(other)) + , io_object_{ other.io_object_ } { + } + /// Move assignment operator. + QueueFile& operator=(QueueFile&& other) { + File::operator=(std::move(other)); + io_object_ = other.io_object_; + return *this; + } + + Status Open(FileCreateDisposition create_disposition, const FileOptions& options, + QueueIoHandler* handler, bool* exists = nullptr); + + Status Read(size_t offset, uint32_t length, uint8_t* buffer, + IAsyncContext& context, AsyncIOCallback callback) const; + Status Write(size_t offset, uint32_t length, const uint8_t* buffer, + IAsyncContext& context, AsyncIOCallback callback); + + private: + Status ScheduleOperation(FileOperationType operationType, uint8_t* buffer, size_t offset, + uint32_t length, IAsyncContext& context, AsyncIOCallback callback); + + io_context_t io_object_; +}; + +} +} // namespace FASTER::environment diff --git a/cc/src/environment/file_windows.cc b/cc/src/environment/file_windows.cc new file mode 100644 index 000000000..a0dd2f975 --- /dev/null +++ b/cc/src/environment/file_windows.cc @@ -0,0 +1,372 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
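A note on the intended call pattern for the Linux wrappers above: QueueIoHandler owns a kernel AIO context (io_setup/io_destroy), QueueFile::Read/Write pack each request into an IoCallbackContext whose leading field is the iocb (hence the C-style-subclass warning), and completions are delivered only when the caller polls TryComplete(). The following is a minimal sketch, not part of this patch; it assumes FileOptions is default-constructible, that cc/src is on the include path, and that AsyncIOCallback matches the way IoCompletionCallback invokes it.

// Minimal usage sketch (illustrative only, not part of this commit).
#include <atomic>
#include <cstdint>
#include <cstdlib>
#include "environment/file_linux.h"

using namespace FASTER::core;
using namespace FASTER::environment;

// Caller-side context; the explicit interface requires DeepCopy_Internal().
class SketchContext : public IAsyncContext {
 protected:
  Status DeepCopy_Internal(IAsyncContext*& context_copy) {
    return IAsyncContext::DeepCopy_Internal(*this, context_copy);
  }
};

static std::atomic<bool> io_done{ false };

int main() {
  QueueIoHandler handler{ 16 };    // io_setup() with kMaxEvents completion slots
  QueueFile file{ "sketch.dat" };

  bool exists = false;
  FileOptions options;             // assumed default-constructible (buffered I/O)
  if(file.Open(FileCreateDisposition::OpenOrCreate, options, &handler, &exists) != Status::Ok) {
    return 1;
  }

  // Offset, length, and buffer must satisfy DCHECK_ALIGNMENT when unbuffered (O_DIRECT).
  uint8_t* buffer = static_cast<uint8_t*>(std::aligned_alloc(512, 4096));
  SketchContext context;
  auto callback = [](IAsyncContext* ctxt, Status result, size_t bytes_transferred) {
    // ctxt is the deep-copied context; freeing it is omitted here for brevity.
    io_done.store(true);
  };

  // For a freshly created empty file this completes with zero bytes transferred.
  Status s = file.Read(0, 4096, buffer, context, callback);

  // Completions are not pushed from a background thread; the caller polls the AIO queue.
  while(s == Status::Ok && !io_done.load()) {
    handler.TryComplete();
  }

  std::free(buffer);
  file.Close();
  return 0;
}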
+ +#include +#include +#include +#include "file.h" + +using namespace FASTER::core; + +namespace FASTER { +namespace environment { +std::string FormatWin32AndHRESULT(DWORD win32_result) { + std::stringstream ss; + ss << "Win32(" << win32_result << ") HRESULT(" + << std::showbase << std::uppercase << std::setfill('0') << std::hex + << HRESULT_FROM_WIN32(win32_result) << ")"; + return ss.str(); +} + +#ifdef _DEBUG +#define DCHECK_ALIGNMENT(o, l, b) \ +do { \ + assert(reinterpret_cast(b) % device_alignment() == 0); \ + assert((o) % device_alignment() == 0); \ + assert((l) % device_alignment() == 0); \ +} while (0) +#else +#define DCHECK_ALIGNMENT(o, l, b) do {} while(0) +#endif + +Status File::Open(DWORD flags, FileCreateDisposition create_disposition, bool* exists) { + assert(!filename_.empty()); + if(exists) { + *exists = false; + } + + file_handle_ = ::CreateFileA(filename_.c_str(), GENERIC_READ | GENERIC_WRITE, 0, nullptr, + GetCreateDisposition(create_disposition), flags, nullptr); + if(exists) { + // Let the caller know whether the file we tried to open or create (already) exists. + if(create_disposition == FileCreateDisposition::CreateOrTruncate || + create_disposition == FileCreateDisposition::OpenOrCreate) { + *exists = (::GetLastError() == ERROR_ALREADY_EXISTS); + } else if(create_disposition == FileCreateDisposition::OpenExisting) { + *exists = (::GetLastError() != ERROR_FILE_NOT_FOUND); + if(!*exists) { + // The file doesn't exist. Don't return an error, since the caller is expecting this case. + return Status::Ok; + } + } + } + if(file_handle_ == INVALID_HANDLE_VALUE) { + auto error = ::GetLastError(); + return Status::IOError; + } + + Status result = GetDeviceAlignment(); + if(result != Status::Ok) { + Close(); + } + owner_ = true; + return result; +} + +Status File::Close() { + if(file_handle_ != INVALID_HANDLE_VALUE) { + bool success = ::CloseHandle(file_handle_); + file_handle_ = INVALID_HANDLE_VALUE; + if(!success) { + auto error = ::GetLastError(); + return Status::IOError; + } + } + owner_ = false; + return Status::Ok; +} + +Status File::Delete() { + bool success = ::DeleteFileA(filename_.c_str()); + if(!success) { + auto error = ::GetLastError(); + return Status::IOError; + } + return Status::Ok; +} + +Status File::GetDeviceAlignment() { + FILE_STORAGE_INFO info; + bool result = ::GetFileInformationByHandleEx(file_handle_, + FILE_INFO_BY_HANDLE_CLASS::FileStorageInfo, &info, sizeof(info)); + if(!result) { + auto error = ::GetLastError(); + return Status::IOError; + } + + device_alignment_ = info.LogicalBytesPerSector; + return Status::Ok; +} + +DWORD File::GetCreateDisposition(FileCreateDisposition create_disposition) { + switch(create_disposition) { + case FileCreateDisposition::CreateOrTruncate: + return CREATE_ALWAYS; + case FileCreateDisposition::OpenOrCreate: + return OPEN_ALWAYS; + case FileCreateDisposition::OpenExisting: + return OPEN_EXISTING; + default: + assert(false); + return INVALID_FILE_ATTRIBUTES; // not reached + } +} + +void CALLBACK ThreadPoolIoHandler::IoCompletionCallback(PTP_CALLBACK_INSTANCE instance, + PVOID context, PVOID overlapped, ULONG ioResult, ULONG_PTR bytesTransferred, PTP_IO io) { + // context is always nullptr; state is threaded via the OVERLAPPED + auto callback_context = make_context_unique_ptr( + reinterpret_cast(overlapped)); + + HRESULT hr = HRESULT_FROM_WIN32(ioResult); + Status return_status; + if(FAILED(hr)) { + return_status = Status::IOError; + } else { + return_status = Status::Ok; + } + 
callback_context->callback(callback_context->caller_context, return_status, + static_cast(bytesTransferred)); +} + +WindowsPtpThreadPool::WindowsPtpThreadPool(size_t max_threads) + : pool_{ nullptr } + , callback_environment_{ nullptr } + , cleanup_group_{ nullptr } + , max_threads_{ max_threads } { + pool_ = ::CreateThreadpool(nullptr); + ::SetThreadpoolThreadMaximum(pool_, static_cast(max_threads)); + bool ret = ::SetThreadpoolThreadMinimum(pool_, 1); + if(!ret) { + throw std::runtime_error{ "Cannot set threadpool thread minimum to 1" }; + } + cleanup_group_ = ::CreateThreadpoolCleanupGroup(); + if(!cleanup_group_) { + throw std::runtime_error{ "Cannot create threadpool cleanup group" }; + } + + callback_environment_ = new TP_CALLBACK_ENVIRON{}; + + ::InitializeThreadpoolEnvironment(callback_environment_); + ::SetThreadpoolCallbackPool(callback_environment_, pool_); + ::SetThreadpoolCallbackPriority(callback_environment_, TP_CALLBACK_PRIORITY_LOW); + ::SetThreadpoolCallbackCleanupGroup(callback_environment_, cleanup_group_, nullptr); +} + +WindowsPtpThreadPool::~WindowsPtpThreadPool() { + if(!cleanup_group_) return; + + // Wait until all callbacks have finished. + ::CloseThreadpoolCleanupGroupMembers(cleanup_group_, FALSE, nullptr); + + ::DestroyThreadpoolEnvironment(callback_environment_); + + ::CloseThreadpoolCleanupGroup(cleanup_group_); + ::CloseThreadpool(pool_); + + delete callback_environment_; +} + +Status WindowsPtpThreadPool::Schedule(Task task, void* task_parameters) { + auto info = alloc_context(sizeof(TaskInfo)); + if(!info.get()) return Status::OutOfMemory; + new(info.get()) TaskInfo(); + + info->task = task; + info->task_parameters = task_parameters; + + PTP_WORK_CALLBACK ptp_callback = TaskStartSpringboard; + PTP_WORK work = CreateThreadpoolWork(ptp_callback, info.get(), callback_environment_); + if(!work) { + std::stringstream ss; + ss << "Failed to schedule work: " << FormatWin32AndHRESULT(::GetLastError()); + fprintf(stderr, "%s\n", ss.str().c_str()); + return Status::Aborted; + } + SubmitThreadpoolWork(work); + info.release(); + + return Status::Ok; +} + +void CALLBACK WindowsPtpThreadPool::TaskStartSpringboard(PTP_CALLBACK_INSTANCE instance, + PVOID parameter, PTP_WORK work) { + auto info = make_context_unique_ptr(reinterpret_cast(parameter)); + info->task(info->task_parameters); + CloseThreadpoolWork(work); +} + +Status ThreadPoolFile::Open(FileCreateDisposition create_disposition, const FileOptions& options, + ThreadPoolIoHandler* handler, bool* exists) { + DWORD flags = FILE_FLAG_RANDOM_ACCESS | FILE_FLAG_OVERLAPPED; + if(options.unbuffered) { + flags |= FILE_FLAG_NO_BUFFERING; + } + RETURN_NOT_OK(File::Open(flags, create_disposition, exists)); + if(exists && !*exists) { + return Status::Ok; + } + + io_object_ = ::CreateThreadpoolIo(file_handle_, handler->IoCompletionCallback, nullptr, + handler->callback_environment()); + if(!io_object_) { + Close(); + return Status::IOError; + } + return Status::Ok; +} + +Status ThreadPoolFile::Read(size_t offset, uint32_t length, uint8_t* buffer, + IAsyncContext& context, AsyncIOCallback callback) const { + DCHECK_ALIGNMENT(offset, length, buffer); +#ifdef IO_STATISTICS + ++read_count_; + bytes_read_ += length; +#endif + return const_cast(this)->ScheduleOperation(FileOperationType::Read, buffer, + offset, length, context, callback); +} + +Status ThreadPoolFile::Write(size_t offset, uint32_t length, const uint8_t* buffer, + IAsyncContext& context, AsyncIOCallback callback) { + DCHECK_ALIGNMENT(offset, length, buffer); +#ifdef 
IO_STATISTICS + bytes_written_ += length; +#endif + return ScheduleOperation(FileOperationType::Write, const_cast(buffer), offset, length, + context, callback); +} + +Status ThreadPoolFile::ScheduleOperation(FileOperationType operationType, uint8_t* buffer, + size_t offset, uint32_t length, IAsyncContext& context, AsyncIOCallback callback) { + auto io_context = alloc_context(sizeof( + ThreadPoolIoHandler::IoCallbackContext)); + if(!io_context.get()) return Status::OutOfMemory; + + IAsyncContext* caller_context_copy; + RETURN_NOT_OK(context.DeepCopy(caller_context_copy)); + + new(io_context.get()) ThreadPoolIoHandler::IoCallbackContext(offset, caller_context_copy, + callback); + + ::StartThreadpoolIo(io_object_); + + bool success = FALSE; + if(FileOperationType::Read == operationType) { + success = ::ReadFile(file_handle_, buffer, length, nullptr, &io_context->parent_overlapped); + } else { + success = ::WriteFile(file_handle_, buffer, length, nullptr, &io_context->parent_overlapped); + } + if(!success) { + DWORD win32_result = ::GetLastError(); + // Any error other than ERROR_IO_PENDING means the IO failed. Otherwise it will finish + // asynchronously on the threadpool + if(ERROR_IO_PENDING != win32_result) { + ::CancelThreadpoolIo(io_object_); + std::stringstream ss; + ss << "Failed to schedule async IO: " << FormatWin32AndHRESULT(win32_result); + fprintf(stderr, "%s\n", ss.str().c_str()); + return Status::IOError; + } + } + io_context.release(); + return Status::Ok; +} + +bool QueueIoHandler::TryComplete() { + DWORD bytes_transferred; + ULONG_PTR completion_key; + LPOVERLAPPED overlapped = NULL; + bool succeeded = ::GetQueuedCompletionStatus(io_completion_port_, &bytes_transferred, + &completion_key, &overlapped, 0); + if(overlapped) { + Status return_status; + if(!succeeded) { + return_status = Status::IOError; + } else { + return_status = Status::Ok; + } + auto callback_context = make_context_unique_ptr( + reinterpret_cast(overlapped)); + callback_context->callback(callback_context->caller_context, return_status, bytes_transferred); + return true; + } else { + return false; + } +} + +Status QueueFile::Open(FileCreateDisposition create_disposition, const FileOptions& options, + QueueIoHandler* handler, bool* exists) { + DWORD flags = FILE_FLAG_RANDOM_ACCESS | FILE_FLAG_OVERLAPPED; + if(options.unbuffered) { + flags |= FILE_FLAG_NO_BUFFERING; + } + RETURN_NOT_OK(File::Open(flags, create_disposition, exists)); + if(exists && !*exists) { + return Status::Ok; + } + + handler->AssociateFile(file_handle_); + return Status::Ok; +} + +Status QueueFile::Read(size_t offset, uint32_t length, uint8_t* buffer, + IAsyncContext& context, AsyncIOCallback callback) const { + DCHECK_ALIGNMENT(offset, length, buffer); +#ifdef IO_STATISTICS + ++read_count_; + bytes_read_ += length; +#endif + return const_cast(this)->ScheduleOperation(FileOperationType::Read, buffer, + offset, length, context, callback); +} + +Status QueueFile::Write(size_t offset, uint32_t length, const uint8_t* buffer, + IAsyncContext& context, AsyncIOCallback callback) { + DCHECK_ALIGNMENT(offset, length, buffer); +#ifdef IO_STATISTICS + bytes_written_ += length; +#endif + return ScheduleOperation(FileOperationType::Write, const_cast(buffer), offset, length, + context, callback); +} + +Status QueueFile::ScheduleOperation(FileOperationType operationType, uint8_t* buffer, + size_t offset, uint32_t length, IAsyncContext& context, + AsyncIOCallback callback) { + auto io_context = alloc_context(sizeof( + QueueIoHandler::IoCallbackContext)); + 
if(!io_context.get()) return Status::OutOfMemory; + + IAsyncContext* caller_context_copy; + RETURN_NOT_OK(context.DeepCopy(caller_context_copy)); + + new(io_context.get()) QueueIoHandler::IoCallbackContext(offset, caller_context_copy, + callback); + + bool success = FALSE; + if(FileOperationType::Read == operationType) { + success = ::ReadFile(file_handle_, buffer, length, nullptr, &io_context->parent_overlapped); + } else { + success = ::WriteFile(file_handle_, buffer, length, nullptr, &io_context->parent_overlapped); + } + if(!success) { + DWORD win32_result = ::GetLastError(); + // Any error other than ERROR_IO_PENDING means the IO failed. Otherwise it will finish + // asynchronously on the threadpool + if(ERROR_IO_PENDING != win32_result) { + std::stringstream ss; + ss << "Failed to schedule async IO: " << FormatWin32AndHRESULT(win32_result) << + ", handle " << std::to_string((uint64_t)file_handle_); + fprintf(stderr, "%s\n", ss.str().c_str()); + return Status::IOError; + } + } + io_context.release(); + return Status::Ok; +} + +#undef DCHECK_ALIGNMENT + +} +} // namespace FASTER::environment \ No newline at end of file diff --git a/cc/src/environment/file_windows.h b/cc/src/environment/file_windows.h new file mode 100644 index 000000000..aa1aa5c8e --- /dev/null +++ b/cc/src/environment/file_windows.h @@ -0,0 +1,415 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#ifdef _WIN32 +#define NOMINMAX +#define _WINSOCKAPI_ +#include +#endif + +#include +#include +#include + +#include "../core/async.h" +#include "../core/status.h" +#include "file_common.h" + +/// Windows file routines. + +namespace FASTER { +namespace environment { +constexpr const char* kPathSeparator = "\\"; + +/// The File class encapsulates the OS file handle. +class File { + protected: + File() + : file_handle_{ INVALID_HANDLE_VALUE } + , device_alignment_{ 0 } + , filename_{} + , owner_{ false } +#ifdef IO_STATISTICS + , bytes_written_ { 0 } + , read_count_{ 0 } + , bytes_read_{ 0 } +#endif + { + } + + File(const std::string& filename) + : file_handle_{ INVALID_HANDLE_VALUE } + , device_alignment_{ 0 } + , filename_{ filename } + , owner_{ false } +#ifdef IO_STATISTICS + , bytes_written_ { 0 } + , read_count_{ 0 } + , bytes_read_{ 0 } +#endif + { + } + + /// Move constructor. + File(File&& other) + : file_handle_{ other.file_handle_ } + , device_alignment_{ other.device_alignment_ } + , filename_{ std::move(other.filename_) } + , owner_{ other.owner_ } +#ifdef IO_STATISTICS + , bytes_written_ { other.bytes_written_ } + , read_count_{ other.read_count_ } + , bytes_read_{ other.bytes_read_ } +#endif + { + other.owner_ = false; + } + + ~File() { + if(owner_) { + Status s = Close(); + } + } + + /// Move assignment operator. + File& operator=(File&& other) { + file_handle_ = other.file_handle_; + device_alignment_ = other.device_alignment_; + filename_ = std::move(other.filename_); + owner_ = other.owner_; +#ifdef IO_STATISTICS + bytes_written_ = other.bytes_written_; + read_count_ = other.read_count_; + bytes_read_ = other.bytes_read_; +#endif + other.owner_ = false; + return *this; + } + + protected: + Status Open(DWORD flags, FileCreateDisposition create_disposition, bool* exists = nullptr); + + public: + Status Close(); + Status Delete(); + + uint64_t size() const { + LARGE_INTEGER file_size; + auto result = ::GetFileSizeEx(file_handle_, &file_size); + return result ? 
file_size.QuadPart : 0; + } + + size_t device_alignment() const { + return device_alignment_; + } + + const std::string& filename() const { + return filename_; + } + +#ifdef IO_STATISTICS + uint64_t bytes_written() const { + return bytes_written_.load(); + } + uint64_t read_count() const { + return read_count_.load(); + } + uint64_t bytes_read() const { + return bytes_read_.load(); + } +#endif + + private: + Status GetDeviceAlignment(); + static DWORD GetCreateDisposition(FileCreateDisposition create_disposition); + + protected: + HANDLE file_handle_; + + private: + size_t device_alignment_; + std::string filename_; + bool owner_; + +#ifdef IO_STATISTICS + protected: + std::atomic bytes_written_; + std::atomic read_count_; + std::atomic bytes_read_; +#endif +}; + +class WindowsPtpThreadPool { + public: + typedef void(*Task)(void* arguments); + + WindowsPtpThreadPool() + : pool_{ nullptr } + , callback_environment_{ nullptr } + , cleanup_group_{ nullptr } + , max_threads_{ 0 } { + } + + WindowsPtpThreadPool(size_t max_threads); + + /// Move constructor + WindowsPtpThreadPool(WindowsPtpThreadPool&& other) + : pool_{ other.pool_ } + , callback_environment_{ other.callback_environment_ } + , cleanup_group_{ other.cleanup_group_ } + , max_threads_{ other.max_threads_ } { + other.pool_ = nullptr; + other.callback_environment_ = nullptr; + other.cleanup_group_ = nullptr; + other.max_threads_ = 0; + } + + ~WindowsPtpThreadPool(); + + Status Schedule(Task task, void* task_argument); + + PTP_CALLBACK_ENVIRON callback_environment() { + return callback_environment_; + } + + private: + /// Describes a task that should be invoked. Created and enqueued in ScheduleTask(); dispatched + /// and freed in TaskStartSpringboard(). + struct TaskInfo { + TaskInfo() + : task{} + , task_parameters{} { + } + + /// The task to be invoked when the work item is issued by the pool. + Task task; + + /// Argument passed into #m_task when it is called. + void* task_parameters; + }; + + /// Called asynchronously by a thread from #m_pool whenever the thread pool starts to execute a + /// task scheduled via ScheduleTask(). Just determines which routine was requested for execution + /// and calls it. + static void CALLBACK TaskStartSpringboard(PTP_CALLBACK_INSTANCE instance, PVOID parameter, + PTP_WORK work); + + /// A Window Thread Pool object that is used to run asynchronous IO + /// operations (and callbacks) and other tasks (scheduled via + /// ScheduleTask()). + PTP_POOL pool_; + + /// An environment that associates Windows Thread Pool IO and Task objects + /// to #m_pool. AsyncIOFileWrappers and scheduled tasks are associated + /// with this environments to schedule them for execution. + PTP_CALLBACK_ENVIRON callback_environment_; + + /// The cleanup group associated with all environments and the thread pool. + PTP_CLEANUP_GROUP cleanup_group_; + + /// Maximum number of threads the thread pool should allocate. + uint64_t max_threads_; +}; + +class ThreadPoolFile; +class QueueFile; + +/// The ThreadPoolIoHandler class encapsulates completions for async file I/O, scheduled on a +/// thread pool. +class ThreadPoolIoHandler { + public: + typedef ThreadPoolFile async_file_t; + + ThreadPoolIoHandler() + : threadpool_{} { + } + + ThreadPoolIoHandler(size_t max_threads) + : threadpool_{ max_threads } { + } + + /// Move constructor. 
+ ThreadPoolIoHandler(ThreadPoolIoHandler&& other) + : threadpool_{ std::move(other.threadpool_) } { + } + + /// Invoked whenever an asynchronous IO completes; needed because Windows asynchronous IOs are + /// tied to a specific TP_IO object. As a result, we allocate pointers for a per-operation + /// callback along with its OVERLAPPED structure. This allows us to call a specific function in + /// response to each IO, without having to create a TP_IO for each of them. + static void CALLBACK IoCompletionCallback(PTP_CALLBACK_INSTANCE instance, PVOID context, + PVOID overlapped, ULONG ioResult, ULONG_PTR bytesTransferred, PTP_IO io); + + PTP_CALLBACK_ENVIRON callback_environment() { + return threadpool_.callback_environment(); + } + + struct IoCallbackContext { + IoCallbackContext(size_t offset, IAsyncContext* context_, AsyncIOCallback callback_) + : caller_context{ context_ } + , callback{ callback_ } { + ::memset(&parent_overlapped, 0, sizeof(parent_overlapped)); + parent_overlapped.Offset = offset & 0xffffffffllu; + parent_overlapped.OffsetHigh = offset >> 32; + } + + // WARNING: parent_overlapped must be the first field in IOCallbackContext. This class is a + // C-style subclass of "OVERLAPPED". + + /// The overlapped structure for Windows IO + OVERLAPPED parent_overlapped; + /// Caller callback context. + IAsyncContext* caller_context; + /// The caller's asynchronous callback function + AsyncIOCallback callback; + }; + + inline static constexpr bool TryComplete() { + return false; + } + + private: + /// The parent threadpool. + WindowsPtpThreadPool threadpool_; +}; + +/// The QueueIoHandler class encapsulates completions for async file I/O, where the completions +/// are put on a completion port's queue. +class QueueIoHandler { + public: + typedef QueueFile async_file_t; + + QueueIoHandler() + : io_completion_port_{ INVALID_HANDLE_VALUE } { + } + QueueIoHandler(size_t max_threads) + : io_completion_port_{ 0 } { + io_completion_port_ = ::CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, + (DWORD)Thread::kMaxNumThreads); + } + + /// Move constructor + QueueIoHandler(QueueIoHandler&& other) + : io_completion_port_{ other.io_completion_port_ } { + other.io_completion_port_ = INVALID_HANDLE_VALUE; + } + + ~QueueIoHandler() { + if(io_completion_port_ != INVALID_HANDLE_VALUE) { + ::CloseHandle(io_completion_port_); + } + } + + inline void AssociateFile(HANDLE file_handle) { + assert(io_completion_port_ != 0); + ::CreateIoCompletionPort(file_handle, io_completion_port_, + reinterpret_cast(file_handle), 0); + } + + struct IoCallbackContext { + IoCallbackContext(size_t offset, IAsyncContext* context_, AsyncIOCallback callback_) + : caller_context{ context_ } + , callback{ callback_ } { + ::memset(&parent_overlapped, 0, sizeof(parent_overlapped)); + parent_overlapped.Offset = offset & 0xffffffffllu; + parent_overlapped.OffsetHigh = offset >> 32; + } + + // WARNING: parent_overlapped must be the first field in IOCallbackContext. This class is a + // C-style subclass of "OVERLAPPED". + + /// The overlapped structure for Windows IO + OVERLAPPED parent_overlapped; + /// Caller callback context. + IAsyncContext* caller_context; + /// The caller's asynchronous callback function + AsyncIOCallback callback; + }; + + bool TryComplete(); + + private: + /// The completion port to whose queue completions are added. + HANDLE io_completion_port_; +}; + +/// The ThreadPoolFile class encapsulates asynchronous reads and writes, where the OS schedules the +/// IO completion on a thread pool. 
+class ThreadPoolFile : public File { + public: + ThreadPoolFile() + : File() + , io_object_{ nullptr } { + } + + ThreadPoolFile(const std::string& filename) + : File(filename) + , io_object_{ nullptr } { + } + + /// Move constructor + ThreadPoolFile(ThreadPoolFile&& other) + : File(std::move(other)) + , io_object_{ other.io_object_} { + } + + /// Move assignment operator. + ThreadPoolFile& operator=(ThreadPoolFile&& other) { + File::operator=(std::move(other)); + io_object_ = other.io_object_; + return *this; + } + + Status Open(FileCreateDisposition create_disposition, const FileOptions& options, + ThreadPoolIoHandler* handler, bool* exists = nullptr); + + Status Read(size_t offset, uint32_t length, uint8_t* buffer, + IAsyncContext& context, AsyncIOCallback callback) const; + Status Write(size_t offset, uint32_t length, const uint8_t* buffer, + IAsyncContext& context, AsyncIOCallback callback); + + private: + Status ScheduleOperation(FileOperationType operationType, uint8_t* buffer, size_t offset, + uint32_t length, IAsyncContext& context, AsyncIOCallback callback); + + PTP_IO io_object_; +}; + +/// The QueueFile class encapsulates asynchronous reads and writes, where the IO completions are +/// placed on the completion port's queue. +class QueueFile : public File { + public: + QueueFile() + : File() { + } + QueueFile(const std::string& filename) + : File(filename) { + } + /// Move constructor + QueueFile(QueueFile&& other) + : File(std::move(other)) { + } + + /// Move assignment operator. + QueueFile& operator=(QueueFile&& other) { + File::operator=(std::move(other)); + return *this; + } + + Status Open(FileCreateDisposition create_disposition, const FileOptions& options, + QueueIoHandler* handler, bool* exists = nullptr); + + Status Read(size_t offset, uint32_t length, uint8_t* buffer, + IAsyncContext& context, AsyncIOCallback callback) const; + Status Write(size_t offset, uint32_t length, const uint8_t* buffer, + IAsyncContext& context, AsyncIOCallback callback); + + private: + Status ScheduleOperation(FileOperationType operationType, uint8_t* buffer, size_t offset, + uint32_t length, IAsyncContext& context, AsyncIOCallback callback); +}; + +} +} // namespace FASTER::environment \ No newline at end of file diff --git a/cc/test/CMakeLists.txt b/cc/test/CMakeLists.txt new file mode 100644 index 000000000..0b28e4a30 --- /dev/null +++ b/cc/test/CMakeLists.txt @@ -0,0 +1,11 @@ +ADD_FAST_TEST(in_memory_test "") +ADD_FAST_TEST(malloc_fixed_page_size_test "") +ADD_FAST_TEST(paging_queue_test "paging_test.h") +if(MSVC) +ADD_FAST_TEST(paging_threadpool_test "paging_test.h") +endif() +ADD_FAST_TEST(recovery_queue_test "recovery_test.h") +if(MSVC) +ADD_FAST_TEST(recovery_threadpool_test "recovery_test.h") +endif() +ADD_FAST_TEST(utility_test "") diff --git a/cc/test/in_memory_test.cc b/cc/test/in_memory_test.cc new file mode 100644 index 000000000..85d6547f3 --- /dev/null +++ b/cc/test/in_memory_test.cc @@ -0,0 +1,1912 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
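Before the tests: the CMake list above builds paging_threadpool_test and recovery_threadpool_test only under MSVC because ThreadPoolIoHandler and WindowsPtpThreadPool are declared only in file_windows.h, whereas the *_queue_test variants use QueueIoHandler, which exists on both platforms (an I/O completion port on Windows, libaio on Linux) and is drained by polling TryComplete(). The sketch below shows the kind of platform selection a caller (or the file.h shim included by file_windows.cc, presumably) would perform; the handler_t alias and the #ifdef are illustrative, not taken verbatim from the patch.

// Illustrative platform selection for the async I/O handler.
#ifdef _WIN32
#include "environment/file_windows.h"
// Completions are dispatched by the Windows thread pool; TryComplete() is a no-op.
typedef FASTER::environment::ThreadPoolIoHandler handler_t;
// Alternative on Windows: QueueIoHandler, which queues completions on an IOCP
// and requires the application to call TryComplete() periodically.
#else
#include "environment/file_linux.h"
// Linux AIO; completions are reaped by calling TryComplete() on the handler.
typedef FASTER::environment::QueueIoHandler handler_t;
#endif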
+ +#include +#include +#include +#include +#include +#include "gtest/gtest.h" + +#include "core/faster.h" +#include "device/null_disk.h" + +using namespace FASTER::core; +TEST(InMemFaster, UpsertRead) { + class alignas(2) Key { + public: + Key(uint8_t key) + : key_{ key } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + std::hash hash_fn; + return KeyHash{ hash_fn(key_) }; + } + + /// Comparison operators. + inline bool operator==(const Key& other) const { + return key_ == other.key_; + } + inline bool operator!=(const Key& other) const { + return key_ != other.key_; + } + + private: + uint8_t key_; + }; + + class UpsertContext; + class ReadContext; + + class Value { + public: + Value() + : value_{ 0 } { + } + Value(const Value& other) + : value_{ other.value_ } { + } + Value(uint8_t value) + : value_{ value } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Value)); + } + + friend class UpsertContext; + friend class ReadContext; + + private: + union { + uint8_t value_; + std::atomic atomic_value_; + }; + }; + + class UpsertContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + UpsertContext(uint8_t key) + : key_{ key } { + } + + /// Copy (and deep-copy) constructor. + UpsertContext(const UpsertContext& other) + : key_{ other.key_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + /// Non-atomic and atomic Put() methods. + inline void Put(Value& value) { + value.value_ = 23; + } + inline bool PutAtomic(Value& value) { + value.atomic_value_.store(42); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + }; + + class ReadContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext(uint8_t key) + : key_{ key } { + } + + /// Copy (and deep-copy) constructor. + ReadContext(const ReadContext& other) + : key_{ other.key_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + // All reads should be atomic (from the mutable tail). + ASSERT_TRUE(false); + } + inline void GetAtomic(const Value& value) { + output = value.atomic_value_.load(); + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + public: + uint8_t output; + }; + + FasterKv store { 128, 1073741824, "" }; + + store.StartSession(); + + // Insert. + for(size_t idx = 0; idx < 256; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + UpsertContext context{ static_cast(idx) }; + Status result = store.Upsert(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + } + // Read. + for(size_t idx = 0; idx < 256; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. 
+ ASSERT_TRUE(false); + }; + ReadContext context{ static_cast(idx) }; + Status result = store.Read(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + // All upserts should have inserts (non-atomic). + ASSERT_EQ(23, context.output); + } + // Update. + for(size_t idx = 0; idx < 256; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + UpsertContext context{ static_cast(idx) }; + Status result = store.Upsert(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + } + // Read again. + for(size_t idx = 0; idx < 256; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + ReadContext context{ static_cast(idx) }; + Status result = store.Read(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + // All upserts should have updates (atomic). + ASSERT_EQ(42, context.output); + } + + store.StopSession(); +} + +/// The hash always returns "0," so the FASTER store devolves into a linked list. +TEST(InMemFaster, UpsertRead_DummyHash) { + class UpsertContext; + class ReadContext; + + class Key { + public: + Key(uint16_t key) + : key_{ key } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + return KeyHash{ 42 }; + } + + /// Comparison operators. + inline bool operator==(const Key& other) const { + return key_ == other.key_; + } + inline bool operator!=(const Key& other) const { + return key_ != other.key_; + } + + friend class UpsertContext; + friend class ReadContext; + + private: + uint16_t key_; + }; + + class Value { + public: + Value() + : value_{ 0 } { + } + Value(const Value& other) + : value_{ other.value_ } { + } + Value(uint16_t value) + : value_{ value } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Value)); + } + + friend class UpsertContext; + friend class ReadContext; + + private: + union { + uint16_t value_; + std::atomic atomic_value_; + }; + }; + + class UpsertContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + UpsertContext(uint16_t key) + : key_{ key } { + } + + /// Copy (and deep-copy) constructor. + UpsertContext(const UpsertContext& other) + : key_{ other.key_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + /// Non-atomic and atomic Put() methods. + inline void Put(Value& value) { + value.value_ = key_.key_; + } + inline bool PutAtomic(Value& value) { + value.atomic_value_.store(key_.key_); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + }; + + class ReadContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext(uint16_t key) + : key_{ key } { + } + + /// Copy (and deep-copy) constructor. + ReadContext(const ReadContext& other) + : key_{ other.key_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + // All reads should be atomic (from the mutable tail). 
+ ASSERT_TRUE(false); + } + inline void GetAtomic(const Value& value) { + output = value.atomic_value_.load(); + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + public: + uint16_t output; + }; + + FasterKv store{ 128, 1073741824, "" }; + + store.StartSession(); + + // Insert. + for(uint16_t idx = 0; idx < 10000; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + UpsertContext context{ idx }; + Status result = store.Upsert(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + } + // Read. + for(uint16_t idx = 0; idx < 10000; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + ReadContext context{ idx }; + Status result = store.Read(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + // All upserts should have inserts (non-atomic). + ASSERT_EQ(idx, context.output); + } + + store.StopSession(); +} + +TEST(InMemFaster, UpsertRead_Concurrent) { + class Key { + public: + Key(uint32_t key) + : key_{ key } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + std::hash hash_fn; + return KeyHash{ hash_fn(key_) }; + } + + /// Comparison operators. + inline bool operator==(const Key& other) const { + return key_ == other.key_; + } + inline bool operator!=(const Key& other) const { + return key_ != other.key_; + } + + private: + uint32_t key_; + }; + + class UpsertContext; + class ReadContext; + + class alignas(16) Value { + public: + Value() + : length_{ 0 } + , value_{ 0 } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Value)); + } + + friend class UpsertContext; + friend class ReadContext; + + private: + uint8_t value_[31]; + std::atomic length_; + }; + + class UpsertContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + UpsertContext(uint32_t key) + : key_{ key } { + } + + /// Copy (and deep-copy) constructor. + UpsertContext(const UpsertContext& other) + : key_{ other.key_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + /// Non-atomic and atomic Put() methods. + inline void Put(Value& value) { + value.length_ = 5; + std::memset(value.value_, 23, 5); + } + inline bool PutAtomic(Value& value) { + // Get the lock on the value. + bool success; + do { + uint8_t expected_length; + do { + // Spin until other the thread releases the lock. + expected_length = value.length_.load(); + } while(expected_length == UINT8_MAX); + // Try to get the lock. + success = value.length_.compare_exchange_weak(expected_length, UINT8_MAX); + } while(!success); + + std::memset(value.value_, 42, 7); + value.length_.store(7); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + }; + + class ReadContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext(uint32_t key) + : key_{ key } { + } + + /// Copy (and deep-copy) constructor. 
+ ReadContext(const ReadContext& other) + : key_{ other.key_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + // All reads should be atomic (from the mutable tail). + ASSERT_TRUE(false); + } + inline void GetAtomic(const Value& value) { + do { + output_length = value.length_.load(); + ASSERT_EQ(0, reinterpret_cast(value.value_) % 16); + output_pt1 = *reinterpret_cast(value.value_); + output_pt2 = *reinterpret_cast(value.value_ + 8); + } while(output_length != value.length_.load()); + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + public: + uint8_t output_length; + uint64_t output_pt1; + uint64_t output_pt2; + }; + + static constexpr size_t kNumOps = 1024; + static constexpr size_t kNumThreads = 8; + + auto upsert_worker = [](FasterKv* store_, + size_t thread_idx) { + store_->StartSession(); + + for(size_t idx = 0; idx < kNumOps; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + UpsertContext context{ static_cast((thread_idx * kNumOps) + idx) }; + Status result = store_->Upsert(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + } + + store_->StopSession(); + }; + + auto read_worker = [](FasterKv* store_, + size_t thread_idx, uint64_t expected_value) { + store_->StartSession(); + + for(size_t idx = 0; idx < kNumOps; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + ReadContext context{ static_cast((thread_idx * kNumOps) + idx) }; + Status result = store_->Read(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + ASSERT_EQ(expected_value, context.output_pt1); + } + + store_->StopSession(); + }; + + FasterKv store{ 128, 1073741824, "" }; + + // Insert. + std::deque threads{}; + for(size_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(upsert_worker, &store, idx); + } + for(auto& thread : threads) { + thread.join(); + } + + // Read. + threads.clear(); + for(size_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(read_worker, &store, idx, 0x1717171717); + } + for(auto& thread : threads) { + thread.join(); + } + + // Update. + threads.clear(); + for(size_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(upsert_worker, &store, idx); + } + for(auto& thread : threads) { + thread.join(); + } + + // Read again. + threads.clear(); + for(size_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(read_worker, &store, idx, 0x2a2a2a2a2a2a2a); + } + for(auto& thread : threads) { + thread.join(); + } +} + +TEST(InMemFaster, UpsertRead_ResizeValue_Concurrent) { + class Key { + public: + Key(uint32_t key) + : key_{ key } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + std::hash hash_fn; + return KeyHash{ hash_fn(key_) }; + } + + /// Comparison operators. 
+ inline bool operator==(const Key& other) const { + return key_ == other.key_; + } + inline bool operator!=(const Key& other) const { + return key_ != other.key_; + } + + private: + uint32_t key_; + }; + + class UpsertContext; + class ReadContext; + + class GenLock { + public: + GenLock() + : control_{ 0 } { + } + GenLock(uint64_t control) + : control_{ control } { + } + inline GenLock& operator=(const GenLock& other) { + control_ = other.control_; + return *this; + } + + union { + struct { + uint64_t gen_number : 62; + uint64_t locked : 1; + uint64_t replaced : 1; + }; + uint64_t control_; + }; + }; + static_assert(sizeof(GenLock) == 8, "sizeof(GenLock) != 8"); + + class AtomicGenLock { + public: + AtomicGenLock() + : control_{ 0 } { + } + AtomicGenLock(uint64_t control) + : control_{ control } { + } + + inline GenLock load() const { + return GenLock{ control_.load() }; + } + inline void store(GenLock desired) { + control_.store(desired.control_); + } + + inline bool try_lock(bool& replaced) { + replaced = false; + GenLock expected{ control_.load() }; + expected.locked = 0; + expected.replaced = 0; + GenLock desired{ expected.control_ }; + desired.locked = 1; + + if(control_.compare_exchange_strong(expected.control_, desired.control_)) { + return true; + } + if(expected.replaced) { + replaced = true; + } + return false; + } + inline void unlock(bool replaced) { + if(replaced) { + // Just turn off "locked" bit and increase gen number. + uint64_t sub_delta = ((uint64_t)1 << 62) - 1; + control_.fetch_sub(sub_delta); + } else { + // Turn off "locked" bit, turn on "replaced" bit, and increase gen number + uint64_t add_delta = ((uint64_t)1 << 63) - ((uint64_t)1 << 62) + 1; + control_.fetch_add(add_delta); + } + } + + private: + std::atomic control_; + }; + static_assert(sizeof(AtomicGenLock) == 8, "sizeof(AtomicGenLock) != 8"); + + class Value { + public: + Value() + : gen_lock_{ 0 } + , size_{ 0 } + , length_{ 0 } { + } + + inline uint32_t size() const { + return size_; + } + + friend class UpsertContext; + friend class ReadContext; + + private: + AtomicGenLock gen_lock_; + uint32_t size_; + uint32_t length_; + + inline const uint8_t* buffer() const { + return reinterpret_cast(this + 1); + } + inline uint8_t* buffer() { + return reinterpret_cast(this + 1); + } + }; + + class UpsertContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + UpsertContext(uint32_t key, uint32_t length) + : key_{ key } + , length_{ length } { + } + + /// Copy (and deep-copy) constructor. + UpsertContext(const UpsertContext& other) + : key_{ other.key_ } + , length_{ other.length_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + inline uint32_t value_size() const { + return sizeof(Value) + length_; + } + /// Non-atomic and atomic Put() methods. + inline void Put(Value& value) { + value.gen_lock_.store(0); + value.size_ = sizeof(Value) + length_; + value.length_ = length_; + std::memset(value.buffer(), 88, length_); + } + inline bool PutAtomic(Value& value) { + bool replaced; + while(!value.gen_lock_.try_lock(replaced) && !replaced) { + std::this_thread::yield(); + } + if(replaced) { + // Some other thread replaced this record. + return false; + } + if(value.size_ < sizeof(Value) + length_) { + // Current value is too small for in-place update. + value.gen_lock_.unlock(true); + return false; + } + // In-place update overwrites length and buffer, but not size. 
+ value.length_ = length_; + std::memset(value.buffer(), 88, length_); + value.gen_lock_.unlock(false); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t length_; + }; + + class ReadContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext(uint32_t key) + : key_{ key } + , output_length{ 0 } { + } + + /// Copy (and deep-copy) constructor. + ReadContext(const ReadContext& other) + : key_{ other.key_ } + , output_length{ 0 } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + // All reads should be atomic (from the mutable tail). + ASSERT_TRUE(false); + } + inline void GetAtomic(const Value& value) { + GenLock before, after; + do { + before = value.gen_lock_.load(); + output_length = value.length_; + output_bytes[0] = value.buffer()[0]; + output_bytes[1] = value.buffer()[value.length_ - 1]; + after = value.gen_lock_.load(); + } while(before.gen_number != after.gen_number); + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + public: + uint8_t output_length; + // Extract two bytes of output. + uint8_t output_bytes[2]; + }; + + static constexpr size_t kNumOps = 1024; + static constexpr size_t kNumThreads = 8; + + auto upsert_worker = [](FasterKv* store_, + size_t thread_idx, uint32_t value_length) { + store_->StartSession(); + + for(size_t idx = 0; idx < kNumOps; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + UpsertContext context{ static_cast((thread_idx * kNumOps) + idx), value_length }; + Status result = store_->Upsert(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + } + + store_->StopSession(); + }; + + auto read_worker = [](FasterKv* store_, + size_t thread_idx, uint8_t expected_value) { + store_->StartSession(); + + for(size_t idx = 0; idx < kNumOps; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + ReadContext context{ static_cast((thread_idx * kNumOps) + idx) }; + Status result = store_->Read(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + ASSERT_EQ(expected_value, context.output_bytes[0]); + ASSERT_EQ(expected_value, context.output_bytes[1]); + } + + store_->StopSession(); + }; + + FasterKv store{ 128, 1073741824, "" }; + + // Insert. + std::deque threads{}; + for(size_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(upsert_worker, &store, idx, 7); + } + for(auto& thread : threads) { + thread.join(); + } + + // Read. + threads.clear(); + for(size_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(read_worker, &store, idx, 88); + } + for(auto& thread : threads) { + thread.join(); + } + + // Update. + threads.clear(); + for(size_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(upsert_worker, &store, idx, 11); + } + for(auto& thread : threads) { + thread.join(); + } + + // Read again. 
+ threads.clear(); + for(size_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(read_worker, &store, idx, 88); + } + for(auto& thread : threads) { + thread.join(); + } +} +TEST(InMemFaster, Rmw) { + class Key { + public: + Key(uint64_t key) + : key_{ key } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + std::hash hash_fn; + return KeyHash{ hash_fn(key_) }; + } + + /// Comparison operators. + inline bool operator==(const Key& other) const { + return key_ == other.key_; + } + inline bool operator!=(const Key& other) const { + return key_ != other.key_; + } + + private: + uint64_t key_; + }; + + class RmwContext; + class ReadContext; + + class Value { + public: + Value() + : value_{ 0 } { + } + Value(const Value& other) + : value_{ other.value_ } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Value)); + } + + friend class RmwContext; + friend class ReadContext; + + private: + union { + int32_t value_; + std::atomic atomic_value_; + }; + }; + + class RmwContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + RmwContext(uint64_t key, int32_t incr) + : key_{ key } + , incr_{ incr } { + } + + /// Copy (and deep-copy) constructor. + RmwContext(const RmwContext& other) + : key_{ other.key_ } + , incr_{ other.incr_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + inline void RmwInitial(Value& value) { + value.value_ = incr_; + } + inline void RmwCopy(const Value& old_value, Value& value) { + value.value_ = old_value.value_ + incr_; + } + inline bool RmwAtomic(Value& value) { + value.atomic_value_.fetch_add(incr_); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + int32_t incr_; + Key key_; + }; + + class ReadContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext(uint64_t key) + : key_{ key } { + } + + /// Copy (and deep-copy) constructor. + ReadContext(const ReadContext& other) + : key_{ other.key_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + // All reads should be atomic (from the mutable tail). + ASSERT_TRUE(false); + } + inline void GetAtomic(const Value& value) { + output = value.atomic_value_.load(); + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + public: + int32_t output; + }; + + FasterKv store{ 256, 1073741824, "" }; + + store.StartSession(); + + // Rmw, increment by 1. + for(size_t idx = 0; idx < 2048; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + RmwContext context{ idx % 512, 1 }; + Status result = store.Rmw(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + } + // Read. + for(size_t idx = 0; idx < 512; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. 
+ ASSERT_TRUE(false); + }; + ReadContext context{ idx }; + Status result = store.Read(context, callback, 1); + ASSERT_EQ(Status::Ok, result) << idx; + // Should have performed 4 RMWs. + ASSERT_EQ(4, context.output); + } + // Rmw, decrement by 1. + for(size_t idx = 0; idx < 2048; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + RmwContext context{ idx % 512, -1 }; + Status result = store.Rmw(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + } + // Read again. + for(size_t idx = 0; idx < 512; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + ReadContext context{ static_cast(idx) }; + Status result = store.Read(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + // All upserts should have inserts (non-atomic). + ASSERT_EQ(0, context.output); + } + + store.StopSession(); +} + +TEST(InMemFaster, Rmw_Concurrent) { + class Key { + public: + Key(uint64_t key) + : key_{ key } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + std::hash hash_fn; + return KeyHash{ hash_fn(key_) }; + } + + /// Comparison operators. + inline bool operator==(const Key& other) const { + return key_ == other.key_; + } + inline bool operator!=(const Key& other) const { + return key_ != other.key_; + } + + private: + uint64_t key_; + }; + + class RmwContext; + class ReadContext; + + class Value { + public: + Value() + : value_{ 0 } { + } + Value(const Value& other) + : value_{ other.value_ } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Value)); + } + + friend class RmwContext; + friend class ReadContext; + + private: + union { + int64_t value_; + std::atomic atomic_value_; + }; + }; + + class RmwContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + RmwContext(uint64_t key, int64_t incr) + : key_{ key } + , incr_{ incr } { + } + + /// Copy (and deep-copy) constructor. + RmwContext(const RmwContext& other) + : key_{ other.key_ } + , incr_{ other.incr_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + + inline void RmwInitial(Value& value) { + value.value_ = incr_; + } + inline void RmwCopy(const Value& old_value, Value& value) { + value.value_ = old_value.value_ + incr_; + } + inline bool RmwAtomic(Value& value) { + value.atomic_value_.fetch_add(incr_); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + int64_t incr_; + Key key_; + }; + + class ReadContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext(uint64_t key) + : key_{ key } { + } + + /// Copy (and deep-copy) constructor. + ReadContext(const ReadContext& other) + : key_{ other.key_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + // All reads should be atomic (from the mutable tail). 
+ ASSERT_TRUE(false); + } + inline void GetAtomic(const Value& value) { + output = value.atomic_value_.load(); + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + public: + int64_t output; + }; + + static constexpr size_t kNumThreads = 8; + static constexpr size_t kNumRmws = 2048; + static constexpr size_t kRange = 512; + + auto rmw_worker = [](FasterKv* store_, + int64_t incr) { + store_->StartSession(); + + for(size_t idx = 0; idx < kNumRmws; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + RmwContext context{ idx % kRange, incr }; + Status result = store_->Rmw(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + } + + store_->StopSession(); + }; + + FasterKv store{ 256, 1073741824, "" }; + + // Rmw, increment by 2 * idx. + std::deque threads{}; + for(int64_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(rmw_worker, &store, 2 * idx); + } + for(auto& thread : threads) { + thread.join(); + } + + // Read. + store.StartSession(); + + for(size_t idx = 0; idx < kRange; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + ReadContext context{ idx }; + Status result = store.Read(context, callback, 1); + ASSERT_EQ(Status::Ok, result) << idx; + // Should have performed 4 RMWs. + ASSERT_EQ((kNumThreads * (kNumThreads - 1)) * (kNumRmws / kRange), context.output); + } + + store.StopSession(); + + // Rmw, decrement by idx. + threads.clear(); + for(int64_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(rmw_worker, &store, -idx); + } + for(auto& thread : threads) { + thread.join(); + } + + // Read again. + store.StartSession(); + + for(size_t idx = 0; idx < kRange; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + ReadContext context{ static_cast(idx) }; + Status result = store.Read(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + // All upserts should have inserts (non-atomic). + ASSERT_EQ(((kNumThreads * (kNumThreads - 1)) / 2) * (kNumRmws / kRange), context.output); + } + + store.StopSession(); +} + +TEST(InMemFaster, Rmw_ResizeValue_Concurrent) { + class Key { + public: + Key(uint64_t key) + : key_{ key } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + std::hash hash_fn; + return KeyHash{ hash_fn(key_) }; + } + + /// Comparison operators. 
+ inline bool operator==(const Key& other) const { + return key_ == other.key_; + } + inline bool operator!=(const Key& other) const { + return key_ != other.key_; + } + + private: + uint64_t key_; + }; + + class RmwContext; + class ReadContext; + + class GenLock { + public: + GenLock() + : control_{ 0 } { + } + GenLock(uint64_t control) + : control_{ control } { + } + inline GenLock& operator=(const GenLock& other) { + control_ = other.control_; + return *this; + } + + union { + struct { + uint64_t gen_number : 62; + uint64_t locked : 1; + uint64_t replaced : 1; + }; + uint64_t control_; + }; + }; + static_assert(sizeof(GenLock) == 8, "sizeof(GenLock) != 8"); + + class AtomicGenLock { + public: + AtomicGenLock() + : control_{ 0 } { + } + AtomicGenLock(uint64_t control) + : control_{ control } { + } + + inline GenLock load() const { + return GenLock{ control_.load() }; + } + inline void store(GenLock desired) { + control_.store(desired.control_); + } + + inline bool try_lock(bool& replaced) { + replaced = false; + GenLock expected{ control_.load() }; + expected.locked = 0; + expected.replaced = 0; + GenLock desired{ expected.control_ }; + desired.locked = 1; + + if(control_.compare_exchange_strong(expected.control_, desired.control_)) { + return true; + } + if(expected.replaced) { + replaced = true; + } + return false; + } + inline void unlock(bool replaced) { + if(replaced) { + // Just turn off "locked" bit and increase gen number. + uint64_t sub_delta = ((uint64_t)1 << 62) - 1; + control_.fetch_sub(sub_delta); + } else { + // Turn off "locked" bit, turn on "replaced" bit, and increase gen number + uint64_t add_delta = ((uint64_t)1 << 63) - ((uint64_t)1 << 62) + 1; + control_.fetch_add(add_delta); + } + } + + private: + std::atomic control_; + }; + static_assert(sizeof(AtomicGenLock) == 8, "sizeof(AtomicGenLock) != 8"); + + class Value { + public: + Value() + : gen_lock_{ 0 } + , size_{ 0 } + , length_{ 0 } { + } + + inline uint32_t size() const { + return size_; + } + + friend class RmwContext; + friend class ReadContext; + + private: + AtomicGenLock gen_lock_; + uint32_t size_; + uint32_t length_; + + inline const int8_t* buffer() const { + return reinterpret_cast(this + 1); + } + inline int8_t* buffer() { + return reinterpret_cast(this + 1); + } + }; + + class RmwContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + RmwContext(uint64_t key, int8_t incr, uint32_t length) + : key_{ key } + , incr_{ incr } + , length_{ length } { + } + + /// Copy (and deep-copy) constructor. + RmwContext(const RmwContext& other) + : key_{ other.key_ } + , incr_{ other.incr_ } + , length_{ other.length_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. 
+ inline const Key& key() const { + return key_; + } + inline uint32_t value_size() const { + return sizeof(value_t) + length_; + } + + inline void RmwInitial(Value& value) { + value.gen_lock_.store(GenLock{}); + value.size_ = sizeof(Value) + length_; + value.length_ = length_; + std::memset(value.buffer(), incr_, length_); + } + inline void RmwCopy(const Value& old_value, Value& value) { + value.gen_lock_.store(GenLock{}); + value.size_ = sizeof(Value) + length_; + value.length_ = length_; + std::memset(value.buffer(), incr_, length_); + for(uint32_t idx = 0; idx < std::min(old_value.length_, length_); ++idx) { + value.buffer()[idx] = old_value.buffer()[idx] + incr_; + } + } + inline bool RmwAtomic(Value& value) { + bool replaced; + while(!value.gen_lock_.try_lock(replaced) && !replaced) { + std::this_thread::yield(); + } + if(replaced) { + // Some other thread replaced this record. + return false; + } + if(value.size_ < sizeof(Value) + length_) { + // Current value is too small for in-place update. + value.gen_lock_.unlock(true); + return false; + } + // In-place update overwrites length and buffer, but not size. + value.length_ = length_; + for(uint32_t idx = 0; idx < length_; ++idx) { + value.buffer()[idx] += incr_; + } + value.gen_lock_.unlock(false); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + int8_t incr_; + uint32_t length_; + Key key_; + }; + + class ReadContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext(uint64_t key) + : key_{ key } + , output_length{ 0 } { + } + + /// Copy (and deep-copy) constructor. + ReadContext(const ReadContext& other) + : key_{ other.key_ } + , output_length{ 0 } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + // All reads should be atomic (from the mutable tail). + ASSERT_TRUE(false); + } + inline void GetAtomic(const Value& value) { + GenLock before, after; + do { + before = value.gen_lock_.load(); + output_length = value.length_; + output_bytes[0] = value.buffer()[0]; + output_bytes[1] = value.buffer()[value.length_ - 1]; + after = value.gen_lock_.load(); + } while(before.gen_number != after.gen_number); + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + public: + uint8_t output_length; + // Extract two bytes of output. + int8_t output_bytes[2]; + }; + + static constexpr int8_t kNumThreads = 8; + static constexpr size_t kNumRmws = 2048; + static constexpr size_t kRange = 512; + + auto rmw_worker = [](FasterKv* store_, + int8_t incr, uint32_t value_length) { + store_->StartSession(); + + for(size_t idx = 0; idx < kNumRmws; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + RmwContext context{ idx % kRange, incr, value_length }; + Status result = store_->Rmw(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + } + + store_->StopSession(); + }; + + FasterKv store{ 256, 1073741824, "" }; + + // Rmw, increment by 3. 
+ std::deque threads{}; + for(int64_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(rmw_worker, &store, 3, 5); + } + for(auto& thread : threads) { + thread.join(); + } + + // Read. + store.StartSession(); + + for(size_t idx = 0; idx < kRange; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + ReadContext context{ idx }; + Status result = store.Read(context, callback, 1); + ASSERT_EQ(Status::Ok, result) << idx; + // Should have performed 4 RMWs. + ASSERT_EQ(5, context.output_length); + ASSERT_EQ(kNumThreads * 4 * 3, context.output_bytes[0]); + ASSERT_EQ(kNumThreads * 4 * 3, context.output_bytes[1]); + } + + store.StopSession(); + + // Rmw, decrement by 4. + threads.clear(); + for(int64_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(rmw_worker, &store, -4, 8); + } + for(auto& thread : threads) { + thread.join(); + } + + // Read again. + store.StartSession(); + + for(size_t idx = 0; idx < kRange; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + ReadContext context{ static_cast(idx) }; + Status result = store.Read(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + // Should have performed 4 RMWs. + ASSERT_EQ(8, context.output_length); + ASSERT_EQ(kNumThreads * -4, context.output_bytes[0]); + ASSERT_EQ(kNumThreads * -16, context.output_bytes[1]); + } + + store.StopSession(); +} + +TEST(InMemFaster, GrowHashTable) { + class Key { + public: + Key(uint64_t key) + : key_{ key } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + std::hash hash_fn; + return KeyHash{ hash_fn(key_) }; + } + + /// Comparison operators. + inline bool operator==(const Key& other) const { + return key_ == other.key_; + } + inline bool operator!=(const Key& other) const { + return key_ != other.key_; + } + + private: + uint64_t key_; + }; + + class RmwContext; + class ReadContext; + + class Value { + public: + Value() + : value_{ 0 } { + } + Value(const Value& other) + : value_{ other.value_ } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Value)); + } + + friend class RmwContext; + friend class ReadContext; + + private: + union { + int64_t value_; + std::atomic atomic_value_; + }; + }; + + class RmwContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + RmwContext(uint64_t key, int64_t incr) + : key_{ key } + , incr_{ incr } { + } + + /// Copy (and deep-copy) constructor. + RmwContext(const RmwContext& other) + : key_{ other.key_ } + , incr_{ other.incr_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + + inline void RmwInitial(Value& value) { + value.value_ = incr_; + } + inline void RmwCopy(const Value& old_value, Value& value) { + value.value_ = old_value.value_ + incr_; + } + inline bool RmwAtomic(Value& value) { + value.atomic_value_.fetch_add(incr_); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. 
+ Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + int64_t incr_; + Key key_; + }; + + class ReadContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext(uint64_t key) + : key_{ key } { + } + + /// Copy (and deep-copy) constructor. + ReadContext(const ReadContext& other) + : key_{ other.key_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + // All reads should be atomic (from the mutable tail). + ASSERT_TRUE(false); + } + inline void GetAtomic(const Value& value) { + output = value.atomic_value_.load(); + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + public: + int64_t output; + }; + + static constexpr size_t kNumThreads = 8; + static constexpr size_t kNumRmws = 32768; + static constexpr size_t kRange = 8192; + + static std::atomic grow_done{ false }; + + auto rmw_worker0 = [](FasterKv* store_, + int64_t incr) { + auto callback = [](uint64_t new_size) { + grow_done = true; + }; + + store_->StartSession(); + + for(size_t idx = 0; idx < kNumRmws; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + RmwContext context{ idx % kRange, incr }; + Status result = store_->Rmw(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + } + + // Double the size of the index. + store_->GrowIndex(callback); + + while(!grow_done) { + store_->Refresh(); + std::this_thread::yield(); + } + + store_->StopSession(); + }; + + auto rmw_worker = [](FasterKv* store_, + int64_t incr) { + store_->StartSession(); + + for(size_t idx = 0; idx < kNumRmws; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + RmwContext context{ idx % kRange, incr }; + Status result = store_->Rmw(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + } + + while(!grow_done) { + store_->Refresh(); + std::this_thread::yield(); + } + + store_->StopSession(); + }; + + FasterKv store{ 256, 1073741824, "" }; + + // Rmw, increment by 2 * idx. + std::deque threads{}; + threads.emplace_back(rmw_worker0, &store, 0); + for(int64_t idx = 1; idx < kNumThreads; ++idx) { + threads.emplace_back(rmw_worker, &store, 2 * idx); + } + for(auto& thread : threads) { + thread.join(); + } + + // Read. + store.StartSession(); + + for(size_t idx = 0; idx < kRange; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + ReadContext context{ idx }; + Status result = store.Read(context, callback, 1); + ASSERT_EQ(Status::Ok, result) << idx; + // Should have performed 4 RMWs. + ASSERT_EQ((kNumThreads * (kNumThreads - 1)) * (kNumRmws / kRange), context.output); + } + + store.StopSession(); + + // Rmw, decrement by idx. + grow_done = false; + threads.clear(); + threads.emplace_back(rmw_worker0, &store, 0); + for(int64_t idx = 1; idx < kNumThreads; ++idx) { + threads.emplace_back(rmw_worker, &store, -idx); + } + for(auto& thread : threads) { + thread.join(); + } + + // Read again. 
+ store.StartSession(); + + for(size_t idx = 0; idx < kRange; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + ReadContext context{ static_cast(idx) }; + Status result = store.Read(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + // All upserts should have inserts (non-atomic). + ASSERT_EQ(((kNumThreads * (kNumThreads - 1)) / 2) * (kNumRmws / kRange), context.output); + } + + store.StopSession(); +} + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/cc/test/malloc_fixed_page_size_test.cc b/cc/test/malloc_fixed_page_size_test.cc new file mode 100644 index 000000000..248ba4e83 --- /dev/null +++ b/cc/test/malloc_fixed_page_size_test.cc @@ -0,0 +1,81 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include "gtest/gtest.h" + +#include "core/light_epoch.h" +#include "core/malloc_fixed_page_size.h" +#include "device/null_disk.h" + +struct alignas(32) Item { + uint8_t buffer[32]; +}; + +using namespace FASTER::core; + +typedef MallocFixedPageSize alloc_t; + +TEST(MallocFixedPageSize, AllocFree) { + LightEpoch epoch; + alloc_t allocator{}; + allocator.Initialize(256, epoch); + for(size_t idx = 0; idx < 1000000; ++idx) { + FixedPageAddress address = allocator.Allocate(); + Item* item = &allocator.Get(address); + ASSERT_EQ(0, reinterpret_cast(item) % alignof(Item)); + allocator.FreeAtEpoch(address, 0); + } + ASSERT_EQ(1, allocator.free_list().size()); +} + +TEST(MallocFixedPageSize, Alloc) { + LightEpoch epoch; + alloc_t allocator{}; + allocator.Initialize(128, epoch); + for(size_t idx = 0; idx < 32000000; ++idx) { + FixedPageAddress address = allocator.Allocate(); + Item* item = &allocator.Get(address); + ASSERT_EQ(0, reinterpret_cast(item) % alignof(Item)); + } + ASSERT_EQ(0, allocator.free_list().size()); +} + + +static void MultiThread_Worker(alloc_t* allocator) { + constexpr size_t kAllocCount = 2000000; + FixedPageAddress* addresses = new FixedPageAddress[kAllocCount]; + + for(size_t idx = 0; idx < kAllocCount; ++idx) { + addresses[idx] = allocator->Allocate(); + Item* item = &allocator->Get(addresses[idx]); + ASSERT_EQ(0, reinterpret_cast(item) % alignof(Item)); + } + for(size_t idx = 0; idx < kAllocCount; ++idx) { + allocator->FreeAtEpoch(addresses[idx], idx); + } + ASSERT_EQ(kAllocCount, allocator->free_list().size()); + + delete[] addresses; +} + +TEST(MallocFixedPageSize, Concurrent) { + constexpr size_t kNumThreads = 16; + LightEpoch epoch; + alloc_t allocator{}; + allocator.Initialize(64, epoch); + std::deque threads{}; + for(size_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(MultiThread_Worker, &allocator); + } + for(auto& thread : threads) { + thread.join(); + } +} + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/cc/test/paging_queue_test.cc b/cc/test/paging_queue_test.cc new file mode 100644 index 000000000..212e0f966 --- /dev/null +++ b/cc/test/paging_queue_test.cc @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
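+
+// This driver and paging_threadpool_test.cc share the test bodies in
+// paging_test.h: each defines CLASS and an I/O handler type before including
+// the header, so the same tests run against both the QueueIoHandler and
+// ThreadPoolIoHandler back ends.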
+ +#include +#include +#include +#include +#include +#include +#include "gtest/gtest.h" +#include "core/faster.h" +#include "device/file_system_disk.h" + +using namespace FASTER::core; + +typedef FASTER::environment::QueueIoHandler handler_t; + +#define CLASS PagingTest_Queue + +#include "paging_test.h" + +#undef CLASS + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/cc/test/paging_test.h b/cc/test/paging_test.h new file mode 100644 index 000000000..da8294d63 --- /dev/null +++ b/cc/test/paging_test.h @@ -0,0 +1,1017 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include + +using namespace FASTER; + +/// Disk's log uses 64 MB segments. +typedef FASTER::device::FileSystemDisk disk_t; + +TEST(CLASS, UpsertRead_Serial) { + class Key { + public: + Key(uint64_t pt1, uint64_t pt2) + : pt1_{ pt1 } + , pt2_{ pt2 } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + std::hash hash_fn; + return KeyHash{ hash_fn(pt1_) }; + } + + /// Comparison operators. + inline bool operator==(const Key& other) const { + return pt1_ == other.pt1_ && + pt2_ == other.pt2_; + } + inline bool operator!=(const Key& other) const { + return pt1_ != other.pt1_ || + pt2_ != other.pt2_; + } + + private: + uint64_t pt1_; + uint64_t pt2_; + }; + + class UpsertContext; + class ReadContext; + + class Value { + public: + Value() + : gen_{ 0 } + , value_{ 0 } + , length_{ 0 } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Value)); + } + + friend class UpsertContext; + friend class ReadContext; + + private: + std::atomic gen_; + uint8_t value_[1014]; + uint16_t length_; + }; + static_assert(sizeof(Value) == 1024, "sizeof(Value) != 1024"); + static_assert(alignof(Value) == 8, "alignof(Value) != 8"); + + class UpsertContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + UpsertContext(const Key& key, uint8_t val) + : key_{ key } + , val_{ val } { + } + + /// Copy (and deep-copy) constructor. + UpsertContext(const UpsertContext& other) + : key_{ other.key_ } + , val_{ other.val_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + /// Non-atomic and atomic Put() methods. + inline void Put(Value& value) { + value.gen_ = 0; + std::memset(value.value_, val_, val_); + value.length_ = val_; + } + inline bool PutAtomic(Value& value) { + // Get the lock on the value. + uint64_t expected_gen; + bool success; + do { + do { + // Spin until other the thread releases the lock. + expected_gen = value.gen_.load(); + } while(expected_gen == UINT64_MAX); + // Try to get the lock. + success = value.gen_.compare_exchange_weak(expected_gen, UINT64_MAX); + } while(!success); + + std::memset(value.value_, val_, val_); + value.length_ = val_; + // Increment the value's generation number. + value.gen_.store(expected_gen + 1); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. 
+ Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint8_t val_; + }; + + class ReadContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext(Key key, uint8_t expected) + : key_{ key } + , expected_{ expected } { + } + + /// Copy (and deep-copy) constructor. + ReadContext(const ReadContext& other) + : key_{ other.key_ } + , expected_{ other.expected_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + // This is a paging test, so we expect to read stuff from disk. + ASSERT_EQ(expected_, value.length_); + ASSERT_EQ(expected_, value.value_[expected_ - 5]); + } + inline void GetAtomic(const Value& value) { + uint64_t post_gen = value.gen_.load(); + uint64_t pre_gen; + uint16_t len; + uint8_t val; + do { + // Pre- gen # for this read is last read's post- gen #. + pre_gen = post_gen; + len = value.length_; + val = value.value_[len - 5]; + post_gen = value.gen_.load(); + } while(pre_gen != post_gen); + ASSERT_EQ(expected_, static_cast(len)); + ASSERT_EQ(expected_, val); + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint8_t expected_; + }; + + std::experimental::filesystem::create_directories("logs"); + + // 8 pages! + FasterKv store{ 262144, 268435456, "logs", 0.5 }; + + Guid session_id = store.StartSession(); + + constexpr size_t kNumRecords = 300000; + + // Insert. + for(size_t idx = 0; idx < kNumRecords; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // Upserts don't go to disk. + ASSERT_TRUE(false); + }; + + if(idx % 256 == 0) { + store.Refresh(); + } + + UpsertContext context{ Key{idx, idx}, 25 }; + Status result = store.Upsert(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + } + // Read. + static std::atomic records_read{ 0 }; + for(size_t idx = 0; idx < kNumRecords; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ++records_read; + }; + + if(idx % 256 == 0) { + store.Refresh(); + } + + ReadContext context{ Key{ idx, idx}, 25 }; + Status result = store.Read(context, callback, 1); + if(result == Status::Ok) { + ++records_read; + } else { + ASSERT_EQ(Status::Pending, result); + } + } + + ASSERT_LT(records_read.load(), kNumRecords); + bool result = store.CompletePending(true); + ASSERT_TRUE(result); + ASSERT_EQ(kNumRecords, records_read.load()); + + // Update. + static std::atomic records_updated{ 0 }; + for(size_t idx = 0; idx < kNumRecords; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // Upserts don't go to disk. + ASSERT_TRUE(false); + }; + + if(idx % 256 == 0) { + store.Refresh(); + } + + UpsertContext context{ Key{ idx, idx }, 87 }; + Status result = store.Upsert(context, callback, 1); + if(result == Status::Ok) { + ++records_updated; + } else { + ASSERT_EQ(Status::Pending, result); + } + } + + ASSERT_EQ(kNumRecords, records_updated.load()); + result = store.CompletePending(true); + ASSERT_TRUE(result); + + // Read again. 
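+ // Even after the update, some reads below return Status::Pending because
+ // parts of the log have already been flushed to disk (hence the ASSERT_LT
+ // before CompletePending); CompletePending(true) drains them and the
+ // ReadContext checks verify the updated value of 87.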
+ records_read = 0;; + for(size_t idx = 0; idx < kNumRecords; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ++records_read; + }; + + if(idx % 256 == 0) { + store.Refresh(); + } + + ReadContext context{ Key{ idx, idx }, 87 }; + Status result = store.Read(context, callback, 1); + if(result == Status::Ok) { + ++records_read; + } else { + ASSERT_EQ(Status::Pending, result); + } + } + + ASSERT_LT(records_read.load(), kNumRecords); + result = store.CompletePending(true); + ASSERT_TRUE(result); + ASSERT_EQ(kNumRecords, records_read.load()); + + store.StopSession(); +} + +TEST(CLASS, UpsertRead_Concurrent) { + class UpsertContext; + class ReadContext; + + class Key { + public: + Key(uint64_t pt1, uint64_t pt2) + : pt1_{ pt1 } + , pt2_{ pt2 } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + std::hash hash_fn; + return KeyHash{ hash_fn(pt1_) }; + } + + /// Comparison operators. + inline bool operator==(const Key& other) const { + return pt1_ == other.pt1_ && + pt2_ == other.pt2_; + } + inline bool operator!=(const Key& other) const { + return pt1_ != other.pt1_ || + pt2_ != other.pt2_; + } + + friend class UpsertContext; + friend class ReadContext; + + private: + uint64_t pt1_; + uint64_t pt2_; + }; + + class Value { + public: + Value() + : gen_{ 0 } + , value_{ 0 } + , length_{ 0 } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Value)); + } + + friend class UpsertContext; + friend class ReadContext; + + private: + std::atomic gen_; + uint8_t value_[1014]; + uint16_t length_; + }; + static_assert(sizeof(Value) == 1024, "sizeof(Value) != 1024"); + static_assert(alignof(Value) == 8, "alignof(Value) != 8"); + + class UpsertContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + UpsertContext(const Key& key, uint8_t val) + : key_{ key } + , val_{ val } { + } + + /// Copy (and deep-copy) constructor. + UpsertContext(const UpsertContext& other) + : key_{ other.key_ } + , val_{ other.val_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + /// Non-atomic and atomic Put() methods. + inline void Put(Value& value) { + value.gen_ = 0; + std::memset(value.value_, val_, val_); + value.length_ = val_; + } + inline bool PutAtomic(Value& value) { + // Get the lock on the value. + uint64_t expected_gen; + bool success; + do { + do { + // Spin until other the thread releases the lock. + expected_gen = value.gen_.load(); + } while(expected_gen == UINT64_MAX); + // Try to get the lock. + success = value.gen_.compare_exchange_weak(expected_gen, UINT64_MAX); + } while(!success); + + std::memset(value.value_, val_, val_); + value.length_ = val_; + // Increment the value's generation number. + value.gen_.store(expected_gen + 1); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. 
+ Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint8_t val_; + }; + + class ReadContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext(Key key, uint8_t expected) + : key_{ key } + , expected_{ expected } { + } + + /// Copy (and deep-copy) constructor. + ReadContext(const ReadContext& other) + : key_{ other.key_ } + , expected_{ other.expected_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + // This is a paging test, so we expect to read stuff from disk. + ASSERT_EQ(expected_, value.length_); + ASSERT_EQ(expected_, value.value_[expected_ - 5]); + } + inline void GetAtomic(const Value& value) { + uint64_t post_gen = value.gen_.load(); + uint64_t pre_gen; + uint16_t len; + uint8_t val; + do { + // Pre- gen # for this read is last read's post- gen #. + pre_gen = post_gen; + len = value.length_; + val = value.value_[len - 5]; + post_gen = value.gen_.load(); + } while(pre_gen != post_gen); + ASSERT_EQ(expected_, val); + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint8_t expected_; + }; + + std::experimental::filesystem::create_directories("logs"); + + // 8 pages! + FasterKv store{ 262144, 268435456, "logs\\", 0.5 }; + + static constexpr size_t kNumRecords = 300000; + static constexpr size_t kNumThreads = 16; + + static std::atomic num_writes{ 0 }; + + auto upsert_worker = [](FasterKv* store_, + size_t thread_idx, uint8_t val) { + Guid session_id = store_->StartSession(); + + for(size_t idx = 0; idx < kNumRecords / kNumThreads; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + // In-memory test. + ASSERT_TRUE(false); + }; + + if(idx % 256 == 0) { + store_->Refresh(); + } + + uint64_t key_component = thread_idx * (kNumRecords / kNumThreads) + idx; + UpsertContext context{ Key{ key_component, key_component }, val }; + Status result = store_->Upsert(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + ++num_writes; + } + + store_->StopSession(); + }; + + // Insert. + std::deque threads{}; + for(size_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(upsert_worker, &store, idx, 25); + } + for(auto& thread : threads) { + thread.join(); + } + + ASSERT_EQ(kNumRecords, num_writes.load()); + + // Read. + Guid session_id = store.StartSession(); + + static std::atomic records_read{ 0 }; + for(size_t idx = 0; idx < kNumRecords; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ++records_read; + }; + + if(idx % 256 == 0) { + store.Refresh(); + } + + ReadContext context{ Key{ idx, idx }, 25 }; + Status result = store.Read(context, callback, 1); + if(result == Status::Ok) { + ++records_read; + } else { + ASSERT_EQ(Status::Pending, result) << idx; + } + } + + ASSERT_LT(records_read.load(), kNumRecords); + bool result = store.CompletePending(true); + ASSERT_TRUE(result); + ASSERT_EQ(kNumRecords, records_read.load()); + + //// Update. 
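+ // The second round of upsert workers writes fresh copies (val = 87) at the
+ // log tail; the stale copies below kNewBeginAddress are then discarded via
+ // ShiftBeginAddress before the final read pass.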
+ num_writes = 0; + threads.clear(); + for(size_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(upsert_worker, &store, idx, 87); + } + for(auto& thread : threads) { + thread.join(); + } + + ASSERT_EQ(kNumRecords, num_writes.load()); + + // Delete some old copies of records (160 MB) that we no longer need. + static constexpr uint64_t kNewBeginAddress{ 167772160L }; + static std::atomic truncated{ false }; + static std::atomic complete{ false }; + auto truncate_callback = [](uint64_t offset) { + ASSERT_LE(offset, kNewBeginAddress); + truncated = true; + }; + auto complete_callback = []() { + complete = true; + }; + + result = store.ShiftBeginAddress(Address{ kNewBeginAddress }, truncate_callback, complete_callback); + ASSERT_TRUE(result); + + while(!truncated || !complete) { + store.CompletePending(false); + } + + // Read again. + records_read = 0;; + for(size_t idx = 0; idx < kNumRecords; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ++records_read; + }; + + if(idx % 256 == 0) { + store.Refresh(); + } + + ReadContext context{ Key{ idx, idx }, 87 }; + Status result = store.Read(context, callback, 1); + if(result == Status::Ok) { + ++records_read; + } else { + ASSERT_EQ(Status::Pending, result); + } + } + + ASSERT_LT(records_read.load(), kNumRecords); + result = store.CompletePending(true); + ASSERT_TRUE(result); + ASSERT_EQ(kNumRecords, records_read.load()); + + store.StopSession(); +} + +TEST(CLASS, Rmw) { + class Key { + public: + Key(uint64_t key) + : key_{ key } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + return KeyHash{ Utility::GetHashCode(key_) }; + } + + /// Comparison operators. + inline bool operator==(const Key& other) const { + return key_ == other.key_; + } + inline bool operator!=(const Key& other) const { + return key_ != other.key_; + } + + private: + uint64_t key_; + }; + + class RmwContext; + + class Value { + public: + Value() + : counter_{ 0 } + , junk_{ 1 } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Value)); + } + + friend class RmwContext; + + private: + std::atomic counter_; + uint8_t junk_[1016]; + }; + static_assert(sizeof(Value) == 1024, "sizeof(Value) != 1024"); + static_assert(alignof(Value) == 8, "alignof(Value) != 8"); + + class RmwContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + RmwContext(Key key, uint64_t incr) + : key_{ key } + , incr_{ incr } + , val_{ 0 } { + } + + /// Copy (and deep-copy) constructor. + RmwContext(const RmwContext& other) + : key_{ other.key_ } + , incr_{ other.incr_ } + , val_{ other.val_ } { + } + + inline const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + inline void RmwInitial(Value& value) { + value.counter_ = incr_; + val_ = value.counter_; + } + inline void RmwCopy(const Value& old_value, Value& value) { + value.counter_ = old_value.counter_ + incr_; + val_ = value.counter_; + } + inline bool RmwAtomic(Value& value) { + val_ = value.counter_.fetch_add(incr_) + incr_; + return true; + } + + inline uint64_t val() const { + return val_; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. 
+ Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint64_t incr_; + + uint64_t val_; + }; + + std::experimental::filesystem::create_directories("logs"); + + // 8 pages! + FasterKv store{ 262144, 268435456, "logs", 0.5 }; + + Guid session_id = store.StartSession(); + + constexpr size_t kNumRecords = 300000; + + // Initial RMW. + static std::atomic records_touched{ 0 }; + for(size_t idx = 0; idx < kNumRecords; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ASSERT_EQ(3, context->val()); + ++records_touched; + }; + + if(idx % 256 == 0) { + store.Refresh(); + } + + RmwContext context{ Key{ idx }, 3 }; + Status result = store.Rmw(context, callback, 1); + if(result == Status::Ok) { + ASSERT_EQ(3, context.val()); + ++records_touched; + } else { + ASSERT_EQ(Status::Pending, result); + } + } + + bool result = store.CompletePending(true); + ASSERT_TRUE(result); + ASSERT_EQ(kNumRecords, records_touched.load()); + + // Second RMW. + records_touched = 0; + for(size_t idx = kNumRecords; idx > 0; --idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ASSERT_EQ(8, context->val()); + ++records_touched; + }; + + if(idx % 256 == 0) { + store.Refresh(); + } + + RmwContext context{ Key{ idx - 1 }, 5 }; + Status result = store.Rmw(context, callback, 1); + if(result == Status::Ok) { + ASSERT_EQ(8, context.val()) << idx - 1; + ++records_touched; + } else { + ASSERT_EQ(Status::Pending, result); + } + } + + ASSERT_LT(records_touched.load(), kNumRecords); + result = store.CompletePending(true); + ASSERT_TRUE(result); + ASSERT_EQ(kNumRecords, records_touched.load()); + + store.StopSession(); +} + +TEST(CLASS, Rmw_Concurrent) { + class Key { + public: + Key(uint64_t key) + : key_{ key } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + return KeyHash{ Utility::GetHashCode(key_) }; + } + + /// Comparison operators. + inline bool operator==(const Key& other) const { + return key_ == other.key_; + } + inline bool operator!=(const Key& other) const { + return key_ != other.key_; + } + + private: + uint64_t key_; + }; + + class RmwContext; + class ReadContext; + + class Value { + public: + Value() + : counter_{ 0 } + , junk_{ 1 } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Value)); + } + + friend class RmwContext; + friend class ReadContext; + + private: + std::atomic counter_; + uint8_t junk_[1016]; + }; + static_assert(sizeof(Value) == 1024, "sizeof(Value) != 1024"); + static_assert(alignof(Value) == 8, "alignof(Value) != 8"); + + class RmwContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + RmwContext(Key key, uint64_t incr) + : key_{ key } + , incr_{ incr } { + } + + /// Copy (and deep-copy) constructor. 
+ RmwContext(const RmwContext& other) + : key_{ other.key_ } + , incr_{ other.incr_ } { + } + + inline const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + inline void RmwInitial(Value& value) { + value.counter_ = incr_; + } + inline void RmwCopy(const Value& old_value, Value& value) { + value.counter_ = old_value.counter_ + incr_; + } + inline bool RmwAtomic(Value& value) { + value.counter_.fetch_add(incr_); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint64_t incr_; + }; + + class ReadContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext(Key key) + : key_{ key } { + } + + /// Copy (and deep-copy) constructor. + ReadContext(const ReadContext& other) + : key_{ other.key_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + counter = value.counter_.load(std::memory_order_acquire); + } + inline void GetAtomic(const Value& value) { + counter = value.counter_.load(); + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + public: + uint64_t counter; + }; + + static constexpr size_t kNumRecords = 300000; + static constexpr size_t kNumThreads = 8; + + auto rmw_worker = [](FasterKv* store_, uint64_t incr) { + Guid session_id = store_->StartSession(); + for(size_t idx = 0; idx < kNumRecords; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + }; + + if(idx % 256 == 0) { + store_->Refresh(); + } + + RmwContext context{ Key{ idx }, incr }; + Status result = store_->Rmw(context, callback, 1); + if(result != Status::Ok) { + ASSERT_EQ(Status::Pending, result); + } + } + bool result = store_->CompletePending(true); + ASSERT_TRUE(result); + store_->StopSession(); + }; + + auto read_worker1 = [](FasterKv* store_, size_t thread_idx) { + Guid session_id = store_->StartSession(); + for(size_t idx = 0; idx < kNumRecords / kNumThreads; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ASSERT_EQ(7 * kNumThreads, context->counter); + }; + + if(idx % 256 == 0) { + store_->Refresh(); + } + + ReadContext context{ Key{ thread_idx* (kNumRecords / kNumThreads) + idx } }; + Status result = store_->Read(context, callback, 1); + if(result == Status::Ok) { + ASSERT_EQ(7 * kNumThreads, context.counter); + } else { + ASSERT_EQ(Status::Pending, result); + } + } + bool result = store_->CompletePending(true); + ASSERT_TRUE(result); + store_->StopSession(); + }; + + auto read_worker2 = [](FasterKv* store_, size_t thread_idx) { + Guid session_id = store_->StartSession(); + for(size_t idx = 0; idx < kNumRecords / kNumThreads; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ASSERT_EQ(13 * kNumThreads, context->counter); + }; + + if(idx % 256 == 0) { + store_->Refresh(); + } + + ReadContext context{ Key{ thread_idx* 
(kNumRecords / kNumThreads) + idx } }; + Status result = store_->Read(context, callback, 1); + if(result == Status::Ok) { + ASSERT_EQ(13 * kNumThreads, context.counter); + } else { + ASSERT_EQ(Status::Pending, result); + } + } + bool result = store_->CompletePending(true); + ASSERT_TRUE(result); + store_->StopSession(); + }; + + std::experimental::filesystem::create_directories("logs"); + + // 8 pages! + FasterKv store{ 262144, 268435456, "logs\\", 0.5 }; + + // Initial RMW. + std::deque threads{}; + for(int64_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(rmw_worker, &store, 7); + } + for(auto& thread : threads) { + thread.join(); + } + + // Read. + threads.clear(); + for(int64_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(read_worker1, &store, idx); + } + for(auto& thread : threads) { + thread.join(); + } + + // Second RMW. + threads.clear(); + for(int64_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(rmw_worker, &store, 6); + } + for(auto& thread : threads) { + thread.join(); + } + + // Read again. + threads.clear(); + for(int64_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(read_worker2, &store, idx); + } + for(auto& thread : threads) { + thread.join(); + } +} diff --git a/cc/test/paging_threadpool_test.cc b/cc/test/paging_threadpool_test.cc new file mode 100644 index 000000000..79ea2dfe0 --- /dev/null +++ b/cc/test/paging_threadpool_test.cc @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include +#include +#include +#include "gtest/gtest.h" +#include "core/faster.h" +#include "device/file_system_disk.h" + +using namespace FASTER::core; + +typedef FASTER::environment::ThreadPoolIoHandler handler_t; + +#define CLASS PagingTest_ThreadPool + +#include "paging_test.h" + +#undef CLASS + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/cc/test/recovery_queue_test.cc b/cc/test/recovery_queue_test.cc new file mode 100644 index 000000000..e04f3c7e0 --- /dev/null +++ b/cc/test/recovery_queue_test.cc @@ -0,0 +1,31 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include +#include +#include +#include +#include +#include +#include +#include "gtest/gtest.h" +#include "core/faster.h" +#include "core/light_epoch.h" +#include "core/thread.h" +#include "device/file_system_disk.h" + +using namespace FASTER::core; + +typedef FASTER::environment::QueueIoHandler handler_t; + +#define CLASS RecoveryTest_Queue + +#include "recovery_test.h" + +#undef CLASS + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/cc/test/recovery_test.h b/cc/test/recovery_test.h new file mode 100644 index 000000000..e42988fa1 --- /dev/null +++ b/cc/test/recovery_test.h @@ -0,0 +1,3753 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma once + +#include + +using namespace FASTER; + +/// Disk's log uses 32 MB segments. +typedef FASTER::device::FileSystemDisk disk_t; +typedef FASTER::device::FileSystemFile file_t; + +TEST(CLASS, MallocFixedPageSize) { + typedef MallocFixedPageSize alloc_t; + + // Test copied from C#, RecoveryTest.cs. 
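+// Plan: allocate a run of hash buckets from the fixed-page allocator, fill
+// them with values drawn from a seeded mt19937_64, checkpoint the allocator
+// to disk, recover it into a fresh allocator, then replay the same seed to
+// verify every entry (and that allocation resumes at the next free slot).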
+ std::random_device rd{}; + uint32_t seed = rd(); + std::mt19937_64 rng{ seed }; + std::experimental::filesystem::create_directories("test_ofb"); + + size_t num_bytes_written; + + LightEpoch epoch; + alloc_t allocator{}; + allocator.Initialize(512, epoch); + + size_t num_buckets_to_add = 16 * FixedPage::kPageSize + 5; + + FixedPageAddress* buckets = new FixedPageAddress[num_buckets_to_add]; + + { + disk_t checkpoint_disk{ "test_ofb", epoch }; + file_t checkpoint_file = checkpoint_disk.NewFile("test_ofb.dat"); + Status result = checkpoint_file.Open(&checkpoint_disk.handler()); + ASSERT_EQ(Status::Ok, result); + + //do something + for(size_t bucket_idx = 0; bucket_idx < num_buckets_to_add; ++bucket_idx) { + buckets[bucket_idx] = allocator.Allocate(); + HashBucket& bucket = allocator.Get(buckets[bucket_idx]); + for(size_t entry_idx = 0; entry_idx < HashBucket::kNumEntries; ++entry_idx) { + HashBucketEntry expected{ 0 }; + uint64_t random_num = rng(); + bool success = bucket.entries[entry_idx].compare_exchange_strong(expected, random_num); + ASSERT_TRUE(success); + } + HashBucketOverflowEntry expected{ 0 }; + uint64_t random_num = rng(); + bool success = bucket.overflow_entry.compare_exchange_strong(expected, random_num); + ASSERT_TRUE(success); + } + //issue call to checkpoint + result = allocator.Checkpoint(checkpoint_disk, std::move(checkpoint_file), num_bytes_written); + ASSERT_EQ(Status::Ok, result); + // (All the bucket we allocated, + the null page.) + ASSERT_EQ((num_buckets_to_add + 1) * sizeof(HashBucket), num_bytes_written); + //wait until complete + result = allocator.CheckpointComplete(true); + ASSERT_EQ(Status::Ok, result); + } + + LightEpoch recover_epoch; + alloc_t recover_allocator{}; + recover_allocator.Initialize(512, recover_epoch); + disk_t recover_disk{ "test_ofb", recover_epoch }; + file_t recover_file = recover_disk.NewFile("test_ofb.dat"); + Status result = recover_file.Open(&recover_disk.handler()); + ASSERT_EQ(Status::Ok, result); + + //issue call to recover + result = recover_allocator.Recover(recover_disk, std::move(recover_file), num_bytes_written, + num_bytes_written / sizeof(typename alloc_t::item_t)); + ASSERT_EQ(Status::Ok, result); + //wait until complete + result = recover_allocator.RecoverComplete(true); + ASSERT_EQ(Status::Ok, result); + + //verify that something + std::mt19937_64 rng2{ seed }; + for(size_t bucket_idx = 0; bucket_idx < num_buckets_to_add; ++bucket_idx) { + HashBucket& bucket = allocator.Get(buckets[bucket_idx]); + for(size_t entry_idx = 0; entry_idx < HashBucket::kNumEntries; ++entry_idx) { + uint64_t random_num = rng2(); + ASSERT_EQ(random_num, bucket.entries[entry_idx].load().control_); + } + uint64_t random_num = rng2(); + ASSERT_EQ(random_num, bucket.overflow_entry.load().control_); + } + + FixedPageAddress address = recover_allocator.Allocate(); + ASSERT_EQ(FixedPageAddress{ num_buckets_to_add + 1 }, address); + + delete[] buckets; +} + +TEST(CLASS, InternalHashTable) { + // (Just the hash table itself--no overflow buckets.) 
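+ // Same seeded-RNG round trip as above, but for the in-memory hash table
+ // itself: here the checkpoint should contain exactly
+ // kNumBuckets * sizeof(HashBucket) bytes, with no extra null page.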
+ std::random_device rd{}; + uint32_t seed = rd(); + std::mt19937_64 rng{ seed }; + std::experimental::filesystem::create_directories("test_ht"); + + constexpr uint64_t kNumBuckets = 8388608; + size_t num_bytes_written; + { + LightEpoch epoch; + disk_t checkpoint_disk{ "test_ht", epoch }; + file_t checkpoint_file = checkpoint_disk.NewFile("test_ht.dat"); + Status result = checkpoint_file.Open(&checkpoint_disk.handler()); + ASSERT_EQ(Status::Ok, result); + + InternalHashTable table{}; + table.Initialize(kNumBuckets, checkpoint_file.alignment()); + + //do something + for(size_t bucket_idx = 0; bucket_idx < kNumBuckets; ++bucket_idx) { + for(size_t entry_idx = 0; entry_idx < HashBucket::kNumEntries; ++entry_idx) { + HashBucketEntry expected{ 0 }; + bool success = table.bucket(bucket_idx).entries[entry_idx].compare_exchange_strong( + expected, rng()); + ASSERT_TRUE(success); + } + HashBucketOverflowEntry expected{ 0 }; + bool success = table.bucket(bucket_idx).overflow_entry.compare_exchange_strong(expected, + rng()); + ASSERT_TRUE(success); + } + + //issue call to checkpoint + result = table.Checkpoint(checkpoint_disk, std::move(checkpoint_file), num_bytes_written); + ASSERT_EQ(Status::Ok, result); + // (All the bucket we allocated, + the null page.) + ASSERT_EQ(kNumBuckets * sizeof(HashBucket), num_bytes_written); + //wait until complete + result = table.CheckpointComplete(true); + ASSERT_EQ(Status::Ok, result); + } + + LightEpoch epoch; + disk_t recover_disk{ "test_ht", epoch }; + file_t recover_file = recover_disk.NewFile("test_ht.dat"); + Status result = recover_file.Open(&recover_disk.handler()); + ASSERT_EQ(Status::Ok, result); + + InternalHashTable recover_table{}; + //issue call to recover + result = recover_table.Recover(recover_disk, std::move(recover_file), num_bytes_written); + ASSERT_EQ(Status::Ok, result); + //wait until complete + result = recover_table.RecoverComplete(true); + ASSERT_EQ(Status::Ok, result); + + //verify that something + std::mt19937_64 rng2{ seed }; + for(size_t bucket_idx = 0; bucket_idx < kNumBuckets; ++bucket_idx) { + for(size_t entry_idx = 0; entry_idx < HashBucket::kNumEntries; ++entry_idx) { + uint64_t random_num = rng2(); + ASSERT_EQ(random_num, recover_table.bucket(bucket_idx).entries[entry_idx].load().control_); + } + uint64_t random_num = rng2(); + ASSERT_EQ(random_num, recover_table.bucket(bucket_idx).overflow_entry.load().control_); + } +} + +TEST(CLASS, Serial) { + class Key { + public: + Key(uint32_t key) + : key_{ key } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + std::hash hash_fn{}; + return KeyHash{ hash_fn(key_) }; + } + + /// Comparison operators. 
+ inline bool operator==(const Key& other) const { + return key_ == other.key_; + } + inline bool operator!=(const Key& other) const { + return key_ != other.key_; + } + + private: + uint32_t key_; + }; + static_assert(sizeof(Key) == 4, "sizeof(Key) != 4"); + static_assert(alignof(Key) == 4, "alignof(Key) != 4"); + + class UpsertContext1; + class UpsertContext2; + class ReadContext1; + class ReadContext2; + + class Value1 { + public: + inline uint32_t size() const { + return size_; + } + + friend class UpsertContext1; + friend class UpsertContext2; + friend class ReadContext1; + + private: + uint16_t size_; + union { + std::atomic atomic_val1_; + uint32_t val1_; + }; + }; + static_assert(sizeof(Value1) == 8, "sizeof(Value1) != 8"); + static_assert(alignof(Value1) == 4, "alignof(Value1) != 4"); + + class Value2 : public Value1 { + public: + friend class UpsertContext2; + friend class ReadContext2; + + private: + union { + std::atomic atomic_val2_; + uint16_t val2_; + }; + uint8_t wasted_space[3]; + }; + static_assert(sizeof(Value2) == 16, "sizeof(Value2) != 12"); + static_assert(alignof(Value2) == 4, "alignof(Value2) != 4"); + + class UpsertContext1 : public IAsyncContext { + public: + typedef Key key_t; + typedef Value1 value_t; + + UpsertContext1(const Key& key, uint32_t val) + : key_{ key } + , val_{ val } { + } + + /// Copy (and deep-copy) constructor. + UpsertContext1(const UpsertContext1& other) + : key_{ other.key_ } + , val_{ other.val_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + /// Non-atomic and atomic Put() methods. + inline void Put(Value1& value) { + value.size_ = sizeof(value); + value.val1_ = val_; + } + inline bool PutAtomic(Value1& value) { + EXPECT_EQ(value.size_, sizeof(value)); + value.atomic_val1_.store(val_); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t val_; + }; + + class UpsertContext2 : public IAsyncContext { + public: + typedef Key key_t; + typedef Value2 value_t; + + UpsertContext2(const Key& key, uint16_t val) + : key_{ key } + , val_{ val } { + } + + /// Copy (and deep-copy) constructor. + UpsertContext2(const UpsertContext2& other) + : key_{ other.key_ } + , val_{ other.val_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + /// Non-atomic and atomic Put() methods. + inline void Put(Value2& value) { + value.size_ = sizeof(value); + value.val2_ = val_; + } + inline bool PutAtomic(Value2& value) { + EXPECT_EQ(value.size_, sizeof(value)); + value.atomic_val2_.store(val_); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint16_t val_; + }; + + class ReadContext1 : public IAsyncContext { + public: + typedef Key key_t; + typedef Value1 value_t; + + ReadContext1(Key key, uint32_t expected_) + : key_{ key } + , val_{ 0 } + , expected{ expected_ } { + } + + /// Copy (and deep-copy) constructor. 
+ ReadContext1(const ReadContext1& other) + : key_{ other.key_ } + , val_{ other.val_ } + , expected{ other.expected } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value1& value) { + val_ = value.val1_; + } + inline void GetAtomic(const Value1& value) { + val_ = value.atomic_val1_.load(); + } + + uint64_t val() const { + return val_; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t val_; + public: + const uint32_t expected; + }; + + class ReadContext2 : public IAsyncContext { + public: + typedef Key key_t; + typedef Value2 value_t; + + ReadContext2(Key key, uint16_t expected_) + : key_{ key } + , val_{ 0 } + , expected{ expected_ } { + } + + /// Copy (and deep-copy) constructor. + ReadContext2(const ReadContext2& other) + : key_{ other.key_ } + , val_{ other.val_ } + , expected{ other.expected } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value2& value) { + val_ = value.val2_; + } + inline void GetAtomic(const Value2& value) { + val_ = value.atomic_val2_.load(); + } + + uint64_t val() const { + return val_; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint16_t val_; + public: + const uint16_t expected; + }; + + auto upsert_callback = [](IAsyncContext* context, Status result) { + // Upserts don't go to disk. + ASSERT_TRUE(false); + }; + + std::experimental::filesystem::create_directories("storage"); + + static constexpr size_t kNumRecords = 6000000; + + Guid session_id; + + { + // Populate and checkpoint the store. + // 6 pages! 
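+ // 524288 hash-table buckets and a 192 MB in-memory log (6 x 32 MB pages,
+ // matching the 32 MB disk segments declared at the top of this header),
+ // checkpointing under "storage"; the 0.4 is presumably the mutable fraction
+ // of the in-memory log.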
+ FasterKv store{ 524288, 201326592, "storage", 0.4 }; + + session_id = store.StartSession(); + + // upsert some records + assert(kNumRecords % 2 == 0); + for(uint32_t idx = 0; idx < kNumRecords; idx += 2) { + { + UpsertContext1 context{ Key{ idx }, idx + 7 }; + Status result = store.Upsert(context, upsert_callback, 1); + ASSERT_EQ(Status::Ok, result); + } + { + UpsertContext2 context{ Key{ idx + 1 }, 55 }; + Status result = store.Upsert(context, upsert_callback, 1); + ASSERT_EQ(Status::Ok, result); + } + } + // verify them + static std::atomic records_read; + records_read = 0; + for(uint32_t idx = 0; idx < kNumRecords; idx += 2) { + auto callback1 = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ++records_read; + ASSERT_EQ(context->expected, context->val()); + }; + auto callback2 = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ++records_read; + ASSERT_EQ(context->expected, context->val()); + }; + + if(idx % 256 == 0) { + store.Refresh(); + store.CompletePending(false); + } + + { + ReadContext1 context{ Key{ idx }, idx + 7 }; + Status result = store.Read(context, callback1, 1); + if(result == Status::Ok) { + ++records_read; + ASSERT_EQ(context.expected, context.val()); + } else { + ASSERT_EQ(Status::Pending, result); + } + } + { + ReadContext2 context{ Key{ idx + 1 }, 55 }; + Status result = store.Read(context, callback2, 1); + if(result == Status::Ok) { + ++records_read; + ASSERT_EQ(context.expected, context.val()); + } else { + ASSERT_EQ(Status::Pending, result); + } + } + } + + static std::atomic num_threads_persistent; + num_threads_persistent = 0; + static std::atomic threads_persistent[Thread::kMaxNumThreads]; + for(size_t idx = 0; idx < Thread::kMaxNumThreads; ++idx) { + threads_persistent[idx] = false; + } + + auto persistence_callback = [](uint64_t persistent_serial_num) { + bool expected = false; + ASSERT_TRUE(threads_persistent[Thread::id()].compare_exchange_strong(expected, + true)); + ++num_threads_persistent; + }; + + // checkpoint (transition from REST to INDEX_CHKPT) + ASSERT_TRUE(store.Checkpoint(persistence_callback)); + + while(num_threads_persistent < 1) { + store.CompletePending(false); + } + + bool result = store.CompletePending(true); + ASSERT_TRUE(result); + ASSERT_EQ(kNumRecords, records_read.load()); + + store.StopSession(); + } + + // Test recovery. + FasterKv new_store{ 524288, 201326592, "storage", 0.4 }; + + std::vector session_ids; + Status status = new_store.Recover(1, 1, session_ids); + ASSERT_EQ(Status::Ok, status); + ASSERT_EQ(1, session_ids.size()); + ASSERT_EQ(session_id, session_ids[0]); + ASSERT_EQ(1, new_store.ContinueSession(session_id)); + + // Verify the recovered store. 
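+ // Recovery restored the index and log state from the checkpoint, and
+ // ContinueSession resumed the old session at its persisted serial number (1);
+ // every record upserted before the checkpoint should still read back with
+ // its pre-checkpoint value (idx + 7 and 55).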
+ static std::atomic records_read; + records_read = 0; + for(uint32_t idx = 0; idx < kNumRecords; idx += 2) { + auto callback1 = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result) << *reinterpret_cast(&context->key()); + ++records_read; + ASSERT_EQ(context->expected, context->val()); + }; + auto callback2 = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result) << *reinterpret_cast(&context->key()); + ++records_read; + ASSERT_EQ(context->expected, context->val()); + }; + + if(idx % 256 == 0) { + new_store.Refresh(); + new_store.CompletePending(false); + } + + { + ReadContext1 context{ Key{ idx }, idx + 7 }; + Status result = new_store.Read(context, callback1, 1); + if(result == Status::Ok) { + ++records_read; + ASSERT_EQ(context.expected, context.val()); + } else { + ASSERT_EQ(Status::Pending, result); + } + } + { + ReadContext2 context{ Key{ idx + 1 }, 55 }; + Status result = new_store.Read(context, callback2, 1); + if(result == Status::Ok) { + ++records_read; + ASSERT_EQ(context.expected, context.val()); + } else { + ASSERT_EQ(Status::Pending, result); + } + } + } + + new_store.CompletePending(true); + ASSERT_EQ(records_read.load(), kNumRecords); + new_store.StopSession(); + + session_id = new_store.StartSession(); + + // Upsert some changes and verify them. + for(uint32_t idx = 0; idx < kNumRecords; idx += 2) { + { + UpsertContext1 context{ Key{ idx }, idx + 55 }; + Status result = new_store.Upsert(context, upsert_callback, 1); + ASSERT_EQ(Status::Ok, result); + } + { + UpsertContext2 context{ Key{ idx + 1 }, 77 }; + Status result = new_store.Upsert(context, upsert_callback, 1); + ASSERT_EQ(Status::Ok, result); + } + } + records_read = 0; + for(uint32_t idx = 0; idx < kNumRecords; idx += 2) { + auto callback1 = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ++records_read; + ASSERT_EQ(context->expected, context->val()); + }; + auto callback2 = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ++records_read; + ASSERT_EQ(context->expected, context->val()); + }; + + if(idx % 256 == 0) { + new_store.Refresh(); + new_store.CompletePending(false); + } + + { + ReadContext1 context{ Key{ idx }, idx + 55 }; + Status result = new_store.Read(context, callback1, 1); + if(result == Status::Ok) { + ++records_read; + ASSERT_EQ(context.expected, context.val()); + } else { + ASSERT_EQ(Status::Pending, result); + } + } + { + ReadContext2 context{ Key{ idx + 1 }, 77 }; + Status result = new_store.Read(context, callback2, 1); + if(result == Status::Ok) { + ++records_read; + ASSERT_EQ(context.expected, context.val()); + } else { + ASSERT_EQ(Status::Pending, result); + } + } + } + + new_store.CompletePending(true); + ASSERT_EQ(records_read.load(), kNumRecords); + new_store.StopSession(); +} + +TEST(CLASS, Serial_VariableLengthKey) { + class alignas(4) Key { + public: + Key(uint8_t len, uint32_t fill) + : len_{ len } { + for(uint8_t idx = 0; idx < len_; ++idx) { + buffer()[idx] = fill; + } + } + + /// Copy constructor. 
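    /// Layout note: the 4-byte header (len_ plus padding) is immediately followed, in the
    /// same allocation, by len_ uint32_t words, so
    ///   size()   == sizeof(Key) + len_ * sizeof(uint32_t)
    ///   buffer() == reinterpret_cast<uint32_t*>(this + 1)
    /// which is why the copy below memcpy()s len_ * sizeof(uint32_t) bytes out of buffer().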
+ Key(const Key& other) + : len_{ other.len_ } { + std::memcpy(buffer(), other.buffer(), len_ * sizeof(uint32_t)); + } + + inline uint32_t size() const { + return sizeof(*this) + (len_ * sizeof(uint32_t)); + } + private: + inline uint32_t* buffer() { + return reinterpret_cast(this + 1); + } + public: + inline const uint32_t* buffer() const { + return reinterpret_cast(this + 1); + } + inline KeyHash GetHash() const { + return KeyHash{ Utility::HashBytes( + reinterpret_cast(buffer()), len_ * 2) }; + } + + /// Comparison operators. + inline bool operator==(const Key& other) const { + return len_ == other.len_ && + std::memcmp(buffer(), other.buffer(), len_ * sizeof(uint32_t)) == 0; + } + inline bool operator!=(const Key& other) const { + return len_ != other.len_ || + std::memcmp(buffer(), other.buffer(), len_ * sizeof(uint32_t)) != 0; + } + + private: + uint8_t len_; + + }; + static_assert(sizeof(Key) == 4, "sizeof(Key) != 4"); + static_assert(alignof(Key) == 4, "alignof(Key) != 4"); + + class UpsertContext1; + class UpsertContext2; + class ReadContext1; + class ReadContext2; + + class Value1 { + public: + inline uint32_t size() const { + return size_; + } + + friend class UpsertContext1; + friend class UpsertContext2; + friend class ReadContext1; + + private: + uint16_t size_; + union { + std::atomic atomic_val1_; + uint32_t val1_; + }; + }; + static_assert(sizeof(Value1) == 8, "sizeof(Value1) != 8"); + static_assert(alignof(Value1) == 4, "alignof(Value1) != 4"); + + class Value2 : public Value1 { + public: + friend class UpsertContext2; + friend class ReadContext2; + + private: + union { + std::atomic atomic_val2_; + uint16_t val2_; + }; + uint8_t wasted_space[3]; + }; + static_assert(sizeof(Value2) == 16, "sizeof(Value2) != 12"); + static_assert(alignof(Value2) == 4, "alignof(Value2) != 4"); + + class UpsertContext1 : public IAsyncContext { + public: + typedef Key key_t; + typedef Value1 value_t; + + UpsertContext1(uint32_t key, uint32_t val) + : val_{ val } { + uint8_t len = (key % 16) + 1; + key_ = alloc_context(sizeof(key_t) + (len * sizeof(uint32_t))); + new(key_.get()) key_t{ len, key }; + } + + /// Deep-copy constructor. + UpsertContext1(UpsertContext1& other) + : key_{ std::move(other.key_) } + , val_{ other.val_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return *key_.get(); + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + /// Non-atomic and atomic Put() methods. + inline void Put(Value1& value) { + value.size_ = sizeof(value); + value.val1_ = val_; + } + inline bool PutAtomic(Value1& value) { + EXPECT_EQ(value.size_, sizeof(value)); + value.atomic_val1_.store(val_); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + context_unique_ptr_t key_; + uint32_t val_; + }; + + class UpsertContext2 : public IAsyncContext { + public: + typedef Key key_t; + typedef Value2 value_t; + + UpsertContext2(uint32_t key, uint16_t val) + : val_{ val } { + uint8_t len = (key % 16) + 1; + key_ = alloc_context(sizeof(key_t) + (len * sizeof(uint32_t))); + new(key_.get()) key_t{ len, key }; + } + + /// Deep-copy constructor. + UpsertContext2(UpsertContext2& other) + : key_{ std::move(other.key_) } + , val_{ other.val_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. 
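  // The upsert/read contexts in this test own their variable-length keys on the heap: a raw
  // buffer big enough for the header plus payload is obtained from the context allocator and
  // the key is placement-constructed into it. A sketch of that pattern, where the <key_t>
  // template arguments and the typed context_unique_ptr_t<key_t> member are assumed spellings:
  uint8_t len = (key % 16) + 1;                                         // 1..16 payload words
  context_unique_ptr_t<key_t> key_ =
      alloc_context<key_t>(sizeof(key_t) + len * sizeof(uint32_t));     // sized raw buffer
  new(key_.get()) key_t{ len, key };                                    // construct the key in place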
+ inline const Key& key() const { + return *key_.get(); + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + /// Non-atomic and atomic Put() methods. + inline void Put(Value2& value) { + value.size_ = sizeof(value); + value.val2_ = val_; + } + inline bool PutAtomic(Value2& value) { + EXPECT_EQ(value.size_, sizeof(value)); + value.atomic_val2_.store(val_); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + context_unique_ptr_t key_; + uint16_t val_; + }; + + class ReadContext1 : public IAsyncContext { + public: + typedef Key key_t; + typedef Value1 value_t; + + ReadContext1(uint32_t key, uint32_t expected_) + : val_{ 0 } + , expected{ expected_ } { + uint8_t len = (key % 16) + 1; + key_ = alloc_context(sizeof(key_t) + (len * sizeof(uint32_t))); + new(key_.get()) key_t{ len, key }; + } + + /// Deep-copy constructor. + ReadContext1(ReadContext1& other) + : key_{ std::move(other.key_) } + , val_{ other.val_ } + , expected{ other.expected } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return *key_.get(); + } + + inline void Get(const Value1& value) { + val_ = value.val1_; + } + inline void GetAtomic(const Value1& value) { + val_ = value.atomic_val1_.load(); + } + + uint64_t val() const { + return val_; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + context_unique_ptr_t key_; + uint32_t val_; + public: + const uint32_t expected; + }; + + class ReadContext2 : public IAsyncContext { + public: + typedef Key key_t; + typedef Value2 value_t; + + ReadContext2(uint32_t key, uint16_t expected_) + : val_{ 0 } + , expected{ expected_ } { + uint8_t len = (key % 16) + 1; + key_ = alloc_context(sizeof(key_t) + (len * sizeof(uint32_t))); + new(key_.get()) key_t{ len, key }; + } + + /// Deep-copy constructor. + ReadContext2(ReadContext2& other) + : key_{ std::move(other.key_) } + , val_{ other.val_ } + , expected{ other.expected } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return *key_.get(); + } + + inline void Get(const Value2& value) { + val_ = value.val2_; + } + inline void GetAtomic(const Value2& value) { + val_ = value.atomic_val2_.load(); + } + + uint64_t val() const { + return val_; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + context_unique_ptr_t key_; + uint16_t val_; + public: + const uint16_t expected; + }; + + auto upsert_callback = [](IAsyncContext* context, Status result) { + // Upserts don't go to disk. + ASSERT_TRUE(false); + }; + + std::experimental::filesystem::create_directories("storage"); + + static constexpr size_t kNumRecords = 6000000; + + Guid session_id; + + { + // Populate and checkpoint the store. + // 6 pages! 
+ FasterKv store{ 524288, 201326592, "storage", 0.4 }; + + session_id = store.StartSession(); + + // upsert some records + assert(kNumRecords % 2 == 0); + for(uint32_t idx = 0; idx < kNumRecords; idx += 2) { + { + UpsertContext1 context{ idx, idx + 7 }; + Status result = store.Upsert(context, upsert_callback, 1); + ASSERT_EQ(Status::Ok, result); + } + { + UpsertContext2 context{ idx + 1, 55 }; + Status result = store.Upsert(context, upsert_callback, 1); + ASSERT_EQ(Status::Ok, result); + } + } + // verify them + static std::atomic records_read; + records_read = 0; + for(uint32_t idx = 0; idx < kNumRecords; idx += 2) { + auto callback1 = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ++records_read; + ASSERT_EQ(context->expected, context->val()); + }; + auto callback2 = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ++records_read; + ASSERT_EQ(context->expected, context->val()); + }; + + if(idx % 256 == 0) { + store.Refresh(); + store.CompletePending(false); + } + + { + ReadContext1 context{ idx, idx + 7 }; + Status result = store.Read(context, callback1, 1); + if(result == Status::Ok) { + ++records_read; + ASSERT_EQ(context.expected, context.val()); + } else { + ASSERT_EQ(Status::Pending, result); + } + } + { + ReadContext2 context{ idx + 1, 55 }; + Status result = store.Read(context, callback2, 1); + if(result == Status::Ok) { + ++records_read; + ASSERT_EQ(context.expected, context.val()); + } else { + ASSERT_EQ(Status::Pending, result); + } + } + } + + static std::atomic num_threads_persistent; + num_threads_persistent = 0; + static std::atomic threads_persistent[Thread::kMaxNumThreads]; + for(size_t idx = 0; idx < Thread::kMaxNumThreads; ++idx) { + threads_persistent[idx] = false; + } + + auto persistence_callback = [](uint64_t persistent_serial_num) { + bool expected = false; + ASSERT_TRUE(threads_persistent[Thread::id()].compare_exchange_strong(expected, + true)); + ++num_threads_persistent; + }; + + // checkpoint (transition from REST to INDEX_CHKPT) + ASSERT_TRUE(store.Checkpoint(persistence_callback)); + + while(num_threads_persistent < 1) { + store.CompletePending(false); + } + + bool result = store.CompletePending(true); + ASSERT_TRUE(result); + ASSERT_EQ(kNumRecords, records_read.load()); + + store.StopSession(); + } + + // Test recovery. + FasterKv new_store{ 524288, 201326592, "storage", 0.4 }; + + std::vector session_ids; + Status status = new_store.Recover(1, 1, session_ids); + ASSERT_EQ(Status::Ok, status); + ASSERT_EQ(1, session_ids.size()); + ASSERT_EQ(session_id, session_ids[0]); + ASSERT_EQ(1, new_store.ContinueSession(session_id)); + + // Verify the recovered store. 
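  // The per-thread cadence used by these verification loops, condensed: refresh the thread's
  // view every 256 operations so epoch-protected work and checkpoint phases can advance, drain
  // completions without blocking, and only block once at the very end.
  for(uint32_t idx = 0; idx < kNumRecords; ++idx) {
    // ... issue the Upsert()/Read() for idx ...
    if(idx % 256 == 0) {
      new_store.Refresh();               // observe epoch / state-machine transitions
      new_store.CompletePending(false);  // non-blocking drain of pending completions
    }
  }
  new_store.CompletePending(true);       // block until everything outstanding has completed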
+ static std::atomic records_read; + records_read = 0; + for(uint32_t idx = 0; idx < kNumRecords; idx += 2) { + auto callback1 = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result) << *reinterpret_cast(&context->key()); + ++records_read; + ASSERT_EQ(context->expected, context->val()); + }; + auto callback2 = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result) << *reinterpret_cast(&context->key()); + ++records_read; + ASSERT_EQ(context->expected, context->val()); + }; + + if(idx % 256 == 0) { + new_store.Refresh(); + new_store.CompletePending(false); + } + + { + ReadContext1 context{ idx, idx + 7 }; + Status result = new_store.Read(context, callback1, 1); + if(result == Status::Ok) { + ++records_read; + ASSERT_EQ(context.expected, context.val()); + } else { + ASSERT_EQ(Status::Pending, result); + } + } + { + ReadContext2 context{ idx + 1, 55 }; + Status result = new_store.Read(context, callback2, 1); + if(result == Status::Ok) { + ++records_read; + ASSERT_EQ(context.expected, context.val()); + } else { + ASSERT_EQ(Status::Pending, result); + } + } + } + + new_store.CompletePending(true); + ASSERT_EQ(records_read.load(), kNumRecords); + new_store.StopSession(); + + session_id = new_store.StartSession(); + + // Upsert some changes and verify them. + for(uint32_t idx = 0; idx < kNumRecords; idx += 2) { + { + UpsertContext1 context{ idx, idx + 55 }; + Status result = new_store.Upsert(context, upsert_callback, 1); + ASSERT_EQ(Status::Ok, result); + } + { + UpsertContext2 context{ idx + 1, 77 }; + Status result = new_store.Upsert(context, upsert_callback, 1); + ASSERT_EQ(Status::Ok, result); + } + } + records_read = 0; + for(uint32_t idx = 0; idx < kNumRecords; idx += 2) { + auto callback1 = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ++records_read; + ASSERT_EQ(context->expected, context->val()); + }; + auto callback2 = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ++records_read; + ASSERT_EQ(context->expected, context->val()); + }; + + if(idx % 256 == 0) { + new_store.Refresh(); + new_store.CompletePending(false); + } + + { + ReadContext1 context{ idx, idx + 55 }; + Status result = new_store.Read(context, callback1, 1); + if(result == Status::Ok) { + ++records_read; + ASSERT_EQ(context.expected, context.val()); + } else { + ASSERT_EQ(Status::Pending, result); + } + } + { + ReadContext2 context{ idx + 1, 77 }; + Status result = new_store.Read(context, callback2, 1); + if(result == Status::Ok) { + ++records_read; + ASSERT_EQ(context.expected, context.val()); + } else { + ASSERT_EQ(Status::Pending, result); + } + } + } + + new_store.CompletePending(true); + ASSERT_EQ(records_read.load(), kNumRecords); + new_store.StopSession(); +} + +TEST(CLASS, Concurrent_Insert_Small) { + class Key { + public: + Key(uint32_t key) + : key_{ key } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + std::hash hash_fn{}; + return KeyHash{ hash_fn(key_) }; + } + + /// Comparison operators. 
+ inline bool operator==(const Key& other) const { + return key_ == other.key_; + } + inline bool operator!=(const Key& other) const { + return key_ != other.key_; + } + + private: + uint32_t key_; + }; + static_assert(sizeof(Key) == 4, "sizeof(Key) != 4"); + static_assert(alignof(Key) == 4, "alignof(Key) != 4"); + + class UpsertContext; + class ReadContext1; + class ReadContext2; + + class Value { + public: + Value() + : val_{ 0 } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Value)); + } + + friend class UpsertContext; + friend class ReadContext1; + friend class ReadContext2; + + private: + union { + std::atomic atomic_val_; + uint32_t val_; + }; + }; + static_assert(sizeof(Value) == 4, "sizeof(Value) != 4"); + static_assert(alignof(Value) == 4, "alignof(Value) != 4"); + + class UpsertContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + UpsertContext(const Key& key, uint32_t val) + : key_{ key } + , val_{ val } { + } + + /// Copy (and deep-copy) constructor. + UpsertContext(const UpsertContext& other) + : key_{ other.key_ } + , val_{ other.val_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + /// Non-atomic and atomic Put() methods. + inline void Put(Value& value) { + value.val_ = val_; + } + inline bool PutAtomic(Value& value) { + value.atomic_val_.store(val_); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t val_; + }; + + static auto upsert_callback = [](IAsyncContext* context, Status result) { + // Upserts don't go to disk. + ASSERT_TRUE(false); + }; + + std::experimental::filesystem::create_directories("storage"); + + static constexpr uint32_t kNumRecords = 200000; + static constexpr uint32_t kNumThreads = 16; + static constexpr uint32_t kNumRecordsPerThread = kNumRecords / kNumThreads; + + static Guid session_ids[kNumThreads]; + std::memset(session_ids, 0, sizeof(session_ids)); + + static std::atomic num_threads_persistent; + num_threads_persistent = 0; + static std::atomic threads_persistent[Thread::kMaxNumThreads]; + for(size_t idx = 0; idx < Thread::kMaxNumThreads; ++idx) { + threads_persistent[idx] = false; + } + + static std::atomic num_threads_started; + num_threads_started = 0; + + static auto persistence_callback = [](uint64_t persistent_serial_num) { + bool expected = false; + ASSERT_TRUE(threads_persistent[Thread::id()].compare_exchange_strong(expected, true)); + ++num_threads_persistent; + }; + + typedef FasterKv store_t; + + class ReadContext1 : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext1(Key key, uint32_t expected_) + : key_{ key } + , val_{ 0 } + , expected{ expected_ } { + } + + /// Copy (and deep-copy) constructor. + ReadContext1(const ReadContext1& other) + : key_{ other.key_ } + , val_{ other.val_ } + , expected{ other.expected } { + } + + /// The implicit and explicit interfaces require a key() accessor. 
+ inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + val_ = value.val_; + } + inline void GetAtomic(const Value& value) { + val_ = value.atomic_val_.load(); + } + + uint64_t val() const { + return val_; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t val_; + public: + const uint32_t expected; + }; + + { + // Populate and checkpoint the store. + + // 6 pages! + store_t store{ 8192, 201326592, "storage", 0.4 }; + + auto upsert_checkpoint_worker = [](store_t* store, uint32_t thread_id) { + assert(thread_id == 0); + session_ids[thread_id] = store->StartSession(); + ++num_threads_started; + + // upsert some records + for(uint32_t idx = kNumRecordsPerThread * thread_id; + idx < kNumRecordsPerThread * (thread_id + 1); ++idx) { + UpsertContext context{ Key{ idx }, idx + 7 }; + + Status result = store->Upsert(context, upsert_callback, 1); + ASSERT_EQ(Status::Ok, result); + + if(idx % 256 == 0) { + store->Refresh(); + } + } + + while(num_threads_started < kNumThreads) { + std::this_thread::yield(); + } + // checkpoint (transition from REST to INDEX_CHKPT) + ASSERT_TRUE(store->Checkpoint(persistence_callback)); + + // Ensure that the checkpoint completes. + while(num_threads_persistent < kNumThreads) { + store->CompletePending(false); + } + + bool result = store->CompletePending(true); + ASSERT_TRUE(result); + store->StopSession(); + }; + + auto upsert_worker = [](store_t* store, uint32_t thread_id) { + assert(thread_id != 0); + session_ids[thread_id] = store->StartSession(); + ++num_threads_started; + + // upsert some records + for(uint32_t idx = kNumRecordsPerThread * thread_id; + idx < kNumRecordsPerThread * (thread_id + 1); ++idx) { + UpsertContext context{ Key{ idx }, idx + 7 }; + Status result = store->Upsert(context, upsert_callback, 1); + ASSERT_EQ(Status::Ok, result); + + if(idx % 256 == 0) { + store->Refresh(); + } + } + + // Don't exit this session until the checkpoint has completed. + while(num_threads_persistent < kNumThreads) { + store->CompletePending(false); + } + + bool result = store->CompletePending(true); + ASSERT_TRUE(result); + store->StopSession(); + }; + + std::deque threads{}; + threads.emplace_back(upsert_checkpoint_worker, &store, 0); + for(uint32_t idx = 1; idx < kNumThreads; ++idx) { + threads.emplace_back(upsert_worker, &store, idx); + } + for(auto& thread : threads) { + thread.join(); + } + + // Verify the store. + store.StartSession(); + + for(uint32_t idx = 0; idx < kNumRecords; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ASSERT_EQ(context->expected, context->val()); + }; + + ReadContext1 context{ Key{ idx }, idx + 7 }; + Status result = store.Read(context, callback, 1); + if(result != Status::Ok) { + ASSERT_EQ(Status::Pending, result); + } + } + + store.StopSession(); + } + + // Test recovery. 
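  // Condensed shape of the multi-threaded checkpoint above, before recovery is tested below:
  // thread 0 acts as the coordinator, and every session keeps pumping CompletePending()
  // until its own persistence callback has fired.
  auto worker = [](store_t* store, uint32_t thread_id) {
    session_ids[thread_id] = store->StartSession();
    ++num_threads_started;
    // ... upserts for this thread's key range, with periodic store->Refresh() ...
    if(thread_id == 0) {
      while(num_threads_started < kNumThreads) std::this_thread::yield();
      ASSERT_TRUE(store->Checkpoint(persistence_callback));
    }
    while(num_threads_persistent < kNumThreads) {
      store->CompletePending(false);   // all sessions must participate until the checkpoint completes
    }
    ASSERT_TRUE(store->CompletePending(true));
    store->StopSession();
  };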
+  store_t new_store{ 8192, 201326592, "storage", 0.4 };
+
+  std::vector<Guid> recovered_session_ids;
+  Status status = new_store.Recover(1, 1, recovered_session_ids);
+  ASSERT_EQ(recovered_session_ids.size(), kNumThreads);
+  ASSERT_EQ(Status::Ok, status);
+
+  static std::atomic<uint64_t> records_read;
+  records_read = 0;
+
+  class ReadContext2 : public IAsyncContext {
+   public:
+    typedef Key key_t;
+    typedef Value value_t;
+
+    ReadContext2(Key key, uint32_t expected_, uint32_t idx_, std::atomic<bool>* found_)
+      : key_{ key }
+      , val_{ 0 }
+      , expected{ expected_ }
+      , idx{ idx_ }
+      , found{ found_ } {
+    }
+
+    /// Copy (and deep-copy) constructor.
+    ReadContext2(const ReadContext2& other)
+      : key_{ other.key_ }
+      , val_{ other.val_ }
+      , expected{ other.expected }
+      , idx{ other.idx }
+      , found{ other.found } {
+    }
+
+    /// The implicit and explicit interfaces require a key() accessor.
+    inline const Key& key() const {
+      return key_;
+    }
+
+    inline void Get(const Value& value) {
+      val_ = value.val_;
+    }
+    inline void GetAtomic(const Value& value) {
+      val_ = value.atomic_val_.load();
+    }
+
+    uint64_t val() const {
+      return val_;
+    }
+
+   protected:
+    /// The explicit interface requires a DeepCopy_Internal() implementation.
+    Status DeepCopy_Internal(IAsyncContext*& context_copy) {
+      return IAsyncContext::DeepCopy_Internal(*this, context_copy);
+    }
+
+   private:
+    Key key_;
+    uint32_t val_;
+   public:
+    const uint32_t expected;
+    const uint32_t idx;
+    std::atomic<bool>* found;
+  };
+
+  auto read_worker = [](store_t* store, uint32_t thread_id) {
+    uint64_t serial_num = store->ContinueSession(session_ids[thread_id]);
+    ASSERT_EQ(1, serial_num);
+
+    std::unique_ptr<std::atomic<bool>> found{ new std::atomic<bool>[kNumRecordsPerThread] };
+    std::memset(found.get(), 0, sizeof(found.get()[0]) * kNumRecordsPerThread);
+
+    // verify records
+    auto callback = [](IAsyncContext* ctxt, Status result) {
+      CallbackContext<ReadContext2> context{ ctxt };
+      if(result == Status::Ok) {
+        ++records_read;
+        ASSERT_EQ(context->expected, context->val());
+        bool expected = false;
+        ASSERT_TRUE(context->found[context->idx].compare_exchange_strong(expected, true));
+      } else {
+        ASSERT_EQ(Status::NotFound, result);
+        ASSERT_FALSE(context->found[context->idx].load());
+      }
+    };
+    for(uint32_t idx = kNumRecordsPerThread * thread_id;
+        idx < kNumRecordsPerThread * (thread_id + 1); ++idx) {
+      ReadContext2 context{ Key{ idx }, idx + 7, idx - (kNumRecordsPerThread * thread_id),
+                            found.get() };
+      Status result = store->Read(context, callback, 1);
+      if(result == Status::Ok) {
+        ++records_read;
+        ASSERT_EQ(context.expected, context.val());
+        bool expected = false;
+        ASSERT_TRUE(found.get()[context.idx].compare_exchange_strong(expected, true));
+      } else {
+        ASSERT_TRUE(result == Status::Pending || result == Status::NotFound);
+        if(result == Status::NotFound) {
+          ASSERT_FALSE(found.get()[context.idx].load());
+        }
+      }
+
+      if(idx % 256 == 0) {
+        store->Refresh();
+        store->CompletePending(false);
+      }
+    }
+    store->CompletePending(true);
+    store->StopSession();
+
+    bool found_all = true;
+    for(uint32_t idx = 0; idx < kNumRecordsPerThread; ++idx) {
+      if(found_all != found.get()[idx]) {
+        // Consistent-point recovery implies that after one record isn't found, all subsequent
+        // records will not be found.
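        // Equivalently: found[] must be a prefix of hits followed only by misses, i.e. for any
        // i < j, found[j] implies found[i]. The check below walks the array once and reports the
        // offending key's hash-bucket index and tag if a hit ever follows a miss.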
+ Key key{ kNumRecordsPerThread* thread_id + idx }; + KeyHash hash = key.GetHash(); + std::string error; + error += "key = "; + error += std::to_string(kNumRecordsPerThread* thread_id + idx); + error += ", idx = "; + error += std::to_string(hash.idx(8192)); + error += ", tag = "; + error += std::to_string(hash.tag()); + ASSERT_TRUE(found_all) << error; + found_all = false; + } + } + }; + + std::deque threads{}; + for(uint32_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(read_worker, &new_store, idx); + } + for(auto& thread : threads) { + thread.join(); + } + + ASSERT_GT(records_read, (uint32_t)0); + ASSERT_LE(records_read, kNumRecords); +} + +TEST(CLASS, Concurrent_Insert_Large) { + class Key { + public: + Key(uint32_t key) + : key_{ key } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + std::hash hash_fn{}; + return KeyHash{ hash_fn(key_) }; + } + + /// Comparison operators. + inline bool operator==(const Key& other) const { + return key_ == other.key_; + } + inline bool operator!=(const Key& other) const { + return key_ != other.key_; + } + + private: + uint32_t key_; + }; + static_assert(sizeof(Key) == 4, "sizeof(Key) != 4"); + static_assert(alignof(Key) == 4, "alignof(Key) != 4"); + + class UpsertContext; + class ReadContext1; + class ReadContext2; + + class Value { + public: + Value() + : val_{ 0 } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Value)); + } + + friend class UpsertContext; + friend class ReadContext1; + friend class ReadContext2; + + private: + union { + std::atomic atomic_val_; + uint32_t val_; + }; + }; + static_assert(sizeof(Value) == 4, "sizeof(Value) != 4"); + static_assert(alignof(Value) == 4, "alignof(Value) != 4"); + + class UpsertContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + UpsertContext(const Key& key, uint32_t val) + : key_{ key } + , val_{ val } { + } + + /// Copy (and deep-copy) constructor. + UpsertContext(const UpsertContext& other) + : key_{ other.key_ } + , val_{ other.val_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + /// Non-atomic and atomic Put() methods. + inline void Put(Value& value) { + value.val_ = val_; + } + inline bool PutAtomic(Value& value) { + value.atomic_val_.store(val_); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t val_; + }; + + static auto upsert_callback = [](IAsyncContext* context, Status result) { + // Upserts don't go to disk. 
+ ASSERT_TRUE(false); + }; + + std::experimental::filesystem::create_directories("storage"); + + static constexpr uint32_t kNumRecords = 6000000; + static constexpr uint32_t kNumThreads = 16; + static constexpr uint32_t kNumRecordsPerThread = kNumRecords / kNumThreads; + + static Guid session_ids[kNumThreads]; + std::memset(session_ids, 0, sizeof(session_ids)); + + static std::atomic num_threads_persistent; + num_threads_persistent = 0; + static std::atomic threads_persistent[Thread::kMaxNumThreads]; + for(size_t idx = 0; idx < Thread::kMaxNumThreads; ++idx) { + threads_persistent[idx] = false; + } + + static std::atomic num_threads_started; + num_threads_started = 0; + + static auto persistence_callback = [](uint64_t persistent_serial_num) { + bool expected = false; + ASSERT_TRUE(threads_persistent[Thread::id()].compare_exchange_strong(expected, true)); + ++num_threads_persistent; + }; + + typedef FasterKv store_t; + + class ReadContext1 : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext1(Key key, uint32_t expected_) + : key_{ key } + , val_{ 0 } + , expected{ expected_ } { + } + + /// Copy (and deep-copy) constructor. + ReadContext1(const ReadContext1& other) + : key_{ other.key_ } + , val_{ other.val_ } + , expected{ other.expected } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + val_ = value.val_; + } + inline void GetAtomic(const Value& value) { + val_ = value.atomic_val_.load(); + } + + uint64_t val() const { + return val_; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t val_; + public: + const uint32_t expected; + }; + + { + // Populate and checkpoint the store. + + // 6 pages! + store_t store{ 524288, 201326592, "storage", 0.4 }; + + auto upsert_checkpoint_worker = [](store_t* store, uint32_t thread_id) { + assert(thread_id == 0); + session_ids[thread_id] = store->StartSession(); + ++num_threads_started; + + // upsert some records + for(uint32_t idx = kNumRecordsPerThread * thread_id; + idx < kNumRecordsPerThread * (thread_id + 1); ++idx) { + UpsertContext context{ Key{ idx }, idx + 7 }; + + Status result = store->Upsert(context, upsert_callback, 1); + ASSERT_EQ(Status::Ok, result); + + if(idx % 256 == 0) { + store->Refresh(); + } + } + + while(num_threads_started < kNumThreads) { + std::this_thread::yield(); + } + // checkpoint (transition from REST to INDEX_CHKPT) + ASSERT_TRUE(store->Checkpoint(persistence_callback)); + + // Ensure that the checkpoint completes. + while(num_threads_persistent < kNumThreads) { + store->CompletePending(false); + } + + bool result = store->CompletePending(true); + ASSERT_TRUE(result); + store->StopSession(); + }; + + auto upsert_worker = [](store_t* store, uint32_t thread_id) { + assert(thread_id != 0); + session_ids[thread_id] = store->StartSession(); + ++num_threads_started; + + // upsert some records + for(uint32_t idx = kNumRecordsPerThread * thread_id; + idx < kNumRecordsPerThread * (thread_id + 1); ++idx) { + UpsertContext context{ Key{ idx }, idx + 7 }; + Status result = store->Upsert(context, upsert_callback, 1); + ASSERT_EQ(Status::Ok, result); + + if(idx % 256 == 0) { + store->Refresh(); + } + } + + // Don't exit this session until the checkpoint has completed. 
+ while(num_threads_persistent < kNumThreads) { + store->CompletePending(false); + } + + bool result = store->CompletePending(true); + ASSERT_TRUE(result); + store->StopSession(); + }; + + std::deque threads{}; + threads.emplace_back(upsert_checkpoint_worker, &store, 0); + for(uint32_t idx = 1; idx < kNumThreads; ++idx) { + threads.emplace_back(upsert_worker, &store, idx); + } + for(auto& thread : threads) { + thread.join(); + } + + // Verify the store. + store.StartSession(); + for(uint32_t idx = 0; idx < kNumRecords; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ASSERT_EQ(context->expected, context->val()); + }; + + ReadContext1 context{ Key{ idx }, idx + 7 }; + Status result = store.Read(context, callback, 1); + if(result != Status::Ok) { + ASSERT_EQ(Status::Pending, result); + } + } + store.StopSession(); + } + + // Test recovery. + store_t new_store{ 524288, 201326592, "storage", 0.4 }; + + std::vector recovered_session_ids; + Status status = new_store.Recover(1, 1, recovered_session_ids); + ASSERT_EQ(recovered_session_ids.size(), kNumThreads); + ASSERT_EQ(Status::Ok, status); + + static std::atomic records_read; + records_read = 0; + + class ReadContext2 : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext2(Key key, uint32_t expected_, uint32_t idx_, std::atomic* found_) + : key_{ key } + , val_{ 0 } + , expected{ expected_ } + , idx{ idx_ } + , found{ found_ } { + } + + /// Copy (and deep-copy) constructor. + ReadContext2(const ReadContext2& other) + : key_{ other.key_ } + , val_{ other.val_ } + , expected{ other.expected } + , idx{ other.idx } + , found{ other.found } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + val_ = value.val_; + } + inline void GetAtomic(const Value& value) { + val_ = value.atomic_val_.load(); + } + + uint64_t val() const { + return val_; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. 
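    /// (When an operation returns Status::Pending, the library uses this hook to move the
    /// stack-allocated context onto the heap; the completion callback later receives that copy
    /// and wraps it in CallbackContext<>, which frees it when the callback returns.)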
+ Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t val_; + public: + const uint32_t expected; + const uint32_t idx; + std::atomic* found; + }; + + auto read_worker = [](store_t* store, uint32_t thread_id) { + uint64_t serial_num = store->ContinueSession(session_ids[thread_id]); + ASSERT_EQ(1, serial_num); + + std::unique_ptr> found{ new std::atomic[kNumRecordsPerThread] }; + std::memset(found.get(), 0, sizeof(found.get()[0]) * kNumRecordsPerThread); + + // verify records + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + if(result == Status::Ok) { + ++records_read; + ASSERT_EQ(context->expected, context->val()); + bool expected = false; + ASSERT_TRUE(context->found[context->idx].compare_exchange_strong(expected, true)); + } else { + ASSERT_EQ(Status::NotFound, result); + ASSERT_FALSE(context->found[context->idx].load()); + } + }; + for(uint32_t idx = kNumRecordsPerThread * thread_id; + idx < kNumRecordsPerThread * (thread_id + 1); ++idx) { + ReadContext2 context{ Key{ idx }, idx + 7, idx - (kNumRecordsPerThread * thread_id), + found.get() }; + Status result = store->Read(context, callback, 1); + if(result == Status::Ok) { + ++records_read; + ASSERT_EQ(context.expected, context.val()); + bool expected = false; + ASSERT_TRUE(found.get()[context.idx].compare_exchange_strong(expected, true)); + } else { + ASSERT_TRUE(result == Status::Pending || result == Status::NotFound); + if(result == Status::NotFound) { + ASSERT_FALSE(found.get()[context.idx].load()); + } + } + + if(idx % 256 == 0) { + store->Refresh(); + store->CompletePending(false); + } + } + store->CompletePending(true); + store->StopSession(); + + bool found_all = true; + for(uint32_t idx = 0; idx < kNumRecordsPerThread; ++idx) { + if(found_all != found.get()[idx]) { + // Consistent-point recovery implies that after one record isn't found, all subsequent + // records will not be found. + Key key{ kNumRecordsPerThread* thread_id + idx }; + KeyHash hash = key.GetHash(); + std::string error; + error += "key = "; + error += std::to_string(kNumRecordsPerThread* thread_id + idx); + error += ", idx = "; + error += std::to_string(hash.idx(8192)); + error += ", tag = "; + error += std::to_string(hash.tag()); + ASSERT_TRUE(found_all) << error; + found_all = false; + } + } + }; + + std::deque threads{}; + for(uint32_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(read_worker, &new_store, idx); + } + for(auto& thread : threads) { + thread.join(); + } + + ASSERT_GT(records_read, (uint32_t)0); + ASSERT_LE(records_read, kNumRecords); +} + +TEST(CLASS, Concurrent_Update_Small) { + class Key { + public: + Key(uint32_t key) + : key_{ key } { + } + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + std::hash hash_fn{}; + return KeyHash{ hash_fn(key_) }; + } + + /// Comparison operators. 
+ inline bool operator==(const Key& other) const { + return key_ == other.key_; + } + inline bool operator!=(const Key& other) const { + return key_ != other.key_; + } + + private: + uint32_t key_; + }; + static_assert(sizeof(Key) == 4, "sizeof(Key) != 4"); + static_assert(alignof(Key) == 4, "alignof(Key) != 4"); + + class UpsertContext; + class ReadContext1; + class ReadContext2; + + class Value { + public: + Value() + : val_{ 0 } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Value)); + } + + friend class UpsertContext; + friend class ReadContext1; + friend class ReadContext2; + + private: + union { + std::atomic atomic_val_; + uint32_t val_; + }; + }; + static_assert(sizeof(Value) == 4, "sizeof(Value) != 4"); + static_assert(alignof(Value) == 4, "alignof(Value) != 4"); + + class UpsertContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + UpsertContext(const Key& key, uint32_t val) + : key_{ key } + , val_{ val } { + } + + /// Copy (and deep-copy) constructor. + UpsertContext(const UpsertContext& other) + : key_{ other.key_ } + , val_{ other.val_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + /// Non-atomic and atomic Put() methods. + inline void Put(Value& value) { + value.val_ = val_; + } + inline bool PutAtomic(Value& value) { + value.atomic_val_.store(val_); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t val_; + }; + + static auto upsert_callback = [](IAsyncContext* context, Status result) { + // Upserts don't go to disk. + ASSERT_TRUE(false); + }; + + std::experimental::filesystem::create_directories("storage"); + + static constexpr uint32_t kNumRecords = 200000; + static constexpr uint32_t kNumThreads = 16; + static constexpr uint32_t kNumRecordsPerThread = kNumRecords / kNumThreads; + + static Guid session_ids[kNumThreads]; + std::memset(session_ids, 0, sizeof(session_ids)); + + static std::atomic num_threads_persistent; + num_threads_persistent = 0; + static std::atomic threads_persistent[Thread::kMaxNumThreads]; + for(size_t idx = 0; idx < Thread::kMaxNumThreads; ++idx) { + threads_persistent[idx] = false; + } + + static std::atomic num_threads_started; + num_threads_started = 0; + + static auto persistence_callback = [](uint64_t persistent_serial_num) { + bool expected = false; + ASSERT_TRUE(threads_persistent[Thread::id()].compare_exchange_strong(expected, + true)); + ++num_threads_persistent; + }; + + typedef FasterKv store_t; + + class ReadContext1 : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext1(Key key, uint32_t expected_) + : key_{ key } + , val_{ 0 } + , expected{ expected_ } { + } + + /// Copy (and deep-copy) constructor. + ReadContext1(const ReadContext1& other) + : key_{ other.key_ } + , val_{ other.val_ } + , expected{ other.expected } { + } + + /// The implicit and explicit interfaces require a key() accessor. 
+ inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + val_ = value.val_; + } + inline void GetAtomic(const Value& value) { + val_ = value.atomic_val_.load(); + } + + uint64_t val() const { + return val_; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t val_; + public: + const uint32_t expected; + }; + + { + // 6 pages! + store_t store{ 8192, 201326592, "storage", 0.4 }; + + // Populate the store. + store.StartSession(); + for(uint32_t idx = 0; idx < kNumRecords; ++idx) { + UpsertContext context{ Key{ idx }, 999 }; + Status result = store.Upsert(context, upsert_callback, 1); + ASSERT_EQ(Status::Ok, result); + if(idx % 256 == 0) { + store.Refresh(); + store.CompletePending(false); + } + } + store.StopSession(); + + /// Update and checkpoint the store. + auto upsert_checkpoint_worker = [](store_t* store, uint32_t thread_id) { + assert(thread_id == 0); + session_ids[thread_id] = store->StartSession(); + ++num_threads_started; + + // update some records + for(uint32_t idx = kNumRecordsPerThread * thread_id; + idx < kNumRecordsPerThread * (thread_id + 1); ++idx) { + UpsertContext context{ Key{ idx }, idx + 1 }; + + Status result = store->Upsert(context, upsert_callback, idx + 1); + ASSERT_EQ(Status::Ok, result); + + if(idx % 256 == 0) { + store->Refresh(); + } + } + + while(num_threads_started < kNumThreads) { + std::this_thread::yield(); + } + // checkpoint (transition from REST to INDEX_CHKPT) + ASSERT_TRUE(store->Checkpoint(persistence_callback)); + + // Ensure that the checkpoint completes. + while(num_threads_persistent < kNumThreads) { + store->CompletePending(false); + } + + bool result = store->CompletePending(true); + ASSERT_TRUE(result); + store->StopSession(); + }; + + auto upsert_worker = [](store_t* store, uint32_t thread_id) { + assert(thread_id != 0); + session_ids[thread_id] = store->StartSession(); + ++num_threads_started; + + // update some records + for(uint32_t idx = kNumRecordsPerThread * thread_id; + idx < kNumRecordsPerThread * (thread_id + 1); ++idx) { + UpsertContext context{ Key{ idx }, idx + 1 }; + Status result = store->Upsert(context, upsert_callback, idx + 1); + ASSERT_EQ(Status::Ok, result); + + if(idx % 256 == 0) { + store->Refresh(); + } + } + + // Don't exit this session until the checkpoint has completed. + while(num_threads_persistent < kNumThreads) { + store->CompletePending(false); + } + + bool result = store->CompletePending(true); + ASSERT_TRUE(result); + store->StopSession(); + }; + + std::deque threads{}; + threads.emplace_back(upsert_checkpoint_worker, &store, 0); + for(uint32_t idx = 1; idx < kNumThreads; ++idx) { + threads.emplace_back(upsert_worker, &store, idx); + } + for(auto& thread : threads) { + thread.join(); + } + + // Verify the store. + store.StartSession(); + for(uint32_t idx = 0; idx < kNumRecords; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ASSERT_EQ(context->expected, context->val()); + }; + + ReadContext1 context{ Key{ idx }, idx + 1 }; + Status result = store.Read(context, callback, 1); + if(result != Status::Ok) { + ASSERT_EQ(Status::Pending, result); + } + } + store.StopSession(); + } + + // Test recovery. 
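  // Sketch of how a client would use the serial numbers passed to Upsert() above once recovery
  // has run: ContinueSession() reports the last serial number the checkpoint captured for that
  // session, and everything after it gets re-issued. last_issued_serial and reissue_operation()
  // are hypothetical stand-ins for client-side bookkeeping, not part of this test.
  uint64_t serial_num = new_store.ContinueSession(session_ids[thread_id]);
  for(uint64_t s = serial_num + 1; s <= last_issued_serial; ++s) {
    reissue_operation(s);   // hypothetical: replay the operation originally issued with serial s
  }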
+ store_t new_store{ 8192, 201326592, "storage", 0.4 }; + + std::vector recovered_session_ids; + Status status = new_store.Recover(1, 1, recovered_session_ids); + ASSERT_EQ(recovered_session_ids.size(), kNumThreads); + ASSERT_EQ(Status::Ok, status); + + static std::atomic records_read; + records_read = 0; + + class ReadContext2 : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext2(Key key, uint32_t expected_, uint32_t idx_, std::atomic* found_) + : key_{ key } + , val_{ 0 } + , expected{ expected_ } + , idx{ idx_ } + , found{ found_ } { + } + + /// Copy (and deep-copy) constructor. + ReadContext2(const ReadContext2& other) + : key_{ other.key_ } + , val_{ other.val_ } + , expected{ other.expected } + , idx{ other.idx } + , found{ other.found } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + val_ = value.val_; + } + inline void GetAtomic(const Value& value) { + val_ = value.atomic_val_.load(); + } + + uint64_t val() const { + return val_; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t val_; + public: + const uint32_t expected; + const uint32_t idx; + std::atomic* found; + }; + + auto read_worker = [](store_t* store, uint32_t thread_id) { + uint64_t serial_num = store->ContinueSession(session_ids[thread_id]); + ASSERT_GE(serial_num, 1); + + std::unique_ptr> found{ new std::atomic[kNumRecordsPerThread] }; + std::memset(found.get(), 0, sizeof(found.get()[0]) * kNumRecordsPerThread); + + // verify records + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + if(context->expected == context->val()) { + bool expected = false; + ASSERT_TRUE(context->found[context->idx].compare_exchange_strong(expected, true)); + } else { + ASSERT_EQ(999, context->val()); + bool expected = false; + ASSERT_FALSE(context->found[context->idx].load()); + } + }; + for(uint32_t idx = kNumRecordsPerThread * thread_id; + idx < kNumRecordsPerThread * (thread_id + 1); ++idx) { + ReadContext2 context{ Key{ idx }, idx + 1, idx - (kNumRecordsPerThread * thread_id), + found.get() }; + Status result = store->Read(context, callback, 1); + if(result == Status::Ok) { + ++records_read; + if(context.expected == context.val()) { + bool expected = false; + ASSERT_TRUE(found.get()[context.idx].compare_exchange_strong(expected, true)); + } else { + ASSERT_EQ(999, context.val()); + bool expected = false; + ASSERT_FALSE(found.get()[context.idx].load()); + } + } else { + ASSERT_EQ(Status::Pending, result); + } + if(idx % 256 == 0) { + store->Refresh(); + store->CompletePending(false); + } + } + store->CompletePending(true); + store->StopSession(); + + bool found_all = true; + for(uint32_t idx = 0; idx < kNumRecordsPerThread; ++idx) { + if(found_all != found.get()[idx]) { + // Consistent-point recovery implies that after one record isn't found, all subsequent + // records will not be found. 
+ Key key{ kNumRecordsPerThread* thread_id + idx }; + KeyHash hash = key.GetHash(); + std::string error; + error += "key = "; + error += std::to_string(kNumRecordsPerThread* thread_id + idx); + error += ", idx = "; + error += std::to_string(hash.idx(8192)); + error += ", tag = "; + error += std::to_string(hash.tag()); + ASSERT_TRUE(found_all) << error; + found_all = false; + } + } + }; + + std::deque threads{}; + for(uint32_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(read_worker, &new_store, idx); + } + for(auto& thread : threads) { + thread.join(); + } + + ASSERT_GT(records_read, (uint32_t)0); + ASSERT_LE(records_read, kNumRecords); +} + +TEST(CLASS, Concurrent_Update_Large) { + class Key { + public: + Key(uint32_t key) + : key_{ key } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + std::hash hash_fn{}; + return KeyHash{ hash_fn(key_) }; + } + + /// Comparison operators. + inline bool operator==(const Key& other) const { + return key_ == other.key_; + } + inline bool operator!=(const Key& other) const { + return key_ != other.key_; + } + + private: + uint32_t key_; + }; + static_assert(sizeof(Key) == 4, "sizeof(Key) != 4"); + static_assert(alignof(Key) == 4, "alignof(Key) != 4"); + + class UpsertContext; + class ReadContext1; + class ReadContext2; + + class Value { + public: + Value() + : val_{ 0 } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Value)); + } + + friend class UpsertContext; + friend class ReadContext1; + friend class ReadContext2; + + private: + union { + std::atomic atomic_val_; + uint32_t val_; + }; + }; + static_assert(sizeof(Value) == 4, "sizeof(Value) != 4"); + static_assert(alignof(Value) == 4, "alignof(Value) != 4"); + + class UpsertContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + UpsertContext(const Key& key, uint32_t val) + : key_{ key } + , val_{ val } { + } + + /// Copy (and deep-copy) constructor. + UpsertContext(const UpsertContext& other) + : key_{ other.key_ } + , val_{ other.val_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + /// Non-atomic and atomic Put() methods. + inline void Put(Value& value) { + value.val_ = val_; + } + inline bool PutAtomic(Value& value) { + value.atomic_val_.store(val_); + return true; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t val_; + }; + + static auto upsert_callback = [](IAsyncContext* context, Status result) { + // Upserts don't go to disk. 
+ ASSERT_TRUE(false); + }; + + std::experimental::filesystem::create_directories("storage"); + + static constexpr uint32_t kNumRecords = 10000000; + static constexpr uint32_t kNumThreads = 16; + static constexpr uint32_t kNumRecordsPerThread = kNumRecords / kNumThreads; + + static Guid session_ids[kNumThreads]; + std::memset(session_ids, 0, sizeof(session_ids)); + + static std::atomic num_threads_persistent; + num_threads_persistent = 0; + static std::atomic threads_persistent[Thread::kMaxNumThreads]; + for(size_t idx = 0; idx < Thread::kMaxNumThreads; ++idx) { + threads_persistent[idx] = false; + } + + static std::atomic num_threads_started; + num_threads_started = 0; + + static auto persistence_callback = [](uint64_t persistent_serial_num) { + bool expected = false; + ASSERT_TRUE(threads_persistent[Thread::id()].compare_exchange_strong(expected, true)); + ++num_threads_persistent; + }; + + typedef FasterKv store_t; + + class ReadContext1 : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext1(Key key, uint32_t expected_) + : key_{ key } + , val_{ 0 } + , expected{ expected_ } { + } + + /// Copy (and deep-copy) constructor. + ReadContext1(const ReadContext1& other) + : key_{ other.key_ } + , val_{ other.val_ } + , expected{ other.expected } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + val_ = value.val_; + } + inline void GetAtomic(const Value& value) { + val_ = value.atomic_val_.load(); + } + + uint64_t val() const { + return val_; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t val_; + public: + const uint32_t expected; + }; + + { + // 6 pages! + store_t store{ 524288, 201326592, "storage", 0.4 }; + + // Populate the store. + store.StartSession(); + for(uint32_t idx = 0; idx < kNumRecords; ++idx) { + UpsertContext context{ Key{ idx }, 999 }; + Status result = store.Upsert(context, upsert_callback, 1); + ASSERT_EQ(Status::Ok, result); + if(idx % 256 == 0) { + store.Refresh(); + store.CompletePending(false); + } + } + + // Truncate some old copies of records that we no longer need. + static std::atomic truncated; + truncated = false; + static std::atomic complete; + complete = false; + auto truncate_callback = [](uint64_t offset) { + truncated = true; + }; + auto complete_callback = []() { + complete = true; + }; + ASSERT_TRUE(store.ShiftBeginAddress(Address{ 33554432L }, truncate_callback, + complete_callback)); + while(!truncated || !complete) { + store.CompletePending(false); + } + store.StopSession(); + + /// Update and checkpoint the store. 
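  // The truncation step above, condensed: retire the oldest 32 MiB page of the log before the
  // update/checkpoint phase that follows. Reading the two callbacks as "begin address has
  // shifted" and "on-disk truncation finished" is an assumption based on how the test waits
  // on both flags.
  static std::atomic<bool> truncated{ false };
  static std::atomic<bool> complete{ false };
  ASSERT_TRUE(store.ShiftBeginAddress(Address{ 33554432L },
                                      [](uint64_t offset) { truncated = true; },
                                      []() { complete = true; }));
  while(!truncated || !complete) {
    store.CompletePending(false);
  }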
+ auto upsert_checkpoint_worker = [](store_t* store, uint32_t thread_id) { + assert(thread_id == 0); + session_ids[thread_id] = store->StartSession(); + ++num_threads_started; + + // update some records + for(uint32_t idx = kNumRecordsPerThread * thread_id; + idx < kNumRecordsPerThread * (thread_id + 1); ++idx) { + UpsertContext context{ Key{ idx }, idx + 1 }; + + Status result = store->Upsert(context, upsert_callback, idx + 1); + ASSERT_EQ(Status::Ok, result); + + if(idx % 256 == 0) { + store->Refresh(); + } + } + + while(num_threads_started < kNumThreads) { + std::this_thread::yield(); + } + // checkpoint (transition from REST to INDEX_CHKPT) + ASSERT_TRUE(store->Checkpoint(persistence_callback)); + + // Ensure that the checkpoint completes. + while(num_threads_persistent < kNumThreads) { + store->CompletePending(false); + } + + bool result = store->CompletePending(true); + ASSERT_TRUE(result); + store->StopSession(); + }; + + auto upsert_worker = [](store_t* store, uint32_t thread_id) { + assert(thread_id != 0); + session_ids[thread_id] = store->StartSession(); + ++num_threads_started; + + // update some records + for(uint32_t idx = kNumRecordsPerThread * thread_id; + idx < kNumRecordsPerThread * (thread_id + 1); ++idx) { + UpsertContext context{ Key{ idx }, idx + 1 }; + Status result = store->Upsert(context, upsert_callback, idx + 1); + ASSERT_EQ(Status::Ok, result); + + if(idx % 256 == 0) { + store->Refresh(); + } + } + + // Don't exit this session until the checkpoint has completed. + while(num_threads_persistent < kNumThreads) { + store->CompletePending(false); + } + + bool result = store->CompletePending(true); + ASSERT_TRUE(result); + store->StopSession(); + }; + + std::deque threads{}; + threads.emplace_back(upsert_checkpoint_worker, &store, 0); + for(uint32_t idx = 1; idx < kNumThreads; ++idx) { + threads.emplace_back(upsert_worker, &store, idx); + } + for(auto& thread : threads) { + thread.join(); + } + + // Verify the store. + store.StartSession(); + for(uint32_t idx = 0; idx < kNumRecords; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ASSERT_EQ(context->expected, context->val()); + }; + + ReadContext1 context{ Key{ idx }, idx + 1 }; + Status result = store.Read(context, callback, 1); + if(result != Status::Ok) { + ASSERT_EQ(Status::Pending, result); + } + if(idx % 256 == 0) { + store.Refresh(); + store.CompletePending(false); + } + } + + bool result = store.CompletePending(true); + ASSERT_TRUE(result); + store.StopSession(); + } + + // Test recovery. + store_t new_store{ 524288, 201326592, "storage", 0.4 }; + + std::vector recovered_session_ids; + Status status = new_store.Recover(1, 1, recovered_session_ids); + ASSERT_EQ(recovered_session_ids.size(), kNumThreads); + ASSERT_EQ(Status::Ok, status); + + static std::atomic records_read; + records_read = 0; + + class ReadContext2 : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext2(Key key, uint32_t expected_, uint32_t idx_, std::atomic* found_) + : key_{ key } + , val_{ 0 } + , expected{ expected_ } + , idx{ idx_ } + , found{ found_ } { + } + + /// Copy (and deep-copy) constructor. + ReadContext2(const ReadContext2& other) + : key_{ other.key_ } + , val_{ other.val_ } + , expected{ other.expected } + , idx{ other.idx } + , found{ other.found } { + } + + /// The implicit and explicit interfaces require a key() accessor. 
+ inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + val_ = value.val_; + } + inline void GetAtomic(const Value& value) { + val_ = value.atomic_val_.load(); + } + + uint64_t val() const { + return val_; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t val_; + public: + const uint32_t expected; + const uint32_t idx; + std::atomic* found; + }; + + auto read_worker = [](store_t* store, uint32_t thread_id) { + uint64_t serial_num = store->ContinueSession(session_ids[thread_id]); + ASSERT_GE(serial_num, 1); + + std::unique_ptr> found{ new std::atomic[kNumRecordsPerThread] }; + std::memset(found.get(), 0, sizeof(found.get()[0]) * kNumRecordsPerThread); + + // verify records + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + if(context->expected == context->val()) { + bool expected = false; + ASSERT_TRUE(context->found[context->idx].compare_exchange_strong(expected, true)); + } else { + ASSERT_EQ(999, context->val()); + bool expected = false; + ASSERT_FALSE(context->found[context->idx].load()); + } + }; + for(uint32_t idx = kNumRecordsPerThread * thread_id; + idx < kNumRecordsPerThread * (thread_id + 1); ++idx) { + ReadContext2 context{ Key{ idx }, idx + 1, idx - (kNumRecordsPerThread * thread_id), + found.get() }; + Status result = store->Read(context, callback, 1); + if(result == Status::Ok) { + ++records_read; + if(context.expected == context.val()) { + bool expected = false; + ASSERT_TRUE(found.get()[context.idx].compare_exchange_strong(expected, true)); + } else { + ASSERT_EQ(999, context.val()); + bool expected = false; + ASSERT_FALSE(found.get()[context.idx].load()); + } + } else { + ASSERT_EQ(Status::Pending, result); + } + if(idx % 256 == 0) { + store->Refresh(); + store->CompletePending(false); + } + } + store->CompletePending(true); + store->StopSession(); + + bool found_all = true; + for(uint32_t idx = 0; idx < kNumRecordsPerThread; ++idx) { + if(found_all != found.get()[idx]) { + // Consistent-point recovery implies that after one record isn't found, all subsequent + // records will not be found. + Key key{ kNumRecordsPerThread* thread_id + idx }; + KeyHash hash = key.GetHash(); + std::string error; + error += "key = "; + error += std::to_string(kNumRecordsPerThread* thread_id + idx); + error += ", idx = "; + error += std::to_string(hash.idx(8192)); + error += ", tag = "; + error += std::to_string(hash.tag()); + ASSERT_TRUE(found_all) << error; + found_all = false; + } + } + }; + + std::deque threads{}; + for(uint32_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(read_worker, &new_store, idx); + } + for(auto& thread : threads) { + thread.join(); + } + + ASSERT_GT(records_read, (uint32_t)0); + ASSERT_LE(records_read, kNumRecords); +} + +TEST(CLASS, Concurrent_Rmw_Small) { + class RmwContext; + + class Key { + public: + Key(uint32_t key) + : key_{ key } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + std::hash hash_fn{}; + return KeyHash{ hash_fn(key_) }; + } + + /// Comparison operators. 
+ inline bool operator==(const Key& other) const { + return key_ == other.key_; + } + inline bool operator!=(const Key& other) const { + return key_ != other.key_; + } + + friend class RmwContext; + + private: + uint32_t key_; + }; + static_assert(sizeof(Key) == 4, "sizeof(Key) != 4"); + static_assert(alignof(Key) == 4, "alignof(Key) != 4"); + + class ReadContext1; + class ReadContext2; + + class Value { + public: + Value() + : val_{ 0 } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Value)); + } + + friend class RmwContext; + friend class ReadContext1; + friend class ReadContext2; + + private: + union { + std::atomic atomic_val_; + uint32_t val_; + }; + }; + static_assert(sizeof(Value) == 4, "sizeof(Value) != 4"); + static_assert(alignof(Value) == 4, "alignof(Value) != 4"); + + class RmwContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + RmwContext(const Key& key, uint32_t delta) + : key_{ key } + , delta_{ delta } { + } + + /// Copy (and deep-copy) constructor. + RmwContext(const RmwContext& other) + : key_{ other.key_ } + , delta_{ other.delta_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + /// Non-atomic and atomic Put() methods. + inline void RmwInitial(Value& value) { + value.val_ = key_.key_; + } + inline void RmwCopy(const value_t& old_value, value_t& value) { + value.val_ = old_value.val_ + delta_; + } + inline bool RmwAtomic(value_t& value) { + value.atomic_val_ += delta_; + return true; + } + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t delta_; + }; + + std::experimental::filesystem::create_directories("storage"); + + static constexpr uint32_t kNumRecords = 200000; + static constexpr uint32_t kNumThreads = 16; + static constexpr uint32_t kNumRecordsPerThread = kNumRecords / kNumThreads; + + static Guid session_ids[kNumThreads]; + std::memset(session_ids, 0, sizeof(session_ids)); + + static std::atomic num_threads_persistent; + num_threads_persistent = 0; + static std::atomic threads_persistent[Thread::kMaxNumThreads] = {}; + for(size_t idx = 0; idx < Thread::kMaxNumThreads; ++idx) { + threads_persistent[idx] = false; + } + + static std::atomic num_threads_started; + num_threads_started = 0; + + static auto persistence_callback = [](uint64_t persistent_serial_num) { + bool expected = false; + ASSERT_TRUE(threads_persistent[Thread::id()].compare_exchange_strong(expected, true)); + ++num_threads_persistent; + }; + + typedef FasterKv store_t; + + class ReadContext1 : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext1(Key key, uint32_t expected_) + : key_{ key } + , val_{ 0 } + , expected{ expected_ } { + } + + /// Copy (and deep-copy) constructor. + ReadContext1(const ReadContext1& other) + : key_{ other.key_ } + , val_{ other.val_ } + , expected{ other.expected } { + } + + /// The implicit and explicit interfaces require a key() accessor. 
+ inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + val_ = value.val_; + } + inline void GetAtomic(const Value& value) { + val_ = value.atomic_val_.load(); + } + + uint64_t val() const { + return val_; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t val_; + public: + const uint32_t expected; + }; + + { + // 6 pages! + store_t store{ 8192, 402653184, "storage", 0.4 }; + + // Populate the store. + store.StartSession(); + for(uint32_t idx = 0; idx < kNumRecords; ++idx) { + auto callback = [](IAsyncContext* context, Status result) { + ASSERT_EQ(Status::Ok, result); + }; + + RmwContext context{ Key{ idx }, 230 }; + Status result = store.Rmw(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + if(idx % 256 == 0) { + store.Refresh(); + store.CompletePending(false); + } + } + store.StopSession(); + + /// Read-modify-write and checkpoint the store. + auto rmw_checkpoint_worker = [](store_t* store, uint32_t thread_id) { + assert(thread_id == 0); + session_ids[thread_id] = store->StartSession(); + ++num_threads_started; + + // read-modify-write some records + for(uint32_t idx = kNumRecordsPerThread * thread_id; + idx < kNumRecordsPerThread * (thread_id + 1); ++idx) { + auto callback = [](IAsyncContext* context, Status result) { + ASSERT_EQ(Status::Ok, result); + }; + RmwContext context{ Key{ idx }, 230 }; + Status result = store->Rmw(context, callback, idx + 1); + ASSERT_EQ(Status::Ok, result); + + if(idx % 256 == 0) { + store->Refresh(); + store->CompletePending(false); + } + } + + while(num_threads_started < kNumThreads) { + std::this_thread::yield(); + } + // checkpoint (transition from REST to INDEX_CHKPT) + ASSERT_TRUE(store->Checkpoint(persistence_callback)); + + // Ensure that the checkpoint completes. + while(num_threads_persistent < kNumThreads) { + store->CompletePending(false); + } + + bool result = store->CompletePending(true); + ASSERT_TRUE(result); + store->StopSession(); + }; + + auto rmw_worker = [](store_t* store, uint32_t thread_id) { + assert(thread_id != 0); + session_ids[thread_id] = store->StartSession(); + ++num_threads_started; + + // update some records + for(uint32_t idx = kNumRecordsPerThread * thread_id; + idx < kNumRecordsPerThread * (thread_id + 1); ++idx) { + auto callback = [](IAsyncContext* context, Status result) { + ASSERT_EQ(Status::Ok, result); + }; + RmwContext context{ Key{ idx }, 230 }; + Status result = store->Rmw(context, callback, idx + 1); + ASSERT_EQ(Status::Ok, result); + + if(idx % 256 == 0) { + store->Refresh(); + store->CompletePending(false); + } + } + + // Don't exit this session until the checkpoint has completed. + while(num_threads_persistent < kNumThreads) { + store->CompletePending(false); + } + + bool result = store->CompletePending(true); + ASSERT_TRUE(result); + store->StopSession(); + }; + + std::deque threads{}; + threads.emplace_back(rmw_checkpoint_worker, &store, 0); + for(uint32_t idx = 1; idx < kNumThreads; ++idx) { + threads.emplace_back(rmw_worker, &store, idx); + } + for(auto& thread : threads) { + thread.join(); + } + + // Verify the store. 
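+    // Each key was seeded to its own value by RmwInitial during population and then incremented
+    // by 230 exactly once by the RMW workers, so every read should now return idx + 230.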
+ store.StartSession(); + for(uint32_t idx = 0; idx < kNumRecords; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ASSERT_EQ(context->expected, context->val()); + }; + + ReadContext1 context{ Key{ idx }, idx + 230 }; + Status result = store.Read(context, callback, 1); + if(result != Status::Ok) { + ASSERT_EQ(Status::Pending, result); + } + } + store.StopSession(); + } + + // Test recovery. + store_t new_store{ 8192, 402653184, "storage", 0.4 }; + + std::vector recovered_session_ids; + Status status = new_store.Recover(1, 1, recovered_session_ids); + ASSERT_EQ(recovered_session_ids.size(), kNumThreads); + ASSERT_EQ(Status::Ok, status); + + static std::atomic records_read; + records_read = 0; + + class ReadContext2 : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext2(Key key, uint32_t expected_, uint32_t idx_, std::atomic* found_) + : key_{ key } + , val_{ 0 } + , expected{ expected_ } + , idx{ idx_ } + , found{ found_ } { + } + + /// Copy (and deep-copy) constructor. + ReadContext2(const ReadContext2& other) + : key_{ other.key_ } + , val_{ other.val_ } + , expected{ other.expected } + , idx{ other.idx } + , found{ other.found } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + val_ = value.val_; + } + inline void GetAtomic(const Value& value) { + val_ = value.atomic_val_.load(); + } + + uint64_t val() const { + return val_; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t val_; + public: + const uint32_t expected; + const uint32_t idx; + std::atomic* found; + }; + + auto read_worker = [](store_t* store, uint32_t thread_id) { + uint64_t serial_num = store->ContinueSession(session_ids[thread_id]); + ASSERT_GE(serial_num, 1); + + std::unique_ptr> found{ new std::atomic[kNumRecordsPerThread] }; + std::memset(found.get(), 0, sizeof(found.get()[0]) * kNumRecordsPerThread); + + // verify records + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + if(context->expected == context->val()) { + bool expected = false; + ASSERT_TRUE(context->found[context->idx].compare_exchange_strong(expected, true)); + } else { + ASSERT_EQ(context->expected - 230, context->val()); + bool expected = false; + ASSERT_FALSE(context->found[context->idx].load()); + } + }; + for(uint32_t idx = kNumRecordsPerThread * thread_id; + idx < kNumRecordsPerThread * (thread_id + 1); ++idx) { + ReadContext2 context{ Key{ idx }, idx + 230, idx - (kNumRecordsPerThread * thread_id), + found.get() }; + Status result = store->Read(context, callback, 1); + if(result == Status::Ok) { + ++records_read; + if(context.expected == context.val()) { + bool expected = false; + ASSERT_TRUE(found.get()[context.idx].compare_exchange_strong(expected, true)); + } else { + ASSERT_EQ(idx, context.val()); + bool expected = false; + ASSERT_FALSE(found.get()[context.idx].load()); + } + } else { + ASSERT_EQ(Status::Pending, result); + } + if(idx % 256 == 0) { + store->Refresh(); + store->CompletePending(false); + } + } + store->CompletePending(true); + store->StopSession(); + + bool found_all = true; + for(uint32_t idx = 0; 
idx < kNumRecordsPerThread; ++idx) { + if(found_all != found.get()[idx]) { + // Consistent-point recovery implies that after one record isn't found, all subsequent + // records will not be found. + Key key{ kNumRecordsPerThread* thread_id + idx }; + KeyHash hash = key.GetHash(); + std::string error; + error += "key = "; + error += std::to_string(kNumRecordsPerThread* thread_id + idx); + error += ", idx = "; + error += std::to_string(hash.idx(8192)); + error += ", tag = "; + error += std::to_string(hash.tag()); + ASSERT_TRUE(found_all) << error; + found_all = false; + } + } + }; + + std::deque threads{}; + for(uint32_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(read_worker, &new_store, idx); + } + for(auto& thread : threads) { + thread.join(); + } + + ASSERT_GT(records_read, (uint32_t)0); + ASSERT_LE(records_read, kNumRecords); +} + +TEST(CLASS, Concurrent_Rmw_Large) { + class RmwContext; + + class Key { + public: + Key(uint32_t key) + : key_{ key } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Key)); + } + inline KeyHash GetHash() const { + std::hash hash_fn{}; + return KeyHash{ hash_fn(key_) }; + } + + /// Comparison operators. + inline bool operator==(const Key& other) const { + return key_ == other.key_; + } + inline bool operator!=(const Key& other) const { + return key_ != other.key_; + } + + friend class RmwContext; + + private: + uint32_t key_; + }; + static_assert(sizeof(Key) == 4, "sizeof(Key) != 4"); + static_assert(alignof(Key) == 4, "alignof(Key) != 4"); + + class ReadContext1; + class ReadContext2; + + class Value { + public: + Value() + : val_{ 0 } { + } + + inline static constexpr uint32_t size() { + return static_cast(sizeof(Value)); + } + + friend class RmwContext; + friend class ReadContext1; + friend class ReadContext2; + + private: + union { + std::atomic atomic_val_; + uint32_t val_; + }; + }; + static_assert(sizeof(Value) == 4, "sizeof(Value) != 4"); + static_assert(alignof(Value) == 4, "alignof(Value) != 4"); + + class RmwContext : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + RmwContext(const Key& key, uint32_t delta) + : key_{ key } + , delta_{ delta } { + } + + /// Copy (and deep-copy) constructor. + RmwContext(const RmwContext& other) + : key_{ other.key_ } + , delta_{ other.delta_ } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + inline static constexpr uint32_t value_size() { + return sizeof(value_t); + } + /// Non-atomic and atomic Put() methods. + inline void RmwInitial(Value& value) { + value.val_ = key_.key_; + } + inline void RmwCopy(const value_t& old_value, value_t& value) { + value.val_ = old_value.val_ + delta_; + } + inline bool RmwAtomic(value_t& value) { + value.atomic_val_ += delta_; + return true; + } + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. 
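+  /// Invoked only when an operation goes async (Status::Pending); the stack-allocated context is
+  /// deep-copied to the heap so the request can complete later.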
+ Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t delta_; + }; + + std::experimental::filesystem::create_directories("storage"); + + static constexpr uint32_t kNumRecords = 6000000; + static constexpr uint32_t kNumThreads = 16; + static_assert(kNumRecords % kNumThreads == 0, "kNumRecords % kNumThreads != 0"); + static constexpr uint32_t kNumRecordsPerThread = kNumRecords / kNumThreads; + + static Guid session_ids[kNumThreads]; + std::memset(session_ids, 0, sizeof(session_ids)); + + static std::atomic num_threads_persistent; + num_threads_persistent = 0; + static std::atomic threads_persistent[Thread::kMaxNumThreads]; + for(size_t idx = 0; idx < Thread::kMaxNumThreads; ++idx) { + threads_persistent[idx] = false; + } + + static std::atomic num_threads_started; + num_threads_started = 0; + + static auto persistence_callback = [](uint64_t persistent_serial_num) { + bool expected = false; + ASSERT_TRUE(threads_persistent[Thread::id()].compare_exchange_strong(expected, true)); + ++num_threads_persistent; + }; + + typedef FasterKv store_t; + + class ReadContext1 : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext1(Key key, uint32_t expected_) + : key_{ key } + , val_{ 0 } + , expected{ expected_ } { + } + + /// Copy (and deep-copy) constructor. + ReadContext1(const ReadContext1& other) + : key_{ other.key_ } + , val_{ other.val_ } + , expected{ other.expected } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + val_ = value.val_; + } + inline void GetAtomic(const Value& value) { + val_ = value.atomic_val_.load(); + } + + uint64_t val() const { + return val_; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. + Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t val_; + public: + const uint32_t expected; + }; + + { + // 6 pages! + store_t store{ 524288, 402653184, "storage", 0.4 }; + + // Populate the store. 
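+    // Thread 0 additionally calls GrowIndex() after loading its range, doubling the hash table;
+    // the store recovered below is therefore constructed with twice the table size (524288 * 2).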
+ auto populate_worker0 = [](store_t* store, uint32_t thread_id) { + store->StartSession(); + auto callback = [](IAsyncContext* context, Status result) { + ASSERT_EQ(Status::Ok, result); + }; + for(uint32_t idx = kNumRecordsPerThread * thread_id; + idx < kNumRecordsPerThread * (thread_id + 1); ++idx) { + RmwContext context{ Key{ idx }, 230 }; + Status result = store->Rmw(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + if(idx % 256 == 0) { + store->Refresh(); + store->CompletePending(false); + } + } + store->GrowIndex(nullptr); + store->StopSession(); + }; + auto populate_worker = [](store_t* store, uint32_t thread_id) { + store->StartSession(); + auto callback = [](IAsyncContext* context, Status result) { + ASSERT_EQ(Status::Ok, result); + }; + for(uint32_t idx = kNumRecordsPerThread * thread_id; + idx < kNumRecordsPerThread * (thread_id + 1); ++idx) { + RmwContext context{ Key{ idx }, 230 }; + Status result = store->Rmw(context, callback, 1); + ASSERT_EQ(Status::Ok, result); + if(idx % 256 == 0) { + store->Refresh(); + store->CompletePending(false); + } + } + store->StopSession(); + }; + + std::deque threads{}; + threads.emplace_back(populate_worker0, &store, 0); + for(uint32_t idx = 1; idx < kNumThreads; ++idx) { + threads.emplace_back(populate_worker, &store, idx); + } + for(auto& thread : threads) { + thread.join(); + } + + /// Read-modify-write and checkpoint the store. + auto rmw_checkpoint_worker = [](store_t* store, uint32_t thread_id) { + assert(thread_id == 0); + session_ids[thread_id] = store->StartSession(); + ++num_threads_started; + + // read-modify-write some records + for(uint32_t idx = kNumRecordsPerThread * thread_id; + idx < kNumRecordsPerThread * (thread_id + 1); ++idx) { + auto callback = [](IAsyncContext* context, Status result) { + ASSERT_EQ(Status::Ok, result); + }; + RmwContext context{ Key{ idx }, 230 }; + Status result = store->Rmw(context, callback, idx + 1); + ASSERT_TRUE(result == Status::Ok || result == Status::Pending); + if(idx % 256 == 0) { + store->Refresh(); + store->CompletePending(false); + } + } + + while(num_threads_started < kNumThreads) { + std::this_thread::yield(); + } + // checkpoint (transition from REST to INDEX_CHKPT) + ASSERT_TRUE(store->Checkpoint(persistence_callback)); + + // Ensure that the checkpoint completes. + while(num_threads_persistent < kNumThreads) { + store->CompletePending(false); + } + + bool result = store->CompletePending(true); + ASSERT_TRUE(result); + store->StopSession(); + }; + + auto rmw_worker = [](store_t* store, uint32_t thread_id) { + assert(thread_id != 0); + session_ids[thread_id] = store->StartSession(); + ++num_threads_started; + + // update some records + for(uint32_t idx = kNumRecordsPerThread * thread_id; + idx < kNumRecordsPerThread * (thread_id + 1); ++idx) { + auto callback = [](IAsyncContext* context, Status result) { + ASSERT_EQ(Status::Ok, result); + }; + RmwContext context{ Key{ idx }, 230 }; + Status result = store->Rmw(context, callback, idx + 1); + ASSERT_TRUE(result == Status::Ok || result == Status::Pending); + if(idx % 256 == 0) { + store->Refresh(); + store->CompletePending(false); + } + } + + // Don't exit this session until the checkpoint has completed. 
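+      // Keep pumping CompletePending()/Refresh() so this session's epoch can advance; the
+      // checkpoint cannot finish until every active session has acknowledged it.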
+ while(num_threads_persistent < kNumThreads) { + store->CompletePending(false); + } + + bool result = store->CompletePending(true); + ASSERT_TRUE(result); + store->StopSession(); + }; + + threads.clear(); + threads.emplace_back(rmw_checkpoint_worker, &store, 0); + for(uint32_t idx = 1; idx < kNumThreads; ++idx) { + threads.emplace_back(rmw_worker, &store, idx); + } + for(auto& thread : threads) { + thread.join(); + } + + // Verify the store. + store.StartSession(); + for(uint32_t idx = 0; idx < kNumRecords; ++idx) { + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + ASSERT_EQ(context->expected, context->val()); + }; + + ReadContext1 context{ Key{ idx }, idx + 230 }; + Status result = store.Read(context, callback, 1); + if(result != Status::Ok) { + ASSERT_EQ(Status::Pending, result); + } + } + store.StopSession(); + } + + // Test recovery. + store_t new_store{ 524288 * 2, 402653184, "storage", 0.4 }; + + std::vector recovered_session_ids; + Status status = new_store.Recover(1, 1, recovered_session_ids); + ASSERT_EQ(recovered_session_ids.size(), kNumThreads); + ASSERT_EQ(Status::Ok, status); + + static std::atomic records_read; + records_read = 0; + + class ReadContext2 : public IAsyncContext { + public: + typedef Key key_t; + typedef Value value_t; + + ReadContext2(Key key, uint32_t expected_, uint32_t idx_, std::atomic* found_) + : key_{ key } + , val_{ 0 } + , expected{ expected_ } + , idx{ idx_ } + , found{ found_ } { + } + + /// Copy (and deep-copy) constructor. + ReadContext2(const ReadContext2& other) + : key_{ other.key_ } + , val_{ other.val_ } + , expected{ other.expected } + , idx{ other.idx } + , found{ other.found } { + } + + /// The implicit and explicit interfaces require a key() accessor. + inline const Key& key() const { + return key_; + } + + inline void Get(const Value& value) { + val_ = value.val_; + } + inline void GetAtomic(const Value& value) { + val_ = value.atomic_val_.load(); + } + + uint64_t val() const { + return val_; + } + + protected: + /// The explicit interface requires a DeepCopy_Internal() implementation. 
+ Status DeepCopy_Internal(IAsyncContext*& context_copy) { + return IAsyncContext::DeepCopy_Internal(*this, context_copy); + } + + private: + Key key_; + uint32_t val_; + public: + const uint32_t expected; + const uint32_t idx; + std::atomic* found; + }; + + auto read_worker = [](store_t* store, uint32_t thread_id) { + uint64_t serial_num = store->ContinueSession(session_ids[thread_id]); + ASSERT_GE(serial_num, 1); + + std::unique_ptr> found{ new std::atomic[kNumRecordsPerThread] }; + std::memset(found.get(), 0, sizeof(found.get()[0]) * kNumRecordsPerThread); + + // verify records + auto callback = [](IAsyncContext* ctxt, Status result) { + CallbackContext context{ ctxt }; + ASSERT_EQ(Status::Ok, result); + if(context->expected == context->val()) { + bool expected = false; + ASSERT_TRUE(context->found[context->idx].compare_exchange_strong(expected, true)); + } else { + ASSERT_EQ(context->expected - 230, context->val()); + bool expected = false; + ASSERT_FALSE(context->found[context->idx].load()); + } + }; + for(uint32_t idx = kNumRecordsPerThread * thread_id; + idx < kNumRecordsPerThread * (thread_id + 1); ++idx) { + ReadContext2 context{ Key{ idx }, idx + 230, idx - (kNumRecordsPerThread * thread_id), + found.get() }; + Status result = store->Read(context, callback, 1); + if(result == Status::Ok) { + ++records_read; + if(context.expected == context.val()) { + bool expected = false; + ASSERT_TRUE(found.get()[context.idx].compare_exchange_strong(expected, true)); + } else { + ASSERT_EQ(idx, context.val()); + bool expected = false; + ASSERT_FALSE(found.get()[context.idx].load()); + } + } else { + ASSERT_EQ(Status::Pending, result); + } + if(idx % 256 == 0) { + store->Refresh(); + store->CompletePending(false); + } + } + store->CompletePending(true); + store->StopSession(); + + bool found_all = true; + for(uint32_t idx = 0; idx < kNumRecordsPerThread; ++idx) { + if(found_all != found.get()[idx]) { + // Consistent-point recovery implies that after one record isn't found, all subsequent + // records will not be found. + Key key{ kNumRecordsPerThread* thread_id + idx }; + KeyHash hash = key.GetHash(); + std::string error; + error += "key = "; + error += std::to_string(kNumRecordsPerThread* thread_id + idx); + error += ", idx = "; + error += std::to_string(hash.idx(8192)); + error += ", tag = "; + error += std::to_string(hash.tag()); + ASSERT_TRUE(found_all) << error; + found_all = false; + } + } + }; + + std::deque threads{}; + for(uint32_t idx = 0; idx < kNumThreads; ++idx) { + threads.emplace_back(read_worker, &new_store, idx); + } + for(auto& thread : threads) { + thread.join(); + } + + ASSERT_GT(records_read, (uint32_t)0); + ASSERT_LE(records_read, kNumRecords); +} diff --git a/cc/test/recovery_threadpool_test.cc b/cc/test/recovery_threadpool_test.cc new file mode 100644 index 000000000..cbe9728f7 --- /dev/null +++ b/cc/test/recovery_threadpool_test.cc @@ -0,0 +1,31 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
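+
+// Driver that instantiates the shared recovery tests from recovery_test.h using the thread-pool
+// I/O handler: CLASS names the gtest test case, and including recovery_test.h stamps out the
+// tests for this handler. Another driver (e.g. recovery_queue_test.cc in this commit) can reuse
+// the same header by defining a different handler_t and CLASS, roughly as follows:
+//
+//   typedef FASTER::environment::QueueIoHandler handler_t;  // assumed handler name
+//   #define CLASS RecoveryTest_Queue                        // hypothetical test-case name
+//   #include "recovery_test.h"
+//   #undef CLASS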
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include "gtest/gtest.h" +#include "core/faster.h" +#include "core/light_epoch.h" +#include "core/thread.h" +#include "device/file_system_disk.h" + +using namespace FASTER::core; + +typedef FASTER::environment::ThreadPoolIoHandler handler_t; + +#define CLASS RecoveryTest_ThreadPool + +#include "recovery_test.h" + +#undef CLASS + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/cc/test/utility_test.cc b/cc/test/utility_test.cc new file mode 100644 index 000000000..4c21007d3 --- /dev/null +++ b/cc/test/utility_test.cc @@ -0,0 +1,25 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#include +#include "gtest/gtest.h" + +#include "core/auto_ptr.h" + +using namespace FASTER::core; + +TEST(UtilityTest, NextPowerOfTwo) { + EXPECT_EQ(1, next_power_of_two(1)); + EXPECT_EQ(2, next_power_of_two(2)); + EXPECT_EQ(4, next_power_of_two(3)); + EXPECT_EQ(4, next_power_of_two(4)); + EXPECT_EQ(8, next_power_of_two(5)); + EXPECT_EQ(8, next_power_of_two(6)); + EXPECT_EQ(8, next_power_of_two(7)); + EXPECT_EQ(8, next_power_of_two(8)); +} + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/cs/src/FASTER.sln b/cs/src/FASTER.sln new file mode 100644 index 000000000..94c309df8 --- /dev/null +++ b/cs/src/FASTER.sln @@ -0,0 +1,149 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.27004.2008 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FASTER.benchmark", "benchmark\FASTER.benchmark.csproj", "{33A732D1-2B58-4FEE-9696-B9483496229F}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FASTER.test", "test\FASTER.test.csproj", "{0DC7F5A2-E963-4E7F-BD37-6F7864B726F2}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "native", "native", "{6D05489A-B06F-4946-AF59-887A14D83171}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "readtsc", "native\readtsc\readtsc.vcxproj", "{A6510B80-BD50-4C11-9712-64C3B3865AFF}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "adv-file-ops", "native\adv-file-ops\adv-file-ops.vcxproj", "{5852AC33-6B01-44F5-BAF3-2AAF796E8449}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FASTER.core", "core\FASTER.core.csproj", "{01002755-60CA-40EE-94D9-11C07EB58786}" + ProjectSection(ProjectDependencies) = postProject + {5852AC33-6B01-44F5-BAF3-2AAF796E8449} = {5852AC33-6B01-44F5-BAF3-2AAF796E8449} + {A6510B80-BD50-4C11-9712-64C3B3865AFF} = {A6510B80-BD50-4C11-9712-64C3B3865AFF} + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "playground", "playground", "{E6026D6A-01C5-4582-B2C1-64751490DABE}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ManagedSample1", "playground\ManagedSample1\ManagedSample1.csproj", "{17BDD0A5-98E5-464A-8A00-050D9FF4C562}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "core", "core", "{EE591221-F22E-49B3-837C-1921302082DC}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "benchmark", "benchmark", "{CA6AB459-A31A-4C15-B1A6-A82C349B54B4}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "test", "test", "{81B3B5D1-70F6-4979-AC76-003F9A6B316B}" +EndProject 
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ManagedSample2", "playground\ManagedSample2\ManagedSample2.csproj", "{7DB87633-9CAB-4AE4-9ED0-AA6E77448486}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ManagedSample3", "playground\ManagedSample3\ManagedSample3.csproj", "{3E571C7C-59B5-485C-AC78-3F34D3511CD2}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SumStore", "playground\SumStore\SumStore.csproj", "{05D61B37-9714-4234-9961-384A63F7175E}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ManagedSample4", "playground\ManagedSample4\ManagedSample4.csproj", "{E1AC9797-ABE3-4881-A51B-37D8687AAE35}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ClassCache", "playground\ClassCache\ClassCache.csproj", "{10FD4868-BB16-442B-B0AC-18AE278D9C60}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NestedTypesTest", "playground\NestedTypesTest\NestedTypesTest.csproj", "{2D5F23F7-3184-43EC-A7F1-C924F7FEF786}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {33A732D1-2B58-4FEE-9696-B9483496229F}.Debug|x64.ActiveCfg = Debug|x64 + {33A732D1-2B58-4FEE-9696-B9483496229F}.Debug|x64.Build.0 = Debug|x64 + {33A732D1-2B58-4FEE-9696-B9483496229F}.Release|x64.ActiveCfg = Release|x64 + {33A732D1-2B58-4FEE-9696-B9483496229F}.Release|x64.Build.0 = Release|x64 + {0DC7F5A2-E963-4E7F-BD37-6F7864B726F2}.Debug|x64.ActiveCfg = Debug|x64 + {0DC7F5A2-E963-4E7F-BD37-6F7864B726F2}.Debug|x64.Build.0 = Debug|x64 + {0DC7F5A2-E963-4E7F-BD37-6F7864B726F2}.Release|x64.ActiveCfg = Release|x64 + {0DC7F5A2-E963-4E7F-BD37-6F7864B726F2}.Release|x64.Build.0 = Release|x64 + {A6510B80-BD50-4C11-9712-64C3B3865AFF}.Debug|x64.ActiveCfg = Release|x64 + {A6510B80-BD50-4C11-9712-64C3B3865AFF}.Debug|x64.Build.0 = Release|x64 + {A6510B80-BD50-4C11-9712-64C3B3865AFF}.Release|x64.ActiveCfg = Release|x64 + {A6510B80-BD50-4C11-9712-64C3B3865AFF}.Release|x64.Build.0 = Release|x64 + {5852AC33-6B01-44F5-BAF3-2AAF796E8449}.Debug|x64.ActiveCfg = Release|x64 + {5852AC33-6B01-44F5-BAF3-2AAF796E8449}.Debug|x64.Build.0 = Release|x64 + {5852AC33-6B01-44F5-BAF3-2AAF796E8449}.Release|x64.ActiveCfg = Release|x64 + {5852AC33-6B01-44F5-BAF3-2AAF796E8449}.Release|x64.Build.0 = Release|x64 + {01002755-60CA-40EE-94D9-11C07EB58786}.Debug|x64.ActiveCfg = Debug|x64 + {01002755-60CA-40EE-94D9-11C07EB58786}.Debug|x64.Build.0 = Debug|x64 + {01002755-60CA-40EE-94D9-11C07EB58786}.Release|x64.ActiveCfg = Release|x64 + {01002755-60CA-40EE-94D9-11C07EB58786}.Release|x64.Build.0 = Release|x64 + {17BDD0A5-98E5-464A-8A00-050D9FF4C562}.Debug|x64.ActiveCfg = Debug|x64 + {17BDD0A5-98E5-464A-8A00-050D9FF4C562}.Debug|x64.Build.0 = Debug|x64 + {17BDD0A5-98E5-464A-8A00-050D9FF4C562}.Release|x64.ActiveCfg = Release|x64 + {17BDD0A5-98E5-464A-8A00-050D9FF4C562}.Release|x64.Build.0 = Release|x64 + {7DB87633-9CAB-4AE4-9ED0-AA6E77448486}.Debug|x64.ActiveCfg = Debug|x64 + {7DB87633-9CAB-4AE4-9ED0-AA6E77448486}.Debug|x64.Build.0 = Debug|x64 + {7DB87633-9CAB-4AE4-9ED0-AA6E77448486}.Release|x64.ActiveCfg = Release|x64 + {7DB87633-9CAB-4AE4-9ED0-AA6E77448486}.Release|x64.Build.0 = Release|x64 + {3E571C7C-59B5-485C-AC78-3F34D3511CD2}.Debug|x64.ActiveCfg = Debug|x64 + {3E571C7C-59B5-485C-AC78-3F34D3511CD2}.Debug|x64.Build.0 = Debug|x64 + {3E571C7C-59B5-485C-AC78-3F34D3511CD2}.Release|x64.ActiveCfg = Release|x64 + 
{3E571C7C-59B5-485C-AC78-3F34D3511CD2}.Release|x64.Build.0 = Release|x64 + {05D61B37-9714-4234-9961-384A63F7175E}.Debug|x64.ActiveCfg = Debug|x64 + {05D61B37-9714-4234-9961-384A63F7175E}.Debug|x64.Build.0 = Debug|x64 + {05D61B37-9714-4234-9961-384A63F7175E}.Release|x64.ActiveCfg = Release|x64 + {05D61B37-9714-4234-9961-384A63F7175E}.Release|x64.Build.0 = Release|x64 + {E1AC9797-ABE3-4881-A51B-37D8687AAE35}.Debug|x64.ActiveCfg = Debug|x64 + {E1AC9797-ABE3-4881-A51B-37D8687AAE35}.Debug|x64.Build.0 = Debug|x64 + {E1AC9797-ABE3-4881-A51B-37D8687AAE35}.Release|x64.ActiveCfg = Release|x64 + {E1AC9797-ABE3-4881-A51B-37D8687AAE35}.Release|x64.Build.0 = Release|x64 + {10FD4868-BB16-442B-B0AC-18AE278D9C60}.Debug|x64.ActiveCfg = Debug|x64 + {10FD4868-BB16-442B-B0AC-18AE278D9C60}.Debug|x64.Build.0 = Debug|x64 + {10FD4868-BB16-442B-B0AC-18AE278D9C60}.Release|x64.ActiveCfg = Release|x64 + {10FD4868-BB16-442B-B0AC-18AE278D9C60}.Release|x64.Build.0 = Release|x64 + {2D5F23F7-3184-43EC-A7F1-C924F7FEF786}.Debug|x64.ActiveCfg = Debug|x64 + {2D5F23F7-3184-43EC-A7F1-C924F7FEF786}.Debug|x64.Build.0 = Debug|x64 + {2D5F23F7-3184-43EC-A7F1-C924F7FEF786}.Release|x64.ActiveCfg = Release|x64 + {2D5F23F7-3184-43EC-A7F1-C924F7FEF786}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(NestedProjects) = preSolution + {33A732D1-2B58-4FEE-9696-B9483496229F} = {CA6AB459-A31A-4C15-B1A6-A82C349B54B4} + {0DC7F5A2-E963-4E7F-BD37-6F7864B726F2} = {81B3B5D1-70F6-4979-AC76-003F9A6B316B} + {A6510B80-BD50-4C11-9712-64C3B3865AFF} = {6D05489A-B06F-4946-AF59-887A14D83171} + {5852AC33-6B01-44F5-BAF3-2AAF796E8449} = {6D05489A-B06F-4946-AF59-887A14D83171} + {01002755-60CA-40EE-94D9-11C07EB58786} = {EE591221-F22E-49B3-837C-1921302082DC} + {17BDD0A5-98E5-464A-8A00-050D9FF4C562} = {E6026D6A-01C5-4582-B2C1-64751490DABE} + {7DB87633-9CAB-4AE4-9ED0-AA6E77448486} = {E6026D6A-01C5-4582-B2C1-64751490DABE} + {3E571C7C-59B5-485C-AC78-3F34D3511CD2} = {E6026D6A-01C5-4582-B2C1-64751490DABE} + {05D61B37-9714-4234-9961-384A63F7175E} = {E6026D6A-01C5-4582-B2C1-64751490DABE} + {E1AC9797-ABE3-4881-A51B-37D8687AAE35} = {E6026D6A-01C5-4582-B2C1-64751490DABE} + {10FD4868-BB16-442B-B0AC-18AE278D9C60} = {E6026D6A-01C5-4582-B2C1-64751490DABE} + {2D5F23F7-3184-43EC-A7F1-C924F7FEF786} = {E6026D6A-01C5-4582-B2C1-64751490DABE} + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {A0750637-2CCB-4139-B25E-F2CE740DCFAC} + EndGlobalSection + GlobalSection(Performance) = preSolution + HasPerformanceSessions = true + EndGlobalSection + GlobalSection(Performance) = preSolution + HasPerformanceSessions = true + EndGlobalSection + GlobalSection(Performance) = preSolution + HasPerformanceSessions = true + EndGlobalSection + GlobalSection(Performance) = preSolution + HasPerformanceSessions = true + EndGlobalSection + GlobalSection(Performance) = preSolution + HasPerformanceSessions = true + EndGlobalSection + GlobalSection(Performance) = preSolution + HasPerformanceSessions = true + EndGlobalSection + GlobalSection(Performance) = preSolution + HasPerformanceSessions = true + EndGlobalSection + GlobalSection(Performance) = preSolution + HasPerformanceSessions = true + EndGlobalSection + GlobalSection(Performance) = preSolution + HasPerformanceSessions = true + EndGlobalSection + GlobalSection(Performance) = preSolution + HasPerformanceSessions = true + EndGlobalSection +EndGlobal diff --git a/cs/src/benchmark/App.config 
b/cs/src/benchmark/App.config new file mode 100644 index 000000000..306dce5bf --- /dev/null +++ b/cs/src/benchmark/App.config @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/cs/src/benchmark/FASTER.benchmark.csproj b/cs/src/benchmark/FASTER.benchmark.csproj new file mode 100644 index 000000000..c2745a6c0 --- /dev/null +++ b/cs/src/benchmark/FASTER.benchmark.csproj @@ -0,0 +1,43 @@ + + + + net46 + x64 + win7-x64 + + + + Exe + true + FASTER.benchmark + prompt + MinimumRecommendedRules.ruleset + PackageReference + true + + + + TRACE;DEBUG + full + true + bin\x64\Debug\ + + + TRACE + pdbonly + true + bin\x64\Release\ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/cs/src/benchmark/FasterYcsbBenchmark.cs b/cs/src/benchmark/FasterYcsbBenchmark.cs new file mode 100644 index 000000000..444d0fd11 --- /dev/null +++ b/cs/src/benchmark/FasterYcsbBenchmark.cs @@ -0,0 +1,585 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#pragma warning disable 0162 + +//#define DASHBOARD +//#define USE_CODEGEN + +using FASTER.core; +using System; +using System.Diagnostics; +using System.IO; +using System.Net; +using System.Runtime.InteropServices; +using System.Threading; + +namespace FASTER.benchmark +{ + public unsafe class FASTER_YcsbBenchmark + { + public enum Op : ulong + { + Upsert = 0, + Read = 1, + ReadModifyWrite = 2 + } + + const long kInitCount = 250000000; + const long kTxnCount = 1000000000; + const int kFileChunkSize = 4096; + const long kChunkSize = 640; + const bool kUseSyntheticData = false; + + Key[] init_keys_; + + Key[] txn_keys_; + Key* txn_keys_ptr; + + long idx_ = 0; + + Input[] input_; + Input* input_ptr; + readonly IDevice device; + +#if USE_CODEGEN + IFASTER +#else + FasterKV +#endif + store; + + long total_ops_done = 0; + + const string kKeyWorkload = "a"; + readonly int threadCount; + readonly int numaStyle; + readonly string distribution; + readonly int readPercent; + + const int kMaxKey = 268435456; + const int kRunSeconds = 30; + const int kCheckpointSeconds = -1; + + volatile bool done = false; + + public FASTER_YcsbBenchmark(int threadCount_, int numaStyle_, string distribution_, int readPercent_) + { + threadCount = threadCount_; + numaStyle = numaStyle_; + distribution = distribution_; + readPercent = readPercent_; + +#if DASHBOARD + statsWritten = new AutoResetEvent[threadCount]; + for (int i = 0; i < threadCount; i++) + { + statsWritten[i] = new AutoResetEvent(false); + } + threadThroughput = new double[threadCount]; + threadAverageLatency = new double[threadCount]; + threadMaximumLatency = new double[threadCount]; + threadProgress = new long[threadCount]; + writeStats = new bool[threadCount]; + freq = HiResTimer.EstimateCPUFrequency(); +#endif + + device = FASTERFactory.CreateLogDevice("D:\\data\\hlog"); + +#if USE_CODEGEN + store = FASTERFactory.Create +#else + store = new FasterKV +#endif + (kMaxKey / 2, device); + } + + private void SetupYcsb(int thread_idx) + { + if (numaStyle == 0) + Native32.AffinitizeThreadRoundRobin((uint)thread_idx); + else + Native32.AffinitizeThreadShardedTwoNuma((uint)thread_idx); + + store.StartSession(); + +#if DASHBOARD + var tstart = HiResTimer.Rdtsc(); + var tstop1 = tstart; + var lastWrittenValue = 0; + int count = 0; +#endif + + Value value = default(Value); + + for (long chunk_idx = Interlocked.Add(ref idx_, kChunkSize) - kChunkSize; + chunk_idx < kInitCount; + chunk_idx = Interlocked.Add(ref idx_, kChunkSize) - kChunkSize) + 
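+            // Threads claim disjoint chunks of kChunkSize keys by atomically advancing the
+            // shared cursor idx_, so no key is loaded more than once during setup.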
{ + for (long idx = chunk_idx; idx < chunk_idx + kChunkSize; ++idx) + { + if (idx % 256 == 0) + { + store.Refresh(); + + if (idx % 65536 == 0) + { + store.CompletePending(false); + } + } + + Key key = init_keys_[idx]; + store.Upsert(&key, &value, null, 1); + } +#if DASHBOARD + count += (int)kChunkSize; + + //Check if stats collector is requesting for statistics + if (writeStats[thread_idx]) + { + var tstart1 = tstop1; + tstop1 = HiResTimer.Rdtsc(); + threadThroughput[thread_idx] = (count - lastWrittenValue) / ((tstop1 - tstart1) / freq); + lastWrittenValue = count; + writeStats[thread_idx] = false; + statsWritten[thread_idx].Set(); + } +#endif + } + + + store.CompletePending(true); + store.StopSession(); + } + private void RunYcsb(int thread_idx) + { + RandomGenerator rng = new RandomGenerator((uint)(1 + thread_idx)); + + if (numaStyle == 0) + Native32.AffinitizeThreadRoundRobin((uint)thread_idx); + else + Native32.AffinitizeThreadShardedTwoNuma((uint)thread_idx); + + Stopwatch sw = new Stopwatch(); + sw.Start(); + + Value value = default(Value); + long reads_done = 0; + long writes_done = 0; + +#if DASHBOARD + var tstart = HiResTimer.Rdtsc(); + var tstop1 = tstart; + var lastWrittenValue = 0; + int count = 0; +#endif + + store.StartSession(); + + while (!done) + { + long chunk_idx = Interlocked.Add(ref idx_, kChunkSize) - kChunkSize; + while (chunk_idx >= kTxnCount) + { + if (chunk_idx == kTxnCount) + idx_ = 0; + chunk_idx = Interlocked.Add(ref idx_, kChunkSize) - kChunkSize; + } + + var local_txn_keys_ptr = txn_keys_ptr + chunk_idx; + + for (long idx = chunk_idx; idx < chunk_idx + kChunkSize && !done; ++idx, ++local_txn_keys_ptr) + { + Op op; + int r = (int)rng.Generate(100); + if (r < readPercent) + op = Op.Read; + else if (readPercent >= 0) + op = Op.Upsert; + else + op = Op.ReadModifyWrite; + + if (idx % 256 == 0) + { + store.Refresh(); + + if (idx % 65536 == 0) + { + store.CompletePending(false); + } + } + + switch (op) + { + case Op.Upsert: + { + store.Upsert(local_txn_keys_ptr, &value, null, 1); + ++writes_done; + break; + } + case Op.Read: + { + Status result = store.Read(local_txn_keys_ptr, null, (Output*)&value, null, 1); + if (result == Status.OK) + { + ++reads_done; + } + break; + } + case Op.ReadModifyWrite: + { + Status result = store.RMW(local_txn_keys_ptr, input_ptr + (idx & 0x7), null, 1); + if (result == Status.OK) + { + ++writes_done; + } + break; + } + default: + throw new NotImplementedException("Unexpected op: " + op); + } + } + +#if DASHBOARD + count += (int)kChunkSize; + + //Check if stats collector is requesting for statistics + if (writeStats[thread_idx]) + { + var tstart1 = tstop1; + tstop1 = HiResTimer.Rdtsc(); + threadProgress[thread_idx] = count; + threadThroughput[thread_idx] = (count - lastWrittenValue) / ((tstop1 - tstart1) / freq); + lastWrittenValue = count; + writeStats[thread_idx] = false; + statsWritten[thread_idx].Set(); + } +#endif + } + + store.CompletePending(true); + store.StopSession(); + sw.Stop(); + + Console.WriteLine("Thread " + thread_idx + " done; " + reads_done + " reads, " + + writes_done + " writes, in " + sw.ElapsedMilliseconds + " ms."); + Interlocked.Add(ref total_ops_done, reads_done + writes_done); + } + +#if DASHBOARD + int measurementInterval = 2000; + bool allDone; + bool measureLatency; + bool[] writeStats; + private EventWaitHandle[] statsWritten; + double[] threadThroughput; + double[] threadAverageLatency; + double[] threadMaximumLatency; + long[] threadProgress; + double freq; + + void DoContinuousMeasurements() + { + + if 
(numaStyle == 0) + Native32.AffinitizeThreadRoundRobin((uint)threadCount + 1); + else + Native32.AffinitizeThreadShardedTwoNuma((uint)threadCount + 1); + + double totalThroughput, totalLatency, maximumLatency; + double totalProgress; + int ver = 0; + + using (var client = new WebClient()) + { + while (!allDone) + { + ver++; + + Thread.Sleep(measurementInterval); + + totalProgress = 0; + totalThroughput = 0; + totalLatency = 0; + maximumLatency = 0; + + for (int i = 0; i < threadCount; i++) + { + writeStats[i] = true; + } + + + for (int i = 0; i < threadCount; i++) + { + statsWritten[i].WaitOne(); + totalThroughput += threadThroughput[i]; + totalProgress += threadProgress[i]; + if (measureLatency) + { + totalLatency += threadAverageLatency[i]; + if (threadMaximumLatency[i] > maximumLatency) + { + maximumLatency = threadMaximumLatency[i]; + } + } + } + + if (measureLatency) + { + Console.WriteLine("{0} \t {1:0.000} \t {2} \t {3} \t {4} \t {5}", ver, totalThroughput / (double)1000000, totalLatency / threadCount, maximumLatency, store.Size, totalProgress); + } + else + { + Console.WriteLine("{0} \t {1:0.000} \t {2} \t {3}", ver, totalThroughput / (double)1000000, store.Size, totalProgress); + } + } + } + } +#endif + + #region Load Data + + private void LoadDataFromFile(string filePath) + { + string init_filename = filePath + "\\load_" + distribution + "_250M_raw.dat"; + string txn_filename = filePath + "\\run_" + distribution + "_250M_1000M_raw.dat"; + + long count = 0; + using (FileStream stream = File.Open(init_filename, FileMode.Open, FileAccess.Read, + FileShare.Read)) + { + Console.WriteLine("loading keys from " + init_filename + " into memory..."); + init_keys_ = new Key[kInitCount]; + + byte[] chunk = new byte[kFileChunkSize]; + GCHandle chunk_handle = GCHandle.Alloc(chunk, GCHandleType.Pinned); + byte* chunk_ptr = (byte*)chunk_handle.AddrOfPinnedObject(); + + long offset = 0; + + while (true) + { + stream.Position = offset; + int size = stream.Read(chunk, 0, kFileChunkSize); + for (int idx = 0; idx < size; idx += Key.kSizeInBytes) + { + init_keys_[count] = *((Key*)(chunk_ptr + idx)); + ++count; + } + if (size == kFileChunkSize) + offset += kFileChunkSize; + else + break; + + if (count == kInitCount) + break; + } + + if (count != kInitCount) + { + throw new InvalidDataException("Init file load fail!"); + } + } + + Console.WriteLine("loaded " + kInitCount + " keys."); + + + using (FileStream stream = File.Open(txn_filename, FileMode.Open, FileAccess.Read, FileShare.Read)) + { + byte[] chunk = new byte[kFileChunkSize]; + GCHandle chunk_handle = GCHandle.Alloc(chunk, GCHandleType.Pinned); + byte* chunk_ptr = (byte*)chunk_handle.AddrOfPinnedObject(); + + Console.WriteLine("loading txns from " + txn_filename + " into memory..."); + + txn_keys_ = new Key[kTxnCount]; + GCHandle handle2 = GCHandle.Alloc(txn_keys_, GCHandleType.Pinned); + txn_keys_ptr = (Key*)handle2.AddrOfPinnedObject(); + + count = 0; + long offset = 0; + + while (true) + { + stream.Position = offset; + int size = stream.Read(chunk, 0, kFileChunkSize); + for (int idx = 0; idx < size; idx += Key.kSizeInBytes) + { + txn_keys_[count] = *((Key*)(chunk_ptr + idx)); + ++count; + } + if (size == kFileChunkSize) + offset += kFileChunkSize; + else + break; + + if (count == kTxnCount) + break; + } + + if (count != kTxnCount) + { + throw new InvalidDataException("Txn file load fail!" 
+ count + ":" + kTxnCount); + } + } + } + + private void LoadData() + { + if (kUseSyntheticData) + { + LoadSyntheticData(); + return; + } + + string filePath = "C:\\ycsb_files"; + + if (!Directory.Exists(filePath)) + { + filePath = "D:\\ycsb_files"; + } + if (!Directory.Exists(filePath)) + { + filePath = "E:\\ycsb_files"; + } + + if (Directory.Exists(filePath)) + { + LoadDataFromFile(filePath); + } + else + { + Console.WriteLine("WARNING: Could not find YCSB directory, loading synthetic data instead"); + LoadSyntheticData(); + } + } + + private void LoadSyntheticData() + { + init_keys_ = new Key[kInitCount]; + long val = 0; + for (int idx = 0; idx < kInitCount; idx++) + { + init_keys_[idx] = new Key { value = val++ }; + } + + Console.WriteLine("loaded " + kInitCount + " keys."); + + RandomGenerator generator = new RandomGenerator(); + + txn_keys_ = new Key[kTxnCount]; + GCHandle handle2 = GCHandle.Alloc(txn_keys_, GCHandleType.Pinned); + txn_keys_ptr = (Key*)handle2.AddrOfPinnedObject(); + + for (int idx = 0; idx < kTxnCount; idx++) + { + txn_keys_[idx] = new Key { value = (long)generator.Generate64(kInitCount) }; + } + + Console.WriteLine("loaded " + kTxnCount + " txns."); + + } + #endregion + + public unsafe void Run() + { + RandomGenerator rng = new RandomGenerator(); + + LoadData(); + + input_ = new Input[8]; + for (int i = 0; i < 8; i++) + { + input_[i].value = i; + } + GCHandle handle = GCHandle.Alloc(input_, GCHandleType.Pinned); + input_ptr = (Input*)handle.AddrOfPinnedObject(); + + Console.WriteLine("loaded " + kTxnCount + " txns."); + +#if DASHBOARD + var dash = new Thread(() => DoContinuousMeasurements()); + dash.Start(); +#endif + + Thread[] workers = new Thread[threadCount]; + + Console.WriteLine("Executing setup."); + + // Setup the store for the YCSB benchmark. + for (int idx = 0; idx < threadCount; ++idx) + { + int x = idx; + workers[idx] = new Thread(() => SetupYcsb(x)); + } + // Start threads. + foreach (Thread worker in workers) + { + worker.Start(); + } + foreach (Thread worker in workers) + { + worker.Join(); + } + + long startTailAddress = store.Size; + Console.WriteLine("Start tail address = " + startTailAddress); + + + idx_ = 0; + store.DumpDistribution(); + + Console.WriteLine("Executing experiment."); + + // Run the experiment. + for (int idx = 0; idx < threadCount; ++idx) + { + int x = idx; + workers[idx] = new Thread(() => RunYcsb(x)); + } + // Start threads. 
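+            // The main thread then sleeps for kRunSeconds (optionally taking periodic full
+            // checkpoints), sets 'done' to stop the workers, and reports aggregate throughput.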
+ foreach (Thread worker in workers) + { + worker.Start(); + } + + Stopwatch swatch = new Stopwatch(); + swatch.Start(); + + if (kCheckpointSeconds <= 0) + { + Thread.Sleep(TimeSpan.FromSeconds(kRunSeconds)); + } + else + { + int runSeconds = 0; + while (runSeconds < kRunSeconds) + { + Thread.Sleep(TimeSpan.FromSeconds(kCheckpointSeconds)); + store.TakeFullCheckpoint(out Guid token); + runSeconds += kCheckpointSeconds; + } + } + + swatch.Stop(); + + done = true; + + foreach (Thread worker in workers) + { + worker.Join(); + } + + double seconds = swatch.ElapsedMilliseconds / 1000.0; + long endTailAddress = store.Size; + Console.WriteLine("End tail address = " + endTailAddress); + + Console.WriteLine("Total " + total_ops_done + " ops done " + " in " + seconds + " secs."); + Console.WriteLine("##, " + distribution + ", " + numaStyle + ", " + readPercent + ", " + + threadCount + ", " + total_ops_done / seconds + ", " + + (endTailAddress - startTailAddress)); + + Console.ReadLine(); + } + } +} diff --git a/cs/src/benchmark/Program.cs b/cs/src/benchmark/Program.cs new file mode 100644 index 000000000..37279570f --- /dev/null +++ b/cs/src/benchmark/Program.cs @@ -0,0 +1,58 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using System; +using FASTER.core; +using CommandLine; + +namespace FASTER.benchmark +{ + class Options + { + [Option('b', "benchmark", Required = false, Default = 0, + HelpText = "Benchmark to run (0 - YCSB)")] + public int Benchmark { get; set; } + + [Option('t', "threads", Required = false, Default = 8, + HelpText = "Number of threads to run the workload.")] + public int ThreadCount { get; set; } + + [Option('n', "numa", Required = false, Default = 0, + HelpText = "0 = no numa, 1 = sharded numa")] + public int NumaStyle { get; set; } + + [Option('r', "read_percent", Required = false, Default = 50, + HelpText = "Percentage of reads (-1 for 100% RMW")] + public int ReadPercent { get; set; } + + [Option('d', "distribution", Required = false, Default = "uniform", + HelpText = "Distribution")] + public string Distribution { get; set; } + } + + enum BenchmarkType : int + { + Ycsb + }; + + public class Program + { + public static void Main(string[] args) + { + ParserResult result = Parser.Default.ParseArguments(args); + if (result.Tag == ParserResultType.NotParsed) + { + return; + } + + var options = result.MapResult(o => o, xs => new Options()); + BenchmarkType b = (BenchmarkType)options.Benchmark; + + if (b == BenchmarkType.Ycsb) + { + var test = new FASTER_YcsbBenchmark(options.ThreadCount, options.NumaStyle, options.Distribution, options.ReadPercent); + test.Run(); + } + } + } +} diff --git a/cs/src/benchmark/Properties/AssemblyInfo.cs b/cs/src/benchmark/Properties/AssemblyInfo.cs new file mode 100644 index 000000000..62e3fd7b9 --- /dev/null +++ b/cs/src/benchmark/Properties/AssemblyInfo.cs @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. 
+//[assembly: AssemblyTitle("FASTER.benchmark")] +[assembly: AssemblyDescription("")] +//[assembly: AssemblyConfiguration("")] +//[assembly: AssemblyCompany("")] +//[assembly: AssemblyProduct("FASTER.benchmark")] +[assembly: AssemblyCopyright("Copyright © 2015")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("19ed104b-3dcc-42ed-9dc0-afa825042543")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +//[assembly: AssemblyVersion("1.0.0.0")] +//[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/cs/src/benchmark/RandomGenerator.cs b/cs/src/benchmark/RandomGenerator.cs new file mode 100644 index 000000000..8dbc55637 --- /dev/null +++ b/cs/src/benchmark/RandomGenerator.cs @@ -0,0 +1,85 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Text; +using System.Threading.Tasks; + +namespace FASTER.core +{ + public class RandomGenerator + { + private uint x; + private uint y; + private uint z; + private uint w; + + public RandomGenerator(uint seed = 0) + { + if (seed == 0) + { + long counter = 0; + HiResTimer.QueryPerformanceCounter(ref counter); + x = (uint)(counter & 0x0FFFFFFF); + } + else + { + x = seed; + } + + y = 362436069; + z = 521288629; + w = 88675123; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public uint Generate() + { + uint t; + t = (x ^ (x << 11)); + x = y; + y = z; + z = w; + + return (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8))); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public uint Generate(uint max) + { + uint t; + t = (x ^ (x << 11)); + x = y; + y = z; + z = w; + + return (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8))) % max; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ulong Generate64(ulong max) + { + uint t; + t = (x ^ (x << 11)); + x = y; + y = z; + z = w; + + ulong r = (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8))); + + r <<= 32; + + t = (x ^ (x << 11)); + x = y; + y = z; + z = w; + + r |= ((w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)))); + + return r % max; + } + } +} diff --git a/cs/src/core/Allocator/IAllocator.cs b/cs/src/core/Allocator/IAllocator.cs new file mode 100644 index 000000000..2e04cc109 --- /dev/null +++ b/cs/src/core/Allocator/IAllocator.cs @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
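+
+// Minimal allocator abstraction: implementations hand out logical addresses which callers
+// translate to raw pointers via GetPhysicalAddress().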
+ +namespace FASTER.core +{ + public interface IAllocator + { + long Allocate(int numSlots); + long GetPhysicalAddress(long logicalAddress); + void CheckForAllocateComplete(ref long address); + int RecordSize { get; } + void Free(); + } +} diff --git a/cs/src/core/Allocator/MallocFixedPageSize.cs b/cs/src/core/Allocator/MallocFixedPageSize.cs new file mode 100644 index 000000000..28b5a1dc5 --- /dev/null +++ b/cs/src/core/Allocator/MallocFixedPageSize.cs @@ -0,0 +1,615 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +#define CALLOC + +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Runtime.InteropServices; +using System.Collections.Concurrent; +using System.Linq.Expressions; +using System.IO; + +namespace FASTER.core +{ + public unsafe class MallocFixedPageSize + { + public static bool ForceUnpinnedAllocation = false; + + public static MallocFixedPageSize Instance = new MallocFixedPageSize(); + public static MallocFixedPageSize PhysicalInstance = new MallocFixedPageSize(true); + + protected const int PageSizeBits = 16; + internal const int PageSize = 1 << PageSizeBits; + protected const int PageSizeMask = PageSize - 1; + protected const int LevelSizeBits = 18; + protected const int LevelSize = 1 << LevelSizeBits; + protected const int LevelSizeMask = LevelSize - 1; + + protected T[][] values = new T[LevelSize][]; + protected GCHandle[] handles = new GCHandle[LevelSize]; + protected IntPtr[] pointers = new IntPtr[LevelSize]; + + protected T[] values0; + protected GCHandle handles0; + protected IntPtr pointers0; + protected readonly int RecordSize; + protected readonly int AlignedPageSize; + + protected volatile int writeCacheLevel; + + protected volatile int count; + + public readonly bool IsPinned; + public readonly bool ReturnPhysicalAddress; + + [ThreadStatic] + public static Queue freeList; +#if DEBUG + public ConcurrentBag> allQueues = new ConcurrentBag>(); +#endif + public MallocFixedPageSize(bool returnPhysicalAddress = false) + { + values[0] = new T[PageSize]; + +#if !(CALLOC) + Array.Clear(values[0], 0, PageSize); +#endif + ReturnPhysicalAddress = returnPhysicalAddress; + + if (ForceUnpinnedAllocation) + { + IsPinned = false; + ReturnPhysicalAddress = false; + } + else + { + IsPinned = true; + if (default(T) == null) + { + IsPinned = false; + ReturnPhysicalAddress = false; + } + else + { + try + { + handles[0] = GCHandle.Alloc(values[0], GCHandleType.Pinned); + pointers[0] = handles[0].AddrOfPinnedObject(); + handles0 = handles[0]; + pointers0 = pointers[0]; + RecordSize = Marshal.SizeOf(values[0][0]); + AlignedPageSize = RecordSize * PageSize; + } + catch (Exception) + { + IsPinned = false; + ReturnPhysicalAddress = false; + } + } + } + + values0 = values[0]; + writeCacheLevel = -1; + Interlocked.MemoryBarrier(); + + BulkAllocate(); // null pointer + } + + public void ReInitialize() + { + values = new T[LevelSize][]; + handles = new GCHandle[LevelSize]; + pointers = new IntPtr[LevelSize]; + values[0] = new T[PageSize]; + + +#if !(CALLOC) + Array.Clear(values[0], 0, PageSize); +#endif + + if (IsPinned) + { + handles[0] = GCHandle.Alloc(values[0], GCHandleType.Pinned); + pointers[0] = handles[0].AddrOfPinnedObject(); + handles0 = handles[0]; + pointers0 = pointers[0]; + } + + values0 = values[0]; + writeCacheLevel = -1; + Interlocked.MemoryBarrier(); + + BulkAllocate(); // null pointer + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + 
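+        // If the allocator returns physical addresses, 'address' is already a raw pointer and is
+        // returned unchanged; otherwise the logical address is split into a page index (high bits)
+        // and a slot within that pinned page (low bits).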
public long GetPhysicalAddress(long address) + { + if (ReturnPhysicalAddress) + { + return address; + } + else + { + return + (long)pointers[address >> PageSizeBits] + + (long)(address & PageSizeMask) * RecordSize; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ref T Get(long index) + { + if (this.ReturnPhysicalAddress) + throw new Exception("Physical pointer returned by allocator: de-reference pointer to get records instead of calling Get"); + + return ref values + [index >> PageSizeBits] + [index & PageSizeMask]; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Set(long index, ref T value) + { + if (this.ReturnPhysicalAddress) + throw new Exception("Physical pointer returned by allocator: de-reference pointer to set records instead of calling Set (otherwise, set ForceUnpinnedAllocation to true)"); + + values + [index >> PageSizeBits] + [index & PageSizeMask] + = value; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Set(long index, T value) + { + Set(index, ref value); + } + + //static long _freed = 0; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void FreeAtEpoch(long pointer, int removed_epoch = -1) + { + //if (Interlocked.Increment(ref _freed) % 100000 == 0) + //{ + // Console.WriteLine("Freed " + _freed); + //} + + if (!ReturnPhysicalAddress) + { + values[pointer >> PageSizeBits][pointer & PageSizeMask] = default(T); + } + if (freeList == null) freeList = new Queue(); + freeList.Enqueue(new FreeItem { removed_item = pointer, removal_epoch = removed_epoch }); + } + +#if DEBUG + public int TotalFreeCount() + { + int result = 0; + var x = allQueues.ToArray(); + foreach (var q in x) + { + result += q.Count; + } + return result; + } + + public int TotalUsedPointers() + { + return count - TotalFreeCount(); + } +#endif + public const int kAllocateChunkSize = 16; + + + /// + /// Warning: cannot mix 'n' match use of + /// Allocate and BulkAllocate + /// + /// + public long BulkAllocate() + { + // Determine insertion index. + // ReSharper disable once CSharpWarnings::CS0420 +#pragma warning disable 420 + int index = Interlocked.Add(ref count, kAllocateChunkSize) - kAllocateChunkSize; +#pragma warning restore 420 + + int offset = index & PageSizeMask; + int baseAddr = index >> PageSizeBits; + + // Handle indexes in first batch specially because they do not use write cache. + if (baseAddr == 0) + { + // If index 0, then allocate space for next level. + if (index == 0) + { + var tmp = new T[PageSize]; +#if !(CALLOC) + Array.Clear(tmp, 0, PageSize); +#endif + + if (IsPinned) + { + handles[1] = GCHandle.Alloc(tmp, GCHandleType.Pinned); + pointers[1] = handles[1].AddrOfPinnedObject(); + } + values[1] = tmp; + Interlocked.MemoryBarrier(); + } + + // Return location. + if (ReturnPhysicalAddress) + return (((long)pointers0) + index * RecordSize); + else + return index; + } + + // See if write cache contains corresponding array. + var cache = writeCacheLevel; + T[] array; + + if (cache != -1) + { + // Write cache is correct array only if index is within [arrayCapacity, 2*arrayCapacity). + if (cache == baseAddr) + { + // Return location. + if (ReturnPhysicalAddress) + return ((long)pointers[baseAddr]) + (long)offset * RecordSize; + else + return index; + } + } + + // Write cache did not work, so get level information from index. + // int level = GetLevelFromIndex(index); + + // Spin-wait until level has an allocated array. 
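+            // The array for this level is published by the thread that claimed offset 0
+            // of the previous level (or by the index == 0 path above); if that thread has
+            // not finished yet, values[baseAddr] is still null, so wait for the pointer
+            // to become visible before touching the level.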
+ var spinner = new SpinWait(); + while (true) + { + array = values[baseAddr]; + if (array != null) + { + break; + } + spinner.SpinOnce(); + } + + // Perform extra actions if inserting at offset 0 of level. + if (offset == 0) + { + // Update write cache to point to current level. + writeCacheLevel = baseAddr; + Interlocked.MemoryBarrier(); + + // Allocate for next page + int newBaseAddr = baseAddr + 1; + var tmp = new T[PageSize]; + +#if !(CALLOC) + Array.Clear(tmp, 0, PageSize); +#endif + + if (IsPinned) + { + handles[newBaseAddr] = GCHandle.Alloc(tmp, GCHandleType.Pinned); + pointers[newBaseAddr] = handles[newBaseAddr].AddrOfPinnedObject(); + } + values[newBaseAddr] = tmp; + + Interlocked.MemoryBarrier(); + } + + // Return location. + if (ReturnPhysicalAddress) + return ((long)pointers[baseAddr]) + (long)offset * RecordSize; + else + return index; + } + + //static long _allocated = 0; + public long Allocate() + { + //if (Interlocked.Increment(ref _allocated) % 100000 == 0) + //{ + // Console.WriteLine("Allocated " + _allocated); + //} + + if (freeList == null) + { + freeList = new Queue(); +#if DEBUG + allQueues.Add(freeList); +#endif + } + if (freeList.Count > 0) + { + if (freeList.Peek().removal_epoch <= LightEpoch.Instance.SafeToReclaimEpoch) + return freeList.Dequeue().removed_item; + + //if (freeList.Count % 64 == 0) + // LightEpoch.Instance.BumpCurrentEpoch(); + } + + // Determine insertion index. + // ReSharper disable once CSharpWarnings::CS0420 +#pragma warning disable 420 + int index = Interlocked.Increment(ref count) - 1; +#pragma warning restore 420 + + int offset = index & PageSizeMask; + int baseAddr = index >> PageSizeBits; + + // Handle indexes in first batch specially because they do not use write cache. + if (baseAddr == 0) + { + // If index 0, then allocate space for next level. + if (index == 0) + { + var tmp = new T[PageSize]; + +#if !(CALLOC) + Array.Clear(tmp, 0, PageSize); +#endif + + if (IsPinned) + { + handles[1] = GCHandle.Alloc(tmp, GCHandleType.Pinned); + pointers[1] = handles[1].AddrOfPinnedObject(); + } + values[1] = tmp; + Interlocked.MemoryBarrier(); + } + + // Return location. + if (ReturnPhysicalAddress) + return ((long)pointers0) + index * RecordSize; + else + return index; + } + + // See if write cache contains corresponding array. + var cache = writeCacheLevel; + T[] array; + + if (cache != -1) + { + // Write cache is correct array only if index is within [arrayCapacity, 2*arrayCapacity). + if (cache == baseAddr) + { + // Return location. + if (ReturnPhysicalAddress) + return ((long)pointers[baseAddr]) + (long)offset * RecordSize; + else + return index; + } + } + + // Write cache did not work, so get level information from index. + // int level = GetLevelFromIndex(index); + + // Spin-wait until level has an allocated array. + var spinner = new SpinWait(); + while (true) + { + array = values[baseAddr]; + if (array != null) + { + break; + } + spinner.SpinOnce(); + } + + // Perform extra actions if inserting at offset 0 of level. + if (offset == 0) + { + // Update write cache to point to current level. 
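+                // Interlocked.Increment hands out each index exactly once, so only the
+                // single thread that received offset 0 of this level runs this block: it
+                // publishes the write cache and eagerly allocates the array for the next
+                // level so that later allocations (and the spin-wait above) find it ready.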
+ writeCacheLevel = baseAddr; + Interlocked.MemoryBarrier(); + + // Allocate for next page + int newBaseAddr = baseAddr + 1; + var tmp = new T[PageSize]; + +#if !(CALLOC) + Array.Clear(tmp, 0, PageSize); +#endif + + if (IsPinned) + { + handles[newBaseAddr] = GCHandle.Alloc(tmp, GCHandleType.Pinned); + pointers[newBaseAddr] = handles[newBaseAddr].AddrOfPinnedObject(); + } + values[newBaseAddr] = tmp; + + Interlocked.MemoryBarrier(); + } + + // Return location. + if (ReturnPhysicalAddress) + return ((long)pointers[baseAddr]) + (long)offset * RecordSize; + else + return index; + } + + public void Dispose() + { + for (int i = 0; i < values.Length; i++) + { + if (IsPinned && (handles[i].IsAllocated)) handles[i].Free(); + values[i] = null; + } + handles = null; + pointers = null; + values = null; + values0 = null; + count = 0; + } + + public int GetMaxAllocated() + { + return count; + } + + #region Checkpoint + + // Public facing persistence API + public void TakeCheckpoint(IDevice device, out ulong numBytes) + { + begin_checkpoint(device, 0UL, out numBytes); + } + + public bool IsCheckpointCompleted(bool waitUntilComplete = false) + { + bool completed = checkpointEvent.IsSet; + if (!completed && waitUntilComplete) + { + checkpointEvent.Wait(); + return true; + } + return completed; + } + + // Implementation of an asynchronous checkpointing scheme + protected CountdownEvent checkpointEvent; + + internal void begin_checkpoint(IDevice device, ulong offset, out ulong numBytesWritten) + { + int localCount = count; + int recordsCountInLastLevel = localCount & PageSizeMask; + int numCompleteLevels = localCount >> PageSizeBits; + int numLevels = numCompleteLevels + (recordsCountInLastLevel > 0 ? 1 : 0); + checkpointEvent = new CountdownEvent(numLevels); + + uint alignedPageSize = PageSize * (uint)RecordSize; + uint lastLevelSize = (uint)recordsCountInLastLevel * (uint)RecordSize; + + numBytesWritten = 0; + for (int i = 0; i < numLevels; i++) + { + OverflowPagesFlushAsyncResult result = default(OverflowPagesFlushAsyncResult); + device.WriteAsync(pointers[i], offset + numBytesWritten, alignedPageSize, async_flush_callback, result); + numBytesWritten += (i == numCompleteLevels) ? 
lastLevelSize : alignedPageSize; + } + } + + private void async_flush_callback(uint errorCode, uint numBytes, NativeOverlapped* overlap) + { + try + { + if (errorCode != 0) + { + System.Diagnostics.Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); + } + } + catch (Exception ex) + { + System.Diagnostics.Trace.TraceError("Completion Callback error, {0}", ex.Message); + } + finally + { + checkpointEvent.Signal(); + } + } + + public int GetMaxValidAddress() + { + return count; + } + #endregion + + #region Recover + public void Recover(string filename, int buckets, ulong numBytes) + { + Recover(new LocalStorageDevice(filename, false, false, true), buckets, numBytes); + } + + public void Recover(IDevice device, int buckets, ulong numBytes) + { + begin_recovery(device, 0UL, buckets, numBytes, out ulong numBytesRead); + } + + public bool IsRecoveryCompleted(bool waitUntilComplete = false) + { + bool completed = (numLevelsToBeRecovered == 0); + if (!completed && waitUntilComplete) + { + while (numLevelsToBeRecovered != 0) + { + Thread.Sleep(10); + } + } + return completed; + } + + // Implementation of asynchronous recovery + private int numLevelsToBeRecovered; + + internal void begin_recovery(IDevice device, + ulong offset, + int buckets, + ulong numBytesToRead, + out ulong numBytesRead) + { + // Allocate as many records in memory + while (count < buckets) + { + Allocate(); + } + + int numRecords = (int)numBytesToRead / RecordSize; + int recordsCountInLastLevel = numRecords & PageSizeMask; + int numCompleteLevels = numRecords >> PageSizeBits; + int numLevels = numCompleteLevels + (recordsCountInLastLevel > 0 ? 1 : 0); + + numLevelsToBeRecovered = numLevels; + + numBytesRead = 0; + uint alignedPageSize = (uint)PageSize * (uint)RecordSize; + uint lastLevelSize = (uint)recordsCountInLastLevel * (uint)RecordSize; + for (int i = 0; i < numLevels; i++) + { + //read a full page + uint length = (uint)PageSize * (uint)RecordSize; ; + OverflowPagesReadAsyncResult result = default(OverflowPagesReadAsyncResult); + device.ReadAsync(offset + numBytesRead, pointers[i], length, async_page_read_callback, result); + numBytesRead += (i == numCompleteLevels) ? lastLevelSize : alignedPageSize; + } + } + + private void async_page_read_callback( + uint errorCode, + uint numBytes, + NativeOverlapped* overlap) + { + try + { + if (errorCode != 0) + { + System.Diagnostics.Trace.TraceError("OverlappedStream GetQueuedCompletionStatus error: {0}", errorCode); + } + } + catch (Exception ex) + { + System.Diagnostics.Trace.TraceError("Completion Callback error, {0}", ex.Message); + } + finally + { + Interlocked.Decrement(ref numLevelsToBeRecovered); + } + } + #endregion + } + + public struct FreeItem + { + public long removed_item; + public int removal_epoch; + } +} diff --git a/cs/src/core/Allocator/PersistentMemoryMalloc.cs b/cs/src/core/Allocator/PersistentMemoryMalloc.cs new file mode 100644 index 000000000..e6fc06234 --- /dev/null +++ b/cs/src/core/Allocator/PersistentMemoryMalloc.cs @@ -0,0 +1,899 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
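+
+// PersistentMemoryMalloc is the hybrid-log allocator: a circular buffer of BufferSize
+// in-memory pages backed by an IDevice, addressed by monotonically growing logical
+// addresses. Rough picture of the address markers maintained below (an illustrative
+// summary of the fields declared in this file, not additional state):
+//
+//   BeginAddress <= HeadAddress <= ReadOnlyAddress <= tail
+//   [disk only)    [in memory, immutable/flushing)   [in memory, mutable)
+//
+// ReadOnlyAddress trails the tail by LogMutableFraction of the buffer, and HeadAddress
+// trails it by HeadOffsetLagSize pages; pages below HeadAddress are dropped from the
+// circular buffer once they have been flushed and closed.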
+ +#define CALLOC +using System; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Runtime.InteropServices; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Linq.Expressions; +using System.IO; +using System.Diagnostics; + +namespace FASTER.core +{ + + public enum FlushStatus : int { Flushed, InProgress }; + + public enum CloseStatus : int { Closed, Open }; + + + public struct FullPageStatus + { + public long LastFlushedUntilAddress; + public FlushCloseStatus PageFlushCloseStatus; + } + + [StructLayout(LayoutKind.Explicit)] + public struct FlushCloseStatus + { + [FieldOffset(0)] + public FlushStatus PageFlushStatus; + [FieldOffset(4)] + public CloseStatus PageCloseStatus; + [FieldOffset(0)] + public long value; + } + + [StructLayout(LayoutKind.Explicit)] + internal struct PageOffset + { + [FieldOffset(0)] + public int Offset; + [FieldOffset(4)] + public int Page; + [FieldOffset(0)] + public long PageAndOffset; + } + + public unsafe partial class PersistentMemoryMalloc : IAllocator + { + // Epoch information + public LightEpoch epoch; + + // Read buffer pool + NativeSectorAlignedBufferPool readBufferPool; + + // Record size and pinning + private readonly bool IsPinned; + private const int PrivateRecordSize = 1; + private static bool ForceUnpinnedAllocation = false; + + private readonly IDevice device; + private readonly ISegmentedDevice objlogDevice; + private readonly int sectorSize; + + // Page size + private const int LogPageSizeBits = 25; + private const int PageSize = 1 << LogPageSizeBits; + private const int PageSizeMask = PageSize - 1; + private readonly int AlignedPageSizeBytes; + + // Segment size + private const int LogSegmentSizeBits = 30; + private const long SegmentSize = 1 << LogSegmentSizeBits; + private const long SegmentSizeMask = SegmentSize - 1; + private const int SegmentBufferSize = 1 + + (LogTotalSizeBytes / SegmentSize < 1 ? 
1 : (int)(LogTotalSizeBytes / SegmentSize)); + + // Total HLOG size + private const long LogTotalSizeBytes = 1L << 34; // 29 + private const int BufferSize = (int)(LogTotalSizeBytes / (1L << LogPageSizeBits)); + + // HeadOffset lag (from tail) + private const int HeadOffsetLagNumPages = 4; + private const int HeadOffsetLagSize = BufferSize - HeadOffsetLagNumPages; + private const long HeadOffsetLagAddress = (long)HeadOffsetLagSize << LogPageSizeBits; + + // ReadOnlyOffset lag (from tail) + public const double LogMutableFraction = 0.9; + public const long ReadOnlyLagAddress = (long)(LogMutableFraction * BufferSize) << LogPageSizeBits; + + // Circular buffer definition + private T[][] values = new T[BufferSize][]; + private GCHandle[] handles = new GCHandle[BufferSize]; + private IntPtr[] pointers = new IntPtr[BufferSize]; + private GCHandle ptrHandle; + private long* nativePointers; + + // Array that indicates the status of each buffer page + private FullPageStatus[] PageStatusIndicator = new FullPageStatus[BufferSize]; + + NativeSectorAlignedBufferPool ioBufferPool; + + // Index in circular buffer, of the current tail page + private volatile int TailPageIndex; + + // Global address of the current tail (next element to be allocated from the circular buffer) + private PageOffset TailPageOffset; + + public long ReadOnlyAddress; + + public long SafeReadOnlyAddress; + + public long HeadAddress; + + public long SafeHeadAddress; + + public long FlushedUntilAddress; + + public long BeginAddress; + + /// + /// The smallest record size that can be allotted + /// + public int RecordSize + { + get + { + return PrivateRecordSize; + } + } + + public PersistentMemoryMalloc(IDevice device) : this(device, 0) + { + Allocate(Constants.kFirstValidAddress); // null pointer + ReadOnlyAddress = GetTailAddress(); + SafeReadOnlyAddress = ReadOnlyAddress; + HeadAddress = ReadOnlyAddress; + SafeHeadAddress = ReadOnlyAddress; + BeginAddress = ReadOnlyAddress; + } + + public PersistentMemoryMalloc(IDevice device, long startAddress) + { + // Console.WriteLine("Total memory (GB) = " + totalSize/1000000000); + // Console.WriteLine("BufferSize = " + BufferSize); + // Console.WriteLine("ReadOnlyLag = " + (ReadOnlyLagAddress >> PageSizeBits)); + + if (BufferSize < 16) + { + throw new Exception("HLOG buffer must be at least 16 pages"); + } + + this.device = device; + + objlogDevice = CreateObjectLogDevice(device); + + sectorSize = (int)device.GetSectorSize(); + epoch = LightEpoch.Instance; + ioBufferPool = new NativeSectorAlignedBufferPool(1, sectorSize); + + if (ForceUnpinnedAllocation) + { + IsPinned = false; + } + else + { + IsPinned = true; + try + { + var tmp = new T[1]; + var h = GCHandle.Alloc(tmp, GCHandleType.Pinned); + var p = h.AddrOfPinnedObject(); + //PrivateRecordSize = Marshal.SizeOf(tmp[0]); + AlignedPageSizeBytes = (((PrivateRecordSize * PageSize) + (sectorSize - 1)) & ~(sectorSize - 1)); + } + catch (Exception) + { + IsPinned = false; + } + } + + ptrHandle = GCHandle.Alloc(pointers, GCHandleType.Pinned); + nativePointers = (long*)ptrHandle.AddrOfPinnedObject(); + + Initialize(startAddress); + } + + public int GetSectorSize() + { + return sectorSize; + } + + public void Initialize(long startAddress) + { + readBufferPool = new NativeSectorAlignedBufferPool(PrivateRecordSize, sectorSize); + long tailPage = startAddress >> LogPageSizeBits; + int tailPageIndex = (int)(tailPage % BufferSize); + + AllocatePage(tailPageIndex); + + SafeReadOnlyAddress = startAddress; + ReadOnlyAddress = startAddress; + 
SafeHeadAddress = startAddress; + HeadAddress = startAddress; + FlushedUntilAddress = startAddress; + BeginAddress = startAddress; + + TailPageOffset.Page = (int)(startAddress >> LogPageSizeBits); + TailPageOffset.Offset = (int)(startAddress & PageSizeMask); + + TailPageIndex = -1; + + //Handle the case when startAddress + pageSize overflows + //onto the next pageIndex in our buffer pages array + if (0 != (startAddress & PageSizeMask)) + { + // Update write cache to point to current level. + TailPageIndex = tailPageIndex; + Interlocked.MemoryBarrier(); + + // Allocate for next page + int newPageIndex = (tailPageIndex + 1) % BufferSize; + AllocatePage(newPageIndex); + } + } + + /// + /// Dispose memory allocator + /// + public void Free() + { + for (int i = 0; i < values.Length; i++) + { + if (IsPinned && (handles[i].IsAllocated)) handles[i].Free(); + values[i] = null; + PageStatusIndicator[i].PageFlushCloseStatus = new FlushCloseStatus { PageFlushStatus = FlushStatus.Flushed, PageCloseStatus = CloseStatus.Closed }; + } + handles = null; + pointers = null; + values = null; + TailPageOffset.Page = 0; + TailPageOffset.Offset = 0; + SafeReadOnlyAddress = 0; + ReadOnlyAddress = 0; + SafeHeadAddress = 0; + HeadAddress = 0; + BeginAddress = 1; + } + + public long GetTailAddress() + { + var local = TailPageOffset; + return ((long)local.Page << LogPageSizeBits) | (uint)local.Offset; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public T Get(long index) + { + if (this.IsPinned) + throw new Exception("Physical pointer returned by allocator: de-reference pointer to get records instead of calling Get"); + + return values + [index >> LogPageSizeBits] + [index & PageSizeMask]; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Set(long index, ref T value) + { + if (this.IsPinned) + throw new Exception("Physical pointer returned by allocator: de-reference pointer to set records instead of calling Set (otherwise, set ForceUnpinnedAllocation to true)"); + + values + [index >> LogPageSizeBits] + [index & PageSizeMask] + = value; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Set(long index, T value) + { + Set(index, ref value); + } + +#if USEFREELIST + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void FreeAtEpoch(long pointer, int removed_epoch) + { + if (freeList == null) freeList = new Queue(); + freeList.Enqueue(new FreeItem { removed_item = pointer, removal_epoch = removed_epoch }); + } + +#if DEBUG + public long TotalFreeCount() + { + long result = 0; + var x = allQueues.ToArray(); + foreach (var q in x) + { + result += q.Count; + } + return result; + } + + public long TotalUsedPointers() + { + return TailAddress - TotalFreeCount(); + } + +#endif +#endif + //Simple Accessor Functions + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public long GetPage(long logicalAddress) + { + return (logicalAddress >> LogPageSizeBits); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetPageIndexForPage(long page) + { + return (int)(page % BufferSize); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetPageIndexForAddress(long address) + { + return (int)((address >> LogPageSizeBits) % BufferSize); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetCapacityNumPages() + { + return BufferSize; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public long GetStartLogicalAddress(long page) + { + return page << LogPageSizeBits; + } + + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + public long GetPageSize() + { + return PageSize; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public long GetOffsetInPage(long address) + { + return address & PageSizeMask; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public long GetHeadOffsetLagInPages() + { + return HeadOffsetLagSize; + } + + /// + /// Used to obtain the physical address corresponding to a logical address + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public long GetPhysicalAddress(long logicalAddress) + { + // Offset within page + int offset = (int)(logicalAddress & PageSizeMask); + + // Global page address + long page = (logicalAddress >> LogPageSizeBits); + + // Index of page within the circular buffer + int pageIndex = (int)(page % BufferSize); + + return (*(nativePointers+pageIndex)) + offset*PrivateRecordSize; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public long GetPhysicalAddressInternal(long logicalAddress) + { + // Offset within page + int offset = (int)(logicalAddress & PageSizeMask); + + // Global page address + long page = (logicalAddress >> LogPageSizeBits); + + // Index of page within the circular buffer + int pageIndex = (int)(page % BufferSize); + + return (*(nativePointers + pageIndex)) + offset * PrivateRecordSize; + } + + /// + /// Key function used to allocate memory for a specified number of items + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public long Allocate(int numSlots = 1) + { + PageOffset localTailPageOffset = default(PageOffset); + + // Determine insertion index. + // ReSharper disable once CSharpWarnings::CS0420 +#pragma warning disable 420 + localTailPageOffset.PageAndOffset = Interlocked.Add(ref TailPageOffset.PageAndOffset, numSlots); +#pragma warning restore 420 + + int page = localTailPageOffset.Page; + int offset = localTailPageOffset.Offset - numSlots; + +#region HANDLE PAGE OVERFLOW + /* To prove correctness of the following modifications + * done to TailPageOffset and the allocation itself, + * we should use the fact that only one thread will have any + * of the following cases since it is a counter and we spin-wait + * until the tail is folded onto next page accordingly. + */ + if (localTailPageOffset.Offset >= PageSize) + { + if (offset >= PageSize) + { + //The tail offset value was more than page size before atomic add + //We consider that a failed attempt and retry again + var spin = new SpinWait(); + do + { + //Just to give some more time to the thread + // that is handling this overflow + while (TailPageOffset.Offset >= PageSize) + { + spin.SpinOnce(); + } + + // ReSharper disable once CSharpWarnings::CS0420 +#pragma warning disable 420 + localTailPageOffset.PageAndOffset = Interlocked.Add(ref TailPageOffset.PageAndOffset, numSlots); +#pragma warning restore 420 + + page = localTailPageOffset.Page; + offset = localTailPageOffset.Offset - numSlots; + } while (offset >= PageSize); + } + + + if (localTailPageOffset.Offset == PageSize) + { + //Folding over at page boundary + localTailPageOffset.Page++; + localTailPageOffset.Offset = 0; + TailPageOffset = localTailPageOffset; + } + else if (localTailPageOffset.Offset >= PageSize) + { + //Overflows not allowed. We allot same space in next page. 
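+                    // For instance (hypothetical numbers), with numSlots = 8 and only 3
+                    // slots left on the current page, the 8 slots are re-allotted at
+                    // offset 0 of the next page: the tail becomes (page + 1, numSlots)
+                    // and the 3 slots at the end of the old page are simply skipped.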
+ localTailPageOffset.Page++; + localTailPageOffset.Offset = numSlots; + TailPageOffset = localTailPageOffset; + + page = localTailPageOffset.Page; + offset = 0; + } + } +#endregion + + long address = (((long)page) << LogPageSizeBits) | ((long)offset); + + // Check if TailPageIndex is appropriate and allocated! + int pageIndex = page % BufferSize; + + /* + if (pageIndex == 0 && page != 0) + { + Debugger.Break(); + }*/ + if (TailPageIndex == pageIndex) + { + return (address); + } + + //Invert the address if either the previous page is not flushed or if it is null + if ((PageStatusIndicator[pageIndex].PageFlushCloseStatus.PageFlushStatus != FlushStatus.Flushed) || + (PageStatusIndicator[pageIndex].PageFlushCloseStatus.PageCloseStatus != CloseStatus.Closed) || + (values[pageIndex] == null)) + { + address = -address; + } + + // Update the read-only so that we can get more space for the tail + if (offset == 0) + { + if (address >= 0) + { + TailPageIndex = pageIndex; + Interlocked.MemoryBarrier(); + } + + long newPage = page + 1; + int newPageIndex = (int)((page + 1) % BufferSize); + + long tailAddress = (address < 0 ? -address : address); + PageAlignedShiftReadOnlyAddress(tailAddress); + PageAlignedShiftHeadAddress(tailAddress); + + if (values[newPageIndex] == null) + { + AllocatePage(newPageIndex); + } + } + + return (address); + } + + /// + /// If allocator cannot allocate new memory as the head has not shifted or the previous page + /// is not yet closed, it allocates but returns the negative address. + /// This function is invoked to check if the address previously allocated has become valid to be used + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void CheckForAllocateComplete(ref long address) + { + if (address >= 0) + { + throw new Exception("Address already allocated!"); + } + + PageOffset p = default(PageOffset); + p.Page = (int)((-address) >> LogPageSizeBits); + p.Offset = (int)((-address) & PageSizeMask); + + //Check write cache + int pageIndex = p.Page % BufferSize; + if (TailPageIndex == pageIndex) + { + address = -address; + return; + } + + //Check if we can move the head offset + long currentTailAddress = GetTailAddress(); + PageAlignedShiftHeadAddress(currentTailAddress); + + //Check if I can allocate pageIndex at all + if ((PageStatusIndicator[pageIndex].PageFlushCloseStatus.PageFlushStatus != FlushStatus.Flushed) || + (PageStatusIndicator[pageIndex].PageFlushCloseStatus.PageCloseStatus != CloseStatus.Closed) || + (values[pageIndex] == null)) + { + return; + } + + //correct values and set write cache + address = -address; + if (p.Offset == 0) + { + TailPageIndex = pageIndex; + } + return; + } + + /// + /// Used by applications to make the current state of the database immutable quickly + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void ShiftReadOnlyToTail(out long tailAddress) + { + tailAddress = GetTailAddress(); + long localTailAddress = tailAddress; + long currentReadOnlyOffset = ReadOnlyAddress; + if (MonotonicUpdate(ref ReadOnlyAddress, tailAddress, out long oldReadOnlyOffset)) + { + epoch.BumpCurrentEpoch(() => OnPagesMarkedReadOnly(localTailAddress, false)); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void ShiftBeginAddress(long oldBeginAddress, long newBeginAddress) + { + epoch.BumpCurrentEpoch(() => + { + device.DeleteAddressRange(oldBeginAddress, newBeginAddress); + objlogDevice.DeleteSegmentRange((int)(oldBeginAddress >> LogSegmentSizeBits), (int)(newBeginAddress >> 
LogSegmentSizeBits)); + }); + } + + /// + /// Checks if until address has been flushed! + /// + /// + /// + public bool CheckFlushedUntil(long address) + { + return FlushedUntilAddress >= address; + } + + public void KillFuzzyRegion() + { + while (SafeReadOnlyAddress != ReadOnlyAddress) + { + Interlocked.CompareExchange(ref SafeReadOnlyAddress, + ReadOnlyAddress, + SafeReadOnlyAddress); + } + } + + /// + /// Seal: make sure there are no longer any threads writing to the page + /// Flush: send page to secondary store + /// + /// + /// + public void OnPagesMarkedReadOnly(long newSafeReadOnlyAddress, bool waitForPendingFlushComplete = false) + { + if(MonotonicUpdate(ref SafeReadOnlyAddress, newSafeReadOnlyAddress, out long oldSafeReadOnlyAddress)) + { + Debug.WriteLine("SafeReadOnly shifted from {0:X} to {1:X}", oldSafeReadOnlyAddress, newSafeReadOnlyAddress); + long startPage = oldSafeReadOnlyAddress >> LogPageSizeBits; + + long endPage = (newSafeReadOnlyAddress >> LogPageSizeBits); + int numPages = (int)(endPage - startPage); + if (numPages > 10) + { + new Thread( + () => AsyncFlushPages(startPage, newSafeReadOnlyAddress)).Start(); + } + else + { + AsyncFlushPages(startPage, newSafeReadOnlyAddress); + } + } + } + + /// + /// Action to be performed for when all threads have agreed that a page range is closed. + /// + /// + /// + /// + public void OnPagesClosed(long newSafeHeadAddress, bool replaceWithCleanPage = false) + { + if (MonotonicUpdate(ref SafeHeadAddress, newSafeHeadAddress, out long oldSafeHeadAddress)) + { + Debug.WriteLine("SafeHeadOffset shifted from {0:X} to {1:X}", oldSafeHeadAddress, newSafeHeadAddress); + + for (long closePageAddress = oldSafeHeadAddress; closePageAddress < newSafeHeadAddress; closePageAddress += PageSize) + { + int closePage = (int)((closePageAddress >> LogPageSizeBits) % BufferSize); + + if (replaceWithCleanPage) + { + if (values[closePage] == null) + { + // Allocate a new page + AllocatePage(closePage); + } + else + { + //Clear an old used page + // BUG: we cannot clear because the + // page may not be flushed. 
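+                            // Clearing is instead deferred to ClearPage() below, which
+                            // runs only after the page's flush status has been observed
+                            // as Flushed.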
+ // Array.Clear(values[closePage], 0, values[closePage].Length); + } + } + else + { + values[closePage] = null; + } + + while (true) + { + var oldStatus = PageStatusIndicator[closePage].PageFlushCloseStatus; + if (oldStatus.PageFlushStatus == FlushStatus.Flushed) + { + ClearPage(closePage, (closePageAddress >> LogPageSizeBits) == 0); + + var thisCloseSegment = closePageAddress >> LogSegmentSizeBits; + var nextClosePage = (closePageAddress >> LogPageSizeBits) + 1; + var nextCloseSegment = nextClosePage >> (LogSegmentSizeBits - LogPageSizeBits); + + if (thisCloseSegment != nextCloseSegment) + { + // Last page in current segment + segmentOffsets[thisCloseSegment % SegmentBufferSize] = 0; + } + } + else + { + throw new Exception("Impossible"); + } + var newStatus = oldStatus; + newStatus.PageCloseStatus = CloseStatus.Closed; + if (oldStatus.value == Interlocked.CompareExchange(ref PageStatusIndicator[closePage].PageFlushCloseStatus.value, newStatus.value, oldStatus.value)) + { + break; + } + } + + //Necessary to propagate this change to other threads + Interlocked.MemoryBarrier(); + } + } + } + + private void ClearPage(int page, bool pageZero) + { + if (Key.HasObjectsToSerialize() || Value.HasObjectsToSerialize()) + { + long ptr = (long)pointers[page]; + int numBytes = PageSize * PrivateRecordSize; + long endptr = ptr + numBytes; + + if (pageZero) ptr += Constants.kFirstValidAddress; + + List addr = new List(); + while (ptr < endptr) + { + if (!Layout.GetInfo(ptr)->Invalid) + { + if (Key.HasObjectsToSerialize()) + { + Key* key = Layout.GetKey(ptr); + Key.Free(key); + } + if (Value.HasObjectsToSerialize()) + { + Value* value = Layout.GetValue(ptr); + Value.Free(value); + } + } + ptr += Layout.GetPhysicalSize(ptr); + } + } + Array.Clear(values[page], 0, values[page].Length); + } + + + /// + /// Allocate memory page, pinned in memory, and in sector aligned form, if possible + /// + /// + private void AllocatePage(int index, bool clear = false) + { + if (IsPinned) + { + var adjustedSize = PageSize + (int)Math.Ceiling(2 * sectorSize / PrivateRecordSize * 1.0); + T[] tmp = new T[adjustedSize]; + if (clear) + { + Array.Clear(tmp, 0, adjustedSize); + } + else + { +#if !(CALLOC) + Array.Clear(tmp, 0, adjustedSize); +#endif + } + + handles[index] = GCHandle.Alloc(tmp, GCHandleType.Pinned); + long p = (long)handles[index].AddrOfPinnedObject(); + pointers[index] = (IntPtr)((p + (sectorSize - 1)) & ~(sectorSize - 1)); + values[index] = tmp; + } + else + { + T[] tmp = new T[PageSize]; +#if !(CALLOC) + Array.Clear(tmp, 0, tmp.Length); +#endif + values[index] = tmp; + } + + PageStatusIndicator[index].PageFlushCloseStatus.PageFlushStatus = FlushStatus.Flushed; + PageStatusIndicator[index].PageFlushCloseStatus.PageCloseStatus = CloseStatus.Closed; + Interlocked.MemoryBarrier(); + } + + /// + /// Called every time a new tail page is allocated. Here the read-only is + /// shifted only to page boundaries unlike ShiftReadOnlyToTail where shifting + /// can happen to any fine-grained address. 
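+        /// The target is (currentTailAddress & ~PageSizeMask) - ReadOnlyLagAddress, applied
+        /// via MonotonicUpdate so the marker never moves backwards; the flush work itself is
+        /// deferred to an epoch action that calls OnPagesMarkedReadOnly.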
+ /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void PageAlignedShiftReadOnlyAddress(long currentTailAddress) + { + long currentReadOnlyAddress = ReadOnlyAddress; + long pageAlignedTailAddress = currentTailAddress & ~PageSizeMask; + long desiredReadOnlyAddress = (pageAlignedTailAddress - ReadOnlyLagAddress); + if (MonotonicUpdate(ref ReadOnlyAddress, desiredReadOnlyAddress, out long oldReadOnlyAddress)) + { + if (oldReadOnlyAddress == 0) + Console.WriteLine("Going read-only"); + /* + for (int i = (int)(oldReadOnlyAddress >> LogPageSizeBits); i < (int)(desiredReadOnlyAddress >> LogPageSizeBits); i++) + { + //Set status to in-progress + PageStatusIndicator[i % BufferSize].PageFlushCloseStatus + = new FlushCloseStatus { PageFlushStatus = FlushStatus.InProgress, PageCloseStatus = CloseStatus.Open }; + PageStatusIndicator[i % BufferSize].LastFlushedUntilAddress = -1; + } + */ + epoch.BumpCurrentEpoch(() => OnPagesMarkedReadOnly(desiredReadOnlyAddress)); + } + } + + /// + /// Called whenever a new tail page is allocated or when the user is checking for a failed memory allocation + /// Tries to shift head address based on the head offset lag size. + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void PageAlignedShiftHeadAddress(long currentTailAddress) + { + //obtain local values of variables that can change + long currentHeadAddress = HeadAddress; + long currentFlushedUntilAddress = FlushedUntilAddress; + long pageAlignedTailAddress = currentTailAddress & ~PageSizeMask; + long desiredHeadAddress = (pageAlignedTailAddress - HeadOffsetLagAddress); + + long newHeadAddress = desiredHeadAddress; + if(currentFlushedUntilAddress < newHeadAddress) + { + newHeadAddress = currentFlushedUntilAddress; + } + newHeadAddress = newHeadAddress & ~PageSizeMask; + + if (MonotonicUpdate(ref HeadAddress, newHeadAddress, out long oldHeadAddress)) + { + if (oldHeadAddress == 0) + Console.WriteLine("Going external memory"); + + Debug.WriteLine("Allocate: Moving head offset from {0:X} to {1:X}", oldHeadAddress, newHeadAddress); + epoch.BumpCurrentEpoch(() => OnPagesClosed(newHeadAddress, true)); + } + } + + /// + /// Every async flush callback tries to update the flushed until address to the latest value possible + /// Is there a better way to do this with enabling fine-grained addresses (not necessarily at page boundaries)? + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void ShiftFlushedUntilAddress() + { + long currentFlushedUntilAddress = FlushedUntilAddress; + long page = GetPage(currentFlushedUntilAddress); + + bool update = false; + long pageLastFlushedAddress = PageStatusIndicator[(int)(page % BufferSize)].LastFlushedUntilAddress; + while (pageLastFlushedAddress >= currentFlushedUntilAddress) + { + currentFlushedUntilAddress = pageLastFlushedAddress; + update = true; + page++; + pageLastFlushedAddress = PageStatusIndicator[(int)(page % BufferSize)].LastFlushedUntilAddress; + } + + if(update) + { + bool success = MonotonicUpdate(ref FlushedUntilAddress, currentFlushedUntilAddress, out long oldFlushedUntilAddress); + if (success) + { + } + } + } + + + + /// + /// Used by several functions to update the variable to newValue. Ignores if newValue is smaller or + /// than the current value. 
+        /// equal to the current value, i.e., the variable is only ever advanced.
+ /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool MonotonicUpdate(ref long variable, long newValue, out long oldValue) + { + oldValue = variable; + while (oldValue < newValue) + { + var foundValue = Interlocked.CompareExchange(ref variable, newValue, oldValue); + if (foundValue == oldValue) + { + return true; + } + oldValue = foundValue; + } + return false; + } + } +} diff --git a/cs/src/core/Codegen/CompilerBase.cs b/cs/src/core/Codegen/CompilerBase.cs new file mode 100644 index 000000000..843ec32d5 --- /dev/null +++ b/cs/src/core/Codegen/CompilerBase.cs @@ -0,0 +1,228 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.CSharp; +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Text; +using static FASTER.core.Roslyn.Helper; + +namespace FASTER.core.Roslyn +{ + class CompilerBase + { + protected CSharpCompilation compilation; + protected Dictionary metadataReferences = new Dictionary(); + protected IEnumerable