From 91f600544801e05acc251cfef4013a8a1645d06a Mon Sep 17 00:00:00 2001
From: Dennis Bakhuis
Date: Sat, 20 Jun 2020 12:44:54 +0200
Subject: [PATCH 1/3] DOC: document support for in-memory HDFStore GH33166

I have added to the docstring that **kwargs passes its parameters to
PyTables. Maybe this is too much, but I also added an example that uses
**kwargs to pass the driver parameter and create an in-memory HDFStore.

Furthermore, I have added the HDFStore class to the reference API, as it
was not autogenerated, and made it clearer that **kwargs passes its
parameters to PyTables.

Added to the cookbook the method of creating in-memory HDFStores,
including an example.
---
 doc/source/reference/io.rst        |  1 +
 doc/source/user_guide/cookbook.rst | 19 ++++++++++++++
 pandas/io/pytables.py              | 42 ++++++++++++++++++++----------
 3 files changed, 48 insertions(+), 14 deletions(-)

diff --git a/doc/source/reference/io.rst b/doc/source/reference/io.rst
index 0037d4a4410c3..3de1028486009 100644
--- a/doc/source/reference/io.rst
+++ b/doc/source/reference/io.rst
@@ -74,6 +74,7 @@ HDFStore: PyTables (HDF5)
    :toctree: api/

    read_hdf
+   HDFStore
    HDFStore.put
    HDFStore.append
    HDFStore.get

diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst
index 56ef6fc479f2c..50b946999092a 100644
--- a/doc/source/user_guide/cookbook.rst
+++ b/doc/source/user_guide/cookbook.rst
@@ -1166,6 +1166,25 @@ Storing Attributes to a group node
     store.close()
     os.remove('test.h5')

+You can create or load an HDFStore in memory by passing the ``driver``
+parameter to PyTables. Changes are only written to disk when the HDFStore
+is closed.
+
+.. ipython:: python
+
+    store = pd.HDFStore('test.h5', 'w', driver='H5FD_CORE')
+
+    df = pd.DataFrame(np.random.randn(8, 3))
+    store['test'] = df
+
+    # data is only written to disk when the store is closed:
+    store.close()
+
+.. ipython:: python
+    :suppress:
+
+    os.remove('test.h5')
+
 .. _cookbook.binary:

 Binary files

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 8aac8f9531512..bd105d89776a5 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -447,20 +447,19 @@ class HDFStore:

     Parameters
     ----------
-    path : string
-        File path to HDF5 file
+    path : str
+        File path to HDF5 file.
     mode : {'a', 'w', 'r', 'r+'}, default 'a'
-
-        ``'r'``
-            Read-only; no data can be modified.
-        ``'w'``
-            Write; a new file is created (an existing file with the same
-            name would be deleted).
-        ``'a'``
-            Append; an existing file is opened for reading and writing,
-            and if the file does not exist it is created.
-        ``'r+'``
-            It is similar to ``'a'``, but the file must already exist.
+        ``'r'``
+            Read-only; no data can be modified.
+        ``'w'``
+            Write; a new file is created (an existing file with the same
+            name would be deleted).
+        ``'a'``
+            Append; an existing file is opened for reading and writing,
+            and if the file does not exist it is created.
+        ``'r+'``
+            It is similar to ``'a'``, but the file must already exist.
     complevel : int, 0-9, default None
         Specifies a compression level for data.
         A value of 0 or None disables compression.
     complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib'
         Specifies the compression library to be used.
         As of v0.20.2 these additional compressors for Blosc are supported
         (default if no compressor specified: 'blosc:blosclz'):
         {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy',
          'blosc:zlib', 'blosc:zstd'}.
         Specifying a compression library which is not available issues
         a ValueError.
     fletcher32 : bool, default False
-        If applying compression use the fletcher32 checksum
+        If applying compression use the fletcher32 checksum.
+    **kwargs
+        These parameters will be passed to the PyTables open_file method.

     Examples
     --------
@@ -482,6 +483,17 @@ class HDFStore:
     >>> store['foo'] = bar # write to HDF5
     >>> bar = store['foo'] # retrieve
     >>> store.close()
+
+    **Create or load HDF5 file in-memory**
+
+    When passing the 'driver' option to the PyTables open_file method through
+    **kwargs, the HDF5 file is loaded or created in memory and is only
+    written to disk when the store is closed:
+
+    >>> bar = pd.DataFrame(np.random.randn(10, 4))
+    >>> store = pd.HDFStore('test.h5', driver='H5FD_CORE')
+    >>> store['foo'] = bar
+    >>> store.close() # only now, data is written to disk
     """

     _handle: Optional["File"]
@@ -634,6 +646,8 @@ def open(self, mode: str = "a", **kwargs):
         ----------
         mode : {'a', 'w', 'r', 'r+'}, default 'a'
             See HDFStore docstring or tables.open_file for info about modes
+        **kwargs
+            These parameters will be passed to the PyTables open_file method.
         """
         tables = _tables()

From 94e26da2a3379ffa129f1bdf516195f9e20febcd Mon Sep 17 00:00:00 2001
From: Dennis Bakhuis
Date: Sat, 20 Jun 2020 15:37:20 +0200
Subject: [PATCH 2/3] indentation correction

---
 pandas/io/pytables.py | 55 +++++++++++++++++--------------------------
 1 file changed, 22 insertions(+), 33 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index bd105d89776a5..ef147363b564b 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -448,33 +448,33 @@ class HDFStore:
     Parameters
     ----------
     path : str
-        File path to HDF5 file.
+        File path to HDF5 file.
     mode : {'a', 'w', 'r', 'r+'}, default 'a'
-        ``'r'``
-            Read-only; no data can be modified.
-        ``'w'``
-            Write; a new file is created (an existing file with the same
-            name would be deleted).
-        ``'a'``
-            Append; an existing file is opened for reading and writing,
-            and if the file does not exist it is created.
-        ``'r+'``
-            It is similar to ``'a'``, but the file must already exist.
+        ``'r'``
+            Read-only; no data can be modified.
+        ``'w'``
+            Write; a new file is created (an existing file with the same
+            name would be deleted).
+        ``'a'``
+            Append; an existing file is opened for reading and writing,
+            and if the file does not exist it is created.
+        ``'r+'``
+            It is similar to ``'a'``, but the file must already exist.
     complevel : int, 0-9, default None
-        Specifies a compression level for data.
-        A value of 0 or None disables compression.
+        Specifies a compression level for data.
+        A value of 0 or None disables compression.
     complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib'
-        Specifies the compression library to be used.
-        As of v0.20.2 these additional compressors for Blosc are supported
-        (default if no compressor specified: 'blosc:blosclz'):
-        {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy',
-         'blosc:zlib', 'blosc:zstd'}.
-        Specifying a compression library which is not available issues
-        a ValueError.
+        Specifies the compression library to be used.
+        As of v0.20.2 these additional compressors for Blosc are supported
+        (default if no compressor specified: 'blosc:blosclz'):
+        {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy',
+         'blosc:zlib', 'blosc:zstd'}.
+        Specifying a compression library which is not available issues
+        a ValueError.
     fletcher32 : bool, default False
-        If applying compression use the fletcher32 checksum.
+        If applying compression use the fletcher32 checksum.
     **kwargs
-        These parameters will be passed to the PyTables open_file method.
+        These parameters will be passed to the PyTables open_file method.

     Examples
     --------
@@ -483,17 +483,6 @@ class HDFStore:
     >>> store['foo'] = bar # write to HDF5
     >>> bar = store['foo'] # retrieve
     >>> store.close()
-
-    **Create or load HDF5 file in-memory**
-
-    When passing the 'driver' option to the PyTables open_file method through
-    **kwargs, the HDF5 file is loaded or created in memory and is only
-    written to disk when the store is closed:
-
-    >>> bar = pd.DataFrame(np.random.randn(10, 4))
-    >>> store = pd.HDFStore('test.h5', driver='H5FD_CORE')
-    >>> store['foo'] = bar
-    >>> store.close() # only now, data is written to disk
     """

     _handle: Optional["File"]

From c4229728f2370b25686082c0d6088a952d0ace81 Mon Sep 17 00:00:00 2001
From: Dennis Bakhuis
Date: Sat, 20 Jun 2020 17:50:31 +0200
Subject: [PATCH 3/3] removed base class from doc/source/reference/io.rst

---
 doc/source/reference/io.rst |  1 -
 pandas/io/pytables.py       | 12 ++++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/doc/source/reference/io.rst b/doc/source/reference/io.rst
index 3de1028486009..0037d4a4410c3 100644
--- a/doc/source/reference/io.rst
+++ b/doc/source/reference/io.rst
@@ -74,7 +74,6 @@ HDFStore: PyTables (HDF5)
    :toctree: api/

    read_hdf
-   HDFStore
    HDFStore.put
    HDFStore.append
    HDFStore.get

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index ef147363b564b..800e9474cc0f8 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -450,6 +450,7 @@ class HDFStore:
     path : str
         File path to HDF5 file.
     mode : {'a', 'w', 'r', 'r+'}, default 'a'
+
        ``'r'``
            Read-only; no data can be modified.
        ``'w'``
@@ -483,6 +484,17 @@ class HDFStore:
     >>> store['foo'] = bar # write to HDF5
     >>> bar = store['foo'] # retrieve
     >>> store.close()
+
+    **Create or load HDF5 file in-memory**
+
+    When passing the `driver` option to the PyTables open_file method through
+    **kwargs, the HDF5 file is loaded or created in memory and is only
+    written to disk when the store is closed:
+
+    >>> bar = pd.DataFrame(np.random.randn(10, 4))
+    >>> store = pd.HDFStore('test.h5', driver='H5FD_CORE')
+    >>> store['foo'] = bar
+    >>> store.close() # only now, data is written to disk
     """

     _handle: Optional["File"]
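
Not part of the patches above, but a rough end-to-end sketch of the behaviour
being documented, for anyone who wants to try it locally. It assumes the extra
keyword arguments reach ``tables.open_file`` unchanged (which is what the new
docstring text states) and that the PyTables ``H5FD_CORE`` driver is
available; the file name and the ``'frame'`` key are only placeholders:

    >>> import numpy as np
    >>> import pandas as pd
    >>> df = pd.DataFrame(np.random.randn(8, 3))
    >>> # extra kwargs are forwarded to tables.open_file; with the H5FD_CORE
    >>> # driver the HDF5 file image is kept in memory while the store is open
    >>> store = pd.HDFStore('test.h5', mode='w', driver='H5FD_CORE')
    >>> store['frame'] = df
    >>> store['frame'].equals(df)  # reads are served from the in-memory image
    True
    >>> store.close()  # only on close is test.h5 actually written to disk
    >>> import os; os.remove('test.h5')

PyTables also documents a ``driver_core_backing_store`` option for this
driver which, if it behaves as described there, should keep the file from
being written to disk at all on close; I have not verified that here.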