diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 1ed6f2c172787..1a62427b08057 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -44,7 +44,6 @@ ValueLabelTypeMismatch, ) from pandas.util._decorators import ( - doc, set_module, ) from pandas.util._exceptions import find_stack_level @@ -2377,10 +2376,6 @@ def _dtype_to_default_stata_fmt( raise NotImplementedError(f"Data type {dtype} not supported.") -@doc( - storage_options=_shared_docs["storage_options"], - compression_options=_shared_docs["compression_options"] % "fname", -) class StataWriter(StataParser): """ A class for writing Stata binary dta files @@ -2412,11 +2407,36 @@ class StataWriter(StataParser): variable_labels : dict Dictionary containing columns as keys and variable labels as values. Each label must be 80 characters or smaller. - {compression_options} + compression : str or dict, default 'infer' + For on-the-fly compression of the output data. If 'infer' and 'fname' is + path-like, then detect compression from the following extensions: '.gz', + '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' + (otherwise no compression). + Set to ``None`` for no compression. + Can also be a dict with key ``'method'`` set + to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} + and other key-value pairs are forwarded to + ``zipfile.ZipFile``, ``gzip.GzipFile``, + ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or + ``tarfile.TarFile``, respectively. + As an example, the following could be passed for faster compression and to + create a reproducible gzip archive: + ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``. + + .. versionadded:: 1.5.0 + Added support for `.tar` files. .. versionchanged:: 1.4.0 Zstandard support. - {storage_options} + storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc. 
For HTTP(S) URLs the key-value pairs + are forwarded to ``urllib.request.Request`` as header options. For other + URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are + forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more + details, and for more examples on storage options refer `here + <https://pandas.pydata.org/docs/user_guide/io.html? + highlight=storage_options#reading-writing-remote-files>`_. value_labels : dict of dicts Dictionary containing columns as keys and dictionaries of column value @@ -2449,14 +2469,14 @@ class StataWriter(StataParser): >>> writer.write_file() Directly write a zip file - >>> compression = {{"method": "zip", "archive_name": "data_file.dta"}} + >>> compression = {"method": "zip", "archive_name": "data_file.dta"} >>> writer = StataWriter("./data_file.zip", data, compression=compression) >>> writer.write_file() Save a DataFrame with dates >>> from datetime import datetime >>> data = pd.DataFrame([[datetime(2000, 1, 1)]], columns=["date"]) - >>> writer = StataWriter("./date_data_file.dta", data, {{"date": "tw"}}) + >>> writer = StataWriter("./date_data_file.dta", data, {"date": "tw"}) >>> writer.write_file() """