-
-
Notifications
You must be signed in to change notification settings - Fork 17.8k
/
spss.py
93 lines (75 loc) · 2.79 KB
/
spss.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from __future__ import annotations
from typing import (
TYPE_CHECKING,
Any,
)
from pandas._libs import lib
from pandas.compat._optional import import_optional_dependency
from pandas.util._validators import check_dtype_backend
from pandas.core.dtypes.inference import is_list_like
from pandas.io.common import stringify_path
if TYPE_CHECKING:
from collections.abc import Sequence
from pathlib import Path
from pandas._typing import DtypeBackend
from pandas import DataFrame
def read_spss(
    path: str | Path,
    usecols: Sequence[str] | None = None,
    convert_categoricals: bool = True,
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
    **kwargs: Any,
) -> DataFrame:
    """
    Read an SPSS file located at ``path`` and return it as a DataFrame.

    The file is parsed by the optional ``pyreadstat`` dependency. The
    attributes of the metadata object returned by ``pyreadstat.read_sav``
    are attached to the result through :attr:`DataFrame.attrs`.

    Parameters
    ----------
    path : str or Path
        Location of the SPSS file.
    usecols : list-like, optional
        Names of the columns to load. When None, every column is returned.
    convert_categoricals : bool, default is True
        Whether categorical columns are converted into pd.Categorical.
        Forwarded to pyreadstat as ``apply_value_formats``.
    dtype_backend : {'numpy_nullable', 'pyarrow'}
        Back-end data type applied to the resultant :class:`DataFrame`
        (still experimental). When left unspecified, nullable data types
        are not used. Otherwise:

        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
        * ``"pyarrow"``: returns pyarrow-backed
          nullable :class:`ArrowDtype` :class:`DataFrame`

        .. versionadded:: 2.0
    **kwargs
        Extra keyword arguments handed through unchanged to
        :func:`pyreadstat.read_sav`.

        .. versionadded:: 3.0

    Returns
    -------
    DataFrame
        The contents of the SPSS file.

    Raises
    ------
    TypeError
        If ``usecols`` is given but is not list-like.

    See Also
    --------
    read_csv : Read a comma-separated values (csv) file into a pandas DataFrame.
    read_excel : Read an Excel file into a pandas DataFrame.
    read_sas : Read an SAS file into a pandas DataFrame.
    read_orc : Load an ORC object into a pandas DataFrame.
    read_feather : Load a feather-format object into a pandas DataFrame.

    Examples
    --------
    >>> df = pd.read_spss("spss_data.sav")  # doctest: +SKIP
    """
    # Resolve the optional backend first, then validate the arguments.
    pyreadstat = import_optional_dependency("pyreadstat")
    check_dtype_backend(dtype_backend)

    if usecols is None:
        columns = None
    elif is_list_like(usecols):
        # pyreadstat requires a list, so materialise any other list-like.
        columns = list(usecols)
    else:
        raise TypeError("usecols must be list-like.")

    frame, meta = pyreadstat.read_sav(
        stringify_path(path),
        usecols=columns,
        apply_value_formats=convert_categoricals,
        **kwargs,
    )

    # Expose the file-level metadata to callers via DataFrame.attrs.
    frame.attrs = meta.__dict__

    if dtype_backend is lib.no_default:
        return frame
    return frame.convert_dtypes(dtype_backend=dtype_backend)