Skip to content

Commit

Permalink
feat(python): add "calamine" support to read_excel, using `fastexce…
Browse files Browse the repository at this point in the history
…l` (~8-10x speedup) (#14000)
  • Loading branch information
alexander-beedie committed Jan 26, 2024
1 parent 96b8b93 commit 4e19d62
Show file tree
Hide file tree
Showing 7 changed files with 403 additions and 149 deletions.
41 changes: 26 additions & 15 deletions py-polars/polars/dataframe/frame.py
Expand Up @@ -52,6 +52,7 @@
_check_for_pyarrow,
dataframe_api_compat,
hvplot,
import_optional,
)
from polars.dependencies import numpy as np
from polars.dependencies import pandas as pd
Expand Down Expand Up @@ -3072,15 +3073,8 @@ def write_excel(
... sheet_zoom=125,
... )
""" # noqa: W505
try:
import xlsxwriter
from xlsxwriter.utility import xl_cell_to_rowcol
except ImportError:
msg = (
"Excel export requires xlsxwriter"
"\n\nPlease run: pip install XlsxWriter"
)
raise ImportError(msg) from None
xlsxwriter = import_optional("xlsxwriter", err_prefix="Excel export requires")
from xlsxwriter.utility import xl_cell_to_rowcol

# setup workbook/worksheet
wb, ws, can_close = _xl_setup_workbook(workbook, worksheet)
Expand Down Expand Up @@ -6750,7 +6744,10 @@ def drop_in_place(self, name: str) -> Series:

def cast(
self,
dtypes: Mapping[ColumnNameOrSelector, PolarsDataType] | PolarsDataType,
dtypes: (
Mapping[ColumnNameOrSelector | PolarsDataType, PolarsDataType]
| PolarsDataType
),
*,
strict: bool = True,
) -> DataFrame:
Expand Down Expand Up @@ -6791,12 +6788,19 @@ def cast(
│ 3.0 ┆ 8 ┆ 2022-05-06 │
└─────┴─────┴────────────┘
Cast all frame columns to the specified dtype:
Cast all frame columns matching one dtype (or dtype group) to another dtype:
>>> df.cast(pl.String).to_dict(as_series=False)
{'foo': ['1', '2', '3'],
'bar': ['6.0', '7.0', '8.0'],
'ham': ['2020-01-02', '2021-03-04', '2022-05-06']}
>>> df.cast({pl.Date: pl.Datetime})
shape: (3, 3)
┌─────┬─────┬─────────────────────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ datetime[μs] │
╞═════╪═════╪═════════════════════╡
│ 1 ┆ 6.0 ┆ 2020-01-02 00:00:00 │
│ 2 ┆ 7.0 ┆ 2021-03-04 00:00:00 │
│ 3 ┆ 8.0 ┆ 2022-05-06 00:00:00 │
└─────┴─────┴─────────────────────┘
Use selectors to define the columns being cast:
Expand All @@ -6812,6 +6816,13 @@ def cast(
│ 2 ┆ 7 ┆ 2021-03-04 │
│ 3 ┆ 8 ┆ 2022-05-06 │
└─────┴─────┴────────────┘
Cast all frame columns to the specified dtype:
>>> df.cast(pl.String).to_dict(as_series=False)
{'foo': ['1', '2', '3'],
'bar': ['6.0', '7.0', '8.0'],
'ham': ['2020-01-02', '2021-03-04', '2022-05-06']}
"""
return self.lazy().cast(dtypes, strict=strict).collect(_eager=True)

Expand Down
44 changes: 44 additions & 0 deletions py-polars/polars/dependencies.py
Expand Up @@ -229,6 +229,50 @@ def _check_for_pydantic(obj: Any, *, check_type: bool = True) -> bool:
)


def import_optional(
module_name: str,
err_prefix: str = "Required package",
err_suffix: str = "not installed",
min_version: str | tuple[int, ...] | None = None,
) -> Any:
"""
Import an optional dependency, returning the module.
Parameters
----------
module_name : str
Name of the dependency to import.
err_prefix : str, optional
Error prefix to use in the raised exception (appears before the module name).
err_suffix: str, optional
Error suffix to use in the raised exception (follows the module name).
min_version : {str, tuple[int]}, optional
If a minimum module version is required, specify it here.
"""
from polars.exceptions import ModuleUpgradeRequired
from polars.utils.various import parse_version

try:
module = import_module(module_name)
except ImportError:
prefix = f"{err_prefix.strip(' ')} " if err_prefix else ""
suffix = f" {err_prefix.strip(' ')}" if err_suffix else ""
err_message = (
f"{prefix}'{module_name}'{suffix}.\n"
f"Please install it using the command `pip install {module_name}`."
)
raise ImportError(err_message) from None

if min_version:
min_version = parse_version(min_version)
mod_version = parse_version(module.__version__)
if mod_version < min_version:
msg = f"requires module_name {min_version} or higher, found {mod_version}"
raise ModuleUpgradeRequired(msg)

return module


__all__ = [
# lazy-load rarely-used/heavy builtins (for fast startup)
"dataclasses",
Expand Down

0 comments on commit 4e19d62

Please sign in to comment.