Move coerce_arrow into internals (#1846)
To remove the public api dependency in utils.py
zundertj committed Nov 21, 2021
1 parent bf44119 commit 3a2fad9
Showing 2 changed files with 33 additions and 45 deletions.
34 changes: 33 additions & 1 deletion py-polars/polars/internals/construction.py
@@ -25,7 +25,6 @@
    _DOCUMENTING = False
except ImportError:
    _DOCUMENTING = True

from polars.utils import coerce_arrow

if TYPE_CHECKING:
import pandas as pd
@@ -417,3 +416,36 @@ def pandas_to_pydf(
    }
    arrow_table = pa.table(arrow_dict)
    return arrow_to_pydf(arrow_table, columns=columns, rechunk=rechunk)


def coerce_arrow(array: "pa.Array") -> "pa.Array":
    # also coerces timezone to naive representation
    # units are accounted for by pyarrow
    if "timestamp" in str(array.type):
        warnings.warn(
            "Conversion of (potentially) timezone aware to naive datetimes. TZ information may be lost",
        )
        ts_ms = pa.compute.cast(array, pa.timestamp("ms"), safe=False)
        ms = pa.compute.cast(ts_ms, pa.int64())
        del ts_ms
        array = pa.compute.cast(ms, pa.timestamp("ms"))
        del ms
    # note: Decimal256 could not be cast to float
    elif isinstance(array.type, pa.Decimal128Type):
        array = pa.compute.cast(array, pa.float64())

    if hasattr(array, "num_chunks") and array.num_chunks > 1:
        # we have to coerce before combining chunks, because pyarrow panics if
        # offsets overflow
        if pa.types.is_string(array.type):
            array = pa.compute.cast(array, pa.large_utf8())
        elif pa.types.is_list(array.type):
            # pyarrow does not seem to support casting from list to large list,
            # so we convert to large list ourselves and do the re-alloc on the
            # polars/arrow side
            chunks = []
            for arr in array.iterchunks():
                chunks.append(pli.Series._from_arrow("", arr).to_arrow())
            array = pa.chunked_array(chunks)

        array = array.combine_chunks()
    return array
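
For context, a quick sketch of what the relocated helper does to a timezone-aware input. This example is illustrative only, not part of the commit; it assumes pyarrow is installed and imports the function from its new home shown above.

import pyarrow as pa

from polars.internals.construction import coerce_arrow

# A timezone-aware timestamp array; the value is an epoch in milliseconds
# (2021-01-01T00:00:00 UTC).
tz_aware = pa.array([1_609_459_200_000], type=pa.timestamp("ms", tz="UTC"))

# Emits the UserWarning from the function above, then returns a naive
# timestamp[ms] array: the tz annotation is dropped, the values are kept.
naive = coerce_arrow(tz_aware)
print(naive.type)  # timestamp[ms]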
44 changes: 0 additions & 44 deletions py-polars/polars/utils.py
@@ -1,53 +1,9 @@
import ctypes
import typing as tp
import warnings
from typing import Any, Dict, Tuple, Union

import numpy as np

try:
    import pyarrow as pa
    import pyarrow.compute

    _PYARROW_AVAILABLE = True
except ImportError:
    _PYARROW_AVAILABLE = False

import polars as pl # TODO: this is public API


def coerce_arrow(array: "pa.Array") -> "pa.Array":
    # also coerces timezone to naive representation
    # units are accounted for by pyarrow
    if "timestamp" in str(array.type):
        warnings.warn(
            "Conversion of (potentially) timezone aware to naive datetimes. TZ information may be lost",
        )
        ts_ms = pa.compute.cast(array, pa.timestamp("ms"), safe=False)
        ms = pa.compute.cast(ts_ms, pa.int64())
        del ts_ms
        array = pa.compute.cast(ms, pa.timestamp("ms"))
        del ms
    # note: Decimal256 could not be cast to float
    elif isinstance(array.type, pa.Decimal128Type):
        array = pa.compute.cast(array, pa.float64())

    if hasattr(array, "num_chunks") and array.num_chunks > 1:
        # we have to coerce before combining chunks, because pyarrow panics if
        # offsets overflow
        if pa.types.is_string(array.type):
            array = pa.compute.cast(array, pa.large_utf8())
        elif pa.types.is_list(array.type):
            # pyarrow does not seem to support casting from list to large list,
            # so we convert to large list ourselves and do the re-alloc on the
            # polars/arrow side
            chunks = []
            for arr in array.iterchunks():
                chunks.append(pl.from_arrow(arr).to_arrow())
            array = pa.chunked_array(chunks)

        array = array.combine_chunks()
    return array


def _process_null_values(
    null_values: Union[None, str, tp.List[str], Dict[str, str]] = None,
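
The moved body is otherwise verbatim; the one substantive edit is the chunk re-allocation call, which previously went through the public pl.from_arrow and now calls the internal constructor directly. A hedged sketch of the equivalence follows (illustrative, not from the commit; it assumes the internals namespace is importable under the pli alias used in construction.py).

import pyarrow as pa

import polars as pl
from polars import internals as pli

arr = pa.array(["a", "b"])

# Old path (utils.py): routed through the public API, which is exactly the
# dependency this commit removes from utils.py.
via_public = pl.from_arrow(arr).to_arrow()

# New path (internals/construction.py): the internal constructor, called
# directly; "" is the (empty) series name.
via_internal = pli.Series._from_arrow("", arr).to_arrow()

assert via_public.equals(via_internal)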
