Skip to content

Commit

Permalink
FIX-#3945: bump omnisci version to 5.10.1 (#3946)
Browse files Browse the repository at this point in the history
Co-authored-by: Igoshev, Yaroslav <yaroslav.igoshev@intel.com>
Signed-off-by: Dmitry Chigarev <dmitry.chigarev@intel.com>
  • Loading branch information
dchigarev and YarShev committed Jan 26, 2022
1 parent 4b70725 commit 018515f
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 19 deletions.
14 changes: 13 additions & 1 deletion modin/core/dataframe/pandas/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from collections import OrderedDict
import numpy as np
import pandas
import datetime
from pandas.core.indexes.api import ensure_index, Index, RangeIndex
from pandas.core.dtypes.common import is_numeric_dtype, is_list_like
from typing import List, Hashable, Optional, Callable, Union, Dict
Expand Down Expand Up @@ -2672,7 +2673,18 @@ def _arrow_type_to_dtype(cls, arrow_type):
object
Any dtype compatible with pandas.
"""
res = arrow_type.to_pandas_dtype()
import pyarrow

try:
res = arrow_type.to_pandas_dtype()
# Conversion to pandas is not implemented for some arrow types,
# perform manual conversion for them:
except NotImplementedError:
if pyarrow.types.is_time(arrow_type):
res = np.dtype(datetime.time)
else:
raise

if not isinstance(res, (np.dtype, str)):
return np.dtype(res)
return res
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,15 +191,23 @@ def _get_unsupported_cols(cls, obj):
else:
obj = at

def is_supported_dtype(dtype):
    """
    Check whether the passed pyarrow `dtype` is supported by OmniSci.

    Parameters
    ----------
    dtype : pyarrow.DataType
        Arrow type of a column to check.

    Returns
    -------
    bool
        True for string, time and dictionary arrow types, and for any other
        type whose pandas equivalent is not the generic ``object`` dtype.
        False otherwise, including types that have no pandas conversion.
    """
    # These arrow types are accepted regardless of what they map to in pandas.
    always_accepted = (
        pyarrow.types.is_string,
        pyarrow.types.is_time,
        pyarrow.types.is_dictionary,
    )
    if any(matches(dtype) for matches in always_accepted):
        return True

    try:
        # Anything that degrades to the 'object' dtype in pandas is unsupported.
        return dtype.to_pandas_dtype() != np.dtype("O")
    except NotImplementedError:
        # Arrow has no pandas conversion for this type at all.
        return False

return (
obj,
[
field.name
for field in obj.schema
if not isinstance(field.type, pyarrow.DictionaryType)
and field.type.to_pandas_dtype() == np.dtype("O")
and field.type != "string"
],
[field.name for field in obj.schema if not is_supported_dtype(field.type)],
)

@classmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import modin.pandas as pd
from modin.utils import try_cast_to_pandas
import pandas
import datetime
import numpy as np
from pandas.api.types import is_datetime64_any_dtype
import pyarrow as pa

Expand Down Expand Up @@ -88,21 +90,53 @@ def align_datetime_dtypes(*dfs):
Passed Modin frames may be cast to pandas in the result.
"""
datetime_cols = {}
time_cols = set()
for df in dfs:
for col, dtype in df.dtypes.items():
# If we already decided to cast this column to DateTime no more actions are needed
if col not in datetime_cols and is_datetime64_any_dtype(dtype):
datetime_cols[col] = dtype

casted_dfs = (
# datetime.time is considered to be an 'object' dtype in pandas, which is why
# we have to explicitly check the type of the values in the column
elif (
dtype == np.dtype("O")
and col not in time_cols
# OmniSci has difficulties with empty frames, so explicitly skip them
# https://github.com/modin-project/modin/issues/3428
and len(df) > 0
and all(
isinstance(val, datetime.time) or pandas.isna(val)
for val in df[col]
)
):
time_cols.add(col)

if len(datetime_cols) == 0 and len(time_cols) == 0:
return dfs

def convert_to_time(value):
    """
    Convert passed value to `datetime.time`.

    Parameters
    ----------
    value : datetime.time, str, int or NA-like
        Value to convert. Strings are parsed as ISO 8601 time strings,
        any other non-missing value is passed to the `datetime.time`
        constructor as the hour component.

    Returns
    -------
    datetime.time or NA-like
        The converted time, or `value` itself when it is a missing value.
    """
    if isinstance(value, datetime.time):
        return value
    if isinstance(value, str):
        return datetime.time.fromisoformat(value)
    # A column is treated as a time column when every value in it is either a
    # `datetime.time` or NA, so missing values have to pass through untouched:
    # `datetime.time(None)` / `datetime.time(nan)` would raise a TypeError.
    if pandas.isna(value):
        return value
    return datetime.time(value)

time_cols_list = list(time_cols)
casted_dfs = []
for df in dfs:
# OmniSci has difficulties with casting to certain dtypes (e.g. datetime64),
# so casting it to pandas before doing 'astype'
tuple(try_cast_to_pandas(df).astype(datetime_cols) for df in dfs)
# This is required so we don't try to cast empty OmniSci frames to pandas:
# https://github.com/modin-project/modin/issues/3428
if len(datetime_cols)
else dfs
)
# so casting it to pandas
pandas_df = try_cast_to_pandas(df)
if datetime_cols:
pandas_df = pandas_df.astype(datetime_cols)
if time_cols:
pandas_df[time_cols_list] = pandas_df[time_cols_list].applymap(
convert_to_time
)
casted_dfs.append(pandas_df)

return casted_dfs


Expand Down
4 changes: 2 additions & 2 deletions requirements/env_omnisci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ channels:
- conda-forge
dependencies:
- pandas==1.3.5
- pyarrow==5.0.0
- pyarrow=6
- numpy>=1.16.5
- fsspec
- pip
Expand All @@ -12,7 +12,7 @@ dependencies:
- pytest-xdist>=2.1.0
- coverage<5.0
- pygithub==1.53
- pyomniscidbe<=5.8
- pyomniscidbe==5.10.1
- s3fs>=2021.8
- psutil
- openpyxl
Expand Down

0 comments on commit 018515f

Please sign in to comment.