Skip to content

Commit

Permalink
fix for datetime from pandas
Browse files Browse the repository at this point in the history
  • Loading branch information
ianna committed Jun 10, 2021
1 parent 48e2d2c commit 2aa2ed5
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 42 deletions.
14 changes: 10 additions & 4 deletions src/awkward/operations/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -970,10 +970,16 @@ def to_list(array):
elif isinstance(array, ak.layout.NumpyArray):
if array.format.upper().startswith("M"):
return (
ak.nplike.of(array)
.asarray(array.view_int64)
.view(array.format)
.tolist()
[
x
for x in ak.nplike.of(array)
.asarray(array.view_int64)
.view(array.format)
]
# FIXME: .tolist() returns
# [[1567416600000000000], [1568367000000000000], [1569096000000000000]]
# instead of [numpy.datetime64('2019-09-02T09:30:00'), numpy.datetime64('2019-09-13T09:30:00'), numpy.datetime64('2019-09-21T20:00:00')]
# see test_from_pandas() test
)
else:
return ak.nplike.of(array).asarray(array).tolist()
Expand Down
48 changes: 14 additions & 34 deletions src/python/content.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2252,35 +2252,6 @@ NumpyArray_from_datetime(const std::string& name,
ak::kernel::lib::cpu);
}

// Builds an ak::NumpyArray view over the memory of `array`, which the caller
// obtains from the `values` attribute of a pandas object holding datetimes.
// No element data is copied here: the buffer address is taken from
// `array.ctypes.data` and wrapped in a shared_ptr.
//
// `name` is accepted for signature parity with the other NumpyArray_from_*
// helpers and is not used.
const ak::NumpyArray
NumpyArray_from_pandas_datetime(const std::string& name,
                                const py::object& array,
                                const py::object& identities,
                                const py::object& parameters) {
  // Buffer geometry exactly as the Python object reports it.
  const auto shape = array.attr("shape").cast<std::vector<ssize_t>>();
  const auto strides = array.attr("strides").cast<std::vector<ssize_t>>();

  // `ctypes.data` is the integer address of the first element.
  const ssize_t address =
    py::cast<ssize_t>(array.attr("ctypes").attr("data"));
  void* const ptr = reinterpret_cast<void*>(address);

  const py::object np_dtype = array.attr("dtype");
  const std::string dtype_name = py::cast<std::string>(py::str(np_dtype));
  const ak::util::dtype dtype = ak::util::name_to_dtype(dtype_name);

  // `dtype.str` with its first character removed is used as the format
  // string (presumably dropping the byte-order prefix, e.g. "<M8[ns]" ->
  // "M8[ns]" -- TODO confirm against NumPy's typestr layout).
  const std::string format =
    py::cast<std::string>(py::str(np_dtype.attr("str"))).substr(1);
  const auto itemsize = py::dtype(np_dtype).itemsize();

  // The deleter is built from the owning PyObject; presumably it keeps the
  // Python array alive for as long as this buffer is referenced -- verify
  // against pyobject_deleter.
  std::shared_ptr<void> buffer(ptr, pyobject_deleter<void>(array.ptr()));

  return ak::NumpyArray(unbox_identities_none(identities),
                        dict2parameters(parameters),
                        buffer,
                        shape,
                        strides,
                        0,  // byte offset into the buffer
                        itemsize,
                        format,
                        dtype,
                        ak::kernel::lib::cpu);
}

py::class_<ak::NumpyArray, std::shared_ptr<ak::NumpyArray>, ak::Content>
make_NumpyArray(const py::handle& m, const std::string& name) {
return content_methods(py::class_<ak::NumpyArray,
Expand All @@ -2300,6 +2271,7 @@ make_NumpyArray(const py::handle& m, const std::string& name) {
const py::object& identities,
const py::object& parameters) -> ak::NumpyArray {
std::string module = anyarray.get_type().attr("__module__").cast<std::string>();

if (module.rfind("cupy.", 0) == 0) {
return NumpyArray_from_cupy(name, anyarray, identities, parameters);
}
Expand All @@ -2313,14 +2285,22 @@ make_NumpyArray(const py::handle& m, const std::string& name) {
return NumpyArray_from_datetime(name, anyarray, identities, parameters);
}
}
else if (module.rfind("pandas.", 0) == 0) {
if (py::cast<bool>(anyarray.attr("_is_datelike_mixed_type"))) {
return NumpyArray_from_pandas_datetime(name, anyarray.attr("values"), identities, parameters);
}
}

py::array array = anyarray.cast<py::array>();

if (!PyObject_CheckBuffer(anyarray.ptr())) {
// anyarray does not support buffer protocol
if (py::hasattr(array, "dtype") &&
!py::cast<std::string>(py::str(py::dtype(array.attr("dtype")))).empty()) {
const auto& data_type = ak::util::dtype_to_format(ak::util::name_to_dtype(
py::cast<std::string>(py::str(py::dtype(array.attr("dtype"))))));
if (data_type == "M" || data_type == "m") {
// it's a datetime or timedelta
return NumpyArray_from_datetime(name, array, identities, parameters);
}
}
}

py::buffer_info info = array.request();
if (info.ndim == 0) {
throw std::invalid_argument(
Expand Down
37 changes: 33 additions & 4 deletions tests/test_0835-datetime-type.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,19 +472,48 @@ def test_ufunc_mul():
assert ak.Array([np.timedelta64(3, "D")])[0] == np.timedelta64(3, "D")


def test_NumpyArray_layout():
    """Round-trip timestamps through ``ak.layout.NumpyArray`` and ``ak.to_list``.

    The same three timestamps are checked twice: once given as ISO-formatted
    strings and once given as ``numpy.datetime64`` scalars. In both cases
    ``ak.to_list`` must hand back a list equal to the input.
    """
    iso_stamps = [
        "2019-09-02T09:30:00",
        "2019-09-13T09:30:00",
        "2019-09-21T20:00:00",
    ]

    # Plain strings in, the same plain strings out.
    from_strings = ak.layout.NumpyArray(list(iso_stamps))
    assert ak.to_list(from_strings) == iso_stamps

    # numpy.datetime64 scalars in, equal numpy.datetime64 scalars out.
    datetimes = [np.datetime64(stamp) for stamp in iso_stamps]
    from_datetimes = ak.layout.NumpyArray(list(datetimes))
    assert ak.to_list(from_datetimes) == datetimes


# pandas is an optional dependency: when it cannot be imported, pytest skips
# the rest of this module instead of reporting import errors.
pandas = pytest.importorskip("pandas")


def test_from_pandas():
    """Convert a pandas datetime column to ``numpy.datetime64`` values.

    Two entry points are checked against the same expected list:
    ``ak.layout.NumpyArray`` built from the DataFrame itself, and
    ``ak.Array`` built from the DataFrame's ``.values`` array.
    """
    values = {"time": ["20190902093000", "20190913093000", "20190921200000"]}
    df = pandas.DataFrame(values, columns=["time"])
    # Parse the compact YYYYmmddHHMMSS strings into a datetime column.
    df["time"] = pandas.to_datetime(df["time"], format="%Y%m%d%H%M%S")

    expected = [
        np.datetime64("2019-09-02T09:30:00"),
        np.datetime64("2019-09-13T09:30:00"),
        np.datetime64("2019-09-21T20:00:00"),
    ]

    # Directly from the DataFrame.
    array = ak.layout.NumpyArray(df)
    assert ak.to_list(array) == expected

    # From the underlying NumPy array.
    array2 = ak.Array(df.values)
    assert ak.to_list(array2) == expected

0 comments on commit 2aa2ed5

Please sign in to comment.