Skip to content

Commit 21b7dad

Browse files
Added support for ingesting Arrow data types date32 and date64 (#534).
1 parent a13093a commit 21b7dad

File tree

9 files changed

+113
-2
lines changed

9 files changed

+113
-2
lines changed

doc/src/release_notes.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ Common Changes
4141
#) Added support for all of the signed and unsigned fixed width integer types
4242
when ingesting data frames supporting the Arrow PyCapsule interface.
4343
Previously only ``int64`` was supported.
44+
#) Added support for types ``date32`` and ``date64`` when ingesting data
45+
frames supporting the Arrow PyCapsule interface as requested
46+
(`issue 534 <https://github.com/oracle/python-oracledb/issues/534>`__).
4447
#) Added ``fetch_lobs`` and ``fetch_decimals`` parameters where applicable to
4548
the methods used for fetching rows or data frames from the database. Note
4649
that for the creation of pipeline operations, if these parameters are not

src/oracledb/arrow_impl.pxd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ cdef extern from "nanoarrow.h":
6262
cpdef enum ArrowType:
6363
NANOARROW_TYPE_BOOL
6464
NANOARROW_TYPE_BINARY
65+
NANOARROW_TYPE_DATE32
66+
NANOARROW_TYPE_DATE64
6567
NANOARROW_TYPE_DECIMAL128
6668
NANOARROW_TYPE_DOUBLE
6769
NANOARROW_TYPE_FIXED_SIZE_BINARY

src/oracledb/base_impl.pyx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ from .arrow_impl cimport (
5555
NANOARROW_TYPE_NA,
5656
NANOARROW_TYPE_BOOL,
5757
NANOARROW_TYPE_BINARY,
58+
NANOARROW_TYPE_DATE32,
59+
NANOARROW_TYPE_DATE64,
5860
NANOARROW_TYPE_DECIMAL128,
5961
NANOARROW_TYPE_DOUBLE,
6062
NANOARROW_TYPE_FIXED_SIZE_BINARY,

src/oracledb/impl/arrow/array.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@ cdef class ArrowArrayImpl:
365365
value[0] = (<int8_t*> ptr)[index]
366366
elif arrow_type == NANOARROW_TYPE_INT16:
367367
value[0] = (<int16_t*> ptr)[index]
368-
elif arrow_type == NANOARROW_TYPE_INT32:
368+
elif arrow_type in (NANOARROW_TYPE_INT32, NANOARROW_TYPE_DATE32):
369369
value[0] = (<int32_t*> ptr)[index]
370370
else:
371371
value[0] = (<int64_t*> ptr)[index]

src/oracledb/impl/arrow/schema.pyx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,8 @@ cdef class ArrowSchemaImpl:
129129
self.fixed_size = schema_view.fixed_size
130130
if schema_view.type == NANOARROW_TYPE_TIMESTAMP:
131131
self._set_time_unit(schema_view.time_unit)
132+
elif schema_view.type == NANOARROW_TYPE_DATE64:
133+
self._set_time_unit(NANOARROW_TIME_UNIT_MILLI)
132134
elif schema_view.type in (
133135
NANOARROW_TYPE_FIXED_SIZE_LIST,
134136
NANOARROW_TYPE_LIST
@@ -143,6 +145,8 @@ cdef class ArrowSchemaImpl:
143145
NANOARROW_TYPE_BINARY,
144146
NANOARROW_TYPE_BOOL,
145147
NANOARROW_TYPE_DECIMAL128,
148+
NANOARROW_TYPE_DATE32,
149+
NANOARROW_TYPE_DATE64,
146150
NANOARROW_TYPE_DOUBLE,
147151
NANOARROW_TYPE_FIXED_SIZE_BINARY,
148152
NANOARROW_TYPE_FLOAT,

src/oracledb/impl/base/converters.pyx

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ cdef object convert_arrow_to_oracle_data(OracleMetadata metadata,
9090
rb = &data.buffer.as_raw_bytes
9191
array_impl.get_bytes(array_index, &data.is_null, <char**> &rb.ptr,
9292
&rb.num_bytes)
93-
elif arrow_type == NANOARROW_TYPE_TIMESTAMP:
93+
elif arrow_type in (NANOARROW_TYPE_TIMESTAMP, NANOARROW_TYPE_DATE64):
9494
array_impl.get_int(arrow_type, array_index, &data.is_null, &int_value)
9595
if not data.is_null:
9696
seconds = int_value // array_impl.schema_impl.time_factor
@@ -103,6 +103,10 @@ cdef object convert_arrow_to_oracle_data(OracleMetadata metadata,
103103
useconds //= 1_000
104104
return EPOCH_DATE + \
105105
cydatetime.timedelta_new(days, seconds, useconds)
106+
elif arrow_type == NANOARROW_TYPE_DATE32:
107+
array_impl.get_int(arrow_type, array_index, &data.is_null, &int_value)
108+
if not data.is_null:
109+
return EPOCH_DATE + cydatetime.timedelta_new(int_value, 0, 0)
106110
elif arrow_type == NANOARROW_TYPE_DECIMAL128:
107111
temp_bytes = array_impl.get_decimal(array_index, &data.is_null)
108112
if not data.is_null:

src/oracledb/impl/base/metadata.pyx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,8 @@ cdef class OracleMetadata:
187187
metadata.dbtype = DB_TYPE_BOOLEAN
188188
elif arrow_type == NANOARROW_TYPE_TIMESTAMP:
189189
metadata.dbtype = DB_TYPE_TIMESTAMP
190+
elif arrow_type in (NANOARROW_TYPE_DATE32, NANOARROW_TYPE_DATE64):
191+
metadata.dbtype = DB_TYPE_DATE
190192
elif arrow_type == NANOARROW_TYPE_LARGE_STRING:
191193
metadata.dbtype = DB_TYPE_LONG
192194
elif arrow_type == NANOARROW_TYPE_LARGE_BINARY:

tests/test_8900_dataframe_ingestion.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -898,6 +898,53 @@ def test_8919(self):
898898
fetched_values = [int(s) for s, in self.cursor]
899899
self.assertEqual(fetched_values, values)
900900

901+
def test_8920(self):
    "8920 - test ingestion with alternative date types"
    # one sample column per Arrow date type; the None entry in the middle
    # verifies that nulls survive the round trip through the database
    date32_values = [
        datetime.datetime(1915, 9, 11),
        None,
        datetime.datetime(2045, 2, 28),
    ]
    date64_values = [
        datetime.datetime(1905, 3, 30),
        None,
        datetime.datetime(2060, 10, 5),
    ]
    cases = [
        (date32_values, pyarrow.date32()),
        (date64_values, pyarrow.date64()),
    ]
    column_names = ["Id", "DateOfBirth"]
    for expected_dates, arrow_type in cases:
        with self.subTest(dtype=str(arrow_type)):
            # build a two column Arrow table: a small integer key used
            # for ordering and the date column under test
            id_column = pyarrow.array([1, 2, 3], pyarrow.int8())
            date_column = pyarrow.array(expected_dates, arrow_type)
            table = pyarrow.table([id_column, date_column], column_names)

            # start from an empty table so that each scenario only sees
            # the rows it ingested itself
            self.cursor.execute("delete from TestDataFrame")
            self.cursor.executemany(
                """
                insert into TestDataFrame (Id, DateOfBirth)
                values (:1, :2)
                """,
                table,
            )
            self.conn.commit()

            # read the dates back in key order and compare them with the
            # Python values used to build the Arrow array
            self.cursor.execute(
                """
                select DateOfBirth
                from TestDataFrame
                order by Id
                """
            )
            round_tripped = [
                fetched_date for (fetched_date,) in self.cursor
            ]
            self.assertEqual(round_tripped, expected_dates)
947+
901948

902949
if __name__ == "__main__":
903950
test_env.run_test_cases()

tests/test_9000_dataframe_ingestion_async.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,53 @@ async def test_9019(self):
899899
fetched_values = [int(s) async for s, in self.cursor]
900900
self.assertEqual(fetched_values, values)
901901

902+
async def test_9020(self):
    "9020 - test ingestion with alternative date types"
    # one sample column per Arrow date type; the None entry in the middle
    # verifies that nulls survive the round trip through the database
    date32_values = [
        datetime.datetime(1915, 9, 11),
        None,
        datetime.datetime(2045, 2, 28),
    ]
    date64_values = [
        datetime.datetime(1905, 3, 30),
        None,
        datetime.datetime(2060, 10, 5),
    ]
    cases = [
        (date32_values, pyarrow.date32()),
        (date64_values, pyarrow.date64()),
    ]
    column_names = ["Id", "DateOfBirth"]
    for expected_dates, arrow_type in cases:
        with self.subTest(dtype=str(arrow_type)):
            # build a two column Arrow table: a small integer key used
            # for ordering and the date column under test
            id_column = pyarrow.array([1, 2, 3], pyarrow.int8())
            date_column = pyarrow.array(expected_dates, arrow_type)
            table = pyarrow.table([id_column, date_column], column_names)

            # start from an empty table so that each scenario only sees
            # the rows it ingested itself
            await self.cursor.execute("delete from TestDataFrame")
            await self.cursor.executemany(
                """
                insert into TestDataFrame (Id, DateOfBirth)
                values (:1, :2)
                """,
                table,
            )
            await self.conn.commit()

            # read the dates back in key order and compare them with the
            # Python values used to build the Arrow array
            await self.cursor.execute(
                """
                select DateOfBirth
                from TestDataFrame
                order by Id
                """
            )
            round_tripped = [
                fetched_date async for (fetched_date,) in self.cursor
            ]
            self.assertEqual(round_tripped, expected_dates)
948+
902949

903950
if __name__ == "__main__":
904951
test_env.run_test_cases()

0 commit comments

Comments
 (0)